Skip to content

Commit da5a2e2

Browse files
committed
made plot size configurable (per insert correlation) and made correlation visible (per barcode correlation)
1 parent ae4aa83 commit da5a2e2

3 files changed

Lines changed: 53 additions & 21 deletions

File tree

workflow/rules/statistic/correlation.smk

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,9 @@ rule statistic_correlation_calculate:
311311
if "label_file" in config["experiments"][wc.project]
312312
else ""
313313
),
314+
plot_width=29,
315+
plot_height=17,
316+
legend_nrow=6,
314317
log:
315318
temp(
316319
"results/logs/statistic/correlation/calculate.{project}.{condition}.{config}.{assignment}.log"
@@ -323,6 +326,9 @@ rule statistic_correlation_calculate:
323326
--files {params.files} \
324327
--replicates {params.replicates} \
325328
--threshold {params.thresh} \
329+
--plot_width {params.plot_width} \
330+
--plot_height {params.plot_height} \
331+
--legend_nrow {params.legend_nrow} \
326332
--outdir {params.outdir} &> {log}
327333
"""
328334

workflow/scripts/count/plot_perBCCounts_correlation.R

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,21 +85,21 @@ plot_correlations_dna <- function(data, plot_data, condition, r1, r2, name) {
8585
geom_point() +
8686
xlab(sprintf(paste("log2 Normalized DNA count per barcode,\n replicate", r1))) +
8787
ylab(sprintf(paste("log2 Normalized DNA count per barcode,\n replicate", r2))) +
88-
geom_text(x = min + 0.5, y = max - 0.5, label = sprintf(" r = %.2f", cor(data$DNA_normalized_log2.x, data$DNA_normalized_log2.y, method = "pearson")), size = 10) +
89-
geom_text(x = min + 0.5, y = max - 1.0, label = sprintf("rho = %.2f", cor(data$DNA_normalized.x, data$DNA_normalized.y, method = "spearman")), size = 10) +
88+
geom_text(x = -Inf, y = Inf, hjust=0, vjust=1, label = sprintf(" r = %.2f", cor(data$DNA_normalized_log2.x, data$DNA_normalized_log2.y, method = "pearson")), size = 10) +
89+
geom_text(x = -Inf, y = Inf, hjust=0, vjust=2.1, label = sprintf("rho = %.2f", cor(data$DNA_normalized.x, data$DNA_normalized.y, method = "spearman")), size = 10) +
9090
geom_abline(intercept = 0, slope = 1) +
9191
theme_classic(base_size = 30)
9292
return(dna_p)
9393
}
9494
plot_correlations_rna <- function(data, plot_data, condition, r1, r2, name) {
95-
max <- max(data$`RNA_normalized.y_log2`)
96-
min <- min(data$`RNA_normalized.x_log2`)
95+
max <- max(data$`RNA_normalized_log2.y`)
96+
min <- min(data$`RNA_normalized_log2.x`)
9797
rna_p <- ggplot(plot_data, aes(RNA_normalized_log2.x, RNA_normalized_log2.y)) +
9898
geom_point() +
9999
xlab(sprintf(paste("log2 Normalized RNA count per barcode,\n replicate", r1))) +
100100
ylab(sprintf(paste("log2 Normalized RNA count per barcode,\n replicate", r2))) +
101-
geom_text(x = min + 0.5, y = max - 0.5, label = sprintf(" r = %.2f", cor(data$RNA_normalized_log2.x, data$RNA_normalized_log2.y, method = "pearson")), size = 10) +
102-
geom_text(x = min + 0.5, y = max - 1.0, label = sprintf("rho = %.2f", cor(data$RNA_normalized.x, data$RNA_normalized.y, method = "spearman")), size = 10) +
101+
geom_text(x = -Inf, y = Inf, hjust=0, vjust=1, label = sprintf(" r = %.2f", cor(data$RNA_normalized_log2.x, data$RNA_normalized_log2.y, method = "pearson")), size = 10) +
102+
geom_text(x = -Inf, y = Inf, hjust=0, vjust=2.1, label = sprintf("rho = %.2f", cor(data$RNA_normalized.x, data$RNA_normalized.y, method = "spearman")), size = 10) +
103103
geom_abline(intercept = 0, slope = 1) +
104104
theme_classic(base_size = 30)
105105
return(rna_p)
@@ -111,8 +111,8 @@ plot_correlations_ratio <- function(data, plot_data, condition, r1, r2, name) {
111111
geom_point() +
112112
xlab(sprintf(paste("log2 RNA/DNA per barcode,\n replicate", r1))) +
113113
ylab(sprintf(paste("log2 RNA/DNA per barcode,\n replicate", r2))) +
114-
geom_text(x = min + 0.5, y = max - 0.5, label = sprintf(" r = %.2f", cor(data$Ratio_log2.x, res$Ratio_log2.y, method = "pearson")), size = 10) +
115-
geom_text(x = min + 0.5, y = max - 1.0, label = sprintf("rho = %.2f", cor(data$Ratio.x, data$Ratio.y, method = "spearman")), size = 10) +
114+
geom_text(x = -Inf, y = Inf, hjust=0, vjust=1, label = sprintf(" r = %.2f", cor(data$Ratio_log2.x, res$Ratio_log2.y, method = "pearson")), size = 10) +
115+
geom_text(x = -Inf, y = Inf, hjust=0, vjust=2.1, label = sprintf("rho = %.2f", cor(data$Ratio.x, data$Ratio.y, method = "spearman")), size = 10) +
116116
geom_abline(intercept = 0, slope = 1) +
117117
theme_classic(base_size = 30)
118118
return(ratio_p)
@@ -225,4 +225,5 @@ if (data %>% nrow() > 1) {
225225
writeCorrelationPlots(plots_correlations_rna, sprintf("%s_barcode_RNA_pairwise.png", outdir))
226226
writeCorrelationPlots(plots_correlations_ratio, sprintf("%s_barcode_Ratio_pairwise.png", outdir))
227227

228-
}
228+
}
229+

workflow/scripts/count/plot_perInsertCounts_correlation.R

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,31 @@ option_list <- list(
3131
type = "integer",
3232
default = 10,
3333
help = "Number of required barcodes (default 10)"
34+
),
35+
make_option(
36+
c("-w", "--plot_width"),
37+
type = "integer",
38+
default = 29,
39+
help = "Width of the plots created by this script (default 29)"
40+
),
41+
make_option(
42+
c("-h", "--plot_height"),
43+
type = "integer",
44+
default = 17,
45+
help = "Height of the plots created by this script (default 17)"
46+
),
47+
make_option(
48+
c("-n", "--legend_nrow"),
49+
type = "integer",
50+
default = 6,
51+
help = "Number of rows in the legend of the plots created by this script (default 6)"
3452
),
3553
make_option(c("-o", "--outdir"),
3654
type = "character",
3755
help = "Outdir of the plots and table.")
3856
)
3957

40-
parser <- OptionParser(option_list = option_list)
58+
# Disable optparse's automatic -h/--help option: the short flag -h is already
# used by --plot_height in option_list, so the built-in help would collide.
# Note: R's logical constant is FALSE; `False` is an undefined object and
# makes the script fail with "object 'False' not found".
parser <- OptionParser(add_help_option = FALSE, option_list = option_list)
4159
arguments <- parse_args(parser, positional_arguments = TRUE)
4260
opt <- arguments$options
4361

@@ -72,7 +90,6 @@ if ("label" %in% names(opt)) {
7290
use_labels <- FALSE
7391
}
7492

75-
7693
# replicates and count files
7794
files <- strsplit(opt$files, ",")[[1]]
7895
replicates <- strsplit(opt$replicates, ",")[[1]]
@@ -85,13 +102,12 @@ data["Condition"] <- cond
85102

86103
print(data)
87104

88-
# pairwise comparison only if more than one replicate
89105
thresh <- opt$threshold
90106

91107
plot_correlations_dna <- function(data, condition, r1, r2, name) {
92108
dna_p <-
93109
ggplot(data, aes(dna_normalized_log2.x, dna_normalized_log2.y)) +
94-
geom_point(aes(colour = label.x), show.legend = TRUE) +
110+
geom_point(aes(colour = label.x)) +
95111
xlim(-5, 5) +
96112
ylim(-5, 5) +
97113
xlab(sprintf(
@@ -126,13 +142,16 @@ plot_correlations_dna <- function(data, condition, r1, r2, name) {
126142
size = 10
127143
) +
128144
geom_abline(intercept = 0, slope = 1) +
129-
theme_classic(base_size = 30)
145+
theme_classic(base_size = 30) +
146+
theme(legend.position="bottom") + # show legend below the plot
147+
guides(fill=guide_legend(nrow=legend_nrow, byrow=TRUE)) + # show labels in rows
148+
labs(color = "label\n") # legend name
130149
return(dna_p)
131150
}
132151
plot_correlations_rna <- function(data, condition, r1, r2, name) {
133152
rna_p <-
134153
ggplot(data, aes(rna_normalized_log2.x, rna_normalized_log2.y)) +
135-
geom_point(aes(colour = label.x), show.legend = TRUE) +
154+
geom_point(aes(colour = label.x)) +
136155
xlim(-5, 5) +
137156
ylim(-5, 5) +
138157
xlab(sprintf(
@@ -165,12 +184,15 @@ plot_correlations_rna <- function(data, condition, r1, r2, name) {
165184
size = 10
166185
) +
167186
geom_abline(intercept = 0, slope = 1) +
168-
theme_classic(base_size = 30)
187+
theme_classic(base_size = 30) +
188+
theme(legend.position="bottom") + # show legend below the plot
189+
guides(fill=guide_legend(nrow=legend_nrow, byrow=TRUE)) + # show labels in rows
190+
labs(color = "label\n") # legend name
169191
return(rna_p)
170192
}
171193
plot_correlations_ratio <- function(data, condition, r1, r2, name) {
172194
ratio_p <- ggplot(data, aes(ratio_log2.x, ratio_log2.y)) +
173-
geom_point(aes(colour = label.x), show.legend = TRUE) +
195+
geom_point(aes(colour = label.x)) +
174196
xlim(-5, 5) +
175197
ylim(-5, 5) +
176198
xlab(sprintf(paste(
@@ -198,7 +220,10 @@ plot_correlations_ratio <- function(data, condition, r1, r2, name) {
198220
size = 10
199221
) +
200222
geom_abline(intercept = 0, slope = 1) +
201-
theme_classic(base_size = 30)
223+
theme_classic(base_size = 30) +
224+
theme(legend.position="bottom") + # show legend below the plot
225+
guides(fill=guide_legend(nrow=legend_nrow, byrow=TRUE)) + # show labels in rows
226+
labs(color = "label\n") # legend name
202227
return(ratio_p)
203228
}
204229

@@ -268,8 +293,8 @@ write_correlation_plots <- function(plots, name) {
268293

269294
ggplot2::ggsave(name,
270295
correlation_plots,
271-
width = 15,
272-
height = 10 * length(plots))
296+
width = plot_width,
297+
height = plot_height * length(plots))
273298
}
274299

275300
write_correlation <- function(correlations, name) {
@@ -324,7 +349,7 @@ if (use_labels) {
324349
all$label <- "NA"
325350
}
326351
}
327-
352+
# pairwise comparison only if more than one replicate
328353
if (data %>% nrow() > 1 && nrow(all) > 1) {
329354
print("Pairwise comparisons")
330355
# make pairwise combinations

0 commit comments

Comments
 (0)