library(ggplot2) library(plyr) library(magrittr) library(dplyr) library(stringr) args = commandArgs(trailingOnly=TRUE) lm_eqn = function(plot) { m = lm(count ~ conc, plot); eq = sprintf("y = %sx + %s, R² = %s\nr = %s, p = %s", format(unname(coef(m)[2]), digits = 2), format(unname(coef(m)[1]), digits = 2), format(summary(m)$r.squared, digits = 3), format(cor(plot$count,plot$conc, method = "pearson"), digits = 2), format(cor(plot$count,plot$conc, method = "spearman"), digits = 3)) as.character(eq); } cbPalette <- c("#22635b", "#87bd5b") plot <- read.table("ercc_quantification_all.tsv", header=F, as.is=T, sep="\t") colnames(plot)<-c("sample","plat", "spikeIn","count", "conc", "type", "seqTech", "category") plot$seqTech=factor(plot$seqTech, levels=c("ont-Crg-CapTrap","ont-Ucsc-dRNA","ont-Ucsc-PcrOnt", "ont-Ucsc-R2C2", "pacBioSII-Uci-CapTrap","pacBioSII-Uci-SmartSeq2"), label = c("ont-CapTrap","ont-dRNA","ont-PcrOnt","ont-R2C2","pacBioSII-CapTrap","pacBioSII-SmartSeq2")) plot$sample_orig = plot$sample plot$sample = str_sub(plot$sample, 1, -7) plot$category = factor(plot$category, label=c("Human","Mouse")) plot = plot[ grepl("Rep1", plot$sample_orig), ] plot$sample_orig = gsub("Rep1", "", plot$sample_orig) missing = plot[ plot$count != 0, c("seqTech", "category", "sample_orig", "sample", "count")] %>% group_by(seqTech, category, sample_orig, sample) %>% summarize( detected = n()) missing$plat = sapply(str_split(missing$seqTech, "-"), head, 1) pdf("ercc_quantification_all.detected.pdf", bg = "white", width = 60, height = 25) ggplot(missing, aes(x=seqTech, y=detected, color = sample_orig, fill=seqTech)) + geom_bar( stat = "identity", position = position_dodge()) + scale_fill_manual(values = c("#0c406a","#21554c","#626c31","#458499","#8f1e15","#c45e2e")) + scale_color_manual(values=rep("black",54)) + theme_bw() + ylab("# ERCC detected") + xlab ("Technology") + ylim(c(0,92)) + theme(axis.text.x = element_text(size = 35, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 40), axis.title.x = element_text(size = 40), axis.title.y = element_text(size = 40), legend.text=element_text(size=30), legend.title=element_blank(), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("LRGASP ERCC Detection") + facet_wrap(category ~ sample, scales="free_y", ncol = 3) + #scale_x_discrete(guide = guide_axis(n.dodge = 2)) + theme(axis.text.x = element_text(angle = 60, hjust=1)) + geom_text(aes(label=detected), vjust=-1, size = 15, position=position_dodge(width = .9)) dev.off() pdf("ercc_quantification_all.detected.ont.pdf", bg = "white", width = 60, height = 25) ggplot(missing[missing$plat == "ont",], aes(x=seqTech, y=detected, color = sample_orig, fill=seqTech)) + geom_bar( stat = "identity", position = position_dodge()) + scale_fill_manual(values = c("#0c406a","#21554c","#626c31","#458499")) + scale_color_manual(values=rep("black", 36)) + theme_bw() + ylab("# ERCC detected") + xlab ("Technology") + ylim(c(0,92)) + theme(axis.text.x = element_text(size = 35, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 40), axis.title.x = element_text(size = 40), axis.title.y = element_text(size = 40), legend.text=element_text(size=30), legend.title=element_blank(), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("LRGASP ERCC Detection") + facet_wrap( category ~ sample, scales="free_y", ncol = 3) + #scale_x_discrete(guide = guide_axis(n.dodge = 2)) + theme(axis.text.x = element_text(angle = 60, hjust=1)) + geom_text(aes(label=detected), vjust=-1, size = 15, position=position_dodge(width = .9)) dev.off() pdf("ercc_quantification_all.detected.pbio.pdf", bg = "white", width = 60, height = 25) ggplot(missing[missing$plat != "ont",], aes(x=seqTech, y=detected, color = sample_orig, fill=seqTech)) + geom_bar( stat = "identity", position = position_dodge()) + scale_fill_manual(values = c("#8f1e15","#c45e2e")) + scale_color_manual(values=rep("black", 18)) + theme_bw() + ylab("# ERCC detected") + xlab ("Technology") + ylim(c(0,92)) + theme(axis.text.x = element_text(size = 35, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 40), axis.title.x = element_text(size = 40), axis.title.y = element_text(size = 40), legend.text=element_text(size=30), legend.title=element_blank(), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("LRGASP ERCC Detection") + facet_wrap( category ~ sample, scales="free_y", ncol = 3) + #scale_x_discrete(guide = guide_axis(n.dodge = 2)) + theme(axis.text.x = element_text(angle = 60, hjust=1)) + geom_text(aes(label=detected), vjust=-1, size = 15, position=position_dodge(width = .9)) dev.off() plot$count[plot$count == 0] = 0.0001 plot$count=log10(plot$count) plot$conc=log10(plot$conc) forE=plot[plot$count > -2.5,] finalE=forE[forE$conc > -2.5,] # text to annotate graphs graphLabels = data.frame( matrix(nrow = length(unique(plot$seqTech)), ncol = 4) ) colnames(graphLabels) = c("seqTech", "label", "sample", "plat") index = 1 for (i in unique(plot$seqTech)) { for (s in unique(plot$sample)) { graphLabels[index, "seqTech"] = i graphLabels[index, "sample"] = s graphLabels[index, "label"] = lm_eqn(finalE[finalE$seqTech == i & finalE$sample == s, ]) #r2 computed without zeroes graphLabels[index, "plat"] = unlist(str_split(i, "-"))[1] index = index + 1 } } pdf("ercc_quantification_all.pdf", bg = "white", width = 50, height = 35) ggplot(plot, aes(x=conc, y=count, color="type")) + geom_point(size=5, shape=19, alpha=0.6) + scale_color_manual(values = cbPalette) + theme_bw() + xlim(-4,5) + ylim(-4,5) + xlab("log10(Spike-In concentration)") + ylab ("log10(#reads + 0.0001)") + theme(axis.text.x = element_text(size = 30, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 30), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("ERCC LRGASP Quantification") + facet_grid( sample ~ seqTech, scales="free_y") + geom_smooth(data=finalE, method=lm, se=FALSE, fullrange=TRUE, size=1.5) + geom_text(data = graphLabels, aes(x = 0, y = 4.8, label = label), size = 12) dev.off() pdf("ercc_quantification_all.ont.pdf", bg = "white", width = 33, height = 35) ggplot(plot[plot$plat == "ont", ], aes(x=conc, y=count, color="type")) + geom_point(size=5, shape=19, alpha=0.6) + scale_color_manual(values = cbPalette) + theme_bw() + xlim(-4,5) + ylim(-4,5) + xlab("log10(Spike-In concentration)") + ylab ("log10(#reads + 0.0001)") + theme(axis.text.x = element_text(size = 30, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 30), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("ERCC LRGASP Quantification") + facet_grid( sample ~ seqTech, scales="free_y") + geom_smooth(data=finalE[finalE$plat == "ont", ], method=lm, se=FALSE, fullrange=TRUE, size=1.5) + geom_text(data = graphLabels[graphLabels$plat == "ont", ], aes(x = 0, y = 4.8, label = label), size = 12) dev.off() pdf("ercc_quantification_all.pbio.pdf", bg = "white", width = 17, height = 35) ggplot(plot[plot$plat != "ont", ], aes(x=conc, y=count, color="type")) + geom_point(size=5, shape=19, alpha=0.6) + scale_color_manual(values = cbPalette) + theme_bw() + xlim(-4,5) + ylim(-4,5) + xlab("log10(Spike-In concentration)") + ylab ("log10(#reads + 0.0001)") + theme(axis.text.x = element_text(size = 30, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 30), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("ERCC LRGASP Quantification") + facet_grid( sample ~ seqTech, scales="free_y") + geom_smooth(data=finalE[finalE$plat != "ont", ], method=lm, se=FALSE, fullrange=TRUE, size=1.5) + geom_text(data = graphLabels[graphLabels$plat != "ont", ], aes(x = 0, y = 4.8, label = label), size = 12) dev.off() ##### ADD ZEROES IN CALCULATIONS # text to annotate graphs graphLabels = data.frame( matrix(nrow = length(unique(plot$seqTech)), ncol = 4) ) colnames(graphLabels) = c("seqTech", "label", "sample", "plat") index = 1 for (i in unique(plot$seqTech)) { for (s in unique(plot$sample)) { graphLabels[index, "seqTech"] = i graphLabels[index, "sample"] = s graphLabels[index, "label"] = lm_eqn(plot[plot$seqTech == i & plot$sample == s, ]) #r2 computed with zeroes graphLabels[index, "plat"] = unlist(str_split(i, "-"))[1] index = index + 1 } } pdf("ercc_quantification_all.wzeroes.pdf", bg = "white", width = 50, height = 35) ggplot(plot, aes(x=conc, y=count, color="type")) + geom_point(size=5, shape=19, alpha=0.6) + scale_color_manual(values = cbPalette) + theme_bw() + xlim(-4,5) + ylim(-4,5) + xlab("log10(Spike-In concentration)") + ylab ("log10(#reads + 0.0001)") + theme(axis.text.x = element_text(size = 30, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 30), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("ERCC LRGASP Quantification") + facet_grid( sample ~ seqTech, scales="free_y") + geom_smooth(data=plot, method=lm, se=FALSE, fullrange=TRUE, size=1.5) + geom_text(data = graphLabels, aes(x = 0, y = 4.8, label = label), size = 12) dev.off() pdf("ercc_quantification_all.wzeroes.ont.pdf", bg = "white", width = 33, height = 35) ggplot(plot[plot$plat == "ont", ], aes(x=conc, y=count, color="type")) + geom_point(size=5, shape=19, alpha=0.6) + scale_color_manual(values = cbPalette) + theme_bw() + xlim(-4,5) + ylim(-4,5) + xlab("log10(Spike-In concentration)") + ylab ("log10(#reads + 0.0001)") + theme(axis.text.x = element_text(size = 30, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 30), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("ERCC LRGASP Quantification") + facet_grid( sample ~ seqTech, scales="free_y") + geom_smooth(data=plot[plot$plat == "ont", ], method=lm, se=FALSE, fullrange=TRUE, size=1.5) + geom_text(data = graphLabels[graphLabels$plat == "ont", ], aes(x = 0, y = 4.8, label = label), size = 12) dev.off() pdf("ercc_quantification_all.wzeroes.pbio.pdf", bg = "white", width = 17, height = 35) ggplot(plot[plot$plat != "ont", ], aes(x=conc, y=count, color="type")) + geom_point(size=5, shape=19, alpha=0.6) + scale_color_manual(values = cbPalette) + theme_bw() + xlim(-4,5) + ylim(-4,5) + xlab("log10(Spike-In concentration)") + ylab ("log10(#reads + 0.0001)") + theme(axis.text.x = element_text(size = 30, colour = "black"), axis.text.y = element_text(size = 30, colour = "black"), plot.title = element_text(size = 30), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none", strip.text.x=element_text(size=40), strip.text.y=element_text(size=40)) + ggtitle("ERCC LRGASP Quantification") + facet_grid( sample ~ seqTech, scales="free_y") + geom_smooth(data=plot[plot$plat != "ont", ], method=lm, se=FALSE, fullrange=TRUE, size=1.5) + geom_text(data = graphLabels[graphLabels$plat != "ont", ], aes(x = 0, y = 4.8, label = label), size = 12) dev.off()