library(ggplot2) library(plyr) library(magrittr) library(dplyr) args = commandArgs(trailingOnly=TRUE) lm_eqn = function(plot) { m = lm(count ~ conc, plot); # eq = substitute(italic(y) == a + b %.% italic(x)*","~~italic(R)^2~"="~r2, # list(a = format(coef(m)[1], digits = 2), b = format(coef(m)[2], digits = 2),r2 = format(summary(m)$r.squared, digits = 3))) eq = paste0("r2: ", format(summary(m)$r.squared, digits = 3)) # as.character(as.expression(eq)); eq } cbPalette <- c("#22635b", "#87bd5b") plot <- read.table(args[1], header=F, as.is=T, sep="\t") colnames(plot)<-c("sample","plat", "spikeIn","count", "conc", "type", "lab", "category") missing = plot[ plot$count == 0, ] %>% group_by(lab, sample, plat) %>% summarize( zeroes = n()) plot$count[plot$count == 0] = 0.0001 plot$count=log10(plot$count) plot$conc=log10(plot$conc) forE=plot[plot$count > -2.5,] finalE=forE[forE$conc > -2.5,] # text to annotate graphs graphLabels = data.frame( matrix(nrow = length(unique(plot$lab)), ncol = 4) ) colnames(graphLabels) = c("lab", "sample", "plat", "label") index = 1 for (i in unique(plot$lab)) { for (p in unique(plot$plat)) { for (s in unique(plot$sample)) { graphLabels[index, "lab"] = i graphLabels[index, "sample"] = s graphLabels[index, "plat"] = p definitive = finalE[finalE$lab == i & finalE$plat == p & finalE$sample == s, ] if ( nrow(definitive) != 0 ) { graphLabels[index, "label"] = paste0(as.character(as.expression(lm_eqn(definitive))), ", zeroes: ", missing[missing$lab == i & missing$sample == s & missing$plat == p, "zeroes"]) #r2 computed without zeroes # graphLabels[index, "label"] = paste0(as.character(as.expression(lm_eqn(plot[plot$lab == i & plot$plat == p & plot$sample == s,]))), ", zeroes: ", missing[missing$lab == i & missing$sample == s & missing$plat == p, "zeroes"]) index = index + 1 } } } } for (samp in unique(plot$sample)) { tmp = plot[plot$sample == samp, ] # pdf(gsub(".tsv", paste0("_zeroes_", samp, ".pdf"), args[1]), bg = "white", width = 40, height = 8) pdf(gsub(".tsv", paste0("_", samp, ".pdf"), args[1]), bg = "white", width = 40, height = 8) print( ggplot(tmp, aes(x=conc, y=count, color="typo")) + geom_point(size=5, shape=19, alpha=0.6) + scale_color_manual(values = cbPalette) + theme_bw() + xlim(-4,5) + ylim(-4,5) + xlab("log10(Spike-In concentration)") + ylab ("log10(#reads + 0.0001)") + theme(axis.text.x = element_text(size = 16, colour = "black"), axis.text.y = element_text(size = 16, colour = "black"), plot.title = element_text(size = 22), axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18), legend.position = "none", strip.text.x=element_text(size=22), strip.text.y=element_text(size=22)) + ggtitle(paste0(unique(tmp$category), " ", unique(tmp$sample))) + facet_wrap( plat ~ lab, ncol=6, scales="free") + geom_smooth(method=lm,se=FALSE,fullrange=TRUE, size=1.5) + theme(legend.position = "none") + geom_text(data = graphLabels[graphLabels$sample == samp, ], aes(x = -2, y = 5, label = label), size = 8) ) dev.off() }