# Suppress warnings for the whole run (a negative `warn` makes R ignore them).
# This must be its own statement: sharing a line with library() without a
# separator is a parse error.
options(warn = -1)

# Plotting dependencies.
library(ggplot2)
library(scales)
# Colours shared by the plots in this script; fgcolors collects them in
# blue, green, red order.
fgblue <- "#2E9DD7"
fggreen <- "#155936"
fgred <- "firebrick3"
fgcolors <- c(fgblue, fggreen, fgred)
# Positional command-line arguments:
#   1  tab-delimited table (with header) loaded into familyData
#   2  tab-delimited table (with header) loaded into yieldData
#   3  tab-delimited table (with header) loaded into umiData
#   4  path of the PDF to write
#   5+ optional free-text sample description; the words are joined with spaces
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 4) {
  stop(
    "Expected at least 4 arguments: <family_table> <yield_table> <umi_table> ",
    "<output_pdf> [sample info ...]",
    call. = FALSE
  )
}

familyData <- read.table(args[1], sep = "\t", header = TRUE)
yieldData <- read.table(args[2], sep = "\t", header = TRUE)
umiData <- read.table(args[3], sep = "\t", header = TRUE)
outputFile <- args[4]

# Drop the four fixed arguments by negative index rather than 5:length(args):
# with exactly four arguments that range counts backwards (5, 4) and would
# yield "NA <outputFile>" instead of an empty description.
sampleInfo <- paste(args[-seq_len(4)], collapse = " ")

# Open the PDF device; every plot printed after this point becomes a page.
pdf(outputFile, width = 11, height = 8.5)
# Family counts by family size: one line for ss_count ("SS Families") and one
# for cs_count ("By Coord+Strand"), on a log2-scaled x axis.
# NOTE(review): the minor-break sequence starts at 0, which has no finite
# position on a log2 axis -- confirm that is intended.
familySizeMinorBreaks <- seq(0, max(familyData$family_size), by = 2)
familySizePalette <- c("SS Families" = fgblue, "By Coord+Strand" = fggreen)
familySizeTitle <- paste("Family Size Distributions for", sampleInfo)

ggplot(familyData, aes(x = family_size)) +
  geom_line(aes(y = ss_count, color = "SS Families")) +
  geom_line(aes(y = cs_count, color = "By Coord+Strand")) +
  scale_color_manual(values = familySizePalette) +
  scale_x_continuous(trans = "log2", minor_breaks = familySizeMinorBreaks) +
  labs(
    title = familySizeTitle,
    x = "Family Size (log2 scaled)",
    y = "Count of Families"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# Cumulative view: fraction of families at or above each family size, drawn as
# two half-transparent lines (ss_fraction_gt_or_eq_size and
# cs_fraction_gt_or_eq_size) on a log2-scaled x axis.
cumulativeMinorBreaks <- seq(0, max(familyData$family_size), by = 2)
cumulativePalette <- c("SS Families" = fgblue, "By Coord+Strand" = fggreen)
cumulativeTitle <- paste("Cumulative Family Size Distributions for", sampleInfo)

ggplot(familyData, aes(x = family_size)) +
  geom_line(
    aes(y = ss_fraction_gt_or_eq_size, color = "SS Families"),
    alpha = 0.5
  ) +
  geom_line(
    aes(y = cs_fraction_gt_or_eq_size, color = "By Coord+Strand"),
    alpha = 0.5
  ) +
  scale_color_manual(values = cumulativePalette) +
  scale_x_continuous(trans = "log2", minor_breaks = cumulativeMinorBreaks) +
  labs(
    title = cumulativeTitle,
    x = "Family Size (log2 scaled)",
    y = "Fraction of Families at >= Family Size"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# Yield by input read pairs: filled area for ss_families with the
# ss_consensus_families area drawn over it as a second layer.
yieldPalette <- c("SS Families" = fgblue, "Consensus-Eligible" = fggreen)
yieldTitle <- paste("Simplex Yield by Input Read Pairs for", sampleInfo)

ggplot(yieldData, aes(x = read_pairs)) +
  geom_area(aes(y = ss_families, fill = "SS Families")) +
  geom_area(aes(y = ss_consensus_families, fill = "Consensus-Eligible")) +
  scale_fill_manual(values = yieldPalette) +
  labs(
    title = yieldTitle,
    x = "Read Pairs",
    y = "Count of SS Families"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# Single blue line of mean_ss_family_size against read_pairs.
meanSizeTitle <- paste("Mean SS Family Size by Input Read Pairs for", sampleInfo)

ggplot(yieldData, aes(x = read_pairs, y = mean_ss_family_size)) +
  geom_line(color = fgblue) +
  labs(
    title = meanSizeTitle,
    x = "Read Pairs",
    y = "Mean SS Family Size"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# Single red line of ss_singleton_fraction against read_pairs, with the y axis
# fixed to the range [0, 1].
singletonTitle <- paste("Singleton Fraction by Input Read Pairs for", sampleInfo)

ggplot(yieldData, aes(x = read_pairs, y = ss_singleton_fraction)) +
  geom_line(color = fgred) +
  ylim(0, 1) +
  labs(
    title = singletonTitle,
    x = "Read Pairs",
    y = "Singleton Fraction"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# Scatter plot of unique_observations against raw_observations, restricted to
# the rows of umiData whose umi value contains no literal "N".
umiContainsN <- grepl("N", umiData$umi, fixed = TRUE)
umiWithoutN <- umiData[!umiContainsN, , drop = FALSE]
umiTitle <- paste("UMI Representation in", sampleInfo)

ggplot(umiWithoutN, aes(x = raw_observations, y = unique_observations)) +
  geom_point(color = fggreen) +
  labs(
    title = umiTitle,
    x = "Observations of UMI in Raw Reads",
    y = "Unique Observations (Tag Families w/UMI)"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# Reads allocated to families of each size: count * family_size for the
# cs_count and ss_count columns, on a log2-scaled x axis.
# The colour assignment matches the other family-size plots in this script
# ("SS Families" in blue, "By Coord+Strand" in green) so a series keeps the
# same colour on every page; it was previously swapped here.
# NOTE(review): the minor-break sequence starts at 0, which has no finite
# position on a log2 axis -- confirm that is intended.
ggplot(familyData) + aes(x = family_size) +
  geom_line(aes(y = cs_count * family_size, color = "By Coord+Strand")) +
  geom_line(aes(y = ss_count * family_size, color = "SS Families")) +
  scale_x_continuous(
    trans = "log2",
    minor_breaks = seq(0, max(familyData$family_size), by = 2)
  ) +
  scale_color_manual(
    values = c("SS Families" = fgblue, "By Coord+Strand" = fggreen)
  ) +
  labs(
    x = "Family Size (log2 scaled)",
    y = "Reads Allocated to Families of Size N",
    title = paste("Read Distribution Among Families for", sampleInfo)
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank()
  )
# Close the current graphics device so the output file is finalised.
grDevices::dev.off()