use crate::gtf::Gene;
use crate::rna::dupradar::counting::CountResult;
use anyhow::Result;
use indexmap::IndexMap;
use std::io::Write;
use std::path::Path;
pub fn write_counts_file(
path: &Path,
genes: &IndexMap<String, Gene>,
counts: &CountResult,
bam_path: &str,
command_line: &str,
) -> Result<()> {
let file = std::fs::File::create(path)?;
let mut w = std::io::BufWriter::new(file);
writeln!(
w,
"# Program:featureCounts v2.0.6, generated by RustQC v{}; Command:\"{}\"",
env!("CARGO_PKG_VERSION"),
command_line
)?;
let bam_name = Path::new(bam_path)
.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_else(|| bam_path.to_string());
writeln!(w, "Geneid\tChr\tStart\tEnd\tStrand\tLength\t{}", bam_name)?;
for (gene_id, gene) in genes.iter() {
if gene.exons.is_empty() {
continue;
}
let chrs: Vec<&str> = gene.exons.iter().map(|e| e.chrom.as_str()).collect();
let starts: Vec<String> = gene.exons.iter().map(|e| e.start.to_string()).collect();
let ends: Vec<String> = gene.exons.iter().map(|e| e.end.to_string()).collect();
let strands: Vec<String> = gene.exons.iter().map(|e| e.strand.to_string()).collect();
let count = counts
.gene_counts
.get(gene_id)
.map(|gc| gc.fc_reads)
.unwrap_or(0);
writeln!(
w,
"{}\t{}\t{}\t{}\t{}\t{}\t{}",
gene_id,
chrs.join(";"),
starts.join(";"),
ends.join(";"),
strands.join(";"),
gene.effective_length,
count,
)?;
}
Ok(())
}
pub fn write_summary_file(
path: &Path,
counts: &CountResult,
bam_path: &str,
biotype_in_gtf: bool,
) -> Result<()> {
let file = std::fs::File::create(path)?;
let mut w = std::io::BufWriter::new(file);
let bam_name = Path::new(bam_path)
.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_else(|| bam_path.to_string());
let (assigned, ambiguous, no_features) = if biotype_in_gtf {
(
counts.fc_biotype_assigned,
counts.fc_biotype_ambiguous,
counts.fc_biotype_no_features,
)
} else {
(
counts.fc_assigned,
counts.fc_ambiguous,
counts.fc_no_features,
)
};
writeln!(w, "Status\t{}", bam_name)?;
writeln!(w, "Assigned\t{}", assigned)?;
writeln!(w, "Unassigned_Unmapped\t{}", counts.fc_unmapped)?;
writeln!(w, "Unassigned_Read_Type\t0")?;
writeln!(w, "Unassigned_Singleton\t{}", counts.fc_singleton)?;
writeln!(w, "Unassigned_MappingQuality\t0")?;
writeln!(w, "Unassigned_Chimera\t{}", counts.fc_chimera)?;
writeln!(w, "Unassigned_FragmentLength\t0")?;
writeln!(w, "Unassigned_Duplicate\t0")?;
writeln!(w, "Unassigned_MultiMapping\t{}", counts.fc_multimapping)?;
writeln!(w, "Unassigned_Secondary\t0")?;
writeln!(w, "Unassigned_NonSplit\t0")?;
writeln!(w, "Unassigned_NoFeatures\t{}", no_features)?;
writeln!(w, "Unassigned_Overlapping_Length\t0")?;
writeln!(w, "Unassigned_Ambiguity\t{}", ambiguous)?;
Ok(())
}
pub fn aggregate_biotype_counts(counts: &CountResult) -> IndexMap<String, u64> {
let mut biotype_counts: IndexMap<String, u64> = IndexMap::new();
for (idx, name) in counts.biotype_names.iter().enumerate() {
let count = counts.biotype_reads.get(idx).copied().unwrap_or(0);
biotype_counts.insert(name.clone(), count);
}
let mut pairs: Vec<(String, u64)> = biotype_counts.into_iter().collect();
pairs.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
pairs.into_iter().collect()
}
pub fn write_biotype_counts(path: &Path, biotype_counts: &IndexMap<String, u64>) -> Result<()> {
let file = std::fs::File::create(path)?;
let mut w = std::io::BufWriter::new(file);
writeln!(w, "Biotype\tCount")?;
for (biotype, count) in biotype_counts.iter() {
writeln!(w, "{}\t{}", biotype, count)?;
}
Ok(())
}
pub fn write_biotype_counts_mqc(path: &Path, biotype_counts: &IndexMap<String, u64>) -> Result<()> {
let file = std::fs::File::create(path)?;
let mut w = std::io::BufWriter::new(file);
writeln!(w, "# id: 'biotype_counts'")?;
writeln!(w, "# section_name: 'Biotype Counts'")?;
writeln!(
w,
"# description: \"shows reads overlapping genomic features of different biotypes,"
)?;
writeln!(
w,
"# counted by RustQC based on the <a href='http://bioinf.wehi.edu.au/featureCounts'>featureCounts</a> implementation.\""
)?;
writeln!(w, "# plot_type: 'bargraph'")?;
writeln!(w, "# anchor: 'featurecounts_biotype'")?;
writeln!(w, "# pconfig:")?;
writeln!(w, "# id: \"featurecounts_biotype_plot\"")?;
writeln!(w, "# title: \"featureCounts: Biotypes\"")?;
writeln!(w, "# xlab: \"# Reads\"")?;
writeln!(w, "# cpswitch_counts_label: \"Number of Reads\"")?;
for (biotype, count) in biotype_counts.iter() {
writeln!(w, "{}\t{}", biotype, count)?;
}
Ok(())
}
pub fn write_biotype_rrna_mqc(
path: &Path,
biotype_counts: &IndexMap<String, u64>,
total_assigned: u64,
sample_name: &str,
) -> Result<()> {
let file = std::fs::File::create(path)?;
let mut w = std::io::BufWriter::new(file);
let rrna_count = biotype_counts.get("rRNA").copied().unwrap_or(0);
let rrna_pct = if total_assigned > 0 {
rrna_count as f64 / total_assigned as f64 * 100.0
} else {
0.0
};
writeln!(w, "#id: 'biotype-gs'")?;
writeln!(w, "#plot_type: 'generalstats'")?;
writeln!(w, "#pconfig:")?;
writeln!(w, "# percent_rRNA:")?;
writeln!(w, "# title: '% rRNA'")?;
writeln!(w, "# namespace: 'Biotype Counts'")?;
writeln!(
w,
"# description: '% reads overlapping rRNA features'"
)?;
writeln!(w, "# max: 100")?;
writeln!(w, "# min: 0")?;
writeln!(w, "# scale: 'RdYlGn-rev'")?;
writeln!(w, "Sample\tpercent_rRNA")?;
writeln!(w, "'{}'\t{}", sample_name, rrna_pct)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::rna::dupradar::counting::GeneCounts;
use std::collections::HashMap;
fn make_test_gene(gene_id: &str, biotype: Option<&str>) -> Gene {
let mut attributes = HashMap::new();
if let Some(bt) = biotype {
attributes.insert("gene_biotype".to_string(), bt.to_string());
}
Gene {
gene_id: gene_id.to_string(),
chrom: "chr1".to_string(),
start: 100,
end: 200,
strand: '+',
exons: vec![crate::gtf::Exon {
chrom: "chr1".to_string(),
start: 100,
end: 200,
strand: '+',
}],
effective_length: 101,
attributes,
transcripts: Vec::new(),
}
}
#[test]
fn test_aggregate_biotype_counts() {
let mut genes = IndexMap::new();
genes.insert(
"gene1".to_string(),
make_test_gene("gene1", Some("protein_coding")),
);
genes.insert(
"gene2".to_string(),
make_test_gene("gene2", Some("protein_coding")),
);
genes.insert("gene3".to_string(), make_test_gene("gene3", Some("rRNA")));
genes.insert("gene4".to_string(), make_test_gene("gene4", None));
let mut gene_counts = IndexMap::new();
gene_counts.insert(
"gene1".to_string(),
GeneCounts {
all_unique: 100,
fc_reads: 100,
..Default::default()
},
);
gene_counts.insert(
"gene2".to_string(),
GeneCounts {
all_unique: 50,
fc_reads: 50,
..Default::default()
},
);
gene_counts.insert(
"gene3".to_string(),
GeneCounts {
all_unique: 25,
fc_reads: 25,
..Default::default()
},
);
gene_counts.insert(
"gene4".to_string(),
GeneCounts {
all_unique: 10,
fc_reads: 10,
..Default::default()
},
);
let count_result = CountResult {
gene_counts,
stat_total_reads: 200,
stat_assigned: 185,
stat_ambiguous: 5,
stat_no_features: 10,
stat_total_fragments: 200,
stat_singleton_unmapped_mates: 0,
stat_total_mapped: 200,
stat_total_dup: 0,
fc_assigned: 185,
fc_ambiguous: 5,
fc_no_features: 10,
fc_multimapping: 0,
fc_unmapped: 0,
fc_singleton: 0,
fc_chimera: 0,
biotype_reads: vec![150, 25],
biotype_names: vec!["protein_coding".to_string(), "rRNA".to_string()],
fc_biotype_assigned: 175,
fc_biotype_ambiguous: 0,
fc_biotype_no_features: 10,
rseqc: None,
qualimap: None,
};
let biotypes = aggregate_biotype_counts(&count_result);
assert_eq!(biotypes.get("protein_coding"), Some(&150));
assert_eq!(biotypes.get("rRNA"), Some(&25));
assert_eq!(biotypes.get("unknown"), None);
}
}