use std::io::Write;
use std::path::Path;
use anyhow::{Context, Result};
use indexmap::IndexMap;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JunctionClass {
Annotated,
PartialNovel,
CompleteNovel,
}
impl JunctionClass {
pub fn label(self) -> &'static str {
match self {
JunctionClass::Annotated => "annotated",
JunctionClass::PartialNovel => "partial_novel",
JunctionClass::CompleteNovel => "complete_novel",
}
}
pub fn color(self) -> &'static str {
match self {
JunctionClass::Annotated => "205,0,0",
JunctionClass::PartialNovel => "0,205,0",
JunctionClass::CompleteNovel => "0,0,205",
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Junction {
pub chrom: String,
pub intron_start: u64,
pub intron_end: u64,
}
#[derive(Debug)]
pub struct JunctionResults {
pub junctions: IndexMap<Junction, (u64, JunctionClass)>,
pub total_events: u64,
pub known_events: u64,
pub partial_novel_events: u64,
pub complete_novel_events: u64,
pub filtered_events: u64,
}
#[derive(Debug)]
pub struct JunctionCounts {
pub known: u64,
pub partial_novel: u64,
pub novel: u64,
pub total: u64,
}
impl JunctionResults {
pub fn junction_counts(&self) -> JunctionCounts {
let mut known: u64 = 0;
let mut partial_novel: u64 = 0;
let mut novel: u64 = 0;
for (_, class) in self.junctions.values() {
match class {
JunctionClass::Annotated => known += 1,
JunctionClass::PartialNovel => partial_novel += 1,
JunctionClass::CompleteNovel => novel += 1,
}
}
JunctionCounts {
known,
partial_novel,
novel,
total: known + partial_novel + novel,
}
}
}
fn format_chrom(chrom: &str) -> String {
chrom.replace("CHR", "chr")
}
pub fn write_junction_xls(results: &JunctionResults, path: &Path) -> Result<()> {
let mut f = std::fs::File::create(path)
.with_context(|| format!("Failed to create file: {}", path.display()))?;
writeln!(
f,
"chrom\tintron_st(0-based)\tintron_end(1-based)\tread_count\tannotation"
)?;
for (junction, (count, class)) in &results.junctions {
writeln!(
f,
"{}\t{}\t{}\t{}\t {}",
format_chrom(&junction.chrom),
junction.intron_start,
junction.intron_end,
count,
class.label()
)?;
}
Ok(())
}
pub fn write_junction_bed(results: &JunctionResults, path: &Path) -> Result<()> {
let mut f = std::fs::File::create(path)
.with_context(|| format!("Failed to create file: {}", path.display()))?;
for (junction, (count, class)) in &results.junctions {
let chrom = format_chrom(&junction.chrom);
let bed_start = junction.intron_start.saturating_sub(1); let bed_end = junction.intron_end + 1; let span = bed_end - bed_start;
writeln!(
f,
"{}\t{}\t{}\t{}\t{}\t.\t{}\t{}\t{}\t2\t1,1\t0,{}",
chrom,
bed_start,
bed_end,
class.label(),
count,
bed_start,
bed_end,
class.color(),
span - 1
)?;
}
Ok(())
}
pub fn write_junction_interact_bed(
results: &JunctionResults,
bam_file: &str,
path: &Path,
) -> Result<()> {
let mut f = std::fs::File::create(path)
.with_context(|| format!("Failed to create file: {}", path.display()))?;
writeln!(
f,
"track type=interact name=\"Splice junctions\" \
description=\"Splice junctions detected from {}\" \
maxHeightPixels=200:200:50 visibility=full",
bam_file
)?;
for (junction, (count, class)) in &results.junctions {
let chrom = format_chrom(&junction.chrom);
let chrom_start = junction.intron_start.saturating_sub(1);
let chrom_end = junction.intron_end + 1;
let name = format!("{}:{}-{}_{}", chrom, chrom_start, chrom_end, class.label());
let source_start = chrom_start;
let source_end = chrom_start + 1;
let source_name = format!("{}:{}-{}", chrom, source_start, source_end);
let target_start = chrom_end - 1;
let target_end = chrom_end;
let target_name = format!("{}:{}-{}", chrom, target_start, target_end);
let color = class.color();
writeln!(
f,
"{chrom}\t{chrom_start}\t{chrom_end}\t{name}\t{count}\t{count}.0\t\
RNAseq_junction\t{color}\t\
{chrom}\t{source_start}\t{source_end}\t{source_name}\t.\t\
{chrom}\t{target_start}\t{target_end}\t{target_name}\t.",
)?;
}
Ok(())
}
pub fn write_junction_plot_r(results: &JunctionResults, prefix: &str, path: &Path) -> Result<()> {
let mut f = std::fs::File::create(path)
.with_context(|| format!("Failed to create file: {}", path.display()))?;
let (e_known_pct, e_partial_pct, e_novel_pct) = if results.total_events > 0 {
(
results.known_events as f64 * 100.0 / results.total_events as f64,
results.partial_novel_events as f64 * 100.0 / results.total_events as f64,
results.complete_novel_events as f64 * 100.0 / results.total_events as f64,
)
} else {
(0.0, 0.0, 0.0)
};
let jc = results.junction_counts();
let (j_known_pct, j_partial_pct, j_novel_pct) = if jc.total > 0 {
(
jc.known as f64 * 100.0 / jc.total as f64,
jc.partial_novel as f64 * 100.0 / jc.total as f64,
jc.novel as f64 * 100.0 / jc.total as f64,
)
} else {
(0.0, 0.0, 0.0)
};
writeln!(f, "pdf(\"{}.splice_events.pdf\")", prefix)?;
writeln!(
f,
"events=c({},{},{})",
e_partial_pct, e_novel_pct, e_known_pct
)?;
writeln!(
f,
"pie(events,col=c(2,3,4),init.angle=30,angle=c(60,120,150),density=c(70,70,70),main=\"splicing events\",labels=c(\"partial_novel {}%\",\"complete_novel {}%\",\"known {}%\"))",
e_partial_pct.round() as u64,
e_novel_pct.round() as u64,
e_known_pct.round() as u64
)?;
writeln!(f, "dev.off()")?;
writeln!(f, "pdf(\"{}.splice_junction.pdf\")", prefix)?;
writeln!(
f,
"junction=c({},{},{})",
j_partial_pct, j_novel_pct, j_known_pct
)?;
writeln!(
f,
"pie(junction,col=c(2,3,4),init.angle=30,angle=c(60,120,150),density=c(70,70,70),main=\"splicing junctions\",labels=c(\"partial_novel {}%\",\"complete_novel {}%\",\"known {}%\"))",
j_partial_pct.round() as u64,
j_novel_pct.round() as u64,
j_known_pct.round() as u64
)?;
writeln!(f, "dev.off()")?;
Ok(())
}
pub fn write_summary(results: &JunctionResults, path: &Path, gtf_path: &str) -> Result<()> {
let mut f = std::fs::File::create(path)
.with_context(|| format!("Failed to create file: {}", path.display()))?;
let gtf_filename = Path::new(gtf_path)
.file_name()
.unwrap_or_default()
.to_string_lossy();
writeln!(
f,
"Reading reference gene model: {} ... Done",
gtf_filename
)?;
writeln!(f, "Load BAM file ... Done")?;
writeln!(f)?;
write_summary_to(results, &mut f)?;
writeln!(f, "Create BED file ...")?;
writeln!(f, "Create Interact file ...")?;
Ok(())
}
pub fn print_summary(results: &JunctionResults) {
let mut stderr = std::io::stderr();
let _ = write_summary_to(results, &mut stderr);
}
fn write_summary_to<W: std::io::Write>(results: &JunctionResults, f: &mut W) -> Result<()> {
let jc = results.junction_counts();
writeln!(
f,
"==================================================================="
)?;
writeln!(f, "Total splicing Events:\t{}", results.total_events)?;
writeln!(f, "Known Splicing Events:\t{}", results.known_events)?;
writeln!(
f,
"Partial Novel Splicing Events:\t{}",
results.partial_novel_events
)?;
writeln!(
f,
"Novel Splicing Events:\t{}",
results.complete_novel_events
)?;
writeln!(f, "Filtered Splicing Events:\t{}", results.filtered_events)?;
writeln!(f)?;
writeln!(f, "Total splicing Junctions:\t{}", jc.total)?;
writeln!(f, "Known Splicing Junctions:\t{}", jc.known)?;
writeln!(f, "Partial Novel Splicing Junctions:\t{}", jc.partial_novel)?;
writeln!(f, "Novel Splicing Junctions:\t{}", jc.novel)?;
writeln!(f)?;
writeln!(
f,
"==================================================================="
)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::rna::rseqc::common::ReferenceJunctions;
use std::collections::{HashMap, HashSet};
use crate::rna::rseqc::common::classify_junction;
#[test]
fn test_classify_junction_annotated() {
let mut starts = HashMap::new();
let mut ends = HashMap::new();
starts.insert("CHR1".to_string(), HashSet::from([100, 500]));
ends.insert("CHR1".to_string(), HashSet::from([400, 900]));
let reference = ReferenceJunctions {
intron_starts: starts,
intron_ends: ends,
};
assert_eq!(
classify_junction("CHR1", 100, 400, &reference),
JunctionClass::Annotated
);
}
#[test]
fn test_classify_junction_partial_novel() {
let mut starts = HashMap::new();
let mut ends = HashMap::new();
starts.insert("CHR1".to_string(), HashSet::from([100]));
ends.insert("CHR1".to_string(), HashSet::from([400]));
let reference = ReferenceJunctions {
intron_starts: starts,
intron_ends: ends,
};
assert_eq!(
classify_junction("CHR1", 100, 999, &reference),
JunctionClass::PartialNovel
);
assert_eq!(
classify_junction("CHR1", 999, 400, &reference),
JunctionClass::PartialNovel
);
}
#[test]
fn test_classify_junction_complete_novel() {
let mut starts = HashMap::new();
let mut ends = HashMap::new();
starts.insert("CHR1".to_string(), HashSet::from([100]));
ends.insert("CHR1".to_string(), HashSet::from([400]));
let reference = ReferenceJunctions {
intron_starts: starts,
intron_ends: ends,
};
assert_eq!(
classify_junction("CHR1", 999, 888, &reference),
JunctionClass::CompleteNovel
);
}
#[test]
fn test_classify_junction_unknown_chrom() {
let reference = ReferenceJunctions {
intron_starts: HashMap::new(),
intron_ends: HashMap::new(),
};
assert_eq!(
classify_junction("CHRX", 100, 400, &reference),
JunctionClass::CompleteNovel
);
}
#[test]
fn test_junction_class_labels() {
assert_eq!(JunctionClass::Annotated.label(), "annotated");
assert_eq!(JunctionClass::PartialNovel.label(), "partial_novel");
assert_eq!(JunctionClass::CompleteNovel.label(), "complete_novel");
}
#[test]
fn test_junction_class_colors() {
assert_eq!(JunctionClass::Annotated.color(), "205,0,0");
assert_eq!(JunctionClass::PartialNovel.color(), "0,205,0");
assert_eq!(JunctionClass::CompleteNovel.color(), "0,0,205");
}
#[test]
fn test_format_chrom() {
assert_eq!(format_chrom("CHR1"), "chr1");
assert_eq!(format_chrom("CHRX"), "chrX");
assert_eq!(format_chrom("6"), "6"); }
}