#![deny(missing_docs)]
use bustools_core::{
consistent_genes::{find_consistent, InconsistentResolution, MappingMode, MappingResult}, consistent_transcripts::{find_consistent_transcripts, MappingResultTranscript}, io::BusReader, iterators::CbUmiGroupIterator
};
use core::panic;
use std::{collections::HashMap, fs::File, io::Write, path::Path};
#[derive(Debug)]
pub struct CUHistogram {
histogram: HashMap<usize, usize>,
}
impl CUHistogram {
pub fn new() -> Self {
CUHistogram { histogram: HashMap::new() }
}
pub fn get_nreads(&self) -> usize {
self.histogram
.iter()
.map(|(nreads, freq)| nreads * freq)
.sum()
}
pub fn get_numis(&self) -> usize {
self.histogram.values().sum()
}
pub fn get_fscm(&self) -> f64 {
let n1 = *self.histogram.get(&1).unwrap_or(&0);
(n1 as f64) / (self.get_numis() as f64)
}
pub fn to_disk(&self, fname: &Path) {
let mut fh = File::create(fname).unwrap();
fh.write_all("Amplification,Frequency\n".as_bytes())
.unwrap();
for (n_reads, n_umis) in self.histogram.iter() {
fh.write_all(format!("{},{}\n", n_reads, n_umis).as_bytes())
.unwrap();
}
}
pub fn add_counts(&mut self, freq: usize, count: usize) {
let v =self.histogram.entry(freq).or_insert(0);
*v += count
}
pub fn get_histogram(self) -> HashMap<usize, usize>{
self.histogram
}
}
impl From<HashMap<usize, usize>> for CUHistogram {
fn from(value: HashMap<usize, usize>) -> Self {
Self {histogram: value}
}
}
impl From<CUHistogram> for HashMap<usize, usize> {
fn from(value: CUHistogram) -> Self {
value.histogram
}
}
pub fn make_ecs(busfile: &Path, mapping_mode: MappingMode) -> CUHistogram {
let mut h: CUHistogram = CUHistogram::new();
let reader = BusReader::new(Path::new(busfile));
let mut multimapped = 0;
let mut inconsistent = 0;
let mut total = 0;
for ((_cb, _umi), recordlist) in reader.groupby_cbumi() {
total += 1;
match &mapping_mode {
MappingMode::Gene(ecmapper, resolution_mode) => {
match find_consistent(&recordlist, ecmapper) {
MappingResult::SingleGene(_) => {
let nreads: usize = recordlist.iter().map(|x| x.COUNT as usize).sum();
h.add_counts(nreads, 1);
}
MappingResult::Multimapped(_) => multimapped += 1,
MappingResult::Inconsistent => {
match resolution_mode {
InconsistentResolution::IgnoreInconsistent => {inconsistent += 1},
InconsistentResolution::AsDistinct => panic!("not implemented"),
InconsistentResolution::AsSingle => {
let nreads: usize = recordlist.iter().map(|x| x.COUNT as usize).sum();
h.add_counts(nreads, 1);
},
}
},
}
},
MappingMode::EC(mapping_mode) => {
match mapping_mode{
InconsistentResolution::IgnoreInconsistent => {
if recordlist.len() == 1 {
let nreads = recordlist[0].COUNT as usize;
h.add_counts(nreads, 1);
} else {
inconsistent += 1
}
},
InconsistentResolution::AsDistinct => panic!("not implemented"),
InconsistentResolution::AsSingle => {
let nreads: usize = recordlist.iter().map(|x| x.COUNT as usize).sum();
h.add_counts(nreads, 1);
},
}
}
MappingMode::Transcript(ecmapper, resolution_mode) => {
match find_consistent_transcripts(&recordlist, ecmapper) {
MappingResultTranscript::SingleTranscript(_) => {
let nreads: usize = recordlist.iter().map(|x| x.COUNT as usize).sum();
h.add_counts(nreads, 1);
}
MappingResultTranscript::Multimapped(_) => multimapped += 1,
MappingResultTranscript::Inconsistent => {
match resolution_mode {
InconsistentResolution::IgnoreInconsistent => {inconsistent += 1},
InconsistentResolution::AsDistinct => panic!("not implemented"),
InconsistentResolution::AsSingle => {
let nreads: usize = recordlist.iter().map(|x| x.COUNT as usize).sum();
h.add_counts(nreads, 1);
},
}
},
}
},
}
}
println!(
"Total CB-UMI {}, Multimapped {} ({}%), Discarded/Inconsistent {} ({}%)",
total,
multimapped,
100.0 * (multimapped as f32) / (total as f32),
inconsistent,
100.0 * (inconsistent as f32) / (total as f32)
);
h
}
#[cfg(test)]
mod testing {
use crate::butterfly::{make_ecs, CUHistogram};
use bustools_core::{
consistent_genes::{EC, Ec2GeneMapper, Genename, InconsistentResolution, MappingMode}, io::{BusFolder, BusRecord}, set,
};
use statrs::assert_almost_eq;
use std::collections::{HashMap, HashSet};
#[test]
pub fn testing() {
let h: HashMap<usize, usize> = vec![(1, 2), (3, 3)].into_iter().collect();
let c = CUHistogram { histogram: h };
assert_eq!(c.get_nreads(), 11);
assert_eq!(c.get_numis(), 5);
assert_almost_eq!(c.get_fscm(), 2.0 / 5.0, 0.00000000000000001);
}
#[test]
fn test_butterfly() {
let ec0 = set!(Genename("A".to_string()));
let ec1 = set!(Genename("B".to_string()));
let ec2 = set!(Genename("A".to_string()), Genename("B".to_string()));
let ec3 = set!(Genename("C".to_string()), Genename("D".to_string()));
let ec_dict: HashMap<EC, HashSet<Genename>> = HashMap::from([
(EC(0), ec0.clone()),
(EC(1), ec1.clone()),
(EC(2), ec2.clone()),
(EC(3), ec3.clone()),
]);
let es = Ec2GeneMapper::new(ec_dict);
let r1 = BusRecord { CB: 0, UMI: 1, EC: 0, COUNT: 12, FLAG: 0 };
let r2 = BusRecord { CB: 0, UMI: 1, EC: 1, COUNT: 2, FLAG: 0 };
let r3 = BusRecord { CB: 0, UMI: 2, EC: 0, COUNT: 12, FLAG: 0 };
let r4 = BusRecord { CB: 1, UMI: 1, EC: 1, COUNT: 2, FLAG: 0 };
let r5 = BusRecord { CB: 1, UMI: 2, EC: 1, COUNT: 2, FLAG: 0 };
let r6 = BusRecord { CB: 2, UMI: 1, EC: 0, COUNT: 2, FLAG: 0 };
let r7 = BusRecord { CB: 2, UMI: 1, EC: 2, COUNT: 2, FLAG: 0 };
let records = vec![
r1.clone(),
r2.clone(),
r3.clone(),
r4.clone(),
r5.clone(),
r6.clone(),
r7.clone(),
];
let (_busname, _dir) = bustools_core::io::setup_busfile(&records);
let b = BusFolder::new(&_dir.path());
let mapping_mode = MappingMode::Gene(es.clone(), InconsistentResolution::IgnoreInconsistent);
let h = make_ecs(&b.get_busfile(), mapping_mode);
let expected: HashMap<usize, usize> = vec![(12, 1), (2, 2), (4, 1)].into_iter().collect();
assert_eq!(h.histogram, expected);
let mapping_mode = MappingMode::Gene(es, InconsistentResolution::AsSingle);
let h = make_ecs(&b.get_busfile(), mapping_mode);
let expected: HashMap<usize, usize> = vec![(12, 1), (2, 2), (4, 1), (14,1)].into_iter().collect();
assert_eq!(h.histogram, expected);
let mapping_mode = MappingMode::EC(InconsistentResolution::IgnoreInconsistent);
let h = make_ecs(&b.get_busfile(), mapping_mode);
let expected: HashMap<usize, usize> = vec![
(12, 1),
(2, 2),
]
.into_iter()
.collect();
assert_eq!(h.histogram, expected);
}
}