use std::collections::HashMap;
use std::fs::File;
use std::io::{self, Read};
use std::path::PathBuf;
use rayon::prelude::*;
pub mod record;
pub use record::*;
/// Computes the non-redundant exonic length of every gene.
///
/// Each gene maps to a list of `(start, end)` exon coordinates, both ends inclusive.
/// Positions covered by several overlapping exons are counted once. Genes are processed
/// in parallel, and the order of the returned pairs is unspecified.
///
/// A gene with no exons has length 0. Intervals whose `start` is greater than `end` cover
/// no position and are ignored. A length that does not fit in `u32` saturates at `u32::MAX`.
///
/// The current implementation always returns `Some`.
pub fn get_lengths(exons: HashMap<String, Vec<(u32, u32)>>) -> Option<Vec<(String, u32)>> {
    let lengths = exons
        .into_par_iter()
        .map(|(gene, intervals)| {
            let total_bp = merged_length(intervals);
            (gene, total_bp)
        })
        .collect::<Vec<(String, u32)>>();
    Some(lengths)
}

/// Returns how many positions are covered by the union of inclusive `(start, end)` intervals.
fn merged_length(mut intervals: Vec<(u32, u32)>) -> u32 {
    // Inverted intervals cover nothing; dropping them up front keeps the subtraction below safe.
    intervals.retain(|&(start, end)| start <= end);
    // Sorting by start lets a single pass merge overlaps, so cost depends on the number of
    // exons rather than on the genomic span they stretch over.
    intervals.sort_unstable();

    // Accumulate in u64 so that a very wide union cannot overflow mid-way.
    let mut total: u64 = 0;
    let mut current: Option<(u32, u32)> = None;
    for (start, end) in intervals {
        current = match current {
            // Overlaps the run being built: extend it.
            Some((run_start, run_end)) if start <= run_end => Some((run_start, run_end.max(end))),
            // Disjoint from the run: close it and start a new one.
            Some((run_start, run_end)) => {
                total += u64::from(run_end - run_start) + 1;
                Some((start, end))
            }
            None => Some((start, end)),
        };
    }
    if let Some((run_start, run_end)) = current {
        total += u64::from(run_end - run_start) + 1;
    }

    // The clamp guarantees the value fits, so the cast cannot truncate.
    total.min(u64::from(u32::MAX)) as u32
}
/// Reads an annotation file and groups exon coordinates by gene.
///
/// The whole file is read into memory and parsed line by line in parallel. Every record whose
/// feature is `"exon"` contributes its `(start, end)` pair to the list stored under its
/// `gene_id`. The order of pairs within a gene's list is unspecified.
///
/// Returns `None` when the file cannot be read or when parsing reports an error. A read
/// failure is also printed to standard error so the cause is not lost.
pub fn noel_reader(input: &PathBuf) -> Option<HashMap<String, Vec<(u32, u32)>>> {
    type ExonMap = HashMap<String, Vec<(u32, u32)>>;

    // I/O failures are expected (missing file, bad permissions, invalid UTF-8), so report
    // them through the `Option` return instead of panicking.
    let contents = match reader(input) {
        Ok(text) => text,
        Err(err) => {
            eprintln!("failed to read {}: {}", input.display(), err);
            return None;
        }
    };
    let records = parallel_parse(&contents).ok()?;

    let exons = records
        .into_par_iter()
        .filter(|line| line.feature() == "exon")
        // Each rayon job fills its own map...
        .fold(ExonMap::new, |mut map: ExonMap, line| {
            map.entry(line.gene_id)
                .or_default()
                .push((line.start, line.end));
            map
        })
        // ...and the per-job maps are then merged pairwise.
        .reduce(ExonMap::new, |mut exons: ExonMap, thread_map: ExonMap| {
            for (key, value) in thread_map {
                exons.entry(key).or_default().extend(value);
            }
            exons
        });
    Some(exons)
}
/// Loads the entire file at `file` into a `String`.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the file cannot be opened or read, or if its
/// contents are not valid UTF-8.
fn reader(file: &PathBuf) -> io::Result<String> {
    let mut text = String::new();
    File::open(file)?.read_to_string(&mut text)?;
    Ok(text)
}
/// Parses every line of `s` into a `Record`, in parallel.
///
/// A line that `Record::new` rejects is not reported as an error. It is replaced by a
/// placeholder record with an empty feature, an empty gene id and zero coordinates, so the
/// output holds one record per input line. As written, every line maps to `Ok`, so the
/// collected result is never `Err`.
fn parallel_parse<'a>(s: &'a str) -> Result<Vec<Record>, NoelError> {
    s.par_lines()
        .map(|raw| {
            // Fall back to a blank record rather than propagating the parse failure.
            let parsed = Record::new(raw).unwrap_or_else(|_| Record {
                feat: String::new(),
                start: 0,
                end: 0,
                gene_id: String::new(),
            });
            Ok(parsed)
        })
        .collect()
}