use fxhash::FxHashMap;
use num_traits::CheckedSub;
use std::cmp::{Ord, PartialOrd, min, max};
use std::ops::Sub;
use crate::structs::structs::{Coordinates, Interval, Named};
pub fn intersection<T>(
start1: T, end1: T, start2: T, end2: T
) -> Option<T>
where T: Ord + PartialOrd + Sub<Output = T> + CheckedSub<Output = T>{
let min_end: T = min(end1, end2); let max_start: T = max(start1, start2); min_end.checked_sub(&max_start)
}
/// Merges two intervals into one spanning both, provided they intersect.
///
/// Whether the intervals intersect is decided by [`intersection`]; touching
/// intervals (zero-length overlap) therefore count as intersecting. Only the
/// coordinates are inspected: chromosomes are not compared here.
///
/// # Returns
/// * `Some(interval)` built with `Interval::new()` and then given the smaller of
///   the two starts and the larger of the two ends. No other field is set by
///   this function.
/// * `None` if the intervals do not intersect.
///
/// # Panics
/// Panics if any of the four coordinates is undefined.
pub fn merge<T>(inter1: T, inter2: T) -> Option<Interval>
where
    T: Coordinates
{
    const UNDEFINED: &str = "Cannot merge intervals with undefined coordinates";

    let (s1, e1) = (
        *inter1.start().expect(UNDEFINED),
        *inter1.end().expect(UNDEFINED),
    );
    let (s2, e2) = (
        *inter2.start().expect(UNDEFINED),
        *inter2.end().expect(UNDEFINED),
    );

    // The overlap length itself is irrelevant; only its existence matters.
    intersection(s1, e1, s2, e2).map(|_| {
        let mut merged: Interval = Interval::new();
        merged.update_start(min(s1, s2));
        merged.update_end(max(e1, e2));
        merged
    })
}
/// Collapses every group of intersecting intervals into a single interval.
///
/// Intervals are visited in ascending `(start, end)` order, so the result does
/// not depend on the order of the input. The ordering is done on a vector of
/// references; the caller's vector is left exactly as it was passed in.
/// Two intervals are joined whenever [`intersection`] reports an overlap, which
/// includes intervals that merely touch.
///
/// Only coordinates are compared; chromosomes are not. Each output interval
/// carries the chromosome of the last input interval that was folded into it.
///
/// # Returns
/// The merged intervals in ascending start order; an empty vector for empty
/// input.
///
/// # Panics
/// Panics if any interval has an undefined start, end or chromosome.
pub fn merge_multiple<T>(intervals: &mut Vec<T>) -> Vec<Interval>
where
    T: Coordinates
{
    let mut out_vec: Vec<Interval> = Vec::with_capacity(intervals.len());
    if intervals.is_empty() {
        return out_vec;
    }

    // Comparing only neighbouring elements is correct solely on sorted data,
    // so walk a sorted view instead of trusting the input order.
    let mut ordered: Vec<&T> = intervals.iter().collect();
    ordered.sort_by_key(|el| (*el.start().unwrap(), *el.end().unwrap()));

    // Bounds of the merged run currently sitting at the back of `out_vec`.
    let mut current: Option<(u64, u64)> = None;
    for el in ordered {
        let curr_start: u64 = *el.start().unwrap();
        let curr_end: u64 = *el.end().unwrap();
        let (run_start, run_end) = match current {
            Some((prev_start, prev_end))
                if intersection(prev_start, prev_end, curr_start, curr_end).is_some() =>
            {
                // The element extends the current run: drop the stale entry,
                // an enlarged one is pushed below.
                let _ = out_vec.pop();
                (min(prev_start, curr_start), max(prev_end, curr_end))
            }
            // First element, or a gap before this element: start a new run.
            _ => (curr_start, curr_end),
        };
        current = Some((run_start, run_end));

        let mut merged: Interval = Interval::new();
        merged.update_chrom(el.chrom().unwrap().clone());
        merged.update_start(run_start);
        merged.update_end(run_end);
        out_vec.push(merged);
    }
    // Runs are opened in ascending start order, so no final sort is required.
    out_vec
}
/// Builds the single interval that covers every interval in `intervals`.
///
/// `intervals` is sorted in place by `(start, end)`. The resulting span runs
/// from the smallest start to the largest end found in the collection, takes
/// its chromosome from the first interval after sorting, and is named
/// `"{chrom}:{start}-{end}"`.
///
/// # Panics
/// Panics if `intervals` is empty, if any interval has an undefined start or
/// end, or if the first interval (after sorting) has no chromosome.
pub fn total_span<T>(intervals: &mut Vec<T>) -> Interval
where
    T: Coordinates
{
    assert!(
        !intervals.is_empty(),
        "Cannot infer the total span of an empty interval collection"
    );
    intervals.sort_by(|a, b| {
        a.start().unwrap().cmp(&b.start().unwrap())
            .then_with(|| a.end().unwrap().cmp(&b.end().unwrap()))
    });
    let chrom: String = intervals[0]
        .chrom()
        .expect("Intervals for total span inference must have a defined chromosome")
        .clone();
    // After sorting, the first interval holds the smallest start ...
    let start: u64 = *intervals[0].start().unwrap();
    // ... but the largest end may belong to any interval (e.g. an early interval
    // that encloses later ones), so every end has to be examined.
    let end: u64 = intervals
        .iter()
        .map(|el| *el.end().unwrap())
        .max()
        .expect("interval collection was checked to be non-empty");
    let name: String = format!("{}:{}-{}", chrom, start, end);
    Interval::from(Some(chrom), Some(start), Some(end), Some(name))
}
/// Cuts clusters of intersecting intervals into consecutive, non-overlapping
/// segments and records which input intervals cover each segment.
///
/// `intervals` is sorted in place by `(start, end)`. Intervals are then grouped
/// into clusters: an interval joins the current cluster while its start does not
/// exceed the largest end seen in that cluster. Inside a cluster every distinct
/// start/end coordinate becomes a breakpoint, and each pair of neighbouring
/// breakpoints yields one output segment.
///
/// # Returns
/// A pair of
/// * the segments, named with a running counter (`"0"`, `"1"`, ...) and all
///   carrying the chromosome of the first interval after sorting (possibly
///   undefined);
/// * a map from segment name to the names of the input intervals registered at
///   the segment's start coordinate.
///
/// Both are empty for empty input.
///
/// # Panics
/// Panics if an interval has an undefined start or end, or no name.
pub fn discrete_interval_map<T>(intervals: &mut Vec<T>) -> (Vec<Interval>, FxHashMap<String, Vec<&str>>)
where
    T: Coordinates + Named
{
    /// Records `name` under `point`, keeping each name at most once per point.
    fn register<'a>(index: &mut FxHashMap<u64, Vec<&'a str>>, point: u64, name: &'a str) {
        let names = index.entry(point).or_default();
        if !names.contains(&name) {
            names.push(name);
        }
    }

    let mut interval_vec: Vec<Interval> = Vec::new();
    let mut out_map: FxHashMap<String, Vec<&str>> = FxHashMap::default();
    if intervals.is_empty() {
        return (interval_vec, out_map);
    }
    intervals.sort_by(|a, b| {
        a.start().unwrap().cmp(&b.start().unwrap())
            .then_with(|| a.end().unwrap().cmp(&b.end().unwrap()))
    });
    // From here on the collection is only read.
    let sorted: &[T] = intervals.as_slice();
    let chrom: Option<String> = sorted[0].chrom().map(|c| c.clone());

    let mut curr: usize = 0;
    let mut next: usize = 1;
    // Breakpoints of the cluster being processed, and the names seen at each.
    let mut breakpoints: Vec<u64> = Vec::new();
    let mut names_at: FxHashMap<u64, Vec<&str>> = FxHashMap::default();
    let mut segment_id: u64 = 0;

    while curr < sorted.len() {
        // The interval at `curr` opens a new cluster.
        let head = &sorted[curr];
        let first_start: u64 = *head.start().unwrap_or_else(|| panic!(
            "Cannot discretize intervals with undefined coordinates; found an undefined start coordinate for interval {}", curr
        ));
        let first_end: u64 = *head.end().unwrap_or_else(|| panic!(
            "Cannot discretize intervals with undefined coordinates; found an undefined end coordinate for interval {}", curr
        ));
        let head_name: &str = head
            .name()
            .unwrap_or_else(|| panic!("Cannot discretize unnamed intervals"));

        let mut curr_end = first_end;
        breakpoints.push(first_start);
        breakpoints.push(first_end);
        names_at.entry(first_start).or_default().push(head_name);

        // Absorb every following interval that begins inside the cluster.
        // For clusters after the first one `next == curr` on entry, so the head
        // is visited once more; the deduplication in `register` and the
        // `contains` checks below make that visit a no-op.
        while next < sorted.len() {
            let member = &sorted[next];
            let next_start: u64 = *member.start().unwrap_or_else(|| panic!(
                "Cannot discretize intervals with undefined coordinates; found an undefined start coordinate for interval {}", next
            ));
            let next_end: u64 = *member.end().unwrap_or_else(|| panic!(
                "Cannot discretize intervals with undefined coordinates; found an undefined end coordinate for interval {}", next
            ));
            let member_name: &str = member
                .name()
                .unwrap_or_else(|| panic!("Cannot discretize unnamed intervals"));

            if next_start > curr_end {
                break;
            }
            curr_end = max(curr_end, next_end);
            if !breakpoints.contains(&next_start) {
                breakpoints.push(next_start);
            }
            if !breakpoints.contains(&next_end) {
                breakpoints.push(next_end);
            }

            // The new member covers every known breakpoint in [next_start, next_end).
            for &point in &breakpoints {
                if point >= next_start && point < next_end {
                    register(&mut names_at, point, member_name);
                }
            }
            // Cluster intervals reaching past the member's start / end cover
            // those two breakpoints as well.
            for earlier in &sorted[curr..=next] {
                let earlier_end: u64 = *earlier.end().unwrap();
                let earlier_name: &str = earlier.name().unwrap();
                if earlier_end > next_start {
                    register(&mut names_at, next_start, earlier_name);
                }
                if earlier_end > next_end {
                    register(&mut names_at, next_end, earlier_name);
                }
            }
            next += 1;
        }

        // Emit one segment per pair of neighbouring breakpoints.
        breakpoints.sort();
        for pair in breakpoints.windows(2) {
            let (inter_start, inter_end): (u64, u64) = (pair[0], pair[1]);
            let tr_names: &Vec<&str> = names_at.get(&inter_start).unwrap_or_else(|| {
                println!("{:#?}", names_at);
                println!("{:#?}", breakpoints);
                panic!("No transcripts overlapping this value: {}!", inter_start);
            });
            let interval_name: String = segment_id.to_string();
            out_map.insert(interval_name.clone(), tr_names.clone());
            interval_vec.push(Interval::from(
                chrom.clone(), Some(inter_start), Some(inter_end), Some(interval_name)
            ));
            segment_id += 1;
        }

        // Reset the per-cluster state; the next cluster starts where this one stopped.
        breakpoints.clear();
        names_at.clear();
        curr = next;
    }
    (interval_vec, out_map)
}
#[cfg(test)]
mod discretizer_test{
    use super::*;

    /// Shorthand for a fully defined, named interval.
    fn named(chrom: &str, start: u64, end: u64, name: &str) -> Interval {
        Interval::from(
            Some(String::from(chrom)), Some(start), Some(end), Some(String::from(name))
        )
    }

    /// Reads the coordinates of a segment through the `Coordinates` trait.
    fn bounds_of<T: Coordinates>(segment: &T) -> (u64, u64) {
        (*segment.start().unwrap(), *segment.end().unwrap())
    }

    /// Discretizes `input` and checks the result against `expected`, a list of
    /// `(start, end, covering interval names)` in output order. The segment at
    /// position `i` must be stored in the map under the key `i.to_string()`.
    fn assert_discretized(input: &mut Vec<Interval>, expected: &[(u64, u64, Vec<&str>)]) {
        let (segments, map) = discrete_interval_map(input);
        assert_eq!(segments.len(), expected.len(), "unexpected number of discrete intervals");
        assert_eq!(map.len(), expected.len(), "unexpected number of map entries");
        for (idx, (segment, (start, end, names))) in segments.iter().zip(expected).enumerate() {
            assert_eq!(
                bounds_of(segment), (*start, *end),
                "wrong coordinates for discrete interval {}", idx
            );
            let key: String = idx.to_string();
            assert_eq!(map[&key], *names, "wrong interval names for discrete interval {}", idx);
        }
    }

    #[test]
    fn discretizer_identical(){
        let mut input: Vec<Interval> = vec![
            named("chr1", 100, 200, "one"),
            named("chr1", 100, 200, "two"),
        ];
        assert_discretized(&mut input, &[(100, 200, vec!["one", "two"])]);
    }

    #[test]
    fn discretizer_simple_overlap(){
        let mut input: Vec<Interval> = vec![
            named("chr1", 100, 200, "one"),
            named("chr1", 150, 220, "two"),
        ];
        assert_discretized(&mut input, &[
            (100, 150, vec!["one"]),
            (150, 200, vec!["two", "one"]),
            (200, 220, vec!["two"]),
        ]);
    }

    #[test]
    fn discretizer_nested_overlap(){
        let mut input: Vec<Interval> = vec![
            named("chr1", 100, 200, "one"),
            named("chr1", 150, 180, "two"),
        ];
        assert_discretized(&mut input, &[
            (100, 150, vec!["one"]),
            (150, 180, vec!["two", "one"]),
            (180, 200, vec!["one"]),
        ]);
    }

    #[test]
    fn discretizer_shared_start(){
        let mut input: Vec<Interval> = vec![
            named("chr1", 100, 200, "one"),
            named("chr1", 100, 220, "two"),
        ];
        assert_discretized(&mut input, &[
            (100, 200, vec!["one", "two"]),
            (200, 220, vec!["two"]),
        ]);
    }

    #[test]
    fn discretizer_three_intervals(){
        let mut input: Vec<Interval> = vec![
            named("chr1", 100, 200, "one"),
            named("chr1", 100, 220, "two"),
            named("chr1", 230, 250, "three"),
        ];
        assert_discretized(&mut input, &[
            (100, 200, vec!["one", "two"]),
            (200, 220, vec!["two"]),
            (230, 250, vec!["three"]),
        ]);
    }

    #[test]
    fn real_life_test(){
        let mut input: Vec<Interval> = vec![
            named("chr9", 113042724, 113044268, "ENST00000374227.8#ZFP37_1"),
            named("chr9", 113049361, 113049496, "ENST00000374227.8#ZFP37_2"),
            named("chr9", 113049790, 113049872, "ENST00000374227.8#ZFP37_3"),
            named("chr9", 113056556, 113056688, "ENST00000374227.8#ZFP37_4"),
            named("chr9", 113042724, 113044268, "NM_001282515.2#ZFP37_1"),
            named("chr9", 113049361, 113049496, "NM_001282515.2#ZFP37_2"),
            named("chr9", 113049790, 113049917, "NM_001282515.2#ZFP37_3"),
            named("chr9", 113056556, 113056688, "NM_001282515.2#ZFP37_4"),
            named("chr9", 113042724, 113044268, "NM_001282518.2#ZFP37_1"),
            named("chr9", 113049361, 113049496, "NM_001282518.2#ZFP37_2"),
            named("chr9", 113049790, 113049875, "NM_001282518.2#ZFP37_3"),
            named("chr9", 113056556, 113056688, "NM_001282518.2#ZFP37_4"),
        ];
        // Exons 1, 2 and 4 are identical across the three transcripts; exon 3
        // shares its start but ends at three different positions, so it is
        // split into three segments.
        assert_discretized(&mut input, &[
            (113042724, 113044268, vec![
                "ENST00000374227.8#ZFP37_1", "NM_001282515.2#ZFP37_1", "NM_001282518.2#ZFP37_1",
            ]),
            (113049361, 113049496, vec![
                "ENST00000374227.8#ZFP37_2", "NM_001282515.2#ZFP37_2", "NM_001282518.2#ZFP37_2",
            ]),
            (113049790, 113049872, vec![
                "ENST00000374227.8#ZFP37_3", "NM_001282518.2#ZFP37_3", "NM_001282515.2#ZFP37_3",
            ]),
            (113049872, 113049875, vec![
                "NM_001282518.2#ZFP37_3", "NM_001282515.2#ZFP37_3",
            ]),
            (113049875, 113049917, vec!["NM_001282515.2#ZFP37_3"]),
            (113056556, 113056688, vec![
                "ENST00000374227.8#ZFP37_4", "NM_001282515.2#ZFP37_4", "NM_001282518.2#ZFP37_4",
            ]),
        ]);
    }
}