crussmap/
interval.rs

1use crate::{
2    parser::{ChainRecords, Strand},
3    utils::get_data_from_input,
4};
5use log::{error, warn};
6use rust_lapper::{Interval, Lapper};
7use std::{
8    cmp::{max, min},
9    collections::HashMap,
10    fmt,
11};
12
13#[derive(Debug, Clone, PartialEq)]
14pub struct Block {
15    pub name: String,
16    pub start: usize,
17    pub end: usize,
18    pub strand: Strand,
19}
20
21impl Eq for Block {}
22
23#[derive(Debug)]
24pub struct Region<'a> {
25    pub chrom: &'a String,
26    pub start: usize,
27    pub end: usize,
28    pub strand: Strand,
29}
30
31impl fmt::Display for Region<'_> {
32    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
33        write!(
34            f,
35            "{}\t{}\t{}\t{}",
36            self.chrom, self.start, self.end, self.strand,
37        )
38    }
39}
40
41pub type BlockIvl = Interval<usize, Block>;
42
43pub fn get_block_ivl(block_target: Block, block_query: Block) -> BlockIvl {
44    BlockIvl {
45        start: block_target.start,
46        stop: block_target.end,
47        val: block_query,
48    }
49}
50
51pub fn get_lapper_hashmap(input: &Option<String>) -> HashMap<String, Lapper<usize, Block>> {
52    let data = get_data_from_input(input);
53    let chain_record_iter = ChainRecords(&data);
54    let mut chrom_ivls_hashmap: HashMap<String, Lapper<usize, Block>> = HashMap::new();
55    let mut chrom_ivls_vec_hashmap: HashMap<String, Vec<Interval<usize, Block>>> = HashMap::new();
56    for chain_record in chain_record_iter {
57        let chain_record = chain_record.unwrap();
58        let target_chrom = chain_record.header.target.name;
59        let block_ivls = chain_record.block_ivls;
60        // combine interval vecs when target_chroms are same:
61        if chrom_ivls_vec_hashmap.contains_key(&target_chrom) {
62            let chrom_ivls_vec = chrom_ivls_vec_hashmap.get_mut(&target_chrom).unwrap();
63            chrom_ivls_vec.extend(block_ivls);
64        } else {
65            chrom_ivls_vec_hashmap.insert(target_chrom.clone(), block_ivls);
66        }
67    }
68    for (chrom, ivls) in chrom_ivls_vec_hashmap {
69        let lapper = Lapper::new(ivls);
70        chrom_ivls_hashmap.insert(chrom, lapper);
71    }
72    chrom_ivls_hashmap
73}
74
/// Returns the overlap of the spans `start1..end1` and `start2..end2`.
///
/// The result is `(max(start1, start2), min(end1, end2))`. Spans that merely
/// touch (one ends exactly where the other starts) still yield `Some`, with a
/// zero-length overlap. `None` is returned only when one span starts strictly
/// after the other ends.
fn intersect_two_region(
    start1: usize,
    end1: usize,
    start2: usize,
    end2: usize,
) -> Option<(usize, usize)> {
    let overlaps = start1 <= end2 && start2 <= end1;
    overlaps.then(|| (max(start1, start2), min(end1, end2)))
}
88
89pub fn find_in_lapper<'a>(
90    lapper_hashmap: &'a HashMap<String, Lapper<usize, Block>>,
91    q_region: &Region<'a>,
92) -> Vec<Region<'a>> {
93    let lapper = match lapper_hashmap.get(q_region.chrom) {
94        Some(lapper) => lapper,
95        None => {
96            warn!("chrom:{} not found in chain file", q_region.chrom);
97            return Vec::new();
98        }
99    };
100    // info!("get chrom: {} lapper: {:?}", q_chrom, lapper);
101    let targets = lapper
102        .find(q_region.start, q_region.end)
103        .collect::<Vec<&BlockIvl>>();
104    // info!("get targets: {:?}", targets);
105    let mut matches: Vec<Region> = Vec::new();
106    for target in targets {
107        let target_region = Region {
108            chrom: &target.val.name,
109            start: target.val.start,
110            end: target.val.start,
111            strand: target.val.strand,
112        };
113        let (real_start, real_end) =
114            match intersect_two_region(q_region.start, q_region.end, target.start, target.stop) {
115                Some((start, end)) => (start, end),
116                None => {
117                    error!(
118                        "intersect_two_region error in {}:{}{}",
119                        q_region.chrom, q_region.start, q_region.end
120                    );
121                    continue;
122                }
123            };
124        let l_offset = real_start.abs_diff(target.start);
125        let size = real_end.abs_diff(real_start);
126        matches.push(Region {
127            chrom: q_region.chrom,
128            start: real_start,
129            end: real_end,
130            strand: q_region.strand,
131        });
132        let i_start = match target_region.strand {
133            Strand::Positive => target_region.start + l_offset,
134            Strand::Negative => target_region.end - l_offset - size,
135        };
136        let apdx_strand = match q_region.strand {
137            Strand::Positive => target_region.strand,
138            Strand::Negative => target_region.strand.reverse(),
139        };
140        matches.push(Region {
141            chrom: target_region.chrom,
142            start: i_start,
143            end: i_start + size,
144            strand: apdx_strand,
145        });
146    }
147    matches
148}