1use crate::{
2 parser::{ChainRecords, Strand},
3 utils::get_data_from_input,
4};
5use log::{error, warn};
6use rust_lapper::{Interval, Lapper};
7use std::{
8 cmp::{max, min},
9 collections::HashMap,
10 fmt,
11};
12
/// A named, stranded coordinate span.
///
/// Within this file a `Block` is carried as the value of a [`BlockIvl`], and
/// its `name` is used as the chromosome of regions built in `find_in_lapper`.
#[derive(Debug, Clone, PartialEq)]
pub struct Block {
    /// Sequence name (used as a chromosome name by `find_in_lapper`).
    pub name: String,
    /// Start coordinate of the span.
    pub start: usize,
    /// End coordinate of the span.
    /// NOTE(review): presumably exclusive (half-open) — confirm against the parser.
    pub end: usize,
    /// Strand the span lies on.
    pub strand: Strand,
}

// `Eq` is only a marker on top of the derived `PartialEq`; it is implemented
// by hand here rather than derived.
impl Eq for Block {}
22
/// A stranded coordinate span on a chromosome whose name is borrowed.
///
/// Its `Display` implementation prints the four fields separated by tabs.
#[derive(Debug)]
pub struct Region<'a> {
    /// Borrowed chromosome name.
    pub chrom: &'a String,
    /// Start coordinate of the span.
    pub start: usize,
    /// End coordinate of the span.
    pub end: usize,
    /// Strand the span lies on.
    pub strand: Strand,
}
30
31impl fmt::Display for Region<'_> {
32 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
33 write!(
34 f,
35 "{}\t{}\t{}\t{}",
36 self.chrom, self.start, self.end, self.strand,
37 )
38 }
39}
40
/// An interval over `usize` coordinates that carries a [`Block`] as its value.
pub type BlockIvl = Interval<usize, Block>;
42
43pub fn get_block_ivl(block_target: Block, block_query: Block) -> BlockIvl {
44 BlockIvl {
45 start: block_target.start,
46 stop: block_target.end,
47 val: block_query,
48 }
49}
50
51pub fn get_lapper_hashmap(input: &Option<String>) -> HashMap<String, Lapper<usize, Block>> {
52 let data = get_data_from_input(input);
53 let chain_record_iter = ChainRecords(&data);
54 let mut chrom_ivls_hashmap: HashMap<String, Lapper<usize, Block>> = HashMap::new();
55 let mut chrom_ivls_vec_hashmap: HashMap<String, Vec<Interval<usize, Block>>> = HashMap::new();
56 for chain_record in chain_record_iter {
57 let chain_record = chain_record.unwrap();
58 let target_chrom = chain_record.header.target.name;
59 let block_ivls = chain_record.block_ivls;
60 if chrom_ivls_vec_hashmap.contains_key(&target_chrom) {
62 let chrom_ivls_vec = chrom_ivls_vec_hashmap.get_mut(&target_chrom).unwrap();
63 chrom_ivls_vec.extend(block_ivls);
64 } else {
65 chrom_ivls_vec_hashmap.insert(target_chrom.clone(), block_ivls);
66 }
67 }
68 for (chrom, ivls) in chrom_ivls_vec_hashmap {
69 let lapper = Lapper::new(ivls);
70 chrom_ivls_hashmap.insert(chrom, lapper);
71 }
72 chrom_ivls_hashmap
73}
74
/// Returns the overlap of the spans `start1..end1` and `start2..end2`.
///
/// The result is `Some((max(start1, start2), min(end1, end2)))` when neither
/// span starts after the other one ends, and `None` otherwise. Spans that
/// merely touch (one starts exactly where the other ends) count as
/// overlapping and yield a zero-length result.
fn intersect_two_region(
    start1: usize,
    end1: usize,
    start2: usize,
    end2: usize,
) -> Option<(usize, usize)> {
    let overlaps = start1 <= end2 && start2 <= end1;
    if overlaps {
        Some((max(start1, start2), min(end1, end2)))
    } else {
        None
    }
}
88
89pub fn find_in_lapper<'a>(
90 lapper_hashmap: &'a HashMap<String, Lapper<usize, Block>>,
91 q_region: &Region<'a>,
92) -> Vec<Region<'a>> {
93 let lapper = match lapper_hashmap.get(q_region.chrom) {
94 Some(lapper) => lapper,
95 None => {
96 warn!("chrom:{} not found in chain file", q_region.chrom);
97 return Vec::new();
98 }
99 };
100 let targets = lapper
102 .find(q_region.start, q_region.end)
103 .collect::<Vec<&BlockIvl>>();
104 let mut matches: Vec<Region> = Vec::new();
106 for target in targets {
107 let target_region = Region {
108 chrom: &target.val.name,
109 start: target.val.start,
110 end: target.val.start,
111 strand: target.val.strand,
112 };
113 let (real_start, real_end) =
114 match intersect_two_region(q_region.start, q_region.end, target.start, target.stop) {
115 Some((start, end)) => (start, end),
116 None => {
117 error!(
118 "intersect_two_region error in {}:{}{}",
119 q_region.chrom, q_region.start, q_region.end
120 );
121 continue;
122 }
123 };
124 let l_offset = real_start.abs_diff(target.start);
125 let size = real_end.abs_diff(real_start);
126 matches.push(Region {
127 chrom: q_region.chrom,
128 start: real_start,
129 end: real_end,
130 strand: q_region.strand,
131 });
132 let i_start = match target_region.strand {
133 Strand::Positive => target_region.start + l_offset,
134 Strand::Negative => target_region.end - l_offset - size,
135 };
136 let apdx_strand = match q_region.strand {
137 Strand::Positive => target_region.strand,
138 Strand::Negative => target_region.strand.reverse(),
139 };
140 matches.push(Region {
141 chrom: target_region.chrom,
142 start: i_start,
143 end: i_start + size,
144 strand: apdx_strand,
145 });
146 }
147 matches
148}