1use std::ops::Index;
2use std::collections::{HashMap, VecDeque};
3
4use super::{GenomicRange, BEDLike};
5use crate::intervaltree::{Interval, IterFind, IterLapper, Lapper};
6
7#[derive(Debug, Clone)]
11pub struct GIntervalMap<D>(HashMap<String, Lapper<u64, D>>);
12
13impl<D> GIntervalMap<D> {
14 pub fn new() -> Self {
15 Self(HashMap::new())
16 }
17
18 pub fn len(&self) -> usize {
20 self.0.values().map(|x| x.len()).sum()
21 }
22
23 pub fn iter(&self) -> Iter<'_, D> {
24 Iter(self.0.iter().map(|(k, v)| (k, v.iter())).collect::<VecDeque<_>>())
25 }
26
27 pub fn insert<B: BEDLike>(&mut self, bed: &B, value: D) {
29 let chr = bed.chrom();
30 let interval = Interval { start: bed.start(), stop: bed.end(), val: value };
31 let tree = self.0.entry(chr.to_string()).or_insert(Lapper::new(Vec::new()));
32 tree.insert(interval);
33 }
34
35 pub fn find<B: BEDLike>(&self, bed: &B) -> GIntervalQueryIter<'_, D> {
37 let chr = bed.chrom().to_string();
38 match self.0.get(&chr) {
39 None => GIntervalQueryIter { chrom: chr, query_iter: None },
40 Some(tree) => GIntervalQueryIter { chrom: chr, query_iter: Some(tree.find(bed.start(), bed.end())) }
41 }
42 }
43
44 pub fn is_overlapped<B: BEDLike>(&self, bed: &B) -> bool {
46 self.find(bed).next().is_some()
47 }
48}
49
50impl<B: BEDLike, D> FromIterator<(B, D)> for GIntervalMap<D> {
51 fn from_iter<I: IntoIterator<Item = (B, D)>>(iter: I) -> Self {
52 let mut hmap: HashMap<String, Vec<_>> = HashMap::new();
53 for (bed, data) in iter {
54 let chr = bed.chrom();
55 let interval = Interval { start: bed.start(), stop: bed.end(), val: data};
56 let vec = hmap.entry(chr.to_string()).or_insert(Vec::new());
57 vec.push(interval);
58 }
59 let hm = hmap.into_iter().map(|(chr, vec)| (chr, Lapper::new(vec))).collect();
60 Self(hm)
61 }
62}
63
64#[derive(Debug)]
67pub struct GIntervalQueryIter<'a, D> {
68 chrom: String,
69 query_iter: Option<IterFind<'a, u64, D>>,
70}
71
72impl<'a, D: 'a> Iterator for GIntervalQueryIter<'a, D> {
73 type Item = (GenomicRange, &'a D);
74
75 fn next(&mut self) -> Option<Self::Item> {
76 match self.query_iter {
77 None => return None,
78 Some(ref mut iter) => match iter.next() {
79 None => return None,
80 Some(interval) => {
81 let bed = GenomicRange::new(&self.chrom, interval.start, interval.stop);
82 Some((bed, &interval.val))
83 }
84 }
85 }
86 }
87}
88
89pub struct Iter<'a, D>(VecDeque<(&'a String, IterLapper<'a, u64, D>)>);
90
91impl<'a, D> Iterator for Iter<'a, D> {
92 type Item = (GenomicRange, &'a D);
93
94 fn next(&mut self) -> Option<Self::Item> {
95 if self.0.is_empty() {
96 return None
97 }
98 match self.0[0].1.next() {
99 None => {
100 self.0.pop_front();
101 self.next()
102 }
103 Some(interval) => {
104 let bed = GenomicRange::new(self.0[0].0, interval.start, interval.stop);
105 Some((bed, &interval.val))
106 }
107 }
108 }
109}
110
111#[derive(Debug, Clone)]
113pub struct GIntervalIndexMap<D> {
114 data: Vec<D>,
115 indices: GIntervalMap<usize>,
116}
117
118impl<D> GIntervalIndexMap<D> {
119 pub fn new() -> Self {
120 Self { data: Vec::new(), indices: GIntervalMap::new() }
121 }
122
123 pub fn len(&self) -> usize {
124 self.data.len()
125 }
126
127 pub fn find<B: BEDLike>(&self, bed: &B) -> impl Iterator<Item = (GenomicRange, &D)> {
129 self.indices.find(bed).map(|(g, i)| (g, &self.data[*i]))
130 }
131
132 pub fn find_index_of<B: BEDLike>(&self, bed: &B) -> GIntervalQueryIter<'_, usize> {
134 self.indices.find(bed)
135 }
136
137 pub fn get(&self, index: usize) -> Option<&D> {
138 self.data.get(index)
139 }
140}
141
142impl<B: BEDLike, D> FromIterator<(B, D)> for GIntervalIndexMap<D> {
143 fn from_iter<I: IntoIterator<Item = (B, D)>>(iter: I) -> Self {
144 let mut data = Vec::new();
145 let indices = iter.into_iter().enumerate().map(|(i, (bed, val))| {
146 data.push(val);
147 (bed, i)
148 }).collect();
149 Self { data, indices }
150 }
151}
152
153
154#[derive(Debug, Clone)]
156pub struct GIntervalIndexSet {
157 data: Vec<GenomicRange>,
158 indices: GIntervalMap<usize>,
159}
160
161impl Index<usize> for GIntervalIndexSet {
162 type Output = GenomicRange;
163
164 fn index(&self, index: usize) -> &Self::Output {
165 &self.data[index]
166 }
167}
168
169impl GIntervalIndexSet {
170 pub fn len(&self) -> usize {
171 self.data.len()
172 }
173
174 pub fn iter(&self) -> impl Iterator<Item = &GenomicRange> {
175 self.data.iter()
176 }
177
178 pub fn is_overlapped<B: BEDLike>(&self, bed: &B) -> bool {
179 self.find_full(bed).next().is_some()
180 }
181
182 pub fn find<B: BEDLike>(&self, bed: &B) -> impl Iterator<Item = GenomicRange> + '_ {
184 self.indices.find(bed).map(|x| x.0)
185 }
186
187 pub fn find_index_of<B: BEDLike>(&self, bed: &B) -> impl Iterator<Item = usize> + '_ {
189 self.indices.find(bed).map(|x| *x.1)
190 }
191
192 pub fn find_full<B: BEDLike>(&self, bed: &B) -> GIntervalQueryIter<'_, usize> {
193 self.indices.find(bed)
194 }
195
196 pub fn get(&self, index: usize) -> Option<&GenomicRange> {
197 self.data.get(index)
198 }
199}
200
201impl IntoIterator for GIntervalIndexSet {
202 type IntoIter = std::vec::IntoIter<Self::Item>;
203 type Item = GenomicRange;
204
205 fn into_iter(self) -> Self::IntoIter {
206 self.data.into_iter()
207 }
208}
209
210impl<B: BEDLike> FromIterator<B> for GIntervalIndexSet {
211 fn from_iter<I: IntoIterator<Item = B>>(iter: I) -> Self {
212 let mut data = Vec::new();
213 let indices = iter.into_iter().enumerate().map(|(i, bed)| {
214 data.push(bed.to_genomic_range());
215 (bed, i)
216 }).collect();
217 Self { data, indices }
218 }
219}
220
221#[cfg(test)]
222mod bed_intersect_tests {
223 use super::*;
224 use itertools::Itertools;
225
226 #[test]
227 fn test_create() {
228 let beds: Vec<GenomicRange> = [
229 "chr1:200-500",
230 "chr1:200-500",
231 "chr1:1000-2000",
232 "chr1:1000-2000",
233 ].into_iter().map(|x| x.parse().unwrap()).collect();
234 let values = vec![2, 3, 5, 5];
235 let mut map: GIntervalMap<_> = beds.into_iter().zip(values.into_iter()).collect();
236 map.insert(&GenomicRange::new("chr1", 1000, 2000), 5);
237 map.insert(&GenomicRange::new("chr1", 1000, 2000), 6);
238
239 let result: Vec<_> = map.iter().sorted().collect();
240 let expected = vec![
241 (GenomicRange::new("chr1", 200, 500), &2),
242 (GenomicRange::new("chr1", 200, 500), &3),
243 (GenomicRange::new("chr1", 1000, 2000), &5),
244 (GenomicRange::new("chr1", 1000, 2000), &5),
245 (GenomicRange::new("chr1", 1000, 2000), &5),
246 (GenomicRange::new("chr1", 1000, 2000), &6),
247 ];
248 assert_eq!(result, expected);
249 }
250
251 #[test]
252 fn test_intersect() {
253 let bed_set1: Vec<GenomicRange> = [
254 "chr1:200-500",
255 "chr1:200-500",
256 "chr1:1000-2000",
257 ].into_iter().map(|x| x.parse().unwrap()).collect();
258 let bed_set2: Vec<GenomicRange> = [
259 "chr1:100-210",
260 "chr1:100-200",
261 ].into_iter().map(|x| x.parse().unwrap()).collect();
262 let tree: GIntervalIndexSet = bed_set1.clone().into_iter().collect();
263
264 assert_eq!(
265 tree.find(&bed_set2[0]).collect::<Vec<_>>(),
266 vec![bed_set1.as_slice()[0].clone(), bed_set1.as_slice()[0].clone()]
267 );
268
269 let result: Vec<GenomicRange> = bed_set2.into_iter().filter(|x| tree.is_overlapped(x)).collect();
270 let expected = vec!["chr1:100-210".parse().unwrap()];
271 assert_eq!(result, expected);
272
273 }
274}