Skip to main content

rsomics_intervals/
set.rs

1use std::collections::BTreeMap;
2
3use crate::interval::Interval;
4
5// BTreeMap → lexicographic chrom order, matches bedtools sort default
6#[derive(Debug, Default, Clone)]
7pub struct IntervalSet {
8    by_chrom: BTreeMap<String, Vec<Interval>>,
9    sorted: bool,
10}
11
12impl IntervalSet {
13    #[must_use]
14    pub fn new() -> Self {
15        Self::default()
16    }
17
18    pub fn push(&mut self, interval: Interval) {
19        self.by_chrom
20            .entry(interval.chrom.clone())
21            .or_default()
22            .push(interval);
23        self.sorted = false;
24    }
25
26    pub fn extend<I: IntoIterator<Item = Interval>>(&mut self, items: I) {
27        for iv in items {
28            self.by_chrom.entry(iv.chrom.clone()).or_default().push(iv);
29        }
30        self.sorted = false;
31    }
32
33    #[must_use]
34    pub fn len(&self) -> usize {
35        self.by_chrom.values().map(Vec::len).sum()
36    }
37
38    #[must_use]
39    pub fn is_empty(&self) -> bool {
40        self.len() == 0
41    }
42
43    pub fn sort(&mut self) {
44        if self.sorted {
45            return;
46        }
47        for ivs in self.by_chrom.values_mut() {
48            ivs.sort_by(|a, b| a.start.cmp(&b.start).then(a.end.cmp(&b.end)));
49        }
50        self.sorted = true;
51    }
52
53    #[must_use]
54    pub const fn is_sorted(&self) -> bool {
55        self.sorted
56    }
57
58    pub fn chroms(&self) -> impl Iterator<Item = &str> {
59        self.by_chrom.keys().map(String::as_str)
60    }
61
62    #[must_use]
63    pub fn get(&self, chrom: &str) -> Option<&[Interval]> {
64        self.by_chrom.get(chrom).map(Vec::as_slice)
65    }
66
67    pub fn iter(&self) -> impl Iterator<Item = &Interval> {
68        self.by_chrom.values().flat_map(|v| v.iter())
69    }
70
71    pub fn iter_chroms(&self) -> impl Iterator<Item = (&str, &[Interval])> {
72        self.by_chrom
73            .iter()
74            .map(|(c, v)| (c.as_str(), v.as_slice()))
75    }
76}
77
78impl FromIterator<Interval> for IntervalSet {
79    fn from_iter<I: IntoIterator<Item = Interval>>(items: I) -> Self {
80        let mut s = Self::new();
81        s.extend(items);
82        s
83    }
84}
85
86#[cfg(test)]
87mod tests {
88    use super::*;
89
90    fn iv(chrom: &str, start: u64, end: u64) -> Interval {
91        Interval::new(chrom, start, end).unwrap()
92    }
93
94    #[test]
95    fn push_and_len() {
96        let mut s = IntervalSet::new();
97        assert!(s.is_empty());
98        s.push(iv("chr1", 100, 200));
99        s.push(iv("chr2", 300, 400));
100        s.push(iv("chr1", 50, 150));
101        assert_eq!(s.len(), 3);
102        assert_eq!(s.get("chr1").unwrap().len(), 2);
103    }
104
105    #[test]
106    fn sort_orders_within_chrom() {
107        let mut s = IntervalSet::from_iter([
108            iv("chr1", 300, 400),
109            iv("chr1", 100, 200),
110            iv("chr1", 200, 300),
111        ]);
112        assert!(!s.is_sorted());
113        s.sort();
114        let v = s.get("chr1").unwrap();
115        assert_eq!((v[0].start, v[1].start, v[2].start), (100, 200, 300));
116        assert!(s.is_sorted());
117    }
118
119    #[test]
120    fn chroms_iter_is_lexicographic() {
121        let s = IntervalSet::from_iter([iv("chr10", 0, 1), iv("chr1", 0, 1), iv("chr2", 0, 1)]);
122        let chroms: Vec<_> = s.chroms().collect();
123        assert_eq!(chroms, vec!["chr1", "chr10", "chr2"]);
124    }
125}