Skip to main content

rsomics_intervals/
interval.rs

1use serde::{Deserialize, Serialize};
2
3#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
4#[non_exhaustive]
5pub enum Strand {
6    Forward,
7    Reverse,
8}
9
10impl Strand {
11    #[must_use]
12    pub const fn as_byte(self) -> u8 {
13        match self {
14            Self::Forward => b'+',
15            Self::Reverse => b'-',
16        }
17    }
18
19    #[must_use]
20    pub const fn from_byte(b: u8) -> Option<Self> {
21        match b {
22            b'+' => Some(Self::Forward),
23            b'-' => Some(Self::Reverse),
24            _ => None,
25        }
26    }
27}
28
29// 0-based half-open [start, end); kept tiny — hot-path iterates millions, per-record extras go on a wrapper
30#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
31pub struct Interval {
32    pub chrom: String,
33    pub start: u64,
34    pub end: u64,
35    #[serde(skip_serializing_if = "Option::is_none", default)]
36    pub strand: Option<Strand>,
37}
38
39impl Interval {
40    #[allow(clippy::missing_errors_doc)]
41    pub fn new(chrom: impl Into<String>, start: u64, end: u64) -> Result<Self, IntervalError> {
42        if start >= end {
43            return Err(IntervalError::Empty { start, end });
44        }
45        Ok(Self {
46            chrom: chrom.into(),
47            start,
48            end,
49            strand: None,
50        })
51    }
52
53    #[allow(clippy::missing_errors_doc)]
54    pub fn with_strand(
55        chrom: impl Into<String>,
56        start: u64,
57        end: u64,
58        strand: Strand,
59    ) -> Result<Self, IntervalError> {
60        let mut iv = Self::new(chrom, start, end)?;
61        iv.strand = Some(strand);
62        Ok(iv)
63    }
64
65    #[must_use]
66    pub fn len(&self) -> u64 {
67        self.end - self.start
68    }
69
70    // always false — Interval::new rejects empty intervals at construction
71    #[must_use]
72    pub const fn is_empty(&self) -> bool {
73        false
74    }
75
76    #[must_use]
77    pub fn overlaps(&self, other: &Self) -> bool {
78        self.chrom == other.chrom && self.start < other.end && other.start < self.end
79    }
80
81    #[must_use]
82    pub fn contains(&self, other: &Self) -> bool {
83        self.chrom == other.chrom && self.start <= other.start && other.end <= self.end
84    }
85
86    #[must_use]
87    pub fn overlap_bases(&self, other: &Self) -> u64 {
88        if self.chrom != other.chrom {
89            return 0;
90        }
91        let lo = self.start.max(other.start);
92        let hi = self.end.min(other.end);
93        hi.saturating_sub(lo)
94    }
95}
96
97#[derive(Debug, thiserror::Error)]
98#[non_exhaustive]
99pub enum IntervalError {
100    #[error("empty or inverted interval: start={start} >= end={end}")]
101    Empty { start: u64, end: u64 },
102}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an unstranded interval, panicking if the coordinates are rejected.
    fn span(chrom: &str, start: u64, end: u64) -> Interval {
        Interval::new(chrom, start, end).unwrap()
    }

    #[test]
    fn empty_interval_rejected() {
        // Zero-width first, then inverted.
        for (start, end) in [(100, 100), (200, 100)] {
            let result = Interval::new("chr1", start, end);
            assert!(matches!(result, Err(IntervalError::Empty { .. })));
        }
    }

    #[test]
    fn length_is_end_minus_start() {
        let fifty_wide = span("chr1", 100, 150);
        assert_eq!(fifty_wide.len(), 50);
    }

    #[test]
    fn overlaps_same_chrom() {
        let a = span("chr1", 100, 200);

        // Partial overlap on either side, and the identical interval.
        for (start, end) in [(150, 250), (50, 150), (100, 200)] {
            assert!(a.overlaps(&span("chr1", start, end)));
        }

        let touching_high = span("chr1", 200, 300);
        assert!(!a.overlaps(&touching_high), "half-open touching");

        let touching_low = span("chr1", 0, 100);
        assert!(!a.overlaps(&touching_low), "half-open touching low");

        let disjoint = span("chr1", 250, 300);
        assert!(!a.overlaps(&disjoint));
    }

    #[test]
    fn overlaps_different_chrom_is_false() {
        let on_chr1 = span("chr1", 100, 200);
        let on_chr2 = span("chr2", 100, 200);
        assert!(!on_chr1.overlaps(&on_chr2));
    }

    #[test]
    fn contains_is_inclusive_at_edges() {
        let outer = span("chr1", 100, 200);

        for (start, end) in [(100, 200), (120, 180)] {
            assert!(outer.contains(&span("chr1", start, end)));
        }
        // One position past either edge is no longer contained.
        for (start, end) in [(100, 201), (99, 150)] {
            assert!(!outer.contains(&span("chr1", start, end)));
        }
    }

    #[test]
    fn overlap_bases_counts_intersection_length() {
        let base = span("chr1", 100, 200);
        // (chrom, start, end, expected shared positions)
        let cases = [
            ("chr1", 150, 250, 50),
            ("chr1", 200, 300, 0),
            ("chr2", 100, 200, 0),
            ("chr1", 120, 180, 60),
        ];
        for (chrom, start, end, expected) in cases {
            assert_eq!(base.overlap_bases(&span(chrom, start, end)), expected);
        }
    }

    #[test]
    fn strand_round_trips() {
        let forward = Strand::Forward;
        assert_eq!(forward.as_byte(), b'+');
        assert_eq!(Strand::from_byte(b'+'), Some(forward));
        assert_eq!(Strand::from_byte(b'.'), None);
    }
}