deepbiop_utils/interval/
genomics.rs

1use anyhow::Result;
2use derive_builder::Builder;
3use pyo3::prelude::*;
4
5use super::traits::Overlap;
6
7use bstr::BString;
8use std::str::FromStr;
9
10use pyo3_stub_gen::derive::*;
11
/// A genomic interval (segment) identified by a chromosome name, a start
/// position and an end position.
///
/// The start position is inclusive and the end position is exclusive.
///
/// `start` and `end` are exposed to Python as readable and writable attributes
/// through `#[pyo3(get, set)]`; `chr` carries no such attribute.
///
/// `derive_builder` generates a `GenomicIntervalBuilder` for this type whose
/// build step is configured to call `GenomicIntervalBuilder::validate`.
///
/// All fields are public, so the `start <= end` check performed by the
/// constructors in this file is not re-applied when a field is assigned later.
#[gen_stub_pyclass]
#[pyclass(module = "deepbiop.utils")]
#[derive(Debug, Builder, Clone, PartialEq)]
#[builder(build_fn(validate = "Self::validate"))]
pub struct GenomicInterval {
    /// Chromosome name, stored as a byte string.
    pub chr: BString,
    /// Start position (inclusive).
    #[pyo3(get, set)]
    pub start: usize,
    /// End position (exclusive).
    #[pyo3(get, set)]
    pub end: usize,
}
25
26impl FromStr for GenomicInterval {
27    type Err = anyhow::Error;
28
29    /// Parse a string into a GenomicInterval. The string should be formatted as
30    /// # Example
31    /// ```
32    /// use deepbiop_utils::interval::GenomicInterval;
33    /// let  value =  "chr1:100-200";
34    /// let interval: GenomicInterval = value.parse().unwrap();
35    /// ```
36    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
37        let parts: Vec<&str> = s.split(':').collect();
38
39        if parts.len() != 2 {
40            return Err(anyhow::anyhow!("Invalid format"));
41        }
42
43        let chr = parts[0];
44        let positions: Vec<&str> = parts[1].split('-').collect();
45
46        if positions.len() != 2 {
47            return Err(anyhow::anyhow!("Invalid format"));
48        }
49
50        let start: usize = positions[0].parse()?;
51        let end: usize = positions[1].parse()?;
52
53        Ok(Self {
54            chr: chr.into(),
55            start,
56            end,
57        })
58    }
59}
60
61impl GenomicIntervalBuilder {
62    fn validate(&self) -> Result<(), String> {
63        if self.start > self.end {
64            Err("start must be less than end".to_string())
65        } else {
66            Ok(())
67        }
68    }
69}
70
71impl GenomicInterval {
72    pub fn new(chr: &str, start: usize, end: usize) -> Result<Self> {
73        if start > end {
74            Err(anyhow::anyhow!("start must be less than end"))
75        } else {
76            Ok(Self {
77                chr: chr.into(),
78                start,
79                end,
80            })
81        }
82    }
83}
84
85impl Overlap for GenomicInterval {
86    fn overlap(&self, other: &Self) -> bool {
87        self.chr == other.chr && self.start < other.end && self.end > other.start
88    }
89}
90
91impl PartialOrd for GenomicInterval {
92    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
93        if self.chr == other.chr {
94            if self.start == other.start {
95                self.end.partial_cmp(&other.end)
96            } else {
97                self.start.partial_cmp(&other.start)
98            }
99        } else {
100            self.chr.partial_cmp(&other.chr)
101        }
102    }
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Covers builder construction and validation, `GenomicInterval::new`,
    /// and `overlap` for same-chromosome and cross-chromosome pairs.
    #[test]
    fn test_segment() {
        // Small helper so each builder-based case fits on one line.
        let build = |chr: &str, start: usize, end: usize| {
            GenomicIntervalBuilder::default()
                .chr(chr.into())
                .start(start)
                .end(end)
                .build()
        };

        let base = build("chr1", 100, 200).unwrap();
        let shifted = build("chr1", 150, 250).unwrap();
        assert!(base.overlap(&shifted));

        // Reversed bounds must be refused by the builder.
        let reversed = build("chr2", 350, 250);
        assert!(reversed.is_err());

        let identical = GenomicInterval::new("chr1", 100, 200).unwrap();
        assert!(base.overlap(&identical));

        // Same coordinates range, different chromosome: never overlapping.
        let other_chr = GenomicInterval::new("chr2", 150, 300).unwrap();
        assert!(!base.overlap(&other_chr));
        assert!(!other_chr.overlap(&base));

        let wide = GenomicInterval::new("chr1", 150, 300).unwrap();
        let inner = GenomicInterval::new("chr1", 170, 200).unwrap();

        // Overlap must hold in both directions for each pair.
        for (left, right) in [(&wide, &base), (&wide, &inner)] {
            assert!(left.overlap(right));
            assert!(right.overlap(left));
        }
    }
}