deepbiop_utils/interval/
genomics.rs1use anyhow::Result;
2use derive_builder::Builder;
3use pyo3::prelude::*;
4
5use super::traits::Overlap;
6
7use bstr::BString;
8use std::str::FromStr;
9
10use pyo3_stub_gen::derive::*;
11
12#[gen_stub_pyclass]
15#[pyclass(module = "deepbiop.utils")]
16#[derive(Debug, Builder, Clone, PartialEq)]
17#[builder(build_fn(validate = "Self::validate"))]
18pub struct GenomicInterval {
19 pub chr: BString,
20 #[pyo3(get, set)]
21 pub start: usize,
22 #[pyo3(get, set)]
23 pub end: usize,
24}
25
26impl FromStr for GenomicInterval {
27 type Err = anyhow::Error;
28
29 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
37 let parts: Vec<&str> = s.split(':').collect();
38
39 if parts.len() != 2 {
40 return Err(anyhow::anyhow!("Invalid format"));
41 }
42
43 let chr = parts[0];
44 let positions: Vec<&str> = parts[1].split('-').collect();
45
46 if positions.len() != 2 {
47 return Err(anyhow::anyhow!("Invalid format"));
48 }
49
50 let start: usize = positions[0].parse()?;
51 let end: usize = positions[1].parse()?;
52
53 Ok(Self {
54 chr: chr.into(),
55 start,
56 end,
57 })
58 }
59}
60
61impl GenomicIntervalBuilder {
62 fn validate(&self) -> Result<(), String> {
63 if self.start > self.end {
64 Err("start must be less than end".to_string())
65 } else {
66 Ok(())
67 }
68 }
69}
70
71impl GenomicInterval {
72 pub fn new(chr: &str, start: usize, end: usize) -> Result<Self> {
73 if start > end {
74 Err(anyhow::anyhow!("start must be less than end"))
75 } else {
76 Ok(Self {
77 chr: chr.into(),
78 start,
79 end,
80 })
81 }
82 }
83}
84
85impl Overlap for GenomicInterval {
86 fn overlap(&self, other: &Self) -> bool {
87 self.chr == other.chr && self.start < other.end && self.end > other.start
88 }
89}
90
91impl PartialOrd for GenomicInterval {
92 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
93 if self.chr == other.chr {
94 if self.start == other.start {
95 self.end.partial_cmp(&other.end)
96 } else {
97 self.start.partial_cmp(&other.start)
98 }
99 } else {
100 self.chr.partial_cmp(&other.chr)
101 }
102 }
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises both construction paths (builder and `new`) and the
    /// `Overlap` implementation on same- and cross-chromosome intervals.
    #[test]
    fn test_segment() {
        // Drives the derive-generated builder with all three fields set.
        let build = |chr: &str, start: usize, end: usize| {
            GenomicIntervalBuilder::default()
                .chr(chr.into())
                .start(start)
                .end(end)
                .build()
        };

        let base = build("chr1", 100, 200).unwrap();
        let shifted = build("chr1", 150, 250).unwrap();
        assert!(base.overlap(&shifted));

        // start > end must be rejected by the builder's validation hook.
        let reversed = build("chr2", 350, 250);
        assert!(reversed.is_err());

        // An identical interval built through `new` overlaps the original.
        let same_span = GenomicInterval::new("chr1", 100, 200).unwrap();
        assert!(base.overlap(&same_span));

        // Overlapping coordinates on another chromosome do not count.
        let other_chr = GenomicInterval::new("chr2", 150, 300).unwrap();
        assert!(!base.overlap(&other_chr));
        assert!(!other_chr.overlap(&base));

        let wide = GenomicInterval::new("chr1", 150, 300).unwrap();
        let inner = GenomicInterval::new("chr1", 170, 200).unwrap();

        // Overlap is symmetric.
        assert!(wide.overlap(&base));
        assert!(base.overlap(&wide));

        assert!(wide.overlap(&inner));
        assert!(inner.overlap(&wide));
    }
}