Skip to main content

use_genomic_range/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6
/// Failure modes reported when constructing a genomic range.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GenomicRangeError {
    /// The requested end position is strictly smaller than the start position.
    EndBeforeStart,
}
13
14impl fmt::Display for GenomicRangeError {
15    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
16        match self {
17            Self::EndBeforeStart => formatter.write_str("genomic range end cannot be before start"),
18        }
19    }
20}
21
22impl Error for GenomicRangeError {}
23
/// Newtype around a raw `u64` genomic coordinate.
///
/// The wrapper attaches no coordinate-system meaning to the number; positions compare, order,
/// and hash exactly like the underlying integer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct GenomicPosition(u64);
27
28impl GenomicPosition {
29    /// Creates a genomic position.
30    #[must_use]
31    pub const fn new(value: u64) -> Self {
32        Self(value)
33    }
34
35    /// Returns the stored position value.
36    #[must_use]
37    pub const fn value(self) -> u64 {
38        self.0
39    }
40}
41
42impl fmt::Display for GenomicPosition {
43    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
44        write!(formatter, "{}", self.0)
45    }
46}
47
/// Strand orientation vocabulary.
///
/// The enum is fieldless, so it is `Copy`: values can be passed and stored by value without
/// explicit cloning. Variant order defines the derived ordering
/// (`Forward < Reverse < Unstranded < Unknown`).
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Strand {
    /// Forward strand.
    Forward,
    /// Reverse strand.
    Reverse,
    /// Unstranded interval.
    Unstranded,
    /// Unknown strand.
    Unknown,
}
60
61impl fmt::Display for Strand {
62    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
63        match self {
64            Self::Forward => formatter.write_str("forward"),
65            Self::Reverse => formatter.write_str("reverse"),
66            Self::Unstranded => formatter.write_str("unstranded"),
67            Self::Unknown => formatter.write_str("unknown"),
68        }
69    }
70}
71
72impl FromStr for Strand {
73    type Err = core::convert::Infallible;
74
75    fn from_str(value: &str) -> Result<Self, Self::Err> {
76        let strand = match value.trim().to_ascii_lowercase().as_str() {
77            "+" | "forward" | "plus" => Self::Forward,
78            "-" | "reverse" | "minus" => Self::Reverse,
79            "." | "unstranded" | "none" => Self::Unstranded,
80            _ => Self::Unknown,
81        };
82
83        Ok(strand)
84    }
85}
86
/// The coordinate convention in which a genomic range's positions are expressed.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CoordinateSystem {
    /// Half-open interval `[start, end)` with positions counted from zero.
    ZeroBasedHalfOpen,
    /// Closed interval `[start, end]` with positions counted from one.
    OneBasedClosed,
    /// The convention is not known.
    Unknown,
    /// A convention outside the built-in vocabulary, identified by name.
    Custom(String),
}
99
100impl fmt::Display for CoordinateSystem {
101    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
102        match self {
103            Self::ZeroBasedHalfOpen => formatter.write_str("zero-based-half-open"),
104            Self::OneBasedClosed => formatter.write_str("one-based-closed"),
105            Self::Unknown => formatter.write_str("unknown"),
106            Self::Custom(system) => formatter.write_str(system),
107        }
108    }
109}
110
111impl FromStr for CoordinateSystem {
112    type Err = core::convert::Infallible;
113
114    fn from_str(value: &str) -> Result<Self, Self::Err> {
115        let system = match value.trim().to_ascii_lowercase().as_str() {
116            "zero-based-half-open" | "zero_based_half_open" | "0-based-half-open" => {
117                Self::ZeroBasedHalfOpen
118            },
119            "one-based-closed" | "one_based_closed" | "1-based-closed" => Self::OneBasedClosed,
120            "unknown" | "" => Self::Unknown,
121            _ => Self::Custom(value.to_string()),
122        };
123
124        Ok(system)
125    }
126}
127
128/// A genomic interval with explicit coordinate assumptions.
129#[derive(Clone, Debug, Eq, PartialEq)]
130pub struct GenomicRange {
131    start: GenomicPosition,
132    end: GenomicPosition,
133    strand: Strand,
134    coordinate_system: CoordinateSystem,
135}
136
137impl GenomicRange {
138    /// Creates a genomic range using zero-based half-open coordinates and unstranded orientation.
139    ///
140    /// # Errors
141    ///
142    /// Returns [`GenomicRangeError::EndBeforeStart`] when `end < start`.
143    pub fn new(start: GenomicPosition, end: GenomicPosition) -> Result<Self, GenomicRangeError> {
144        if end < start {
145            Err(GenomicRangeError::EndBeforeStart)
146        } else {
147            Ok(Self {
148                start,
149                end,
150                strand: Strand::Unstranded,
151                coordinate_system: CoordinateSystem::ZeroBasedHalfOpen,
152            })
153        }
154    }
155
156    /// Sets the strand without changing positions.
157    #[must_use]
158    pub const fn with_strand(mut self, strand: Strand) -> Self {
159        self.strand = strand;
160        self
161    }
162
163    /// Sets the coordinate system without converting positions.
164    #[must_use]
165    pub fn with_coordinate_system(mut self, coordinate_system: CoordinateSystem) -> Self {
166        self.coordinate_system = coordinate_system;
167        self
168    }
169
170    /// Returns the start position.
171    #[must_use]
172    pub const fn start(&self) -> GenomicPosition {
173        self.start
174    }
175
176    /// Returns the end position.
177    #[must_use]
178    pub const fn end(&self) -> GenomicPosition {
179        self.end
180    }
181
182    /// Returns the strand.
183    #[must_use]
184    pub const fn strand(&self) -> &Strand {
185        &self.strand
186    }
187
188    /// Returns the coordinate system.
189    #[must_use]
190    pub const fn coordinate_system(&self) -> &CoordinateSystem {
191        &self.coordinate_system
192    }
193
194    /// Returns the interval length according to the stored coordinate system.
195    ///
196    /// No coordinate conversion is performed. For unknown and custom coordinate systems, the
197    /// helper returns `end - start` after constructor validation.
198    #[must_use]
199    pub const fn len(&self) -> u64 {
200        match &self.coordinate_system {
201            CoordinateSystem::OneBasedClosed => self.end.value() - self.start.value() + 1,
202            CoordinateSystem::ZeroBasedHalfOpen
203            | CoordinateSystem::Unknown
204            | CoordinateSystem::Custom(_) => self.end.value() - self.start.value(),
205        }
206    }
207
208    /// Returns true when the coordinate-system-specific length is zero.
209    #[must_use]
210    pub const fn is_empty(&self) -> bool {
211        self.len() == 0
212    }
213}
214
#[cfg(test)]
mod tests {
    use super::{CoordinateSystem, GenomicPosition, GenomicRange, GenomicRangeError, Strand};

    /// Builds a range from raw coordinates, returning the constructor's result unchanged.
    fn try_range(start: u64, end: u64) -> Result<GenomicRange, GenomicRangeError> {
        GenomicRange::new(GenomicPosition::new(start), GenomicPosition::new(end))
    }

    #[test]
    fn creates_valid_range() {
        let range = try_range(2, 8).expect("valid range");

        assert_eq!(range.start().value(), 2);
        assert_eq!(range.end().value(), 8);
    }

    #[test]
    fn rejects_reversed_range() {
        assert_eq!(try_range(8, 2), Err(GenomicRangeError::EndBeforeStart));
    }

    #[test]
    fn strand_displays_and_parses() {
        assert_eq!(Strand::Forward.to_string(), "forward");
        assert_eq!("-".parse::<Strand>(), Ok(Strand::Reverse));
    }

    #[test]
    fn coordinate_system_displays_and_parses() {
        let rendered = CoordinateSystem::ZeroBasedHalfOpen.to_string();
        assert_eq!(rendered, "zero-based-half-open");

        let parsed = "one-based-closed".parse::<CoordinateSystem>();
        assert_eq!(parsed, Ok(CoordinateSystem::OneBasedClosed));
    }

    #[test]
    fn length_helper_uses_coordinate_system() {
        // Same raw coordinates, different conventions: half-open excludes the end position,
        // closed includes it.
        let zero_based = try_range(10, 15).expect("valid range");
        let one_based = try_range(10, 15)
            .expect("valid range")
            .with_coordinate_system(CoordinateSystem::OneBasedClosed);

        assert_eq!(zero_based.len(), 5);
        assert_eq!(one_based.len(), 6);
    }
}