Skip to main content

use_motif/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6use use_genomic_range::GenomicRange;
7
8fn non_empty_text(value: impl AsRef<str>) -> Result<String, MotifValueError> {
9    let trimmed = value.as_ref().trim();
10
11    if trimmed.is_empty() {
12        Err(MotifValueError::Empty)
13    } else {
14        Ok(trimmed.to_string())
15    }
16}
17
/// Error produced when a motif vocabulary value fails validation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MotifValueError {
    /// Nothing remained of the supplied value once surrounding whitespace was trimmed.
    Empty,
    /// The supplied sequence range had an end position smaller than its start position.
    SequenceRangeEndBeforeStart,
}

impl fmt::Display for MotifValueError {
    /// Writes the fixed, human-readable message associated with the variant.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::Empty => "motif value cannot be empty",
            Self::SequenceRangeEndBeforeStart => {
                "motif hit sequence range end cannot be before start"
            },
        };

        formatter.write_str(message)
    }
}

impl Error for MotifValueError {}
39
40/// A non-empty motif name.
41#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
42pub struct MotifName(String);
43
44impl MotifName {
45    /// Creates a motif name from non-empty text.
46    ///
47    /// # Errors
48    ///
49    /// Returns [`MotifValueError::Empty`] when the trimmed name is empty.
50    pub fn new(value: impl AsRef<str>) -> Result<Self, MotifValueError> {
51        non_empty_text(value).map(Self)
52    }
53
54    /// Returns the motif name text.
55    #[must_use]
56    pub fn as_str(&self) -> &str {
57        &self.0
58    }
59}
60
61impl fmt::Display for MotifName {
62    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
63        formatter.write_str(self.as_str())
64    }
65}
66
67impl FromStr for MotifName {
68    type Err = MotifValueError;
69
70    fn from_str(value: &str) -> Result<Self, Self::Err> {
71        Self::new(value)
72    }
73}
74
75/// A non-empty plain motif pattern.
76#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
77pub struct MotifPattern(String);
78
79impl MotifPattern {
80    /// Creates a motif pattern from non-empty plain text.
81    ///
82    /// The pattern is stored descriptively. It is not treated as a regex or search expression.
83    ///
84    /// # Errors
85    ///
86    /// Returns [`MotifValueError::Empty`] when the trimmed pattern is empty.
87    pub fn new(value: impl AsRef<str>) -> Result<Self, MotifValueError> {
88        non_empty_text(value).map(Self)
89    }
90
91    /// Returns the motif pattern text.
92    #[must_use]
93    pub fn as_str(&self) -> &str {
94        &self.0
95    }
96}
97
98impl fmt::Display for MotifPattern {
99    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
100        formatter.write_str(self.as_str())
101    }
102}
103
104impl FromStr for MotifPattern {
105    type Err = MotifValueError;
106
107    fn from_str(value: &str) -> Result<Self, Self::Err> {
108        Self::new(value)
109    }
110}
111
/// A descriptive motif kind.
///
/// The built-in variants have fixed lowercase text forms (see the [`fmt::Display`] impl);
/// anything else is carried verbatim in [`MotifKind::Custom`].
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum MotifKind {
    /// DNA motif.
    Dna,
    /// RNA motif.
    Rna,
    /// Protein motif.
    Protein,
    /// Regulatory motif.
    Regulatory,
    /// Binding-site motif.
    BindingSite,
    /// Repeat motif.
    Repeat,
    /// Unknown motif kind.
    Unknown,
    /// Domain-specific motif kind.
    Custom(String),
}

impl fmt::Display for MotifKind {
    /// Writes the lowercase label of a built-in kind, or the stored text of a custom kind.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Dna => "dna",
            Self::Rna => "rna",
            Self::Protein => "protein",
            Self::Regulatory => "regulatory",
            Self::BindingSite => "binding-site",
            Self::Repeat => "repeat",
            Self::Unknown => "unknown",
            Self::Custom(kind) => kind.as_str(),
        };

        formatter.write_str(label)
    }
}

impl FromStr for MotifKind {
    type Err = core::convert::Infallible;

    /// Parses a motif kind from text. Parsing never fails.
    ///
    /// Surrounding whitespace is ignored. Built-in kinds are matched ASCII case-insensitively;
    /// `binding-site`, `binding_site` and `binding site` all map to
    /// [`MotifKind::BindingSite`], and blank input maps to [`MotifKind::Unknown`]. Any other
    /// text becomes [`MotifKind::Custom`] holding the trimmed input with its original casing.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let trimmed = value.trim();

        let kind = match trimmed.to_ascii_lowercase().as_str() {
            "dna" => Self::Dna,
            "rna" => Self::Rna,
            "protein" => Self::Protein,
            "regulatory" => Self::Regulatory,
            "binding-site" | "binding_site" | "binding site" => Self::BindingSite,
            "repeat" => Self::Repeat,
            "unknown" | "" => Self::Unknown,
            // Store the trimmed text: the padding was already ignored while matching the
            // built-in kinds, so keeping it here would make `" x "` and `"x"` differ.
            _ => Self::Custom(trimmed.to_string()),
        };

        Ok(kind)
    }
}
166
167/// A motif hit with optional sequence or genomic location.
168#[derive(Clone, Debug, Eq, PartialEq)]
169pub struct MotifHit {
170    name: MotifName,
171    pattern: MotifPattern,
172    kind: MotifKind,
173    genomic_range: Option<GenomicRange>,
174    sequence_range: Option<(usize, usize)>,
175}
176
177impl MotifHit {
178    /// Creates motif hit metadata with no location.
179    #[must_use]
180    pub const fn new(name: MotifName, pattern: MotifPattern, kind: MotifKind) -> Self {
181        Self {
182            name,
183            pattern,
184            kind,
185            genomic_range: None,
186            sequence_range: None,
187        }
188    }
189
190    /// Sets the optional genomic range.
191    #[must_use]
192    pub fn with_genomic_range(mut self, range: GenomicRange) -> Self {
193        self.genomic_range = Some(range);
194        self
195    }
196
197    /// Sets the optional sequence range.
198    ///
199    /// # Errors
200    ///
201    /// Returns [`MotifValueError::SequenceRangeEndBeforeStart`] when `end < start`.
202    pub fn with_sequence_range(
203        mut self,
204        start: usize,
205        end: usize,
206    ) -> Result<Self, MotifValueError> {
207        if end < start {
208            Err(MotifValueError::SequenceRangeEndBeforeStart)
209        } else {
210            self.sequence_range = Some((start, end));
211            Ok(self)
212        }
213    }
214
215    /// Returns the motif name.
216    #[must_use]
217    pub const fn name(&self) -> &MotifName {
218        &self.name
219    }
220
221    /// Returns the motif pattern.
222    #[must_use]
223    pub const fn pattern(&self) -> &MotifPattern {
224        &self.pattern
225    }
226
227    /// Returns the motif kind.
228    #[must_use]
229    pub const fn kind(&self) -> &MotifKind {
230        &self.kind
231    }
232
233    /// Returns the optional genomic range.
234    #[must_use]
235    pub const fn genomic_range(&self) -> Option<&GenomicRange> {
236        self.genomic_range.as_ref()
237    }
238
239    /// Returns the optional sequence range as `(start, end)`.
240    #[must_use]
241    pub const fn sequence_range(&self) -> Option<(usize, usize)> {
242        self.sequence_range
243    }
244}
245
#[cfg(test)]
mod tests {
    use super::{MotifHit, MotifKind, MotifName, MotifPattern, MotifValueError};

    #[test]
    fn creates_valid_motif_name() {
        let created = MotifName::new("TATA box");

        assert_eq!(created.expect("valid motif name").as_str(), "TATA box");
    }

    #[test]
    fn rejects_empty_motif_name() {
        // Whitespace-only input counts as empty.
        assert_eq!(MotifName::new(" ").unwrap_err(), MotifValueError::Empty);
    }

    #[test]
    fn creates_valid_motif_pattern() {
        let created = MotifPattern::new("TATA");

        assert_eq!(created.expect("valid motif pattern").as_str(), "TATA");
    }

    #[test]
    fn rejects_empty_motif_pattern() {
        assert_eq!(MotifPattern::new("").unwrap_err(), MotifValueError::Empty);
    }

    #[test]
    fn motif_kind_displays_and_parses() {
        let shown = MotifKind::BindingSite.to_string();
        let parsed = "dna".parse::<MotifKind>();

        assert_eq!(shown, "binding-site");
        assert_eq!(parsed, Ok(MotifKind::Dna));
    }

    #[test]
    fn constructs_motif_hit() {
        let name = MotifName::new("TATA box").expect("valid motif name");
        let pattern = MotifPattern::new("TATA").expect("valid motif pattern");

        let hit = MotifHit::new(name, pattern, MotifKind::Dna)
            .with_sequence_range(3, 7)
            .expect("valid sequence range");

        assert_eq!(hit.pattern().as_str(), "TATA");
        assert_eq!(hit.sequence_range(), Some((3, 7)));
    }

    #[test]
    fn supports_custom_motif_kind() {
        let expected = MotifKind::Custom(String::from("hairpin"));

        assert_eq!("hairpin".parse::<MotifKind>(), Ok(expected));
    }
}