Skip to main content

use_sequence/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6
/// Failure reported by the fallible sequence constructors in this crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SequenceError {
    /// The supplied sequence text contained no characters.
    EmptyText,
}
13
14impl fmt::Display for SequenceError {
15    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
16        match self {
17            Self::EmptyText => formatter.write_str("sequence text cannot be empty"),
18        }
19    }
20}
21
22impl Error for SequenceError {}
23
/// Describes what kind of biological sequence a piece of text represents.
///
/// The variant order below is significant: the derived ordering follows it.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SequenceKind {
    /// Text holding a DNA sequence.
    Dna,
    /// Text holding an RNA sequence.
    Rna,
    /// Text holding a protein sequence.
    Protein,
    /// Text holding a nucleotide sequence of no more specific kind.
    Nucleotide,
    /// Text holding an amino-acid sequence of no more specific kind.
    AminoAcid,
    /// The kind of the sequence is not known.
    Unknown,
    /// A caller-defined kind identified by its name.
    Custom(String),
}
42
43impl fmt::Display for SequenceKind {
44    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
45        match self {
46            Self::Dna => formatter.write_str("dna"),
47            Self::Rna => formatter.write_str("rna"),
48            Self::Protein => formatter.write_str("protein"),
49            Self::Nucleotide => formatter.write_str("nucleotide"),
50            Self::AminoAcid => formatter.write_str("amino-acid"),
51            Self::Unknown => formatter.write_str("unknown"),
52            Self::Custom(kind) => formatter.write_str(kind),
53        }
54    }
55}
56
57impl FromStr for SequenceKind {
58    type Err = core::convert::Infallible;
59
60    fn from_str(value: &str) -> Result<Self, Self::Err> {
61        let kind = match value.trim().to_ascii_lowercase().as_str() {
62            "dna" => Self::Dna,
63            "rna" => Self::Rna,
64            "protein" => Self::Protein,
65            "nucleotide" => Self::Nucleotide,
66            "amino-acid" | "amino_acid" | "amino acid" => Self::AminoAcid,
67            "unknown" | "" => Self::Unknown,
68            _ => Self::Custom(value.to_string()),
69        };
70
71        Ok(kind)
72    }
73}
74
/// An owned string holding the text of a biological sequence.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SequenceText(String);
78
79impl SequenceText {
80    /// Creates sequence text from a non-empty string.
81    ///
82    /// The text is stored exactly as supplied. Casing is not normalized.
83    ///
84    /// # Errors
85    ///
86    /// Returns [`SequenceError::EmptyText`] when the supplied text is empty.
87    pub fn new(value: impl AsRef<str>) -> Result<Self, SequenceError> {
88        let value = value.as_ref();
89
90        if value.is_empty() {
91            Err(SequenceError::EmptyText)
92        } else {
93            Ok(Self(value.to_string()))
94        }
95    }
96
97    /// Creates explicitly empty sequence text.
98    ///
99    /// Empty sequences are only produced through this constructor so callers can make that choice
100    /// visible in code.
101    #[must_use]
102    pub const fn empty() -> Self {
103        Self(String::new())
104    }
105
106    /// Returns the number of Unicode scalar values in the sequence text.
107    #[must_use]
108    pub fn len(&self) -> usize {
109        self.0.chars().count()
110    }
111
112    /// Returns true when the sequence text is empty.
113    #[must_use]
114    pub const fn is_empty(&self) -> bool {
115        self.0.is_empty()
116    }
117
118    /// Returns the sequence text.
119    #[must_use]
120    pub fn as_str(&self) -> &str {
121        &self.0
122    }
123
124    /// Consumes the sequence text and returns the owned string.
125    #[must_use]
126    pub fn into_string(self) -> String {
127        self.0
128    }
129}
130
131impl AsRef<str> for SequenceText {
132    fn as_ref(&self) -> &str {
133        self.as_str()
134    }
135}
136
137impl fmt::Display for SequenceText {
138    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
139        formatter.write_str(self.as_str())
140    }
141}
142
143impl FromStr for SequenceText {
144    type Err = SequenceError;
145
146    fn from_str(value: &str) -> Result<Self, Self::Err> {
147        Self::new(value)
148    }
149}
150
/// A count wrapped in its own type so it reads as a sequence length.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SequenceLength(usize);
154
155impl SequenceLength {
156    /// Creates a sequence length from a count.
157    #[must_use]
158    pub const fn new(value: usize) -> Self {
159        Self(value)
160    }
161
162    /// Returns the stored length.
163    #[must_use]
164    pub const fn value(self) -> usize {
165        self.0
166    }
167
168    /// Returns true when the length is zero.
169    #[must_use]
170    pub const fn is_empty(self) -> bool {
171        self.0 == 0
172    }
173}
174
175impl fmt::Display for SequenceLength {
176    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
177        write!(formatter, "{}", self.0)
178    }
179}
180
181/// Owned biological sequence data with a descriptive kind.
182#[derive(Clone, Debug, Eq, PartialEq)]
183pub struct BioSequence {
184    kind: SequenceKind,
185    text: SequenceText,
186}
187
188impl BioSequence {
189    /// Creates a biological sequence from non-empty owned text.
190    ///
191    /// # Errors
192    ///
193    /// Returns [`SequenceError::EmptyText`] when the supplied text is empty.
194    pub fn new(kind: SequenceKind, text: impl AsRef<str>) -> Result<Self, SequenceError> {
195        Ok(Self {
196            kind,
197            text: SequenceText::new(text)?,
198        })
199    }
200
201    /// Creates an explicitly empty sequence with the given kind.
202    #[must_use]
203    pub const fn empty(kind: SequenceKind) -> Self {
204        Self {
205            kind,
206            text: SequenceText::empty(),
207        }
208    }
209
210    /// Returns the descriptive sequence kind.
211    #[must_use]
212    pub const fn kind(&self) -> &SequenceKind {
213        &self.kind
214    }
215
216    /// Returns the owned sequence text wrapper.
217    #[must_use]
218    pub const fn text(&self) -> &SequenceText {
219        &self.text
220    }
221
222    /// Returns the number of Unicode scalar values in the sequence text.
223    #[must_use]
224    pub fn len(&self) -> usize {
225        self.text.len()
226    }
227
228    /// Returns the sequence length wrapper.
229    #[must_use]
230    pub fn sequence_length(&self) -> SequenceLength {
231        SequenceLength::new(self.len())
232    }
233
234    /// Returns true when the sequence text is empty.
235    #[must_use]
236    pub const fn is_empty(&self) -> bool {
237        self.text.is_empty()
238    }
239
240    /// Returns the sequence text.
241    #[must_use]
242    pub fn as_str(&self) -> &str {
243        self.text.as_str()
244    }
245}
246
#[cfg(test)]
mod tests {
    use super::{BioSequence, SequenceError, SequenceKind, SequenceLength, SequenceText};
    use core::str::FromStr;

    // A non-empty sequence keeps both the kind and the text it was built with.
    #[test]
    fn creates_valid_sequence() {
        let dna = BioSequence::new(SequenceKind::Dna, "ACGT").expect("valid sequence");

        assert_eq!(dna.kind(), &SequenceKind::Dna);
        assert_eq!(dna.as_str(), "ACGT");
    }

    // Both fallible constructors refuse the empty string with the same error.
    #[test]
    fn rejects_empty_sequence_text_by_default() {
        let text_result = SequenceText::new("");
        let sequence_result = BioSequence::new(SequenceKind::Unknown, "");

        assert_eq!(text_result, Err(SequenceError::EmptyText));
        assert_eq!(sequence_result, Err(SequenceError::EmptyText));
    }

    // The dedicated constructor is the way to obtain a sequence without text.
    #[test]
    fn supports_explicit_empty_sequence() {
        let blank = BioSequence::empty(SequenceKind::Unknown);

        assert!(blank.is_empty());
        assert_eq!(blank.len(), 0);
    }

    // Built-in kinds parse case-insensitively; other names become custom kinds.
    #[test]
    fn sequence_kind_displays_and_parses() {
        let parsed_dna = SequenceKind::from_str("DNA");
        let parsed_custom = SequenceKind::from_str("plasmid");

        assert_eq!(parsed_dna, Ok(SequenceKind::Dna));
        assert_eq!(SequenceKind::AminoAcid.to_string(), "amino-acid");
        assert_eq!(parsed_custom, Ok(SequenceKind::Custom("plasmid".into())));
    }

    // The raw count and the typed length agree with each other.
    #[test]
    fn length_helper_reports_text_length() {
        let rna = BioSequence::new(SequenceKind::Rna, "ACGU").expect("valid sequence");

        assert_eq!(rna.len(), 4);
        assert_eq!(rna.sequence_length(), SequenceLength::new(4));
    }

    // Mixed-case input comes back unchanged.
    #[test]
    fn sequence_text_casing_is_preserved() {
        let mixed_case = "AcgTn";
        let dna = BioSequence::new(SequenceKind::Dna, mixed_case).expect("valid sequence");

        assert_eq!(dna.as_str(), "AcgTn");
    }
}