Skip to main content

use_nucleotide/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6
/// Reasons a piece of text or a character can fail to parse as a nucleotide symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NucleotideParseError {
    /// The character is not a recognized primitive nucleotide symbol.
    ///
    /// The rejected character is carried in the variant.
    InvalidSymbol(char),
    /// The input text did not consist of exactly one character.
    InvalidSymbolText,
}
15
16impl fmt::Display for NucleotideParseError {
17    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
18        match self {
19            Self::InvalidSymbol(symbol) => {
20                write!(formatter, "invalid nucleotide symbol `{symbol}`")
21            },
22            Self::InvalidSymbolText => {
23                formatter.write_str("nucleotide symbol text must be one character")
24            },
25        }
26    }
27}
28
29impl Error for NucleotideParseError {}
30
/// Classification of a single nucleotide symbol.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NucleotideKind {
    /// Symbol that belongs to the DNA alphabet only.
    Dna,
    /// Symbol that belongs to the RNA alphabet only.
    Rna,
    /// Symbol common to both the DNA and RNA alphabets.
    Shared,
    /// Gap symbol.
    Gap,
    /// Ambiguous nucleotide symbol.
    Ambiguous,
    /// Unknown nucleotide symbol.
    Unknown,
    /// Domain-specific kind identified by a free-form label.
    Custom(String),
}
49
50impl fmt::Display for NucleotideKind {
51    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
52        match self {
53            Self::Dna => formatter.write_str("dna"),
54            Self::Rna => formatter.write_str("rna"),
55            Self::Shared => formatter.write_str("shared"),
56            Self::Gap => formatter.write_str("gap"),
57            Self::Ambiguous => formatter.write_str("ambiguous"),
58            Self::Unknown => formatter.write_str("unknown"),
59            Self::Custom(kind) => formatter.write_str(kind),
60        }
61    }
62}
63
64impl FromStr for NucleotideKind {
65    type Err = core::convert::Infallible;
66
67    fn from_str(value: &str) -> Result<Self, Self::Err> {
68        let kind = match value.trim().to_ascii_lowercase().as_str() {
69            "dna" => Self::Dna,
70            "rna" => Self::Rna,
71            "shared" => Self::Shared,
72            "gap" => Self::Gap,
73            "ambiguous" => Self::Ambiguous,
74            "unknown" | "" => Self::Unknown,
75            _ => Self::Custom(value.to_string()),
76        };
77
78        Ok(kind)
79    }
80}
81
/// Classification of a whole nucleotide sequence.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NucleotideSequenceKind {
    /// Sequence of DNA.
    Dna,
    /// Sequence of RNA.
    Rna,
    /// Mixed or generic nucleotide sequence.
    Nucleotide,
    /// Sequence whose kind is not known.
    Unknown,
    /// Domain-specific sequence kind identified by a free-form label.
    Custom(String),
}
96
97impl fmt::Display for NucleotideSequenceKind {
98    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
99        match self {
100            Self::Dna => formatter.write_str("dna"),
101            Self::Rna => formatter.write_str("rna"),
102            Self::Nucleotide => formatter.write_str("nucleotide"),
103            Self::Unknown => formatter.write_str("unknown"),
104            Self::Custom(kind) => formatter.write_str(kind),
105        }
106    }
107}
108
109impl FromStr for NucleotideSequenceKind {
110    type Err = core::convert::Infallible;
111
112    fn from_str(value: &str) -> Result<Self, Self::Err> {
113        let kind = match value.trim().to_ascii_lowercase().as_str() {
114            "dna" => Self::Dna,
115            "rna" => Self::Rna,
116            "nucleotide" => Self::Nucleotide,
117            "unknown" | "" => Self::Unknown,
118            _ => Self::Custom(value.to_string()),
119        };
120
121        Ok(kind)
122    }
123}
124
/// A single primitive nucleotide symbol.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Nucleotide {
    /// Adenine; its display symbol is `A`.
    Adenine,
    /// Cytosine; its display symbol is `C`.
    Cytosine,
    /// Guanine; its display symbol is `G`.
    Guanine,
    /// Thymine; its display symbol is `T`.
    Thymine,
    /// Uracil; its display symbol is `U`.
    Uracil,
    /// Gap; its display symbol is `-`.
    Gap,
    /// Ambiguous nucleotide, carrying its own symbol (for example `N`).
    Ambiguous(char),
    /// Explicitly unknown nucleotide; its display symbol is `?`.
    Unknown,
}
145
146impl Nucleotide {
147    /// Parses a common nucleotide symbol.
148    ///
149    /// Recognized symbols are `A`, `C`, `G`, `T`, `U`, `-`, and `N` in either case for letters.
150    /// Other symbols return [`NucleotideParseError::InvalidSymbol`].
151    ///
152    /// # Errors
153    ///
154    /// Returns [`NucleotideParseError::InvalidSymbol`] when the symbol is not recognized.
155    pub const fn parse_symbol(symbol: char) -> Result<Self, NucleotideParseError> {
156        match symbol.to_ascii_uppercase() {
157            'A' => Ok(Self::Adenine),
158            'C' => Ok(Self::Cytosine),
159            'G' => Ok(Self::Guanine),
160            'T' => Ok(Self::Thymine),
161            'U' => Ok(Self::Uracil),
162            'N' => Ok(Self::Ambiguous('N')),
163            '-' => Ok(Self::Gap),
164            _ => Err(NucleotideParseError::InvalidSymbol(symbol)),
165        }
166    }
167
168    /// Returns the canonical display symbol.
169    #[must_use]
170    pub const fn symbol(self) -> char {
171        match self {
172            Self::Adenine => 'A',
173            Self::Cytosine => 'C',
174            Self::Guanine => 'G',
175            Self::Thymine => 'T',
176            Self::Uracil => 'U',
177            Self::Gap => '-',
178            Self::Ambiguous(symbol) => symbol,
179            Self::Unknown => '?',
180        }
181    }
182
183    /// Returns the descriptive nucleotide kind.
184    #[must_use]
185    pub const fn kind(self) -> NucleotideKind {
186        match self {
187            Self::Thymine => NucleotideKind::Dna,
188            Self::Uracil => NucleotideKind::Rna,
189            Self::Adenine | Self::Cytosine | Self::Guanine => NucleotideKind::Shared,
190            Self::Gap => NucleotideKind::Gap,
191            Self::Ambiguous(_) => NucleotideKind::Ambiguous,
192            Self::Unknown => NucleotideKind::Unknown,
193        }
194    }
195}
196
197impl fmt::Display for Nucleotide {
198    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
199        write!(formatter, "{}", self.symbol())
200    }
201}
202
203impl FromStr for Nucleotide {
204    type Err = NucleotideParseError;
205
206    fn from_str(value: &str) -> Result<Self, Self::Err> {
207        let mut chars = value.chars();
208        let Some(symbol) = chars.next() else {
209            return Err(NucleotideParseError::InvalidSymbolText);
210        };
211
212        if chars.next().is_some() {
213            Err(NucleotideParseError::InvalidSymbolText)
214        } else {
215            Self::parse_symbol(symbol)
216        }
217    }
218}
219
#[cfg(test)]
mod tests {
    use super::{Nucleotide, NucleotideKind, NucleotideParseError, NucleotideSequenceKind};
    use core::str::FromStr;

    /// Each unambiguous base letter parses to its own variant.
    #[test]
    fn parses_common_nucleotide_symbols() {
        let expectations = [
            ('A', Nucleotide::Adenine),
            ('C', Nucleotide::Cytosine),
            ('G', Nucleotide::Guanine),
            ('T', Nucleotide::Thymine),
            ('U', Nucleotide::Uracil),
        ];

        for (symbol, nucleotide) in expectations {
            assert_eq!(Nucleotide::parse_symbol(symbol), Ok(nucleotide));
        }
    }

    /// A dash parses to the gap variant.
    #[test]
    fn parses_gap_symbol() {
        let parsed = Nucleotide::parse_symbol('-');
        assert_eq!(parsed, Ok(Nucleotide::Gap));
    }

    /// `N` parses to the uppercase ambiguous symbol in either case.
    #[test]
    fn parses_ambiguous_n_symbol() {
        for symbol in ['N', 'n'] {
            assert_eq!(
                Nucleotide::parse_symbol(symbol),
                Ok(Nucleotide::Ambiguous('N'))
            );
        }
    }

    /// `Display` prints the canonical one-character symbol.
    #[test]
    fn displays_canonical_symbols() {
        let expectations = [
            (Nucleotide::Adenine, "A"),
            (Nucleotide::Gap, "-"),
            (Nucleotide::Ambiguous('N'), "N"),
        ];

        for (nucleotide, text) in expectations {
            assert_eq!(nucleotide.to_string(), text);
        }
    }

    /// Unrecognized characters and multi-character text produce distinct errors.
    #[test]
    fn invalid_symbols_are_rejected() {
        let unknown_symbol = Nucleotide::parse_symbol('X');
        assert_eq!(unknown_symbol, Err(NucleotideParseError::InvalidSymbol('X')));

        let too_long = Nucleotide::from_str("AC");
        assert_eq!(too_long, Err(NucleotideParseError::InvalidSymbolText));
    }

    /// Kinds are reported for symbols and parsed for sequences.
    #[test]
    fn nucleotide_kinds_are_descriptive() {
        assert_eq!(Nucleotide::Thymine.kind(), NucleotideKind::Dna);

        let parsed = NucleotideSequenceKind::from_str("rna");
        assert_eq!(parsed, Ok(NucleotideSequenceKind::Rna));
    }
}