Skip to main content

use_amino_acid/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6
/// Error returned when parsing amino-acid symbols.
///
/// The error is `Copy`; its only payload is the rejected character.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AminoAcidParseError {
    /// The supplied symbol was not recognized as a primitive amino-acid symbol.
    ///
    /// The payload is the offending character.
    InvalidSymbol(char),
    /// The supplied text was not exactly one character: it was either empty
    /// or contained more than one `char`.
    InvalidSymbolText,
}
15
16impl fmt::Display for AminoAcidParseError {
17    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
18        match self {
19            Self::InvalidSymbol(symbol) => {
20                write!(formatter, "invalid amino-acid symbol `{symbol}`")
21            },
22            Self::InvalidSymbolText => {
23                formatter.write_str("amino-acid symbol text must be one character")
24            },
25        }
26    }
27}
28
// Marker impl: the default `Error` methods are used as-is; nothing is overridden here.
impl Error for AminoAcidParseError {}
30
/// A descriptive amino-acid kind.
///
/// This enum is `Clone` but not `Copy`, because [`AminoAcidKind::Custom`]
/// owns a `String`. The derived ordering follows the declaration order of
/// the variants below.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum AminoAcidKind {
    /// Standard amino acid.
    Standard,
    /// Stop symbol.
    Stop,
    /// Ambiguous amino-acid symbol.
    Ambiguous,
    /// Unknown amino-acid symbol.
    Unknown,
    /// Domain-specific amino-acid kind; the payload is the caller-supplied label.
    Custom(String),
}
45
46impl fmt::Display for AminoAcidKind {
47    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
48        match self {
49            Self::Standard => formatter.write_str("standard"),
50            Self::Stop => formatter.write_str("stop"),
51            Self::Ambiguous => formatter.write_str("ambiguous"),
52            Self::Unknown => formatter.write_str("unknown"),
53            Self::Custom(kind) => formatter.write_str(kind),
54        }
55    }
56}
57
58impl FromStr for AminoAcidKind {
59    type Err = core::convert::Infallible;
60
61    fn from_str(value: &str) -> Result<Self, Self::Err> {
62        let kind = match value.trim().to_ascii_lowercase().as_str() {
63            "standard" => Self::Standard,
64            "stop" => Self::Stop,
65            "ambiguous" => Self::Ambiguous,
66            "unknown" | "" => Self::Unknown,
67            _ => Self::Custom(value.to_string()),
68        };
69
70        Ok(kind)
71    }
72}
73
/// A validated one-letter amino-acid code.
///
/// The wrapped `char` is private. Within this file, values are created
/// through [`AminoAcidCode::new`], which only accepts recognized symbols.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct AminoAcidCode(char);
77
78impl AminoAcidCode {
79    /// Creates a one-letter amino-acid code from a recognized symbol.
80    ///
81    /// # Errors
82    ///
83    /// Returns [`AminoAcidParseError::InvalidSymbol`] for unrecognized symbols.
84    pub fn new(symbol: char) -> Result<Self, AminoAcidParseError> {
85        let amino_acid = AminoAcid::parse_symbol(symbol)?;
86        Ok(Self(amino_acid.symbol()))
87    }
88
89    /// Returns the one-letter code.
90    #[must_use]
91    pub const fn as_char(self) -> char {
92        self.0
93    }
94}
95
96impl fmt::Display for AminoAcidCode {
97    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
98        write!(formatter, "{}", self.0)
99    }
100}
101
/// A primitive amino-acid symbol.
///
/// Covers the 20 common amino acids plus stop, ambiguous and unknown markers.
/// Each variant's documentation lists its one-letter symbol and, where one is
/// defined, its three-letter code. The derived ordering follows the
/// declaration order of the variants.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum AminoAcid {
    /// Alanine (`A`, `Ala`).
    Alanine,
    /// Arginine (`R`, `Arg`).
    Arginine,
    /// Asparagine (`N`, `Asn`).
    Asparagine,
    /// Aspartic acid (`D`, `Asp`).
    AsparticAcid,
    /// Cysteine (`C`, `Cys`).
    Cysteine,
    /// Glutamine (`Q`, `Gln`).
    Glutamine,
    /// Glutamic acid (`E`, `Glu`).
    GlutamicAcid,
    /// Glycine (`G`, `Gly`).
    Glycine,
    /// Histidine (`H`, `His`).
    Histidine,
    /// Isoleucine (`I`, `Ile`).
    Isoleucine,
    /// Leucine (`L`, `Leu`).
    Leucine,
    /// Lysine (`K`, `Lys`).
    Lysine,
    /// Methionine (`M`, `Met`).
    Methionine,
    /// Phenylalanine (`F`, `Phe`).
    Phenylalanine,
    /// Proline (`P`, `Pro`).
    Proline,
    /// Serine (`S`, `Ser`).
    Serine,
    /// Threonine (`T`, `Thr`).
    Threonine,
    /// Tryptophan (`W`, `Trp`).
    Tryptophan,
    /// Tyrosine (`Y`, `Tyr`).
    Tyrosine,
    /// Valine (`V`, `Val`).
    Valine,
    /// Stop symbol (`*`, `Ter`).
    Stop,
    /// Ambiguous amino-acid symbol, such as `X`.
    ///
    /// The payload is the symbol itself. Only `X` has a three-letter code (`Xaa`).
    Ambiguous(char),
    /// Explicit unknown amino acid, displayed as `?`.
    ///
    /// It has no three-letter code, and `parse_symbol` does not produce this
    /// variant: `?` is rejected as an invalid symbol.
    Unknown,
}
152
153impl AminoAcid {
154    /// Parses a common one-letter amino-acid symbol.
155    ///
156    /// Recognized symbols are the 20 common one-letter codes, `X` for ambiguous, and `*` for stop.
157    ///
158    /// # Errors
159    ///
160    /// Returns [`AminoAcidParseError::InvalidSymbol`] when the symbol is not recognized.
161    pub const fn parse_symbol(symbol: char) -> Result<Self, AminoAcidParseError> {
162        match symbol.to_ascii_uppercase() {
163            'A' => Ok(Self::Alanine),
164            'R' => Ok(Self::Arginine),
165            'N' => Ok(Self::Asparagine),
166            'D' => Ok(Self::AsparticAcid),
167            'C' => Ok(Self::Cysteine),
168            'Q' => Ok(Self::Glutamine),
169            'E' => Ok(Self::GlutamicAcid),
170            'G' => Ok(Self::Glycine),
171            'H' => Ok(Self::Histidine),
172            'I' => Ok(Self::Isoleucine),
173            'L' => Ok(Self::Leucine),
174            'K' => Ok(Self::Lysine),
175            'M' => Ok(Self::Methionine),
176            'F' => Ok(Self::Phenylalanine),
177            'P' => Ok(Self::Proline),
178            'S' => Ok(Self::Serine),
179            'T' => Ok(Self::Threonine),
180            'W' => Ok(Self::Tryptophan),
181            'Y' => Ok(Self::Tyrosine),
182            'V' => Ok(Self::Valine),
183            'X' => Ok(Self::Ambiguous('X')),
184            '*' => Ok(Self::Stop),
185            _ => Err(AminoAcidParseError::InvalidSymbol(symbol)),
186        }
187    }
188
189    /// Returns the canonical one-letter symbol.
190    #[must_use]
191    pub const fn symbol(self) -> char {
192        match self {
193            Self::Alanine => 'A',
194            Self::Arginine => 'R',
195            Self::Asparagine => 'N',
196            Self::AsparticAcid => 'D',
197            Self::Cysteine => 'C',
198            Self::Glutamine => 'Q',
199            Self::GlutamicAcid => 'E',
200            Self::Glycine => 'G',
201            Self::Histidine => 'H',
202            Self::Isoleucine => 'I',
203            Self::Leucine => 'L',
204            Self::Lysine => 'K',
205            Self::Methionine => 'M',
206            Self::Phenylalanine => 'F',
207            Self::Proline => 'P',
208            Self::Serine => 'S',
209            Self::Threonine => 'T',
210            Self::Tryptophan => 'W',
211            Self::Tyrosine => 'Y',
212            Self::Valine => 'V',
213            Self::Stop => '*',
214            Self::Ambiguous(symbol) => symbol,
215            Self::Unknown => '?',
216        }
217    }
218
219    /// Returns the descriptive amino-acid kind.
220    #[must_use]
221    pub const fn kind(self) -> AminoAcidKind {
222        match self {
223            Self::Stop => AminoAcidKind::Stop,
224            Self::Ambiguous(_) => AminoAcidKind::Ambiguous,
225            Self::Unknown => AminoAcidKind::Unknown,
226            _ => AminoAcidKind::Standard,
227        }
228    }
229
230    /// Returns the common three-letter code where one is defined by this primitive vocabulary.
231    #[must_use]
232    pub const fn three_letter_code(self) -> Option<&'static str> {
233        match self {
234            Self::Alanine => Some("Ala"),
235            Self::Arginine => Some("Arg"),
236            Self::Asparagine => Some("Asn"),
237            Self::AsparticAcid => Some("Asp"),
238            Self::Cysteine => Some("Cys"),
239            Self::Glutamine => Some("Gln"),
240            Self::GlutamicAcid => Some("Glu"),
241            Self::Glycine => Some("Gly"),
242            Self::Histidine => Some("His"),
243            Self::Isoleucine => Some("Ile"),
244            Self::Leucine => Some("Leu"),
245            Self::Lysine => Some("Lys"),
246            Self::Methionine => Some("Met"),
247            Self::Phenylalanine => Some("Phe"),
248            Self::Proline => Some("Pro"),
249            Self::Serine => Some("Ser"),
250            Self::Threonine => Some("Thr"),
251            Self::Tryptophan => Some("Trp"),
252            Self::Tyrosine => Some("Tyr"),
253            Self::Valine => Some("Val"),
254            Self::Stop => Some("Ter"),
255            Self::Ambiguous('X') => Some("Xaa"),
256            Self::Ambiguous(_) | Self::Unknown => None,
257        }
258    }
259}
260
261impl fmt::Display for AminoAcid {
262    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
263        write!(formatter, "{}", self.symbol())
264    }
265}
266
267impl FromStr for AminoAcid {
268    type Err = AminoAcidParseError;
269
270    fn from_str(value: &str) -> Result<Self, Self::Err> {
271        let mut chars = value.chars();
272        let Some(symbol) = chars.next() else {
273            return Err(AminoAcidParseError::InvalidSymbolText);
274        };
275
276        if chars.next().is_some() {
277            Err(AminoAcidParseError::InvalidSymbolText)
278        } else {
279            Self::parse_symbol(symbol)
280        }
281    }
282}
283
#[cfg(test)]
mod tests {
    use super::{AminoAcid, AminoAcidCode, AminoAcidKind, AminoAcidParseError};
    use core::str::FromStr;

    /// Upper- and lower-case letters resolve to the expected amino acid.
    #[test]
    fn parses_common_one_letter_codes() {
        let cases = [
            ('A', AminoAcid::Alanine),
            ('W', AminoAcid::Tryptophan),
            ('v', AminoAcid::Valine),
        ];

        for (symbol, expected) in cases {
            assert_eq!(AminoAcid::parse_symbol(symbol), Ok(expected));
        }
    }

    /// `Display` yields the one-letter symbol; the three-letter code is separate.
    #[test]
    fn displays_one_letter_codes() {
        let rendered = AminoAcid::Alanine.to_string();
        assert_eq!(rendered, "A");

        let three_letter = AminoAcid::Tryptophan.three_letter_code();
        assert_eq!(three_letter, Some("Trp"));
    }

    /// `X` parses to the ambiguous variant, which reports the ambiguous kind.
    #[test]
    fn parses_ambiguous_amino_acid() {
        let ambiguous = AminoAcid::Ambiguous('X');

        assert_eq!(AminoAcid::parse_symbol('X'), Ok(ambiguous));
        assert_eq!(ambiguous.kind(), AminoAcidKind::Ambiguous);
    }

    /// `*` parses to the stop variant and is displayed as `*` again.
    #[test]
    fn parses_stop_symbol_when_explicitly_supplied() {
        let stop = AminoAcid::Stop;

        assert_eq!(AminoAcid::parse_symbol('*'), Ok(stop));
        assert_eq!(stop.to_string(), "*");
    }

    /// Unrecognized symbols and multi-character text map to distinct errors.
    #[test]
    fn invalid_symbol_behavior_is_documented() {
        let unrecognized = AminoAcid::parse_symbol('#');
        assert_eq!(unrecognized, Err(AminoAcidParseError::InvalidSymbol('#')));

        let too_long = AminoAcid::from_str("AA");
        assert_eq!(too_long, Err(AminoAcidParseError::InvalidSymbolText));
    }

    /// A lower-case input is stored as its canonical upper-case symbol.
    #[test]
    fn amino_acid_code_preserves_valid_symbol() {
        let symbol = AminoAcidCode::new('m')
            .map(AminoAcidCode::as_char)
            .expect("valid amino-acid code");

        assert_eq!(symbol, 'M');
    }
}