1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::{collections::BTreeSet, error::Error};
6
/// Errors reported while building alphabet types in this crate.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AlphabetError {
    /// A symbol set was requested but no symbols were supplied
    /// (see [`AlphabetSymbolSet::new`]).
    EmptySymbolSet,
}

impl fmt::Display for AlphabetError {
    /// Writes the fixed, human-readable message for the error variant.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Select the message first so every variant funnels through a single
        // write; the match stays exhaustive so new variants fail to compile
        // until they get a message.
        let message = match *self {
            Self::EmptySymbolSet => "alphabet symbol set cannot be empty",
        };

        formatter.write_str(message)
    }
}

// No wrapped cause: the default `Error::source` (returning `None`) is kept.
impl Error for AlphabetError {}
23
/// Names the family of symbols an alphabet belongs to.
///
/// The [`fmt::Display`] form of the built-in kinds is a lowercase, hyphenated
/// name (for example `dna-with-ambiguity`); [`AlphabetKind::Custom`] displays
/// its stored name verbatim.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum AlphabetKind {
    /// Built-in kind displayed and parsed as `dna`.
    Dna,
    /// Built-in kind displayed and parsed as `rna`.
    Rna,
    /// Built-in kind displayed and parsed as `protein`.
    Protein,
    /// Built-in kind displayed as `dna-with-ambiguity`.
    DnaWithAmbiguity,
    /// Built-in kind displayed as `rna-with-ambiguity`.
    RnaWithAmbiguity,
    /// Built-in kind displayed as `protein-with-ambiguity`.
    ProteinWithAmbiguity,
    /// Any other, caller-chosen name.
    Custom(String),
}

impl fmt::Display for AlphabetKind {
    /// Writes the canonical name of the kind, or the stored name for
    /// [`AlphabetKind::Custom`].
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Dna => "dna",
            Self::Rna => "rna",
            Self::Protein => "protein",
            Self::DnaWithAmbiguity => "dna-with-ambiguity",
            Self::RnaWithAmbiguity => "rna-with-ambiguity",
            Self::ProteinWithAmbiguity => "protein-with-ambiguity",
            Self::Custom(kind) => kind.as_str(),
        };

        formatter.write_str(name)
    }
}

impl FromStr for AlphabetKind {
    /// Parsing never fails: unrecognised names become [`AlphabetKind::Custom`].
    type Err = core::convert::Infallible;

    /// Parses a kind name.
    ///
    /// Surrounding whitespace is ignored. Built-in names are matched without
    /// regard to ASCII case, and the `-with-ambiguity` kinds also accept the
    /// underscore spelling (`dna_with_ambiguity`). Any other input yields
    /// `Custom` holding the trimmed name with its original case preserved.
    ///
    /// Note that a `Custom` kind whose name equals a built-in name (for
    /// example `Custom("dna")`) displays as that name and therefore parses
    /// back as the built-in variant.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        // Trim once and reuse the result for both matching and the fallback,
        // so whitespace is insignificant for custom names exactly as it is
        // for built-in ones.
        let name = value.trim();

        let kind = match name.to_ascii_lowercase().as_str() {
            "dna" => Self::Dna,
            "rna" => Self::Rna,
            "protein" => Self::Protein,
            "dna-with-ambiguity" | "dna_with_ambiguity" => Self::DnaWithAmbiguity,
            "rna-with-ambiguity" | "rna_with_ambiguity" => Self::RnaWithAmbiguity,
            "protein-with-ambiguity" | "protein_with_ambiguity" => Self::ProteinWithAmbiguity,
            // Keep the caller's casing; only the lookup above is case-folded.
            _ => Self::Custom(name.to_owned()),
        };

        Ok(kind)
    }
}
74
75#[derive(Clone, Debug, Eq, PartialEq)]
77pub struct AlphabetSymbolSet {
78 symbols: BTreeSet<char>,
79}
80
81impl AlphabetSymbolSet {
82 pub fn new(symbols: impl IntoIterator<Item = char>) -> Result<Self, AlphabetError> {
88 let symbols = symbols.into_iter().collect::<BTreeSet<_>>();
89
90 if symbols.is_empty() {
91 Err(AlphabetError::EmptySymbolSet)
92 } else {
93 Ok(Self { symbols })
94 }
95 }
96
97 pub fn from_symbols(symbols: impl AsRef<str>) -> Result<Self, AlphabetError> {
103 Self::new(symbols.as_ref().chars())
104 }
105
106 #[must_use]
108 pub fn contains(&self, symbol: char) -> bool {
109 self.symbols.contains(&symbol)
110 }
111
112 #[must_use]
114 pub fn len(&self) -> usize {
115 self.symbols.len()
116 }
117
118 #[must_use]
120 pub fn is_empty(&self) -> bool {
121 self.symbols.is_empty()
122 }
123
124 pub fn iter(&self) -> impl Iterator<Item = &char> {
126 self.symbols.iter()
127 }
128}
129
130#[derive(Clone, Debug, Eq, PartialEq)]
132pub struct BioAlphabet {
133 kind: AlphabetKind,
134 symbols: AlphabetSymbolSet,
135}
136
137impl BioAlphabet {
138 #[must_use]
140 pub const fn new(kind: AlphabetKind, symbols: AlphabetSymbolSet) -> Self {
141 Self { kind, symbols }
142 }
143
144 #[must_use]
146 pub fn dna() -> Self {
147 Self::from_static(AlphabetKind::Dna, "ACGT")
148 }
149
150 #[must_use]
152 pub fn rna() -> Self {
153 Self::from_static(AlphabetKind::Rna, "ACGU")
154 }
155
156 #[must_use]
158 pub fn protein() -> Self {
159 Self::from_static(AlphabetKind::Protein, "ACDEFGHIKLMNPQRSTVWY")
160 }
161
162 #[must_use]
164 pub fn dna_with_ambiguity() -> Self {
165 Self::from_static(AlphabetKind::DnaWithAmbiguity, "ACGTRYSWKMBDHVN")
166 }
167
168 #[must_use]
170 pub fn rna_with_ambiguity() -> Self {
171 Self::from_static(AlphabetKind::RnaWithAmbiguity, "ACGURYSWKMBDHVN")
172 }
173
174 #[must_use]
176 pub fn protein_with_ambiguity() -> Self {
177 Self::from_static(
178 AlphabetKind::ProteinWithAmbiguity,
179 "ABCDEFGHIKLMNPQRSTVWXYZ*",
180 )
181 }
182
183 pub fn custom(
189 kind: impl Into<String>,
190 symbols: impl AsRef<str>,
191 ) -> Result<Self, AlphabetError> {
192 Ok(Self::new(
193 AlphabetKind::Custom(kind.into()),
194 AlphabetSymbolSet::from_symbols(symbols)?,
195 ))
196 }
197
198 #[must_use]
200 pub const fn kind(&self) -> &AlphabetKind {
201 &self.kind
202 }
203
204 #[must_use]
206 pub const fn symbols(&self) -> &AlphabetSymbolSet {
207 &self.symbols
208 }
209
210 #[must_use]
212 pub fn contains(&self, symbol: char) -> bool {
213 self.symbols.contains(symbol)
214 }
215
216 #[must_use]
218 pub fn contains_all(&self, text: impl AsRef<str>) -> bool {
219 text.as_ref().chars().all(|symbol| self.contains(symbol))
220 }
221
222 fn from_static(kind: AlphabetKind, symbols: &str) -> Self {
223 let symbols = AlphabetSymbolSet {
224 symbols: symbols.chars().collect(),
225 };
226 Self::new(kind, symbols)
227 }
228}
229
#[cfg(test)]
mod tests {
    use super::{AlphabetError, AlphabetKind, AlphabetSymbolSet, BioAlphabet};
    use core::str::FromStr;

    /// The DNA preset is labelled `Dna` and accepts all of `ACGT`.
    #[test]
    fn dna_alphabet_contains_standard_symbols() {
        let alphabet = BioAlphabet::dna();

        assert_eq!(alphabet.kind(), &AlphabetKind::Dna);
        assert!(alphabet.contains_all("ACGT"));
    }

    /// The RNA preset is labelled `Rna` and accepts all of `ACGU`.
    #[test]
    fn rna_alphabet_contains_standard_symbols() {
        let alphabet = BioAlphabet::rna();

        assert_eq!(alphabet.kind(), &AlphabetKind::Rna);
        assert!(alphabet.contains_all("ACGU"));
    }

    /// The protein preset accepts each of its twenty listed symbols.
    #[test]
    fn protein_alphabet_contains_common_symbols() {
        let alphabet = BioAlphabet::protein();
        let residues = "ACDEFGHIKLMNPQRSTVWY";

        assert!(alphabet.contains_all(residues));
    }

    /// A symbol outside the alphabet fails both membership checks.
    #[test]
    fn invalid_symbol_is_rejected_by_membership_check() {
        let alphabet = BioAlphabet::dna();

        assert!(!alphabet.contains_all("ACGU"));
        assert!(!alphabet.contains('U'));
    }

    /// A custom alphabet keeps its label and accepts its symbols in any order.
    #[test]
    fn constructs_custom_alphabet() {
        let toy = BioAlphabet::custom("toy", "ABC").expect("valid alphabet");
        let expected_kind = AlphabetKind::Custom(String::from("toy"));

        assert_eq!(toy.kind(), &expected_kind);
        assert!(toy.contains_all("CBA"));
    }

    /// Building a symbol set from empty text reports `EmptySymbolSet`.
    #[test]
    fn rejects_empty_symbol_set() {
        let outcome = AlphabetSymbolSet::from_symbols("");

        assert_eq!(outcome, Err(AlphabetError::EmptySymbolSet));
    }

    /// Spot-checks one `Display` name and one `FromStr` parse.
    #[test]
    fn alphabet_kind_displays_and_parses() {
        let rendered = AlphabetKind::DnaWithAmbiguity.to_string();
        let parsed = AlphabetKind::from_str("protein");

        assert_eq!(rendered, "dna-with-ambiguity");
        assert_eq!(parsed, Ok(AlphabetKind::Protein));
    }
}