/// The residue alphabet a biological sequence appears to be written in.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Alphabet {
    /// Nucleotide sequence without `U`; also the fallback when no
    /// distinguishing letter is present (including the empty string).
    Dna,
    /// Nucleotide sequence containing at least one `U`.
    Rna,
    /// Amino-acid sequence.
    Protein,
}

impl Alphabet {
    /// Guesses the alphabet of `sequence` from the letters it contains.
    ///
    /// Matching is ASCII case-insensitive and the rules are applied in this
    /// order:
    ///
    /// 1. Any of `E`, `F`, `I`, `J`, `L`, `O`, `P`, `Q`, `Z` (amino-acid codes
    ///    that are not nucleotide codes) yields [`Alphabet::Protein`], even if
    ///    the sequence also contains `U`.
    /// 2. Otherwise any `U` yields [`Alphabet::Rna`].
    /// 3. Otherwise the result is [`Alphabet::Dna`].
    ///
    /// Every other character (for example `X`, `*`, `-`, digits, whitespace)
    /// is ignored and never influences the result.
    pub fn detect(sequence: &str) -> Self {
        let mut saw_u = false;
        for ch in sequence.chars() {
            match ch.to_ascii_uppercase() {
                // Protein outranks everything else, so the scan can stop at
                // the first letter that only an amino-acid alphabet contains.
                'E' | 'F' | 'I' | 'J' | 'L' | 'O' | 'P' | 'Q' | 'Z' => {
                    return Alphabet::Protein;
                }
                // `U` alone is not decisive: a later protein-only letter
                // still overrides it, so keep scanning.
                'U' => saw_u = true,
                _ => {}
            }
        }
        if saw_u {
            Alphabet::Rna
        } else {
            Alphabet::Dna
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every sequence in `inputs` is detected as `expected`.
    fn expect_all(expected: Alphabet, inputs: &[&str]) {
        for input in inputs {
            assert_eq!(Alphabet::detect(input), expected, "input: {:?}", input);
        }
    }

    /// An empty sequence has no distinguishing letters and falls back to DNA.
    #[test]
    fn empty_defaults_to_dna() {
        expect_all(Alphabet::Dna, &[""]);
    }

    /// Upper-case canonical bases are DNA.
    #[test]
    fn pure_acgt_is_dna() {
        expect_all(Alphabet::Dna, &["ACGTACGT"]);
    }

    /// Detection does not depend on letter case.
    #[test]
    fn lowercase_acgt_is_dna() {
        expect_all(Alphabet::Dna, &["acgtacgt"]);
    }

    /// Ambiguity codes and gap characters do not push a sequence out of DNA.
    #[test]
    fn dna_with_ambiguity_codes_is_dna() {
        expect_all(Alphabet::Dna, &["ACGTNRYKMSWBDH-"]);
    }

    /// A `U` in either case is enough to classify as RNA.
    #[test]
    fn any_u_means_rna() {
        expect_all(Alphabet::Rna, &["ACGUACGU", "acgu"]);
    }

    /// Protein-only letters take precedence over the presence of `U`.
    #[test]
    fn protein_letters_override_u() {
        expect_all(Alphabet::Protein, &["MULTILINE"]);
    }

    /// Sequences containing protein-only letters are classified as protein.
    #[test]
    fn protein_only_letters_detected() {
        expect_all(Alphabet::Protein, &["MVSKGEEL", "FILPQ"]);
    }

    /// A trailing `*` does not prevent protein detection.
    #[test]
    fn stop_codon_in_protein_accepted() {
        expect_all(Alphabet::Protein, &["MVSKEEL*"]);
    }
}