1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
//! Alphabets for genome sequences.

use std::convert::{TryFrom, TryInto};
use thiserror::Error;

pub mod dna_alphabet;
pub mod dna_alphabet_or_n;

/// A character in an alphabet.
///
/// Every implementor must be convertible into a `u8` and fallibly constructible from a `u8`,
/// as required by the `Into<u8> + TryFrom<u8>` supertraits.
pub trait AlphabetCharacter: Into<u8> + TryFrom<u8> {
    /// The amount of characters in the alphabet.
    const ALPHABET_SIZE: usize;

    /// The index of this character in the alphabet.
    fn index(&self) -> usize;

    /// Constructs the character from the given index.
    ///
    /// # Errors
    ///
    /// Returns an [`AlphabetError`] if the index is invalid.
    fn from_index(index: usize) -> Result<Self, AlphabetError>;

    /// Constructs the character from the given index, returning a reference instead of an owned value.
    ///
    /// This method returns a static reference to the character type, so it can only be implemented via lookup in a static table.
    /// It is required to create an implementation of [`std::ops::Index`] for genome sequence types that do not store the characters in plain format.
    ///
    /// # Errors
    ///
    /// Returns an [`AlphabetError`] if the index is invalid.
    fn from_index_ref(index: usize) -> Result<&'static Self, AlphabetError>;

    /// Constructs the complement of this character.
    fn complement(&self) -> Self;
}

/// An alphabet as a subset of the ASCII alphabet.
pub trait Alphabet: Sized {
    /// The amount of characters in the alphabet.
    const SIZE: usize = Self::CharacterType::ALPHABET_SIZE;

    /// The internal character type used by the alphabet.
    type CharacterType: AlphabetCharacter + Eq + Ord + Clone + 'static;

    /// Converts the given ASCII character into an alphabet character.
    /// If the ASCII character is not mapped to an alphabet character, then `None` is returned.
    fn ascii_to_character(ascii: u8) -> Result<Self::CharacterType, AlphabetError> {
        ascii
            .try_into()
            .map_err(|_| AlphabetError::AsciiNotPartOfAlphabet { ascii })
    }

    /// Converts this alphabet character into an ASCII character.
    fn character_to_ascii(character: Self::CharacterType) -> u8 {
        character.into()
    }
}

/// An error when dealing with alphabets.
#[derive(Debug, Clone, Eq, PartialEq, Error)]
pub enum AlphabetError {
    #[error("found an ASCII character that is not part of the alphabet: {ascii}")]
    /// A conversion from an ASCII character to an alphabet character was attempted,
    /// but the ASCII character is not part of the alphabet.
    AsciiNotPartOfAlphabet {
        /// The offending ASCII character, stored as its raw byte value.
        ascii: u8,
    },

    #[error("found an index that is not part of the alphabet: {index}")]
    /// A conversion from an index to an alphabet character was attempted,
    /// but the index is not part of the alphabet.
    IndexNotPartOfAlphabet {
        /// The offending index.
        index: usize,
    },
}