//! Alphabets for genome sequences.
use std::convert::{TryFrom, TryInto};
use thiserror::Error;
pub mod dna_alphabet;
pub mod dna_alphabet_or_n;
/// A character in an alphabet.
///
/// Through its supertrait bounds, every character converts into a `u8` and can be
/// fallibly constructed from a `u8`. In addition, each character has an index in its alphabet.
pub trait AlphabetCharacter: Into<u8> + TryFrom<u8> {
/// The number of characters in the alphabet.
const ALPHABET_SIZE: usize;
/// Returns the index of this character in the alphabet.
fn index(&self) -> usize;
/// Constructs the character from the given index.
///
/// # Errors
///
/// Returns an [`AlphabetError`] if the index is invalid.
// NOTE(review): presumably valid indices are `0..ALPHABET_SIZE` and the error is
// `AlphabetError::IndexNotPartOfAlphabet` — confirm against the implementors.
fn from_index(index: usize) -> Result<Self, AlphabetError>;
/// Constructs the character from the given index, returning a static reference to it.
///
/// Because the result is a `&'static Self`, this can only be implemented via lookup in a static table.
/// It is required to create an implementation of [`std::ops::Index`] for genome sequence types
/// that do not store the characters in plain format.
///
/// # Errors
///
/// Returns an [`AlphabetError`] if the index is invalid.
fn from_index_ref(index: usize) -> Result<&'static Self, AlphabetError>;
/// Constructs the complement of this character.
fn complement(&self) -> Self;
}
/// An alphabet as a subset of the ASCII alphabet.
pub trait Alphabet: Sized {
/// The amount of characters in the alphabet.
const SIZE: usize = Self::CharacterType::ALPHABET_SIZE;
/// The internal character type used by the alphabet.
type CharacterType: AlphabetCharacter + Eq + Ord + Clone + 'static;
/// Converts the given ASCII character into an alphabet character.
/// If the ASCII character is not mapped to an alphabet character, then `None` is returned.
fn ascii_to_character(ascii: u8) -> Result<Self::CharacterType, AlphabetError> {
ascii
.try_into()
.map_err(|_| AlphabetError::AsciiNotPartOfAlphabet { ascii })
}
/// Converts this alphabet character into an ASCII character.
fn character_to_ascii(character: Self::CharacterType) -> u8 {
character.into()
}
}
/// An error that can occur when converting values into alphabet characters.
#[derive(Debug, Clone, Eq, PartialEq, Error)]
pub enum AlphabetError {
#[error("found an ASCII character that is not part of the alphabet: {ascii}")]
/// An attempt was made to convert an ASCII character into an alphabet character,
/// but the ASCII character is not part of the alphabet.
AsciiNotPartOfAlphabet {
/// The offending ASCII character.
ascii: u8,
},
#[error("found an index that is not part of the alphabet: {index}")]
/// An attempt was made to convert an index into an alphabet character,
/// but the index is not part of the alphabet.
IndexNotPartOfAlphabet {
/// The offending index.
index: usize,
},
}