1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//! # Genetic Code Translation
//!
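//! This module provides traits for translating codons into amino acids and
//! back again: [`TranslationTable`] for tables in which every codon maps to an
//! amino acid, and [`PartialTranslationTable`] for tables in which only some
//! do. It also provides [`CodonTable`], a customisable table built from a map
//! of codons to amino acids.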
//!
//! ## Examples
//!
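//! ## Errors
//!
//! Fallible lookups return a [`TranslationError`]. For a [`CodonTable`]:
//!
//! * translating a codon that is not in the table yields
//!   [`TranslationError::InvalidCodon`];
//! * reverse-translating an amino acid that more than one codon maps to yields
//!   [`TranslationError::AmbiguousCodon`];
//! * reverse-translating an amino acid that no codon maps to yields
//!   [`TranslationError::InvalidCodon`].
//!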
use core::fmt::Display;
use std::collections::HashMap;

use crate::codec::Codec;
use crate::error::TranslationError;
use crate::prelude::{Seq, SeqSlice};

mod standard;

pub use crate::translation::standard::STANDARD;

/// A codon translation table where all codons map to amino acids
///
/// `A` is the codec of the codon sequences and `B` is the codec of the values
/// they are translated into.
pub trait TranslationTable<A: Codec, B: Codec> {
    /// Translate `codon` into a value of type `B`.
    ///
    /// The return type carries no error, so implementations must produce a
    /// result for every codon they are given.
    fn to_amino(&self, codon: &SeqSlice<A>) -> B;
    /// Translate `amino` back into a codon sequence.
    ///
    /// # Errors
    ///
    /// Implementations report a failed reverse translation as a
    /// [`TranslationError`].
    fn to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError>;
}

/// A partial translation table where not all triples of characters map to amino acids
///
/// `A` is the codec of the codon sequences and `B` is the codec of the values
/// they are translated into. Both directions are fallible.
pub trait PartialTranslationTable<A: Codec, B: Codec> {
    /// Try to translate `codon` into a value of type `B`.
    ///
    /// # Errors
    ///
    /// Implementations return a [`TranslationError`] when `codon` has no
    /// translation.
    fn try_to_amino(&self, codon: &SeqSlice<A>) -> Result<B, TranslationError>;
    /// Try to translate `amino` back into a codon sequence.
    ///
    /// # Errors
    ///
    /// Implementations return a [`TranslationError`] when no codon can be
    /// given for `amino`.
    fn try_to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError>;
}

/// A customisable translation table
///
/// Holds a forward map from codons to amino acids together with a derived
/// reverse map, so lookups in either direction are a single hash lookup.
pub struct CodonTable<A: Codec, B: Codec> {
    // I'm open to using a better bidirectional mapping datastructure
    /// Forward mapping: codon -> amino acid.
    table: HashMap<Seq<A>, B>,
    /// Reverse mapping: amino acid -> codon. The value is `Some(codon)` when
    /// exactly one codon in `table` maps to the amino acid and `None` when
    /// several do (the reverse translation is ambiguous). Amino acids that
    /// never appear in `table` have no entry at all.
    inverse_table: HashMap<B, Option<Seq<A>>>,
}

impl<A: Codec, B: Codec + Display> CodonTable<A, B> {
    /// Build a codon table from anything convertible into a `HashMap` of
    /// codon -> amino acid pairs.
    ///
    /// The reverse (amino acid -> codon) lookup is derived from `table`:
    ///
    /// * an amino acid produced by exactly one codon maps back to that codon;
    /// * an amino acid produced by more than one codon is recorded as
    ///   ambiguous (`None`), regardless of the order in which the codons are
    ///   visited.
    pub fn from_map<T>(table: T) -> Self
    where
        T: Into<HashMap<Seq<A>, B>>,
    {
        let table: HashMap<Seq<A>, B> = table.into();
        let mut inverse_table: HashMap<B, Option<Seq<A>>> = HashMap::new();
        for (codon, amino) in &table {
            // One hash lookup per pair: a second sighting of the same amino
            // acid downgrades the entry to `None`; a first sighting stores the
            // codon. The codon is only cloned when it is actually inserted.
            inverse_table
                .entry(*amino)
                .and_modify(|unique| *unique = None)
                .or_insert_with(|| Some(codon.clone()));
        }
        CodonTable {
            table,
            inverse_table,
        }
    }
}

impl<A: Codec, B: Codec + Display> PartialTranslationTable<A, B> for CodonTable<A, B> {
    /// Look up the amino acid that `codon` translates to.
    ///
    /// # Errors
    ///
    /// Returns [`TranslationError::InvalidCodon`] when `codon` is not a key of
    /// the table.
    fn try_to_amino(&self, codon: &SeqSlice<A>) -> Result<B, TranslationError> {
        // The forward map is keyed by owned sequences, so build one to look up.
        let key: Seq<A> = Seq::from(codon);
        self.table
            .get(&key)
            .copied()
            .ok_or(TranslationError::InvalidCodon)
    }

    /// Look up the single codon that translates to `amino`.
    ///
    /// # Errors
    ///
    /// Returns [`TranslationError::InvalidCodon`] when no codon in the table
    /// maps to `amino`, and [`TranslationError::AmbiguousCodon`] when more
    /// than one does.
    fn try_to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError> {
        // A missing entry means the amino acid never appeared in the table.
        let unique_codon = self
            .inverse_table
            .get(&amino)
            .ok_or(TranslationError::InvalidCodon)?;
        // A present-but-`None` entry marks an amino acid with several codons.
        unique_codon.clone().ok_or(TranslationError::AmbiguousCodon)
    }
}

#[cfg(test)]
mod tests {
    use crate::prelude::*;
    use crate::translation::{
        CodonTable, PartialTranslationTable, TranslationError, TranslationTable,
    };

    /// Forward and reverse lookups through a hand-built `CodonTable`.
    #[test]
    fn custom_codon_table() {
        // AAA and ATG both map to `Amino::A`, which makes the reverse lookup
        // for `Amino::A` ambiguous.
        let pairs: [(Seq<Dna>, Amino); 6] = [
            (dna!("AAA"), Amino::A),
            (dna!("ATG"), Amino::A),
            (dna!("CCC"), Amino::C),
            (dna!("GGG"), Amino::E),
            (dna!("TTT"), Amino::D),
            (dna!("TTA"), Amino::F),
        ];
        let table = CodonTable::from_map(pairs);

        // Forward direction: translate the sequence three bases at a time.
        let dna_seq: Seq<Dna> = dna!("AAACCCGGGTTTTTATTAATG");
        let mut translated: Seq<Amino> = Seq::new();
        for triplet in dna_seq.chunks(3) {
            let amino = table.try_to_amino(triplet).unwrap();
            translated.push(amino);
        }
        assert_eq!(translated, amino!("ACEDFFA"));

        // Reverse direction: a unique codon round-trips...
        assert_eq!(table.try_to_codon(Amino::C), Ok(dna!("CCC")));
        assert_ne!(table.try_to_codon(Amino::E), Ok(dna!("CCC")));
        // ...a shared amino acid is ambiguous...
        assert_eq!(
            table.try_to_codon(Amino::A),
            Err(TranslationError::AmbiguousCodon)
        );
        // ...and an amino acid absent from the table is rejected.
        assert_eq!(
            table.try_to_codon(Amino::X),
            Err(TranslationError::InvalidCodon)
        );
    }

    /// A user-defined total `TranslationTable` that overrides four codons and
    /// otherwise converts the codon's bits directly.
    #[test]
    fn mitochondrial_coding_table() {
        struct Mitochondria;

        impl TranslationTable<Dna, Amino> for Mitochondria {
            fn to_amino(&self, codon: &SeqSlice<Dna>) -> Amino {
                // Overrides first; AGA and AGG share the same result.
                if *codon == dna!("AGA") || *codon == dna!("AGG") {
                    return Amino::X;
                }
                if *codon == dna!("ATA") {
                    return Amino::M;
                }
                if *codon == dna!("TGA") {
                    return Amino::W;
                }
                // Every other codon is converted from its `u8` bit pattern.
                Amino::unsafe_from_bits(Into::<u8>::into(codon))
            }

            fn to_codon(&self, _amino: Amino) -> Result<Seq<Dna>, TranslationError> {
                unimplemented!()
            }
        }

        let seq: Seq<Dna> =
            dna!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATGAGGACGATCAGCACCATAAGAACAAA");

        // Translate every overlapping window of three bases.
        let aminos: Seq<Amino> = seq
            .windows(3)
            .map(|window| Mitochondria.to_amino(&window))
            .collect::<Seq<Amino>>();
        assert_eq!(seq.len() - 2, aminos.len());

        let expected =
            amino!("NIFLCVWGGVFSRVSLCARGALSPRAPPLL*SVYTLYMWE*GDTRDISQSAHTPHM*K*ENTQK");
        for (got, want) in aminos.into_iter().zip(expected.into_iter()) {
            assert_eq!(got, want);
        }
    }
}