1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
//! # Amino acid translation tables
//!
//! This module provides traits for implementing amino acid translation tables.
//!
//! Enable the translation feature in `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! bio-seq = { version="0.13", features=["translation"] }
//! ```
//!
//! ## Examples
//!
//! The standard genetic code is provided as a `translation::STANDARD` constant:
//!
//! ```rust
//! use bio_seq::prelude::*;
//! use bio_seq::translation::STANDARD;
//! use bio_seq::translation::TranslationTable;
//!
//! let seq = dna!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATGAGGACGATCAGCACCATAAGAACAAA");
//!
//! let aminos: Seq<Amino> = seq
//!     .windows(3)
//!     .map(|codon| STANDARD.to_amino(&codon))
//!     .collect::<Seq<Amino>>();
//!
//! assert_eq!(
//!     aminos,
//!     Seq::<Amino>::try_from("NIFLCVWGGVFSRVSLCARGALSPRAPPLL*SVYTLYM*ERGDTRDISQSAHTPHI*KRENTQK").unwrap()
//! );
//!
//! ```
//!
//! Custom translation tables can be implemented from associative data structures:
//!
//! ```
//! use bio_seq::prelude::*;
//! use bio_seq::translation::{TranslationTable, TranslationError};
//!
//! struct Mitochondria;
//! impl TranslationTable<Dna, Amino> for Mitochondria {
//!     fn to_amino(&self, codon: &SeqSlice<Dna>) -> Amino {
//!         if codon == dna!("AGA") {
//!             Amino::X
//!         } else if codon == dna!("AGG") {
//!             Amino::X
//!         } else if codon == dna!("ATA") {
//!             Amino::M
//!         } else if codon == dna!("TGA") {
//!             Amino::W
//!         } else {
//!             Amino::unsafe_from_bits(Into::<u8>::into(codon))
//!         }
//!     }
//!
//!     fn to_codon(&self, _amino: Amino) -> Result<Seq<Dna>, TranslationError> {
//!         unimplemented!()
//!     }
//! }
//!
//! let seq: Seq<Dna> =
//!     dna!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATGAGGACGATCAGCACCATAAGAACAAA").into();
//! let aminos: Seq<Amino> = seq
//!     .windows(3)
//!     .map(|codon| Mitochondria.to_amino(&codon))
//!     .collect::<Seq<Amino>>();
//! assert_eq!(seq.len() - 2, aminos.len());
//!
//! for (x, y) in aminos.into_iter().zip(
//!     Seq::<Amino>::try_from(
//!         "NIFLCVWGGVFSRVSLCARGALSPRAPPLL*SVYTLYMWE*GDTRDISQSAHTPHM*K*ENTQK",
//!     )
//!     .unwrap()
//!     .into_iter(),
//! ) {
//!     assert_eq!(x, y)
//! }
//! ```
//!
//! ## Errors
//!
//! Translation tables may be incomplete or ambiguous; these cases are reported as
//! [`TranslationError`] values.
//!
use core::fmt;
use std::collections::HashMap;

use crate::codec::Codec;
use crate::prelude::{Amino, Dna, Seq, SeqSlice};

mod standard;

pub use crate::translation::standard::STANDARD;

/// Error conditions for codon/amino acid translation
///
/// `A` is the codec of the codon sequence and `B` the codec of the translated
/// symbol; they default to `Dna` and `Amino` respectively.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum TranslationError<A: Codec = Dna, B: Codec + fmt::Display + fmt::Debug = Amino> {
    /// Amino acid can be translated from multiple codons, so no single codon
    /// can be returned for it
    AmbiguousCodon(B),
    /// Codon sequence maps to multiple amino acids
    AmbiguousTranslation(Seq<A>),
    /// Codon sequence does not map to an amino acid
    InvalidCodon(Seq<A>),
    /// Amino acid symbol is not valid or has no codon in the table (e.g. `X`)
    InvalidAmino(B),
}

/// Human-readable messages for each translation failure.
///
/// The offending codon or amino acid is appended to a fixed prefix; the
/// `InvalidAmino` payload is rendered with its `Debug` form, the others with
/// `Display`.
impl<A: Codec, B: Codec + fmt::Display> fmt::Display for TranslationError<A, B> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::AmbiguousCodon(residue) => write!(f, "Multiple codon sequences: {residue}"),
            Self::AmbiguousTranslation(triplet) => {
                write!(f, "Ambiguous translations for codon: {triplet}")
            }
            Self::InvalidCodon(triplet) => write!(f, "Invalid codon sequence: {triplet}"),
            Self::InvalidAmino(residue) => {
                write!(f, "Invalid amino acid character: {residue:?}")
            }
        }
    }
}

// Marker impl: no `Error` methods are overridden, so this relies on the derived
// `Debug` and the hand-written `Display` for `TranslationError`.
// NOTE(review): the original note here read `#![feature(error_in_core)]` — presumably a
// reminder that implementing `core::error::Error` instead would need that feature gate;
// confirm against the crate's supported toolchain.
impl<A: Codec, B: Codec + fmt::Display + fmt::Debug> std::error::Error for TranslationError<A, B> {}

/// A codon translation table where all codons map to amino acids
///
/// `A` is the codec of the codon sequence and `B` the codec of the translated
/// symbol.
pub trait TranslationTable<A: Codec, B: Codec + fmt::Display> {
    /// Translate a codon into its amino acid.
    ///
    /// This direction is infallible: the signature returns `B` directly, so
    /// implementations must produce a value for every codon they are given.
    fn to_amino(&self, codon: &SeqSlice<A>) -> B;

    /// Translate an amino acid back into a codon sequence.
    ///
    /// # Errors
    ///
    /// Will return `Err` when an amino acid has multiple codons (most cases)
    fn to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError<A, B>>;
}

/// A partial translation table where not all triples of characters map to amino acids
///
/// Both directions are fallible and report failures as [`TranslationError`].
pub trait PartialTranslationTable<A: Codec, B: Codec + fmt::Display> {
    /// Try to translate a codon into its amino acid.
    ///
    /// # Errors
    ///
    /// Will return an `Err` if a codon does not map to an amino acid. This would be
    /// the case for a translation table from codons with ambiguous nucleotide codes such as `ANC`, `SWS`, `NNN`, etc.
    fn try_to_amino(&self, codon: &SeqSlice<A>) -> Result<B, TranslationError<A, B>>;
    /// Try to translate an amino acid back into a codon sequence.
    ///
    /// # Errors
    ///
    /// Will return an `Err` if the amino acid can be translated from different codons
    fn try_to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError<A, B>>;
}

/// A customisable translation table
///
/// Holds a forward codon → amino acid map together with a derived inverse
/// map, so lookups in either direction are a single hash lookup.
pub struct CodonTable<A: Codec, B: Codec> {
    // I'm open to using a better bidirectional mapping datastructure
    /// Forward lookup: codon sequence → amino acid.
    table: HashMap<Seq<A>, B>,
    /// Inverse lookup: amino acid → its codon. `Some(codon)` when exactly one
    /// codon in `table` maps to the amino acid, `None` when several do.
    inverse_table: HashMap<B, Option<Seq<A>>>,
}

impl<A: Codec, B: Codec + fmt::Display> CodonTable<A, B> {
    /// Build a `CodonTable` from anything convertible into a codon → amino acid
    /// `HashMap` (for example an array of `(codon, amino)` pairs).
    ///
    /// The inverse (amino acid → codon) table is derived from the forward map:
    /// an amino acid that appears for exactly one codon is stored as
    /// `Some(codon)`, while an amino acid reachable from several codons is
    /// stored as `None` so that reverse lookups can report the ambiguity.
    pub fn from_map<T>(table: T) -> Self
    where
        T: Into<HashMap<Seq<A>, B>>,
    {
        let table: HashMap<Seq<A>, B> = table.into();
        let mut inverse_table: HashMap<B, Option<Seq<A>>> = HashMap::new();
        for (codon, amino) in &table {
            // One hash lookup per entry: the first codon seen for an amino acid
            // is recorded, any later codon for the same amino acid marks it as
            // ambiguous. The outcome does not depend on iteration order.
            inverse_table
                .entry(*amino)
                .and_modify(|unique| *unique = None)
                .or_insert_with(|| Some(codon.clone()));
        }
        Self {
            table,
            inverse_table,
        }
    }
}

/// Fallible lookups in both directions, backed by the two hash maps of
/// `CodonTable`.
impl<A: Codec, B: Codec + fmt::Display> PartialTranslationTable<A, B> for CodonTable<A, B> {
    /// Look the codon up in the forward table; a missing entry is reported as
    /// `InvalidCodon` carrying an owned copy of the codon.
    fn try_to_amino(&self, codon: &SeqSlice<A>) -> Result<B, TranslationError<A, B>> {
        self.table
            .get(codon)
            .copied()
            .ok_or_else(|| TranslationError::InvalidCodon(codon.into()))
    }

    /// Look the amino acid up in the inverse table.
    ///
    /// An entry holding a codon yields that codon, an entry holding `None`
    /// yields `AmbiguousCodon`, and a missing entry yields `InvalidAmino`.
    fn try_to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError<A, B>> {
        match self.inverse_table.get(&amino) {
            Some(Some(unique)) => Ok(unique.clone()),
            Some(None) => Err(TranslationError::AmbiguousCodon(amino)),
            None => Err(TranslationError::InvalidAmino(amino)),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::prelude::*;
    use crate::translation::{
        CodonTable, PartialTranslationTable, TranslationError, TranslationTable,
    };

    /// A small hand-built table: forward lookups succeed for every listed
    /// codon, reverse lookups distinguish unique, ambiguous and missing
    /// amino acids.
    #[test]
    fn custom_codon_table() {
        let entries: [(Seq<Dna>, Amino); 6] = [
            (dna!("AAA").into(), Amino::A),
            (dna!("ATG").into(), Amino::A),
            (dna!("CCC").into(), Amino::C),
            (dna!("GGG").into(), Amino::E),
            (dna!("TTT").into(), Amino::D),
            (dna!("TTA").into(), Amino::F),
        ];

        let table = CodonTable::from_map(entries);

        let dna_seq: Seq<Dna> = dna!("AAACCCGGGTTTTTATTAATG").into();
        let mut translated: Seq<Amino> = Seq::new();
        for triplet in dna_seq.chunks(3) {
            let residue = table.try_to_amino(triplet).unwrap();
            translated.push(residue);
        }
        let expected = Seq::<Amino>::try_from("ACEDFFA").unwrap();
        assert_eq!(translated, expected);

        // `E` and `C` each have a single codon; only `C` maps back to CCC.
        assert_ne!(table.try_to_codon(Amino::E), Ok(dna!("CCC").into()));
        assert_eq!(table.try_to_codon(Amino::C), Ok(dna!("CCC").into()));

        // `A` is listed for two codons, so the reverse lookup is ambiguous.
        let ambiguous = table.try_to_codon(Amino::A);
        assert_eq!(ambiguous, Err(TranslationError::AmbiguousCodon(Amino::A)));

        // `X` never appears in the table at all.
        let missing = table.try_to_codon(Amino::X);
        assert_eq!(missing, Err(TranslationError::InvalidAmino(Amino::X)));
    }

    /// A total table implemented by overriding four codons and decoding the
    /// rest straight from the codon's bit pattern.
    #[test]
    fn mitochondrial_coding_table() {
        struct Mitochondria;

        impl TranslationTable<Dna, Amino> for Mitochondria {
            fn to_amino(&self, codon: &SeqSlice<Dna>) -> Amino {
                if codon == dna!("AGA") || codon == dna!("AGG") {
                    Amino::X
                } else if codon == dna!("ATA") {
                    Amino::M
                } else if codon == dna!("TGA") {
                    Amino::W
                } else {
                    Amino::unsafe_from_bits(Into::<u8>::into(codon))
                }
            }

            fn to_codon(&self, _amino: Amino) -> Result<Seq<Dna>, TranslationError> {
                unimplemented!()
            }
        }

        let seq: Seq<Dna> =
            dna!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATGAGGACGATCAGCACCATAAGAACAAA").into();
        let aminos: Seq<Amino> = seq
            .windows(3)
            .map(|codon| Mitochondria.to_amino(&codon))
            .collect::<Seq<Amino>>();
        // One amino acid per sliding window of width three.
        assert_eq!(seq.len() - 2, aminos.len());

        let expected = Seq::<Amino>::try_from(
            "NIFLCVWGGVFSRVSLCARGALSPRAPPLL*SVYTLYMWE*GDTRDISQSAHTPHM*K*ENTQK",
        )
        .unwrap();
        for (got, want) in aminos.into_iter().zip(expected.into_iter()) {
            assert_eq!(got, want)
        }
    }
}