bio_seq/
translation.rs

1//! # Amino acid translation tables
2//!
3//! This module provides traits for implementing amino acid translation tables.
4//!
5//! Enable the translation feature in `Cargo.toml`:
6//!
7//! ```toml
8//! [dependencies]
9//! bio-seq = { version="0.13", features=["translation"] }
10//! ```
11//!
12//! ## Examples
13//!
14//! The standard genetic code is provided as a `translation::STANDARD` constant:
15//!
16//! ```rust
17//! use bio_seq::prelude::*;
18//! use bio_seq::translation::STANDARD;
19//! use bio_seq::translation::TranslationTable;
20//!
21//! let seq = dna!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATGAGGACGATCAGCACCATAAGAACAAA");
22//!
23//! let aminos: Seq<Amino> = seq
24//!     .windows(3)
25//!     .map(|codon| STANDARD.to_amino(&codon))
26//!     .collect::<Seq<Amino>>();
27//!
28//! assert_eq!(
29//!     aminos,
30//!     Seq::<Amino>::try_from("NIFLCVWGGVFSRVSLCARGALSPRAPPLL*SVYTLYM*ERGDTRDISQSAHTPHI*KRENTQK").unwrap()
31//! );
32//!
33//! ```
34//!
//! Custom translation tables can be built from associative data structures (see `CodonTable::from_map`) or, as in the example below, by implementing `TranslationTable` directly:
36//!
37//! ```
38//! use bio_seq::prelude::*;
39//! use bio_seq::translation::{TranslationTable, TranslationError};
40//!
41//! struct Mitochondria;
42//! impl TranslationTable<Dna, Amino> for Mitochondria {
43//!     fn to_amino(&self, codon: &SeqSlice<Dna>) -> Amino {
44//!         if codon == dna!("AGA") {
45//!             Amino::X
46//!         } else if codon == dna!("AGG") {
47//!             Amino::X
48//!         } else if codon == dna!("ATA") {
49//!             Amino::M
50//!        } else if codon == dna!("TGA") {
51//!             Amino::W
52//!         } else {
53//!                 Amino::unsafe_from_bits(Into::<u8>::into(codon))
54//!               }
55//!           }
56//!
57//!          fn to_codon(&self, _amino: Amino) -> Result<Seq<Dna>, TranslationError> {
58//!               unimplemented!()
59//!           }
60//!       }
61//!
62//!        let seq: Seq<Dna> =
63//!            dna!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATGAGGACGATCAGCACCATAAGAACAAA").into();
64//!        let aminos: Seq<Amino> = seq
65//!            .windows(3)
66//!            .map(|codon| Mitochondria.to_amino(&codon))
67//!            .collect::<Seq<Amino>>();
68//!        assert_eq!(seq.len() - 2, aminos.len());
69//!
70//!        for (x, y) in aminos.into_iter().zip(
71//!            Seq::<Amino>::try_from(
72//!                "NIFLCVWGGVFSRVSLCARGALSPRAPPLL*SVYTLYMWE*GDTRDISQSAHTPHM*K*ENTQK",
73//!            )
74//!            .unwrap()
75//!            .into_iter(),
76//!        ) {
77//!            assert_eq!(x, y)
78//!        }
79//! ```
80//!
81//! ## Errors
82//!
//! Translation tables may be incomplete or ambiguous; these conditions are reported through `TranslationError`.
84//!
85use core::cmp::Eq;
86use core::fmt;
87use std::collections::HashMap;
88
89use crate::codec::Codec;
90use crate::prelude::{Amino, Dna, Seq, SeqSlice};
91
92mod standard;
93
94pub use crate::translation::standard::STANDARD;
95
96/// Error conditions for codon/amino acid translation
97#[derive(Debug, PartialEq, Eq, Clone)]
98pub enum TranslationError<A: Codec = Dna, B: Codec = Amino> {
99    /// Amino acid can be translation from multiple codons
100    AmbiguousCodon(B),
101    /// Codon sequence maps to multiple amino acids
102    AmbiguousTranslation(Seq<A>),
103    /// Codon sequence does not map to an amino acid
104    InvalidCodon(Seq<A>),
105    /// Amino acid symbol is not valid (i.e. `X`)
106    InvalidAmino(B),
107}
108
109impl<A: Codec, B: Codec> fmt::Display for TranslationError<A, B> {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        match self {
112            TranslationError::AmbiguousCodon(amino) => {
113                let amino = amino.to_char();
114                write!(f, "Multiple codon sequences: {amino}")
115            }
116            TranslationError::AmbiguousTranslation(codon) => {
117                write!(f, "Ambiguous translations for codon: {codon}")
118            }
119            TranslationError::InvalidCodon(codon) => write!(f, "Invalid codon sequence: {codon}"),
120            TranslationError::InvalidAmino(amino) => {
121                let amino = amino.to_char();
122                write!(f, "Invalid amino acid character: {amino}")
123            }
124        }
125    }
126}
127
128// #![feature(error_in_core)
129impl<A: Codec, B: Codec> std::error::Error for TranslationError<A, B> {}
130
131/// A codon translation table where all codons map to amino acids
132pub trait TranslationTable<A: Codec, B: Codec> {
133    fn to_amino(&self, codon: &SeqSlice<A>) -> B;
134
135    /// # Errors
136    ///
137    /// Will return `Err` when an amino acid has multiple codons (most cases)
138    fn to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError<A, B>>;
139}
140
141/// A partial translation table where not all triples of characters map to amino acids
142pub trait PartialTranslationTable<A: Codec, B: Codec> {
143    /// # Errors
144    ///
145    /// Will return an `Err` if a codon does not map to an amino acid. This would be
146    /// the case for a translation table from codons with ambiguous nucleotide codes such as `ANC`, `SWS`, `NNN`, etc.
147    fn try_to_amino(&self, codon: &SeqSlice<A>) -> Result<B, TranslationError<A, B>>;
148    /// # Errors
149    ///
150    /// Will return an `Err` if the amino acid can be translated from different codons
151    fn try_to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError<A, B>>;
152}
153
154/// A customisable translation table
155pub struct CodonTable<A: Codec, B: Codec> {
156    // I'm open to using a better bidirectional mapping datastructure
157    table: HashMap<Seq<A>, B>,
158    inverse_table: HashMap<B, Option<Seq<A>>>,
159}
160
161impl<A: Codec, B: Codec> CodonTable<A, B> {
162    pub fn from_map<T>(table: T) -> Self
163    where
164        T: Into<HashMap<Seq<A>, B>>,
165    {
166        let table: HashMap<Seq<A>, B> = table.into();
167        let mut inverse_table = HashMap::new();
168        for (codon, amino) in &table {
169            if inverse_table.contains_key(amino) {
170                inverse_table.insert(*amino, None);
171            } else {
172                inverse_table.insert(*amino, Some(codon.clone()));
173            }
174        }
175        CodonTable {
176            table,
177            inverse_table,
178        }
179    }
180}
181
182impl<A: Codec, B: Codec> PartialTranslationTable<A, B> for CodonTable<A, B> {
183    fn try_to_amino(&self, codon: &SeqSlice<A>) -> Result<B, TranslationError<A, B>> {
184        self.table
185            .get(codon)
186            .ok_or_else(|| TranslationError::InvalidCodon(codon.into()))
187            .copied()
188    }
189
190    fn try_to_codon(&self, amino: B) -> Result<Seq<A>, TranslationError<A, B>> {
191        if let Some(codon) = self.inverse_table.get(&amino) {
192            match codon {
193                Some(codon) => Ok(codon.clone()),
194                None => Err(TranslationError::AmbiguousCodon(amino)),
195            }
196        } else {
197            Err(TranslationError::InvalidAmino(amino))
198        }
199    }
200}
201
#[cfg(test)]
mod tests {
    use crate::prelude::*;
    use crate::translation::{
        CodonTable, PartialTranslationTable, TranslationError, TranslationTable,
    };

    /// A hand-built `CodonTable` translates chunk by chunk and reports
    /// ambiguous and unknown amino acids on the reverse lookup.
    #[test]
    fn custom_codon_table() {
        // `Amino::A` is reachable from two codons (AAA and ATG), which makes
        // its reverse lookup ambiguous.
        let entries: [(Seq<Dna>, Amino); 6] = [
            (dna!("AAA").into(), Amino::A),
            (dna!("ATG").into(), Amino::A),
            (dna!("CCC").into(), Amino::C),
            (dna!("GGG").into(), Amino::E),
            (dna!("TTT").into(), Amino::D),
            (dna!("TTA").into(), Amino::F),
        ];
        let table = CodonTable::from_map(entries);

        // Forward direction: translate non-overlapping chunks of three.
        let seq: Seq<Dna> = dna!("AAACCCGGGTTTTTATTAATG").into();
        let expected = Seq::<Amino>::try_from("ACEDFFA").unwrap();
        let mut translated: Seq<Amino> = Seq::new();
        for chunk in seq.chunks(3) {
            let amino = table.try_to_amino(chunk).unwrap();
            translated.push(amino);
        }
        assert_eq!(translated, expected);

        // Reverse direction: unique, ambiguous and missing amino acids.
        let ccc: Seq<Dna> = dna!("CCC").into();
        assert_ne!(table.try_to_codon(Amino::E), Ok(ccc.clone()));
        assert_eq!(table.try_to_codon(Amino::C), Ok(ccc));
        assert_eq!(
            table.try_to_codon(Amino::A),
            Err(TranslationError::AmbiguousCodon(Amino::A))
        );
        assert_eq!(
            table.try_to_codon(Amino::X),
            Err(TranslationError::InvalidAmino(Amino::X))
        );
    }

    /// A `TranslationTable` implemented by hand: four codons are overridden,
    /// every other codon is converted from its bit representation.
    #[test]
    fn mitochondrial_coding_table() {
        struct Mitochondria;

        impl TranslationTable<Dna, Amino> for Mitochondria {
            fn to_amino(&self, codon: &SeqSlice<Dna>) -> Amino {
                if codon == dna!("AGA") || codon == dna!("AGG") {
                    Amino::X
                } else if codon == dna!("ATA") {
                    Amino::M
                } else if codon == dna!("TGA") {
                    Amino::W
                } else {
                    // No override: convert the codon's bits directly.
                    Amino::unsafe_from_bits(Into::<u8>::into(codon))
                }
            }

            fn to_codon(&self, _amino: Amino) -> Result<Seq<Dna>, TranslationError> {
                unimplemented!()
            }
        }

        let seq: Seq<Dna> =
            dna!("AATTTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATGAGGACGATCAGCACCATAAGAACAAA").into();

        // One amino acid per window of three, hence `len - 2` results.
        let aminos: Seq<Amino> = seq
            .windows(3)
            .map(|codon| Mitochondria.to_amino(&codon))
            .collect::<Seq<Amino>>();
        assert_eq!(seq.len() - 2, aminos.len());

        let expected = Seq::<Amino>::try_from(
            "NIFLCVWGGVFSRVSLCARGALSPRAPPLL*SVYTLYMWE*GDTRDISQSAHTPHM*K*ENTQK",
        )
        .unwrap();
        for (x, y) in aminos.into_iter().zip(expected.into_iter()) {
            assert_eq!(x, y);
        }
    }
}