use crate::{AmbiAmino, AmbiNuc, Amino, DnaSlice, Nuc, Nucleotide};
pub trait GeneticCode {
fn translate<N: Nucleotide>(&self, codon: [N; 3]) -> N::Amino {
N::translate(self, codon)
}
fn translate_rc<N: Nucleotide>(&self, codon: [N; 3]) -> N::Amino {
N::translate_rc(self, codon)
}
fn translate_concrete_codon(&self, codon: [Nuc; 3]) -> Amino;
fn translate_ambiguous_codon(&self, codon: [AmbiNuc; 3]) -> AmbiAmino {
let [ambi_n1, ambi_n2, ambi_n3] = codon;
ambi_n1
.iter()
.flat_map(|n1| ambi_n2.iter().map(move |n2| [n1, n2]))
.flat_map(|[n1, n2]| ambi_n3.iter().map(move |n3| [n1, n2, n3]))
.map(|codon| AmbiAmino::from(self.translate_concrete_codon(codon)))
.reduce(|a, b| a | b)
.expect("BUG: null nucleotide encountered")
}
fn translate_rc_concrete_codon(&self, mut codon: [Nuc; 3]) -> Amino {
codon.reverse();
codon.complement();
self.translate_concrete_codon(codon)
}
fn translate_rc_ambiguous_codon(&self, mut codon: [AmbiNuc; 3]) -> AmbiAmino {
codon.reverse();
codon.complement();
self.translate_ambiguous_codon(codon)
}
}
impl<F: Fn([Nuc; 3]) -> Amino> GeneticCode for F {
fn translate_concrete_codon(&self, codon: [Nuc; 3]) -> Amino {
self(codon)
}
}
impl GeneticCode for &[Amino; 64] {
fn translate_concrete_codon(&self, codon: [Nuc; 3]) -> Amino {
let [n1, n2, n3] = codon;
let idx = 16 * (n1 as u8).trailing_zeros()
+ 4 * (n2 as u8).trailing_zeros()
+ (n3 as u8).trailing_zeros();
self[idx as usize]
}
}
#[derive(Clone)]
pub struct FullLookup {
lookup: ConcreteLookup,
lookup_rc: ConcreteLookup,
ambi_lookup: AmbiLookup,
ambi_lookup_rc: AmbiLookup,
}
impl FullLookup {
#[must_use]
pub fn from_genetic_code<G: GeneticCode>(genetic_code: &G) -> Self {
Self::from_table(&std::array::from_fn(|i| {
let codon = [4, 2, 0].map(|offset| Nuc::ALL[(i >> offset) & 0b11]);
genetic_code.translate(codon)
}))
}
#[must_use]
pub const fn from_table(table: &[Amino; 64]) -> Self {
let lookup = ConcreteLookup::from_table(table);
let lookup_rc = lookup.reverse_complement();
let ambi_lookup = lookup.to_ambi_lookup();
let ambi_lookup_rc = ambi_lookup.reverse_complement();
Self {
lookup,
lookup_rc,
ambi_lookup,
ambi_lookup_rc,
}
}
#[must_use]
pub const fn to_table(&self) -> [Amino; 64] {
self.lookup.to_table()
}
}
impl GeneticCode for &FullLookup {
#[inline(always)]
fn translate_concrete_codon(&self, codon: [Nuc; 3]) -> Amino {
(&self.lookup).translate_concrete_codon(codon)
}
#[inline(always)]
fn translate_ambiguous_codon(&self, codon: [AmbiNuc; 3]) -> AmbiAmino {
(&self.ambi_lookup).translate_ambiguous_codon(codon)
}
#[inline(always)]
fn translate_rc_concrete_codon(&self, codon: [Nuc; 3]) -> Amino {
(&self.lookup_rc).translate_concrete_codon(codon)
}
#[inline(always)]
fn translate_rc_ambiguous_codon(&self, codon: [AmbiNuc; 3]) -> AmbiAmino {
(&self.ambi_lookup_rc).translate_ambiguous_codon(codon)
}
}
impl std::fmt::Debug for FullLookup {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.lookup.fmt(f)
}
}
#[derive(Clone)]
pub struct ConcreteLookup([Amino; 1 + 0x888]);
impl ConcreteLookup {
#[must_use]
pub const fn from_table(table: &[Amino; 64]) -> Self {
let mut lookup = [Amino::A; _];
let mut i = 0;
while i < table.len() {
lookup[Self::table_index_to_fast_lookup_index(i)] = table[i];
i += 1;
}
Self(lookup)
}
#[must_use]
pub const fn to_table(&self) -> [Amino; 64] {
let mut table = [Amino::A; _];
let mut i = 0;
while i < table.len() {
table[i] = self.0[Self::table_index_to_fast_lookup_index(i)];
i += 1;
}
table
}
const fn table_index_to_fast_lookup_index(idx: usize) -> usize {
let n1 = 1 << (0b11 & (idx >> 4));
let n2 = 1 << (0b11 & (idx >> 2));
let n3 = 1 << (0b11 & idx);
(n1 << 8) | (n2 << 4) | n3
}
#[must_use]
pub const fn reverse_complement(&self) -> Self {
let mut lookup = [Amino::A; _];
let mut n1 = 0x100;
while n1 <= 0x800 {
let mut n2 = 0x010;
while n2 <= 0x080 {
let mut n3 = 0x001;
while n3 <= 0x008 {
let n1_n2_n3 = n1 | n2 | n3;
lookup[n1_n2_n3] = self.0[reverse_complement_index(n1_n2_n3)];
n3 <<= 1;
}
n2 <<= 1;
}
n1 <<= 1;
}
Self(lookup)
}
#[must_use]
pub const fn to_ambi_lookup(&self) -> AmbiLookup {
let mut lookup = [AmbiAmino::X; _];
let mut n1 = 0x100;
while n1 <= 0xf00 {
let (n1_a, n1_b) = Self::split_lowest_bit(n1);
if n1_b == 0 {
let mut n2 = 0x010;
while n2 <= 0x0f0 {
let n1_n2 = n1 | n2;
let (n2_a, n2_b) = Self::split_lowest_bit(n2);
if n2_b == 0 {
let mut n3 = 0x001;
while n3 <= 0x00f {
let n1_n2_n3 = n1_n2 | n3;
let (n3_a, n3_b) = Self::split_lowest_bit(n3);
if n3_b == 0 {
lookup[n1_n2_n3] = AmbiAmino::from_amino(self.0[n1_n2_n3]);
} else {
lookup[n1_n2_n3] = lookup[n1_n2 | n3_a].or(lookup[n1_n2 | n3_b]);
}
n3 += 0x001;
}
} else {
let n1_n2_a = n1 | n2_a;
let n1_n2_b = n1 | n2_b;
let mut n3 = 0x001;
while n3 <= 0x00f {
lookup[n1_n2 | n3] = lookup[n1_n2_a | n3].or(lookup[n1_n2_b | n3]);
n3 += 1;
}
}
n2 += 0x010;
}
} else {
let mut n2_n3 = 0x011;
while n2_n3 <= 0x0ff {
lookup[n1 | n2_n3] = lookup[n1_a | n2_n3].or(lookup[n1_b | n2_n3]);
n2_n3 += 1;
}
}
n1 += 0x100;
}
AmbiLookup(lookup)
}
const fn split_lowest_bit(i: usize) -> (usize, usize) {
let lowest = 1 << i.trailing_zeros();
(lowest, i & !lowest)
}
}
impl GeneticCode for &ConcreteLookup {
#[inline(always)]
fn translate_concrete_codon(&self, codon: [Nuc; 3]) -> Amino {
let [n1, n2, n3] = codon;
self.0[((n1 as usize) << 8) | ((n2 as usize) << 4) | (n3 as usize)]
}
}
impl std::fmt::Debug for ConcreteLookup {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fmt_genetic_code(&self, f)
}
}
#[derive(Clone)]
pub struct AmbiLookup([AmbiAmino; 1 + 0xfff]);
impl AmbiLookup {
#[must_use]
pub const fn reverse_complement(&self) -> Self {
let mut ambi_lookup = [AmbiAmino::X; _];
let mut n1 = 0x100;
while n1 <= 0xf00 {
let mut n2 = 0x010;
while n2 <= 0x0f0 {
let mut n3 = 0x001;
while n3 <= 0x00f {
let n1_n2_n3 = n1 | n2 | n3;
ambi_lookup[n1_n2_n3] = self.0[reverse_complement_index(n1_n2_n3)];
n3 += 0x001;
}
n2 += 0x010;
}
n1 += 0x100;
}
Self(ambi_lookup)
}
}
impl GeneticCode for &AmbiLookup {
#[inline(always)]
fn translate_concrete_codon(&self, codon: [Nuc; 3]) -> Amino {
let [n1, n2, n3] = codon;
let amino = self.translate_ambiguous_codon([n1.into(), n2.into(), n3.into()]);
amino
.try_into()
.expect("BUG: to_ambi_lookup mapped concrete codon to ambi amino")
}
#[inline(always)]
fn translate_ambiguous_codon(&self, codon: [AmbiNuc; 3]) -> AmbiAmino {
let [n1, n2, n3] = codon;
self.0[((n1 as usize) << 8) | ((n2 as usize) << 4) | (n3 as usize)]
}
}
impl std::fmt::Debug for AmbiLookup {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fmt_genetic_code(&self, f)
}
}
#[allow(
clippy::cast_possible_truncation,
reason = "valid lookup indexes are 12 bits"
)]
const fn reverse_complement_index(idx: usize) -> usize {
((idx as u16).reverse_bits() >> 4) as usize
}
fn fmt_genetic_code(
genetic_code: &impl GeneticCode,
f: &mut std::fmt::Formatter<'_>,
) -> std::fmt::Result {
let mut map = f.debug_map();
for n1 in Nuc::ALL {
for n2 in Nuc::ALL {
for n3 in Nuc::ALL {
let codon = [n1, n2, n3];
map.entry(&codon.display(), &genetic_code.translate([n1, n2, n3]));
}
}
}
map.finish()
}
pub const NCBI1: &FullLookup = &ncbi(0);
pub const NCBI2: &FullLookup = &ncbi(1);
pub const NCBI3: &FullLookup = &ncbi(2);
pub const NCBI4: &FullLookup = &ncbi(3);
pub const NCBI5: &FullLookup = &ncbi(4);
pub const NCBI6: &FullLookup = &ncbi(5);
pub const NCBI9: &FullLookup = &ncbi(6);
pub const NCBI10: &FullLookup = &ncbi(7);
pub const NCBI11: &FullLookup = &ncbi(8);
pub const NCBI12: &FullLookup = &ncbi(9);
pub const NCBI13: &FullLookup = &ncbi(10);
pub const NCBI14: &FullLookup = &ncbi(11);
pub const NCBI15: &FullLookup = &ncbi(12);
pub const NCBI16: &FullLookup = &ncbi(13);
pub const NCBI21: &FullLookup = &ncbi(14);
pub const NCBI22: &FullLookup = &ncbi(15);
pub const NCBI23: &FullLookup = &ncbi(16);
pub const NCBI24: &FullLookup = &ncbi(17);
pub const NCBI25: &FullLookup = &ncbi(18);
pub const NCBI26: &FullLookup = &ncbi(19);
pub const NCBI27: &FullLookup = &ncbi(20);
pub const NCBI28: &FullLookup = &ncbi(21);
pub const NCBI29: &FullLookup = &ncbi(22);
pub const NCBI30: &FullLookup = &ncbi(23);
pub const NCBI31: &FullLookup = &ncbi(24);
pub const NCBI32: &FullLookup = &ncbi(25);
pub const NCBI33: &FullLookup = &ncbi(26);
pub const NCBI34: &FullLookup = &ncbi(27);
pub const NCBI35: &FullLookup = &ncbi(28);
pub const NCBI36: &FullLookup = &ncbi(29);
pub const NCBI37: &FullLookup = &ncbi(30);
const fn ncbi(i: usize) -> FullLookup {
FullLookup::from_table(&NCBI_DATA[i])
}
macro_rules! aminos {
[ $( [$( $c:tt )+ ]),+ $(,)? ] => {{
[$(
[$(to_amino!($c)),+]
),+]
}}
}
macro_rules! to_amino {
(*) => {{ Amino::Stop }};
($v:tt) => {{ Amino::$v }};
}
const NCBI_DATA: [[Amino; 64]; 31] = transpose(aminos! [
[K K K K K K N K K K K N K K N K K K K K K K K K K K K K K K K],
[N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N],
[K K K K K K K K K K K K K K K K K K K K K K K K K K K K K K K],
[N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N],
[T T T T T T T T T T T T T T T T T T T T T T T T T T T T T T T],
[T T T T T T T T T T T T T T T T T T T T T T T T T T T T T T T],
[T T T T T T T T T T T T T T T T T T T T T T T T T T T T T T T],
[T T T T T T T T T T T T T T T T T T T T T T T T T T T T T T T],
[R * R R S R S R R R G S R R S R R S R R R R R R R R S R R R R],
[S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S],
[R * R R S R S R R R G S R R S R R K R R R R R R R R K M R R R],
[S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S],
[I M M I M I I I I I M I I I M I I I I I I I I I I I I I I I I],
[I I I I I I I I I I I I I I I I I I I I I I I I I I I I I I I],
[M M M M M M M M M M M M M M M M M M M M M M M M M M M M M M M],
[I I I I I I I I I I I I I I I I I I I I I I I I I I I I I I I],
[Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q],
[H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H],
[Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q],
[H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H],
[P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P],
[P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P],
[P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P],
[P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P],
[R R R R R R R R R R R R R R R R R R R R R R R R R R R R R R W],
[R R R R R R R R R R R R R R R R R R R R R R R R R R R R R R R],
[R R R R R R R R R R R R R R R R R R R R R R R R R R R R Q W W],
[R R R R R R R R R R R R R R R R R R R R R R R R R R R R R R R],
[L L T L L L L L L L L L L L L L L L L L L L L L L L L L L L L],
[L L T L L L L L L L L L L L L L L L L L L L L L L L L L L L L],
[L L T L L L L L L S L L L L L L L L L A L L L L L L L L L L L],
[L L T L L L L L L L L L L L L L L L L L L L L L L L L L L L L],
[E E E E E E E E E E E E E E E E E E E E E E E E E E E E E E E],
[D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D],
[E E E E E E E E E E E E E E E E E E E E E E E E E E E E E E E],
[D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D],
[A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A],
[A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A],
[A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A],
[A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A],
[G G G G G G G G G G G G G G G G G G G G G G G G G G G G G G G],
[G G G G G G G G G G G G G G G G G G G G G G G G G G G G G G G],
[G G G G G G G G G G G G G G G G G G G G G G G G G G G G G G G],
[G G G G G G G G G G G G G G G G G G G G G G G G G G G G G G G],
[V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V],
[V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V],
[V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V],
[V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V V],
[* * * * * Q * * * * * Y * * * * * * * * Q Q Y E E * Y * * * *],
[Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y],
[* * * * * Q * * * * * * Q L * L * * * * Q Q Y E E W * * * * *],
[Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y],
[S S S S S S S S S S S S S S S * S S S S S S S S S S S S S S S],
[S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S],
[S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S],
[S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S S],
[* W W W W * W C * * W W * * W * * W G * W W * * W * W * * * G],
[C C C C C C C C C C C C C C C C C C C C C C C C C C C C C C C],
[W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W],
[C C C C C C C C C C C C C C C C C C C C C C C C C C C C C C C],
[L L L L L L L L L L L L L L L L * L L L L L L L L L L L L L L],
[F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F],
[L L L L L L L L L L L L L L L L L L L L L L L L L L L L L L L],
[F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F],
]);
const fn transpose<const N: usize, const M: usize>(table: [[Amino; N]; M]) -> [[Amino; M]; N] {
let mut output = [[Amino::A; M]; N];
let mut i = 0;
while i < N {
let mut j = 0;
while j < M {
output[i][j] = table[j][i];
j += 1;
}
i += 1;
}
output
}
#[cfg(test)]
mod tests {
use super::*;
#[cfg_attr(miri, ignore = "slow in miri; shouldn't touch unsafe code anyway")]
#[test]
fn compare_fast_lookup_against_reference_implementation() {
for raw in &NCBI_DATA {
let fast = &FullLookup::from_table(raw);
assert_genetic_codes_eq(&fast, &raw);
}
}
#[cfg_attr(miri, ignore = "slow in miri; shouldn't touch unsafe code anyway")]
#[test]
fn fast_lookup_can_be_built_from_other_genetic_code() {
let new = &FullLookup::from_genetic_code(&NCBI1);
assert_genetic_codes_eq(&new, &NCBI1);
}
#[cfg_attr(miri, ignore = "slow in miri; shouldn't touch unsafe code anyway")]
#[test]
fn fast_lookup_conversion_roundtrips() {
let table = &NCBI_DATA[0];
assert_eq!(&FullLookup::from_table(table).to_table(), table);
}
#[test]
fn fast_lookup_reverse_complement() {
let dna = Nuc::arr(b"ATCTTCGGGGGGAATTAAAAACTAATAAAGTTCAACAATGGTTGGCATCTCTTCCCGGGG");
let peptide = dna.rc_translated_to_vec_by(NCBI1);
assert_eq!(peptide, Amino::arr(b"PREEMPTIVELY*FLIPPED"));
}
#[test]
fn exhaustively_test_reverse_complement_concrete_lookups() {
for n1 in Nuc::ALL {
for n2 in Nuc::ALL {
for n3 in Nuc::ALL {
let codon = [n1, n2, n3];
let mut codon_rc = codon;
codon_rc.revcomp();
assert_eq!(
NCBI1.translate_rc(codon),
NCBI1.translate(codon_rc),
"Mismatch for {codon:?}"
);
}
}
}
}
#[test]
fn exhaustively_test_reverse_complement_ambiguous_lookups() {
for n1 in AmbiNuc::ALL {
for n2 in AmbiNuc::ALL {
for n3 in AmbiNuc::ALL {
let codon = [n1, n2, n3];
let mut codon_rc = codon;
codon_rc.revcomp();
assert_eq!(
NCBI1.translate_rc(codon),
NCBI1.translate(codon_rc),
"Mismatch for {codon:?}"
);
}
}
}
}
fn assert_genetic_codes_eq(g1: &impl GeneticCode, g2: &impl GeneticCode) {
for n1 in Nuc::ALL {
for n2 in Nuc::ALL {
for n3 in Nuc::ALL {
let codon = [n1, n2, n3];
assert_eq!(g1.translate(codon), g2.translate(codon));
}
}
}
for n1 in AmbiNuc::ALL {
for n2 in AmbiNuc::ALL {
for n3 in AmbiNuc::ALL {
let codon = [n1, n2, n3];
assert_eq!(g1.translate(codon), g2.translate(codon));
}
}
}
}
}