Skip to main content

handlegraph/
util.rs

1pub mod dna {
2
3    const fn comp_base_impl(base: u8) -> u8 {
4        match base {
5            b'A' => b'T',
6            b'G' => b'C',
7            b'C' => b'G',
8            b'T' => b'A',
9            b'Y' => b'R',
10            b'R' => b'Y',
11            b'W' => b'W',
12            b'S' => b'S',
13            b'K' => b'M',
14            b'M' => b'K',
15            b'D' => b'H',
16            b'V' => b'B',
17            b'H' => b'D',
18            b'B' => b'V',
19            _ => b'N',
20        }
21    }
22
23    // loops can be used in const fns since Rust 1.46, meaning we can
24    // build a lookup table at compile time
25    const fn comp_base_table() -> [u8; 256] {
26        let mut i = 0;
27        let mut table: [u8; 256] = [0; 256];
28        while i <= 255 {
29            let offset = 32 * ((i as u8).is_ascii_lowercase() as u8);
30            let comp = comp_base_impl((i as u8) - offset);
31
32            if comp == b'N' {
33                table[i] = i as u8;
34            } else {
35                table[i] = comp + offset;
36            }
37
38            i += 1;
39        }
40        table
41    }
42
43    const DNA_COMP_TABLE: [u8; 256] = comp_base_table();
44
45    /// Retrieves the DNA complement for the provided base using a
46    /// lookup-table built at compile time using the `const fn`
47    /// `comp_base_table()`.
48    #[inline]
49    pub const fn comp_base(base: u8) -> u8 {
50        DNA_COMP_TABLE[base as usize]
51    }
52
53    /// Calculates the reverse complement for a sequence provided as a
54    /// double-ended iterator. Collects into a `Vec<u8>` for
55    /// convenience.
56    #[inline]
57    pub fn rev_comp<I, B>(seq: I) -> Vec<u8>
58    where
59        B: std::borrow::Borrow<u8>,
60        I: IntoIterator<Item = B>,
61        I::IntoIter: DoubleEndedIterator,
62    {
63        seq.into_iter()
64            .rev()
65            .map(|b| comp_base(*b.borrow()))
66            .collect()
67    }
68
69    /// Given a sequence provided as a double-ended iterator over
70    /// nucleotides, returns an iterator over the reverse complement
71    /// of the sequence.
72    #[inline]
73    pub fn rev_comp_iter<I, B>(seq: I) -> impl Iterator<Item = u8>
74    where
75        B: std::borrow::Borrow<u8>,
76        I: IntoIterator<Item = B>,
77        I::IntoIter: DoubleEndedIterator,
78    {
79        seq.into_iter().rev().map(|b| comp_base(*b.borrow()))
80    }
81
82    #[cfg(test)]
83    mod tests {
84        use super::*;
85
86        use quickcheck::{Arbitrary, Gen, QuickCheck};
87
88        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
89        struct Base(u8);
90
91        impl Base {
92            fn from_num(n: u8) -> Base {
93                match n {
94                    0 => Base(b'T'),
95                    1 => Base(b'C'),
96                    2 => Base(b'G'),
97                    3 => Base(b'A'),
98                    4 => Base(b't'),
99                    5 => Base(b'c'),
100                    6 => Base(b'g'),
101                    7 => Base(b'a'),
102                    _ => Base(b'N'),
103                }
104            }
105        }
106
107        impl From<u8> for Base {
108            fn from(base: u8) -> Base {
109                Base(base)
110            }
111        }
112
113        impl Into<u8> for Base {
114            fn into(self) -> u8 {
115                self.0
116            }
117        }
118
119        impl Arbitrary for Base {
120            fn arbitrary<G: Gen>(g: &mut G) -> Base {
121                let n = u8::arbitrary(g) % 8;
122                Base::from_num(n)
123            }
124        }
125
126        fn is_comp_isomorphic(b: Base) -> bool {
127            let base = b.0;
128            comp_base(comp_base(base)) == base
129        }
130
131        #[test]
132        fn comp_isomorphic_check() {
133            for x in 0..10 {
134                let i = x as u8;
135                let base = Base::from_num(i);
136                let comp = comp_base(base.0);
137                let back = comp_base(comp);
138                println!(
139                    "{:2} -> {} -> {} -> {}",
140                    i,
141                    char::from(base.0),
142                    char::from(comp),
143                    char::from(back),
144                );
145            }
146        }
147
148        #[test]
149        fn comp_isomorphic() {
150            QuickCheck::new()
151                .tests(10000)
152                .quickcheck(is_comp_isomorphic as fn(Base) -> bool);
153        }
154
155        fn is_rev_comp_isomorphic(seq: Vec<Base>) -> bool {
156            let seq = seq.into_iter().map(|b| b.0).collect::<Vec<_>>();
157            rev_comp(rev_comp(seq.clone())) == seq
158        }
159
160        #[test]
161        fn rev_comp_isomorphic() {
162            QuickCheck::new()
163                .tests(10000)
164                .quickcheck(is_rev_comp_isomorphic as fn(Vec<Base>) -> bool);
165        }
166    }
167}