nanalogue_core/utils/
complement.rs1use std::array;
7use std::borrow::Borrow;
8use std::sync::LazyLock;
9
10static COMPLEMENT: LazyLock<[u8; 256]> = LazyLock::new(|| {
12 let mut comp = array::from_fn(|v| u8::try_from(v).expect("array indices 0..=255 fit in u8"));
13 b"AGCTYRWSKMDVHBN"
14 .iter()
15 .zip(b"TCGARYWSMKHBDVN".iter())
16 .for_each(|(&a, &b)| {
17 *comp
18 .get_mut(a as usize)
19 .expect("ASCII nucleotide bytes are valid lookup indices") = b;
20 *comp
21 .get_mut(a as usize + 32)
22 .expect("ASCII lowercase nucleotide bytes are valid lookup indices") = b + 32;
23 });
24 comp
25});
26
27#[inline]
37#[must_use]
38pub fn complement(a: u8) -> u8 {
39 COMPLEMENT
40 .get(a as usize)
41 .copied()
42 .expect("u8 values always index into the 256-byte complement table")
43}
44
45#[must_use]
51pub fn revcomp<C, T>(text: T) -> Vec<u8>
52where
53 C: Borrow<u8>,
54 T: IntoIterator<Item = C>,
55 T::IntoIter: DoubleEndedIterator,
56{
57 const MAX_PRE_RESERVE: usize = 3usize * 1024 * 1024 * 1024;
60
61 let iter = text.into_iter().rev().map(|a| complement(*a.borrow()));
62 let (lo, hi) = iter.size_hint();
63 let upper = hi.unwrap_or(lo);
64
65 if upper <= MAX_PRE_RESERVE {
66 iter.collect()
67 } else {
68 Vec::new()
69 }
70}
71
72#[cfg(test)]
73mod tests {
74 use super::*;
75
76 #[test]
77 fn complement_preserves_iupac_behavior() {
78 assert_eq!(complement(b'A'), b'T');
79 assert_eq!(complement(b'c'), b'g');
80 assert_eq!(complement(b'N'), b'N');
81 assert_eq!(complement(b'Y'), b'R');
82 assert_eq!(complement(b's'), b's');
83 assert_eq!(complement(b'='), b'=');
84 assert_eq!(complement(b'X'), b'X');
85 }
86
87 #[test]
88 fn complement_table_has_all_u8_entries() {
89 let table = &*COMPLEMENT;
90 assert_eq!(table.len(), usize::from(u8::MAX) + 1);
91
92 for byte in u8::MIN..=u8::MAX {
93 assert!(table.get(usize::from(byte)).is_some());
94 }
95 }
96
97 #[test]
98 fn revcomp_preserves_iupac_behavior() {
99 assert_eq!(revcomp(b"ACGTN"), b"NACGT");
100 assert_eq!(revcomp(b"GaTtaCA"), b"TGtaAtC");
101 assert_eq!(revcomp(b"AGCTYRWSKMDVHBN"), b"NVDBHKMSWYRAGCT");
102 }
103
104 #[test]
107 fn revcomp_does_not_abort_on_adversarial_size_hint() {
108 let it = std::iter::repeat_n(0u8, usize::MAX);
109 let out = revcomp(it);
110 assert!(
111 out.is_empty(),
112 "adversarial size_hint must produce an empty Vec, got {}",
113 out.len(),
114 );
115 }
116}