use std::array;
use std::borrow::Borrow;
use std::sync::LazyLock;
static COMPLEMENT: LazyLock<[u8; 256]> = LazyLock::new(|| {
let mut comp = array::from_fn(|v| u8::try_from(v).expect("array indices 0..=255 fit in u8"));
b"AGCTYRWSKMDVHBN"
.iter()
.zip(b"TCGARYWSMKHBDVN".iter())
.for_each(|(&a, &b)| {
*comp
.get_mut(a as usize)
.expect("ASCII nucleotide bytes are valid lookup indices") = b;
*comp
.get_mut(a as usize + 32)
.expect("ASCII lowercase nucleotide bytes are valid lookup indices") = b + 32;
});
comp
});
#[inline]
#[must_use]
pub fn complement(a: u8) -> u8 {
COMPLEMENT
.get(a as usize)
.copied()
.expect("u8 values always index into the 256-byte complement table")
}
#[must_use]
pub fn revcomp<C, T>(text: T) -> Vec<u8>
where
C: Borrow<u8>,
T: IntoIterator<Item = C>,
T::IntoIter: DoubleEndedIterator,
{
const MAX_PRE_RESERVE: usize = 3usize * 1024 * 1024 * 1024;
let iter = text.into_iter().rev().map(|a| complement(*a.borrow()));
let (lo, hi) = iter.size_hint();
let upper = hi.unwrap_or(lo);
if upper <= MAX_PRE_RESERVE {
iter.collect()
} else {
Vec::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn complement_preserves_iupac_behavior() {
assert_eq!(complement(b'A'), b'T');
assert_eq!(complement(b'c'), b'g');
assert_eq!(complement(b'N'), b'N');
assert_eq!(complement(b'Y'), b'R');
assert_eq!(complement(b's'), b's');
assert_eq!(complement(b'='), b'=');
assert_eq!(complement(b'X'), b'X');
}
#[test]
fn complement_table_has_all_u8_entries() {
let table = &*COMPLEMENT;
assert_eq!(table.len(), usize::from(u8::MAX) + 1);
for byte in u8::MIN..=u8::MAX {
assert!(table.get(usize::from(byte)).is_some());
}
}
#[test]
fn revcomp_preserves_iupac_behavior() {
assert_eq!(revcomp(b"ACGTN"), b"NACGT");
assert_eq!(revcomp(b"GaTtaCA"), b"TGtaAtC");
assert_eq!(revcomp(b"AGCTYRWSKMDVHBN"), b"NVDBHKMSWYRAGCT");
}
#[test]
fn revcomp_does_not_abort_on_adversarial_size_hint() {
let it = std::iter::repeat_n(0u8, usize::MAX);
let out = revcomp(it);
assert!(
out.is_empty(),
"adversarial size_hint must produce an empty Vec, got {}",
out.len(),
);
}
}