zoe/data/validation/
check.rs

1use crate::simd::SimdByteFunctions;
2use std::simd::{LaneCount, SupportedLaneCount, prelude::*};
3
4/// Provides SIMD-accelerated sequence validation methods.
5pub trait CheckSequence {
6    /// Checks if all bytes in the sequence are ASCII using SIMD operations
7    fn is_ascii_simd<const N: usize>(&self) -> bool
8    where
9        LaneCount<N>: SupportedLaneCount;
10
11    /// Checks if all bytes in the sequence are printable ASCII using SIMD operations
12    fn is_graphic_simd<const N: usize>(&self) -> bool
13    where
14        LaneCount<N>: SupportedLaneCount;
15}
16
17impl<T> CheckSequence for T
18where
19    T: AsRef<[u8]>,
20{
21    #[inline]
22    fn is_ascii_simd<const N: usize>(&self) -> bool
23    where
24        LaneCount<N>: SupportedLaneCount, {
25        let (pre, mid, suffix) = self.as_ref().as_simd();
26        pre.is_ascii() && suffix.is_ascii() && mid.iter().fold(Mask::splat(true), |acc, b| acc & b.is_ascii()).all()
27    }
28
29    #[inline]
30    fn is_graphic_simd<const N: usize>(&self) -> bool
31    where
32        LaneCount<N>: SupportedLaneCount, {
33        let (pre, mid, suffix) = self.as_ref().as_simd();
34        pre.iter().fold(true, |acc, b| acc & b.is_ascii_graphic())
35            && suffix.iter().fold(true, |acc, b| acc & b.is_ascii_graphic())
36            && mid.iter().fold(Mask::splat(true), |acc, b| acc & b.is_ascii_graphic()).all()
37    }
38}
39
#[cfg(test)]
mod test {
    use super::CheckSequence;
    use crate::data::alphas::AA_DAIS_WITH_GAPS_X;

    /// Input length: several 16-byte vectors plus a remainder, so that a byte
    /// planted at each index lands in every part of the `as_simd` split
    /// (unaligned head, vector body, unaligned tail) regardless of alignment.
    const LEN: usize = 151;

    /// The SIMD ASCII check agrees with the scalar std check on a random
    /// alphabet-drawn sequence.
    #[test]
    fn is_ascii() {
        let s = crate::generate::rand_sequence(AA_DAIS_WITH_GAPS_X, LEN, 42);
        assert_eq!(s.is_ascii(), s.is_ascii_simd::<16>());
    }

    /// Empty input is vacuously valid for both predicates.
    #[test]
    fn empty_input() {
        let s: Vec<u8> = Vec::new();
        assert!(s.is_ascii_simd::<16>());
        assert!(s.is_graphic_simd::<16>());
    }

    /// A single non-ASCII byte must be detected wherever it sits.
    #[test]
    fn is_ascii_rejects_non_ascii_at_every_position() {
        let mut s = vec![b'A'; LEN];
        assert!(s.is_ascii_simd::<16>());

        for i in 0..LEN {
            s[i] = 0x80;
            assert!(!s.is_ascii_simd::<16>(), "non-ASCII byte at index {i} was not detected");
            s[i] = b'A';
        }
    }

    /// The SIMD graphic check agrees with the scalar std check on a random
    /// alphabet-drawn sequence.
    #[test]
    fn is_graphic() {
        let s = crate::generate::rand_sequence(AA_DAIS_WITH_GAPS_X, LEN, 42);
        assert_eq!(s.iter().all(u8::is_ascii_graphic), s.is_graphic_simd::<16>());
    }

    /// A space is ASCII but not graphic; it must be rejected wherever it sits,
    /// which also checks that the scalar edges and the vector body agree.
    #[test]
    fn is_graphic_rejects_space_at_every_position() {
        let mut s = vec![b'A'; LEN];
        assert!(s.is_graphic_simd::<16>());

        for i in 0..LEN {
            s[i] = b' ';
            assert!(!s.is_graphic_simd::<16>(), "space at index {i} was not detected");
            s[i] = b'A';
        }
    }
}
51
/// Micro-benchmarks comparing the std scalar ASCII check with the SIMD one.
#[cfg(test)]
mod bench {
    extern crate test;

    use super::CheckSequence;
    use crate::data::alphas::AA_DAIS_WITH_GAPS_X;
    use std::sync::LazyLock;
    use test::Bencher;

    /// Length of the benchmarked sequence, in bytes.
    const SEQ_LEN: usize = 151;
    /// Seed passed to the sequence generator.
    const RNG_SEED: u64 = 99;

    /// Shared input, generated once on first use.
    static SEQ: LazyLock<Vec<u8>> =
        LazyLock::new(|| crate::generate::rand_sequence(AA_DAIS_WITH_GAPS_X, SEQ_LEN, RNG_SEED));

    /// Baseline: `<[u8]>::is_ascii` from the standard library.
    #[bench]
    fn is_ascii_std(bencher: &mut Bencher) {
        bencher.iter(|| SEQ.is_ascii());
    }

    /// `CheckSequence::is_ascii_simd` with 16-lane vectors.
    #[bench]
    fn is_ascii_zoe(bencher: &mut Bencher) {
        // Report how the input splits into head / vectors / tail for this run.
        let (head, lanes, tail) = SEQ.as_simd::<16>();
        eprintln!("{p} {m} {s}", p = head.len(), m = lanes.len(), s = tail.len());

        bencher.iter(|| SEQ.is_ascii_simd::<16>());
    }
}
76}