oak_core/source/
simd.rs

1use std::simd::prelude::*;
2
/// SIMD-accelerated scanning utilities.
///
/// This is a stateless unit type used purely as a namespace: every routine is
/// an associated function that takes the byte slice to scan and returns an
/// index (or `Option<usize>`) into that slice.
#[derive(Debug, Clone, Copy, Default)]
pub struct SimdScanner;
5
6impl SimdScanner {
7    /// Finds the first occurrence of `needle` in `text`.
8    #[inline(always)]
9    pub fn find_byte(text: &[u8], needle: u8) -> Option<usize> {
10        let mut i = 0;
11        let len = text.len();
12        const LANES: usize = 32;
13
14        // Process 32 bytes at a time
15        while i + LANES <= len {
16            // SAFETY: We checked bounds.
17            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
18            let mask = chunk.simd_eq(Simd::splat(needle));
19            if let Some(idx) = mask.first_set() {
20                return Some(i + idx);
21            }
22            i += LANES;
23        }
24
25        // Process remaining bytes
26        while i < len {
27            if unsafe { *text.get_unchecked(i) } == needle {
28                return Some(i);
29            }
30            i += 1;
31        }
32        None
33    }
34
35    /// Skips bytes while they match `byte`. Returns number of skipped bytes.
36    #[inline(always)]
37    pub fn skip_byte(text: &[u8], byte: u8) -> usize {
38        let mut i = 0;
39        let len = text.len();
40        const LANES: usize = 32;
41
42        while i + LANES <= len {
43            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
44            let mask = chunk.simd_eq(Simd::splat(byte));
45            // If not all match, we found a stopper
46            if !mask.all() {
47                // !mask has 1s where characters DO NOT match
48                let not_mask = !mask;
49                if let Some(idx) = not_mask.first_set() {
50                    return i + idx;
51                }
52            }
53            i += LANES;
54        }
55
56        while i < len {
57            if unsafe { *text.get_unchecked(i) } != byte {
58                break;
59            }
60            i += 1;
61        }
62        i
63    }
64
65    /// Skips bytes while they match either of two bytes.
66    #[inline(always)]
67    pub fn skip_two_bytes(text: &[u8], b1: u8, b2: u8) -> usize {
68        let mut i = 0;
69        let len = text.len();
70        const LANES: usize = 32;
71
72        while i + LANES <= len {
73            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
74            let m1 = chunk.simd_eq(Simd::splat(b1));
75            let m2 = chunk.simd_eq(Simd::splat(b2));
76            let mask = m1 | m2;
77            if !mask.all() {
78                let not_mask = !mask;
79                if let Some(idx) = not_mask.first_set() {
80                    return i + idx;
81                }
82            }
83            i += LANES;
84        }
85
86        while i < len {
87            let b = unsafe { *text.get_unchecked(i) };
88            if b != b1 && b != b2 {
89                break;
90            }
91            i += 1;
92        }
93        i
94    }
95
96    /// Skips common ASCII whitespace (' ', '\t', '\n', '\r').
97    #[inline(always)]
98    pub fn skip_ascii_whitespace(text: &[u8]) -> usize {
99        let mut i = 0;
100        let len = text.len();
101        const LANES: usize = 32;
102        while i + LANES <= len {
103            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
104            let m1 = chunk.simd_eq(Simd::splat(b' '));
105            let m2 = chunk.simd_eq(Simd::splat(b'\t'));
106            let m3 = chunk.simd_eq(Simd::splat(b'\n'));
107            let m4 = chunk.simd_eq(Simd::splat(b'\r'));
108            let mask = m1 | m2 | m3 | m4;
109            if !mask.all() {
110                let not_mask = !mask;
111                if let Some(idx) = not_mask.first_set() {
112                    return i + idx;
113                }
114            }
115            i += LANES;
116        }
117        while i < len {
118            let b = unsafe { *text.get_unchecked(i) };
119            if b != b' ' && b != b'\t' && b != b'\n' && b != b'\r' {
120                break;
121            }
122            i += 1;
123        }
124        i
125    }
126
127    /// Skips ASCII digits ('0'-'9').
128    #[inline(always)]
129    pub fn skip_ascii_digits(text: &[u8]) -> usize {
130        let mut i = 0;
131        let len = text.len();
132        const LANES: usize = 32;
133        while i + LANES <= len {
134            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
135            let ge0 = chunk.simd_ge(Simd::splat(b'0'));
136            let le9 = chunk.simd_le(Simd::splat(b'9'));
137            let mask = ge0 & le9;
138            if !mask.all() {
139                let not_mask = !mask;
140                if let Some(idx) = not_mask.first_set() {
141                    return i + idx;
142                }
143            }
144            i += LANES;
145        }
146        while i < len {
147            let b = unsafe { *text.get_unchecked(i) };
148            if !b.is_ascii_digit() {
149                break;
150            }
151            i += 1;
152        }
153        i
154    }
155
156    /// Skips ASCII identifier characters (a-z, A-Z, 0-9, _).
157    #[inline(always)]
158    pub fn skip_ascii_ident_continue(text: &[u8]) -> usize {
159        let mut i = 0;
160        let len = text.len();
161        const LANES: usize = 32;
162        while i + LANES <= len {
163            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
164            let low = chunk.simd_ge(Simd::splat(b'a')) & chunk.simd_le(Simd::splat(b'z'));
165            let up = chunk.simd_ge(Simd::splat(b'A')) & chunk.simd_le(Simd::splat(b'Z'));
166            let dig = chunk.simd_ge(Simd::splat(b'0')) & chunk.simd_le(Simd::splat(b'9'));
167            let und = chunk.simd_eq(Simd::splat(b'_'));
168            let mask = low | up | dig | und;
169            if !mask.all() {
170                let not_mask = !mask;
171                if let Some(idx) = not_mask.first_set() {
172                    return i + idx;
173                }
174            }
175            i += LANES;
176        }
177        while i < len {
178            let b = unsafe { *text.get_unchecked(i) };
179            if !b.is_ascii_alphanumeric() && b != b'_' {
180                break;
181            }
182            i += 1;
183        }
184        i
185    }
186
187    /// Skips ASCII hex digits (0-9, a-f, A-F).
188    #[inline(always)]
189    pub fn skip_ascii_hexdigits(text: &[u8]) -> usize {
190        let mut i = 0;
191        let len = text.len();
192        const LANES: usize = 32;
193        while i + LANES <= len {
194            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
195            let dig = chunk.simd_ge(Simd::splat(b'0')) & chunk.simd_le(Simd::splat(b'9'));
196            let low = chunk.simd_ge(Simd::splat(b'a')) & chunk.simd_le(Simd::splat(b'f'));
197            let up = chunk.simd_ge(Simd::splat(b'A')) & chunk.simd_le(Simd::splat(b'F'));
198            let mask = dig | low | up;
199            if !mask.all() {
200                let not_mask = !mask;
201                if let Some(idx) = not_mask.first_set() {
202                    return i + idx;
203                }
204            }
205            i += LANES;
206        }
207        while i < len {
208            let b = unsafe { *text.get_unchecked(i) };
209            if !b.is_ascii_hexdigit() {
210                break;
211            }
212            i += 1;
213        }
214        i
215    }
216
217    /// Finds the first occurrence of any of the 4 bytes.
218    #[inline(always)]
219    pub fn find_first_of_4(text: &[u8], a: u8, b: u8, c: u8, d: u8) -> Option<usize> {
220        let mut i = 0;
221        let len = text.len();
222        const LANES: usize = 32;
223
224        while i + LANES <= len {
225            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
226            let ma = chunk.simd_eq(Simd::splat(a));
227            let mb = chunk.simd_eq(Simd::splat(b));
228            let mc = chunk.simd_eq(Simd::splat(c));
229            let md = chunk.simd_eq(Simd::splat(d));
230            let mask = ma | mb | mc | md;
231            if mask.any() {
232                if let Some(idx) = mask.first_set() {
233                    return Some(i + idx);
234                }
235            }
236            i += LANES;
237        }
238
239        while i < len {
240            let byte = unsafe { *text.get_unchecked(i) };
241            if byte == a || byte == b || byte == c || byte == d {
242                return Some(i);
243            }
244            i += 1;
245        }
246        None
247    }
248
249    /// Skips until the specified byte is found.
250    #[inline(always)]
251    pub fn skip_until(text: &[u8], target: u8) -> usize {
252        let mut i = 0;
253        let len = text.len();
254        const LANES: usize = 32;
255
256        while i + LANES <= len {
257            let chunk = Simd::<u8, LANES>::from_slice(unsafe { text.get_unchecked(i..i + LANES) });
258            let mask = chunk.simd_eq(Simd::splat(target));
259            if mask.any() {
260                if let Some(idx) = mask.first_set() {
261                    return i + idx;
262                }
263            }
264            i += LANES;
265        }
266
267        while i < len {
268            if unsafe { *text.get_unchecked(i) } == target {
269                break;
270            }
271            i += 1;
272        }
273        i
274    }
275}