Skip to main content

fionn_stream/skiptape/
simd_ops.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2//! SIMD operations for high-performance JSON processing
3//!
4//! This module provides SIMD-accelerated operations for character classification,
5//! string processing, and pattern matching used in JSON parsing and schema filtering.
6
/// SIMD-accelerated character classification for JSON tokens
///
/// The type carries no state; it only groups the `classify_chunk`
/// implementations. It derives `Debug`, `Clone` and `Copy` so it can be
/// logged and passed around by value like any other zero-sized handle.
#[derive(Debug, Clone, Copy)]
pub struct SimdCharClassifier;
9
10/// High-performance SIMD JSON structural detector
11/// Processes entire buffers using SIMD operations instead of per-character loops
12pub struct SimdJsonStructuralDetector {
13    classifier: SimdCharClassifier,
14}
15
16impl SimdJsonStructuralDetector {
17    /// Create a new SIMD JSON structural detector
18    #[must_use]
19    pub const fn new() -> Self {
20        Self {
21            classifier: SimdCharClassifier::new(),
22        }
23    }
24
25    /// Process an entire JSON buffer using SIMD operations
26    /// Returns positions of all structural characters in one pass
27    #[must_use]
28    #[inline]
29    pub fn find_structural_characters(&self, json_bytes: &[u8]) -> Vec<usize> {
30        let mut positions = Vec::new();
31
32        // Process in 64-byte chunks for optimal SIMD performance
33        for chunk_start in (0..json_bytes.len()).step_by(64) {
34            let chunk_end = (chunk_start + 64).min(json_bytes.len());
35            // Create a padded chunk if we're at the end
36            let mut padding = [0u8; 64];
37            let len = chunk_end - chunk_start;
38            padding[..len].copy_from_slice(&json_bytes[chunk_start..chunk_end]);
39
40            if let Some(structural_pos) = self.process_chunk_simd(&padding, len, chunk_start) {
41                positions.extend(structural_pos);
42            }
43        }
44
45        positions
46    }
47
48    /// Process a 64-byte chunk using SIMD operations
49    #[inline]
50    fn process_chunk_simd(
51        &self,
52        chunk: &[u8; 64],
53        valid_len: usize,
54        offset: usize,
55    ) -> Option<Vec<usize>> {
56        // Use the classifier to get bitmasks for all character types in parallel
57        let classes = self.classifier.classify_chunk(chunk);
58
59        // Combine all structural character positions
60        // We're interested in anything that is whitespace, structural, string, or number
61        let all_structural =
62            classes.whitespace | classes.structural | classes.string_chars | classes.numbers;
63
64        if all_structural == 0 {
65            return None;
66        }
67
68        // Convert bitmask to positions
69        let mut positions = Vec::with_capacity(all_structural.count_ones() as usize);
70
71        // Iterate over set bits
72        let mut mask = all_structural;
73        while mask != 0 {
74            let idx = mask.trailing_zeros() as usize;
75            if idx < valid_len {
76                positions.push(offset + idx);
77            }
78            mask &= !(1u64 << idx);
79        }
80
81        Some(positions)
82    }
83}
84
impl SimdCharClassifier {
    /// Create a new SIMD character classifier
    ///
    /// The classifier is a unit struct, so construction does no work and is
    /// usable in `const` contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
92
/// Result of SIMD character classification
///
/// Each field is a 64-bit mask over one 64-byte chunk: bit `i` is set when
/// byte `i` of the chunk belongs to that class. `PartialEq`/`Eq` are derived
/// so two classification results can be compared as a whole.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CharacterClasses {
    /// Bitmask of whitespace character positions
    pub whitespace: u64,
    /// Bitmask of structural character positions
    pub structural: u64,
    /// Bitmask of string-related character positions
    pub string_chars: u64,
    /// Bitmask of numeric character positions
    pub numbers: u64,
}
105
106/// SIMD-accelerated string operations
107pub struct SimdStringOps;
108
109impl SimdStringOps {
110    /// SIMD-accelerated string equality check
111    #[inline]
112    #[must_use]
113    pub fn equals(a: &[u8], b: &[u8]) -> bool {
114        // Rust's slice equality uses memcmp which is highly optimized (AVX/SSE)
115        a == b
116    }
117
118    /// SIMD-accelerated substring search
119    #[inline]
120    #[must_use]
121    pub fn find_substring(haystack: &[u8], needle: &[u8]) -> Option<usize> {
122        if needle.is_empty() {
123            return Some(0);
124        }
125        // Use memchr::memmem for SIMD-accelerated substring search (AVX2/SSE4.2)
126        memchr::memmem::find(haystack, needle)
127    }
128
129    /// SIMD-accelerated hash computation for field names
130    #[inline]
131    #[must_use]
132    pub fn hash_field_name(field: &[u8]) -> u64 {
133        // Use AHash for high-performance hashing (often uses AES-NI or similar)
134        use std::hash::{Hash, Hasher};
135        let mut hasher = ahash::AHasher::default();
136        field.hash(&mut hasher);
137        hasher.finish()
138    }
139}
140
141/// SIMD-accelerated line separator detection for JSONL
142pub struct SimdLineSeparator {
143    // Markers are implicit in memchr
144}
145
146impl SimdLineSeparator {
147    /// Create a new SIMD line separator detector
148    #[must_use]
149    pub const fn new() -> Self {
150        Self {}
151    }
152
153    /// Detect line boundaries in a data chunk using SIMD
154    #[must_use]
155    pub fn find_line_boundaries(&self, data: &[u8]) -> Vec<usize> {
156        // Use memchr iterator which exploits SIMD for finding byte occurrences
157        let mut boundaries: Vec<usize> = memchr::memchr_iter(b'\n', data)
158            .map(|pos| pos + 1) // Position after the \n
159            .collect();
160
161        // If data doesn't end with \n, add the end position
162        if !data.is_empty() && data[data.len() - 1] != b'\n' {
163            boundaries.push(data.len());
164        }
165
166        boundaries
167    }
168}
169
170/// SIMD-accelerated structural filtering for JSONL documents
171pub struct SimdStructuralFilter {
172    // No pre-computed masks needed for memchr implementation
173}
174
175impl SimdStructuralFilter {
176    /// Create a new SIMD structural filter
177    #[must_use]
178    pub const fn new() -> Self {
179        Self {}
180    }
181
182    /// Check if a JSON line contains required schema fields using SIMD
183    #[must_use]
184    pub fn matches_schema(&self, line: &[u8], required_fields: &[String]) -> bool {
185        if line.is_empty() {
186            return false;
187        }
188
189        // Fast pre-filter using memchr::memmem to check for required fields in the raw bytes
190        // This avoids utf-8 validation overhead if we just want to check presence
191        for field in required_fields {
192            // Need to search for "field" to be accurate, but strict JSON parsing is expensive here.
193            // Approximating with finding "field" substring is usually good enough for pre-filter.
194            // We construct the search needle: "field"
195            let needle = format!("\"{field}\"");
196            if memchr::memmem::find(line, needle.as_bytes()).is_none() {
197                return false;
198            }
199        }
200        true
201    }
202}
203
204#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
205use std::arch::x86_64::{
206    __m256i, _mm256_and_si256, _mm256_andnot_si256, _mm256_cmpeq_epi8, _mm256_cmpgt_epi8,
207    _mm256_movemask_epi8, _mm256_or_si256, _mm256_set1_epi8,
208};
209
210impl SimdCharClassifier {
211    /// Classify characters in a 64-byte chunk using SIMD (AVX2 optimized)
212    ///
213    /// # Safety
214    /// Uses SIMD intrinsics that are safe on `x86`/`x86_64` with AVX2 support.
215    /// The `_mm256_loadu_si256` intrinsic handles unaligned loads safely.
216    ///
217    /// # Panics
218    /// This function does not panic. The `try_into().unwrap()` calls are
219    /// guaranteed to succeed because we split a 64-byte array at index 32,
220    /// producing exactly two 32-byte slices.
221    #[inline]
222    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
223    #[must_use]
224    #[allow(clippy::too_many_lines)] // SIMD code benefits from inline processing
225    pub fn classify_chunk(&self, chunk: &[u8; 64]) -> CharacterClasses {
226        // Helper to convert u8 to i8 for SIMD constants (wrapping is intentional for byte values)
227        const fn to_i8(b: u8) -> i8 {
228            // Use from_ne_bytes for const-safe conversion that avoids the wrap warning
229            i8::from_ne_bytes([b])
230        }
231
232        // Helper to convert i32 movemask result to u32 (reinterpret bits)
233        const fn mask_to_u32(mask: i32) -> u32 {
234            // Use from_ne_bytes to reinterpret bits without sign-related warnings
235            u32::from_ne_bytes(mask.to_ne_bytes())
236        }
237
238        unsafe {
239            // Load 64 bytes into two 256-bit AVX2 registers
240            // We use std::mem::transmute to load the bytes, which is safe because
241            // _mm256_loadu_si256 is designed for unaligned loads.
242            // We avoid direct pointer cast to satisfy clippy's alignment checks.
243            let (first_half, second_half) = chunk.split_at(32);
244            let first_array: &[u8; 32] = first_half.try_into().unwrap();
245            let second_array: &[u8; 32] = second_half.try_into().unwrap();
246
247            // Use transmute to reinterpret the bytes as __m256i
248            // This is safe because we're just reinterpreting bytes
249            let v0: __m256i = std::mem::transmute::<[u8; 32], __m256i>(*first_array);
250            let v1: __m256i = std::mem::transmute::<[u8; 32], __m256i>(*second_array);
251
252            // 1. Whitespace: \t (09), \n (0A), \r (0D), space (20)
253            let space = _mm256_set1_epi8(to_i8(b' '));
254            let tab = _mm256_set1_epi8(to_i8(b'\t'));
255            let lf = _mm256_set1_epi8(to_i8(b'\n'));
256            let cr = _mm256_set1_epi8(to_i8(b'\r'));
257
258            let ws0 = _mm256_or_si256(
259                _mm256_or_si256(_mm256_cmpeq_epi8(v0, space), _mm256_cmpeq_epi8(v0, tab)),
260                _mm256_or_si256(_mm256_cmpeq_epi8(v0, lf), _mm256_cmpeq_epi8(v0, cr)),
261            );
262            let ws1 = _mm256_or_si256(
263                _mm256_or_si256(_mm256_cmpeq_epi8(v1, space), _mm256_cmpeq_epi8(v1, tab)),
264                _mm256_or_si256(_mm256_cmpeq_epi8(v1, lf), _mm256_cmpeq_epi8(v1, cr)),
265            );
266
267            let whitespace_mask = (u64::from(mask_to_u32(_mm256_movemask_epi8(ws1))) << 32)
268                | u64::from(mask_to_u32(_mm256_movemask_epi8(ws0)));
269
270            // 2. Structural: { } [ ] : ,
271            let brace_o = _mm256_set1_epi8(to_i8(b'{'));
272            let brace_c = _mm256_set1_epi8(to_i8(b'}'));
273            let bracket_o = _mm256_set1_epi8(to_i8(b'['));
274            let bracket_c = _mm256_set1_epi8(to_i8(b']'));
275            let colon = _mm256_set1_epi8(to_i8(b':'));
276            let comma = _mm256_set1_epi8(to_i8(b','));
277
278            let struct0 = _mm256_or_si256(
279                _mm256_or_si256(
280                    _mm256_cmpeq_epi8(v0, brace_o),
281                    _mm256_cmpeq_epi8(v0, brace_c),
282                ),
283                _mm256_or_si256(
284                    _mm256_or_si256(
285                        _mm256_cmpeq_epi8(v0, bracket_o),
286                        _mm256_cmpeq_epi8(v0, bracket_c),
287                    ),
288                    _mm256_or_si256(_mm256_cmpeq_epi8(v0, colon), _mm256_cmpeq_epi8(v0, comma)),
289                ),
290            );
291            let struct1 = _mm256_or_si256(
292                _mm256_or_si256(
293                    _mm256_cmpeq_epi8(v1, brace_o),
294                    _mm256_cmpeq_epi8(v1, brace_c),
295                ),
296                _mm256_or_si256(
297                    _mm256_or_si256(
298                        _mm256_cmpeq_epi8(v1, bracket_o),
299                        _mm256_cmpeq_epi8(v1, bracket_c),
300                    ),
301                    _mm256_or_si256(_mm256_cmpeq_epi8(v1, colon), _mm256_cmpeq_epi8(v1, comma)),
302                ),
303            );
304            let structural_mask = (u64::from(mask_to_u32(_mm256_movemask_epi8(struct1))) << 32)
305                | u64::from(mask_to_u32(_mm256_movemask_epi8(struct0)));
306
307            // 3. String: " and \
308            let quote = _mm256_set1_epi8(to_i8(b'"'));
309            let backslash = _mm256_set1_epi8(to_i8(b'\\'));
310
311            let str0 = _mm256_or_si256(
312                _mm256_cmpeq_epi8(v0, quote),
313                _mm256_cmpeq_epi8(v0, backslash),
314            );
315            let str1 = _mm256_or_si256(
316                _mm256_cmpeq_epi8(v1, quote),
317                _mm256_cmpeq_epi8(v1, backslash),
318            );
319            let string_mask = (u64::from(mask_to_u32(_mm256_movemask_epi8(str1))) << 32)
320                | u64::from(mask_to_u32(_mm256_movemask_epi8(str0)));
321
322            // 4. Numbers: 0-9, -, +, .
323            let dot = _mm256_set1_epi8(to_i8(b'.'));
324            let minus = _mm256_set1_epi8(to_i8(b'-'));
325            let plus = _mm256_set1_epi8(to_i8(b'+'));
326
327            // Range check for digits: x >= '0' && x <= '9'
328            // val >= 48 <=> val > 47. val <= 57 <=> !(val > 57)
329            let lower_bound = _mm256_set1_epi8(47);
330            let fifty_seven = _mm256_set1_epi8(57);
331            let all_ones = _mm256_set1_epi8(-1);
332
333            let is_digit0 = _mm256_and_si256(
334                _mm256_cmpgt_epi8(v0, lower_bound),
335                _mm256_andnot_si256(_mm256_cmpgt_epi8(v0, fifty_seven), all_ones),
336            );
337            let is_digit1 = _mm256_and_si256(
338                _mm256_cmpgt_epi8(v1, lower_bound),
339                _mm256_andnot_si256(_mm256_cmpgt_epi8(v1, fifty_seven), all_ones),
340            );
341
342            let num_markers0 = _mm256_or_si256(
343                _mm256_or_si256(_mm256_cmpeq_epi8(v0, dot), _mm256_cmpeq_epi8(v0, minus)),
344                _mm256_cmpeq_epi8(v0, plus),
345            );
346            let num_markers1 = _mm256_or_si256(
347                _mm256_or_si256(_mm256_cmpeq_epi8(v1, dot), _mm256_cmpeq_epi8(v1, minus)),
348                _mm256_cmpeq_epi8(v1, plus),
349            );
350
351            let num0 = _mm256_or_si256(is_digit0, num_markers0);
352            let num1 = _mm256_or_si256(is_digit1, num_markers1);
353
354            let number_mask = (u64::from(mask_to_u32(_mm256_movemask_epi8(num1))) << 32)
355                | u64::from(mask_to_u32(_mm256_movemask_epi8(num0)));
356
357            CharacterClasses {
358                whitespace: whitespace_mask,
359                structural: structural_mask,
360                string_chars: string_mask,
361                numbers: number_mask,
362            }
363        }
364    }
365
366    /// Classify characters in a 64-byte chunk using SIMD (ARM NEON optimized)
367    ///
368    /// # Safety
369    /// Uses SIMD intrinsics that are safe on aarch64 with NEON support.
370    #[inline]
371    #[cfg(target_arch = "aarch64")]
372    #[must_use]
373    #[allow(clippy::too_many_lines)] // SIMD code benefits from inlining
374    pub fn classify_chunk(&self, chunk: &[u8; 64]) -> CharacterClasses {
375        use std::arch::aarch64::{
376            vandq_u8, vceqq_u8, vcgtq_s8, vdupq_n_s8, vdupq_n_u8, vld1q_u8, vmvnq_u8, vorrq_u8,
377            vreinterpretq_s8_u8,
378        };
379
380        unsafe {
381            // Load 64 bytes as 4x 128-bit NEON vectors
382            let v0 = vld1q_u8(chunk.as_ptr());
383            let v1 = vld1q_u8(chunk.as_ptr().add(16));
384            let v2 = vld1q_u8(chunk.as_ptr().add(32));
385            let v3 = vld1q_u8(chunk.as_ptr().add(48));
386
387            // 1. Whitespace: \t (09), \n (0A), \r (0D), space (20)
388            let space = vdupq_n_u8(b' ');
389            let tab = vdupq_n_u8(b'\t');
390            let lf = vdupq_n_u8(b'\n');
391            let cr = vdupq_n_u8(b'\r');
392
393            let ws0 = vorrq_u8(
394                vorrq_u8(vceqq_u8(v0, space), vceqq_u8(v0, tab)),
395                vorrq_u8(vceqq_u8(v0, lf), vceqq_u8(v0, cr)),
396            );
397            let ws1 = vorrq_u8(
398                vorrq_u8(vceqq_u8(v1, space), vceqq_u8(v1, tab)),
399                vorrq_u8(vceqq_u8(v1, lf), vceqq_u8(v1, cr)),
400            );
401            let ws2 = vorrq_u8(
402                vorrq_u8(vceqq_u8(v2, space), vceqq_u8(v2, tab)),
403                vorrq_u8(vceqq_u8(v2, lf), vceqq_u8(v2, cr)),
404            );
405            let ws3 = vorrq_u8(
406                vorrq_u8(vceqq_u8(v3, space), vceqq_u8(v3, tab)),
407                vorrq_u8(vceqq_u8(v3, lf), vceqq_u8(v3, cr)),
408            );
409
410            let whitespace_mask = neon_to_bitmask_64(ws0, ws1, ws2, ws3);
411
412            // 2. Structural: { } [ ] : ,
413            let brace_o = vdupq_n_u8(b'{');
414            let brace_c = vdupq_n_u8(b'}');
415            let bracket_o = vdupq_n_u8(b'[');
416            let bracket_c = vdupq_n_u8(b']');
417            let colon = vdupq_n_u8(b':');
418            let comma = vdupq_n_u8(b',');
419
420            let struct0 = vorrq_u8(
421                vorrq_u8(vceqq_u8(v0, brace_o), vceqq_u8(v0, brace_c)),
422                vorrq_u8(
423                    vorrq_u8(vceqq_u8(v0, bracket_o), vceqq_u8(v0, bracket_c)),
424                    vorrq_u8(vceqq_u8(v0, colon), vceqq_u8(v0, comma)),
425                ),
426            );
427            let struct1 = vorrq_u8(
428                vorrq_u8(vceqq_u8(v1, brace_o), vceqq_u8(v1, brace_c)),
429                vorrq_u8(
430                    vorrq_u8(vceqq_u8(v1, bracket_o), vceqq_u8(v1, bracket_c)),
431                    vorrq_u8(vceqq_u8(v1, colon), vceqq_u8(v1, comma)),
432                ),
433            );
434            let struct2 = vorrq_u8(
435                vorrq_u8(vceqq_u8(v2, brace_o), vceqq_u8(v2, brace_c)),
436                vorrq_u8(
437                    vorrq_u8(vceqq_u8(v2, bracket_o), vceqq_u8(v2, bracket_c)),
438                    vorrq_u8(vceqq_u8(v2, colon), vceqq_u8(v2, comma)),
439                ),
440            );
441            let struct3 = vorrq_u8(
442                vorrq_u8(vceqq_u8(v3, brace_o), vceqq_u8(v3, brace_c)),
443                vorrq_u8(
444                    vorrq_u8(vceqq_u8(v3, bracket_o), vceqq_u8(v3, bracket_c)),
445                    vorrq_u8(vceqq_u8(v3, colon), vceqq_u8(v3, comma)),
446                ),
447            );
448
449            let structural_mask = neon_to_bitmask_64(struct0, struct1, struct2, struct3);
450
451            // 3. String: " and \
452            let quote = vdupq_n_u8(b'"');
453            let backslash = vdupq_n_u8(b'\\');
454
455            let str0 = vorrq_u8(vceqq_u8(v0, quote), vceqq_u8(v0, backslash));
456            let str1 = vorrq_u8(vceqq_u8(v1, quote), vceqq_u8(v1, backslash));
457            let str2 = vorrq_u8(vceqq_u8(v2, quote), vceqq_u8(v2, backslash));
458            let str3 = vorrq_u8(vceqq_u8(v3, quote), vceqq_u8(v3, backslash));
459
460            let string_mask = neon_to_bitmask_64(str0, str1, str2, str3);
461
462            // 4. Numbers: 0-9, -, +, .
463            let dot = vdupq_n_u8(b'.');
464            let minus = vdupq_n_u8(b'-');
465            let plus = vdupq_n_u8(b'+');
466
467            // Range check for digits: x >= '0' && x <= '9'
468            // Using signed comparison: val > 47 && val <= 57
469            let lower_bound = vdupq_n_s8(47);
470            let upper_bound = vdupq_n_s8(57);
471
472            let v0_s = vreinterpretq_s8_u8(v0);
473            let v1_s = vreinterpretq_s8_u8(v1);
474            let v2_s = vreinterpretq_s8_u8(v2);
475            let v3_s = vreinterpretq_s8_u8(v3);
476
477            // vcgtq_s8 returns uint8x16_t (0xFF for true, 0x00 for false)
478            let is_digit0 = vandq_u8(
479                vcgtq_s8(v0_s, lower_bound),
480                vmvnq_u8(vcgtq_s8(v0_s, upper_bound)),
481            );
482            let is_digit1 = vandq_u8(
483                vcgtq_s8(v1_s, lower_bound),
484                vmvnq_u8(vcgtq_s8(v1_s, upper_bound)),
485            );
486            let is_digit2 = vandq_u8(
487                vcgtq_s8(v2_s, lower_bound),
488                vmvnq_u8(vcgtq_s8(v2_s, upper_bound)),
489            );
490            let is_digit3 = vandq_u8(
491                vcgtq_s8(v3_s, lower_bound),
492                vmvnq_u8(vcgtq_s8(v3_s, upper_bound)),
493            );
494
495            let num_markers0 = vorrq_u8(
496                vorrq_u8(vceqq_u8(v0, dot), vceqq_u8(v0, minus)),
497                vceqq_u8(v0, plus),
498            );
499            let num_markers1 = vorrq_u8(
500                vorrq_u8(vceqq_u8(v1, dot), vceqq_u8(v1, minus)),
501                vceqq_u8(v1, plus),
502            );
503            let num_markers2 = vorrq_u8(
504                vorrq_u8(vceqq_u8(v2, dot), vceqq_u8(v2, minus)),
505                vceqq_u8(v2, plus),
506            );
507            let num_markers3 = vorrq_u8(
508                vorrq_u8(vceqq_u8(v3, dot), vceqq_u8(v3, minus)),
509                vceqq_u8(v3, plus),
510            );
511
512            let num0 = vorrq_u8(is_digit0, num_markers0);
513            let num1 = vorrq_u8(is_digit1, num_markers1);
514            let num2 = vorrq_u8(is_digit2, num_markers2);
515            let num3 = vorrq_u8(is_digit3, num_markers3);
516
517            let number_mask = neon_to_bitmask_64(num0, num1, num2, num3);
518
519            CharacterClasses {
520                whitespace: whitespace_mask,
521                structural: structural_mask,
522                string_chars: string_mask,
523                numbers: number_mask,
524            }
525        }
526    }
527
528    /// Scalar fallback for architectures without SIMD support
529    #[inline]
530    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
531    #[must_use]
532    pub fn classify_chunk(&self, chunk: &[u8; 64]) -> CharacterClasses {
533        let mut whitespace: u64 = 0;
534        let mut structural: u64 = 0;
535        let mut string_chars: u64 = 0;
536        let mut numbers: u64 = 0;
537
538        for (i, &byte) in chunk.iter().enumerate() {
539            let bit = 1u64 << i;
540            match byte {
541                b' ' | b'\t' | b'\n' | b'\r' => whitespace |= bit,
542                b'{' | b'}' | b'[' | b']' | b':' | b',' => structural |= bit,
543                b'"' | b'\\' => string_chars |= bit,
544                b'0'..=b'9' | b'-' | b'+' | b'.' => numbers |= bit,
545                _ => {}
546            }
547        }
548
549        CharacterClasses {
550            whitespace,
551            structural,
552            string_chars,
553            numbers,
554        }
555    }
556}
557
/// Pack a NEON comparison result into a 16-bit mask, one bit per lane
///
/// NEON has no direct movemask instruction, so the vector is viewed as 16
/// bytes and bit `i` of the result is set when lane `i` is non-zero
/// (comparison results are 0x00 or 0xFF per lane).
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn neon_to_bitmask_16(v: std::arch::aarch64::uint8x16_t) -> u16 {
    // SAFETY: `uint8x16_t` is a 16-byte vector, so viewing it as `[u8; 16]`
    // preserves size and every bit pattern is a valid byte array.
    let lanes: [u8; 16] = unsafe { std::mem::transmute(v) };
    lanes
        .iter()
        .enumerate()
        .filter(|&(_, &lane)| lane != 0)
        .fold(0u16, |bits, (lane_index, _)| bits | (1 << lane_index))
}
576
/// Pack four NEON comparison results (64 lanes) into one 64-bit mask
///
/// `v0` supplies bits 0..16, `v1` bits 16..32, `v2` bits 32..48 and `v3`
/// bits 48..64.
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn neon_to_bitmask_64(
    v0: std::arch::aarch64::uint8x16_t,
    v1: std::arch::aarch64::uint8x16_t,
    v2: std::arch::aarch64::uint8x16_t,
    v3: std::arch::aarch64::uint8x16_t,
) -> u64 {
    // SAFETY: the per-vector helper only reinterprets each vector as bytes;
    // the vectors are passed by value and are therefore valid.
    unsafe {
        // Start from the most significant group and shift earlier groups in
        // below it, so `v0` ends up in the lowest 16 bits.
        [v0, v1, v2, v3]
            .into_iter()
            .rev()
            .fold(0u64, |acc, v| (acc << 16) | u64::from(neon_to_bitmask_16(v)))
    }
}
596
597/// SIMD-accelerated pattern matching for schema operations
598pub struct SimdPatternMatcher {
599    /// Pre-compiled SIMD patterns
600    patterns: Vec<SimdPattern>,
601}
602
603impl SimdPatternMatcher {
604    /// Create a new SIMD pattern matcher
605    #[must_use]
606    pub const fn new() -> Self {
607        Self {
608            patterns: Vec::new(),
609        }
610    }
611
612    /// Add a pattern to match against
613    pub fn add_pattern(&mut self, pattern: &str) {
614        // Compile pattern into SIMD-friendly representation
615        let compiled = SimdPattern::compile(pattern);
616        self.patterns.push(compiled);
617    }
618
619    /// Check if text matches any of the compiled patterns
620    #[must_use]
621    pub fn matches_any(&self, text: &[u8]) -> bool {
622        for pattern in &self.patterns {
623            if pattern.matches(text) {
624                return true;
625            }
626        }
627        false
628    }
629}
630
631/// SIMD-compiled pattern for fast matching
632struct SimdPattern {
633    /// Pattern bytes for SIMD comparison
634    pattern_bytes: Vec<u8>,
635    /// SIMD-friendly hash for fast pre-filtering
636    pattern_hash: u64,
637}
638
639impl SimdPattern {
640    fn compile(pattern: &str) -> Self {
641        let pattern_bytes = pattern.as_bytes().to_vec();
642        let pattern_hash = SimdStringOps::hash_field_name(&pattern_bytes);
643
644        Self {
645            pattern_bytes,
646            pattern_hash,
647        }
648    }
649
650    fn matches(&self, text: &[u8]) -> bool {
651        // SIMD-accelerated pattern matching
652        // First check hash for fast rejection
653        let text_hash = SimdStringOps::hash_field_name(text);
654        if text_hash != self.pattern_hash {
655            return false;
656        }
657
658        // Then do full SIMD comparison
659        SimdStringOps::equals(text, &self.pattern_bytes)
660    }
661}
662
impl Default for SimdCharClassifier {
    /// Same as [`SimdCharClassifier::new`]
    fn default() -> Self {
        Self::new()
    }
}
668
impl Default for SimdJsonStructuralDetector {
    /// Same as [`SimdJsonStructuralDetector::new`]
    fn default() -> Self {
        Self::new()
    }
}
674
impl Default for SimdLineSeparator {
    /// Same as [`SimdLineSeparator::new`]
    fn default() -> Self {
        Self::new()
    }
}
680
impl Default for SimdStructuralFilter {
    /// Same as [`SimdStructuralFilter::new`]
    fn default() -> Self {
        Self::new()
    }
}
686
impl Default for SimdPatternMatcher {
    /// Same as [`SimdPatternMatcher::new`]: a matcher with no patterns
    fn default() -> Self {
        Self::new()
    }
}
692
693#[cfg(test)]
694mod tests {
695    use super::*;
696
697    #[test]
698    fn test_simd_char_classifier_new() {
699        let classifier = SimdCharClassifier::new();
700        let chunk = [0u8; 64];
701        let classes = classifier.classify_chunk(&chunk);
702        assert_eq!(classes.whitespace, 0);
703        assert_eq!(classes.structural, 0);
704    }
705
706    #[test]
707    fn test_simd_char_classifier_whitespace() {
708        let classifier = SimdCharClassifier::new();
709        let mut chunk = [0u8; 64];
710        chunk[0] = b' ';
711        chunk[1] = b'\t';
712        chunk[2] = b'\n';
713        chunk[3] = b'\r';
714        let classes = classifier.classify_chunk(&chunk);
715        assert!(classes.whitespace != 0);
716    }
717
718    #[test]
719    fn test_simd_char_classifier_structural() {
720        let classifier = SimdCharClassifier::new();
721        let mut chunk = [0u8; 64];
722        chunk[0] = b'{';
723        chunk[1] = b'}';
724        chunk[2] = b'[';
725        chunk[3] = b']';
726        chunk[4] = b':';
727        chunk[5] = b',';
728        let classes = classifier.classify_chunk(&chunk);
729        assert!(classes.structural != 0);
730    }
731
732    #[test]
733    fn test_simd_char_classifier_string_chars() {
734        let classifier = SimdCharClassifier::new();
735        let mut chunk = [0u8; 64];
736        chunk[0] = b'"';
737        chunk[1] = b'\\';
738        let classes = classifier.classify_chunk(&chunk);
739        assert!(classes.string_chars != 0);
740    }
741
742    #[test]
743    fn test_simd_char_classifier_numbers() {
744        let classifier = SimdCharClassifier::new();
745        let mut chunk = [0u8; 64];
746        chunk[0] = b'0';
747        chunk[1] = b'5';
748        chunk[2] = b'9';
749        chunk[3] = b'-';
750        chunk[4] = b'+';
751        chunk[5] = b'.';
752        let classes = classifier.classify_chunk(&chunk);
753        assert!(classes.numbers != 0);
754    }
755
756    #[test]
757    fn test_character_classes_clone() {
758        let classes = CharacterClasses {
759            whitespace: 0xFF,
760            structural: 0xAA,
761            string_chars: 0x55,
762            numbers: 0x11,
763        };
764        let cloned = classes;
765        assert_eq!(cloned.whitespace, 0xFF);
766        assert_eq!(cloned.structural, 0xAA);
767    }
768
769    #[test]
770    fn test_simd_json_structural_detector_new() {
771        let detector = SimdJsonStructuralDetector::new();
772        let positions = detector.find_structural_characters(b"{}");
773        assert!(!positions.is_empty());
774    }
775
776    #[test]
777    fn test_simd_json_structural_detector_empty() {
778        let detector = SimdJsonStructuralDetector::new();
779        let positions = detector.find_structural_characters(b"");
780        assert!(positions.is_empty());
781    }
782
783    #[test]
784    fn test_simd_json_structural_detector_json() {
785        let detector = SimdJsonStructuralDetector::new();
786        let json = b"{\"name\":\"test\",\"value\":123}";
787        let positions = detector.find_structural_characters(json);
788        assert!(!positions.is_empty());
789    }
790
791    #[test]
792    fn test_simd_json_structural_detector_large() {
793        let detector = SimdJsonStructuralDetector::new();
794        // Create a buffer larger than 64 bytes to test chunk processing
795        let json =
796            b"{\"name\":\"test\",\"value\":123,\"extra\":\"more data here to exceed 64 bytes\"}";
797        let positions = detector.find_structural_characters(json);
798        assert!(!positions.is_empty());
799    }
800
801    #[test]
802    fn test_simd_string_ops_equals() {
803        assert!(SimdStringOps::equals(b"hello", b"hello"));
804        assert!(!SimdStringOps::equals(b"hello", b"world"));
805        assert!(!SimdStringOps::equals(b"hello", b"hell"));
806    }
807
808    #[test]
809    fn test_simd_string_ops_equals_empty() {
810        assert!(SimdStringOps::equals(b"", b""));
811        assert!(!SimdStringOps::equals(b"", b"a"));
812    }
813
814    #[test]
815    fn test_simd_string_ops_find_substring() {
816        assert_eq!(
817            SimdStringOps::find_substring(b"hello world", b"world"),
818            Some(6)
819        );
820        assert_eq!(
821            SimdStringOps::find_substring(b"hello world", b"hello"),
822            Some(0)
823        );
824        assert_eq!(SimdStringOps::find_substring(b"hello world", b"xyz"), None);
825    }
826
827    #[test]
828    fn test_simd_string_ops_find_substring_empty() {
829        assert_eq!(SimdStringOps::find_substring(b"hello", b""), Some(0));
830        assert_eq!(SimdStringOps::find_substring(b"", b"a"), None);
831    }
832
833    #[test]
834    fn test_simd_string_ops_hash_field_name() {
835        let hash1 = SimdStringOps::hash_field_name(b"name");
836        let hash2 = SimdStringOps::hash_field_name(b"name");
837        let hash3 = SimdStringOps::hash_field_name(b"value");
838        assert_eq!(hash1, hash2);
839        assert_ne!(hash1, hash3);
840    }
841
842    #[test]
843    fn test_simd_line_separator_new() {
844        let separator = SimdLineSeparator::new();
845        let boundaries = separator.find_line_boundaries(b"line1\nline2\n");
846        assert!(!boundaries.is_empty());
847    }
848
849    #[test]
850    fn test_simd_line_separator_empty() {
851        let separator = SimdLineSeparator::new();
852        let boundaries = separator.find_line_boundaries(b"");
853        assert!(boundaries.is_empty());
854    }
855
856    #[test]
857    fn test_simd_line_separator_no_newline() {
858        let separator = SimdLineSeparator::new();
859        let boundaries = separator.find_line_boundaries(b"single line");
860        assert_eq!(boundaries.len(), 1);
861        assert_eq!(boundaries[0], 11); // End of data
862    }
863
864    #[test]
865    fn test_simd_line_separator_multiple_lines() {
866        let separator = SimdLineSeparator::new();
867        let boundaries = separator.find_line_boundaries(b"line1\nline2\nline3");
868        assert_eq!(boundaries.len(), 3);
869    }
870
871    #[test]
872    fn test_simd_line_separator_ends_with_newline() {
873        let separator = SimdLineSeparator::new();
874        let boundaries = separator.find_line_boundaries(b"line1\nline2\n");
875        assert_eq!(boundaries.len(), 2);
876    }
877
878    #[test]
879    fn test_simd_structural_filter_new() {
880        let filter = SimdStructuralFilter::new();
881        let matches = filter.matches_schema(b"{\"name\":\"test\"}", &["name".to_string()]);
882        assert!(matches);
883    }
884
885    #[test]
886    fn test_simd_structural_filter_empty() {
887        let filter = SimdStructuralFilter::new();
888        let matches = filter.matches_schema(b"", &["name".to_string()]);
889        assert!(!matches);
890    }
891
892    #[test]
893    fn test_simd_structural_filter_no_match() {
894        let filter = SimdStructuralFilter::new();
895        let matches = filter.matches_schema(b"{\"value\":123}", &["name".to_string()]);
896        assert!(!matches);
897    }
898
899    #[test]
900    fn test_simd_structural_filter_multiple_fields() {
901        let filter = SimdStructuralFilter::new();
902        let json = b"{\"name\":\"test\",\"age\":30}";
903        let matches = filter.matches_schema(json, &["name".to_string(), "age".to_string()]);
904        assert!(matches);
905    }
906
907    #[test]
908    fn test_simd_structural_filter_partial_match() {
909        let filter = SimdStructuralFilter::new();
910        let json = b"{\"name\":\"test\"}";
911        let matches = filter.matches_schema(json, &["name".to_string(), "age".to_string()]);
912        assert!(!matches); // All fields must match
913    }
914
915    #[test]
916    fn test_simd_pattern_matcher_new() {
917        let matcher = SimdPatternMatcher::new();
918        assert!(!matcher.matches_any(b"test"));
919    }
920
921    #[test]
922    fn test_simd_pattern_matcher_add_pattern() {
923        let mut matcher = SimdPatternMatcher::new();
924        matcher.add_pattern("test");
925        assert!(matcher.matches_any(b"test"));
926        assert!(!matcher.matches_any(b"other"));
927    }
928
929    #[test]
930    fn test_simd_pattern_matcher_multiple_patterns() {
931        let mut matcher = SimdPatternMatcher::new();
932        matcher.add_pattern("hello");
933        matcher.add_pattern("world");
934        assert!(matcher.matches_any(b"hello"));
935        assert!(matcher.matches_any(b"world"));
936        assert!(!matcher.matches_any(b"other"));
937    }
938
939    #[test]
940    fn test_simd_pattern_compile_and_match() {
941        let pattern = SimdPattern::compile("test");
942        assert!(pattern.matches(b"test"));
943        assert!(!pattern.matches(b"other"));
944    }
945
946    #[test]
947    fn test_simd_pattern_hash_mismatch() {
948        let pattern = SimdPattern::compile("test");
949        assert!(!pattern.matches(b"different"));
950    }
951
952    #[test]
953    fn test_character_classes_debug() {
954        let classes = CharacterClasses {
955            whitespace: 1,
956            structural: 2,
957            string_chars: 3,
958            numbers: 4,
959        };
960        let debug = format!("{classes:?}");
961        assert!(debug.contains("whitespace"));
962        assert!(debug.contains("structural"));
963    }
964
965    #[test]
966    fn test_simd_json_structural_detector_process_chunk() {
967        let detector = SimdJsonStructuralDetector::new();
968        // Test with exactly 64 bytes
969        let mut json = [b' '; 64];
970        json[0] = b'{';
971        json[63] = b'}';
972        let positions = detector.find_structural_characters(&json);
973        assert!(!positions.is_empty());
974    }
975}