/// Stateless classifier that sorts the bytes of a 64-byte chunk into JSON character classes.
///
/// The type has no fields, so it is free to copy and to construct; create one with
/// `SimdCharClassifier::new()` or `Default::default()`.
#[derive(Debug, Clone, Copy)]
pub struct SimdCharClassifier;
9
10pub struct SimdJsonStructuralDetector {
13 classifier: SimdCharClassifier,
14}
15
16impl SimdJsonStructuralDetector {
17 #[must_use]
19 pub const fn new() -> Self {
20 Self {
21 classifier: SimdCharClassifier::new(),
22 }
23 }
24
25 #[must_use]
28 #[inline]
29 pub fn find_structural_characters(&self, json_bytes: &[u8]) -> Vec<usize> {
30 let mut positions = Vec::new();
31
32 for chunk_start in (0..json_bytes.len()).step_by(64) {
34 let chunk_end = (chunk_start + 64).min(json_bytes.len());
35 let mut padding = [0u8; 64];
37 let len = chunk_end - chunk_start;
38 padding[..len].copy_from_slice(&json_bytes[chunk_start..chunk_end]);
39
40 if let Some(structural_pos) = self.process_chunk_simd(&padding, len, chunk_start) {
41 positions.extend(structural_pos);
42 }
43 }
44
45 positions
46 }
47
48 #[inline]
50 fn process_chunk_simd(
51 &self,
52 chunk: &[u8; 64],
53 valid_len: usize,
54 offset: usize,
55 ) -> Option<Vec<usize>> {
56 let classes = self.classifier.classify_chunk(chunk);
58
59 let all_structural =
62 classes.whitespace | classes.structural | classes.string_chars | classes.numbers;
63
64 if all_structural == 0 {
65 return None;
66 }
67
68 let mut positions = Vec::with_capacity(all_structural.count_ones() as usize);
70
71 let mut mask = all_structural;
73 while mask != 0 {
74 let idx = mask.trailing_zeros() as usize;
75 if idx < valid_len {
76 positions.push(offset + idx);
77 }
78 mask &= !(1u64 << idx);
79 }
80
81 Some(positions)
82 }
83}
84
impl SimdCharClassifier {
    /// Creates a classifier.
    ///
    /// The type is a unit struct, so this performs no work and can be used in `const`
    /// contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
92
/// Per-class bitmasks for one 64-byte chunk.
///
/// In every mask, bit `i` (counting from the least significant bit) describes byte `i` of
/// the classified chunk.
#[derive(Debug, Clone)]
pub struct CharacterClasses {
    /// Bytes equal to space, tab, line feed or carriage return.
    pub whitespace: u64,
    /// Bytes equal to `{`, `}`, `[`, `]`, `:` or `,`.
    pub structural: u64,
    /// Bytes equal to `"` or `\`.
    pub string_chars: u64,
    /// ASCII digits `0`-`9` plus `-`, `+` and `.`.
    pub numbers: u64,
}
105
106pub struct SimdStringOps;
108
109impl SimdStringOps {
110 #[inline]
112 #[must_use]
113 pub fn equals(a: &[u8], b: &[u8]) -> bool {
114 a == b
116 }
117
118 #[inline]
120 #[must_use]
121 pub fn find_substring(haystack: &[u8], needle: &[u8]) -> Option<usize> {
122 if needle.is_empty() {
123 return Some(0);
124 }
125 memchr::memmem::find(haystack, needle)
127 }
128
129 #[inline]
131 #[must_use]
132 pub fn hash_field_name(field: &[u8]) -> u64 {
133 use std::hash::{Hash, Hasher};
135 let mut hasher = ahash::AHasher::default();
136 field.hash(&mut hasher);
137 hasher.finish()
138 }
139}
140
141pub struct SimdLineSeparator {
143 }
145
146impl SimdLineSeparator {
147 #[must_use]
149 pub const fn new() -> Self {
150 Self {}
151 }
152
153 #[must_use]
155 pub fn find_line_boundaries(&self, data: &[u8]) -> Vec<usize> {
156 let mut boundaries: Vec<usize> = memchr::memchr_iter(b'\n', data)
158 .map(|pos| pos + 1) .collect();
160
161 if !data.is_empty() && data[data.len() - 1] != b'\n' {
163 boundaries.push(data.len());
164 }
165
166 boundaries
167 }
168}
169
170pub struct SimdStructuralFilter {
172 }
174
175impl SimdStructuralFilter {
176 #[must_use]
178 pub const fn new() -> Self {
179 Self {}
180 }
181
182 #[must_use]
184 pub fn matches_schema(&self, line: &[u8], required_fields: &[String]) -> bool {
185 if line.is_empty() {
186 return false;
187 }
188
189 for field in required_fields {
192 let needle = format!("\"{field}\"");
196 if memchr::memmem::find(line, needle.as_bytes()).is_none() {
197 return false;
198 }
199 }
200 true
201 }
202}
203
204#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
205use std::arch::x86_64::{
206 __m256i, _mm256_and_si256, _mm256_andnot_si256, _mm256_cmpeq_epi8, _mm256_cmpgt_epi8,
207 _mm256_movemask_epi8, _mm256_or_si256, _mm256_set1_epi8,
208};
209
210impl SimdCharClassifier {
211 #[inline]
222 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
223 #[must_use]
224 #[allow(clippy::too_many_lines)] pub fn classify_chunk(&self, chunk: &[u8; 64]) -> CharacterClasses {
226 const fn to_i8(b: u8) -> i8 {
228 i8::from_ne_bytes([b])
230 }
231
232 const fn mask_to_u32(mask: i32) -> u32 {
234 u32::from_ne_bytes(mask.to_ne_bytes())
236 }
237
238 unsafe {
239 let (first_half, second_half) = chunk.split_at(32);
244 let first_array: &[u8; 32] = first_half.try_into().unwrap();
245 let second_array: &[u8; 32] = second_half.try_into().unwrap();
246
247 let v0: __m256i = std::mem::transmute::<[u8; 32], __m256i>(*first_array);
250 let v1: __m256i = std::mem::transmute::<[u8; 32], __m256i>(*second_array);
251
252 let space = _mm256_set1_epi8(to_i8(b' '));
254 let tab = _mm256_set1_epi8(to_i8(b'\t'));
255 let lf = _mm256_set1_epi8(to_i8(b'\n'));
256 let cr = _mm256_set1_epi8(to_i8(b'\r'));
257
258 let ws0 = _mm256_or_si256(
259 _mm256_or_si256(_mm256_cmpeq_epi8(v0, space), _mm256_cmpeq_epi8(v0, tab)),
260 _mm256_or_si256(_mm256_cmpeq_epi8(v0, lf), _mm256_cmpeq_epi8(v0, cr)),
261 );
262 let ws1 = _mm256_or_si256(
263 _mm256_or_si256(_mm256_cmpeq_epi8(v1, space), _mm256_cmpeq_epi8(v1, tab)),
264 _mm256_or_si256(_mm256_cmpeq_epi8(v1, lf), _mm256_cmpeq_epi8(v1, cr)),
265 );
266
267 let whitespace_mask = (u64::from(mask_to_u32(_mm256_movemask_epi8(ws1))) << 32)
268 | u64::from(mask_to_u32(_mm256_movemask_epi8(ws0)));
269
270 let brace_o = _mm256_set1_epi8(to_i8(b'{'));
272 let brace_c = _mm256_set1_epi8(to_i8(b'}'));
273 let bracket_o = _mm256_set1_epi8(to_i8(b'['));
274 let bracket_c = _mm256_set1_epi8(to_i8(b']'));
275 let colon = _mm256_set1_epi8(to_i8(b':'));
276 let comma = _mm256_set1_epi8(to_i8(b','));
277
278 let struct0 = _mm256_or_si256(
279 _mm256_or_si256(
280 _mm256_cmpeq_epi8(v0, brace_o),
281 _mm256_cmpeq_epi8(v0, brace_c),
282 ),
283 _mm256_or_si256(
284 _mm256_or_si256(
285 _mm256_cmpeq_epi8(v0, bracket_o),
286 _mm256_cmpeq_epi8(v0, bracket_c),
287 ),
288 _mm256_or_si256(_mm256_cmpeq_epi8(v0, colon), _mm256_cmpeq_epi8(v0, comma)),
289 ),
290 );
291 let struct1 = _mm256_or_si256(
292 _mm256_or_si256(
293 _mm256_cmpeq_epi8(v1, brace_o),
294 _mm256_cmpeq_epi8(v1, brace_c),
295 ),
296 _mm256_or_si256(
297 _mm256_or_si256(
298 _mm256_cmpeq_epi8(v1, bracket_o),
299 _mm256_cmpeq_epi8(v1, bracket_c),
300 ),
301 _mm256_or_si256(_mm256_cmpeq_epi8(v1, colon), _mm256_cmpeq_epi8(v1, comma)),
302 ),
303 );
304 let structural_mask = (u64::from(mask_to_u32(_mm256_movemask_epi8(struct1))) << 32)
305 | u64::from(mask_to_u32(_mm256_movemask_epi8(struct0)));
306
307 let quote = _mm256_set1_epi8(to_i8(b'"'));
309 let backslash = _mm256_set1_epi8(to_i8(b'\\'));
310
311 let str0 = _mm256_or_si256(
312 _mm256_cmpeq_epi8(v0, quote),
313 _mm256_cmpeq_epi8(v0, backslash),
314 );
315 let str1 = _mm256_or_si256(
316 _mm256_cmpeq_epi8(v1, quote),
317 _mm256_cmpeq_epi8(v1, backslash),
318 );
319 let string_mask = (u64::from(mask_to_u32(_mm256_movemask_epi8(str1))) << 32)
320 | u64::from(mask_to_u32(_mm256_movemask_epi8(str0)));
321
322 let dot = _mm256_set1_epi8(to_i8(b'.'));
324 let minus = _mm256_set1_epi8(to_i8(b'-'));
325 let plus = _mm256_set1_epi8(to_i8(b'+'));
326
327 let lower_bound = _mm256_set1_epi8(47);
330 let fifty_seven = _mm256_set1_epi8(57);
331 let all_ones = _mm256_set1_epi8(-1);
332
333 let is_digit0 = _mm256_and_si256(
334 _mm256_cmpgt_epi8(v0, lower_bound),
335 _mm256_andnot_si256(_mm256_cmpgt_epi8(v0, fifty_seven), all_ones),
336 );
337 let is_digit1 = _mm256_and_si256(
338 _mm256_cmpgt_epi8(v1, lower_bound),
339 _mm256_andnot_si256(_mm256_cmpgt_epi8(v1, fifty_seven), all_ones),
340 );
341
342 let num_markers0 = _mm256_or_si256(
343 _mm256_or_si256(_mm256_cmpeq_epi8(v0, dot), _mm256_cmpeq_epi8(v0, minus)),
344 _mm256_cmpeq_epi8(v0, plus),
345 );
346 let num_markers1 = _mm256_or_si256(
347 _mm256_or_si256(_mm256_cmpeq_epi8(v1, dot), _mm256_cmpeq_epi8(v1, minus)),
348 _mm256_cmpeq_epi8(v1, plus),
349 );
350
351 let num0 = _mm256_or_si256(is_digit0, num_markers0);
352 let num1 = _mm256_or_si256(is_digit1, num_markers1);
353
354 let number_mask = (u64::from(mask_to_u32(_mm256_movemask_epi8(num1))) << 32)
355 | u64::from(mask_to_u32(_mm256_movemask_epi8(num0)));
356
357 CharacterClasses {
358 whitespace: whitespace_mask,
359 structural: structural_mask,
360 string_chars: string_mask,
361 numbers: number_mask,
362 }
363 }
364 }
365
366 #[inline]
371 #[cfg(target_arch = "aarch64")]
372 #[must_use]
373 #[allow(clippy::too_many_lines)] pub fn classify_chunk(&self, chunk: &[u8; 64]) -> CharacterClasses {
375 use std::arch::aarch64::{
376 vandq_u8, vceqq_u8, vcgtq_s8, vdupq_n_s8, vdupq_n_u8, vld1q_u8, vmvnq_u8, vorrq_u8,
377 vreinterpretq_s8_u8,
378 };
379
380 unsafe {
381 let v0 = vld1q_u8(chunk.as_ptr());
383 let v1 = vld1q_u8(chunk.as_ptr().add(16));
384 let v2 = vld1q_u8(chunk.as_ptr().add(32));
385 let v3 = vld1q_u8(chunk.as_ptr().add(48));
386
387 let space = vdupq_n_u8(b' ');
389 let tab = vdupq_n_u8(b'\t');
390 let lf = vdupq_n_u8(b'\n');
391 let cr = vdupq_n_u8(b'\r');
392
393 let ws0 = vorrq_u8(
394 vorrq_u8(vceqq_u8(v0, space), vceqq_u8(v0, tab)),
395 vorrq_u8(vceqq_u8(v0, lf), vceqq_u8(v0, cr)),
396 );
397 let ws1 = vorrq_u8(
398 vorrq_u8(vceqq_u8(v1, space), vceqq_u8(v1, tab)),
399 vorrq_u8(vceqq_u8(v1, lf), vceqq_u8(v1, cr)),
400 );
401 let ws2 = vorrq_u8(
402 vorrq_u8(vceqq_u8(v2, space), vceqq_u8(v2, tab)),
403 vorrq_u8(vceqq_u8(v2, lf), vceqq_u8(v2, cr)),
404 );
405 let ws3 = vorrq_u8(
406 vorrq_u8(vceqq_u8(v3, space), vceqq_u8(v3, tab)),
407 vorrq_u8(vceqq_u8(v3, lf), vceqq_u8(v3, cr)),
408 );
409
410 let whitespace_mask = neon_to_bitmask_64(ws0, ws1, ws2, ws3);
411
412 let brace_o = vdupq_n_u8(b'{');
414 let brace_c = vdupq_n_u8(b'}');
415 let bracket_o = vdupq_n_u8(b'[');
416 let bracket_c = vdupq_n_u8(b']');
417 let colon = vdupq_n_u8(b':');
418 let comma = vdupq_n_u8(b',');
419
420 let struct0 = vorrq_u8(
421 vorrq_u8(vceqq_u8(v0, brace_o), vceqq_u8(v0, brace_c)),
422 vorrq_u8(
423 vorrq_u8(vceqq_u8(v0, bracket_o), vceqq_u8(v0, bracket_c)),
424 vorrq_u8(vceqq_u8(v0, colon), vceqq_u8(v0, comma)),
425 ),
426 );
427 let struct1 = vorrq_u8(
428 vorrq_u8(vceqq_u8(v1, brace_o), vceqq_u8(v1, brace_c)),
429 vorrq_u8(
430 vorrq_u8(vceqq_u8(v1, bracket_o), vceqq_u8(v1, bracket_c)),
431 vorrq_u8(vceqq_u8(v1, colon), vceqq_u8(v1, comma)),
432 ),
433 );
434 let struct2 = vorrq_u8(
435 vorrq_u8(vceqq_u8(v2, brace_o), vceqq_u8(v2, brace_c)),
436 vorrq_u8(
437 vorrq_u8(vceqq_u8(v2, bracket_o), vceqq_u8(v2, bracket_c)),
438 vorrq_u8(vceqq_u8(v2, colon), vceqq_u8(v2, comma)),
439 ),
440 );
441 let struct3 = vorrq_u8(
442 vorrq_u8(vceqq_u8(v3, brace_o), vceqq_u8(v3, brace_c)),
443 vorrq_u8(
444 vorrq_u8(vceqq_u8(v3, bracket_o), vceqq_u8(v3, bracket_c)),
445 vorrq_u8(vceqq_u8(v3, colon), vceqq_u8(v3, comma)),
446 ),
447 );
448
449 let structural_mask = neon_to_bitmask_64(struct0, struct1, struct2, struct3);
450
451 let quote = vdupq_n_u8(b'"');
453 let backslash = vdupq_n_u8(b'\\');
454
455 let str0 = vorrq_u8(vceqq_u8(v0, quote), vceqq_u8(v0, backslash));
456 let str1 = vorrq_u8(vceqq_u8(v1, quote), vceqq_u8(v1, backslash));
457 let str2 = vorrq_u8(vceqq_u8(v2, quote), vceqq_u8(v2, backslash));
458 let str3 = vorrq_u8(vceqq_u8(v3, quote), vceqq_u8(v3, backslash));
459
460 let string_mask = neon_to_bitmask_64(str0, str1, str2, str3);
461
462 let dot = vdupq_n_u8(b'.');
464 let minus = vdupq_n_u8(b'-');
465 let plus = vdupq_n_u8(b'+');
466
467 let lower_bound = vdupq_n_s8(47);
470 let upper_bound = vdupq_n_s8(57);
471
472 let v0_s = vreinterpretq_s8_u8(v0);
473 let v1_s = vreinterpretq_s8_u8(v1);
474 let v2_s = vreinterpretq_s8_u8(v2);
475 let v3_s = vreinterpretq_s8_u8(v3);
476
477 let is_digit0 = vandq_u8(
479 vcgtq_s8(v0_s, lower_bound),
480 vmvnq_u8(vcgtq_s8(v0_s, upper_bound)),
481 );
482 let is_digit1 = vandq_u8(
483 vcgtq_s8(v1_s, lower_bound),
484 vmvnq_u8(vcgtq_s8(v1_s, upper_bound)),
485 );
486 let is_digit2 = vandq_u8(
487 vcgtq_s8(v2_s, lower_bound),
488 vmvnq_u8(vcgtq_s8(v2_s, upper_bound)),
489 );
490 let is_digit3 = vandq_u8(
491 vcgtq_s8(v3_s, lower_bound),
492 vmvnq_u8(vcgtq_s8(v3_s, upper_bound)),
493 );
494
495 let num_markers0 = vorrq_u8(
496 vorrq_u8(vceqq_u8(v0, dot), vceqq_u8(v0, minus)),
497 vceqq_u8(v0, plus),
498 );
499 let num_markers1 = vorrq_u8(
500 vorrq_u8(vceqq_u8(v1, dot), vceqq_u8(v1, minus)),
501 vceqq_u8(v1, plus),
502 );
503 let num_markers2 = vorrq_u8(
504 vorrq_u8(vceqq_u8(v2, dot), vceqq_u8(v2, minus)),
505 vceqq_u8(v2, plus),
506 );
507 let num_markers3 = vorrq_u8(
508 vorrq_u8(vceqq_u8(v3, dot), vceqq_u8(v3, minus)),
509 vceqq_u8(v3, plus),
510 );
511
512 let num0 = vorrq_u8(is_digit0, num_markers0);
513 let num1 = vorrq_u8(is_digit1, num_markers1);
514 let num2 = vorrq_u8(is_digit2, num_markers2);
515 let num3 = vorrq_u8(is_digit3, num_markers3);
516
517 let number_mask = neon_to_bitmask_64(num0, num1, num2, num3);
518
519 CharacterClasses {
520 whitespace: whitespace_mask,
521 structural: structural_mask,
522 string_chars: string_mask,
523 numbers: number_mask,
524 }
525 }
526 }
527
528 #[inline]
530 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
531 #[must_use]
532 pub fn classify_chunk(&self, chunk: &[u8; 64]) -> CharacterClasses {
533 let mut whitespace: u64 = 0;
534 let mut structural: u64 = 0;
535 let mut string_chars: u64 = 0;
536 let mut numbers: u64 = 0;
537
538 for (i, &byte) in chunk.iter().enumerate() {
539 let bit = 1u64 << i;
540 match byte {
541 b' ' | b'\t' | b'\n' | b'\r' => whitespace |= bit,
542 b'{' | b'}' | b'[' | b']' | b':' | b',' => structural |= bit,
543 b'"' | b'\\' => string_chars |= bit,
544 b'0'..=b'9' | b'-' | b'+' | b'.' => numbers |= bit,
545 _ => {}
546 }
547 }
548
549 CharacterClasses {
550 whitespace,
551 structural,
552 string_chars,
553 numbers,
554 }
555 }
556}
557
/// Collapses a 16-lane byte vector into a 16-bit mask: bit `i` is set when lane `i` is
/// non-zero.
///
/// # Safety
///
/// Declared `unsafe` for its callers; the body only reinterprets the vector's bytes.
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn neon_to_bitmask_16(v: std::arch::aarch64::uint8x16_t) -> u16 {
    // SAFETY: `uint8x16_t` is 16 bytes wide and every byte pattern is a valid `[u8; 16]`.
    let lanes =
        unsafe { std::mem::transmute::<std::arch::aarch64::uint8x16_t, [u8; 16]>(v) };

    lanes
        .iter()
        .enumerate()
        .filter(|&(_, &lane)| lane != 0)
        .fold(0u16, |bits, (index, _)| bits | (1 << index))
}
576
/// Combines four 16-lane vectors into one 64-bit mask.
///
/// `v0` supplies bits 0..16, `v1` bits 16..32, `v2` bits 32..48 and `v3` bits 48..64; a bit
/// is set when the corresponding lane is non-zero.
///
/// # Safety
///
/// Declared `unsafe` because it forwards to the `unsafe` helper `neon_to_bitmask_16`.
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn neon_to_bitmask_64(
    v0: std::arch::aarch64::uint8x16_t,
    v1: std::arch::aarch64::uint8x16_t,
    v2: std::arch::aarch64::uint8x16_t,
    v3: std::arch::aarch64::uint8x16_t,
) -> u64 {
    // SAFETY: `neon_to_bitmask_16` only reinterprets the vector it is given.
    unsafe {
        [v0, v1, v2, v3]
            .iter()
            .enumerate()
            .fold(0u64, |mask, (quarter, &lanes)| {
                mask | (u64::from(neon_to_bitmask_16(lanes)) << (16 * quarter))
            })
    }
}
596
597pub struct SimdPatternMatcher {
599 patterns: Vec<SimdPattern>,
601}
602
603impl SimdPatternMatcher {
604 #[must_use]
606 pub const fn new() -> Self {
607 Self {
608 patterns: Vec::new(),
609 }
610 }
611
612 pub fn add_pattern(&mut self, pattern: &str) {
614 let compiled = SimdPattern::compile(pattern);
616 self.patterns.push(compiled);
617 }
618
619 #[must_use]
621 pub fn matches_any(&self, text: &[u8]) -> bool {
622 for pattern in &self.patterns {
623 if pattern.matches(text) {
624 return true;
625 }
626 }
627 false
628 }
629}
630
/// A literal byte pattern that matches only text equal to it.
struct SimdPattern {
    /// The pattern's bytes, compared verbatim against candidate text.
    pattern_bytes: Vec<u8>,
}

impl SimdPattern {
    /// Stores the UTF-8 bytes of `pattern` for later comparison.
    fn compile(pattern: &str) -> Self {
        Self {
            pattern_bytes: pattern.as_bytes().to_vec(),
        }
    }

    /// Returns `true` when `text` is byte-for-byte equal to the pattern.
    ///
    /// Slice equality already rejects on a length mismatch and stops at the first differing
    /// byte, so hashing `text` first would only add a full pass over the input without ever
    /// changing the answer.
    fn matches(&self, text: &[u8]) -> bool {
        text == self.pattern_bytes.as_slice()
    }
}
662
/// `Default` for the classifier is the same as [`SimdCharClassifier::new`].
impl Default for SimdCharClassifier {
    fn default() -> Self {
        Self::new()
    }
}

/// `Default` for the detector is the same as [`SimdJsonStructuralDetector::new`].
impl Default for SimdJsonStructuralDetector {
    fn default() -> Self {
        Self::new()
    }
}

/// `Default` for the line separator is the same as [`SimdLineSeparator::new`].
impl Default for SimdLineSeparator {
    fn default() -> Self {
        Self::new()
    }
}

/// `Default` for the filter is the same as [`SimdStructuralFilter::new`].
impl Default for SimdStructuralFilter {
    fn default() -> Self {
        Self::new()
    }
}

/// `Default` for the matcher is the same as [`SimdPatternMatcher::new`], i.e. no patterns.
impl Default for SimdPatternMatcher {
    fn default() -> Self {
        Self::new()
    }
}
692
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a zeroed 64-byte chunk whose first bytes are `prefix`.
    fn chunk_with_prefix(prefix: &[u8]) -> [u8; 64] {
        let mut chunk = [0u8; 64];
        chunk[..prefix.len()].copy_from_slice(prefix);
        chunk
    }

    #[test]
    fn test_simd_char_classifier_new() {
        let classifier = SimdCharClassifier::new();
        let chunk = [0u8; 64];
        let classes = classifier.classify_chunk(&chunk);
        assert_eq!(classes.whitespace, 0);
        assert_eq!(classes.structural, 0);
        assert_eq!(classes.string_chars, 0);
        assert_eq!(classes.numbers, 0);
    }

    #[test]
    fn test_simd_char_classifier_whitespace() {
        let classifier = SimdCharClassifier::new();
        let classes = classifier.classify_chunk(&chunk_with_prefix(b" \t\n\r"));
        assert_eq!(classes.whitespace, 0b1111);
        assert_eq!(classes.structural, 0);
        assert_eq!(classes.string_chars, 0);
        assert_eq!(classes.numbers, 0);
    }

    #[test]
    fn test_simd_char_classifier_structural() {
        let classifier = SimdCharClassifier::new();
        let classes = classifier.classify_chunk(&chunk_with_prefix(b"{}[]:,"));
        assert_eq!(classes.structural, 0b11_1111);
        assert_eq!(classes.whitespace, 0);
        assert_eq!(classes.string_chars, 0);
        assert_eq!(classes.numbers, 0);
    }

    #[test]
    fn test_simd_char_classifier_string_chars() {
        let classifier = SimdCharClassifier::new();
        let classes = classifier.classify_chunk(&chunk_with_prefix(b"\"\\"));
        assert_eq!(classes.string_chars, 0b11);
        assert_eq!(classes.whitespace, 0);
        assert_eq!(classes.structural, 0);
        assert_eq!(classes.numbers, 0);
    }

    #[test]
    fn test_simd_char_classifier_numbers() {
        let classifier = SimdCharClassifier::new();
        let classes = classifier.classify_chunk(&chunk_with_prefix(b"059-+."));
        assert_eq!(classes.numbers, 0b11_1111);
        assert_eq!(classes.whitespace, 0);
        assert_eq!(classes.structural, 0);
        assert_eq!(classes.string_chars, 0);
    }

    #[test]
    fn test_simd_char_classifier_digit_boundaries() {
        // '/' and ':' sit directly next to the digit range; 0x80 and 0xFF are negative when
        // read as signed lanes. None of them may be reported as a number.
        let classifier = SimdCharClassifier::new();
        let classes = classifier.classify_chunk(&chunk_with_prefix(&[b'/', b':', 0x80, 0xFF, b'a']));
        assert_eq!(classes.numbers, 0);
        assert_eq!(classes.structural, 0b10);
        assert_eq!(classes.whitespace, 0);
        assert_eq!(classes.string_chars, 0);
    }

    #[test]
    fn test_simd_char_classifier_upper_half() {
        let classifier = SimdCharClassifier::new();
        let mut chunk = [0u8; 64];
        chunk[32] = b' ';
        chunk[63] = b'}';
        let classes = classifier.classify_chunk(&chunk);
        assert_eq!(classes.whitespace, 1u64 << 32);
        assert_eq!(classes.structural, 1u64 << 63);
        assert_eq!(classes.string_chars, 0);
        assert_eq!(classes.numbers, 0);
    }

    #[test]
    fn test_character_classes_clone() {
        let classes = CharacterClasses {
            whitespace: 0xFF,
            structural: 0xAA,
            string_chars: 0x55,
            numbers: 0x11,
        };
        let cloned = classes.clone();
        assert_eq!(cloned.whitespace, classes.whitespace);
        assert_eq!(cloned.structural, classes.structural);
        assert_eq!(cloned.string_chars, classes.string_chars);
        assert_eq!(cloned.numbers, classes.numbers);
    }

    #[test]
    fn test_simd_json_structural_detector_new() {
        let detector = SimdJsonStructuralDetector::new();
        let positions = detector.find_structural_characters(b"{}");
        assert_eq!(positions, vec![0, 1]);
    }

    #[test]
    fn test_simd_json_structural_detector_empty() {
        let detector = SimdJsonStructuralDetector::new();
        let positions = detector.find_structural_characters(b"");
        assert!(positions.is_empty());
    }

    #[test]
    fn test_simd_json_structural_detector_json() {
        let detector = SimdJsonStructuralDetector::new();
        let json = b"{\"name\":\"test\",\"value\":123}";
        let positions = detector.find_structural_characters(json);
        assert_eq!(
            positions,
            vec![0, 1, 6, 7, 8, 13, 14, 15, 21, 22, 23, 24, 25, 26]
        );
    }

    #[test]
    fn test_simd_json_structural_detector_large() {
        let detector = SimdJsonStructuralDetector::new();
        let json =
            b"{\"name\":\"test\",\"value\":123,\"extra\":\"more data here to exceed 64 bytes\"}";
        assert!(json.len() > 64);

        let positions = detector.find_structural_characters(json);
        // The opening and closing braces bracket the result, and the second chunk must
        // report absolute (not chunk-relative) offsets.
        assert_eq!(positions.first(), Some(&0));
        assert_eq!(positions.last(), Some(&(json.len() - 1)));
        assert!(positions.windows(2).all(|pair| pair[0] < pair[1]));
        assert!(positions.iter().all(|&pos| pos < json.len()));
    }

    #[test]
    fn test_simd_string_ops_equals() {
        assert!(SimdStringOps::equals(b"hello", b"hello"));
        assert!(!SimdStringOps::equals(b"hello", b"world"));
        assert!(!SimdStringOps::equals(b"hello", b"hell"));
    }

    #[test]
    fn test_simd_string_ops_equals_empty() {
        assert!(SimdStringOps::equals(b"", b""));
        assert!(!SimdStringOps::equals(b"", b"a"));
    }

    #[test]
    fn test_simd_string_ops_find_substring() {
        assert_eq!(
            SimdStringOps::find_substring(b"hello world", b"world"),
            Some(6)
        );
        assert_eq!(
            SimdStringOps::find_substring(b"hello world", b"hello"),
            Some(0)
        );
        assert_eq!(SimdStringOps::find_substring(b"hello world", b"xyz"), None);
    }

    #[test]
    fn test_simd_string_ops_find_substring_empty() {
        assert_eq!(SimdStringOps::find_substring(b"hello", b""), Some(0));
        assert_eq!(SimdStringOps::find_substring(b"", b""), Some(0));
        assert_eq!(SimdStringOps::find_substring(b"", b"a"), None);
    }

    #[test]
    fn test_simd_string_ops_hash_field_name() {
        let hash1 = SimdStringOps::hash_field_name(b"name");
        let hash2 = SimdStringOps::hash_field_name(b"name");
        let hash3 = SimdStringOps::hash_field_name(b"value");
        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
    }

    #[test]
    fn test_simd_line_separator_new() {
        let separator = SimdLineSeparator::new();
        let boundaries = separator.find_line_boundaries(b"line1\nline2\n");
        assert_eq!(boundaries, vec![6, 12]);
    }

    #[test]
    fn test_simd_line_separator_empty() {
        let separator = SimdLineSeparator::new();
        let boundaries = separator.find_line_boundaries(b"");
        assert!(boundaries.is_empty());
    }

    #[test]
    fn test_simd_line_separator_no_newline() {
        let separator = SimdLineSeparator::new();
        let boundaries = separator.find_line_boundaries(b"single line");
        assert_eq!(boundaries, vec![11]);
    }

    #[test]
    fn test_simd_line_separator_multiple_lines() {
        let separator = SimdLineSeparator::new();
        let boundaries = separator.find_line_boundaries(b"line1\nline2\nline3");
        assert_eq!(boundaries, vec![6, 12, 17]);
    }

    #[test]
    fn test_simd_line_separator_ends_with_newline() {
        let separator = SimdLineSeparator::new();
        let boundaries = separator.find_line_boundaries(b"line1\nline2\n");
        assert_eq!(boundaries.len(), 2);
        assert_eq!(boundaries.last(), Some(&12));
    }

    #[test]
    fn test_simd_structural_filter_new() {
        let filter = SimdStructuralFilter::new();
        let matches = filter.matches_schema(b"{\"name\":\"test\"}", &["name".to_string()]);
        assert!(matches);
    }

    #[test]
    fn test_simd_structural_filter_empty() {
        let filter = SimdStructuralFilter::new();
        let matches = filter.matches_schema(b"", &["name".to_string()]);
        assert!(!matches);
        // An empty line is rejected even when nothing is required.
        assert!(!filter.matches_schema(b"", &[]));
    }

    #[test]
    fn test_simd_structural_filter_no_match() {
        let filter = SimdStructuralFilter::new();
        let matches = filter.matches_schema(b"{\"value\":123}", &["name".to_string()]);
        assert!(!matches);
    }

    #[test]
    fn test_simd_structural_filter_multiple_fields() {
        let filter = SimdStructuralFilter::new();
        let json = b"{\"name\":\"test\",\"age\":30}";
        let matches = filter.matches_schema(json, &["name".to_string(), "age".to_string()]);
        assert!(matches);
    }

    #[test]
    fn test_simd_structural_filter_partial_match() {
        let filter = SimdStructuralFilter::new();
        let json = b"{\"name\":\"test\"}";
        let matches = filter.matches_schema(json, &["name".to_string(), "age".to_string()]);
        assert!(!matches);
    }

    #[test]
    fn test_simd_pattern_matcher_new() {
        let matcher = SimdPatternMatcher::new();
        assert!(!matcher.matches_any(b"test"));
    }

    #[test]
    fn test_simd_pattern_matcher_add_pattern() {
        let mut matcher = SimdPatternMatcher::new();
        matcher.add_pattern("test");
        assert!(matcher.matches_any(b"test"));
        assert!(!matcher.matches_any(b"other"));
    }

    #[test]
    fn test_simd_pattern_matcher_multiple_patterns() {
        let mut matcher = SimdPatternMatcher::new();
        matcher.add_pattern("hello");
        matcher.add_pattern("world");
        assert!(matcher.matches_any(b"hello"));
        assert!(matcher.matches_any(b"world"));
        assert!(!matcher.matches_any(b"other"));
    }

    #[test]
    fn test_simd_pattern_compile_and_match() {
        let pattern = SimdPattern::compile("test");
        assert!(pattern.matches(b"test"));
        assert!(!pattern.matches(b"other"));
    }

    #[test]
    fn test_simd_pattern_hash_mismatch() {
        let pattern = SimdPattern::compile("test");
        assert!(!pattern.matches(b"different"));
        // Same length, different content.
        assert!(!pattern.matches(b"tesT"));
    }

    #[test]
    fn test_character_classes_debug() {
        let classes = CharacterClasses {
            whitespace: 1,
            structural: 2,
            string_chars: 3,
            numbers: 4,
        };
        let debug = format!("{classes:?}");
        assert!(debug.contains("whitespace"));
        assert!(debug.contains("structural"));
        assert!(debug.contains("string_chars"));
        assert!(debug.contains("numbers"));
    }

    #[test]
    fn test_simd_json_structural_detector_process_chunk() {
        let detector = SimdJsonStructuralDetector::new();
        // Exactly one full chunk: braces at both ends, spaces in between. Every byte
        // belongs to some class, so every offset must be reported.
        let mut json = [b' '; 64];
        json[0] = b'{';
        json[63] = b'}';
        let positions = detector.find_structural_characters(&json);
        assert_eq!(positions, (0..64).collect::<Vec<usize>>());
    }
}