Skip to main content

fionn_core/
path.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2use dashmap::DashMap;
3use memchr::{memchr, memchr_iter, memchr2};
4use std::ops::Range;
5use std::sync::{Arc, OnceLock};
6
/// One owned segment of a parsed JSON path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PathComponent {
    /// An object key, such as `name` in `user.name`.
    Field(String),
    /// An array position, such as `0` in `items[0]`.
    ArrayIndex(usize),
}
15
/// One segment of a JSON path that borrows its field name instead of owning it.
#[derive(Copy, Clone, Debug)]
pub enum PathComponentRef<'a> {
    /// An object key borrowed from the path string.
    Field(&'a str),
    /// An array position.
    ArrayIndex(usize),
}
24
/// One segment of a JSON path stored as a position rather than as text.
///
/// Field names are kept as byte ranges that index into the path string owned elsewhere.
#[derive(Clone, Debug)]
pub enum PathComponentRange {
    /// Byte range of an object key inside the path string.
    Field(Range<usize>),
    /// An array position.
    ArrayIndex(usize),
}
33
34/// Parsed path with owned storage and component ranges.
35#[derive(Debug, Clone)]
36pub struct ParsedPath {
37    path: String,
38    components: Vec<PathComponentRange>,
39}
40
41impl ParsedPath {
42    /// Parse a path string into components.
43    #[must_use]
44    pub fn parse(path: &str) -> Self {
45        let mut components = Vec::new();
46        parse_simd_ranges(path, &mut components);
47        Self {
48            path: path.to_string(),
49            components,
50        }
51    }
52
53    /// Get the original path string.
54    #[must_use]
55    pub fn path(&self) -> &str {
56        &self.path
57    }
58
59    /// Get the parsed components.
60    #[must_use]
61    pub fn components(&self) -> &[PathComponentRange] {
62        &self.components
63    }
64
65    /// Convert components to borrowed references.
66    pub fn components_ref<'a>(&'a self, out: &mut Vec<PathComponentRef<'a>>) {
67        out.clear();
68        out.reserve(self.components.len());
69        for component in &self.components {
70            match component {
71                PathComponentRange::Field(range) => {
72                    out.push(PathComponentRef::Field(&self.path[range.clone()]));
73                }
74                PathComponentRange::ArrayIndex(index) => {
75                    out.push(PathComponentRef::ArrayIndex(*index));
76                }
77            }
78        }
79    }
80}
81
82/// Concurrent cache for parsed paths.
83pub struct PathCache {
84    map: DashMap<String, Arc<ParsedPath>>,
85}
86
87impl PathCache {
88    /// Create a new empty path cache.
89    #[must_use]
90    pub fn new() -> Self {
91        Self {
92            map: DashMap::new(),
93        }
94    }
95
96    /// Get a cached parsed path or parse and cache it.
97    #[must_use]
98    pub fn get_or_parse(&self, path: &str) -> Arc<ParsedPath> {
99        if let Some(entry) = self.map.get(path) {
100            return entry.clone();
101        }
102
103        let parsed = Arc::new(ParsedPath::parse(path));
104        self.map.insert(path.to_string(), parsed.clone());
105        parsed
106    }
107}
108
109impl Default for PathCache {
110    fn default() -> Self {
111        Self::new()
112    }
113}
114
// Remaining-length cutoffs (in bytes) below which the parsers use `memchr` instead of the
// explicit SIMD scanners.
const SIMD_CUTOFF_64: usize = 64;
const SIMD_CUTOFF_96: usize = 96;
const SIMD_CUTOFF_128: usize = 128;
/// Cutoff used by the parsers that do not take an explicit one.
const SIMD_CUTOFF_DEFAULT: usize = 64;

// x86/x86_64: minimum remaining length (in bytes) at which each instruction set is chosen
// by the dynamic dispatchers.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const SIMD_SSE2_THRESHOLD: usize = 64;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const SIMD_AVX2_THRESHOLD: usize = 128;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const SIMD_AVX512_THRESHOLD: usize = 256;
127
128/// Baseline scalar JSON path parser.
129#[inline]
130#[must_use]
131pub fn parse_baseline(path: &str) -> Vec<PathComponent> {
132    let mut components = Vec::new();
133    let bytes = path.as_bytes();
134    components.reserve(estimate_components(bytes));
135    let mut current_start = 0;
136    let mut i = 0;
137
138    while i < bytes.len() {
139        let next = memchr2(b'.', b'[', &bytes[i..]);
140        let Some(rel_pos) = next else {
141            break;
142        };
143        let pos = i + rel_pos;
144
145        match bytes[pos] {
146            b'.' => {
147                if pos > current_start {
148                    let field_name = extract_field_name(bytes, current_start, pos);
149                    components.push(PathComponent::Field(field_name));
150                }
151                current_start = pos + 1;
152                i = pos + 1;
153            }
154            b'[' => {
155                if pos > current_start {
156                    let field_name = extract_field_name(bytes, current_start, pos);
157                    components.push(PathComponent::Field(field_name));
158                }
159
160                let start = pos + 1;
161                let end_rel = memchr(b']', &bytes[start..]);
162                if let Some(rel_end) = end_rel {
163                    let end = start + rel_end;
164                    let index_str = &bytes[start..end];
165                    let index = parse_usize(index_str);
166                    components.push(PathComponent::ArrayIndex(index));
167                    current_start = end + 1;
168                    i = end + 1;
169                } else {
170                    break;
171                }
172            }
173            _ => {
174                i = pos + 1;
175            }
176        }
177    }
178
179    if current_start < bytes.len() {
180        let field_name = extract_field_name(bytes, current_start, bytes.len());
181        components.push(PathComponent::Field(field_name));
182    }
183
184    components
185}
186
187/// Original scalar JSON path parser (byte iteration).
188#[inline]
189#[must_use]
190pub fn parse_original(path: &str) -> Vec<PathComponent> {
191    let mut components = Vec::new();
192    let mut current_start = 0;
193    let bytes = path.as_bytes();
194    components.reserve(estimate_components(bytes));
195
196    for (i, &byte) in bytes.iter().enumerate() {
197        match byte {
198            b'.' => {
199                if i > current_start {
200                    let field_name = extract_field_name(bytes, current_start, i);
201                    components.push(PathComponent::Field(field_name));
202                }
203                current_start = i + 1;
204            }
205            b'[' => {
206                if i > current_start {
207                    let field_name = extract_field_name(bytes, current_start, i);
208                    components.push(PathComponent::Field(field_name));
209                }
210
211                let start = i + 1;
212                let mut end = start;
213                while end < bytes.len() && bytes[end] != b']' {
214                    end += 1;
215                }
216
217                if end < bytes.len() {
218                    let index_str = &bytes[start..end];
219                    let index = parse_usize(index_str);
220                    components.push(PathComponent::ArrayIndex(index));
221                    current_start = end + 1;
222                }
223            }
224            _ => {}
225        }
226    }
227
228    if current_start < bytes.len() {
229        let field_name = extract_field_name(bytes, current_start, bytes.len());
230        components.push(PathComponent::Field(field_name));
231    }
232
233    components
234}
235
236/// SIMD-friendly JSON path parser with explicit SIMD delimiter scan.
237#[inline]
238#[must_use]
239pub fn parse_simd(path: &str) -> Vec<PathComponent> {
240    parse_simd_with_cutoff(path, SIMD_CUTOFF_DEFAULT)
241}
242
243/// Parse path with 64-byte SIMD cutoff.
244#[inline]
245#[must_use]
246pub fn parse_simd_cutoff_64(path: &str) -> Vec<PathComponent> {
247    parse_simd_with_cutoff(path, SIMD_CUTOFF_64)
248}
249
250/// Parse path with 96-byte SIMD cutoff.
251#[inline]
252#[must_use]
253pub fn parse_simd_cutoff_96(path: &str) -> Vec<PathComponent> {
254    parse_simd_with_cutoff(path, SIMD_CUTOFF_96)
255}
256
257/// Parse path with 128-byte SIMD cutoff.
258#[inline]
259#[must_use]
260pub fn parse_simd_cutoff_128(path: &str) -> Vec<PathComponent> {
261    parse_simd_with_cutoff(path, SIMD_CUTOFF_128)
262}
263
264/// Parse path returning borrowed component references.
265#[inline]
266#[must_use]
267pub fn parse_simd_ref(path: &str) -> Vec<PathComponentRef<'_>> {
268    let mut components = Vec::new();
269    parse_simd_ref_into(path, &mut components);
270    components
271}
272
273/// Parse path into borrowed components using provided buffer.
274#[inline]
275pub fn parse_simd_ref_into<'a>(path: &'a str, components: &mut Vec<PathComponentRef<'a>>) {
276    components.clear();
277    let bytes = path.as_bytes();
278    let mut current_start = 0;
279    let mut i = 0;
280    components.reserve(estimate_components(bytes));
281    let dispatch = DISPATCH.get_or_init(init_dispatch);
282
283    while i < bytes.len() {
284        let remaining = bytes.len().saturating_sub(i);
285        let Some(pos) = (if remaining < SIMD_CUTOFF_DEFAULT {
286            memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
287        } else {
288            find_delim_dynamic(bytes, i, remaining, *dispatch)
289        }) else {
290            break;
291        };
292
293        match bytes[pos] {
294            b'.' => {
295                if pos > current_start {
296                    let field_name = extract_field_name_ref(bytes, current_start, pos);
297                    components.push(PathComponentRef::Field(field_name));
298                }
299                current_start = pos + 1;
300                i = pos + 1;
301            }
302            b'[' => {
303                if pos > current_start {
304                    let field_name = extract_field_name_ref(bytes, current_start, pos);
305                    components.push(PathComponentRef::Field(field_name));
306                }
307
308                let start = pos + 1;
309                let remaining = bytes.len().saturating_sub(start);
310                let end = if remaining < SIMD_CUTOFF_DEFAULT {
311                    memchr(b']', &bytes[start..]).map(|pos| start + pos)
312                } else {
313                    find_byte_dynamic(bytes, start, remaining, b']', *dispatch)
314                };
315                if let Some(end) = end {
316                    let index_str = &bytes[start..end];
317                    let index = parse_usize(index_str);
318                    components.push(PathComponentRef::ArrayIndex(index));
319                    current_start = end + 1;
320                    i = end + 1;
321                } else {
322                    break;
323                }
324            }
325            _ => {
326                i = pos + 1;
327            }
328        }
329    }
330
331    if current_start < bytes.len() {
332        let field_name = extract_field_name_ref(bytes, current_start, bytes.len());
333        components.push(PathComponentRef::Field(field_name));
334    }
335}
336
337#[inline]
338fn parse_simd_ranges(path: &str, components: &mut Vec<PathComponentRange>) {
339    components.clear();
340    let bytes = path.as_bytes();
341    let mut current_start = 0;
342    let mut i = 0;
343    components.reserve(estimate_components(bytes));
344    let dispatch = DISPATCH.get_or_init(init_dispatch);
345
346    while i < bytes.len() {
347        let remaining = bytes.len().saturating_sub(i);
348        let Some(pos) = (if remaining < SIMD_CUTOFF_DEFAULT {
349            memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
350        } else {
351            find_delim_dynamic(bytes, i, remaining, *dispatch)
352        }) else {
353            break;
354        };
355
356        match bytes[pos] {
357            b'.' => {
358                if pos > current_start {
359                    components.push(PathComponentRange::Field(current_start..pos));
360                }
361                current_start = pos + 1;
362                i = pos + 1;
363            }
364            b'[' => {
365                if pos > current_start {
366                    components.push(PathComponentRange::Field(current_start..pos));
367                }
368
369                let start = pos + 1;
370                let remaining = bytes.len().saturating_sub(start);
371                let end = if remaining < SIMD_CUTOFF_DEFAULT {
372                    memchr(b']', &bytes[start..]).map(|pos| start + pos)
373                } else {
374                    find_byte_dynamic(bytes, start, remaining, b']', *dispatch)
375                };
376                if let Some(end) = end {
377                    let index_str = &bytes[start..end];
378                    let index = parse_usize(index_str);
379                    components.push(PathComponentRange::ArrayIndex(index));
380                    current_start = end + 1;
381                    i = end + 1;
382                } else {
383                    break;
384                }
385            }
386            _ => {
387                i = pos + 1;
388            }
389        }
390    }
391
392    if current_start < bytes.len() {
393        components.push(PathComponentRange::Field(current_start..bytes.len()));
394    }
395}
396
397/// Parse path using forced AVX2 instructions.
398#[cfg(target_arch = "x86_64")]
399#[inline]
400pub fn parse_simd_forced_avx2(path: &str) -> Vec<PathComponent> {
401    parse_simd_with_forced(path, find_delim_avx2_wrapper, find_byte_avx2_wrapper)
402}
403
404/// Parse path using forced AVX-512 instructions.
405#[cfg(target_arch = "x86_64")]
406#[inline]
407pub fn parse_simd_forced_avx512(path: &str) -> Vec<PathComponent> {
408    parse_simd_with_forced(path, find_delim_avx512_wrapper, find_byte_avx512_wrapper)
409}
410
411/// Parse path using forced SSE2 instructions.
412#[cfg(target_arch = "x86_64")]
413#[inline]
414pub fn parse_simd_forced_sse2(path: &str) -> Vec<PathComponent> {
415    parse_simd_with_forced(path, find_delim_sse2, find_byte_sse2)
416}
417
/// Parse path using the SSE2 scanners regardless of input length (32-bit x86 build).
///
/// SSE2 is not guaranteed on every 32-bit x86 CPU, and this is a safe function, so SSE2
/// support is checked at runtime. When it is missing the call falls back to
/// [`parse_simd`], which produces the same components.
#[cfg(target_arch = "x86")]
#[inline]
pub fn parse_simd_forced_sse2(path: &str) -> Vec<PathComponent> {
    if !std::is_x86_feature_detected!("sse2") {
        return parse_simd(path);
    }
    parse_simd_with_forced(path, find_delim_sse2, find_byte_sse2)
}
424
425#[inline]
426fn parse_simd_with_cutoff(path: &str, cutoff: usize) -> Vec<PathComponent> {
427    parse_simd_with(path, cutoff, None)
428}
429
430/// Helper for x86 forced SIMD instruction parsing
431#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
432#[inline]
433fn parse_simd_with_forced(
434    path: &str,
435    find_delim: DelimFinder,
436    find_byte: ByteFinder,
437) -> Vec<PathComponent> {
438    parse_simd_with(path, 0, Some((find_delim, find_byte)))
439}
440
441#[inline]
442fn parse_simd_with(
443    path: &str,
444    cutoff: usize,
445    forced: Option<(DelimFinder, ByteFinder)>,
446) -> Vec<PathComponent> {
447    let mut components = Vec::new();
448    let bytes = path.as_bytes();
449    let mut current_start = 0;
450    let mut i = 0;
451    components.reserve(estimate_components(bytes));
452    let dispatch = DISPATCH.get_or_init(init_dispatch);
453
454    while i < bytes.len() {
455        let remaining = bytes.len().saturating_sub(i);
456        let Some(pos) = (if remaining < cutoff {
457            memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
458        } else if let Some((find_delim, _)) = forced {
459            find_delim(bytes, i)
460        } else {
461            find_delim_dynamic(bytes, i, remaining, *dispatch)
462        }) else {
463            break;
464        };
465
466        match bytes[pos] {
467            b'.' => {
468                if pos > current_start {
469                    let field_name = extract_field_name(bytes, current_start, pos);
470                    components.push(PathComponent::Field(field_name));
471                }
472                current_start = pos + 1;
473                i = pos + 1;
474            }
475            b'[' => {
476                if pos > current_start {
477                    let field_name = extract_field_name(bytes, current_start, pos);
478                    components.push(PathComponent::Field(field_name));
479                }
480
481                let start = pos + 1;
482                let remaining = bytes.len().saturating_sub(start);
483                let end = if remaining < cutoff {
484                    memchr(b']', &bytes[start..]).map(|pos| start + pos)
485                } else if let Some((_, find_byte)) = forced {
486                    find_byte(bytes, start, b']')
487                } else {
488                    find_byte_dynamic(bytes, start, remaining, b']', *dispatch)
489                };
490                if let Some(end) = end {
491                    let index_str = &bytes[start..end];
492                    let index = parse_usize(index_str);
493                    components.push(PathComponent::ArrayIndex(index));
494                    current_start = end + 1;
495                    i = end + 1;
496                } else {
497                    break;
498                }
499            }
500            _ => {
501                i = pos + 1;
502            }
503        }
504    }
505
506    if current_start < bytes.len() {
507        let field_name = extract_field_name(bytes, current_start, bytes.len());
508        components.push(PathComponent::Field(field_name));
509    }
510
511    components
512}
513
/// Extract a field name as an owned `String` from a byte slice.
///
/// # Invariant
/// `bytes[start..end]` must be valid UTF-8. This holds when `bytes` comes from
/// `str::as_bytes()` and both offsets sit next to ASCII delimiters or at the ends of the
/// slice. The invariant is asserted in debug builds; release builds do not check it.
///
/// # Panics
/// Panics if `start..end` is not a valid range within `bytes`.
#[inline]
fn extract_field_name(bytes: &[u8], start: usize, end: usize) -> String {
    let raw = &bytes[start..end];
    debug_assert!(
        std::str::from_utf8(raw).is_ok(),
        "field name range does not contain valid UTF-8"
    );
    // SAFETY: All callers pass bytes from `&str::as_bytes()`, which guarantees valid UTF-8.
    // The start..end range is computed by scanning for ASCII delimiters (`.`, `[` and `]`),
    // which cannot split a multi-byte UTF-8 sequence.
    unsafe { String::from_utf8_unchecked(raw.to_vec()) }
}
526
/// Extract a field name as a borrowed `&str` from a byte slice.
///
/// # Invariant
/// `bytes[start..end]` must be valid UTF-8. This holds when `bytes` comes from
/// `str::as_bytes()` and both offsets sit next to ASCII delimiters or at the ends of the
/// slice. The invariant is asserted in debug builds; release builds do not check it.
///
/// # Panics
/// Panics if `start..end` is not a valid range within `bytes`.
#[inline]
fn extract_field_name_ref(bytes: &[u8], start: usize, end: usize) -> &str {
    let raw = &bytes[start..end];
    debug_assert!(
        std::str::from_utf8(raw).is_ok(),
        "field name range does not contain valid UTF-8"
    );
    // SAFETY: All callers pass bytes from `&str::as_bytes()`, which guarantees valid UTF-8.
    // The start..end range is computed by scanning for ASCII delimiters (`.`, `[` and `]`),
    // which cannot split a multi-byte UTF-8 sequence.
    unsafe { std::str::from_utf8_unchecked(raw) }
}
539
/// Builds a `usize` from the ASCII digits found in `bytes`.
///
/// Bytes that are not ASCII digits are skipped rather than rejected, and an input with no
/// digits yields `0`. The result saturates at `usize::MAX` instead of overflowing.
#[inline]
fn parse_usize(bytes: &[u8]) -> usize {
    bytes
        .iter()
        .filter(|byte| byte.is_ascii_digit())
        .fold(0usize, |acc, &digit| {
            acc.saturating_mul(10)
                .saturating_add(usize::from(digit - b'0'))
        })
}
553
554#[inline]
555fn estimate_components(bytes: &[u8]) -> usize {
556    let dots = memchr_iter(b'.', bytes).count();
557    let brackets = memchr_iter(b'[', bytes).count();
558    dots + brackets + 1
559}
560
/// Scanner signature: search `bytes` from index `start` for a path delimiter and return
/// its absolute index.
type DelimFinder = fn(bytes: &[u8], start: usize) -> Option<usize>;
/// Scanner signature: search `bytes` from index `start` for `needle` and return its
/// absolute index.
type ByteFinder = fn(bytes: &[u8], start: usize, needle: u8) -> Option<usize>;
563
/// CPU feature flags consulted when choosing a scanner implementation.
// Allowed lints: the fields are independent CPU feature flags, so plain bools are the
// appropriate representation, and which fields are read depends on `target_arch`.
#[derive(Copy, Clone)]
#[allow(clippy::struct_excessive_bools, dead_code)]
struct SimdDispatch {
    /// SSE2 scanners may be used.
    has_sse2: bool,
    /// AVX2 scanners may be used.
    has_avx2: bool,
    /// AVX-512 scanners may be used.
    has_avx512: bool,
    /// NEON scanners may be used.
    has_neon: bool,
}

/// Feature flags shared by all parsers, filled in on first use.
static DISPATCH: OnceLock<SimdDispatch> = OnceLock::new();
575
576#[cfg(target_arch = "x86_64")]
577fn init_dispatch() -> SimdDispatch {
578    SimdDispatch {
579        has_sse2: std::is_x86_feature_detected!("sse2"),
580        has_avx2: std::is_x86_feature_detected!("avx2"),
581        has_avx512: std::is_x86_feature_detected!("avx512bw")
582            && std::is_x86_feature_detected!("avx512f"),
583        has_neon: false,
584    }
585}
586
587#[cfg(target_arch = "x86")]
588fn init_dispatch() -> SimdDispatch {
589    SimdDispatch {
590        has_sse2: std::is_x86_feature_detected!("sse2"),
591        has_avx2: false,
592        has_avx512: false,
593        has_neon: false,
594    }
595}
596
597#[cfg(target_arch = "aarch64")]
598const fn init_dispatch() -> SimdDispatch {
599    SimdDispatch {
600        has_sse2: false,
601        has_avx2: false,
602        has_avx512: false,
603        has_neon: true, // NEON is mandatory on aarch64
604    }
605}
606
607#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
608fn init_dispatch() -> SimdDispatch {
609    SimdDispatch {
610        has_sse2: false,
611        has_avx2: false,
612        has_avx512: false,
613        has_neon: false,
614    }
615}
616
/// Minimum remaining length, in bytes, at which the NEON scanners are used: one full
/// 128-bit vector.
#[cfg(target_arch = "aarch64")]
const SIMD_NEON_THRESHOLD: usize = 128 / 8;
620
621#[inline]
622fn find_delim_dynamic(
623    bytes: &[u8],
624    start: usize,
625    remaining: usize,
626    dispatch: SimdDispatch,
627) -> Option<usize> {
628    #[cfg(target_arch = "x86_64")]
629    {
630        if dispatch.has_avx512 && remaining >= SIMD_AVX512_THRESHOLD {
631            return find_delim_avx512_wrapper(bytes, start);
632        }
633        if dispatch.has_avx2 && remaining >= SIMD_AVX2_THRESHOLD {
634            return find_delim_avx2_wrapper(bytes, start);
635        }
636        if dispatch.has_sse2 && remaining >= SIMD_SSE2_THRESHOLD {
637            return find_delim_sse2(bytes, start);
638        }
639    }
640
641    #[cfg(target_arch = "x86")]
642    {
643        if dispatch.has_sse2 && remaining >= SIMD_SSE2_THRESHOLD {
644            return find_delim_sse2(bytes, start);
645        }
646    }
647
648    #[cfg(target_arch = "aarch64")]
649    {
650        if dispatch.has_neon && remaining >= SIMD_NEON_THRESHOLD {
651            return find_delim_neon(bytes, start);
652        }
653    }
654
655    memchr2(b'.', b'[', &bytes[start..]).map(|pos| start + pos)
656}
657
658#[inline]
659fn find_byte_dynamic(
660    bytes: &[u8],
661    start: usize,
662    remaining: usize,
663    needle: u8,
664    dispatch: SimdDispatch,
665) -> Option<usize> {
666    #[cfg(target_arch = "x86_64")]
667    {
668        if dispatch.has_avx512 && remaining >= SIMD_AVX512_THRESHOLD {
669            return find_byte_avx512_wrapper(bytes, start, needle);
670        }
671        if dispatch.has_avx2 && remaining >= SIMD_AVX2_THRESHOLD {
672            return find_byte_avx2_wrapper(bytes, start, needle);
673        }
674        if dispatch.has_sse2 && remaining >= SIMD_SSE2_THRESHOLD {
675            return find_byte_sse2(bytes, start, needle);
676        }
677    }
678
679    #[cfg(target_arch = "x86")]
680    {
681        if dispatch.has_sse2 && remaining >= SIMD_SSE2_THRESHOLD {
682            return find_byte_sse2(bytes, start, needle);
683        }
684    }
685
686    #[cfg(target_arch = "aarch64")]
687    {
688        if dispatch.has_neon && remaining >= SIMD_NEON_THRESHOLD {
689            return find_byte_neon(bytes, start, needle);
690        }
691    }
692
693    memchr(needle, &bytes[start..]).map(|pos| start + pos)
694}
695
696#[cfg(target_arch = "x86_64")]
697#[inline]
698fn find_delim_sse2(bytes: &[u8], start: usize) -> Option<usize> {
699    use std::arch::x86_64::{
700        _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8,
701    };
702
703    let mut i = start;
704    let len = bytes.len();
705
706    let dot = unsafe { _mm_set1_epi8(b'.'.cast_signed()) };
707    let bracket = unsafe { _mm_set1_epi8(b'['.cast_signed()) };
708
709    while i + 16 <= len {
710        let chunk = unsafe { _mm_loadu_si128(bytes.as_ptr().add(i).cast()) };
711        let eq_dot = unsafe { _mm_cmpeq_epi8(chunk, dot) };
712        let eq_bracket = unsafe { _mm_cmpeq_epi8(chunk, bracket) };
713        let mask = unsafe { _mm_movemask_epi8(_mm_or_si128(eq_dot, eq_bracket)) }.cast_unsigned();
714
715        if mask != 0 {
716            let offset = mask.trailing_zeros() as usize;
717            return Some(i + offset);
718        }
719
720        i += 16;
721    }
722
723    memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
724}
725
726#[cfg(target_arch = "x86_64")]
727#[target_feature(enable = "avx2")]
728#[inline]
729unsafe fn find_delim_avx2(bytes: &[u8], start: usize) -> Option<usize> {
730    use std::arch::x86_64::{
731        _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8, _mm256_or_si256,
732        _mm256_set1_epi8,
733    };
734
735    let mut i = start;
736    let len = bytes.len();
737    let dot = _mm256_set1_epi8(b'.'.cast_signed());
738    let bracket = _mm256_set1_epi8(b'['.cast_signed());
739
740    while i + 32 <= len {
741        let chunk = unsafe { _mm256_loadu_si256(bytes.as_ptr().add(i).cast()) };
742        let eq_dot = _mm256_cmpeq_epi8(chunk, dot);
743        let eq_bracket = _mm256_cmpeq_epi8(chunk, bracket);
744        let mask = _mm256_movemask_epi8(_mm256_or_si256(eq_dot, eq_bracket)).cast_unsigned();
745
746        if mask != 0 {
747            let offset = mask.trailing_zeros() as usize;
748            return Some(i + offset);
749        }
750
751        i += 32;
752    }
753
754    memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
755}
756
757#[cfg(target_arch = "x86_64")]
758#[inline]
759fn find_delim_avx2_wrapper(bytes: &[u8], start: usize) -> Option<usize> {
760    unsafe { find_delim_avx2(bytes, start) }
761}
762
763#[cfg(target_arch = "x86_64")]
764#[target_feature(enable = "avx512bw,avx512f")]
765#[inline]
766unsafe fn find_delim_avx512(bytes: &[u8], start: usize) -> Option<usize> {
767    use std::arch::x86_64::{_mm512_cmpeq_epi8_mask, _mm512_loadu_si512, _mm512_set1_epi8};
768
769    let mut i = start;
770    let len = bytes.len();
771    let dot = _mm512_set1_epi8(b'.'.cast_signed());
772    let bracket = _mm512_set1_epi8(b'['.cast_signed());
773
774    while i + 64 <= len {
775        let chunk = unsafe { _mm512_loadu_si512(bytes.as_ptr().add(i).cast()) };
776        let dot_mask = _mm512_cmpeq_epi8_mask(chunk, dot);
777        let bracket_mask = _mm512_cmpeq_epi8_mask(chunk, bracket);
778        let mask = dot_mask | bracket_mask;
779
780        if mask != 0 {
781            let offset = mask.trailing_zeros() as usize;
782            return Some(i + offset);
783        }
784
785        i += 64;
786    }
787
788    memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
789}
790
791#[cfg(target_arch = "x86_64")]
792#[inline]
793fn find_delim_avx512_wrapper(bytes: &[u8], start: usize) -> Option<usize> {
794    unsafe { find_delim_avx512(bytes, start) }
795}
796
797#[cfg(target_arch = "x86_64")]
798#[inline]
799fn find_byte_sse2(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
800    use std::arch::x86_64::{_mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8};
801
802    let mut i = start;
803    let len = bytes.len();
804    let needle_vec = unsafe { _mm_set1_epi8(needle.cast_signed()) };
805
806    while i + 16 <= len {
807        let chunk = unsafe { _mm_loadu_si128(bytes.as_ptr().add(i).cast()) };
808        let eq = unsafe { _mm_cmpeq_epi8(chunk, needle_vec) };
809        let mask = unsafe { _mm_movemask_epi8(eq) }.cast_unsigned();
810
811        if mask != 0 {
812            let offset = mask.trailing_zeros() as usize;
813            return Some(i + offset);
814        }
815
816        i += 16;
817    }
818
819    memchr(needle, &bytes[i..]).map(|pos| i + pos)
820}
821
822#[cfg(target_arch = "x86_64")]
823#[target_feature(enable = "avx2")]
824#[inline]
825unsafe fn find_byte_avx2(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
826    use std::arch::x86_64::{
827        _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8, _mm256_set1_epi8,
828    };
829
830    let mut i = start;
831    let len = bytes.len();
832    let needle_vec = _mm256_set1_epi8(needle.cast_signed());
833
834    while i + 32 <= len {
835        let chunk = unsafe { _mm256_loadu_si256(bytes.as_ptr().add(i).cast()) };
836        let eq = _mm256_cmpeq_epi8(chunk, needle_vec);
837        let mask = _mm256_movemask_epi8(eq).cast_unsigned();
838
839        if mask != 0 {
840            let offset = mask.trailing_zeros() as usize;
841            return Some(i + offset);
842        }
843
844        i += 32;
845    }
846
847    memchr(needle, &bytes[i..]).map(|pos| i + pos)
848}
849
850#[cfg(target_arch = "x86_64")]
851#[inline]
852fn find_byte_avx2_wrapper(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
853    unsafe { find_byte_avx2(bytes, start, needle) }
854}
855
856#[cfg(target_arch = "x86_64")]
857#[target_feature(enable = "avx512bw,avx512f")]
858#[inline]
859unsafe fn find_byte_avx512(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
860    use std::arch::x86_64::{_mm512_cmpeq_epi8_mask, _mm512_loadu_si512, _mm512_set1_epi8};
861
862    let mut i = start;
863    let len = bytes.len();
864    let needle_vec = _mm512_set1_epi8(needle.cast_signed());
865
866    while i + 64 <= len {
867        let chunk = unsafe { _mm512_loadu_si512(bytes.as_ptr().add(i).cast()) };
868        let mask = _mm512_cmpeq_epi8_mask(chunk, needle_vec);
869
870        if mask != 0 {
871            let offset = mask.trailing_zeros() as usize;
872            return Some(i + offset);
873        }
874
875        i += 64;
876    }
877
878    memchr(needle, &bytes[i..]).map(|pos| i + pos)
879}
880
881#[cfg(target_arch = "x86_64")]
882#[inline]
883fn find_byte_avx512_wrapper(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
884    unsafe { find_byte_avx512(bytes, start, needle) }
885}
886
/// Finds the first `.` or `[` at or after `start`, scanning 16 bytes at a time with SSE2
/// (32-bit x86 build).
///
/// Returns an absolute index into `bytes`. A final partial chunk of fewer than 16 bytes is
/// handed to `memchr2`. Callers are responsible for ensuring SSE2 is available;
/// `find_delim_dynamic` does so through `dispatch.has_sse2`.
#[cfg(target_arch = "x86")]
#[inline]
fn find_delim_sse2(bytes: &[u8], start: usize) -> Option<usize> {
    use std::arch::x86::{
        _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8,
    };

    const LANES: usize = 16;

    // SAFETY: these intrinsics only require SSE2, which the caller must have verified.
    let (dot, bracket) = unsafe {
        (
            _mm_set1_epi8(b'.'.cast_signed()),
            _mm_set1_epi8(b'['.cast_signed()),
        )
    };

    let mut cursor = start;
    while cursor + LANES <= bytes.len() {
        // SAFETY: the loop condition keeps `cursor..cursor + LANES` inside `bytes`, and
        // `_mm_loadu_si128` has no alignment requirement. SSE2 availability is the
        // caller's responsibility, as documented above.
        let hits = unsafe {
            let chunk = _mm_loadu_si128(bytes.as_ptr().add(cursor).cast());
            let matches = _mm_or_si128(_mm_cmpeq_epi8(chunk, dot), _mm_cmpeq_epi8(chunk, bracket));
            _mm_movemask_epi8(matches)
        }
        .cast_unsigned();

        // Bit `n` of `hits` is set when byte `n` of the chunk matched.
        if hits != 0 {
            return Some(cursor + hits.trailing_zeros() as usize);
        }

        cursor += LANES;
    }

    memchr2(b'.', b'[', &bytes[cursor..]).map(|rel| cursor + rel)
}
916
/// Finds the first `needle` at or after `start`, scanning 16 bytes at a time with SSE2
/// (32-bit x86 build).
///
/// Returns an absolute index into `bytes`. A final partial chunk of fewer than 16 bytes is
/// handed to `memchr`. Callers are responsible for ensuring SSE2 is available;
/// `find_byte_dynamic` does so through `dispatch.has_sse2`.
#[cfg(target_arch = "x86")]
#[inline]
fn find_byte_sse2(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
    use std::arch::x86::{_mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8};

    const LANES: usize = 16;

    // SAFETY: this intrinsic only requires SSE2, which the caller must have verified.
    let pattern = unsafe { _mm_set1_epi8(needle.cast_signed()) };

    let mut cursor = start;
    while cursor + LANES <= bytes.len() {
        // SAFETY: the loop condition keeps `cursor..cursor + LANES` inside `bytes`, and
        // `_mm_loadu_si128` has no alignment requirement. SSE2 availability is the
        // caller's responsibility, as documented above.
        let hits = unsafe {
            let chunk = _mm_loadu_si128(bytes.as_ptr().add(cursor).cast());
            _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, pattern))
        }
        .cast_unsigned();

        // Bit `n` of `hits` is set when byte `n` of the chunk matched.
        if hits != 0 {
            return Some(cursor + hits.trailing_zeros() as usize);
        }

        cursor += LANES;
    }

    memchr(needle, &bytes[cursor..]).map(|rel| cursor + rel)
}
941
942// =============================================================================
943// ARM NEON implementations
944// =============================================================================
945
/// Locate the first `.` or `[` at or after `start`, scanning 16 bytes per step with
/// ARM NEON.
///
/// Returns the absolute index into `bytes`, or `None` when neither delimiter occurs.
/// Whatever remains once fewer than 16 bytes are left is searched with `memchr2`.
///
/// # Panics
///
/// Panics if `start > bytes.len()` (the tail slice is taken with plain indexing).
#[cfg(target_arch = "aarch64")]
#[inline]
fn find_delim_neon(bytes: &[u8], start: usize) -> Option<usize> {
    use std::arch::aarch64::{vceqq_u8, vdupq_n_u8, vld1q_u8, vorrq_u8};

    const LANES: usize = 16;

    let total = bytes.len();
    let mut pos = start;

    // SAFETY: broadcasting a constant touches no memory.
    let dot_splat = unsafe { vdupq_n_u8(b'.') };
    // SAFETY: as above.
    let bracket_splat = unsafe { vdupq_n_u8(b'[') };

    while pos + LANES <= total {
        // SAFETY: the loop condition guarantees `pos + 16 <= bytes.len()`, so the
        // 16-byte load stays inside the slice.
        let block = unsafe { vld1q_u8(bytes.as_ptr().add(pos)) };
        // SAFETY: register-only comparison of two valid vectors.
        let dot_hits = unsafe { vceqq_u8(block, dot_splat) };
        // SAFETY: register-only comparison of two valid vectors.
        let bracket_hits = unsafe { vceqq_u8(block, bracket_splat) };
        // SAFETY: register-only OR of two valid vectors.
        let either = unsafe { vorrq_u8(dot_hits, bracket_hits) };

        // SAFETY: `either` is a valid NEON vector produced by the operations above.
        let first_hit = unsafe { neon_first_set_byte(either) };
        if let Some(lane) = first_hit {
            return Some(pos + lane);
        }

        pos += LANES;
    }

    memchr2(b'.', b'[', &bytes[pos..]).map(|tail| pos + tail)
}
974
/// Locate the first occurrence of `needle` at or after `start`, scanning 16 bytes per
/// step with ARM NEON.
///
/// Returns the absolute index into `bytes`, or `None` when `needle` does not occur.
/// Whatever remains once fewer than 16 bytes are left is searched with `memchr`.
///
/// # Panics
///
/// Panics if `start > bytes.len()` (the tail slice is taken with plain indexing).
#[cfg(target_arch = "aarch64")]
#[inline]
fn find_byte_neon(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
    use std::arch::aarch64::{vceqq_u8, vdupq_n_u8, vld1q_u8};

    const LANES: usize = 16;

    let total = bytes.len();
    let mut pos = start;

    // SAFETY: broadcasting a value touches no memory.
    let needle_splat = unsafe { vdupq_n_u8(needle) };

    while pos + LANES <= total {
        // SAFETY: the loop condition guarantees `pos + 16 <= bytes.len()`, so the
        // 16-byte load stays inside the slice.
        let block = unsafe { vld1q_u8(bytes.as_ptr().add(pos)) };
        // SAFETY: register-only comparison of two valid vectors.
        let matched = unsafe { vceqq_u8(block, needle_splat) };

        // SAFETY: `matched` is a valid NEON vector produced by the comparison above.
        let first_hit = unsafe { neon_first_set_byte(matched) };
        if let Some(lane) = first_hit {
            return Some(pos + lane);
        }

        pos += LANES;
    }

    memchr(needle, &bytes[pos..]).map(|tail| pos + tail)
}
1000
/// Report the position of the first non-zero byte of a NEON vector, viewing the
/// vector as a `[u8; 16]`.
///
/// Yields `None` when all sixteen bytes are zero.
///
/// # Safety
///
/// NOTE(review): nothing in the body appears to place a precondition on `v`; the
/// function stays `unsafe` so existing call sites remain unchanged.
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn neon_first_set_byte(v: std::arch::aarch64::uint8x16_t) -> Option<usize> {
    // SAFETY: `uint8x16_t` and `[u8; 16]` are both exactly 16 bytes, and every bit
    // pattern is a valid `[u8; 16]`.
    let lanes = unsafe { std::mem::transmute::<std::arch::aarch64::uint8x16_t, [u8; 16]>(v) };
    lanes.iter().position(|&lane| lane != 0)
}
1016
1017#[cfg(test)]
1018mod tests {
1019    use super::*;
1020
1021    // PathComponent tests
1022    #[test]
1023    fn test_path_component_field() {
1024        let comp = PathComponent::Field("test".to_string());
1025        if let PathComponent::Field(s) = comp {
1026            assert_eq!(s, "test");
1027        } else {
1028            panic!("Expected Field");
1029        }
1030    }
1031
1032    #[test]
1033    fn test_path_component_array_index() {
1034        let comp = PathComponent::ArrayIndex(42);
1035        if let PathComponent::ArrayIndex(i) = comp {
1036            assert_eq!(i, 42);
1037        } else {
1038            panic!("Expected ArrayIndex");
1039        }
1040    }
1041
1042    #[test]
1043    fn test_path_component_clone() {
1044        let comp = PathComponent::Field("test".to_string());
1045        let cloned = comp;
1046        if let PathComponent::Field(s) = cloned {
1047            assert_eq!(s, "test");
1048        }
1049    }
1050
1051    #[test]
1052    fn test_path_component_debug() {
1053        let comp = PathComponent::Field("test".to_string());
1054        let debug = format!("{comp:?}");
1055        assert!(debug.contains("Field"));
1056    }
1057
1058    // PathComponentRef tests
1059    #[test]
1060    fn test_path_component_ref_field() {
1061        let comp = PathComponentRef::Field("test");
1062        if let PathComponentRef::Field(s) = comp {
1063            assert_eq!(s, "test");
1064        } else {
1065            panic!("Expected Field");
1066        }
1067    }
1068
1069    #[test]
1070    fn test_path_component_ref_array_index() {
1071        let comp = PathComponentRef::ArrayIndex(42);
1072        if let PathComponentRef::ArrayIndex(i) = comp {
1073            assert_eq!(i, 42);
1074        } else {
1075            panic!("Expected ArrayIndex");
1076        }
1077    }
1078
1079    #[test]
1080    fn test_path_component_ref_copy() {
1081        let comp = PathComponentRef::Field("test");
1082        let copied = comp;
1083        if let PathComponentRef::Field(s) = copied {
1084            assert_eq!(s, "test");
1085        }
1086    }
1087
1088    // PathComponentRange tests
1089    #[test]
1090    fn test_path_component_range_field() {
1091        let comp = PathComponentRange::Field(0..4);
1092        if let PathComponentRange::Field(range) = comp {
1093            assert_eq!(range, 0..4);
1094        } else {
1095            panic!("Expected Field");
1096        }
1097    }
1098
1099    #[test]
1100    fn test_path_component_range_array_index() {
1101        let comp = PathComponentRange::ArrayIndex(42);
1102        if let PathComponentRange::ArrayIndex(i) = comp {
1103            assert_eq!(i, 42);
1104        } else {
1105            panic!("Expected ArrayIndex");
1106        }
1107    }
1108
1109    #[test]
1110    fn test_path_component_range_clone() {
1111        let comp = PathComponentRange::Field(0..4);
1112        let cloned = comp;
1113        if let PathComponentRange::Field(range) = cloned {
1114            assert_eq!(range, 0..4);
1115        }
1116    }
1117
1118    // ParsedPath tests
1119    #[test]
1120    fn test_parsed_path_simple() {
1121        let path = ParsedPath::parse("user.name");
1122        assert_eq!(path.path(), "user.name");
1123        assert_eq!(path.components().len(), 2);
1124    }
1125
1126    #[test]
1127    fn test_parsed_path_with_array() {
1128        let path = ParsedPath::parse("users[0].name");
1129        assert_eq!(path.components().len(), 3);
1130    }
1131
1132    #[test]
1133    fn test_parsed_path_empty() {
1134        let path = ParsedPath::parse("");
1135        assert_eq!(path.path(), "");
1136        assert_eq!(path.components().len(), 0);
1137    }
1138
1139    #[test]
1140    fn test_parsed_path_single_field() {
1141        let path = ParsedPath::parse("field");
1142        assert_eq!(path.components().len(), 1);
1143    }
1144
1145    #[test]
1146    fn test_parsed_path_components_ref() {
1147        let path = ParsedPath::parse("user.name");
1148        let mut refs = Vec::new();
1149        path.components_ref(&mut refs);
1150        assert_eq!(refs.len(), 2);
1151        if let PathComponentRef::Field(s) = refs[0] {
1152            assert_eq!(s, "user");
1153        }
1154        if let PathComponentRef::Field(s) = refs[1] {
1155            assert_eq!(s, "name");
1156        }
1157    }
1158
1159    #[test]
1160    fn test_parsed_path_components_ref_with_array() {
1161        let path = ParsedPath::parse("users[5].name");
1162        let mut refs = Vec::new();
1163        path.components_ref(&mut refs);
1164        assert_eq!(refs.len(), 3);
1165        if let PathComponentRef::ArrayIndex(i) = refs[1] {
1166            assert_eq!(i, 5);
1167        }
1168    }
1169
1170    #[test]
1171    fn test_parsed_path_clone() {
1172        let path = ParsedPath::parse("user.name");
1173        let cloned = path;
1174        assert_eq!(cloned.path(), "user.name");
1175    }
1176
1177    // PathCache tests
1178    #[test]
1179    fn test_path_cache_new() {
1180        let cache = PathCache::new();
1181        let path = cache.get_or_parse("user.name");
1182        assert_eq!(path.path(), "user.name");
1183    }
1184
1185    #[test]
1186    fn test_path_cache_returns_same_instance() {
1187        let cache = PathCache::new();
1188        let path1 = cache.get_or_parse("user.name");
1189        let path2 = cache.get_or_parse("user.name");
1190        assert!(Arc::ptr_eq(&path1, &path2));
1191    }
1192
1193    #[test]
1194    fn test_path_cache_different_paths() {
1195        let cache = PathCache::new();
1196        let path1 = cache.get_or_parse("user.name");
1197        let path2 = cache.get_or_parse("user.age");
1198        assert!(!Arc::ptr_eq(&path1, &path2));
1199    }
1200
1201    // parse_baseline tests
1202    #[test]
1203    fn test_parse_baseline_simple() {
1204        let components = parse_baseline("user.name");
1205        assert_eq!(components.len(), 2);
1206        if let PathComponent::Field(s) = &components[0] {
1207            assert_eq!(s, "user");
1208        }
1209    }
1210
1211    #[test]
1212    fn test_parse_baseline_with_array() {
1213        let components = parse_baseline("users[0].name");
1214        assert_eq!(components.len(), 3);
1215        if let PathComponent::ArrayIndex(i) = &components[1] {
1216            assert_eq!(*i, 0);
1217        }
1218    }
1219
1220    #[test]
1221    fn test_parse_baseline_empty() {
1222        let components = parse_baseline("");
1223        assert!(components.is_empty());
1224    }
1225
1226    #[test]
1227    fn test_parse_baseline_single_field() {
1228        let components = parse_baseline("field");
1229        assert_eq!(components.len(), 1);
1230    }
1231
1232    #[test]
1233    fn test_parse_baseline_nested() {
1234        let components = parse_baseline("a.b.c.d.e");
1235        assert_eq!(components.len(), 5);
1236    }
1237
1238    #[test]
1239    fn test_parse_baseline_multiple_arrays() {
1240        let components = parse_baseline("a[0][1][2]");
1241        assert_eq!(components.len(), 4);
1242    }
1243
1244    #[test]
1245    fn test_parse_baseline_leading_dot() {
1246        let components = parse_baseline(".field");
1247        assert_eq!(components.len(), 1);
1248    }
1249
1250    #[test]
1251    fn test_parse_baseline_trailing_dot() {
1252        let components = parse_baseline("field.");
1253        assert_eq!(components.len(), 1);
1254    }
1255
1256    #[test]
1257    fn test_parse_baseline_unclosed_bracket() {
1258        let components = parse_baseline("field[0");
1259        // Should handle gracefully
1260        assert!(!components.is_empty());
1261    }
1262
1263    // parse_original tests
1264    #[test]
1265    fn test_parse_original_simple() {
1266        let components = parse_original("user.name");
1267        assert_eq!(components.len(), 2);
1268    }
1269
1270    #[test]
1271    fn test_parse_original_with_array() {
1272        let components = parse_original("users[0].name");
1273        assert_eq!(components.len(), 3);
1274    }
1275
1276    #[test]
1277    fn test_parse_original_empty() {
1278        let components = parse_original("");
1279        assert!(components.is_empty());
1280    }
1281
1282    #[test]
1283    fn test_parse_original_nested() {
1284        let components = parse_original("a.b.c.d.e");
1285        assert_eq!(components.len(), 5);
1286    }
1287
1288    // parse_simd tests
1289    #[test]
1290    fn test_parse_simd_simple() {
1291        let components = parse_simd("user.name");
1292        assert_eq!(components.len(), 2);
1293    }
1294
1295    #[test]
1296    fn test_parse_simd_with_array() {
1297        let components = parse_simd("users[0].name");
1298        assert_eq!(components.len(), 3);
1299    }
1300
1301    #[test]
1302    fn test_parse_simd_empty() {
1303        let components = parse_simd("");
1304        assert!(components.is_empty());
1305    }
1306
1307    #[test]
1308    fn test_parse_simd_nested() {
1309        let components = parse_simd("a.b.c.d.e");
1310        assert_eq!(components.len(), 5);
1311    }
1312
1313    #[test]
1314    fn test_parse_simd_long_path() {
1315        // Create a path long enough to trigger SIMD processing
1316        let path = (0..100)
1317            .map(|i| format!("field{i}"))
1318            .collect::<Vec<_>>()
1319            .join(".");
1320        let components = parse_simd(&path);
1321        assert_eq!(components.len(), 100);
1322    }
1323
1324    // parse_simd cutoff variants
1325    #[test]
1326    fn test_parse_simd_cutoff_64() {
1327        let components = parse_simd_cutoff_64("user.name");
1328        assert_eq!(components.len(), 2);
1329    }
1330
1331    #[test]
1332    fn test_parse_simd_cutoff_96() {
1333        let components = parse_simd_cutoff_96("user.name");
1334        assert_eq!(components.len(), 2);
1335    }
1336
1337    #[test]
1338    fn test_parse_simd_cutoff_128() {
1339        let components = parse_simd_cutoff_128("user.name");
1340        assert_eq!(components.len(), 2);
1341    }
1342
1343    // parse_simd_ref tests
1344    #[test]
1345    fn test_parse_simd_ref_simple() {
1346        let components = parse_simd_ref("user.name");
1347        assert_eq!(components.len(), 2);
1348    }
1349
1350    #[test]
1351    fn test_parse_simd_ref_with_array() {
1352        let components = parse_simd_ref("users[0].name");
1353        assert_eq!(components.len(), 3);
1354    }
1355
1356    #[test]
1357    fn test_parse_simd_ref_empty() {
1358        let components = parse_simd_ref("");
1359        assert!(components.is_empty());
1360    }
1361
1362    #[test]
1363    fn test_parse_simd_ref_into() {
1364        let mut components = Vec::new();
1365        parse_simd_ref_into("user.name", &mut components);
1366        assert_eq!(components.len(), 2);
1367    }
1368
1369    #[test]
1370    fn test_parse_simd_ref_into_reuses_vec() {
1371        let mut components = Vec::new();
1372        parse_simd_ref_into("a.b.c", &mut components);
1373        assert_eq!(components.len(), 3);
1374        parse_simd_ref_into("x.y", &mut components);
1375        assert_eq!(components.len(), 2);
1376    }
1377
1378    #[test]
1379    fn test_parse_simd_ref_long_path() {
1380        let path = (0..100)
1381            .map(|i| format!("field{i}"))
1382            .collect::<Vec<_>>()
1383            .join(".");
1384        let components = parse_simd_ref(&path);
1385        assert_eq!(components.len(), 100);
1386    }
1387
1388    // Forced SIMD variant tests (x86_64 only)
1389    #[cfg(target_arch = "x86_64")]
1390    #[test]
1391    fn test_parse_simd_forced_avx2() {
1392        if std::is_x86_feature_detected!("avx2") {
1393            let components = parse_simd_forced_avx2("user.name");
1394            assert_eq!(components.len(), 2);
1395        }
1396    }
1397
1398    #[cfg(target_arch = "x86_64")]
1399    #[test]
1400    fn test_parse_simd_forced_avx2_with_array() {
1401        if std::is_x86_feature_detected!("avx2") {
1402            let components = parse_simd_forced_avx2("users[0].name");
1403            assert_eq!(components.len(), 3);
1404        }
1405    }
1406
1407    #[cfg(target_arch = "x86_64")]
1408    #[test]
1409    fn test_parse_simd_forced_avx2_long() {
1410        if std::is_x86_feature_detected!("avx2") {
1411            let path = (0..100)
1412                .map(|i| format!("field{i}"))
1413                .collect::<Vec<_>>()
1414                .join(".");
1415            let components = parse_simd_forced_avx2(&path);
1416            assert_eq!(components.len(), 100);
1417        }
1418    }
1419
1420    #[cfg(target_arch = "x86_64")]
1421    #[test]
1422    fn test_parse_simd_forced_avx512() {
1423        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
1424            let components = parse_simd_forced_avx512("user.name");
1425            assert_eq!(components.len(), 2);
1426        }
1427    }
1428
1429    #[cfg(target_arch = "x86_64")]
1430    #[test]
1431    fn test_parse_simd_forced_avx512_with_array() {
1432        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
1433            let components = parse_simd_forced_avx512("users[0].name");
1434            assert_eq!(components.len(), 3);
1435        }
1436    }
1437
1438    #[cfg(target_arch = "x86_64")]
1439    #[test]
1440    fn test_parse_simd_forced_avx512_long() {
1441        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
1442            let path = (0..100)
1443                .map(|i| format!("field{i}"))
1444                .collect::<Vec<_>>()
1445                .join(".");
1446            let components = parse_simd_forced_avx512(&path);
1447            assert_eq!(components.len(), 100);
1448        }
1449    }
1450
1451    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
1452    #[test]
1453    fn test_parse_simd_forced_sse2() {
1454        if std::is_x86_feature_detected!("sse2") {
1455            let components = parse_simd_forced_sse2("user.name");
1456            assert_eq!(components.len(), 2);
1457        }
1458    }
1459
1460    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
1461    #[test]
1462    fn test_parse_simd_forced_sse2_with_array() {
1463        if std::is_x86_feature_detected!("sse2") {
1464            let components = parse_simd_forced_sse2("users[0].name");
1465            assert_eq!(components.len(), 3);
1466        }
1467    }
1468
1469    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
1470    #[test]
1471    fn test_parse_simd_forced_sse2_long() {
1472        if std::is_x86_feature_detected!("sse2") {
1473            let path = (0..100)
1474                .map(|i| format!("field{i}"))
1475                .collect::<Vec<_>>()
1476                .join(".");
1477            let components = parse_simd_forced_sse2(&path);
1478            assert_eq!(components.len(), 100);
1479        }
1480    }
1481
1482    // Helper function tests
1483    #[test]
1484    fn test_parse_usize_basic() {
1485        assert_eq!(parse_usize(b"123"), 123);
1486        assert_eq!(parse_usize(b"0"), 0);
1487        assert_eq!(parse_usize(b"999"), 999);
1488    }
1489
1490    #[test]
1491    fn test_parse_usize_empty() {
1492        assert_eq!(parse_usize(b""), 0);
1493    }
1494
1495    #[test]
1496    fn test_parse_usize_with_non_digits() {
1497        assert_eq!(parse_usize(b"12x34"), 1234);
1498    }
1499
1500    #[test]
1501    fn test_estimate_components() {
1502        // estimate_components counts dots + brackets + 1
1503        // "a.b.c" has 2 dots, 0 brackets => 2 + 0 + 1 = 3
1504        assert_eq!(estimate_components(b"a.b.c"), 3);
1505        // "a[0][1]" has 0 dots, 2 brackets => 0 + 2 + 1 = 3
1506        assert_eq!(estimate_components(b"a[0][1]"), 3);
1507        assert_eq!(estimate_components(b"field"), 1);
1508    }
1509
1510    #[test]
1511    fn test_extract_field_name() {
1512        let bytes = b"hello.world";
1513        let field = extract_field_name(bytes, 0, 5);
1514        assert_eq!(field, "hello");
1515    }
1516
1517    #[test]
1518    fn test_extract_field_name_ref() {
1519        let bytes = b"hello.world";
1520        let field = extract_field_name_ref(bytes, 0, 5);
1521        assert_eq!(field, "hello");
1522    }
1523
1524    // SIMD dispatch tests
1525    #[test]
1526    fn test_simd_dispatch_init() {
1527        let dispatch = init_dispatch();
1528        // Just check that it doesn't panic
1529        let _ = dispatch.has_sse2;
1530        let _ = dispatch.has_avx2;
1531        let _ = dispatch.has_avx512;
1532    }
1533
1534    // Edge case tests
1535    #[test]
1536    fn test_consecutive_dots() {
1537        let components = parse_simd("a..b");
1538        // Should skip empty field
1539        assert_eq!(components.len(), 2);
1540    }
1541
1542    #[test]
1543    fn test_consecutive_arrays() {
1544        let components = parse_simd("a[0][1][2]");
1545        assert_eq!(components.len(), 4);
1546    }
1547
1548    #[test]
1549    fn test_array_at_start() {
1550        let components = parse_simd("[0].name");
1551        assert_eq!(components.len(), 2);
1552    }
1553
1554    #[test]
1555    fn test_array_at_end() {
1556        let components = parse_simd("users[0]");
1557        assert_eq!(components.len(), 2);
1558    }
1559
1560    #[test]
1561    fn test_complex_path() {
1562        let components = parse_simd("data.users[0].profile.settings[1].value");
1563        // data, users, [0], profile, settings, [1], value = 7 components
1564        assert_eq!(components.len(), 7);
1565    }
1566
1567    #[test]
1568    fn test_large_array_index() {
1569        let components = parse_simd("users[999999]");
1570        assert_eq!(components.len(), 2);
1571        if let PathComponent::ArrayIndex(i) = &components[1] {
1572            assert_eq!(*i, 999_999);
1573        }
1574    }
1575
1576    // Comparison tests - ensure all parsers produce same results
1577    #[test]
1578    fn test_parsers_produce_same_results() {
1579        let paths = vec![
1580            "user.name",
1581            "users[0].name",
1582            "a.b.c.d.e",
1583            "data[0][1][2]",
1584            "simple",
1585            "",
1586        ];
1587
1588        for path in paths {
1589            let baseline = parse_baseline(path);
1590            let original = parse_original(path);
1591            let simd = parse_simd(path);
1592
1593            assert_eq!(baseline.len(), original.len(), "Path: {path}");
1594            assert_eq!(baseline.len(), simd.len(), "Path: {path}");
1595
1596            for i in 0..baseline.len() {
1597                match (&baseline[i], &original[i], &simd[i]) {
1598                    (PathComponent::Field(a), PathComponent::Field(b), PathComponent::Field(c)) => {
1599                        assert_eq!(a, b);
1600                        assert_eq!(a, c);
1601                    }
1602                    (
1603                        PathComponent::ArrayIndex(a),
1604                        PathComponent::ArrayIndex(b),
1605                        PathComponent::ArrayIndex(c),
1606                    ) => {
1607                        assert_eq!(a, b);
1608                        assert_eq!(a, c);
1609                    }
1610                    _ => panic!("Mismatched component types at index {i} for path: {path}"),
1611                }
1612            }
1613        }
1614    }
1615
1616    // Long path tests to trigger SIMD branches
1617    #[test]
1618    fn test_very_long_path_baseline() {
1619        let path = (0..200)
1620            .map(|i| format!("f{i}"))
1621            .collect::<Vec<_>>()
1622            .join(".");
1623        let components = parse_baseline(&path);
1624        assert_eq!(components.len(), 200);
1625    }
1626
1627    #[test]
1628    fn test_very_long_path_simd() {
1629        let path = (0..200)
1630            .map(|i| format!("f{i}"))
1631            .collect::<Vec<_>>()
1632            .join(".");
1633        let components = parse_simd(&path);
1634        assert_eq!(components.len(), 200);
1635    }
1636
1637    #[test]
1638    fn test_very_long_path_with_arrays() {
1639        let path = (0..50)
1640            .map(|i| format!("f{i}[{i}]"))
1641            .collect::<Vec<_>>()
1642            .join(".");
1643        let components = parse_simd(&path);
1644        // Each segment has a field and an array index
1645        assert_eq!(components.len(), 100);
1646    }
1647
1648    // Dynamic dispatch tests
1649    #[test]
1650    fn test_find_delim_dynamic_short_path() {
1651        let dispatch = init_dispatch();
1652        let bytes = b"a.b";
1653        let result = find_delim_dynamic(bytes, 0, bytes.len(), dispatch);
1654        assert_eq!(result, Some(1));
1655    }
1656
1657    #[test]
1658    fn test_find_delim_dynamic_no_delim() {
1659        let dispatch = init_dispatch();
1660        let bytes = b"abcdefgh";
1661        let result = find_delim_dynamic(bytes, 0, bytes.len(), dispatch);
1662        assert_eq!(result, None);
1663    }
1664
1665    #[test]
1666    fn test_find_byte_dynamic_short() {
1667        let dispatch = init_dispatch();
1668        let bytes = b"a]b";
1669        let result = find_byte_dynamic(bytes, 0, bytes.len(), b']', dispatch);
1670        assert_eq!(result, Some(1));
1671    }
1672
1673    #[test]
1674    fn test_find_byte_dynamic_not_found() {
1675        let dispatch = init_dispatch();
1676        let bytes = b"abcdefgh";
1677        let result = find_byte_dynamic(bytes, 0, bytes.len(), b']', dispatch);
1678        assert_eq!(result, None);
1679    }
1680
1681    // SSE2 tests (x86_64)
1682    #[cfg(target_arch = "x86_64")]
1683    #[test]
1684    fn test_find_delim_sse2_basic() {
1685        let bytes = b"abcdefghijklmnop.rest";
1686        let result = find_delim_sse2(bytes, 0);
1687        assert_eq!(result, Some(16));
1688    }
1689
1690    #[cfg(target_arch = "x86_64")]
1691    #[test]
1692    fn test_find_delim_sse2_bracket() {
1693        let bytes = b"abcdefghijklmnop[rest";
1694        let result = find_delim_sse2(bytes, 0);
1695        assert_eq!(result, Some(16));
1696    }
1697
1698    #[cfg(target_arch = "x86_64")]
1699    #[test]
1700    fn test_find_delim_sse2_no_match() {
1701        let bytes = b"abcdefghijklmnopqrstuvwxyz";
1702        let result = find_delim_sse2(bytes, 0);
1703        assert_eq!(result, None);
1704    }
1705
1706    #[cfg(target_arch = "x86_64")]
1707    #[test]
1708    fn test_find_byte_sse2_basic() {
1709        let bytes = b"abcdefghijklmnop]rest";
1710        let result = find_byte_sse2(bytes, 0, b']');
1711        assert_eq!(result, Some(16));
1712    }
1713
1714    #[cfg(target_arch = "x86_64")]
1715    #[test]
1716    fn test_find_byte_sse2_no_match() {
1717        let bytes = b"abcdefghijklmnopqrstuvwxyz";
1718        let result = find_byte_sse2(bytes, 0, b']');
1719        assert_eq!(result, None);
1720    }
1721
1722    // AVX2 wrapper tests
1723    #[cfg(target_arch = "x86_64")]
1724    #[test]
1725    fn test_find_delim_avx2_wrapper() {
1726        if std::is_x86_feature_detected!("avx2") {
1727            let bytes = vec![b'x'; 100];
1728            let result = find_delim_avx2_wrapper(&bytes, 0);
1729            assert_eq!(result, None);
1730        }
1731    }
1732
1733    #[cfg(target_arch = "x86_64")]
1734    #[test]
1735    fn test_find_byte_avx2_wrapper() {
1736        if std::is_x86_feature_detected!("avx2") {
1737            let bytes = vec![b'x'; 100];
1738            let result = find_byte_avx2_wrapper(&bytes, 0, b']');
1739            assert_eq!(result, None);
1740        }
1741    }
1742
1743    // AVX512 wrapper tests
1744    #[cfg(target_arch = "x86_64")]
1745    #[test]
1746    fn test_find_delim_avx512_wrapper() {
1747        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
1748            let bytes = vec![b'x'; 200];
1749            let result = find_delim_avx512_wrapper(&bytes, 0);
1750            assert_eq!(result, None);
1751        }
1752    }
1753
1754    #[cfg(target_arch = "x86_64")]
1755    #[test]
1756    fn test_find_byte_avx512_wrapper() {
1757        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
1758            let bytes = vec![b'x'; 200];
1759            let result = find_byte_avx512_wrapper(&bytes, 0, b']');
1760            assert_eq!(result, None);
1761        }
1762    }
1763
1764    // =========================================================================
1765    // Additional Coverage Tests
1766    // =========================================================================
1767
1768    #[test]
1769    fn test_path_cache_default() {
1770        let cache = PathCache::default();
1771        let path = cache.get_or_parse("test");
1772        assert_eq!(path.path(), "test");
1773    }
1774
1775    #[test]
1776    fn test_path_component_equality() {
1777        let a = PathComponent::Field("test".to_string());
1778        let b = PathComponent::Field("test".to_string());
1779        let c = PathComponent::Field("other".to_string());
1780        assert_eq!(a, b);
1781        assert_ne!(a, c);
1782    }
1783
1784    #[test]
1785    fn test_path_component_array_equality() {
1786        let a = PathComponent::ArrayIndex(0);
1787        let b = PathComponent::ArrayIndex(0);
1788        let c = PathComponent::ArrayIndex(1);
1789        assert_eq!(a, b);
1790        assert_ne!(a, c);
1791    }
1792
1793    #[test]
1794    fn test_path_component_mixed_inequality() {
1795        let field = PathComponent::Field("0".to_string());
1796        let index = PathComponent::ArrayIndex(0);
1797        assert_ne!(field, index);
1798    }
1799
1800    #[test]
1801    fn test_path_component_ref_debug() {
1802        let comp = PathComponentRef::Field("test");
1803        let debug = format!("{comp:?}");
1804        assert!(debug.contains("Field"));
1805    }
1806
1807    #[test]
1808    fn test_path_component_range_debug() {
1809        let comp = PathComponentRange::Field(0..4);
1810        let debug = format!("{comp:?}");
1811        assert!(debug.contains("Field"));
1812    }
1813
1814    #[test]
1815    fn test_parse_simd_ranges_direct() {
1816        let mut components = Vec::new();
1817        parse_simd_ranges("user.name[0]", &mut components);
1818        assert_eq!(components.len(), 3);
1819    }
1820
1821    #[test]
1822    fn test_parse_original_unclosed_bracket() {
1823        let components = parse_original("field[0");
1824        // Should handle gracefully
1825        assert!(!components.is_empty());
1826    }
1827
1828    #[test]
1829    fn test_parse_original_leading_dot() {
1830        let components = parse_original(".field");
1831        assert_eq!(components.len(), 1);
1832    }
1833
1834    #[test]
1835    fn test_parse_original_trailing_dot() {
1836        let components = parse_original("field.");
1837        assert_eq!(components.len(), 1);
1838    }
1839
1840    #[test]
1841    fn test_parse_original_consecutive_dots() {
1842        let components = parse_original("a..b");
1843        assert_eq!(components.len(), 2);
1844    }
1845
1846    #[test]
1847    fn test_parse_usize_large() {
1848        assert_eq!(parse_usize(b"1_234_567_890"), 1_234_567_890);
1849    }
1850
1851    #[test]
1852    fn test_estimate_components_mixed() {
1853        // "a.b[0].c[1]" has 2 dots, 2 brackets => 2 + 2 + 1 = 5
1854        assert_eq!(estimate_components(b"a.b[0].c[1]"), 5);
1855    }
1856
1857    #[test]
1858    fn test_extract_field_name_utf8() {
1859        let bytes = "日本語.field".as_bytes();
1860        let field = extract_field_name(bytes, 0, 9);
1861        assert_eq!(field, "日本語");
1862    }
1863
1864    #[test]
1865    fn test_parsed_path_clone_trait() {
1866        let path = ParsedPath::parse("a.b");
1867        let cloned = path;
1868        assert_eq!(cloned.path(), "a.b");
1869    }
1870
1871    #[test]
1872    fn test_parsed_path_debug() {
1873        let path = ParsedPath::parse("a.b");
1874        let debug = format!("{path:?}");
1875        assert!(debug.contains("ParsedPath"));
1876    }
1877
1878    #[test]
1879    fn test_path_builder_many_indices() {
1880        let components = parse_simd("arr[0][1][2][3][4][5][6][7][8][9]");
1881        assert_eq!(components.len(), 11);
1882    }
1883
1884    #[test]
1885    fn test_find_delim_dynamic_with_start() {
1886        let dispatch = init_dispatch();
1887        let bytes = b"aaa.bbb";
1888        let result = find_delim_dynamic(bytes, 2, bytes.len(), dispatch);
1889        assert_eq!(result, Some(3));
1890    }
1891
1892    #[test]
1893    fn test_find_byte_dynamic_with_start() {
1894        let dispatch = init_dispatch();
1895        let bytes = b"aa]bb]cc";
1896        let result = find_byte_dynamic(bytes, 3, bytes.len(), b']', dispatch);
1897        assert_eq!(result, Some(5));
1898    }
1899
1900    #[cfg(target_arch = "x86_64")]
1901    #[test]
1902    fn test_find_delim_sse2_with_offset() {
1903        let bytes = b"abcdefghijklmnop.qrstuvwxyz.123";
1904        let result = find_delim_sse2(bytes, 17);
1905        assert_eq!(result, Some(27));
1906    }
1907
1908    #[cfg(target_arch = "x86_64")]
1909    #[test]
1910    fn test_find_byte_sse2_with_offset() {
1911        let bytes = b"abcdefghijklmnop]qrstuvwxyz]123";
1912        let result = find_byte_sse2(bytes, 17, b']');
1913        assert_eq!(result, Some(27));
1914    }
1915
1916    #[test]
1917    fn test_parse_simd_ref_long_with_arrays() {
1918        let path = (0..30)
1919            .map(|i| format!("field{i}[{i}]"))
1920            .collect::<Vec<_>>()
1921            .join(".");
1922        let components = parse_simd_ref(&path);
1923        assert_eq!(components.len(), 60);
1924    }
1925
1926    #[test]
1927    fn test_simd_dispatch_fields() {
1928        let dispatch = init_dispatch();
1929        // Test all fields are accessible
1930        let _ = dispatch.has_sse2;
1931        let _ = dispatch.has_avx2;
1932        let _ = dispatch.has_avx512;
1933        let _ = dispatch.has_neon;
1934    }
1935
1936    #[test]
1937    fn test_global_dispatch() {
1938        let dispatch = DISPATCH.get_or_init(init_dispatch);
1939        // Just verify it doesn't panic and returns valid dispatch
1940        let _ = dispatch.has_sse2;
1941    }
1942
1943    #[test]
1944    fn test_parse_simd_very_short() {
1945        let components = parse_simd("a");
1946        assert_eq!(components.len(), 1);
1947    }
1948
1949    #[test]
1950    fn test_parse_baseline_only_bracket() {
1951        let components = parse_baseline("[0]");
1952        assert_eq!(components.len(), 1);
1953    }
1954
1955    #[test]
1956    fn test_parse_simd_ref_only_bracket() {
1957        let components = parse_simd_ref("[0]");
1958        assert_eq!(components.len(), 1);
1959    }
1960
1961    // Regression test for libFuzzer crash: stack overflow on huge array index
1962    // Found by: cargo +nightly fuzz run fuzz_tape_libfuzzer
1963    // Input: [2777777777777777777777777777777777\t\0\0\0\0\0\0\0]
1964    #[test]
1965    fn test_parse_usize_overflow_regression() {
1966        // This input caused stack overflow before saturating arithmetic fix
1967        let input = "[2777777777777777777777777777777777\t\0\0\0\0\0\0\0]";
1968        let components = parse_simd(input);
1969        // Should parse without crashing; index saturates to usize::MAX
1970        assert_eq!(components.len(), 1);
1971        match &components[0] {
1972            PathComponent::ArrayIndex(idx) => {
1973                // Saturated value - exact value doesn't matter, just shouldn't crash
1974                assert!(*idx > 0);
1975            }
1976            PathComponent::Field(_) => panic!("expected ArrayIndex"),
1977        }
1978    }
1979
1980    #[test]
1981    fn test_parse_usize_large_number() {
1982        // Test that large numbers saturate instead of overflowing
1983        let input = "data[99999999999999999999999999999999]";
1984        let components = parse_simd(input);
1985        assert_eq!(components.len(), 2);
1986        match &components[1] {
1987            PathComponent::ArrayIndex(idx) => {
1988                assert_eq!(*idx, usize::MAX);
1989            }
1990            PathComponent::Field(_) => panic!("expected ArrayIndex"),
1991        }
1992    }
1993}