fionn_core/
path.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2use dashmap::DashMap;
3use memchr::{memchr, memchr_iter, memchr2};
4use std::ops::Range;
5use std::sync::{Arc, OnceLock};
6
/// Owned component of a JSON path.
///
/// Field names are stored as independent `String`s, so a value of this type
/// does not borrow from the path text it was parsed from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathComponent {
    /// A field name (e.g., "name" in "user.name")
    Field(String),
    /// An array index (e.g., 0 in "items[0]")
    ArrayIndex(usize),
}
15
/// Borrowed component of a JSON path.
///
/// Field names are slices of the original path string, so no allocation is
/// needed per component. Comparable with `==` like its owned counterpart.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathComponentRef<'a> {
    /// A field name reference
    Field(&'a str),
    /// An array index
    ArrayIndex(usize),
}
24
/// Stored path component referencing the owned path string.
///
/// A field is recorded as a byte range rather than a slice so the component
/// can live next to the `String` it indexes without borrowing from it.
/// Comparable with `==` like the other component types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathComponentRange {
    /// A field name as a range into the path string
    Field(Range<usize>),
    /// An array index
    ArrayIndex(usize),
}
33
34/// Parsed path with owned storage and component ranges.
35#[derive(Debug, Clone)]
36pub struct ParsedPath {
37    path: String,
38    components: Vec<PathComponentRange>,
39}
40
41impl ParsedPath {
42    /// Parse a path string into components.
43    #[must_use]
44    pub fn parse(path: &str) -> Self {
45        let mut components = Vec::new();
46        parse_simd_ranges(path, &mut components);
47        Self {
48            path: path.to_string(),
49            components,
50        }
51    }
52
53    /// Get the original path string.
54    #[must_use]
55    pub fn path(&self) -> &str {
56        &self.path
57    }
58
59    /// Get the parsed components.
60    #[must_use]
61    pub fn components(&self) -> &[PathComponentRange] {
62        &self.components
63    }
64
65    /// Convert components to borrowed references.
66    pub fn components_ref<'a>(&'a self, out: &mut Vec<PathComponentRef<'a>>) {
67        out.clear();
68        out.reserve(self.components.len());
69        for component in &self.components {
70            match component {
71                PathComponentRange::Field(range) => {
72                    out.push(PathComponentRef::Field(&self.path[range.clone()]));
73                }
74                PathComponentRange::ArrayIndex(index) => {
75                    out.push(PathComponentRef::ArrayIndex(*index));
76                }
77            }
78        }
79    }
80}
81
82/// Concurrent cache for parsed paths.
83pub struct PathCache {
84    map: DashMap<String, Arc<ParsedPath>>,
85}
86
87impl PathCache {
88    /// Create a new empty path cache.
89    #[must_use]
90    pub fn new() -> Self {
91        Self {
92            map: DashMap::new(),
93        }
94    }
95
96    /// Get a cached parsed path or parse and cache it.
97    #[must_use]
98    pub fn get_or_parse(&self, path: &str) -> Arc<ParsedPath> {
99        if let Some(entry) = self.map.get(path) {
100            return entry.clone();
101        }
102
103        let parsed = Arc::new(ParsedPath::parse(path));
104        self.map.insert(path.to_string(), parsed.clone());
105        parsed
106    }
107}
108
109impl Default for PathCache {
110    fn default() -> Self {
111        Self::new()
112    }
113}
114
// Default number of remaining bytes below which the parsers scan with
// `memchr`/`memchr2` instead of the SIMD dispatch helpers.
const SIMD_CUTOFF_DEFAULT: usize = 64;
// Explicit cutoffs used by the `parse_simd_cutoff_*` entry points.
const SIMD_CUTOFF_64: usize = 64;
const SIMD_CUTOFF_96: usize = 96;
const SIMD_CUTOFF_128: usize = 128;

// x86/x86_64 SIMD thresholds: minimum number of remaining bytes before the
// dynamic finders select the corresponding instruction set.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const SIMD_SSE2_THRESHOLD: usize = 64;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const SIMD_AVX2_THRESHOLD: usize = 128;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const SIMD_AVX512_THRESHOLD: usize = 256;
127
128/// Baseline scalar JSON path parser.
129#[inline]
130#[must_use]
131pub fn parse_baseline(path: &str) -> Vec<PathComponent> {
132    let mut components = Vec::new();
133    let bytes = path.as_bytes();
134    components.reserve(estimate_components(bytes));
135    let mut current_start = 0;
136    let mut i = 0;
137
138    while i < bytes.len() {
139        let next = memchr2(b'.', b'[', &bytes[i..]);
140        let Some(rel_pos) = next else {
141            break;
142        };
143        let pos = i + rel_pos;
144
145        match bytes[pos] {
146            b'.' => {
147                if pos > current_start {
148                    let field_name = extract_field_name(bytes, current_start, pos);
149                    components.push(PathComponent::Field(field_name));
150                }
151                current_start = pos + 1;
152                i = pos + 1;
153            }
154            b'[' => {
155                if pos > current_start {
156                    let field_name = extract_field_name(bytes, current_start, pos);
157                    components.push(PathComponent::Field(field_name));
158                }
159
160                let start = pos + 1;
161                let end_rel = memchr(b']', &bytes[start..]);
162                if let Some(rel_end) = end_rel {
163                    let end = start + rel_end;
164                    let index_str = &bytes[start..end];
165                    let index = parse_usize(index_str);
166                    components.push(PathComponent::ArrayIndex(index));
167                    current_start = end + 1;
168                    i = end + 1;
169                } else {
170                    break;
171                }
172            }
173            _ => {
174                i = pos + 1;
175            }
176        }
177    }
178
179    if current_start < bytes.len() {
180        let field_name = extract_field_name(bytes, current_start, bytes.len());
181        components.push(PathComponent::Field(field_name));
182    }
183
184    components
185}
186
187/// Original scalar JSON path parser (byte iteration).
188#[inline]
189#[must_use]
190pub fn parse_original(path: &str) -> Vec<PathComponent> {
191    let mut components = Vec::new();
192    let mut current_start = 0;
193    let bytes = path.as_bytes();
194    components.reserve(estimate_components(bytes));
195
196    for (i, &byte) in bytes.iter().enumerate() {
197        match byte {
198            b'.' => {
199                if i > current_start {
200                    let field_name = extract_field_name(bytes, current_start, i);
201                    components.push(PathComponent::Field(field_name));
202                }
203                current_start = i + 1;
204            }
205            b'[' => {
206                if i > current_start {
207                    let field_name = extract_field_name(bytes, current_start, i);
208                    components.push(PathComponent::Field(field_name));
209                }
210
211                let start = i + 1;
212                let mut end = start;
213                while end < bytes.len() && bytes[end] != b']' {
214                    end += 1;
215                }
216
217                if end < bytes.len() {
218                    let index_str = &bytes[start..end];
219                    let index = parse_usize(index_str);
220                    components.push(PathComponent::ArrayIndex(index));
221                    current_start = end + 1;
222                }
223            }
224            _ => {}
225        }
226    }
227
228    if current_start < bytes.len() {
229        let field_name = extract_field_name(bytes, current_start, bytes.len());
230        components.push(PathComponent::Field(field_name));
231    }
232
233    components
234}
235
/// SIMD-friendly JSON path parser with explicit SIMD delimiter scan.
///
/// Uses `SIMD_CUTOFF_DEFAULT` as the cutoff: scans with fewer remaining bytes
/// than the cutoff go through `memchr`, longer ones through the dynamic SIMD
/// finders.
#[inline]
#[must_use]
pub fn parse_simd(path: &str) -> Vec<PathComponent> {
    parse_simd_with_cutoff(path, SIMD_CUTOFF_DEFAULT)
}
242
/// Parse path with 64-byte SIMD cutoff.
///
/// Scans with fewer than 64 remaining bytes use `memchr`; longer scans use
/// the dynamic SIMD finders.
#[inline]
#[must_use]
pub fn parse_simd_cutoff_64(path: &str) -> Vec<PathComponent> {
    parse_simd_with_cutoff(path, SIMD_CUTOFF_64)
}
249
/// Parse path with 96-byte SIMD cutoff.
///
/// Scans with fewer than 96 remaining bytes use `memchr`; longer scans use
/// the dynamic SIMD finders.
#[inline]
#[must_use]
pub fn parse_simd_cutoff_96(path: &str) -> Vec<PathComponent> {
    parse_simd_with_cutoff(path, SIMD_CUTOFF_96)
}
256
/// Parse path with 128-byte SIMD cutoff.
///
/// Scans with fewer than 128 remaining bytes use `memchr`; longer scans use
/// the dynamic SIMD finders.
#[inline]
#[must_use]
pub fn parse_simd_cutoff_128(path: &str) -> Vec<PathComponent> {
    parse_simd_with_cutoff(path, SIMD_CUTOFF_128)
}
263
264/// Parse path returning borrowed component references.
265#[inline]
266#[must_use]
267pub fn parse_simd_ref(path: &str) -> Vec<PathComponentRef<'_>> {
268    let mut components = Vec::new();
269    parse_simd_ref_into(path, &mut components);
270    components
271}
272
273/// Parse path into borrowed components using provided buffer.
274#[inline]
275pub fn parse_simd_ref_into<'a>(path: &'a str, components: &mut Vec<PathComponentRef<'a>>) {
276    components.clear();
277    let bytes = path.as_bytes();
278    let mut current_start = 0;
279    let mut i = 0;
280    components.reserve(estimate_components(bytes));
281    let dispatch = DISPATCH.get_or_init(init_dispatch);
282
283    while i < bytes.len() {
284        let remaining = bytes.len().saturating_sub(i);
285        let Some(pos) = (if remaining < SIMD_CUTOFF_DEFAULT {
286            memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
287        } else {
288            find_delim_dynamic(bytes, i, remaining, *dispatch)
289        }) else {
290            break;
291        };
292
293        match bytes[pos] {
294            b'.' => {
295                if pos > current_start {
296                    let field_name = extract_field_name_ref(bytes, current_start, pos);
297                    components.push(PathComponentRef::Field(field_name));
298                }
299                current_start = pos + 1;
300                i = pos + 1;
301            }
302            b'[' => {
303                if pos > current_start {
304                    let field_name = extract_field_name_ref(bytes, current_start, pos);
305                    components.push(PathComponentRef::Field(field_name));
306                }
307
308                let start = pos + 1;
309                let remaining = bytes.len().saturating_sub(start);
310                let end = if remaining < SIMD_CUTOFF_DEFAULT {
311                    memchr(b']', &bytes[start..]).map(|pos| start + pos)
312                } else {
313                    find_byte_dynamic(bytes, start, remaining, b']', *dispatch)
314                };
315                if let Some(end) = end {
316                    let index_str = &bytes[start..end];
317                    let index = parse_usize(index_str);
318                    components.push(PathComponentRef::ArrayIndex(index));
319                    current_start = end + 1;
320                    i = end + 1;
321                } else {
322                    break;
323                }
324            }
325            _ => {
326                i = pos + 1;
327            }
328        }
329    }
330
331    if current_start < bytes.len() {
332        let field_name = extract_field_name_ref(bytes, current_start, bytes.len());
333        components.push(PathComponentRef::Field(field_name));
334    }
335}
336
337#[inline]
338fn parse_simd_ranges(path: &str, components: &mut Vec<PathComponentRange>) {
339    components.clear();
340    let bytes = path.as_bytes();
341    let mut current_start = 0;
342    let mut i = 0;
343    components.reserve(estimate_components(bytes));
344    let dispatch = DISPATCH.get_or_init(init_dispatch);
345
346    while i < bytes.len() {
347        let remaining = bytes.len().saturating_sub(i);
348        let Some(pos) = (if remaining < SIMD_CUTOFF_DEFAULT {
349            memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
350        } else {
351            find_delim_dynamic(bytes, i, remaining, *dispatch)
352        }) else {
353            break;
354        };
355
356        match bytes[pos] {
357            b'.' => {
358                if pos > current_start {
359                    components.push(PathComponentRange::Field(current_start..pos));
360                }
361                current_start = pos + 1;
362                i = pos + 1;
363            }
364            b'[' => {
365                if pos > current_start {
366                    components.push(PathComponentRange::Field(current_start..pos));
367                }
368
369                let start = pos + 1;
370                let remaining = bytes.len().saturating_sub(start);
371                let end = if remaining < SIMD_CUTOFF_DEFAULT {
372                    memchr(b']', &bytes[start..]).map(|pos| start + pos)
373                } else {
374                    find_byte_dynamic(bytes, start, remaining, b']', *dispatch)
375                };
376                if let Some(end) = end {
377                    let index_str = &bytes[start..end];
378                    let index = parse_usize(index_str);
379                    components.push(PathComponentRange::ArrayIndex(index));
380                    current_start = end + 1;
381                    i = end + 1;
382                } else {
383                    break;
384                }
385            }
386            _ => {
387                i = pos + 1;
388            }
389        }
390    }
391
392    if current_start < bytes.len() {
393        components.push(PathComponentRange::Field(current_start..bytes.len()));
394    }
395}
396
397/// Parse path using forced AVX2 instructions.
398#[cfg(target_arch = "x86_64")]
399#[inline]
400pub fn parse_simd_forced_avx2(path: &str) -> Vec<PathComponent> {
401    parse_simd_with_forced(path, find_delim_avx2_wrapper, find_byte_avx2_wrapper)
402}
403
404/// Parse path using forced AVX-512 instructions.
405#[cfg(target_arch = "x86_64")]
406#[inline]
407pub fn parse_simd_forced_avx512(path: &str) -> Vec<PathComponent> {
408    parse_simd_with_forced(path, find_delim_avx512_wrapper, find_byte_avx512_wrapper)
409}
410
/// Parse path using forced SSE2 instructions.
///
/// Hands the SSE2 finders to `parse_simd_with_forced`, which uses them for
/// every delimiter scan regardless of how many bytes remain.
#[cfg(target_arch = "x86_64")]
#[inline]
pub fn parse_simd_forced_sse2(path: &str) -> Vec<PathComponent> {
    parse_simd_with_forced(path, find_delim_sse2, find_byte_sse2)
}
417
/// Parse path using forced SSE2 instructions.
///
/// Every delimiter scan uses the SSE2 finders regardless of how many bytes
/// remain. SSE2 is detected at runtime on 32-bit x86, so when the running CPU
/// lacks it this function falls back to [`parse_simd`] instead of executing
/// SSE2 instructions.
#[cfg(target_arch = "x86")]
#[inline]
pub fn parse_simd_forced_sse2(path: &str) -> Vec<PathComponent> {
    // This is a safe public entry point, so the CPU check has to happen here.
    if !DISPATCH.get_or_init(init_dispatch).has_sse2 {
        return parse_simd(path);
    }
    parse_simd_with_forced(path, find_delim_sse2, find_byte_sse2)
}
424
/// Parse `path` with the given scalar/SIMD `cutoff` and no forced finders,
/// so scans at or above the cutoff use the dynamic dispatch helpers.
#[inline]
fn parse_simd_with_cutoff(path: &str, cutoff: usize) -> Vec<PathComponent> {
    parse_simd_with(path, cutoff, None)
}
429
/// Helper for x86 forced SIMD instruction parsing
///
/// Passes a cutoff of 0, so the `remaining < cutoff` scalar branch in
/// `parse_simd_with` is never taken and the supplied finders handle every
/// scan.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline]
fn parse_simd_with_forced(
    path: &str,
    find_delim: DelimFinder,
    find_byte: ByteFinder,
) -> Vec<PathComponent> {
    parse_simd_with(path, 0, Some((find_delim, find_byte)))
}
440
441#[inline]
442fn parse_simd_with(
443    path: &str,
444    cutoff: usize,
445    forced: Option<(DelimFinder, ByteFinder)>,
446) -> Vec<PathComponent> {
447    let mut components = Vec::new();
448    let bytes = path.as_bytes();
449    let mut current_start = 0;
450    let mut i = 0;
451    components.reserve(estimate_components(bytes));
452    let dispatch = DISPATCH.get_or_init(init_dispatch);
453
454    while i < bytes.len() {
455        let remaining = bytes.len().saturating_sub(i);
456        let Some(pos) = (if remaining < cutoff {
457            memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
458        } else if let Some((find_delim, _)) = forced {
459            find_delim(bytes, i)
460        } else {
461            find_delim_dynamic(bytes, i, remaining, *dispatch)
462        }) else {
463            break;
464        };
465
466        match bytes[pos] {
467            b'.' => {
468                if pos > current_start {
469                    let field_name = extract_field_name(bytes, current_start, pos);
470                    components.push(PathComponent::Field(field_name));
471                }
472                current_start = pos + 1;
473                i = pos + 1;
474            }
475            b'[' => {
476                if pos > current_start {
477                    let field_name = extract_field_name(bytes, current_start, pos);
478                    components.push(PathComponent::Field(field_name));
479                }
480
481                let start = pos + 1;
482                let remaining = bytes.len().saturating_sub(start);
483                let end = if remaining < cutoff {
484                    memchr(b']', &bytes[start..]).map(|pos| start + pos)
485                } else if let Some((_, find_byte)) = forced {
486                    find_byte(bytes, start, b']')
487                } else {
488                    find_byte_dynamic(bytes, start, remaining, b']', *dispatch)
489                };
490                if let Some(end) = end {
491                    let index_str = &bytes[start..end];
492                    let index = parse_usize(index_str);
493                    components.push(PathComponent::ArrayIndex(index));
494                    current_start = end + 1;
495                    i = end + 1;
496                } else {
497                    break;
498                }
499            }
500            _ => {
501                i = pos + 1;
502            }
503        }
504    }
505
506    if current_start < bytes.len() {
507        let field_name = extract_field_name(bytes, current_start, bytes.len());
508        components.push(PathComponent::Field(field_name));
509    }
510
511    components
512}
513
/// Copy `bytes[start..end]` into a new `String` without re-validating UTF-8.
///
/// # Panics
/// Panics if `start..end` is not a valid range within `bytes`.
///
/// # Caller contract
/// `bytes[start..end]` must be valid UTF-8. The parsers in this file satisfy
/// this by passing the bytes of a `&str` and cutting only at ASCII delimiter
/// positions or at the ends of the input.
#[inline]
fn extract_field_name(bytes: &[u8], start: usize, end: usize) -> String {
    let segment = &bytes[start..end];
    // SAFETY: per the caller contract, `segment` is a sub-slice of a `&str`
    // whose boundaries sit next to single-byte ASCII delimiters (or at the
    // ends of the string), so no multi-byte UTF-8 sequence is split.
    let text = unsafe { std::str::from_utf8_unchecked(segment) };
    text.to_owned()
}
526
/// View `bytes[start..end]` as a `&str` without re-validating UTF-8.
///
/// # Panics
/// Panics if `start..end` is not a valid range within `bytes`.
///
/// # Caller contract
/// `bytes[start..end]` must be valid UTF-8. The parsers in this file satisfy
/// this by passing the bytes of a `&str` and cutting only at ASCII delimiter
/// positions or at the ends of the input.
#[inline]
fn extract_field_name_ref(bytes: &[u8], start: usize, end: usize) -> &str {
    let segment = &bytes[start..end];
    // SAFETY: per the caller contract, `segment` is a sub-slice of a `&str`
    // whose boundaries sit next to single-byte ASCII delimiters (or at the
    // ends of the string), so no multi-byte UTF-8 sequence is split.
    unsafe { std::str::from_utf8_unchecked(segment) }
}
539
/// Convert the ASCII digits in `bytes` to a `usize`.
///
/// Non-digit bytes are skipped rather than rejected, so `b"1a2"` yields `12`
/// and input without any digit (including the empty slice) yields `0`.
/// Values too large for `usize` saturate at `usize::MAX` instead of
/// overflowing, so an over-long index in an untrusted path cannot panic in
/// debug builds or wrap around in release builds.
#[inline]
fn parse_usize(bytes: &[u8]) -> usize {
    bytes
        .iter()
        .filter(|byte| byte.is_ascii_digit())
        .fold(0usize, |acc, &digit| {
            acc.saturating_mul(10)
                .saturating_add(usize::from(digit - b'0'))
        })
}
550
551#[inline]
552fn estimate_components(bytes: &[u8]) -> usize {
553    let dots = memchr_iter(b'.', bytes).count();
554    let brackets = memchr_iter(b'[', bytes).count();
555    dots + brackets + 1
556}
557
// Finder for the next `.` or `[`: takes the full byte slice and a start
// index, returns the absolute index of the match.
type DelimFinder = fn(&[u8], usize) -> Option<usize>;
// Finder for a single needle byte: takes the full byte slice, a start index
// and the needle, returns the absolute index of the match.
type ByteFinder = fn(&[u8], usize, u8) -> Option<usize>;
560
/// SIMD capability flags consulted by the dynamic finder helpers.
///
/// Filled in once by `init_dispatch` for the current target architecture.
#[derive(Clone, Copy)]
#[allow(clippy::struct_excessive_bools)] // These are CPU feature flags, bools are appropriate
#[allow(dead_code)] // Fields used conditionally based on target_arch
struct SimdDispatch {
    // SSE2 finders may be used (x86 / x86_64).
    has_sse2: bool,
    // AVX2 finders may be used (x86_64).
    has_avx2: bool,
    // AVX-512 finders may be used (x86_64).
    has_avx512: bool,
    // NEON finders may be used (aarch64).
    has_neon: bool,
}
570
// Process-wide capability flags, initialised on first use through
// `DISPATCH.get_or_init(init_dispatch)`.
static DISPATCH: OnceLock<SimdDispatch> = OnceLock::new();
572
573#[cfg(target_arch = "x86_64")]
574fn init_dispatch() -> SimdDispatch {
575    SimdDispatch {
576        has_sse2: std::is_x86_feature_detected!("sse2"),
577        has_avx2: std::is_x86_feature_detected!("avx2"),
578        has_avx512: std::is_x86_feature_detected!("avx512bw")
579            && std::is_x86_feature_detected!("avx512f"),
580        has_neon: false,
581    }
582}
583
/// Detect the SIMD features of the running 32-bit x86 CPU.
///
/// Only SSE2 is probed; the wider instruction sets are reported as absent.
#[cfg(target_arch = "x86")]
fn init_dispatch() -> SimdDispatch {
    let has_sse2 = std::is_x86_feature_detected!("sse2");

    SimdDispatch {
        has_sse2,
        has_avx2: false,
        has_avx512: false,
        has_neon: false,
    }
}
593
/// Build the capability flags for aarch64: NEON is reported as available and
/// every x86 feature as absent. No runtime detection is performed.
#[cfg(target_arch = "aarch64")]
const fn init_dispatch() -> SimdDispatch {
    SimdDispatch {
        has_sse2: false,
        has_avx2: false,
        has_avx512: false,
        has_neon: true, // NEON is mandatory on aarch64
    }
}
603
/// Build the capability flags for architectures without a SIMD finder in
/// this file: every flag is false, so the dynamic helpers always fall back
/// to `memchr`.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
fn init_dispatch() -> SimdDispatch {
    SimdDispatch {
        has_sse2: false,
        has_avx2: false,
        has_avx512: false,
        has_neon: false,
    }
}
613
/// NEON threshold for using SIMD path finding (16 bytes = 128-bit vector)
///
/// The dynamic finders use the NEON routines only when at least this many
/// bytes remain.
#[cfg(target_arch = "aarch64")]
const SIMD_NEON_THRESHOLD: usize = 16;
617
/// Find the next `.` or `[` at or after `start`, choosing an implementation
/// from the detected CPU features and the number of bytes left.
///
/// * `bytes` - the full path bytes.
/// * `start` - index at which to begin scanning.
/// * `remaining` - number of bytes left from `start`; compared against the
///   per-instruction-set thresholds.
/// * `dispatch` - capability flags for the running CPU.
///
/// Returns the absolute index of the delimiter, or `None` if there is none.
/// When no SIMD branch applies, the scan falls back to `memchr2`.
#[inline]
fn find_delim_dynamic(
    bytes: &[u8],
    start: usize,
    remaining: usize,
    dispatch: SimdDispatch,
) -> Option<usize> {
    #[cfg(target_arch = "x86_64")]
    {
        // Checked widest-first: each tier needs both the CPU feature and
        // enough remaining bytes to meet its threshold.
        if dispatch.has_avx512 && remaining >= SIMD_AVX512_THRESHOLD {
            return find_delim_avx512_wrapper(bytes, start);
        }
        if dispatch.has_avx2 && remaining >= SIMD_AVX2_THRESHOLD {
            return find_delim_avx2_wrapper(bytes, start);
        }
        if dispatch.has_sse2 && remaining >= SIMD_SSE2_THRESHOLD {
            return find_delim_sse2(bytes, start);
        }
    }

    #[cfg(target_arch = "x86")]
    {
        if dispatch.has_sse2 && remaining >= SIMD_SSE2_THRESHOLD {
            return find_delim_sse2(bytes, start);
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if dispatch.has_neon && remaining >= SIMD_NEON_THRESHOLD {
            return find_delim_neon(bytes, start);
        }
    }

    // Scalar fallback: no SIMD tier applied.
    memchr2(b'.', b'[', &bytes[start..]).map(|pos| start + pos)
}
654
/// Find the next occurrence of `needle` at or after `start`, choosing an
/// implementation from the detected CPU features and the number of bytes
/// left.
///
/// * `bytes` - the full path bytes.
/// * `start` - index at which to begin scanning.
/// * `remaining` - number of bytes left from `start`; compared against the
///   per-instruction-set thresholds.
/// * `needle` - the byte to look for.
/// * `dispatch` - capability flags for the running CPU.
///
/// Returns the absolute index of the match, or `None` if there is none.
/// When no SIMD branch applies, the scan falls back to `memchr`.
#[inline]
fn find_byte_dynamic(
    bytes: &[u8],
    start: usize,
    remaining: usize,
    needle: u8,
    dispatch: SimdDispatch,
) -> Option<usize> {
    #[cfg(target_arch = "x86_64")]
    {
        // Checked widest-first: each tier needs both the CPU feature and
        // enough remaining bytes to meet its threshold.
        if dispatch.has_avx512 && remaining >= SIMD_AVX512_THRESHOLD {
            return find_byte_avx512_wrapper(bytes, start, needle);
        }
        if dispatch.has_avx2 && remaining >= SIMD_AVX2_THRESHOLD {
            return find_byte_avx2_wrapper(bytes, start, needle);
        }
        if dispatch.has_sse2 && remaining >= SIMD_SSE2_THRESHOLD {
            return find_byte_sse2(bytes, start, needle);
        }
    }

    #[cfg(target_arch = "x86")]
    {
        if dispatch.has_sse2 && remaining >= SIMD_SSE2_THRESHOLD {
            return find_byte_sse2(bytes, start, needle);
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if dispatch.has_neon && remaining >= SIMD_NEON_THRESHOLD {
            return find_byte_neon(bytes, start, needle);
        }
    }

    // Scalar fallback: no SIMD tier applied.
    memchr(needle, &bytes[start..]).map(|pos| start + pos)
}
692
693#[cfg(target_arch = "x86_64")]
694#[inline]
695fn find_delim_sse2(bytes: &[u8], start: usize) -> Option<usize> {
696    use std::arch::x86_64::{
697        _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8,
698    };
699
700    let mut i = start;
701    let len = bytes.len();
702
703    let dot = unsafe { _mm_set1_epi8(b'.'.cast_signed()) };
704    let bracket = unsafe { _mm_set1_epi8(b'['.cast_signed()) };
705
706    while i + 16 <= len {
707        let chunk = unsafe { _mm_loadu_si128(bytes.as_ptr().add(i).cast()) };
708        let eq_dot = unsafe { _mm_cmpeq_epi8(chunk, dot) };
709        let eq_bracket = unsafe { _mm_cmpeq_epi8(chunk, bracket) };
710        let mask = unsafe { _mm_movemask_epi8(_mm_or_si128(eq_dot, eq_bracket)) }.cast_unsigned();
711
712        if mask != 0 {
713            let offset = mask.trailing_zeros() as usize;
714            return Some(i + offset);
715        }
716
717        i += 16;
718    }
719
720    memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
721}
722
723#[cfg(target_arch = "x86_64")]
724#[target_feature(enable = "avx2")]
725#[inline]
726unsafe fn find_delim_avx2(bytes: &[u8], start: usize) -> Option<usize> {
727    use std::arch::x86_64::{
728        _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8, _mm256_or_si256,
729        _mm256_set1_epi8,
730    };
731
732    let mut i = start;
733    let len = bytes.len();
734    let dot = _mm256_set1_epi8(b'.'.cast_signed());
735    let bracket = _mm256_set1_epi8(b'['.cast_signed());
736
737    while i + 32 <= len {
738        let chunk = unsafe { _mm256_loadu_si256(bytes.as_ptr().add(i).cast()) };
739        let eq_dot = _mm256_cmpeq_epi8(chunk, dot);
740        let eq_bracket = _mm256_cmpeq_epi8(chunk, bracket);
741        let mask = _mm256_movemask_epi8(_mm256_or_si256(eq_dot, eq_bracket)).cast_unsigned();
742
743        if mask != 0 {
744            let offset = mask.trailing_zeros() as usize;
745            return Some(i + offset);
746        }
747
748        i += 32;
749    }
750
751    memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
752}
753
/// Safe-signature shim around `find_delim_avx2` so it can be used where a
/// `DelimFinder` function pointer is expected.
///
/// NOTE(review): only sound on CPUs with AVX2; `find_delim_dynamic` checks
/// `dispatch.has_avx2` before calling this.
#[cfg(target_arch = "x86_64")]
#[inline]
fn find_delim_avx2_wrapper(bytes: &[u8], start: usize) -> Option<usize> {
    // SAFETY: relies on the caller having confirmed AVX2 support.
    unsafe { find_delim_avx2(bytes, start) }
}
759
760#[cfg(target_arch = "x86_64")]
761#[target_feature(enable = "avx512bw,avx512f")]
762#[inline]
763unsafe fn find_delim_avx512(bytes: &[u8], start: usize) -> Option<usize> {
764    use std::arch::x86_64::{_mm512_cmpeq_epi8_mask, _mm512_loadu_si512, _mm512_set1_epi8};
765
766    let mut i = start;
767    let len = bytes.len();
768    let dot = _mm512_set1_epi8(b'.'.cast_signed());
769    let bracket = _mm512_set1_epi8(b'['.cast_signed());
770
771    while i + 64 <= len {
772        let chunk = unsafe { _mm512_loadu_si512(bytes.as_ptr().add(i).cast()) };
773        let dot_mask = _mm512_cmpeq_epi8_mask(chunk, dot);
774        let bracket_mask = _mm512_cmpeq_epi8_mask(chunk, bracket);
775        let mask = dot_mask | bracket_mask;
776
777        if mask != 0 {
778            let offset = mask.trailing_zeros() as usize;
779            return Some(i + offset);
780        }
781
782        i += 64;
783    }
784
785    memchr2(b'.', b'[', &bytes[i..]).map(|pos| i + pos)
786}
787
/// Safe-signature shim around `find_delim_avx512` so it can be used where a
/// `DelimFinder` function pointer is expected.
///
/// NOTE(review): only sound on CPUs with AVX-512F/BW; `find_delim_dynamic`
/// checks `dispatch.has_avx512` before calling this.
#[cfg(target_arch = "x86_64")]
#[inline]
fn find_delim_avx512_wrapper(bytes: &[u8], start: usize) -> Option<usize> {
    // SAFETY: relies on the caller having confirmed AVX-512F/BW support.
    unsafe { find_delim_avx512(bytes, start) }
}
793
794#[cfg(target_arch = "x86_64")]
795#[inline]
796fn find_byte_sse2(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
797    use std::arch::x86_64::{_mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8};
798
799    let mut i = start;
800    let len = bytes.len();
801    let needle_vec = unsafe { _mm_set1_epi8(needle.cast_signed()) };
802
803    while i + 16 <= len {
804        let chunk = unsafe { _mm_loadu_si128(bytes.as_ptr().add(i).cast()) };
805        let eq = unsafe { _mm_cmpeq_epi8(chunk, needle_vec) };
806        let mask = unsafe { _mm_movemask_epi8(eq) }.cast_unsigned();
807
808        if mask != 0 {
809            let offset = mask.trailing_zeros() as usize;
810            return Some(i + offset);
811        }
812
813        i += 16;
814    }
815
816    memchr(needle, &bytes[i..]).map(|pos| i + pos)
817}
818
819#[cfg(target_arch = "x86_64")]
820#[target_feature(enable = "avx2")]
821#[inline]
822unsafe fn find_byte_avx2(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
823    use std::arch::x86_64::{
824        _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8, _mm256_set1_epi8,
825    };
826
827    let mut i = start;
828    let len = bytes.len();
829    let needle_vec = _mm256_set1_epi8(needle.cast_signed());
830
831    while i + 32 <= len {
832        let chunk = unsafe { _mm256_loadu_si256(bytes.as_ptr().add(i).cast()) };
833        let eq = _mm256_cmpeq_epi8(chunk, needle_vec);
834        let mask = _mm256_movemask_epi8(eq).cast_unsigned();
835
836        if mask != 0 {
837            let offset = mask.trailing_zeros() as usize;
838            return Some(i + offset);
839        }
840
841        i += 32;
842    }
843
844    memchr(needle, &bytes[i..]).map(|pos| i + pos)
845}
846
/// Safe-signature shim around `find_byte_avx2` so it can be used where a
/// `ByteFinder` function pointer is expected.
///
/// NOTE(review): only sound on CPUs with AVX2; `find_byte_dynamic` checks
/// `dispatch.has_avx2` before calling this.
#[cfg(target_arch = "x86_64")]
#[inline]
fn find_byte_avx2_wrapper(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
    // SAFETY: relies on the caller having confirmed AVX2 support.
    unsafe { find_byte_avx2(bytes, start, needle) }
}
852
853#[cfg(target_arch = "x86_64")]
854#[target_feature(enable = "avx512bw,avx512f")]
855#[inline]
856unsafe fn find_byte_avx512(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
857    use std::arch::x86_64::{_mm512_cmpeq_epi8_mask, _mm512_loadu_si512, _mm512_set1_epi8};
858
859    let mut i = start;
860    let len = bytes.len();
861    let needle_vec = _mm512_set1_epi8(needle.cast_signed());
862
863    while i + 64 <= len {
864        let chunk = unsafe { _mm512_loadu_si512(bytes.as_ptr().add(i).cast()) };
865        let mask = _mm512_cmpeq_epi8_mask(chunk, needle_vec);
866
867        if mask != 0 {
868            let offset = mask.trailing_zeros() as usize;
869            return Some(i + offset);
870        }
871
872        i += 64;
873    }
874
875    memchr(needle, &bytes[i..]).map(|pos| i + pos)
876}
877
/// Safe-signature shim around `find_byte_avx512` so it can be used where a
/// `ByteFinder` function pointer is expected.
///
/// NOTE(review): only sound on CPUs with AVX-512F/BW; `find_byte_dynamic`
/// checks `dispatch.has_avx512` before calling this.
#[cfg(target_arch = "x86_64")]
#[inline]
fn find_byte_avx512_wrapper(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
    // SAFETY: relies on the caller having confirmed AVX-512F/BW support.
    unsafe { find_byte_avx512(bytes, start, needle) }
}
883
/// Scan `bytes[start..]` for the first `.` or `[`, 16 bytes at a time with
/// SSE2 compares (32-bit x86 build).
///
/// Returns the absolute index of the match, or `None` if neither delimiter
/// occurs. A tail shorter than 16 bytes is handled by `memchr2`.
///
/// NOTE(review): requires SSE2 on the running CPU; `find_delim_dynamic`
/// checks `dispatch.has_sse2` before calling this.
#[cfg(target_arch = "x86")]
#[inline]
fn find_delim_sse2(bytes: &[u8], start: usize) -> Option<usize> {
    use std::arch::x86::{
        _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8,
    };

    const LANES: usize = 16;

    let total = bytes.len();
    let mut cursor = start;

    // SAFETY: relies on the caller having confirmed SSE2 support.
    let (dot_splat, bracket_splat) = unsafe {
        (
            _mm_set1_epi8(b'.'.cast_signed()),
            _mm_set1_epi8(b'['.cast_signed()),
        )
    };

    while cursor + LANES <= total {
        // SAFETY: the loop condition keeps `cursor..cursor + 16` inside
        // `bytes`, and `_mm_loadu_si128` is an unaligned load.
        let hits = unsafe {
            let block = _mm_loadu_si128(bytes.as_ptr().add(cursor).cast());
            let matched = _mm_or_si128(
                _mm_cmpeq_epi8(block, dot_splat),
                _mm_cmpeq_epi8(block, bracket_splat),
            );
            _mm_movemask_epi8(matched)
        }
        .cast_unsigned();

        if hits != 0 {
            // Lowest set bit corresponds to the earliest matching byte.
            return Some(cursor + hits.trailing_zeros() as usize);
        }

        cursor += LANES;
    }

    memchr2(b'.', b'[', &bytes[cursor..]).map(|rel| cursor + rel)
}
913
/// Scan `bytes[start..]` for the first `needle`, 16 bytes at a time with
/// SSE2 compares (32-bit x86 build).
///
/// Returns the absolute index of the match, or `None` if `needle` does not
/// occur. A tail shorter than 16 bytes is handled by `memchr`.
///
/// NOTE(review): requires SSE2 on the running CPU; `find_byte_dynamic`
/// checks `dispatch.has_sse2` before calling this.
#[cfg(target_arch = "x86")]
#[inline]
fn find_byte_sse2(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
    use std::arch::x86::{_mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8};

    const LANES: usize = 16;

    let total = bytes.len();
    let mut cursor = start;

    // SAFETY: relies on the caller having confirmed SSE2 support.
    let needle_splat = unsafe { _mm_set1_epi8(needle.cast_signed()) };

    while cursor + LANES <= total {
        // SAFETY: the loop condition keeps `cursor..cursor + 16` inside
        // `bytes`, and `_mm_loadu_si128` is an unaligned load.
        let hits = unsafe {
            let block = _mm_loadu_si128(bytes.as_ptr().add(cursor).cast());
            _mm_movemask_epi8(_mm_cmpeq_epi8(block, needle_splat))
        }
        .cast_unsigned();

        if hits != 0 {
            // Lowest set bit corresponds to the earliest matching byte.
            return Some(cursor + hits.trailing_zeros() as usize);
        }

        cursor += LANES;
    }

    memchr(needle, &bytes[cursor..]).map(|rel| cursor + rel)
}
938
939// =============================================================================
940// ARM NEON implementations
941// =============================================================================
942
/// Scan `bytes[start..]` for the first `.` or `[`, 16 bytes at a time with
/// ARM NEON compares.
///
/// Returns the absolute index of the match, or `None` if neither delimiter
/// occurs. A tail shorter than 16 bytes is handled by `memchr2`.
#[cfg(target_arch = "aarch64")]
#[inline]
fn find_delim_neon(bytes: &[u8], start: usize) -> Option<usize> {
    use std::arch::aarch64::{vceqq_u8, vdupq_n_u8, vld1q_u8, vorrq_u8};

    const LANES: usize = 16;

    let total = bytes.len();
    let mut cursor = start;

    // SAFETY: NEON is mandatory on aarch64, so these intrinsics are available.
    let (dot_splat, bracket_splat) = unsafe { (vdupq_n_u8(b'.'), vdupq_n_u8(b'[')) };

    while cursor + LANES <= total {
        // SAFETY: the loop condition keeps `cursor..cursor + 16` inside
        // `bytes`; `matched` is a valid NEON vector produced by the compares
        // just above it.
        let first_hit = unsafe {
            let block = vld1q_u8(bytes.as_ptr().add(cursor));
            let matched = vorrq_u8(vceqq_u8(block, dot_splat), vceqq_u8(block, bracket_splat));
            neon_first_set_byte(matched)
        };

        if let Some(lane) = first_hit {
            return Some(cursor + lane);
        }

        cursor += LANES;
    }

    memchr2(b'.', b'[', &bytes[cursor..]).map(|rel| cursor + rel)
}
971
/// Locate the first occurrence of `needle` at or after `start` using ARM NEON.
///
/// Full 16-byte chunks are compared with NEON; whatever remains after the last full chunk
/// is searched with `memchr`. The returned index is relative to the start of `bytes`.
///
/// Panics if `start > bytes.len()` (the tail slice is taken with ordinary indexing).
#[cfg(target_arch = "aarch64")]
#[inline]
fn find_byte_neon(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
    use std::arch::aarch64::{vceqq_u8, vdupq_n_u8, vld1q_u8};

    /// Number of bytes covered by one 128-bit vector.
    const LANES: usize = 16;

    let total = bytes.len();

    // Broadcast the needle into every lane once, outside the loop.
    let splat = unsafe { vdupq_n_u8(needle) };

    let mut cursor = start;
    while cursor + LANES <= total {
        // SAFETY: `cursor + 16 <= total`, so the 16-byte load stays inside `bytes`.
        let hits = unsafe { vceqq_u8(vld1q_u8(bytes.as_ptr().add(cursor)), splat) };

        // SAFETY: `hits` is a valid NEON vector produced by the comparison above.
        let first = unsafe { neon_first_set_byte(hits) };
        if let Some(lane) = first {
            return Some(cursor + lane);
        }

        cursor += LANES;
    }

    // Fewer than 16 bytes remain: finish with the scalar search.
    let rest = memchr(needle, &bytes[cursor..])?;
    Some(cursor + rest)
}
997
/// Return the lane index of the first non-zero byte in a NEON vector.
///
/// Yields `None` when all sixteen lanes are zero.
///
/// # Safety
///
/// The body only reinterprets the vector as sixteen bytes; no caller-side precondition
/// is evident from this function itself.
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn neon_first_set_byte(v: std::arch::aarch64::uint8x16_t) -> Option<usize> {
    // SAFETY: `uint8x16_t` and `[u8; 16]` are both 16 bytes wide, and every bit pattern
    // is a valid `[u8; 16]`.
    let lanes =
        unsafe { std::mem::transmute::<std::arch::aarch64::uint8x16_t, [u8; 16]>(v) };
    lanes.iter().position(|&lane| lane != 0)
}
1013
// Unit tests for the path component types, the parser variants, and the delimiter /
// byte search helpers.
//
// Architecture-specific tests are gated with `#[cfg(target_arch = ...)]`. The forced-SIMD
// and wrapper tests additionally check CPU features at runtime and do nothing when the
// feature is missing.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `.`-joined path with `count` segments, where segment `i` is `segment(i)`.
    fn dotted_path(count: usize, segment: impl Fn(usize) -> String) -> String {
        (0..count).map(segment).collect::<Vec<_>>().join(".")
    }

    // PathComponent tests
    #[test]
    fn test_path_component_field() {
        let comp = PathComponent::Field("test".to_string());
        if let PathComponent::Field(s) = comp {
            assert_eq!(s, "test");
        } else {
            panic!("Expected Field");
        }
    }

    #[test]
    fn test_path_component_array_index() {
        let comp = PathComponent::ArrayIndex(42);
        if let PathComponent::ArrayIndex(i) = comp {
            assert_eq!(i, 42);
        } else {
            panic!("Expected ArrayIndex");
        }
    }

    #[test]
    fn test_path_component_clone() {
        let comp = PathComponent::Field("test".to_string());
        // Actually call `Clone`; the original stays alive so both values can be compared.
        let cloned = comp.clone();
        assert_eq!(comp, cloned);
        assert_eq!(cloned, PathComponent::Field("test".to_string()));
    }

    #[test]
    fn test_path_component_debug() {
        let comp = PathComponent::Field("test".to_string());
        let debug = format!("{comp:?}");
        assert!(debug.contains("Field"));
    }

    // PathComponentRef tests
    #[test]
    fn test_path_component_ref_field() {
        let comp = PathComponentRef::Field("test");
        if let PathComponentRef::Field(s) = comp {
            assert_eq!(s, "test");
        } else {
            panic!("Expected Field");
        }
    }

    #[test]
    fn test_path_component_ref_array_index() {
        let comp = PathComponentRef::ArrayIndex(42);
        if let PathComponentRef::ArrayIndex(i) = comp {
            assert_eq!(i, 42);
        } else {
            panic!("Expected ArrayIndex");
        }
    }

    #[test]
    fn test_path_component_ref_copy() {
        let comp = PathComponentRef::Field("test");
        let copied = comp;
        // Using `comp` after the assignment only compiles because the type is `Copy`.
        assert!(
            matches!(comp, PathComponentRef::Field("test")),
            "unexpected source value: {comp:?}"
        );
        assert!(
            matches!(copied, PathComponentRef::Field("test")),
            "unexpected copied value: {copied:?}"
        );
    }

    // PathComponentRange tests
    #[test]
    fn test_path_component_range_field() {
        let comp = PathComponentRange::Field(0..4);
        if let PathComponentRange::Field(range) = comp {
            assert_eq!(range, 0..4);
        } else {
            panic!("Expected Field");
        }
    }

    #[test]
    fn test_path_component_range_array_index() {
        let comp = PathComponentRange::ArrayIndex(42);
        if let PathComponentRange::ArrayIndex(i) = comp {
            assert_eq!(i, 42);
        } else {
            panic!("Expected ArrayIndex");
        }
    }

    #[test]
    fn test_path_component_range_clone() {
        let comp = PathComponentRange::Field(0..4);
        // Actually call `Clone`; the type has no `PartialEq`, so compare via patterns.
        let cloned = comp.clone();
        assert!(
            matches!(&comp, PathComponentRange::Field(range) if *range == (0..4)),
            "unexpected source value: {comp:?}"
        );
        assert!(
            matches!(&cloned, PathComponentRange::Field(range) if *range == (0..4)),
            "unexpected cloned value: {cloned:?}"
        );
    }

    // ParsedPath tests
    #[test]
    fn test_parsed_path_simple() {
        let path = ParsedPath::parse("user.name");
        assert_eq!(path.path(), "user.name");
        assert_eq!(path.components().len(), 2);
    }

    #[test]
    fn test_parsed_path_with_array() {
        let path = ParsedPath::parse("users[0].name");
        assert_eq!(path.components().len(), 3);
    }

    #[test]
    fn test_parsed_path_empty() {
        let path = ParsedPath::parse("");
        assert_eq!(path.path(), "");
        assert_eq!(path.components().len(), 0);
    }

    #[test]
    fn test_parsed_path_single_field() {
        let path = ParsedPath::parse("field");
        assert_eq!(path.components().len(), 1);
    }

    #[test]
    fn test_parsed_path_components_ref() {
        let path = ParsedPath::parse("user.name");
        let mut refs = Vec::new();
        path.components_ref(&mut refs);
        assert_eq!(refs.len(), 2);
        // Fail loudly if a component has the wrong variant instead of skipping the check.
        assert!(
            matches!(refs[0], PathComponentRef::Field("user")),
            "unexpected first component: {:?}",
            refs[0]
        );
        assert!(
            matches!(refs[1], PathComponentRef::Field("name")),
            "unexpected second component: {:?}",
            refs[1]
        );
    }

    #[test]
    fn test_parsed_path_components_ref_with_array() {
        let path = ParsedPath::parse("users[5].name");
        let mut refs = Vec::new();
        path.components_ref(&mut refs);
        assert_eq!(refs.len(), 3);
        assert!(
            matches!(refs[1], PathComponentRef::ArrayIndex(5)),
            "unexpected second component: {:?}",
            refs[1]
        );
    }

    #[test]
    fn test_parsed_path_clone() {
        let path = ParsedPath::parse("user.name");
        // Actually call `Clone` and check the copy against the still-live original.
        let cloned = path.clone();
        assert_eq!(cloned.path(), "user.name");
        assert_eq!(cloned.path(), path.path());
        assert_eq!(cloned.components().len(), path.components().len());
    }

    // PathCache tests
    #[test]
    fn test_path_cache_new() {
        let cache = PathCache::new();
        let path = cache.get_or_parse("user.name");
        assert_eq!(path.path(), "user.name");
    }

    #[test]
    fn test_path_cache_returns_same_instance() {
        let cache = PathCache::new();
        let path1 = cache.get_or_parse("user.name");
        let path2 = cache.get_or_parse("user.name");
        assert!(Arc::ptr_eq(&path1, &path2));
    }

    #[test]
    fn test_path_cache_different_paths() {
        let cache = PathCache::new();
        let path1 = cache.get_or_parse("user.name");
        let path2 = cache.get_or_parse("user.age");
        assert!(!Arc::ptr_eq(&path1, &path2));
    }

    // parse_baseline tests
    #[test]
    fn test_parse_baseline_simple() {
        let components = parse_baseline("user.name");
        assert_eq!(components.len(), 2);
        assert_eq!(components[0], PathComponent::Field("user".to_string()));
    }

    #[test]
    fn test_parse_baseline_with_array() {
        let components = parse_baseline("users[0].name");
        assert_eq!(components.len(), 3);
        assert_eq!(components[1], PathComponent::ArrayIndex(0));
    }

    #[test]
    fn test_parse_baseline_empty() {
        let components = parse_baseline("");
        assert!(components.is_empty());
    }

    #[test]
    fn test_parse_baseline_single_field() {
        let components = parse_baseline("field");
        assert_eq!(components.len(), 1);
    }

    #[test]
    fn test_parse_baseline_nested() {
        let components = parse_baseline("a.b.c.d.e");
        assert_eq!(components.len(), 5);
    }

    #[test]
    fn test_parse_baseline_multiple_arrays() {
        let components = parse_baseline("a[0][1][2]");
        assert_eq!(components.len(), 4);
    }

    #[test]
    fn test_parse_baseline_leading_dot() {
        let components = parse_baseline(".field");
        assert_eq!(components.len(), 1);
    }

    #[test]
    fn test_parse_baseline_trailing_dot() {
        let components = parse_baseline("field.");
        assert_eq!(components.len(), 1);
    }

    #[test]
    fn test_parse_baseline_unclosed_bracket() {
        let components = parse_baseline("field[0");
        // Should handle gracefully
        assert!(!components.is_empty());
    }

    // parse_original tests
    #[test]
    fn test_parse_original_simple() {
        let components = parse_original("user.name");
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_parse_original_with_array() {
        let components = parse_original("users[0].name");
        assert_eq!(components.len(), 3);
    }

    #[test]
    fn test_parse_original_empty() {
        let components = parse_original("");
        assert!(components.is_empty());
    }

    #[test]
    fn test_parse_original_nested() {
        let components = parse_original("a.b.c.d.e");
        assert_eq!(components.len(), 5);
    }

    // parse_simd tests
    #[test]
    fn test_parse_simd_simple() {
        let components = parse_simd("user.name");
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_parse_simd_with_array() {
        let components = parse_simd("users[0].name");
        assert_eq!(components.len(), 3);
    }

    #[test]
    fn test_parse_simd_empty() {
        let components = parse_simd("");
        assert!(components.is_empty());
    }

    #[test]
    fn test_parse_simd_nested() {
        let components = parse_simd("a.b.c.d.e");
        assert_eq!(components.len(), 5);
    }

    #[test]
    fn test_parse_simd_long_path() {
        // Create a path long enough to trigger SIMD processing
        let path = dotted_path(100, |i| format!("field{i}"));
        let components = parse_simd(&path);
        assert_eq!(components.len(), 100);
    }

    // parse_simd cutoff variants
    #[test]
    fn test_parse_simd_cutoff_64() {
        let components = parse_simd_cutoff_64("user.name");
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_parse_simd_cutoff_96() {
        let components = parse_simd_cutoff_96("user.name");
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_parse_simd_cutoff_128() {
        let components = parse_simd_cutoff_128("user.name");
        assert_eq!(components.len(), 2);
    }

    // parse_simd_ref tests
    #[test]
    fn test_parse_simd_ref_simple() {
        let components = parse_simd_ref("user.name");
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_parse_simd_ref_with_array() {
        let components = parse_simd_ref("users[0].name");
        assert_eq!(components.len(), 3);
    }

    #[test]
    fn test_parse_simd_ref_empty() {
        let components = parse_simd_ref("");
        assert!(components.is_empty());
    }

    #[test]
    fn test_parse_simd_ref_into() {
        let mut components = Vec::new();
        parse_simd_ref_into("user.name", &mut components);
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_parse_simd_ref_into_reuses_vec() {
        let mut components = Vec::new();
        parse_simd_ref_into("a.b.c", &mut components);
        assert_eq!(components.len(), 3);
        parse_simd_ref_into("x.y", &mut components);
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_parse_simd_ref_long_path() {
        let path = dotted_path(100, |i| format!("field{i}"));
        let components = parse_simd_ref(&path);
        assert_eq!(components.len(), 100);
    }

    // Forced SIMD variant tests (x86_64 only)
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_parse_simd_forced_avx2() {
        if std::is_x86_feature_detected!("avx2") {
            let components = parse_simd_forced_avx2("user.name");
            assert_eq!(components.len(), 2);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_parse_simd_forced_avx2_with_array() {
        if std::is_x86_feature_detected!("avx2") {
            let components = parse_simd_forced_avx2("users[0].name");
            assert_eq!(components.len(), 3);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_parse_simd_forced_avx2_long() {
        if std::is_x86_feature_detected!("avx2") {
            let path = dotted_path(100, |i| format!("field{i}"));
            let components = parse_simd_forced_avx2(&path);
            assert_eq!(components.len(), 100);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_parse_simd_forced_avx512() {
        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
            let components = parse_simd_forced_avx512("user.name");
            assert_eq!(components.len(), 2);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_parse_simd_forced_avx512_with_array() {
        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
            let components = parse_simd_forced_avx512("users[0].name");
            assert_eq!(components.len(), 3);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_parse_simd_forced_avx512_long() {
        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
            let path = dotted_path(100, |i| format!("field{i}"));
            let components = parse_simd_forced_avx512(&path);
            assert_eq!(components.len(), 100);
        }
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    #[test]
    fn test_parse_simd_forced_sse2() {
        if std::is_x86_feature_detected!("sse2") {
            let components = parse_simd_forced_sse2("user.name");
            assert_eq!(components.len(), 2);
        }
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    #[test]
    fn test_parse_simd_forced_sse2_with_array() {
        if std::is_x86_feature_detected!("sse2") {
            let components = parse_simd_forced_sse2("users[0].name");
            assert_eq!(components.len(), 3);
        }
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    #[test]
    fn test_parse_simd_forced_sse2_long() {
        if std::is_x86_feature_detected!("sse2") {
            let path = dotted_path(100, |i| format!("field{i}"));
            let components = parse_simd_forced_sse2(&path);
            assert_eq!(components.len(), 100);
        }
    }

    // Helper function tests
    #[test]
    fn test_parse_usize_basic() {
        assert_eq!(parse_usize(b"123"), 123);
        assert_eq!(parse_usize(b"0"), 0);
        assert_eq!(parse_usize(b"999"), 999);
    }

    #[test]
    fn test_parse_usize_empty() {
        assert_eq!(parse_usize(b""), 0);
    }

    #[test]
    fn test_parse_usize_with_non_digits() {
        assert_eq!(parse_usize(b"12x34"), 1234);
    }

    #[test]
    fn test_estimate_components() {
        // estimate_components counts dots + brackets + 1
        // "a.b.c" has 2 dots, 0 brackets => 2 + 0 + 1 = 3
        assert_eq!(estimate_components(b"a.b.c"), 3);
        // "a[0][1]" has 0 dots, 2 brackets => 0 + 2 + 1 = 3
        assert_eq!(estimate_components(b"a[0][1]"), 3);
        assert_eq!(estimate_components(b"field"), 1);
    }

    #[test]
    fn test_extract_field_name() {
        let bytes = b"hello.world";
        let field = extract_field_name(bytes, 0, 5);
        assert_eq!(field, "hello");
    }

    #[test]
    fn test_extract_field_name_ref() {
        let bytes = b"hello.world";
        let field = extract_field_name_ref(bytes, 0, 5);
        assert_eq!(field, "hello");
    }

    // SIMD dispatch tests
    #[test]
    fn test_simd_dispatch_init() {
        let dispatch = init_dispatch();
        // Just check that it doesn't panic
        let _ = dispatch.has_sse2;
        let _ = dispatch.has_avx2;
        let _ = dispatch.has_avx512;
    }

    // Edge case tests
    #[test]
    fn test_consecutive_dots() {
        let components = parse_simd("a..b");
        // Should skip empty field
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_consecutive_arrays() {
        let components = parse_simd("a[0][1][2]");
        assert_eq!(components.len(), 4);
    }

    #[test]
    fn test_array_at_start() {
        let components = parse_simd("[0].name");
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_array_at_end() {
        let components = parse_simd("users[0]");
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_complex_path() {
        let components = parse_simd("data.users[0].profile.settings[1].value");
        // data, users, [0], profile, settings, [1], value = 7 components
        assert_eq!(components.len(), 7);
    }

    #[test]
    fn test_large_array_index() {
        let components = parse_simd("users[999999]");
        assert_eq!(components.len(), 2);
        assert_eq!(components[1], PathComponent::ArrayIndex(999_999));
    }

    // Comparison tests - ensure all parsers produce same results
    #[test]
    fn test_parsers_produce_same_results() {
        let paths = [
            "user.name",
            "users[0].name",
            "a.b.c.d.e",
            "data[0][1][2]",
            "simple",
            "",
        ];

        for path in paths {
            let baseline = parse_baseline(path);
            let original = parse_original(path);
            let simd = parse_simd(path);

            assert_eq!(baseline.len(), original.len(), "Path: {path}");
            assert_eq!(baseline.len(), simd.len(), "Path: {path}");

            // `PathComponent` derives `PartialEq`, so variant and payload are compared together.
            for (i, expected) in baseline.iter().enumerate() {
                assert_eq!(
                    expected, &original[i],
                    "parse_original differs at index {i} for path: {path}"
                );
                assert_eq!(
                    expected, &simd[i],
                    "parse_simd differs at index {i} for path: {path}"
                );
            }
        }
    }

    // Long path tests to trigger SIMD branches
    #[test]
    fn test_very_long_path_baseline() {
        let path = dotted_path(200, |i| format!("f{i}"));
        let components = parse_baseline(&path);
        assert_eq!(components.len(), 200);
    }

    #[test]
    fn test_very_long_path_simd() {
        let path = dotted_path(200, |i| format!("f{i}"));
        let components = parse_simd(&path);
        assert_eq!(components.len(), 200);
    }

    #[test]
    fn test_very_long_path_with_arrays() {
        let path = dotted_path(50, |i| format!("f{i}[{i}]"));
        let components = parse_simd(&path);
        // Each segment has a field and an array index
        assert_eq!(components.len(), 100);
    }

    // Dynamic dispatch tests
    #[test]
    fn test_find_delim_dynamic_short_path() {
        let dispatch = init_dispatch();
        let bytes = b"a.b";
        let result = find_delim_dynamic(bytes, 0, bytes.len(), dispatch);
        assert_eq!(result, Some(1));
    }

    #[test]
    fn test_find_delim_dynamic_no_delim() {
        let dispatch = init_dispatch();
        let bytes = b"abcdefgh";
        let result = find_delim_dynamic(bytes, 0, bytes.len(), dispatch);
        assert_eq!(result, None);
    }

    #[test]
    fn test_find_byte_dynamic_short() {
        let dispatch = init_dispatch();
        let bytes = b"a]b";
        let result = find_byte_dynamic(bytes, 0, bytes.len(), b']', dispatch);
        assert_eq!(result, Some(1));
    }

    #[test]
    fn test_find_byte_dynamic_not_found() {
        let dispatch = init_dispatch();
        let bytes = b"abcdefgh";
        let result = find_byte_dynamic(bytes, 0, bytes.len(), b']', dispatch);
        assert_eq!(result, None);
    }

    // SSE2 tests (x86_64)
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_delim_sse2_basic() {
        let bytes = b"abcdefghijklmnop.rest";
        let result = find_delim_sse2(bytes, 0);
        assert_eq!(result, Some(16));
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_delim_sse2_bracket() {
        let bytes = b"abcdefghijklmnop[rest";
        let result = find_delim_sse2(bytes, 0);
        assert_eq!(result, Some(16));
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_delim_sse2_no_match() {
        let bytes = b"abcdefghijklmnopqrstuvwxyz";
        let result = find_delim_sse2(bytes, 0);
        assert_eq!(result, None);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_byte_sse2_basic() {
        let bytes = b"abcdefghijklmnop]rest";
        let result = find_byte_sse2(bytes, 0, b']');
        assert_eq!(result, Some(16));
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_byte_sse2_no_match() {
        let bytes = b"abcdefghijklmnopqrstuvwxyz";
        let result = find_byte_sse2(bytes, 0, b']');
        assert_eq!(result, None);
    }

    // AVX2 wrapper tests
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_delim_avx2_wrapper() {
        if std::is_x86_feature_detected!("avx2") {
            let bytes = vec![b'x'; 100];
            let result = find_delim_avx2_wrapper(&bytes, 0);
            assert_eq!(result, None);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_byte_avx2_wrapper() {
        if std::is_x86_feature_detected!("avx2") {
            let bytes = vec![b'x'; 100];
            let result = find_byte_avx2_wrapper(&bytes, 0, b']');
            assert_eq!(result, None);
        }
    }

    // AVX512 wrapper tests
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_delim_avx512_wrapper() {
        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
            let bytes = vec![b'x'; 200];
            let result = find_delim_avx512_wrapper(&bytes, 0);
            assert_eq!(result, None);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_byte_avx512_wrapper() {
        if std::is_x86_feature_detected!("avx512bw") && std::is_x86_feature_detected!("avx512f") {
            let bytes = vec![b'x'; 200];
            let result = find_byte_avx512_wrapper(&bytes, 0, b']');
            assert_eq!(result, None);
        }
    }

    // =========================================================================
    // Additional Coverage Tests
    // =========================================================================

    #[test]
    fn test_path_cache_default() {
        let cache = PathCache::default();
        let path = cache.get_or_parse("test");
        assert_eq!(path.path(), "test");
    }

    #[test]
    fn test_path_component_equality() {
        let a = PathComponent::Field("test".to_string());
        let b = PathComponent::Field("test".to_string());
        let c = PathComponent::Field("other".to_string());
        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    #[test]
    fn test_path_component_array_equality() {
        let a = PathComponent::ArrayIndex(0);
        let b = PathComponent::ArrayIndex(0);
        let c = PathComponent::ArrayIndex(1);
        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    #[test]
    fn test_path_component_mixed_inequality() {
        let field = PathComponent::Field("0".to_string());
        let index = PathComponent::ArrayIndex(0);
        assert_ne!(field, index);
    }

    #[test]
    fn test_path_component_ref_debug() {
        let comp = PathComponentRef::Field("test");
        let debug = format!("{comp:?}");
        assert!(debug.contains("Field"));
    }

    #[test]
    fn test_path_component_range_debug() {
        let comp = PathComponentRange::Field(0..4);
        let debug = format!("{comp:?}");
        assert!(debug.contains("Field"));
    }

    #[test]
    fn test_parse_simd_ranges_direct() {
        let mut components = Vec::new();
        parse_simd_ranges("user.name[0]", &mut components);
        assert_eq!(components.len(), 3);
    }

    #[test]
    fn test_parse_original_unclosed_bracket() {
        let components = parse_original("field[0");
        // Should handle gracefully
        assert!(!components.is_empty());
    }

    #[test]
    fn test_parse_original_leading_dot() {
        let components = parse_original(".field");
        assert_eq!(components.len(), 1);
    }

    #[test]
    fn test_parse_original_trailing_dot() {
        let components = parse_original("field.");
        assert_eq!(components.len(), 1);
    }

    #[test]
    fn test_parse_original_consecutive_dots() {
        let components = parse_original("a..b");
        assert_eq!(components.len(), 2);
    }

    #[test]
    fn test_parse_usize_large() {
        // Plain digits only: digit separators belong in the numeric literal, not the input.
        assert_eq!(parse_usize(b"1234567890"), 1_234_567_890);
    }

    #[test]
    fn test_estimate_components_mixed() {
        // "a.b[0].c[1]" has 2 dots, 2 brackets => 2 + 2 + 1 = 5
        assert_eq!(estimate_components(b"a.b[0].c[1]"), 5);
    }

    #[test]
    fn test_extract_field_name_utf8() {
        let bytes = "日本語.field".as_bytes();
        let field = extract_field_name(bytes, 0, 9);
        assert_eq!(field, "日本語");
    }

    #[test]
    fn test_parsed_path_clone_trait() {
        let path = ParsedPath::parse("a.b");
        // Actually call `Clone` and check the copy against the still-live original.
        let cloned = path.clone();
        assert_eq!(cloned.path(), "a.b");
        assert_eq!(cloned.path(), path.path());
        assert_eq!(cloned.components().len(), path.components().len());
    }

    #[test]
    fn test_parsed_path_debug() {
        let path = ParsedPath::parse("a.b");
        let debug = format!("{path:?}");
        assert!(debug.contains("ParsedPath"));
    }

    #[test]
    fn test_path_builder_many_indices() {
        let components = parse_simd("arr[0][1][2][3][4][5][6][7][8][9]");
        assert_eq!(components.len(), 11);
    }

    #[test]
    fn test_find_delim_dynamic_with_start() {
        let dispatch = init_dispatch();
        let bytes = b"aaa.bbb";
        let result = find_delim_dynamic(bytes, 2, bytes.len(), dispatch);
        assert_eq!(result, Some(3));
    }

    #[test]
    fn test_find_byte_dynamic_with_start() {
        let dispatch = init_dispatch();
        let bytes = b"aa]bb]cc";
        let result = find_byte_dynamic(bytes, 3, bytes.len(), b']', dispatch);
        assert_eq!(result, Some(5));
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_delim_sse2_with_offset() {
        let bytes = b"abcdefghijklmnop.qrstuvwxyz.123";
        let result = find_delim_sse2(bytes, 17);
        assert_eq!(result, Some(27));
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_find_byte_sse2_with_offset() {
        let bytes = b"abcdefghijklmnop]qrstuvwxyz]123";
        let result = find_byte_sse2(bytes, 17, b']');
        assert_eq!(result, Some(27));
    }

    #[test]
    fn test_parse_simd_ref_long_with_arrays() {
        let path = dotted_path(30, |i| format!("field{i}[{i}]"));
        let components = parse_simd_ref(&path);
        assert_eq!(components.len(), 60);
    }

    #[test]
    fn test_simd_dispatch_fields() {
        let dispatch = init_dispatch();
        // Test all fields are accessible
        let _ = dispatch.has_sse2;
        let _ = dispatch.has_avx2;
        let _ = dispatch.has_avx512;
        let _ = dispatch.has_neon;
    }

    #[test]
    fn test_global_dispatch() {
        let dispatch = DISPATCH.get_or_init(init_dispatch);
        // Just verify it doesn't panic and returns valid dispatch
        let _ = dispatch.has_sse2;
    }

    #[test]
    fn test_parse_simd_very_short() {
        let components = parse_simd("a");
        assert_eq!(components.len(), 1);
    }

    #[test]
    fn test_parse_baseline_only_bracket() {
        let components = parse_baseline("[0]");
        assert_eq!(components.len(), 1);
    }

    #[test]
    fn test_parse_simd_ref_only_bracket() {
        let components = parse_simd_ref("[0]");
        assert_eq!(components.len(), 1);
    }
}