Skip to main content

asmjson/
lib.rs

1#![doc = include_str!("../README.md")]
2
3#[cfg(feature = "serde")]
4pub mod de;
5pub mod dom;
6pub mod sax;
7
8#[cfg(feature = "serde")]
9pub use de::from_taperef;
10pub use dom::json_ref::JsonRef;
11pub use dom::{Dom, DomArrayIter, DomEntry, DomEntryKind, DomObjectIter, DomRef};
12pub use sax::Sax;
13
14use dom::DomWriter;
15
16// ---------------------------------------------------------------------------
17// Hand-written x86-64 AVX-512BW assembly parser (direct-threading, C vtable)
18// ---------------------------------------------------------------------------
19//
20// Instead of indexing directly into Rust's implementation-defined dyn-trait
21// vtable, we supply a *stable* `#[repr(C)]` function-pointer struct.  The
22// assembly uses fixed offsets 0, 8, 16, … into this struct.
23
/// Callback that receives only the type-erased writer pointer.
#[cfg(target_arch = "x86_64")]
type ZmmUnitFn = unsafe extern "C" fn(*mut ());

/// Callback that receives the type-erased writer pointer and a boolean.
#[cfg(target_arch = "x86_64")]
type ZmmBoolFn = unsafe extern "C" fn(*mut (), bool);

/// Callback that receives the type-erased writer pointer and a
/// `(pointer, length)` byte range.
#[cfg(target_arch = "x86_64")]
type ZmmBytesFn = unsafe extern "C" fn(*mut (), *const u8, usize);

/// Function-pointer table handed to the assembly parser in place of a Rust
/// `dyn` vtable.
///
/// The struct is `#[repr(C)]` and consists solely of function pointers, so
/// the fields sit at consecutive pointer-sized offsets (0, 8, 16, …).  The
/// assembly indexes the table by those offsets: **do not reorder, insert or
/// remove fields** without updating it.
///
/// There is one entry per `Sax` event; each takes the type-erased writer
/// pointer as its first argument.
#[cfg(target_arch = "x86_64")]
#[repr(C)]
struct ZmmVtab {
    // Scalar values.
    null: ZmmUnitFn,
    bool_val: ZmmBoolFn,
    number: ZmmBytesFn,
    // String values and object keys, each in a plain and an escaped flavour.
    string: ZmmBytesFn,
    escaped_string: ZmmBytesFn,
    key: ZmmBytesFn,
    escaped_key: ZmmBytesFn,
    // Container boundaries.
    start_object: ZmmUnitFn,
    end_object: ZmmUnitFn,
    start_array: ZmmUnitFn,
    end_array: ZmmUnitFn,
}
43
44// ---------------------------------------------------------------------------
45// Generic C-ABI trampolines for any JsonWriter
46// ---------------------------------------------------------------------------
47//
// `WriterForZmm` is a private bridge trait that exposes every `Sax` method
// via raw pointer / length pairs so that the `extern "C"` trampolines below
// need no lifetime parameters.  It is implemented for every `W: Sax<'a>` via
// the blanket impl; extending the borrowed string to `'a` in the `wfz_*`
// methods is sound because the raw pointers always point into the source
// JSON, which lives for at least `'a`, matching the lifetime the concrete
// writer expects.
55
/// Lifetime-free mirror of the `Sax` event methods, used by the `extern "C"`
/// trampolines.
///
/// Text is passed as a raw `(ptr, len)` pair instead of `&str`, so the
/// trampolines can be generic over the writer type alone.
///
/// # Safety
///
/// Every method is `unsafe`.  For the methods that take `(ptr, len)`, the
/// blanket implementation turns the pair into a `&str` without validation, so
/// the caller must pass a readable range of `len` bytes of valid UTF-8 that
/// stays alive for as long as the writer may hold on to it.
#[cfg(target_arch = "x86_64")]
pub(crate) trait WriterForZmm {
    /// Forwards a `null` literal.
    unsafe fn wfz_null(&mut self);

    /// Forwards a `true` / `false` literal.
    unsafe fn wfz_bool_val(&mut self, v: bool);

    /// Forwards the text of a number.
    unsafe fn wfz_number(&mut self, ptr: *const u8, len: usize);

    /// Forwards a string value that contains no escape sequences.
    unsafe fn wfz_string(&mut self, ptr: *const u8, len: usize);

    /// Forwards the raw (still escaped) text of a string value.
    unsafe fn wfz_escaped_string(&mut self, ptr: *const u8, len: usize);

    /// Forwards an object key that contains no escape sequences.
    unsafe fn wfz_key(&mut self, ptr: *const u8, len: usize);

    /// Forwards the raw (still escaped) text of an object key.
    unsafe fn wfz_escaped_key(&mut self, ptr: *const u8, len: usize);

    /// Forwards the start of an object.
    unsafe fn wfz_start_object(&mut self);

    /// Forwards the end of an object.
    unsafe fn wfz_end_object(&mut self);

    /// Forwards the start of an array.
    unsafe fn wfz_start_array(&mut self);

    /// Forwards the end of an array.
    unsafe fn wfz_end_array(&mut self);
}
70
71#[cfg(target_arch = "x86_64")]
72impl<'a, W: Sax<'a>> WriterForZmm for W {
73    unsafe fn wfz_null(&mut self) {
74        self.null()
75    }
76    unsafe fn wfz_bool_val(&mut self, v: bool) {
77        self.bool_val(v)
78    }
79    unsafe fn wfz_number(&mut self, ptr: *const u8, len: usize) {
80        let s: &'a str = unsafe {
81            std::mem::transmute(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
82                ptr, len,
83            )))
84        };
85        self.number(s)
86    }
87    unsafe fn wfz_string(&mut self, ptr: *const u8, len: usize) {
88        let s: &'a str = unsafe {
89            std::mem::transmute(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
90                ptr, len,
91            )))
92        };
93        self.string(s)
94    }
95    unsafe fn wfz_escaped_string(&mut self, ptr: *const u8, len: usize) {
96        let s = unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, len)) };
97        self.escaped_string(s)
98    }
99    unsafe fn wfz_key(&mut self, ptr: *const u8, len: usize) {
100        let s: &'a str = unsafe {
101            std::mem::transmute(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
102                ptr, len,
103            )))
104        };
105        self.key(s)
106    }
107    unsafe fn wfz_escaped_key(&mut self, ptr: *const u8, len: usize) {
108        let s = unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, len)) };
109        self.escaped_key(s)
110    }
111    unsafe fn wfz_start_object(&mut self) {
112        self.start_object()
113    }
114    unsafe fn wfz_end_object(&mut self) {
115        self.end_object()
116    }
117    unsafe fn wfz_start_array(&mut self) {
118        self.start_array()
119    }
120    unsafe fn wfz_end_array(&mut self) {
121        self.end_array()
122    }
123}
124
125#[cfg(target_arch = "x86_64")]
126unsafe extern "C" fn zw_null<W: WriterForZmm>(data: *mut ()) {
127    unsafe { (*(data as *mut W)).wfz_null() }
128}
129#[cfg(target_arch = "x86_64")]
130unsafe extern "C" fn zw_bool_val<W: WriterForZmm>(data: *mut (), v: bool) {
131    unsafe { (*(data as *mut W)).wfz_bool_val(v) }
132}
133#[cfg(target_arch = "x86_64")]
134unsafe extern "C" fn zw_number<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
135    unsafe { (*(data as *mut W)).wfz_number(ptr, len) }
136}
137#[cfg(target_arch = "x86_64")]
138unsafe extern "C" fn zw_string<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
139    unsafe { (*(data as *mut W)).wfz_string(ptr, len) }
140}
141#[cfg(target_arch = "x86_64")]
142unsafe extern "C" fn zw_escaped_string<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
143    unsafe { (*(data as *mut W)).wfz_escaped_string(ptr, len) }
144}
145#[cfg(target_arch = "x86_64")]
146unsafe extern "C" fn zw_key<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
147    unsafe { (*(data as *mut W)).wfz_key(ptr, len) }
148}
149#[cfg(target_arch = "x86_64")]
150unsafe extern "C" fn zw_escaped_key<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
151    unsafe { (*(data as *mut W)).wfz_escaped_key(ptr, len) }
152}
153#[cfg(target_arch = "x86_64")]
154unsafe extern "C" fn zw_start_object<W: WriterForZmm>(data: *mut ()) {
155    unsafe { (*(data as *mut W)).wfz_start_object() }
156}
157#[cfg(target_arch = "x86_64")]
158unsafe extern "C" fn zw_end_object<W: WriterForZmm>(data: *mut ()) {
159    unsafe { (*(data as *mut W)).wfz_end_object() }
160}
161#[cfg(target_arch = "x86_64")]
162unsafe extern "C" fn zw_start_array<W: WriterForZmm>(data: *mut ()) {
163    unsafe { (*(data as *mut W)).wfz_start_array() }
164}
165#[cfg(target_arch = "x86_64")]
166unsafe extern "C" fn zw_end_array<W: WriterForZmm>(data: *mut ()) {
167    unsafe { (*(data as *mut W)).wfz_end_array() }
168}
169
170/// Build a [`ZmmVtab`] whose function pointers are monomorphised for writer
171/// type `W`.  `W` must implement [`WriterForZmm`], which is blanket-impl'd
172/// for every `JsonWriter<'a>`.
173#[cfg(target_arch = "x86_64")]
174fn build_zmm_vtab<W: WriterForZmm>() -> ZmmVtab {
175    ZmmVtab {
176        null: zw_null::<W>,
177        bool_val: zw_bool_val::<W>,
178        number: zw_number::<W>,
179        string: zw_string::<W>,
180        escaped_string: zw_escaped_string::<W>,
181        key: zw_key::<W>,
182        escaped_key: zw_escaped_key::<W>,
183        start_object: zw_start_object::<W>,
184        end_object: zw_end_object::<W>,
185        start_array: zw_start_array::<W>,
186        end_array: zw_end_array::<W>,
187    }
188}
189
#[cfg(target_arch = "x86_64")]
// NOTE(review): presumably needed because `*mut ()` and `DomEntry` are not
// considered FFI-safe by the `improper_ctypes` lint — confirm and keep this
// comment in sync with the reason.
#[allow(improper_ctypes)]
unsafe extern "C" {
    /// Entry point assembled from `asm/x86_64/parse_json_zmm_sax.S`.
    ///
    /// Calls writer methods through the supplied `ZmmVtab`.  Does NOT call
    /// `finish`.  Returns `true` on success.
    ///
    /// Arguments, as supplied by `parse_with_zmm`:
    ///
    /// * `src_ptr` / `src_len` — start and byte length of the JSON source.
    /// * `writer_data` — type-erased pointer to the writer; every `ZmmVtab`
    ///   entry takes such a pointer as its first argument.
    /// * `writer_vtab` — the callback table for that writer type.
    /// * `frames_buf` — scratch buffer; the caller passes an array of
    ///   `MAX_JSON_DEPTH` one-byte `FrameKind` values.
    fn parse_json_zmm_sax(
        src_ptr: *const u8,
        src_len: usize,
        writer_data: *mut (),
        writer_vtab: *const ZmmVtab,
        frames_buf: *mut u8,
    ) -> bool;

    /// Entry point assembled from `asm/x86_64/parse_json_zmm_dom.S`.
    ///
    /// Writes [`DomEntry`] values directly into the pre-allocated `tape_ptr`
    /// array (up to `tape_cap` entries).  On success sets `*tape_len_out` to
    /// the number of entries written and returns `RESULT_OK` (0).  Sets
    /// `*has_escapes_out` to `true` if any `EscapedString` or `EscapedKey`
    /// entry was written.  Returns `RESULT_PARSE_ERROR` (1) for invalid JSON
    /// or `RESULT_TAPE_OVERFLOW` (2) if `tape_cap` entries are not sufficient.
    ///
    /// Arguments, as supplied by `parse_to_dom_zmm`:
    ///
    /// * `src_ptr` / `src_len` — start and byte length of the JSON source.
    /// * `frames_buf` — scratch buffer; the caller passes an array of
    ///   `MAX_JSON_DEPTH` one-byte `FrameKind` values.
    /// * `open_buf` — scratch buffer; the caller passes an array of
    ///   `MAX_JSON_DEPTH` `u64` values.
    ///
    /// NOTE(review): `parse_to_dom_zmm` also reads `*tape_len_out` after a
    /// `RESULT_TAPE_OVERFLOW` return, so the assembly is expected to store
    /// the count of initialised entries in that case too — confirm.
    fn parse_json_zmm_dom(
        src_ptr: *const u8,
        src_len: usize,
        tape_ptr: *mut DomEntry<'static>,
        tape_len_out: *mut usize,
        frames_buf: *mut u8,
        open_buf: *mut u64,
        has_escapes_out: *mut bool,
        tape_cap: usize,
    ) -> u8;
}
224
225// ---------------------------------------------------------------------------
226// Optional state-entry statistics (compiled in with --features stats).
227// ---------------------------------------------------------------------------
228
/// Process-wide counters of parser state entries.
///
/// Only compiled with the `stats` feature.  The counters are bumped through
/// the crate-internal `stat!` macro; use [`reset`] to zero them and [`get`]
/// to take a snapshot.
#[cfg(feature = "stats")]
pub mod stats {
    use std::sync::atomic::{AtomicU64, Ordering::Relaxed};

    // One counter per parser state.  `Relaxed` ordering is sufficient: the
    // values are independent statistics and never used for synchronisation.
    pub static VALUE_WHITESPACE: AtomicU64 = AtomicU64::new(0);
    pub static STRING_CHARS: AtomicU64 = AtomicU64::new(0);
    pub static STRING_ESCAPE: AtomicU64 = AtomicU64::new(0);
    pub static KEY_CHARS: AtomicU64 = AtomicU64::new(0);
    pub static KEY_ESCAPE: AtomicU64 = AtomicU64::new(0);
    pub static KEY_END: AtomicU64 = AtomicU64::new(0);
    pub static AFTER_COLON: AtomicU64 = AtomicU64::new(0);
    pub static ATOM_CHARS: AtomicU64 = AtomicU64::new(0);
    pub static OBJECT_START: AtomicU64 = AtomicU64::new(0);
    pub static ARRAY_START: AtomicU64 = AtomicU64::new(0);
    pub static AFTER_VALUE: AtomicU64 = AtomicU64::new(0);

    /// Set every counter back to zero.
    ///
    /// The counters are cleared one after another, not atomically as a group.
    pub fn reset() {
        for counter in all() {
            counter.store(0, Relaxed);
        }
    }

    /// All counters, in declaration order.
    fn all() -> [&'static AtomicU64; 11] {
        [
            &VALUE_WHITESPACE,
            &STRING_CHARS,
            &STRING_ESCAPE,
            &KEY_CHARS,
            &KEY_ESCAPE,
            &KEY_END,
            &AFTER_COLON,
            &ATOM_CHARS,
            &OBJECT_START,
            &ARRAY_START,
            &AFTER_VALUE,
        ]
    }

    /// Snapshot of all counters, one field per static of the same name.
    ///
    /// Plain data: it can be printed with `{:?}`, copied, and compared, which
    /// makes before/after comparisons in benchmarks and tests straightforward.
    #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
    pub struct StateStats {
        pub value_whitespace: u64,
        pub string_chars: u64,
        pub string_escape: u64,
        pub key_chars: u64,
        pub key_escape: u64,
        pub key_end: u64,
        pub after_colon: u64,
        pub atom_chars: u64,
        pub object_start: u64,
        pub array_start: u64,
        pub after_value: u64,
    }

    /// Read every counter into a [`StateStats`] snapshot.
    ///
    /// Each counter is loaded individually, so the snapshot is not a single
    /// atomic view if other threads are parsing concurrently.
    pub fn get() -> StateStats {
        StateStats {
            value_whitespace: VALUE_WHITESPACE.load(Relaxed),
            string_chars: STRING_CHARS.load(Relaxed),
            string_escape: STRING_ESCAPE.load(Relaxed),
            key_chars: KEY_CHARS.load(Relaxed),
            key_escape: KEY_ESCAPE.load(Relaxed),
            key_end: KEY_END.load(Relaxed),
            after_colon: AFTER_COLON.load(Relaxed),
            atom_chars: ATOM_CHARS.load(Relaxed),
            object_start: OBJECT_START.load(Relaxed),
            array_start: ARRAY_START.load(Relaxed),
            after_value: AFTER_VALUE.load(Relaxed),
        }
    }
}
297
/// Increment a state counter when the `stats` feature is enabled; a no-op otherwise.
///
/// `$counter` is the path of the counter to bump, e.g.
/// `stat!(crate::stats::KEY_END)`; it must expose a `fetch_add` method.
macro_rules! stat {
    ($counter:path) => {
        // The `cfg` sits on the statement itself: without the feature the
        // expansion is empty, so the `crate::stats` module (which is only
        // compiled with the feature) does not need to exist.
        #[cfg(feature = "stats")]
        $counter.fetch_add(1, ::std::sync::atomic::Ordering::Relaxed);
    };
}
305
/// States of the portable parser's state machine (see `parse_json_impl`).
///
/// Parsing starts in [`State::ValueWhitespace`].
#[derive(PartialEq)]
enum State {
    /// Waiting for the first byte of any JSON value.
    ValueWhitespace,

    /// Inside a quoted string value.
    StringChars,

    /// Inside a key string (left-hand side of an object member).
    KeyChars,
    /// Closing `"` of a key consumed; skip whitespace then expect `:`.
    KeyEnd,
    /// `:` consumed; skip whitespace then dispatch a value.
    AfterColon,

    /// Inside an unquoted atom (number / true / false / null).
    AtomChars,

    /// An invalid token was encountered; the parse will return None.
    Error,

    /// `{` consumed; skip whitespace then expect `"` (key) or `}`.
    ObjectStart,

    /// `[` consumed; skip whitespace then expect a value or `]`.
    ArrayStart,

    /// A complete value was produced; skip whitespace then pop the context stack.
    AfterValue,
}
336
337// ---------------------------------------------------------------------------
338// Lightweight frame kind — the parser only needs to know Object vs Array for
339// routing commas and validating bracket matches.  Value construction lives in
340// the writer implementations below.
341// ---------------------------------------------------------------------------
342
/// Kind of container that is open at one nesting level.
///
/// One value is stored per level in the fixed-size frame buffer.  That
/// buffer is also handed to the assembly parsers as a raw `*mut u8`, hence
/// the one-byte representation with explicit discriminants.
///
/// NOTE(review): the assembly presumably relies on `Object = 0` / `Array = 1`
/// — confirm before changing either value.
#[derive(Copy, Clone, PartialEq)]
#[repr(u8)]
enum FrameKind {
    /// The open container is an object (`{ … }`).
    Object = 0,
    /// The open container is an array (`[ … ]`).
    Array = 1,
}
349
/// Maximum supported JSON nesting depth (objects + arrays combined).
///
/// The parsers keep their container stack in fixed-size arrays of this
/// length.  The portable parser enters its error state when a `{` or `[`
/// would open a container beyond this depth.
pub const MAX_JSON_DEPTH: usize = 64;
352
353// The Sax trait (SAX-style event sink) lives in the `sax` module.
354// Re-exported at crate root as `pub use sax::Sax`.
355
356// ---------------------------------------------------------------------------
357// Atom helper — Validate a JSON number.
358// ---------------------------------------------------------------------------
359
/// Returns `true` if `s` is, in its entirety, a number in JSON syntax:
///
/// ```text
/// [ "-" ] ( "0" | digit1-9 *digit ) [ "." 1*digit ] [ ("e" | "E") [ "+" | "-" ] 1*digit ]
/// ```
///
/// Leading zeros (`01`), a leading `+`, an empty fraction or exponent, and
/// any trailing byte all make the input invalid.  The empty slice is invalid.
fn is_valid_json_number(s: &[u8]) -> bool {
    /// Splits off the leading run of ASCII digits: `(run length, remainder)`.
    fn split_digits(bytes: &[u8]) -> (usize, &[u8]) {
        let run = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
        (run, &bytes[run..])
    }

    // Optional minus sign.
    let unsigned = match s {
        [b'-', tail @ ..] => tail,
        _ => s,
    };

    // Integer part: a lone `0`, or a non-zero digit followed by more digits.
    let after_int = match unsigned {
        [b'0', tail @ ..] => {
            if matches!(tail.first(), Some(b) if b.is_ascii_digit()) {
                return false;
            }
            tail
        }
        [b'1'..=b'9', ..] => split_digits(unsigned).1,
        _ => return false,
    };

    // Optional fraction: `.` followed by at least one digit.
    let after_frac = match after_int {
        [b'.', tail @ ..] => {
            let (run, remainder) = split_digits(tail);
            if run == 0 {
                return false;
            }
            remainder
        }
        _ => after_int,
    };

    // Optional exponent: `e`/`E`, optional sign, at least one digit.
    let after_exp = match after_frac {
        [b'e' | b'E', tail @ ..] => {
            let magnitude = match tail {
                [b'+' | b'-', unsigned_exp @ ..] => unsigned_exp,
                _ => tail,
            };
            let (run, remainder) = split_digits(magnitude);
            if run == 0 {
                return false;
            }
            remainder
        }
        _ => after_frac,
    };

    // Anything left over means trailing garbage.
    after_exp.is_empty()
}
407
408/// C-linkage entry point for the hand-written assembly parser.
409/// Returns 1 if `bytes[..len]` is a valid JSON number, 0 otherwise.
410#[doc(hidden)]
411#[unsafe(no_mangle)]
412pub extern "C" fn is_valid_json_number_c(ptr: *const u8, len: usize) -> bool {
413    let s = unsafe { std::slice::from_raw_parts(ptr, len) };
414    is_valid_json_number(s)
415}
416
417/// Called from `parse_json_zmm_dom` to unescape and box a raw JSON string
418/// in one step.
419///
420/// Decodes the still-escaped bytes at `raw_ptr[..raw_len]` via
421/// [`unescape_str`], moves the result into a `Box<str>`, writes the data
422/// pointer and length to `*out_ptr` / `*out_len`, then **leaks** the box.
423/// Ownership is transferred to the `DomEntry` written immediately after this
424/// call, which will free it on `Drop`.
425#[doc(hidden)]
426#[cfg(target_arch = "x86_64")]
427#[unsafe(no_mangle)]
428#[inline(never)]
429pub extern "C" fn dom_unescape_to_box_str(
430    raw_ptr: *const u8,
431    raw_len: usize,
432    out_ptr: *mut *const u8,
433    out_len: *mut usize,
434) {
435    unsafe {
436        let raw = std::str::from_utf8_unchecked(std::slice::from_raw_parts(raw_ptr, raw_len));
437        let mut buf = String::new();
438        unescape_str(raw, &mut buf);
439        let boxed: Box<str> = buf.into_boxed_str();
440        let len = boxed.len();
441        let raw_out: *mut str = Box::into_raw(boxed);
442        *out_ptr = raw_out as *mut u8 as *const u8;
443        *out_len = len;
444    }
445}
446
447fn write_atom<'a, W: Sax<'a>>(s: &'a str, w: &mut W) -> bool {
448    match s {
449        "true" => {
450            w.bool_val(true);
451            true
452        }
453        "false" => {
454            w.bool_val(false);
455            true
456        }
457        "null" => {
458            w.null();
459            true
460        }
461        n => {
462            if is_valid_json_number(n.as_bytes()) {
463                w.number(n);
464                true
465            } else {
466                false
467            }
468        }
469    }
470}
471
472// ---------------------------------------------------------------------------
473// Public parse entry points
474// ---------------------------------------------------------------------------
475
476/// Parse `src` into a flat [`Dom`] using the portable SWAR classifier.
477///
478/// Returns `None` if the input is not valid JSON.
479///
480/// `StartObject(n)` / `StartArray(n)` entries carry the index of the matching
481/// closer so entire subtrees can be skipped in O(1).  Access the tape via
482/// [`Dom::root`] which returns a [`DomRef`] cursor that implements [`JsonRef`].
483///
484/// For maximum throughput on CPUs with AVX-512BW, use [`parse_to_dom_zmm`].
485///
486/// ```rust
487/// use asmjson::{parse_to_dom, JsonRef};
488/// let tape = parse_to_dom(r#"{"x":1}"#).unwrap();
489/// assert_eq!(tape.root().get("x").as_i64(), Some(1));
490/// ```
491pub fn parse_to_dom<'a>(src: &'a str) -> Option<Dom<'a>> {
492    parse_with(src, DomWriter::new())
493}
494
495/// Parse `src` to a [`Dom`] using the hand-written x86-64 AVX-512BW
496/// assembly parser that writes [`DomEntry`] values directly into a
497/// pre-allocated array, bypassing all virtual dispatch.
498///
499/// `initial_capacity` controls how many [`DomEntry`] slots the first
500/// allocation reserves.  Pass `None` to use the default of `src.len() / 4`,
501/// which is large enough for well-formed JSON without triggering a retry on
502/// typical inputs.  Pass `Some(n)` to hint a known-good size and avoid any
503/// retry allocation.  On overflow the capacity is doubled and the parse is
504/// retried automatically regardless of the initial hint.
505///
506/// Only available on `x86_64` targets.  Returns `None` if the JSON is
507/// invalid or nesting exceeds [`MAX_JSON_DEPTH`] levels.
508///
509/// # Safety
510///
511/// The caller must ensure the CPU supports AVX-512BW.  Invoking this on a CPU
512/// without AVX-512BW support will trigger an illegal instruction fault.  Use
513/// [`parse_to_dom`] for portable code.
514///
515/// ```rust
516/// #[cfg(target_arch = "x86_64")]
517/// {
518///     use asmjson::parse_to_dom_zmm;
519///     let tape = unsafe { parse_to_dom_zmm(r#"{"x":1}"#, None) }.unwrap();
520///     use asmjson::JsonRef;
521///     assert_eq!(tape.root().get("x").as_i64(), Some(1));
522/// }
523/// ```
524#[cfg(target_arch = "x86_64")]
525pub unsafe fn parse_to_dom_zmm<'a>(
526    src: &'a str,
527    initial_capacity: Option<usize>,
528) -> Option<Dom<'a>> {
529    // Result codes matching the assembly RESULT_* constants.
530    const RESULT_OK: u8 = 0;
531    const RESULT_PARSE_ERROR: u8 = 1;
532    const RESULT_TAPE_OVERFLOW: u8 = 2;
533
534    let mut frames_buf = [FrameKind::Object; MAX_JSON_DEPTH];
535    let mut open_buf = [0u64; MAX_JSON_DEPTH];
536
537    // Start at the caller-supplied hint, or default to src.len()/4 entries.
538    // For well-formed JSON this default comfortably exceeds the tape length
539    // (each record is ~130 bytes and emits ~22 entries; 130/4 = 32.5 > 22),
540    // so no retry should be needed in practice.
541    let mut capacity = initial_capacity.unwrap_or_else(|| (src.len() / 4).max(2));
542
543    loop {
544        let mut tape_data: Vec<DomEntry<'a>> = Vec::with_capacity(capacity);
545        let tape_ptr = tape_data.as_mut_ptr() as *mut DomEntry<'static>;
546        let mut tape_len: usize = 0;
547        let mut has_escapes: bool = false;
548
549        // SAFETY:
550        //   • `tape_data` has exactly `capacity` entries; the assembly checks
551        //     bounds before every write and returns RESULT_TAPE_OVERFLOW if
552        //     the capacity is exceeded.
553        //   • `src` lives for at least `'a`; string pointers stored in tape
554        //     entries point into `src`'s bytes and remain valid for `'a`.
555        //   • EscapedString / EscapedKey entries own a `Box<str>` allocated by
556        //     `dom_unescape_to_box_str`; `DomEntry::drop` frees them.
557        //   • `parse_json_zmm_dom` does NOT call `finish`.
558        let result = unsafe {
559            parse_json_zmm_dom(
560                src.as_ptr(),
561                src.len(),
562                tape_ptr,
563                &raw mut tape_len,
564                frames_buf.as_mut_ptr() as *mut u8,
565                open_buf.as_mut_ptr(),
566                &raw mut has_escapes,
567                capacity,
568            )
569        };
570
571        match result {
572            RESULT_OK => {
573                // SAFETY: assembly wrote exactly `tape_len` initialised entries.
574                unsafe { tape_data.set_len(tape_len) };
575                return Some(Dom {
576                    entries: tape_data,
577                    has_escapes,
578                });
579            }
580            RESULT_PARSE_ERROR => return None,
581            RESULT_TAPE_OVERFLOW => {
582                // The tape was too small; double capacity and retry.
583                // First, set the vec length to `tape_len` so that any
584                // EscapedString / EscapedKey entries already written (which
585                // own a Box<str>) are properly dropped when tape_data goes
586                // out of scope at the end of this block.
587                unsafe { tape_data.set_len(tape_len) };
588                capacity = capacity.saturating_mul(2).max(capacity + 1);
589                continue;
590            }
591            _ => return None, // should not happen
592        }
593    }
594}
595
596/// Parse `src` using a custom [`JsonWriter`], returning its output.
597///
598/// This is the generic entry point: supply your own writer to produce any
599/// output in a single pass over the source.  Uses the portable SWAR
600/// classifier; works on any architecture.
601///
602/// For maximum throughput on CPUs with AVX-512BW, use [`parse_with_zmm`].
603pub fn parse_with<'a, W: Sax<'a>>(src: &'a str, writer: W) -> Option<W::Output> {
604    let mut frames_buf = [FrameKind::Object; MAX_JSON_DEPTH];
605    parse_json_impl(src, writer, &mut frames_buf)
606}
607
608/// Parse `src` using a custom [`JsonWriter`] and the hand-written x86-64
609/// AVX-512BW assembly parser with direct-threaded state dispatch.
610///
611/// Only available on `x86_64` targets.  Returns `None` if the JSON is
612/// invalid or nesting exceeds [`MAX_JSON_DEPTH`] levels.
613///
614/// # Safety
615///
616/// The caller must ensure the CPU supports AVX-512BW.  Invoking this on a CPU
617/// without AVX-512BW support will trigger an illegal instruction fault.  Use
618/// [`parse_with`] for portable code.
619///
620#[cfg(target_arch = "x86_64")]
621pub unsafe fn parse_with_zmm<'a, W: Sax<'a>>(src: &'a str, mut writer: W) -> Option<W::Output> {
622    let vtab = build_zmm_vtab::<W>();
623    let mut frames_buf = [FrameKind::Object; MAX_JSON_DEPTH];
624    // SAFETY (caller obligation): CPU supports AVX-512BW.
625    // SAFETY (internal): writer and src both live for 'a, outlasting this
626    // synchronous call.  parse_json_zmm_sax does NOT call finish.
627    let ok = unsafe {
628        parse_json_zmm_sax(
629            src.as_ptr(),
630            src.len(),
631            &raw mut writer as *mut (),
632            &vtab,
633            frames_buf.as_mut_ptr() as *mut u8,
634        )
635    };
636    if ok { writer.finish() } else { None }
637}
638
639fn parse_json_impl<'a, W: Sax<'a>>(
640    src: &'a str,
641    mut writer: W,
642    frames_buf: &mut [FrameKind; MAX_JSON_DEPTH],
643) -> Option<W::Output> {
644    let bytes = src.as_bytes();
645    let mut frames_depth: usize = 0;
646    let mut str_start: usize = 0; // absolute byte offset of char after opening '"'
647    let mut str_escaped = false; // true if the current string contained any backslash
648    let mut bs_count: usize = 0; // consecutive backslashes immediately before current pos
649    let mut atom_start: usize = 0; // absolute byte offset of first atom byte
650    let mut current_key_raw: &'a str = ""; // raw key slice captured when KeyChars closes
651    let mut current_key_escaped = false; // true when the key contained backslash escapes
652    let mut after_comma = false; // true when ObjectStart/ArrayStart was reached via a `,`
653    let mut state = State::ValueWhitespace;
654
655    let mut pos = 0;
656    while pos < bytes.len() {
657        let chunk_len = (bytes.len() - pos).min(64);
658        let chunk = &bytes[pos..pos + chunk_len];
659        let byte_state = classify_u64(chunk);
660
661        let mut chunk_offset = 0;
662        'inner: while chunk_offset < chunk_len {
663            state = match state {
664                State::ValueWhitespace => {
665                    stat!(crate::stats::VALUE_WHITESPACE);
666                    let ahead = (!byte_state.whitespace) >> chunk_offset;
667                    let skip = ahead.trailing_zeros() as usize;
668                    chunk_offset += skip;
669                    if chunk_offset >= chunk_len {
670                        break 'inner;
671                    }
672                    let byte = chunk[chunk_offset];
673                    match byte {
674                        b'{' => {
675                            if frames_depth >= MAX_JSON_DEPTH {
676                                State::Error
677                            } else {
678                                frames_buf[frames_depth] = FrameKind::Object;
679                                frames_depth += 1;
680                                writer.start_object();
681                                State::ObjectStart
682                            }
683                        }
684                        b'[' => {
685                            if frames_depth >= MAX_JSON_DEPTH {
686                                State::Error
687                            } else {
688                                frames_buf[frames_depth] = FrameKind::Array;
689                                frames_depth += 1;
690                                writer.start_array();
691                                State::ArrayStart
692                            }
693                        }
694                        b'"' => {
695                            str_start = pos + chunk_offset + 1;
696                            str_escaped = false;
697                            bs_count = 0;
698                            State::StringChars
699                        }
700                        _ => {
701                            atom_start = pos + chunk_offset;
702                            State::AtomChars
703                        }
704                    }
705                }
706
707                State::StringChars => {
708                    stat!(crate::stats::STRING_CHARS);
709                    // Scan for either '\' or '"'; handle runs of backslashes here
710                    // rather than via a separate state so that even/odd counting is
711                    // correct for sequences like `\\"` (two backslashes + quote).
712                    let interesting = (byte_state.backslashes | byte_state.quotes) >> chunk_offset;
713                    let skip = interesting.trailing_zeros() as usize;
714                    chunk_offset = (chunk_offset + skip).min(chunk_len);
715                    if chunk_offset >= chunk_len {
716                        break 'inner;
717                    }
718                    // Any ordinary chars between the last event and here break the run.
719                    if skip > 0 {
720                        bs_count = 0;
721                    }
722                    let byte = chunk[chunk_offset];
723                    match byte {
724                        b'\\' => {
725                            // Count consecutive backslashes; parity decides whether
726                            // the next quote (if any) is escaped.
727                            bs_count += 1;
728                            str_escaped = true;
729                            State::StringChars
730                        }
731                        b'"' if bs_count & 1 == 1 => {
732                            // Odd run of preceding backslashes: this quote is escaped.
733                            bs_count = 0;
734                            State::StringChars
735                        }
736                        _ => {
737                            // Even run (0, 2, 4 …): string ends here.
738                            bs_count = 0;
739                            let raw = &src[str_start..pos + chunk_offset];
740                            if str_escaped {
741                                writer.escaped_string(raw);
742                            } else {
743                                writer.string(raw);
744                            }
745                            State::AfterValue
746                        }
747                    }
748                }
749
750                State::KeyChars => {
751                    stat!(crate::stats::KEY_CHARS);
752                    let interesting = (byte_state.backslashes | byte_state.quotes) >> chunk_offset;
753                    let skip = interesting.trailing_zeros() as usize;
754                    chunk_offset = (chunk_offset + skip).min(chunk_len);
755                    if chunk_offset >= chunk_len {
756                        break 'inner;
757                    }
758                    if skip > 0 {
759                        bs_count = 0;
760                    }
761                    let byte = chunk[chunk_offset];
762                    match byte {
763                        b'\\' => {
764                            bs_count += 1;
765                            str_escaped = true;
766                            State::KeyChars
767                        }
768                        b'"' if bs_count & 1 == 1 => {
769                            // Odd run of preceding backslashes: this quote is escaped.
770                            bs_count = 0;
771                            State::KeyChars
772                        }
773                        _ => {
774                            // Even run: key ends here.
775                            bs_count = 0;
776                            current_key_raw = &src[str_start..pos + chunk_offset];
777                            current_key_escaped = str_escaped;
778                            State::KeyEnd
779                        }
780                    }
781                }
782                State::KeyEnd => {
783                    stat!(crate::stats::KEY_END);
784                    let ahead = (!byte_state.whitespace) >> chunk_offset;
785                    let skip = ahead.trailing_zeros() as usize;
786                    chunk_offset += skip;
787                    if chunk_offset >= chunk_len {
788                        break 'inner;
789                    }
790                    let byte = chunk[chunk_offset];
791                    match byte {
792                        b':' => {
793                            if current_key_escaped {
794                                writer.escaped_key(current_key_raw);
795                            } else {
796                                writer.key(current_key_raw);
797                            }
798                            State::AfterColon
799                        }
800                        _ => State::Error,
801                    }
802                }
803                State::AfterColon => {
804                    stat!(crate::stats::AFTER_COLON);
805                    let ahead = (!byte_state.whitespace) >> chunk_offset;
806                    let skip = ahead.trailing_zeros() as usize;
807                    chunk_offset += skip;
808                    if chunk_offset >= chunk_len {
809                        break 'inner;
810                    }
811                    let byte = chunk[chunk_offset];
812                    match byte {
813                        b'{' => {
814                            if frames_depth >= MAX_JSON_DEPTH {
815                                State::Error
816                            } else {
817                                frames_buf[frames_depth] = FrameKind::Object;
818                                frames_depth += 1;
819                                writer.start_object();
820                                State::ObjectStart
821                            }
822                        }
823                        b'[' => {
824                            if frames_depth >= MAX_JSON_DEPTH {
825                                State::Error
826                            } else {
827                                frames_buf[frames_depth] = FrameKind::Array;
828                                frames_depth += 1;
829                                writer.start_array();
830                                State::ArrayStart
831                            }
832                        }
833                        b'"' => {
834                            str_start = pos + chunk_offset + 1;
835                            str_escaped = false;
836                            bs_count = 0;
837                            State::StringChars
838                        }
839                        _ => {
840                            atom_start = pos + chunk_offset;
841                            State::AtomChars
842                        }
843                    }
844                }
845
846                State::AtomChars => {
847                    stat!(crate::stats::ATOM_CHARS);
848                    let ahead = byte_state.delimiters >> chunk_offset;
849                    let skip = ahead.trailing_zeros() as usize;
850                    chunk_offset += skip;
851                    if chunk_offset >= chunk_len {
852                        break 'inner;
853                    }
854                    let byte = chunk[chunk_offset];
855                    if !write_atom(&src[atom_start..pos + chunk_offset], &mut writer) {
856                        State::Error
857                    } else {
858                        match byte {
859                            b'}' => {
860                                if frames_depth == 0
861                                    || frames_buf[frames_depth - 1] != FrameKind::Object
862                                {
863                                    State::Error
864                                } else {
865                                    frames_depth -= 1;
866                                    writer.end_object();
867                                    State::AfterValue
868                                }
869                            }
870                            b']' => {
871                                if frames_depth == 0
872                                    || frames_buf[frames_depth - 1] != FrameKind::Array
873                                {
874                                    State::Error
875                                } else {
876                                    frames_depth -= 1;
877                                    writer.end_array();
878                                    State::AfterValue
879                                }
880                            }
881                            b',' => {
882                                if frames_depth == 0 {
883                                    State::Error
884                                } else {
885                                    match frames_buf[frames_depth - 1] {
886                                        FrameKind::Array => {
887                                            after_comma = true;
888                                            State::ArrayStart
889                                        }
890                                        FrameKind::Object => {
891                                            after_comma = true;
892                                            State::ObjectStart
893                                        }
894                                    }
895                                }
896                            }
897                            _ => State::AfterValue, // whitespace delimiter
898                        }
899                    }
900                }
901
902                State::Error => break 'inner,
903
904                State::ObjectStart => {
905                    stat!(crate::stats::OBJECT_START);
906                    let ahead = (!byte_state.whitespace) >> chunk_offset;
907                    let skip = ahead.trailing_zeros() as usize;
908                    chunk_offset += skip;
909                    if chunk_offset >= chunk_len {
910                        break 'inner;
911                    }
912                    let byte = chunk[chunk_offset];
913                    match byte {
914                        b'"' => {
915                            after_comma = false;
916                            str_start = pos + chunk_offset + 1;
917                            str_escaped = false;
918                            bs_count = 0;
919                            State::KeyChars
920                        }
921                        b'}' => {
922                            if after_comma {
923                                State::Error
924                            } else if frames_depth > 0
925                                && frames_buf[frames_depth - 1] == FrameKind::Object
926                            {
927                                frames_depth -= 1;
928                                writer.end_object();
929                                State::AfterValue
930                            } else {
931                                State::Error
932                            }
933                        }
934                        _ => State::Error,
935                    }
936                }
937
938                State::ArrayStart => {
939                    stat!(crate::stats::ARRAY_START);
940                    let ahead = (!byte_state.whitespace) >> chunk_offset;
941                    let skip = ahead.trailing_zeros() as usize;
942                    chunk_offset += skip;
943                    if chunk_offset >= chunk_len {
944                        break 'inner;
945                    }
946                    let byte = chunk[chunk_offset];
947                    match byte {
948                        b']' => {
949                            if after_comma {
950                                State::Error
951                            } else if frames_depth > 0
952                                && frames_buf[frames_depth - 1] == FrameKind::Array
953                            {
954                                frames_depth -= 1;
955                                writer.end_array();
956                                State::AfterValue
957                            } else {
958                                State::Error
959                            }
960                        }
961                        b'{' => {
962                            after_comma = false;
963                            if frames_depth >= MAX_JSON_DEPTH {
964                                State::Error
965                            } else {
966                                frames_buf[frames_depth] = FrameKind::Object;
967                                frames_depth += 1;
968                                writer.start_object();
969                                State::ObjectStart
970                            }
971                        }
972                        b'[' => {
973                            after_comma = false;
974                            if frames_depth >= MAX_JSON_DEPTH {
975                                State::Error
976                            } else {
977                                frames_buf[frames_depth] = FrameKind::Array;
978                                frames_depth += 1;
979                                writer.start_array();
980                                State::ArrayStart
981                            }
982                        }
983                        b'"' => {
984                            after_comma = false;
985                            str_start = pos + chunk_offset + 1;
986                            str_escaped = false;
987                            bs_count = 0;
988                            State::StringChars
989                        }
990                        _ => {
991                            after_comma = false;
992                            atom_start = pos + chunk_offset;
993                            State::AtomChars
994                        }
995                    }
996                }
997
998                State::AfterValue => {
999                    stat!(crate::stats::AFTER_VALUE);
1000                    let ahead = (!byte_state.whitespace) >> chunk_offset;
1001                    let skip = ahead.trailing_zeros() as usize;
1002                    chunk_offset += skip;
1003                    if chunk_offset >= chunk_len {
1004                        break 'inner;
1005                    }
1006                    let byte = chunk[chunk_offset];
1007                    match byte {
1008                        b',' => {
1009                            if frames_depth == 0 {
1010                                State::Error
1011                            } else {
1012                                match frames_buf[frames_depth - 1] {
1013                                    FrameKind::Object => {
1014                                        after_comma = true;
1015                                        State::ObjectStart
1016                                    }
1017                                    FrameKind::Array => {
1018                                        after_comma = true;
1019                                        State::ArrayStart
1020                                    }
1021                                }
1022                            }
1023                        }
1024                        b'}' => {
1025                            if frames_depth > 0 && frames_buf[frames_depth - 1] == FrameKind::Object
1026                            {
1027                                frames_depth -= 1;
1028                                writer.end_object();
1029                                State::AfterValue
1030                            } else {
1031                                State::Error
1032                            }
1033                        }
1034                        b']' => {
1035                            if frames_depth > 0 && frames_buf[frames_depth - 1] == FrameKind::Array
1036                            {
1037                                frames_depth -= 1;
1038                                writer.end_array();
1039                                State::AfterValue
1040                            } else {
1041                                State::Error
1042                            }
1043                        }
1044                        _ => State::Error,
1045                    }
1046                }
1047            };
1048            chunk_offset += 1;
1049        }
1050        pos += chunk_len;
1051    }
1052
1053    // Flush a trailing atom not followed by a delimiter (e.g. top-level `42`).
1054    if state == State::AtomChars {
1055        if !write_atom(&src[atom_start..], &mut writer) {
1056            return None;
1057        }
1058    } else if state != State::AfterValue {
1059        return None;
1060    }
1061
1062    if state == State::Error {
1063        return None;
1064    }
1065
1066    // Unclosed objects or arrays.
1067    if frames_depth != 0 {
1068        return None;
1069    }
1070
1071    writer.finish()
1072}
1073
/// Decode all JSON string escape sequences within `s` (the raw content between
/// the opening and closing quotes, with no surrounding quotes).  Clears `out`
/// and writes the decoded text into it.
///
/// Supported escapes: `\"` `\\` `\/` `\b` `\f` `\n` `\r` `\t` `\uXXXX`
/// (including surrogate pairs).  Unknown escapes are passed through verbatim:
/// both the backslash and the character that follows it are kept.
///
/// Malformed input is handled leniently rather than reported:
///
/// * A `\u` that is not followed by exactly four hexadecimal digits is
///   dropped, and the text after it is copied through unchanged.
/// * A `\uXXXX` escape that encodes an unpaired surrogate produces no output.
/// * A lone backslash at the very end of `s` is dropped.
///
/// No input causes a slicing panic: hex digits are read from the byte slice,
/// never by slicing `s` at an offset that might fall inside a multi-byte
/// character.
#[doc(hidden)]
#[unsafe(no_mangle)]
#[inline(never)]
pub fn unescape_str(s: &str, out: &mut String) {
    /// Parse the first four bytes of `bytes` as hexadecimal digits.
    ///
    /// Returns `None` when fewer than four bytes are available or when any of
    /// them is not `0-9`, `a-f` or `A-F`.  Unlike `u16::from_str_radix`, a
    /// leading `+` sign is not accepted.
    fn hex4(bytes: &[u8]) -> Option<u16> {
        bytes.get(..4)?.iter().try_fold(0u16, |acc, &b| {
            let digit = (b as char).to_digit(16)?;
            Some((acc << 4) | digit as u16)
        })
    }

    out.clear();
    let bytes = s.as_bytes();
    // Invariant: `i` is always on a UTF-8 character boundary, because it only
    // ever advances past whole unescaped runs or past ASCII bytes.
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] != b'\\' {
            // Copy the whole run up to the next backslash (or the end) at once.
            let run_end = s[i..].find('\\').map_or(s.len(), |n| i + n);
            out.push_str(&s[i..run_end]);
            i = run_end;
            continue;
        }

        // Skip the backslash.
        i += 1;
        let Some(&esc) = bytes.get(i) else {
            // Lone trailing backslash: nothing left to decode.
            break;
        };

        let simple = match esc {
            b'"' => Some('"'),
            b'\\' => Some('\\'),
            b'/' => Some('/'),
            b'b' => Some('\x08'),
            b'f' => Some('\x0C'),
            b'n' => Some('\n'),
            b'r' => Some('\r'),
            b't' => Some('\t'),
            _ => None,
        };
        if let Some(ch) = simple {
            out.push(ch);
            i += 1;
            continue;
        }

        if esc != b'u' {
            // Unknown escape: keep the backslash and leave `i` on the escaped
            // character so the run-copy path above emits it intact, even when
            // it is a multi-byte character.
            out.push('\\');
            continue;
        }

        i += 1; // skip 'u'
        let Some(hi) = hex4(&bytes[i..]) else {
            // Malformed `\u`: drop the prefix; the following text is copied
            // verbatim by the next iteration.
            continue;
        };
        i += 4;

        // Surrogate pair: high surrogate \uD800-\uDBFF + low \uDC00-\uDFFF.
        if (0xD800..0xDC00).contains(&hi)
            && bytes.get(i) == Some(&b'\\')
            && bytes.get(i + 1) == Some(&b'u')
        {
            if let Some(lo) = hex4(&bytes[i + 2..]) {
                if (0xDC00..=0xDFFF).contains(&lo) {
                    let cp = 0x1_0000u32 + ((hi as u32 - 0xD800) << 10) + (lo as u32 - 0xDC00);
                    if let Some(ch) = char::from_u32(cp) {
                        out.push(ch);
                        i += 6;
                        continue;
                    }
                }
            }
        }

        // Plain BMP code point.  `char::from_u32` is `None` for an unpaired
        // surrogate, in which case nothing is emitted.
        if let Some(ch) = char::from_u32(hi as u32) {
            out.push(ch);
        }
    }
}
1172
/// Per-chunk classification masks produced by the classifier functions.
///
/// Every field is a 64-bit mask in which bit `n` describes byte `n` of the
/// classified chunk.
#[repr(C)]
#[derive(Debug, PartialEq)]
pub struct ByteState {
    /// Bit `n` set => byte `n` is whitespace (`<= 0x20`).
    whitespace: u64,
    /// Bit `n` set => byte `n` is `"`.
    quotes: u64,
    /// Bit `n` set => byte `n` is `\`.
    backslashes: u64,
    /// Bit `n` set => byte `n` ends an atom (whitespace, `,`, `}` or `]`).
    delimiters: u64,
}

// ---------------------------------------------------------------------------
// U64 (portable SWAR) — 8 × u64 words, no SIMD
// ---------------------------------------------------------------------------

/// Classify up to 64 bytes purely in software using SWAR
/// (SIMD Within A Register) bit-manipulation on eight `u64` words.
/// The Rust parse path always uses this classifier.
///
/// `src` is copied into a zero-filled 64-byte buffer first, so positions at or
/// beyond `src.len()` are classified as byte `0x00`, i.e. as whitespace and
/// therefore also as delimiters.
///
/// Three tricks are used:
///
/// * **Whitespace (`byte ≤ 0x20`)**: mask off the high bit with `v & 0x7f…`,
///   then add `0x5f` per byte.  The sum overflows into bit 7 exactly when the
///   original byte is ≥ 0x21; OR-ing back the original high bit excludes
///   bytes ≥ 0x80 (not whitespace).  Invert and mask to get the flag.
///
/// * **Byte equality**: XOR the word with a broadcast of the target byte
///   (`b * 0x0101_0101_0101_0101`), then flag the zero bytes with
///   `∼(((x & 0x7f…) + 0x7f…) | x) & 0x8080…`.  This form never carries or
///   borrows between bytes, so every flag is exact.  The shorter
///   `(x − 0x0101…) & ∼x & 0x8080…` idiom is only exact as a whole-word
///   "any zero byte?" test: its borrow falsely flags a `0x01` byte that
///   follows a zero byte, which would misclassify e.g. `#` after `"`.
///
/// * **Movemask**: collect the MSB of each byte into the low 8 bits by
///   multiplying `(v & 0x8080…)` by `0x0002_0408_1020_4081` and taking the
///   top byte (shift right 56).
///
/// # Panics
///
/// Panics if `src` is empty or longer than 64 bytes.
fn classify_u64(src: &[u8]) -> ByteState {
    /// `0x01` in every byte; multiplying by a byte value broadcasts it.
    const ONES: u64 = 0x0101_0101_0101_0101;
    /// Low seven bits of every byte.
    const LOW7: u64 = 0x7f7f_7f7f_7f7f_7f7f;
    /// Bit 7 of every byte.
    const HIGH: u64 = 0x8080_8080_8080_8080;

    /// Set bit 7 of every byte that is zero in `v`, and no other bit.
    ///
    /// `(v & LOW7) + LOW7` is at most `0xfe` per byte, so nothing carries into
    /// a neighbouring byte and each flag depends on its own byte only.
    #[inline(always)]
    fn zero_bytes(v: u64) -> u64 {
        !(((v & LOW7) + LOW7) | v) & HIGH
    }

    /// Produce a u64 with bit 7 of each byte set where that byte equals `b`.
    #[inline(always)]
    fn eq_byte(v: u64, b: u8) -> u64 {
        zero_bytes(v ^ (u64::from(b) * ONES))
    }

    /// Collect the MSB of each byte into the low 8 bits.
    #[inline(always)]
    fn movemask8(v: u64) -> u8 {
        ((v & HIGH).wrapping_mul(0x0002_0408_1020_4081_u64) >> 56) as u8
    }

    assert!(!src.is_empty() && src.len() <= 64);
    let mut buf = [0u8; 64];
    buf[..src.len()].copy_from_slice(src);

    let mut ws = [0u8; 8];
    let mut q = [0u8; 8];
    let mut bs = [0u8; 8];
    let mut dl = [0u8; 8];

    for (i, word) in buf.chunks_exact(8).enumerate() {
        let mut le = [0u8; 8];
        le.copy_from_slice(word);
        let v = u64::from_le_bytes(le);

        // Whitespace: byte ≤ 0x20.
        // (v & 0x7f…) + 0x5f… sets bit 7 iff the low seven bits are ≥ 0x21;
        // OR-ing the original v excludes bytes ≥ 0x80.
        let w = !(((v & LOW7) + 0x5f5f_5f5f_5f5f_5f5f_u64) | v) & HIGH;

        let quotes = eq_byte(v, b'"');
        let backslashes = eq_byte(v, b'\\');
        let delims = w | eq_byte(v, b',') | eq_byte(v, b'}') | eq_byte(v, b']');

        ws[i] = movemask8(w);
        q[i] = movemask8(quotes);
        bs[i] = movemask8(backslashes);
        dl[i] = movemask8(delims);
    }

    ByteState {
        whitespace: u64::from_le_bytes(ws),
        quotes: u64::from_le_bytes(q),
        backslashes: u64::from_le_bytes(bs),
        delimiters: u64::from_le_bytes(dl),
    }
}
1262
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // DOM output of the assembly parser (`parse_to_dom_zmm`) checked against
    // the Rust reference parser (`parse_to_dom`) on a range of JSON inputs.
    // -----------------------------------------------------------------------

    /// Parse `src` with both parsers and require identical tape entries.
    /// Panics if either parser rejects the input.
    #[cfg(target_arch = "x86_64")]
    fn zmm_dom_matches(src: &str) {
        let Some(expected) = parse_to_dom(src) else {
            panic!("reference rejected: {src:?}")
        };
        // NOTE(review): the safety contract of `parse_to_dom_zmm` is defined
        // outside this module; confirm the tests satisfy it on every runner.
        let Some(actual) = (unsafe { parse_to_dom_zmm(src, None) }) else {
            panic!("zmm_tape rejected: {src:?}")
        };
        assert_eq!(
            expected.entries, actual.entries,
            "tape mismatch for {src:?}"
        );
    }

    /// Require the assembly parser to reject `src`.
    #[cfg(target_arch = "x86_64")]
    fn zmm_dom_rejects(src: &str) {
        let parsed = unsafe { parse_to_dom_zmm(src, None) };
        assert!(parsed.is_none(), "zmm_tape should reject {src:?}");
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_atoms() {
        const ATOMS: &[&str] = &[
            "null",
            "true",
            "false",
            "0",
            "42",
            "-7",
            "3.14",
            "1e10",
            "-0.5e-3",
            // SWAR fast-path boundary cases: pure integers of 1 to 8 bytes.
            "1",
            "12",
            "123",
            "1234",
            "12345",
            "123456",
            "1234567",
            "12345678",
            // One byte longer than the fast path covers (validator path).
            "123456789",
        ];
        for atom in ATOMS.iter().copied() {
            zmm_dom_matches(atom);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_strings() {
        const STRINGS: &[&str] = &[
            r#""hello""#,
            r#""""#,
            r#""with \"escape\"""#,
            r#""newline\nand\ttab""#,
            r#""\u0041\u0042\u0043""#,
            r#""\u0000""#,
            r#""surrogate \uD83D\uDE00""#,
        ];
        for text in STRINGS.iter().copied() {
            zmm_dom_matches(text);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_simple_object() {
        for doc in [r#"{"x":1}"#, r#"{"a":1,"b":2,"c":3}"#, r#"{}"#].iter().copied() {
            zmm_dom_matches(doc);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_simple_array() {
        for doc in [r#"[1,2,3]"#, r#"[]"#, r#"[null,true,false,"x",42]"#].iter().copied() {
            zmm_dom_matches(doc);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_nested() {
        const NESTED: &[&str] = &[
            r#"{"a":{"b":[1,true,null]}}"#,
            r#"[[1,[2,[3]]]]"#,
            r#"{"k":{"k":{"k":{}}}}"#,
            r#"[{"a":1},{"b":2}]"#,
        ];
        for doc in NESTED.iter().copied() {
            zmm_dom_matches(doc);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_escaped_keys() {
        const DOCS: &[&str] = &[
            r#"{"key\nname":1}"#,
            r#"{"key\u0041":true}"#,
            r#"{"a\"b":null}"#,
        ];
        for doc in DOCS.iter().copied() {
            zmm_dom_matches(doc);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_whitespace() {
        const DOCS: &[&str] = &[
            "  { \"x\" : 1 }  ",
            "[ 1 , 2 , 3 ]",
            "\t\r\nnull\t\r\n",
        ];
        for doc in DOCS.iter().copied() {
            zmm_dom_matches(doc);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_long_string() {
        // Strings longer than a single 64-byte chunk, without and with an escape.
        let plain = format!(r#""{}""#, "a".repeat(200));
        let escaped = format!(r#""{}\n{}""#, "b".repeat(100), "c".repeat(100));
        zmm_dom_matches(&plain);
        zmm_dom_matches(&escaped);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_reject_invalid() {
        const INVALID: &[&str] = &[
            "",
            "{",
            "[",
            "}",
            r#"{"a":}"#,
            r#"{"a":1"#,
            // Leading zeros must be rejected (SWAR fast path must not bypass this).
            "01",
            "00",
            "007",
            // Exactly 8 bytes with a leading zero.
            "01234567",
        ];
        for doc in INVALID.iter().copied() {
            zmm_dom_rejects(doc);
        }
    }

    // -----------------------------------------------------------------------
    // SAX events of the assembly parser (`parse_with_zmm`) checked against the
    // Rust reference (`parse_with`) on inputs containing escapes.
    // -----------------------------------------------------------------------

    /// Run both SAX parsers over `src` and require identical event logs.
    /// Panics if either parser rejects the input.
    #[cfg(target_arch = "x86_64")]
    fn zmm_sax_matches(src: &str) {
        /// Sax writer that serialises every event into one comparable string.
        #[derive(Default)]
        struct EventLog(String);

        impl EventLog {
            /// Append `tag`, then `text`, then the `;` terminator.
            fn record(&mut self, tag: &str, text: &str) {
                self.0.push_str(tag);
                self.0.push_str(text);
                self.0.push(';');
            }
        }

        impl<'s> Sax<'s> for EventLog {
            type Output = String;

            fn null(&mut self) {
                self.0 += "null;";
            }
            fn bool_val(&mut self, v: bool) {
                self.0 += match v {
                    true => "true;",
                    false => "false;",
                };
            }
            fn number(&mut self, s: &str) {
                self.record("", s);
            }
            fn string(&mut self, s: &str) {
                self.record("s:", s);
            }
            fn escaped_string(&mut self, s: &str) {
                self.record("es:", s);
            }
            fn key(&mut self, s: &str) {
                self.record("k:", s);
            }
            fn escaped_key(&mut self, s: &str) {
                self.record("ek:", s);
            }
            fn start_object(&mut self) {
                self.0.push('{');
            }
            fn end_object(&mut self) {
                self.0.push('}');
            }
            fn start_array(&mut self) {
                self.0.push('[');
            }
            fn end_array(&mut self) {
                self.0.push(']');
            }
            fn finish(self) -> Option<String> {
                let EventLog(log) = self;
                Some(log)
            }
        }

        let Some(expected) = parse_with(src, EventLog::default()) else {
            panic!("reference rejected: {src:?}")
        };
        let Some(actual) = (unsafe { parse_with_zmm(src, EventLog::default()) }) else {
            panic!("parse_with_zmm rejected: {src:?}")
        };
        assert_eq!(expected, actual, "event log mismatch for {src:?}");
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_sax_escaped_strings() {
        // Single-backslash escapes and \uXXXX — the assembly handles these correctly.
        const DOCS: &[&str] = &[
            r#"{"key":"\n\t\r\""}"#,
            r#"{"key\nname":"val\u0041"}"#,
            r#"["\u0041","\u0042\u0043"]"#,
            r#"{"a\"b":"c\"d"}"#,
        ];
        for doc in DOCS.iter().copied() {
            zmm_sax_matches(doc);
        }

        // Key and value that each cross a 64-byte chunk boundary and end in an escape.
        let long = format!(r#"{{"{}\n":"{}\t"}}"#, "x".repeat(70), "y".repeat(70));
        zmm_sax_matches(&long);

        // Even runs of backslashes before a closing quote (e.g. `\\"`) are not
        // covered here: they need the parity-counting fix in the assembly too.
        // The Rust path is exercised through parse_to_dom in
        // rust_even_backslash_before_quote below.
    }

    // Rust-path-only coverage of even backslash runs before a closing quote.
    // The assembly SAX path has not yet been updated to count backslash
    // parity, so this test goes through parse_to_dom (SWAR) directly.
    #[test]
    fn rust_even_backslash_before_quote() {
        use crate::JsonRef;

        // Parse `json` and compare the decoded value stored under key "k".
        let check_k = |json: &str, want: &str| {
            let dom = parse_to_dom(json).expect("parse failed");
            assert_eq!(dom.root().get("k").as_str(), Some(want));
        };

        // `\\` = one literal backslash, then `"` terminates string → decoded = `\`
        check_k(r#"{"k":"\\"}"#, "\\");
        // `\\\\` = two literal backslashes → decoded = `\\`
        check_k(r#"{"k":"\\\\"}"#, "\\\\");

        // `\\` as an array element.
        let dom = parse_to_dom(r#"["\\"]"#).expect("parse failed");
        assert_eq!(dom.root().index_at(0).as_str(), Some("\\"));

        // Mixed content: `abc\\` followed by closing quote → decoded = `abc\`
        check_k(r#"{"k":"abc\\"}"#, "abc\\");

        // Three backslashes before `"`: `\\` escapes itself, `\"` escapes the
        // quote, so `\\\"` does NOT close the string; the outer `"` does.
        // Decoded value = `\"` (backslash + quote).
        check_k("{\"k\":\"\\\\\\\"\"}", "\\\"");
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_overflow_retry() {
        // A 200-element array of one-key objects yields far more tape entries
        // than the tiny capacity requested below.
        let items: Vec<String> = (0..200u32).map(|i| format!(r#"{{"k":{i}}}"#)).collect();
        let big = format!("[{}]", items.join(","));

        // `Some(4)` guarantees at least one overflow retry regardless of input size.
        let parsed = unsafe { parse_to_dom_zmm(&big, Some(4)) };
        let tape = parsed.expect("overflow retry should succeed");
        assert_eq!(tape.root().unwrap().array_iter().unwrap().count(), 200);
    }
}