asmjson/lib.rs
1#![doc = include_str!("../README.md")]
2
3#[cfg(feature = "serde")]
4pub mod de;
5pub mod dom;
6pub mod sax;
7
8#[cfg(feature = "serde")]
9pub use de::from_taperef;
10pub use dom::json_ref::JsonRef;
11pub use dom::{Dom, DomArrayIter, DomEntry, DomEntryKind, DomObjectIter, DomRef};
12pub use sax::Sax;
13
14use dom::DomWriter;
15
16// ---------------------------------------------------------------------------
17// Hand-written x86-64 AVX-512BW assembly parser (direct-threading, C vtable)
18// ---------------------------------------------------------------------------
19//
20// Instead of indexing directly into Rust's implementation-defined dyn-trait
21// vtable, we supply a *stable* `#[repr(C)]` function-pointer struct. The
22// assembly uses fixed offsets 0, 8, 16, … into this struct.
23
/// Stable C-layout vtable passed to the assembly parser.
///
/// Every field is an `unsafe extern "C"` function pointer, one per [`Sax`]
/// event method (all except `finish`).  The first argument of each is the
/// type-erased writer pointer; string-carrying events additionally receive a
/// pointer / length pair.
///
/// The field order is part of the contract: the assembly indexes this struct
/// at fixed byte offsets (0, 8, 16, …), so fields must not be reordered.
#[cfg(target_arch = "x86_64")]
#[repr(C)]
struct ZmmVtab {
    // Scalar events.
    null: unsafe extern "C" fn(*mut ()),
    bool_val: unsafe extern "C" fn(*mut (), bool),
    // Text events: (writer, ptr, len).
    number: unsafe extern "C" fn(*mut (), *const u8, usize),
    string: unsafe extern "C" fn(*mut (), *const u8, usize),
    escaped_string: unsafe extern "C" fn(*mut (), *const u8, usize),
    key: unsafe extern "C" fn(*mut (), *const u8, usize),
    escaped_key: unsafe extern "C" fn(*mut (), *const u8, usize),
    // Container open / close events.
    start_object: unsafe extern "C" fn(*mut ()),
    end_object: unsafe extern "C" fn(*mut ()),
    start_array: unsafe extern "C" fn(*mut ()),
    end_array: unsafe extern "C" fn(*mut ()),
}
43
44// ---------------------------------------------------------------------------
// Generic C-ABI trampolines for any `Sax` writer
// ---------------------------------------------------------------------------
//
// `WriterForZmm` is a private bridge trait that exposes every `Sax` event
// method via raw pointer / length pairs so that the `extern "C"` trampolines
// below need no lifetime parameters.  It is implemented for every
// `W: Sax<'a>` via the blanket impl; the `transmute` in the `wfz_*` methods
// that hand out `&'a str` is sound because the raw pointers always point into
// the source JSON, which lives for at least `'a`, matching the lifetime the
// concrete writer expects.
55
/// Lifetime-free bridge between the `extern "C"` trampolines and a writer.
///
/// One method per event in [`ZmmVtab`].  Text is passed as a raw
/// pointer / length pair instead of `&str`, so the trait itself carries no
/// lifetime parameter.
///
/// # Safety
///
/// For the pointer-taking methods, `ptr` must address `len` readable bytes of
/// valid UTF-8 (the implementation builds a `&str` from them without
/// validation).
#[cfg(target_arch = "x86_64")]
pub(crate) trait WriterForZmm {
    /// Forward a `null` event.
    unsafe fn wfz_null(&mut self);
    /// Forward a boolean event.
    unsafe fn wfz_bool_val(&mut self, v: bool);
    /// Forward a number whose text is `ptr[..len]`.
    unsafe fn wfz_number(&mut self, ptr: *const u8, len: usize);
    /// Forward a string without escapes whose text is `ptr[..len]`.
    unsafe fn wfz_string(&mut self, ptr: *const u8, len: usize);
    /// Forward a string that still contains escapes; raw text is `ptr[..len]`.
    unsafe fn wfz_escaped_string(&mut self, ptr: *const u8, len: usize);
    /// Forward an object key without escapes whose text is `ptr[..len]`.
    unsafe fn wfz_key(&mut self, ptr: *const u8, len: usize);
    /// Forward an object key that still contains escapes; raw text is `ptr[..len]`.
    unsafe fn wfz_escaped_key(&mut self, ptr: *const u8, len: usize);
    /// Forward a start-of-object event.
    unsafe fn wfz_start_object(&mut self);
    /// Forward an end-of-object event.
    unsafe fn wfz_end_object(&mut self);
    /// Forward a start-of-array event.
    unsafe fn wfz_start_array(&mut self);
    /// Forward an end-of-array event.
    unsafe fn wfz_end_array(&mut self);
}
70
71#[cfg(target_arch = "x86_64")]
72impl<'a, W: Sax<'a>> WriterForZmm for W {
73 unsafe fn wfz_null(&mut self) {
74 self.null()
75 }
76 unsafe fn wfz_bool_val(&mut self, v: bool) {
77 self.bool_val(v)
78 }
79 unsafe fn wfz_number(&mut self, ptr: *const u8, len: usize) {
80 let s: &'a str = unsafe {
81 std::mem::transmute(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
82 ptr, len,
83 )))
84 };
85 self.number(s)
86 }
87 unsafe fn wfz_string(&mut self, ptr: *const u8, len: usize) {
88 let s: &'a str = unsafe {
89 std::mem::transmute(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
90 ptr, len,
91 )))
92 };
93 self.string(s)
94 }
95 unsafe fn wfz_escaped_string(&mut self, ptr: *const u8, len: usize) {
96 let s = unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, len)) };
97 self.escaped_string(s)
98 }
99 unsafe fn wfz_key(&mut self, ptr: *const u8, len: usize) {
100 let s: &'a str = unsafe {
101 std::mem::transmute(std::str::from_utf8_unchecked(std::slice::from_raw_parts(
102 ptr, len,
103 )))
104 };
105 self.key(s)
106 }
107 unsafe fn wfz_escaped_key(&mut self, ptr: *const u8, len: usize) {
108 let s = unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, len)) };
109 self.escaped_key(s)
110 }
111 unsafe fn wfz_start_object(&mut self) {
112 self.start_object()
113 }
114 unsafe fn wfz_end_object(&mut self) {
115 self.end_object()
116 }
117 unsafe fn wfz_start_array(&mut self) {
118 self.start_array()
119 }
120 unsafe fn wfz_end_array(&mut self) {
121 self.end_array()
122 }
123}
124
125#[cfg(target_arch = "x86_64")]
126unsafe extern "C" fn zw_null<W: WriterForZmm>(data: *mut ()) {
127 unsafe { (*(data as *mut W)).wfz_null() }
128}
129#[cfg(target_arch = "x86_64")]
130unsafe extern "C" fn zw_bool_val<W: WriterForZmm>(data: *mut (), v: bool) {
131 unsafe { (*(data as *mut W)).wfz_bool_val(v) }
132}
133#[cfg(target_arch = "x86_64")]
134unsafe extern "C" fn zw_number<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
135 unsafe { (*(data as *mut W)).wfz_number(ptr, len) }
136}
137#[cfg(target_arch = "x86_64")]
138unsafe extern "C" fn zw_string<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
139 unsafe { (*(data as *mut W)).wfz_string(ptr, len) }
140}
141#[cfg(target_arch = "x86_64")]
142unsafe extern "C" fn zw_escaped_string<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
143 unsafe { (*(data as *mut W)).wfz_escaped_string(ptr, len) }
144}
145#[cfg(target_arch = "x86_64")]
146unsafe extern "C" fn zw_key<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
147 unsafe { (*(data as *mut W)).wfz_key(ptr, len) }
148}
149#[cfg(target_arch = "x86_64")]
150unsafe extern "C" fn zw_escaped_key<W: WriterForZmm>(data: *mut (), ptr: *const u8, len: usize) {
151 unsafe { (*(data as *mut W)).wfz_escaped_key(ptr, len) }
152}
153#[cfg(target_arch = "x86_64")]
154unsafe extern "C" fn zw_start_object<W: WriterForZmm>(data: *mut ()) {
155 unsafe { (*(data as *mut W)).wfz_start_object() }
156}
157#[cfg(target_arch = "x86_64")]
158unsafe extern "C" fn zw_end_object<W: WriterForZmm>(data: *mut ()) {
159 unsafe { (*(data as *mut W)).wfz_end_object() }
160}
161#[cfg(target_arch = "x86_64")]
162unsafe extern "C" fn zw_start_array<W: WriterForZmm>(data: *mut ()) {
163 unsafe { (*(data as *mut W)).wfz_start_array() }
164}
165#[cfg(target_arch = "x86_64")]
166unsafe extern "C" fn zw_end_array<W: WriterForZmm>(data: *mut ()) {
167 unsafe { (*(data as *mut W)).wfz_end_array() }
168}
169
/// Build a [`ZmmVtab`] whose function pointers are monomorphised for writer
/// type `W`.
///
/// `W` must implement [`WriterForZmm`], which is blanket-impl'd for every
/// `Sax<'a>` writer.  Each field is filled with the `zw_*` trampoline of the
/// same name, instantiated at `W`, so the table must only ever be paired with
/// a data pointer that really is a `*mut W`.
#[cfg(target_arch = "x86_64")]
fn build_zmm_vtab<W: WriterForZmm>() -> ZmmVtab {
    ZmmVtab {
        null: zw_null::<W>,
        bool_val: zw_bool_val::<W>,
        number: zw_number::<W>,
        string: zw_string::<W>,
        escaped_string: zw_escaped_string::<W>,
        key: zw_key::<W>,
        escaped_key: zw_escaped_key::<W>,
        start_object: zw_start_object::<W>,
        end_object: zw_end_object::<W>,
        start_array: zw_start_array::<W>,
        end_array: zw_end_array::<W>,
    }
}
189
// Declarations of the two assembly entry points.
//
// NOTE(review): `improper_ctypes` is presumably silenced because the
// signatures mention Rust-only types (`*mut ()`, `DomEntry`) — confirm.
#[cfg(target_arch = "x86_64")]
#[allow(improper_ctypes)]
unsafe extern "C" {
    /// Entry point assembled from `asm/x86_64/parse_json_zmm_sax.S`.
    ///
    /// Parses `src_ptr[..src_len]` and calls writer methods through the
    /// supplied `ZmmVtab`, passing `writer_data` as the first argument of
    /// each.  `frames_buf` is caller-provided scratch for the nesting stack
    /// (callers in this file pass `MAX_JSON_DEPTH` bytes).  Does NOT call
    /// `finish`.  Returns `true` on success.
    fn parse_json_zmm_sax(
        src_ptr: *const u8,
        src_len: usize,
        writer_data: *mut (),
        writer_vtab: *const ZmmVtab,
        frames_buf: *mut u8,
    ) -> bool;

    /// Entry point assembled from `asm/x86_64/parse_json_zmm_dom.S`.
    ///
    /// Writes [`DomEntry`] values directly into the pre-allocated `tape_ptr`
    /// array (up to `tape_cap` entries).  On success sets `*tape_len_out` to
    /// the number of entries written and returns `RESULT_OK` (0).  Sets
    /// `*has_escapes_out` to `true` if any `EscapedString` or `EscapedKey`
    /// entry was written.  Returns `RESULT_PARSE_ERROR` (1) for invalid JSON
    /// or `RESULT_TAPE_OVERFLOW` (2) if `tape_cap` entries are not sufficient.
    ///
    /// `frames_buf` and `open_buf` are caller-provided scratch arrays; the
    /// caller in this file sizes both with `MAX_JSON_DEPTH` elements.
    fn parse_json_zmm_dom(
        src_ptr: *const u8,
        src_len: usize,
        tape_ptr: *mut DomEntry<'static>,
        tape_len_out: *mut usize,
        frames_buf: *mut u8,
        open_buf: *mut u64,
        has_escapes_out: *mut bool,
        tape_cap: usize,
    ) -> u8;
}
224
/// States of the portable parser's state machine (`parse_json_impl`).
///
/// The machine is in exactly one state between bytes; only `AfterValue`
/// (or a trailing `AtomChars` that validates) is an accepting end state.
#[derive(PartialEq)]
enum State {
    // Waiting for the first byte of any JSON value.
    ValueWhitespace,

    // Inside a quoted string value.
    StringChars,

    // Inside a key string (left-hand side of an object member).
    KeyChars,
    // Closing `"` of a key consumed; skip whitespace then expect `:`.
    KeyEnd,
    // `:` consumed; skip whitespace then dispatch a value.
    AfterColon,

    // Inside an unquoted atom (number / true / false / null).
    AtomChars,

    // An invalid token was encountered; the parse will return None.
    Error,

    // `{` consumed; skip whitespace then expect `"` (key) or `}`.
    // Also re-entered after a `,` inside an object, where `}` is rejected.
    ObjectStart,

    // `[` consumed; skip whitespace then expect a value or `]`.
    // Also re-entered after a `,` inside an array, where `]` is rejected.
    ArrayStart,

    // A complete value was produced; skip whitespace then pop the context stack.
    AfterValue,
}
255
256// ---------------------------------------------------------------------------
257// Lightweight frame kind — the parser only needs to know Object vs Array for
258// routing commas and validating bracket matches. Value construction lives in
259// the writer implementations below.
260// ---------------------------------------------------------------------------
261
/// Kind of container an entry on the nesting stack represents.
///
/// The stack (`frames_buf`) is also handed to the assembly parsers as a raw
/// `*mut u8`, which is presumably why the enum is `#[repr(u8)]` with explicit
/// discriminants — keep the values in sync with the assembly.
#[derive(Copy, Clone, PartialEq)]
#[repr(u8)]
enum FrameKind {
    Object = 0,
    Array = 1,
}
268
/// Maximum supported JSON nesting depth (objects + arrays combined).
///
/// Sizes the fixed nesting-stack buffers used by the parsers in this file;
/// the portable parser rejects input that tries to open a container beyond
/// this depth.
pub const MAX_JSON_DEPTH: usize = 64;
271
272// The Sax trait (SAX-style event sink) lives in the `sax` module.
273// Re-exported at crate root as `pub use sax::Sax`.
274
275// ---------------------------------------------------------------------------
276// Atom helper — Validate a JSON number.
277// ---------------------------------------------------------------------------
278
/// Returns `true` when `s` is exactly one JSON number:
/// `-? (0 | [1-9][0-9]*) (\. [0-9]+)? ([eE] [+-]? [0-9]+)?`.
///
/// Leading `+`, leading zeros, a bare `-`, an empty fraction or exponent, and
/// any trailing bytes all make the input invalid.
fn is_valid_json_number(s: &[u8]) -> bool {
    /// Splits off the leading run of ASCII digits, returning its length and
    /// the remainder.
    fn split_digits(input: &[u8]) -> (usize, &[u8]) {
        let run = input.iter().take_while(|b| b.is_ascii_digit()).count();
        (run, &input[run..])
    }

    // Optional minus sign.
    let unsigned = match s {
        [b'-', tail @ ..] => tail,
        _ => s,
    };

    // Integer part: a lone `0`, or a digit run that starts with a digit.
    let after_int = match unsigned {
        [b'0', tail @ ..] => {
            if matches!(tail.first(), Some(d) if d.is_ascii_digit()) {
                return false; // leading zero such as `01`
            }
            tail
        }
        [d, ..] if d.is_ascii_digit() => split_digits(unsigned).1,
        _ => return false, // empty, or does not start with a digit
    };

    // Optional fraction: `.` followed by at least one digit.
    let after_frac = match after_int {
        [b'.', tail @ ..] => {
            let (digits, rest) = split_digits(tail);
            if digits == 0 {
                return false;
            }
            rest
        }
        _ => after_int,
    };

    // Optional exponent: `e`/`E`, optional sign, at least one digit.
    let after_exp = match after_frac {
        [b'e' | b'E', tail @ ..] => {
            let unsigned_exp = match tail {
                [b'+' | b'-', t @ ..] => t,
                _ => tail,
            };
            let (digits, rest) = split_digits(unsigned_exp);
            if digits == 0 {
                return false;
            }
            rest
        }
        _ => after_frac,
    };

    // Nothing may follow the number.
    after_exp.is_empty()
}
326
327/// C-linkage entry point for the hand-written assembly parser.
328/// Returns 1 if `bytes[..len]` is a valid JSON number, 0 otherwise.
329#[doc(hidden)]
330#[unsafe(no_mangle)]
331pub extern "C" fn is_valid_json_number_c(ptr: *const u8, len: usize) -> bool {
332 let s = unsafe { std::slice::from_raw_parts(ptr, len) };
333 is_valid_json_number(s)
334}
335
336/// Called from `parse_json_zmm_dom` to unescape and box a raw JSON string
337/// in one step.
338///
339/// Decodes the still-escaped bytes at `raw_ptr[..raw_len]` via
340/// [`unescape_str`], moves the result into a `Box<str>`, writes the data
341/// pointer and length to `*out_ptr` / `*out_len`, then **leaks** the box.
342/// Ownership is transferred to the `DomEntry` written immediately after this
343/// call, which will free it on `Drop`.
344#[doc(hidden)]
345#[cfg(target_arch = "x86_64")]
346#[unsafe(no_mangle)]
347#[inline(never)]
348pub extern "C" fn dom_unescape_to_box_str(
349 raw_ptr: *const u8,
350 raw_len: usize,
351 out_ptr: *mut *const u8,
352 out_len: *mut usize,
353) {
354 unsafe {
355 let raw = std::str::from_utf8_unchecked(std::slice::from_raw_parts(raw_ptr, raw_len));
356 let mut buf = String::new();
357 unescape_str(raw, &mut buf);
358 let boxed: Box<str> = buf.into_boxed_str();
359 let len = boxed.len();
360 let raw_out: *mut str = Box::into_raw(boxed);
361 *out_ptr = raw_out as *mut u8 as *const u8;
362 *out_len = len;
363 }
364}
365
366fn write_atom<'a, W: Sax<'a>>(s: &'a str, w: &mut W) -> bool {
367 match s {
368 "true" => {
369 w.bool_val(true);
370 true
371 }
372 "false" => {
373 w.bool_val(false);
374 true
375 }
376 "null" => {
377 w.null();
378 true
379 }
380 n => {
381 if is_valid_json_number(n.as_bytes()) {
382 w.number(n);
383 true
384 } else {
385 false
386 }
387 }
388 }
389}
390
391// ---------------------------------------------------------------------------
392// Public parse entry points
393// ---------------------------------------------------------------------------
394
395/// Parse `src` into a flat [`Dom`] using the portable SWAR classifier.
396///
397/// `initial_capacity` pre-sizes the tape allocation. Pass `None` to let the
398/// parser decide (equivalent to `Some(0)`, i.e. start with a default-sized
399/// `Vec`). The tape grows automatically so this is only a performance hint.
400///
401/// Returns `None` if the input is not valid JSON.
402///
403/// For maximum throughput on CPUs with AVX-512BW, use [`parse_to_dom_zmm`] or
404/// the safe wrapper returned by [`dom_parser`].
405///
406/// ```rust
407/// use asmjson::{parse_to_dom, JsonRef};
408/// let tape = parse_to_dom(r#"{"x":1}"#, None).unwrap();
409/// assert_eq!(tape.root().get("x").as_i64(), Some(1));
410/// ```
411pub fn parse_to_dom<'a>(src: &'a str, initial_capacity: Option<usize>) -> Option<Dom<'a>> {
412 let cap = initial_capacity.unwrap_or(0);
413 parse_with(src, DomWriter::with_capacity(cap))
414}
415
/// Parse `src` to a [`Dom`] using the hand-written x86-64 AVX-512BW
/// assembly parser that writes [`DomEntry`] values directly into a
/// pre-allocated array, bypassing all virtual dispatch.
///
/// `initial_capacity` controls how many [`DomEntry`] slots the first
/// allocation reserves.  Pass `None` to use the default of `src.len() / 4`
/// (but at least 2), which is large enough for well-formed JSON without
/// triggering a retry on typical inputs.  Pass `Some(n)` to hint a known-good
/// size and avoid any retry allocation.  On overflow the capacity is doubled
/// and the parse is retried from the start, regardless of the initial hint.
///
/// Only available on `x86_64` targets.  Returns `None` if the JSON is
/// invalid or nesting exceeds [`MAX_JSON_DEPTH`] levels.
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX-512BW.  Invoking this on a CPU
/// without AVX-512BW support will trigger an illegal instruction fault.  Use
/// [`parse_to_dom`] for portable code.
///
/// # Examples
///
/// ```rust
/// #[cfg(target_arch = "x86_64")]
/// {
/// use asmjson::parse_to_dom_zmm;
/// let tape = unsafe { parse_to_dom_zmm(r#"{"x":1}"#, None) }.unwrap();
/// use asmjson::JsonRef;
/// assert_eq!(tape.root().get("x").as_i64(), Some(1));
/// }
/// ```
#[cfg(target_arch = "x86_64")]
pub unsafe fn parse_to_dom_zmm<'a>(
    src: &'a str,
    initial_capacity: Option<usize>,
) -> Option<Dom<'a>> {
    // Result codes matching the assembly RESULT_* constants.
    const RESULT_OK: u8 = 0;
    const RESULT_PARSE_ERROR: u8 = 1;
    const RESULT_TAPE_OVERFLOW: u8 = 2;

    // Scratch buffers handed to the assembly; reused across retries.
    let mut frames_buf = [FrameKind::Object; MAX_JSON_DEPTH];
    let mut open_buf = [0u64; MAX_JSON_DEPTH];

    // Start at the caller-supplied hint, or default to src.len()/4 entries.
    // For well-formed JSON this default comfortably exceeds the tape length
    // (each record is ~130 bytes and emits ~22 entries; 130/4 = 32.5 > 22),
    // so no retry should be needed in practice.
    let mut capacity = initial_capacity.unwrap_or_else(|| (src.len() / 4).max(2));

    loop {
        // Fresh, uninitialised tape for this attempt (length stays 0 until
        // the assembly reports how many entries it wrote).
        let mut tape_data: Vec<DomEntry<'a>> = Vec::with_capacity(capacity);
        let tape_ptr = tape_data.as_mut_ptr() as *mut DomEntry<'static>;
        let mut tape_len: usize = 0;
        let mut has_escapes: bool = false;

        // SAFETY:
        // • `tape_data` has room for at least `capacity` entries; the
        //   assembly checks bounds before every write and returns
        //   RESULT_TAPE_OVERFLOW if the capacity is exceeded.
        // • `src` lives for at least `'a`; string pointers stored in tape
        //   entries point into `src`'s bytes and remain valid for `'a`.
        // • EscapedString / EscapedKey entries own a `Box<str>` allocated by
        //   `dom_unescape_to_box_str`; `DomEntry::drop` frees them.
        // • `parse_json_zmm_dom` does NOT call `finish`.
        let result = unsafe {
            parse_json_zmm_dom(
                src.as_ptr(),
                src.len(),
                tape_ptr,
                &raw mut tape_len,
                frames_buf.as_mut_ptr() as *mut u8,
                open_buf.as_mut_ptr(),
                &raw mut has_escapes,
                capacity,
            )
        };

        match result {
            RESULT_OK => {
                // SAFETY: assembly wrote exactly `tape_len` initialised entries.
                unsafe { tape_data.set_len(tape_len) };
                return Some(Dom {
                    entries: tape_data,
                    has_escapes,
                });
            }
            // NOTE(review): unlike the overflow arm below, this path drops
            // `tape_data` with length 0, so entries already written by the
            // assembly are never dropped.  If such entries can own a
            // `Box<str>`, this would leak them — confirm against the assembly
            // (including whether `tape_len` is valid on this path).
            RESULT_PARSE_ERROR => return None,
            RESULT_TAPE_OVERFLOW => {
                // The tape was too small; double capacity and retry.
                // First, set the vec length to `tape_len` so that any
                // EscapedString / EscapedKey entries already written (which
                // own a Box<str>) are properly dropped when tape_data goes
                // out of scope at the end of this block.
                unsafe { tape_data.set_len(tape_len) };
                // `.max(capacity + 1)` guarantees progress when `capacity`
                // is 0 (doubling zero would loop forever).
                capacity = capacity.saturating_mul(2).max(capacity + 1);
                continue;
            }
            _ => return None, // should not happen
        }
    }
}
516
517// ---------------------------------------------------------------------------
518// CPUID-dispatching helpers
519// ---------------------------------------------------------------------------
520
/// Safe trampoline for `parse_to_dom_zmm`; only called when CPUID has
/// confirmed AVX-512BW support (see [`dom_parser`]).
///
/// Exists so that [`dom_parser`] can return a plain safe `fn` pointer with
/// the same signature as [`parse_to_dom`].  It must stay private: calling it
/// without the CPUID check would violate `parse_to_dom_zmm`'s safety contract.
#[cfg(target_arch = "x86_64")]
fn parse_to_dom_zmm_safe<'a>(src: &'a str, cap: Option<usize>) -> Option<Dom<'a>> {
    // SAFETY: dom_parser() only returns this fn after CPUID confirms AVX-512BW.
    unsafe { parse_to_dom_zmm(src, cap) }
}
528
/// Returns a CPUID-selected DOM parse function.
///
/// On CPUs with AVX-512BW the returned function uses the hand-written
/// assembly parser; otherwise the portable SWAR parser is used.  The CPUID
/// check is performed once when `dom_parser()` is called.  On targets other
/// than `x86_64` the portable parser is always returned.
///
/// The returned function has the signature
/// `fn(&str, Option<usize>) -> Option<Dom<'_>>`.
///
/// ```rust
/// use asmjson::{dom_parser, JsonRef};
/// let parse = dom_parser();
/// let tape = parse(r#"{"x":1}"#, None).unwrap();
/// assert_eq!(tape.root().get("x").as_i64(), Some(1));
/// ```
pub fn dom_parser() -> for<'a> fn(&'a str, Option<usize>) -> Option<Dom<'a>> {
    // The attribute applies to the whole `if`, so non-x86_64 builds compile
    // straight through to the portable fallback.
    #[cfg(target_arch = "x86_64")]
    if is_x86_feature_detected!("avx512bw") {
        return parse_to_dom_zmm_safe;
    }
    parse_to_dom
}
551
/// Handle returned by [`sax_parser`]; call `.parse(src, writer)`.
///
/// Stores the result of a CPUID check performed at construction time so that
/// repeated `.parse()` calls pay only one branch.  On targets other than
/// `x86_64` the struct has no fields.
#[derive(Copy, Clone)]
pub struct SaxParser {
    // `true` when AVX-512BW was detected at construction time.
    #[cfg(target_arch = "x86_64")]
    zmm: bool,
}
561
impl SaxParser {
    /// Parse `src` with `writer` using the best available CPU path.
    ///
    /// Dispatches to [`parse_with_zmm`] when AVX-512BW was detected at
    /// construction time, and to the portable [`parse_with`] otherwise.
    /// Returns whatever the chosen parser returns.
    pub fn parse<'a, W: Sax<'a>>(&self, src: &'a str, writer: W) -> Option<W::Output> {
        // Compiled only on x86_64; other targets fall straight through.
        #[cfg(target_arch = "x86_64")]
        if self.zmm {
            // SAFETY: constructed only when CPUID confirms AVX-512BW.
            return unsafe { parse_with_zmm(src, writer) };
        }
        parse_with(src, writer)
    }
}
573
/// Returns a CPUID-selected SAX parser handle.
///
/// The CPUID check is performed once, here; subsequent calls to
/// [`SaxParser::parse`] dispatch to the best available path without
/// repeating it.
///
/// ```rust
/// use asmjson::sax::Sax;
/// use asmjson::sax_parser;
///
/// struct Counter { n: usize }
/// impl<'a> Sax<'a> for Counter {
/// type Output = usize;
/// fn null(&mut self) {}
/// fn bool_val(&mut self, _: bool) {}
/// fn number(&mut self, _: &str) {}
/// fn string(&mut self, _: &str) { self.n += 1; }
/// fn escaped_string(&mut self, _: &str) { self.n += 1; }
/// fn key(&mut self, _: &str) {}
/// fn escaped_key(&mut self, _: &str) {}
/// fn start_object(&mut self) {}
/// fn end_object(&mut self) {}
/// fn start_array(&mut self) {}
/// fn end_array(&mut self) {}
/// fn finish(self) -> Option<usize> { Some(self.n) }
/// }
///
/// let parser = sax_parser();
/// let n = parser.parse(r#"["a","b"]"#, Counter { n: 0 }).unwrap();
/// assert_eq!(n, 2);
/// ```
pub fn sax_parser() -> SaxParser {
    SaxParser {
        // Runtime feature probe; the field only exists on x86_64.
        #[cfg(target_arch = "x86_64")]
        zmm: is_x86_feature_detected!("avx512bw"),
    }
}
611
612/// Parse `src` using a custom [`JsonWriter`], returning its output.
613///
614/// This is the generic entry point: supply your own writer to produce any
615/// output in a single pass over the source. Uses the portable SWAR
616/// classifier; works on any architecture.
617///
618/// For maximum throughput on CPUs with AVX-512BW, use [`parse_with_zmm`] or
619/// the safe wrapper returned by [`sax_parser`].
620pub fn parse_with<'a, W: Sax<'a>>(src: &'a str, writer: W) -> Option<W::Output> {
621 let mut frames_buf = [FrameKind::Object; MAX_JSON_DEPTH];
622 parse_json_impl(src, writer, &mut frames_buf)
623}
624
/// Parse `src` using a custom [`Sax`] writer and the hand-written x86-64
/// AVX-512BW assembly parser with direct-threaded state dispatch.
///
/// The assembly reports events through a [`ZmmVtab`] monomorphised for `W`;
/// `finish` is called here, on the Rust side, only if the assembly reports
/// success.
///
/// Only available on `x86_64` targets.  Returns `None` if the JSON is
/// invalid or nesting exceeds [`MAX_JSON_DEPTH`] levels.
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX-512BW.  Invoking this on a CPU
/// without AVX-512BW support will trigger an illegal instruction fault.  Use
/// [`parse_with`] for portable code.
#[cfg(target_arch = "x86_64")]
pub unsafe fn parse_with_zmm<'a, W: Sax<'a>>(src: &'a str, mut writer: W) -> Option<W::Output> {
    // The vtable and the data pointer below are built for the same `W`, which
    // is what makes the casts inside the `zw_*` trampolines valid.
    let vtab = build_zmm_vtab::<W>();
    let mut frames_buf = [FrameKind::Object; MAX_JSON_DEPTH];
    // SAFETY (caller obligation): CPU supports AVX-512BW.
    // SAFETY (internal): `writer`, `vtab` and `frames_buf` are locals that
    // outlive this synchronous call, and `src` lives for 'a.
    // parse_json_zmm_sax does NOT call finish.
    let ok = unsafe {
        parse_json_zmm_sax(
            src.as_ptr(),
            src.len(),
            &raw mut writer as *mut (),
            &vtab,
            frames_buf.as_mut_ptr() as *mut u8,
        )
    };
    if ok { writer.finish() } else { None }
}
655
656fn parse_json_impl<'a, W: Sax<'a>>(
657 src: &'a str,
658 mut writer: W,
659 frames_buf: &mut [FrameKind; MAX_JSON_DEPTH],
660) -> Option<W::Output> {
661 let bytes = src.as_bytes();
662 let mut frames_depth: usize = 0;
663 let mut str_start: usize = 0; // absolute byte offset of char after opening '"'
664 let mut str_escaped = false; // true if the current string contained any backslash
665 let mut bs_count: usize = 0; // consecutive backslashes immediately before current pos
666 let mut atom_start: usize = 0; // absolute byte offset of first atom byte
667 let mut current_key_raw: &'a str = ""; // raw key slice captured when KeyChars closes
668 let mut current_key_escaped = false; // true when the key contained backslash escapes
669 let mut after_comma = false; // true when ObjectStart/ArrayStart was reached via a `,`
670 let mut state = State::ValueWhitespace;
671
672 let mut pos = 0;
673 while pos < bytes.len() {
674 let chunk_len = (bytes.len() - pos).min(64);
675 let chunk = &bytes[pos..pos + chunk_len];
676 let byte_state = classify_u64(chunk);
677
678 let mut chunk_offset = 0;
679 'inner: while chunk_offset < chunk_len {
680 state = match state {
681 State::ValueWhitespace => {
682 let ahead = (!byte_state.whitespace) >> chunk_offset;
683 let skip = ahead.trailing_zeros() as usize;
684 chunk_offset += skip;
685 if chunk_offset >= chunk_len {
686 break 'inner;
687 }
688 let byte = chunk[chunk_offset];
689 match byte {
690 b'{' => {
691 if frames_depth >= MAX_JSON_DEPTH {
692 State::Error
693 } else {
694 frames_buf[frames_depth] = FrameKind::Object;
695 frames_depth += 1;
696 writer.start_object();
697 State::ObjectStart
698 }
699 }
700 b'[' => {
701 if frames_depth >= MAX_JSON_DEPTH {
702 State::Error
703 } else {
704 frames_buf[frames_depth] = FrameKind::Array;
705 frames_depth += 1;
706 writer.start_array();
707 State::ArrayStart
708 }
709 }
710 b'"' => {
711 str_start = pos + chunk_offset + 1;
712 str_escaped = false;
713 bs_count = 0;
714 State::StringChars
715 }
716 _ => {
717 atom_start = pos + chunk_offset;
718 State::AtomChars
719 }
720 }
721 }
722
723 State::StringChars => {
724 // Scan for either '\' or '"'; handle runs of backslashes here
725 // rather than via a separate state so that even/odd counting is
726 // correct for sequences like `\\"` (two backslashes + quote).
727 let interesting = (byte_state.backslashes | byte_state.quotes) >> chunk_offset;
728 let skip = interesting.trailing_zeros() as usize;
729 chunk_offset = (chunk_offset + skip).min(chunk_len);
730 if chunk_offset >= chunk_len {
731 break 'inner;
732 }
733 // Any ordinary chars between the last event and here break the run.
734 if skip > 0 {
735 bs_count = 0;
736 }
737 let byte = chunk[chunk_offset];
738 match byte {
739 b'\\' => {
740 // Count consecutive backslashes; parity decides whether
741 // the next quote (if any) is escaped.
742 bs_count += 1;
743 str_escaped = true;
744 State::StringChars
745 }
746 b'"' if bs_count & 1 == 1 => {
747 // Odd run of preceding backslashes: this quote is escaped.
748 bs_count = 0;
749 State::StringChars
750 }
751 _ => {
752 // Even run (0, 2, 4 …): string ends here.
753 bs_count = 0;
754 let raw = &src[str_start..pos + chunk_offset];
755 if str_escaped {
756 writer.escaped_string(raw);
757 } else {
758 writer.string(raw);
759 }
760 State::AfterValue
761 }
762 }
763 }
764
765 State::KeyChars => {
766 let interesting = (byte_state.backslashes | byte_state.quotes) >> chunk_offset;
767 let skip = interesting.trailing_zeros() as usize;
768 chunk_offset = (chunk_offset + skip).min(chunk_len);
769 if chunk_offset >= chunk_len {
770 break 'inner;
771 }
772 if skip > 0 {
773 bs_count = 0;
774 }
775 let byte = chunk[chunk_offset];
776 match byte {
777 b'\\' => {
778 bs_count += 1;
779 str_escaped = true;
780 State::KeyChars
781 }
782 b'"' if bs_count & 1 == 1 => {
783 // Odd run of preceding backslashes: this quote is escaped.
784 bs_count = 0;
785 State::KeyChars
786 }
787 _ => {
788 // Even run: key ends here.
789 bs_count = 0;
790 current_key_raw = &src[str_start..pos + chunk_offset];
791 current_key_escaped = str_escaped;
792 State::KeyEnd
793 }
794 }
795 }
796 State::KeyEnd => {
797 let ahead = (!byte_state.whitespace) >> chunk_offset;
798 let skip = ahead.trailing_zeros() as usize;
799 chunk_offset += skip;
800 if chunk_offset >= chunk_len {
801 break 'inner;
802 }
803 let byte = chunk[chunk_offset];
804 match byte {
805 b':' => {
806 if current_key_escaped {
807 writer.escaped_key(current_key_raw);
808 } else {
809 writer.key(current_key_raw);
810 }
811 State::AfterColon
812 }
813 _ => State::Error,
814 }
815 }
816 State::AfterColon => {
817 let ahead = (!byte_state.whitespace) >> chunk_offset;
818 let skip = ahead.trailing_zeros() as usize;
819 chunk_offset += skip;
820 if chunk_offset >= chunk_len {
821 break 'inner;
822 }
823 let byte = chunk[chunk_offset];
824 match byte {
825 b'{' => {
826 if frames_depth >= MAX_JSON_DEPTH {
827 State::Error
828 } else {
829 frames_buf[frames_depth] = FrameKind::Object;
830 frames_depth += 1;
831 writer.start_object();
832 State::ObjectStart
833 }
834 }
835 b'[' => {
836 if frames_depth >= MAX_JSON_DEPTH {
837 State::Error
838 } else {
839 frames_buf[frames_depth] = FrameKind::Array;
840 frames_depth += 1;
841 writer.start_array();
842 State::ArrayStart
843 }
844 }
845 b'"' => {
846 str_start = pos + chunk_offset + 1;
847 str_escaped = false;
848 bs_count = 0;
849 State::StringChars
850 }
851 _ => {
852 atom_start = pos + chunk_offset;
853 State::AtomChars
854 }
855 }
856 }
857
858 State::AtomChars => {
859 let ahead = byte_state.delimiters >> chunk_offset;
860 let skip = ahead.trailing_zeros() as usize;
861 chunk_offset += skip;
862 if chunk_offset >= chunk_len {
863 break 'inner;
864 }
865 let byte = chunk[chunk_offset];
866 if !write_atom(&src[atom_start..pos + chunk_offset], &mut writer) {
867 State::Error
868 } else {
869 match byte {
870 b'}' => {
871 if frames_depth == 0
872 || frames_buf[frames_depth - 1] != FrameKind::Object
873 {
874 State::Error
875 } else {
876 frames_depth -= 1;
877 writer.end_object();
878 State::AfterValue
879 }
880 }
881 b']' => {
882 if frames_depth == 0
883 || frames_buf[frames_depth - 1] != FrameKind::Array
884 {
885 State::Error
886 } else {
887 frames_depth -= 1;
888 writer.end_array();
889 State::AfterValue
890 }
891 }
892 b',' => {
893 if frames_depth == 0 {
894 State::Error
895 } else {
896 match frames_buf[frames_depth - 1] {
897 FrameKind::Array => {
898 after_comma = true;
899 State::ArrayStart
900 }
901 FrameKind::Object => {
902 after_comma = true;
903 State::ObjectStart
904 }
905 }
906 }
907 }
908 _ => State::AfterValue, // whitespace delimiter
909 }
910 }
911 }
912
913 State::Error => break 'inner,
914
915 State::ObjectStart => {
916 let ahead = (!byte_state.whitespace) >> chunk_offset;
917 let skip = ahead.trailing_zeros() as usize;
918 chunk_offset += skip;
919 if chunk_offset >= chunk_len {
920 break 'inner;
921 }
922 let byte = chunk[chunk_offset];
923 match byte {
924 b'"' => {
925 after_comma = false;
926 str_start = pos + chunk_offset + 1;
927 str_escaped = false;
928 bs_count = 0;
929 State::KeyChars
930 }
931 b'}' => {
932 if after_comma {
933 State::Error
934 } else if frames_depth > 0
935 && frames_buf[frames_depth - 1] == FrameKind::Object
936 {
937 frames_depth -= 1;
938 writer.end_object();
939 State::AfterValue
940 } else {
941 State::Error
942 }
943 }
944 _ => State::Error,
945 }
946 }
947
948 State::ArrayStart => {
949 let ahead = (!byte_state.whitespace) >> chunk_offset;
950 let skip = ahead.trailing_zeros() as usize;
951 chunk_offset += skip;
952 if chunk_offset >= chunk_len {
953 break 'inner;
954 }
955 let byte = chunk[chunk_offset];
956 match byte {
957 b']' => {
958 if after_comma {
959 State::Error
960 } else if frames_depth > 0
961 && frames_buf[frames_depth - 1] == FrameKind::Array
962 {
963 frames_depth -= 1;
964 writer.end_array();
965 State::AfterValue
966 } else {
967 State::Error
968 }
969 }
970 b'{' => {
971 after_comma = false;
972 if frames_depth >= MAX_JSON_DEPTH {
973 State::Error
974 } else {
975 frames_buf[frames_depth] = FrameKind::Object;
976 frames_depth += 1;
977 writer.start_object();
978 State::ObjectStart
979 }
980 }
981 b'[' => {
982 after_comma = false;
983 if frames_depth >= MAX_JSON_DEPTH {
984 State::Error
985 } else {
986 frames_buf[frames_depth] = FrameKind::Array;
987 frames_depth += 1;
988 writer.start_array();
989 State::ArrayStart
990 }
991 }
992 b'"' => {
993 after_comma = false;
994 str_start = pos + chunk_offset + 1;
995 str_escaped = false;
996 bs_count = 0;
997 State::StringChars
998 }
999 _ => {
1000 after_comma = false;
1001 atom_start = pos + chunk_offset;
1002 State::AtomChars
1003 }
1004 }
1005 }
1006
1007 State::AfterValue => {
1008 let ahead = (!byte_state.whitespace) >> chunk_offset;
1009 let skip = ahead.trailing_zeros() as usize;
1010 chunk_offset += skip;
1011 if chunk_offset >= chunk_len {
1012 break 'inner;
1013 }
1014 let byte = chunk[chunk_offset];
1015 match byte {
1016 b',' => {
1017 if frames_depth == 0 {
1018 State::Error
1019 } else {
1020 match frames_buf[frames_depth - 1] {
1021 FrameKind::Object => {
1022 after_comma = true;
1023 State::ObjectStart
1024 }
1025 FrameKind::Array => {
1026 after_comma = true;
1027 State::ArrayStart
1028 }
1029 }
1030 }
1031 }
1032 b'}' => {
1033 if frames_depth > 0 && frames_buf[frames_depth - 1] == FrameKind::Object
1034 {
1035 frames_depth -= 1;
1036 writer.end_object();
1037 State::AfterValue
1038 } else {
1039 State::Error
1040 }
1041 }
1042 b']' => {
1043 if frames_depth > 0 && frames_buf[frames_depth - 1] == FrameKind::Array
1044 {
1045 frames_depth -= 1;
1046 writer.end_array();
1047 State::AfterValue
1048 } else {
1049 State::Error
1050 }
1051 }
1052 _ => State::Error,
1053 }
1054 }
1055 };
1056 chunk_offset += 1;
1057 }
1058 pos += chunk_len;
1059 }
1060
1061 // Flush a trailing atom not followed by a delimiter (e.g. top-level `42`).
1062 if state == State::AtomChars {
1063 if !write_atom(&src[atom_start..], &mut writer) {
1064 return None;
1065 }
1066 } else if state != State::AfterValue {
1067 return None;
1068 }
1069
1070 if state == State::Error {
1071 return None;
1072 }
1073
1074 // Unclosed objects or arrays.
1075 if frames_depth != 0 {
1076 return None;
1077 }
1078
1079 writer.finish()
1080}
1081
/// Decode all JSON string escape sequences within `s` (the raw content between
/// the opening and closing quotes, with no surrounding quotes).  Clears `out`
/// and writes the decoded text into it.
///
/// Supported escapes: `\"` `\\` `\/` `\b` `\f` `\n` `\r` `\t` `\uXXXX`
/// (including surrogate pairs).  Unknown escapes are passed through verbatim
/// (the backslash and the complete character that follows it).
///
/// The decoder is lenient and never panics on malformed input:
///
/// * `\u` that is not followed by exactly four hex digits is dropped and the
///   text after it is copied as ordinary characters;
/// * a surrogate that is not part of a valid high/low pair is dropped;
/// * a lone backslash at the very end of `s` is dropped.
#[doc(hidden)]
#[unsafe(no_mangle)]
#[inline(never)]
pub fn unescape_str(s: &str, out: &mut String) {
    /// Parse exactly four ASCII hex digits from the front of `bytes`.
    ///
    /// Working on bytes (rather than slicing `s`) means a multi-byte character
    /// inside the four-byte window can never trigger a char-boundary panic,
    /// and, unlike `u16::from_str_radix`, a leading `+` sign is not accepted.
    fn hex4(bytes: &[u8]) -> Option<u16> {
        bytes.get(..4)?.iter().try_fold(0u16, |acc, &b| {
            let digit = (b as char).to_digit(16)?;
            Some((acc << 4) | digit as u16)
        })
    }

    out.clear();
    let bytes = s.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] != b'\\' {
            // Copy one UTF-8 character verbatim.
            let ch = s[i..]
                .chars()
                .next()
                .expect("i < s.len(), so at least one char remains");
            out.push(ch);
            i += ch.len_utf8();
            continue;
        }
        // Skip the backslash.
        i += 1;
        if i >= bytes.len() {
            // Trailing lone backslash: nothing to decode.
            break;
        }
        match bytes[i] {
            b'"' => {
                out.push('"');
                i += 1;
            }
            b'\\' => {
                out.push('\\');
                i += 1;
            }
            b'/' => {
                out.push('/');
                i += 1;
            }
            b'b' => {
                out.push('\x08');
                i += 1;
            }
            b'f' => {
                out.push('\x0C');
                i += 1;
            }
            b'n' => {
                out.push('\n');
                i += 1;
            }
            b'r' => {
                out.push('\r');
                i += 1;
            }
            b't' => {
                out.push('\t');
                i += 1;
            }
            b'u' => {
                i += 1; // skip 'u'
                if let Some(hi) = hex4(&bytes[i..]) {
                    i += 4;
                    // Surrogate pair: high surrogate \uD800-\uDBFF followed
                    // immediately by a low surrogate \uDC00-\uDFFF.
                    let lo = if (0xD800u16..0xDC00).contains(&hi)
                        && bytes[i..].starts_with(b"\\u")
                    {
                        hex4(&bytes[i + 2..]).filter(|lo| (0xDC00u16..=0xDFFF).contains(lo))
                    } else {
                        None
                    };
                    if let Some(lo) = lo {
                        let cp = 0x1_0000u32
                            + ((u32::from(hi) - 0xD800) << 10)
                            + (u32::from(lo) - 0xDC00);
                        if let Some(ch) = char::from_u32(cp) {
                            out.push(ch);
                            i += 6; // consume the `\uXXXX` of the low half
                            continue;
                        }
                    }
                    // Plain BMP code point.  `from_u32` yields `None` for an
                    // unpaired surrogate, which is therefore dropped.
                    if let Some(ch) = char::from_u32(u32::from(hi)) {
                        out.push(ch);
                    }
                }
                // On a malformed `\u` only the `\u` itself has been consumed;
                // the following bytes are handled by the next loop iterations.
            }
            _ => {
                // Unknown escape: keep the backslash and the *whole* following
                // character, so `i` always stays on a char boundary even when
                // that character is multi-byte.
                let ch = s[i..]
                    .chars()
                    .next()
                    .expect("i < s.len(), so at least one char remains");
                out.push('\\');
                out.push(ch);
                i += ch.len_utf8();
            }
        }
    }
}
1180
/// Per-chunk classification masks produced by the classifier functions.
///
/// Every field is a 64-bit mask with one bit per byte of the classified
/// chunk: bit `n` describes byte `n`.  `#[repr(C)]` pins the field order, so
/// fields must not be reordered.
///
/// `classify_u64` zero-pads chunks shorter than 64 bytes, so the bits beyond
/// the input length read as whitespace (and therefore as delimiters).
#[repr(C)]
#[derive(Debug, PartialEq)]
pub struct ByteState {
    whitespace: u64,  // bit n set => byte n is whitespace (<= 0x20)
    quotes: u64,      // bit n set => byte n is '"'
    backslashes: u64, // bit n set => byte n is '\\'
    delimiters: u64,  // bit n set => byte n ends an atom (whitespace | ',' | '}' | ']')
}
1190
1191// ---------------------------------------------------------------------------
1192// U64 (portable SWAR) — 8 × u64 words, no SIMD
1193// ---------------------------------------------------------------------------
1194
1195/// Classify up to 64 bytes purely in software using SWAR
1196/// (SIMD Within A Register) bit-manipulation on eight `u64` words.
1197/// The Rust parse path always uses this classifier.
1198///
1199/// Three tricks are used:
1200///
1201/// * **Whitespace (`byte ≤ 0x20`)**: mask off the high bit with `v & 0x7f…`,
1202/// then add `0x5f` per byte. The sum overflows into bit 7 exactly when the
1203/// original byte is ≥ 0x21; OR-ing back the original high bit excludes
1204/// bytes ≥ 0x80 (not whitespace). Invert and mask to get the flag.
1205///
1206/// * **Byte equality**: XOR the word with a broadcast of the target byte
1207/// (`b * 0x0101_0101_0101_0101`), then test for a zero byte via
1208/// `(v − 0x0101…) & ∼v & 0x8080…`.
1209///
1210/// * **Movemask**: collect the MSB of each byte into the low 8 bits by
1211/// multiplying `(v & 0x8080…)` by `0x0002_0408_1020_4081` and taking the
1212/// top byte (shift right 56).
1213fn classify_u64(src: &[u8]) -> ByteState {
1214 assert!(!src.is_empty() && src.len() <= 64);
1215 let mut buf = [0u8; 64];
1216 buf[..src.len()].copy_from_slice(src);
1217
1218 #[inline(always)]
1219 fn has_zero_byte(v: u64) -> u64 {
1220 v.wrapping_sub(0x0101_0101_0101_0101_u64) & !v & 0x8080_8080_8080_8080_u64
1221 }
1222
1223 /// Produce a u64 with bit 7 of each byte set where that byte equals `b`.
1224 #[inline(always)]
1225 fn eq_byte(v: u64, b: u8) -> u64 {
1226 has_zero_byte(v ^ (b as u64 * 0x0101_0101_0101_0101_u64))
1227 }
1228
1229 /// Collect the MSB of each byte into the low 8 bits.
1230 #[inline(always)]
1231 fn movemask8(v: u64) -> u8 {
1232 ((v & 0x8080_8080_8080_8080_u64).wrapping_mul(0x0002_0408_1020_4081_u64) >> 56) as u8
1233 }
1234
1235 let mut ws = [0u8; 8];
1236 let mut q = [0u8; 8];
1237 let mut bs = [0u8; 8];
1238 let mut dl = [0u8; 8];
1239
1240 for i in 0..8 {
1241 let v = u64::from_le_bytes(buf[i * 8..][..8].try_into().unwrap());
1242
1243 // Whitespace: byte ≤ 0x20.
1244 // (v & 0x7f…) + 0x5f… overflows into bit 7 iff byte ≥ 0x21 (low-7 range);
1245 // OR-ing the original v excludes bytes ≥ 0x80.
1246 let masked = v & 0x7f7f_7f7f_7f7f_7f7f_u64;
1247 let sum = masked.wrapping_add(0x5f5f_5f5f_5f5f_5f5f_u64);
1248 let w = !(sum | v) & 0x8080_8080_8080_8080_u64;
1249
1250 let quotes = eq_byte(v, b'"');
1251 let backslashes = eq_byte(v, b'\\');
1252 let commas = eq_byte(v, b',');
1253 let cl_brace = eq_byte(v, b'}');
1254 let cl_bracket = eq_byte(v, b']');
1255 let delims = w | commas | cl_brace | cl_bracket;
1256
1257 ws[i] = movemask8(w);
1258 q[i] = movemask8(quotes);
1259 bs[i] = movemask8(backslashes);
1260 dl[i] = movemask8(delims);
1261 }
1262
1263 ByteState {
1264 whitespace: u64::from_le_bytes(ws),
1265 quotes: u64::from_le_bytes(q),
1266 backslashes: u64::from_le_bytes(bs),
1267 delimiters: u64::from_le_bytes(dl),
1268 }
1269}
1270
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when the running CPU supports AVX-512BW, the instruction
    /// set the hand-written assembly parser is built on.
    ///
    /// The zmm tests call this before entering the assembly so that
    /// `cargo test` on an x86-64 machine without AVX-512BW skips them instead
    /// of executing unsupported instructions.
    // NOTE(review): assumes AVX-512BW is the only CPU feature the assembly
    // needs — confirm against the `# Safety` docs of the `*_zmm` entry points.
    #[cfg(target_arch = "x86_64")]
    fn zmm_available() -> bool {
        let available = is_x86_feature_detected!("avx512bw");
        if !available {
            eprintln!("skipping zmm check: CPU does not support AVX-512BW");
        }
        available
    }

    // -----------------------------------------------------------------------
    // zmm_tape correctness: compare parse_to_dom_zmm against the Rust
    // reference parser across a range of JSON inputs.
    // -----------------------------------------------------------------------

    /// Assert that the assembly DOM parser accepts `src` and produces the
    /// same tape entries as the Rust reference parser.
    #[cfg(target_arch = "x86_64")]
    fn zmm_dom_matches(src: &str) {
        let ref_tape =
            parse_to_dom(src, None).unwrap_or_else(|| panic!("reference rejected: {src:?}"));
        if !zmm_available() {
            return;
        }
        // SAFETY: AVX-512BW support was verified by `zmm_available` above.
        let asm_tape = unsafe { parse_to_dom_zmm(src, None) }
            .unwrap_or_else(|| panic!("zmm_tape rejected: {src:?}"));
        assert_eq!(
            ref_tape.entries, asm_tape.entries,
            "tape mismatch for {src:?}"
        );
    }

    /// Assert that the assembly DOM parser rejects `src`.
    #[cfg(target_arch = "x86_64")]
    fn zmm_dom_rejects(src: &str) {
        if !zmm_available() {
            return;
        }
        assert!(
            // SAFETY: AVX-512BW support was verified by `zmm_available` above.
            unsafe { parse_to_dom_zmm(src, None) }.is_none(),
            "zmm_tape should reject {src:?}"
        );
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_atoms() {
        for src in &[
            "null",
            "true",
            "false",
            "0",
            "42",
            "-7",
            "3.14",
            "1e10",
            "-0.5e-3",
            // SWAR fast-path boundary cases: pure integers up to 8 bytes
            "1",
            "12",
            "123",
            "1234",
            "12345",
            "123456",
            "1234567",
            "12345678",
            // Integers just beyond 8 bytes (validator path)
            "123456789",
        ] {
            zmm_dom_matches(src);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_strings() {
        for src in &[
            r#""hello""#,
            r#""""#,
            r#""with \"escape\"""#,
            r#""newline\nand\ttab""#,
            r#""\u0041\u0042\u0043""#,
            r#""\u0000""#,
            r#""surrogate \uD83D\uDE00""#,
        ] {
            zmm_dom_matches(src);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_simple_object() {
        zmm_dom_matches(r#"{"x":1}"#);
        zmm_dom_matches(r#"{"a":1,"b":2,"c":3}"#);
        zmm_dom_matches(r#"{}"#);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_simple_array() {
        zmm_dom_matches(r#"[1,2,3]"#);
        zmm_dom_matches(r#"[]"#);
        zmm_dom_matches(r#"[null,true,false,"x",42]"#);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_nested() {
        zmm_dom_matches(r#"{"a":{"b":[1,true,null]}}"#);
        zmm_dom_matches(r#"[[1,[2,[3]]]]"#);
        zmm_dom_matches(r#"{"k":{"k":{"k":{}}}}"#);
        zmm_dom_matches(r#"[{"a":1},{"b":2}]"#);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_escaped_keys() {
        zmm_dom_matches(r#"{"key\nname":1}"#);
        zmm_dom_matches(r#"{"key\u0041":true}"#);
        zmm_dom_matches(r#"{"a\"b":null}"#);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_whitespace() {
        zmm_dom_matches("  {  \"x\"  :  1  }  ");
        zmm_dom_matches("[ 1 , 2 , 3 ]");
        zmm_dom_matches("\t\r\nnull\t\r\n");
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_long_string() {
        // String that spans more than one 64-byte chunk.
        let long = format!(r#""{}""#, "a".repeat(200));
        zmm_dom_matches(&long);
        let long_esc = format!(r#""{}\n{}""#, "b".repeat(100), "c".repeat(100));
        zmm_dom_matches(&long_esc);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_reject_invalid() {
        zmm_dom_rejects("");
        zmm_dom_rejects("{");
        zmm_dom_rejects("[");
        zmm_dom_rejects("}");
        zmm_dom_rejects(r#"{"a":}"#);
        zmm_dom_rejects(r#"{"a":1"#);
        // Leading zeros must be rejected (SWAR fast path must not bypass this).
        zmm_dom_rejects("01");
        zmm_dom_rejects("00");
        zmm_dom_rejects("007");
        zmm_dom_rejects("01234567"); // exactly 8 bytes, leading zero
    }

    // -----------------------------------------------------------------------
    // parse_with_zmm SAX: compare against the Rust reference on escape inputs.
    // -----------------------------------------------------------------------

    /// Assert that the assembly SAX parser emits the same event sequence as
    /// the Rust reference parser for `src`.
    #[cfg(target_arch = "x86_64")]
    fn zmm_sax_matches(src: &str) {
        // Collect events from both parsers into a comparable string.
        #[derive(Default)]
        struct EventLog(String);

        impl<'s> Sax<'s> for EventLog {
            type Output = String;
            fn null(&mut self) {
                self.0.push_str("null;");
            }
            fn bool_val(&mut self, v: bool) {
                self.0.push_str(if v { "true;" } else { "false;" });
            }
            fn number(&mut self, s: &str) {
                self.0.push_str(s);
                self.0.push(';');
            }
            fn string(&mut self, s: &str) {
                self.0.push_str("s:");
                self.0.push_str(s);
                self.0.push(';');
            }
            fn escaped_string(&mut self, s: &str) {
                self.0.push_str("es:");
                self.0.push_str(s);
                self.0.push(';');
            }
            fn key(&mut self, s: &str) {
                self.0.push_str("k:");
                self.0.push_str(s);
                self.0.push(';');
            }
            fn escaped_key(&mut self, s: &str) {
                self.0.push_str("ek:");
                self.0.push_str(s);
                self.0.push(';');
            }
            fn start_object(&mut self) {
                self.0.push('{');
            }
            fn end_object(&mut self) {
                self.0.push('}');
            }
            fn start_array(&mut self) {
                self.0.push('[');
            }
            fn end_array(&mut self) {
                self.0.push(']');
            }
            fn finish(self) -> Option<String> {
                Some(self.0)
            }
        }

        let ref_log = parse_with(src, EventLog::default())
            .unwrap_or_else(|| panic!("reference rejected: {src:?}"));
        if !zmm_available() {
            return;
        }
        // SAFETY: AVX-512BW support was verified by `zmm_available` above.
        let asm_log = unsafe { parse_with_zmm(src, EventLog::default()) }
            .unwrap_or_else(|| panic!("parse_with_zmm rejected: {src:?}"));
        assert_eq!(ref_log, asm_log, "event log mismatch for {src:?}");
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_sax_escaped_strings() {
        // Single-backslash escapes and \uXXXX — the assembly handles these correctly.
        zmm_sax_matches(r#"{"key":"\n\t\r\""}"#);
        zmm_sax_matches(r#"{"key\nname":"val\u0041"}"#);
        zmm_sax_matches(r#"["\u0041","\u0042\u0043"]"#);
        zmm_sax_matches(r#"{"a\"b":"c\"d"}"#);
        // String that spans more than one 64-byte chunk and contains an escape.
        let long = format!(r#"{{"{}\n":"{}\t"}}"#, "x".repeat(70), "y".repeat(70));
        zmm_sax_matches(&long);
        // Note: inputs with even runs of backslashes before a closing quote (e.g.
        // `\\"`) require the parity-counting fix in the assembly too; tested via
        // parse_to_dom in rust_even_backslash_before_quote below.
    }

    // Rust-path-only test for even backslash runs before a closing quote.
    // The assembly SAX path has not yet been updated to count backslash parity,
    // so this test drives parse_to_dom (SWAR) directly.
    #[test]
    fn rust_even_backslash_before_quote() {
        use crate::JsonRef;
        // `\\` = one literal backslash, then `"` terminates string → decoded = `\`
        let t = parse_to_dom(r#"{"k":"\\"}"#, None).expect("parse failed");
        assert_eq!(t.root().get("k").as_str(), Some("\\"));
        // `\\\\` = two literal backslashes → decoded = `\\`
        let t = parse_to_dom(r#"{"k":"\\\\"}"#, None).expect("parse failed");
        assert_eq!(t.root().get("k").as_str(), Some("\\\\"));
        // `\\` inside array
        let t = parse_to_dom(r#"["\\"]"#, None).expect("parse failed");
        assert_eq!(t.root().index_at(0).as_str(), Some("\\"));
        // Mixed content: `abc\\` followed by closing quote → decoded = `abc\`
        let t = parse_to_dom(r#"{"k":"abc\\"}"#, None).expect("parse failed");
        assert_eq!(t.root().get("k").as_str(), Some("abc\\"));
        // Three backslashes before `"`: `\\` escapes itself, `\"` escapes the quote.
        // So `\\\"` does NOT close the string; the outer `"` closes it.
        // Decoded value = `\"` (backslash + quote).
        let t = parse_to_dom("{\"k\":\"\\\\\\\"\"}", None).expect("parse failed");
        assert_eq!(t.root().get("k").as_str(), Some("\\\""));
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_dom_overflow_retry() {
        if !zmm_available() {
            return;
        }
        // A 200-element array of objects produces ~800+ tape entries.
        // Initial capacity is src.len()/4 which is far smaller, so the
        // function must handle at least one TapeOverflow retry automatically.
        let big: String = {
            let mut s = String::from("[");
            for i in 0..200u32 {
                if i > 0 {
                    s.push(',');
                }
                s.push_str(&format!(r#"{{"k":{i}}}"#));
            }
            s.push(']');
            s
        };
        // Use Some(4) to guarantee at least one overflow retry regardless of input size.
        // SAFETY: AVX-512BW support was verified by `zmm_available` above.
        let tape =
            unsafe { parse_to_dom_zmm(&big, Some(4)) }.expect("overflow retry should succeed");
        assert_eq!(tape.root().unwrap().array_iter().unwrap().count(), 200);
    }
}