Skip to main content

sonic_rs/
parser.rs

1use std::{
2    borrow::Cow,
3    collections::HashMap,
4    fmt::Debug,
5    num::NonZeroU8,
6    ops::Deref,
7    slice::{from_raw_parts, from_raw_parts_mut},
8    str::{from_utf8, from_utf8_unchecked},
9};
10
11use faststr::FastStr;
12use serde::de::{self, Expected, Unexpected};
13use sonic_number::{parse_number, ParserNumber};
14#[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
15use sonic_simd::bits::NeonBits;
16use sonic_simd::{i8x32, m8x32, u8x32, u8x64, Mask, Simd};
17
18use crate::{
19    config::DeserializeCfg,
20    error::{
21        Error,
22        ErrorCode::{self, *},
23        Result,
24    },
25    index::Index,
26    lazyvalue::value::HasEsc,
27    pointer::{
28        tree::{MultiIndex, MultiKey, PointerTreeInner, PointerTreeNode},
29        PointerTree,
30    },
31    reader::Reader,
32    serde::de::invalid_type_number,
33    util::{
34        arch::{get_nonspace_bits, prefix_xor},
35        string::*,
36        unicode::{codepoint_to_utf8, hex_to_u32_nocheck},
37    },
38    value::visitor::JsonVisitor,
39    JsonValueMutTrait, JsonValueTrait, LazyValue, Number, OwnedLazyValue,
40};
41
/// A reference handed out by the parser, tagged with where the data lives.
///
/// Supports borrowing both when deserializing into owned data and when skipping:
/// `Borrowed` carries a reference with lifetime `'b`, `Copied` one with lifetime `'c`
/// (in this file, data that was written into a caller-supplied buffer).
pub enum Reference<'b, 'c, T: ?Sized + 'static> {
    /// Reference valid for `'b`.
    Borrowed(&'b T),
    /// Reference valid for `'c`.
    Copied(&'c T),
}
50
51impl<'b, 'c> From<Reference<'b, 'c, str>> for Cow<'b, str> {
52    fn from(value: Reference<'b, 'c, str>) -> Self {
53        match value {
54            Reference::Borrowed(b) => Cow::Owned(b.to_string()),
55            Reference::Copied(c) => Cow::Owned(c.to_string()),
56        }
57    }
58}
59
60impl<'b, 'c, T: Debug + ?Sized + 'static> Debug for Reference<'b, 'c, T> {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        match self {
63            Self::Borrowed(c) => write!(f, "Borrowed({c:?})"),
64            Self::Copied(c) => write!(f, "Copied({c:?})"),
65        }
66    }
67}
68
69impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
70where
71    T: ?Sized + 'static,
72{
73    type Target = T;
74
75    fn deref(&self) -> &Self::Target {
76        match *self {
77            Reference::Borrowed(b) => b,
78            Reference::Copied(c) => c,
79        }
80    }
81}
82
/// Raw bytes of a parsed JSON string, as produced by `parse_string_raw` /
/// `parse_string_escaped` before `parse_str` turns them into a `&str`.
pub(crate) enum ParsedSlice<'b, 'c> {
    /// The string bytes are a slice with lifetime `'b`. The scratch buffer is carried
    /// along untouched so the caller (`parse_str`) can still overwrite it.
    Borrowed {
        slice: &'b [u8],
        buf: &'c mut Vec<u8>,
    },
    /// The string bytes were written into the scratch buffer.
    Copied(&'c mut Vec<u8>),
}
90
91impl<'b, 'c> Deref for ParsedSlice<'b, 'c> {
92    type Target = [u8];
93
94    fn deref(&self) -> &Self::Target {
95        match self {
96            ParsedSlice::Borrowed { slice, buf: _ } => slice,
97            ParsedSlice::Copied(c) => c.as_slice(),
98        }
99    }
100}
101
/// Default key-buffer capacity.
// NOTE(review): presumably the initial capacity of the scratch `Vec<u8>` used while
// parsing object keys; the use site is outside this chunk, so confirm.
pub(crate) const DEFAULT_KEY_BUF_CAPACITY: usize = 128;
/// Reinterprets `data` as a `&str` without validating it in release builds.
///
/// Callers must pass valid UTF-8; debug builds assert this and panic otherwise.
pub(crate) fn as_str(data: &[u8]) -> &str {
    if cfg!(debug_assertions) {
        assert!(from_utf8(data).is_ok(), "invalid utf-8 in as_str");
    }
    // SAFETY: relies on the caller's guarantee that `data` is valid UTF-8
    // (checked above in debug builds only).
    unsafe { from_utf8_unchecked(data) }
}
107
/// Generates a branch-free function that, given the bitmask of backslashes in a
/// block, returns the bitmask of positions that are escaped by a backslash.
///
/// Bit `i` of every mask refers to byte `i` of the block. `prev_escaped` carries
/// state across blocks: on entry it is 1 when the first byte of this block is
/// escaped by a backslash ending the previous block; on exit it is set to 1 when
/// the last byte of this block is a backslash that escapes the next block's first byte.
macro_rules! impl_get_escaped_branchless {
    ($name:ident, $ty:ty, $even_bits:expr) => {
        #[inline(always)]
        fn $name(prev_escaped: &mut $ty, backslash: $ty) -> $ty {
            const EVEN_BITS: $ty = $even_bits;
            const ODD_BITS: $ty = !EVEN_BITS;

            let carried_in = *prev_escaped;
            // A leading backslash that is itself escaped starts no escape sequence.
            let slashes = backslash & !carried_in;
            // Every position directly after a backslash (or after the carried-in escape).
            let after_slash = (slashes << 1) | carried_in;
            // Backslash runs that begin on an odd bit.
            let odd_run_starts = slashes & ODD_BITS & !after_slash;
            // Adding the run starts ripples a carry through each odd-starting run;
            // a carry out of the top bit means the escape spills into the next block.
            let (even_run_marks, carry_out) = odd_run_starts.overflowing_add(slashes);
            *prev_escaped = carry_out as $ty;
            let flip = even_run_marks << 1;
            (EVEN_BITS ^ flip) & after_slash
        }
    };
}

impl_get_escaped_branchless!(get_escaped_branchless_u32, u32, 0x5555_5555);
impl_get_escaped_branchless!(get_escaped_branchless_u64, u64, 0x5555_5555_5555_5555);
127
/// Builds an `Err(..)` by calling `.error(code)` on the given parser, so the
/// resulting error carries whatever position information `error` attaches.
macro_rules! perr {
    ($parser:ident, $code:expr) => {
        Err($parser.error($code))
    };
}
133
/// Turns a visitor callback's boolean result into a `Result`: `true` becomes
/// `Ok(())`, `false` becomes an `UnexpectedVisitType` error built through `perr!`.
macro_rules! check_visit {
    ($parser:ident, $accepted:expr $(,)?) => {
        if $accepted {
            Ok(())
        } else {
            perr!($parser, UnexpectedVisitType)
        }
    };
}
143
/// Returns `true` for the four JSON whitespace bytes: space, `\t`, `\n` and `\r`.
#[inline(always)]
pub(crate) fn is_whitespace(ch: u8) -> bool {
    // NOTE: the compiler does not turn a `match` into a bitmap lookup here, so the
    // bitmap is hard-coded. All four whitespace bytes are below 64 and fit in a u64.
    const SPACE_MASK: u64 = (1u64 << b' ') | (1u64 << b'\t') | (1u64 << b'\n') | (1u64 << b'\r');
    // The range check comes first so the shift amount is always in range.
    ch < 64 && ((SPACE_MASK >> ch) & 1) == 1
}
151
152#[inline(always)]
153fn get_string_bits(data: &[u8; 64], prev_instring: &mut u64, prev_escaped: &mut u64) -> u64 {
154    let v = unsafe { u8x64::from_slice_unaligned_unchecked(data) };
155
156    let bs_bits = (v.eq(&u8x64::splat(b'\\'))).bitmask();
157    let escaped: u64;
158    if bs_bits != 0 {
159        escaped = get_escaped_branchless_u64(prev_escaped, bs_bits);
160    } else {
161        escaped = *prev_escaped;
162        *prev_escaped = 0;
163    }
164    let quote_bits = (v.eq(&u8x64::splat(b'"'))).bitmask() & !escaped;
165    let in_string = unsafe { prefix_xor(quote_bits) ^ *prev_instring };
166    *prev_instring = (in_string as i64 >> 63) as u64;
167    in_string
168}
169
170#[inline(always)]
171fn skip_container_loop(
172    input: &[u8; 64],        /* a 64-bytes slice from json */
173    prev_instring: &mut u64, /* the bitmap of last string */
174    prev_escaped: &mut u64,
175    lbrace_num: &mut usize,
176    rbrace_num: &mut usize,
177    left: u8,
178    right: u8,
179) -> Option<NonZeroU8> {
180    // get the bitmao
181    let instring = get_string_bits(input, prev_instring, prev_escaped);
182    // #Safety
183    // the input is 64 bytes, so the v is always valid.
184    let v = unsafe { u8x64::from_slice_unaligned_unchecked(input) };
185    let last_lbrace_num = *lbrace_num;
186    let mut rbrace = (v.eq(&u8x64::splat(right))).bitmask() & !instring;
187    let lbrace = (v.eq(&u8x64::splat(left))).bitmask() & !instring;
188    while rbrace != 0 {
189        *rbrace_num += 1;
190        *lbrace_num = last_lbrace_num + (lbrace & (rbrace - 1)).count_ones() as usize;
191        let is_closed = lbrace_num < rbrace_num;
192        if is_closed {
193            debug_assert_eq!(*rbrace_num, *lbrace_num + 1);
194            let cnt = rbrace.trailing_zeros() + 1;
195            return unsafe { Some(NonZeroU8::new_unchecked(cnt as u8)) };
196        }
197        rbrace &= rbrace - 1;
198    }
199    *lbrace_num = last_lbrace_num + lbrace.count_ones() as usize;
200    None
201}
202
/// One lazily parsed object entry, as returned by `parse_entry_lazy`: the decoded key
/// plus the raw, still-unparsed value.
pub(crate) struct Pair<'de> {
    /// The object key, converted from the result of `parse_str`.
    pub key: Cow<'de, str>,
    /// Raw bytes of the value, as returned by `skip_one`.
    pub val: &'de [u8],
    /// Status reported by `skip_one` for the value.
    pub status: ParseStatus,
}
208
/// JSON parser state wrapped around a reader `R`.
pub struct Parser<R> {
    /// The input source the parser consumes.
    pub read: R,
    error_index: usize,   // mark the error position
    nospace_bits: u64,    // SIMD marked nospace bitmap
    nospace_start: isize, // the start position of nospace_bits
    /// Deserialization options (this file reads `utf8_lossy` and `use_rawnumber`).
    pub(crate) cfg: DeserializeCfg,
}
216
/// Records the parse status of a skipped value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseStatus {
    /// Nothing special was seen; `get_owned_lazyvalue` treats a string with this
    /// status as containing no escapes.
    None,
    /// The value contains escaped characters.
    HasEscaped,
}
223
224impl From<ParseStatus> for HasEsc {
225    fn from(value: ParseStatus) -> Self {
226        match value {
227            ParseStatus::None => HasEsc::None,
228            ParseStatus::HasEscaped => HasEsc::Yes,
229        }
230    }
231}
232
233impl<'de, R> Parser<R>
234where
235    R: Reader<'de>,
236{
237    pub fn new(read: R) -> Self {
238        Self {
239            read,
240            error_index: usize::MAX,
241            nospace_bits: 0,
242            nospace_start: -128,
243            cfg: DeserializeCfg::default(),
244        }
245    }
246
    /// Returns the reader's current index (`self.read.index()`).
    pub fn offset(&self) -> usize {
        self.read.index()
    }
250
251    /// Enable lossy UTF-8 handling: invalid surrogates produce U+FFFD replacement chars
252    /// instead of errors. Matches Go's encoding/json behavior.
253    pub fn utf8_lossy(mut self) -> Self {
254        self.cfg.utf8_lossy = true;
255        self
256    }
257
258    pub(crate) fn with_config(mut self, cfg: DeserializeCfg) -> Self {
259        self.cfg = cfg;
260        self
261    }
262
263    #[inline(always)]
264    fn error_index(&self) -> usize {
265        // when parsing strings , we need record the error position.
266        // it must be smaller than reader.index().
267        std::cmp::min(self.error_index, self.read.index().saturating_sub(1))
268    }
269
270    /// Error caused by a byte from next_char().
271    #[cold]
272    pub fn error(&self, mut reason: ErrorCode) -> Error {
273        // check invalid utf8 here at first
274        // FIXME: maybe has invalid utf8 when deserializing into byte, and just bytes has other
275        // errors?
276        if let Err(e) = self.read.check_utf8_final() {
277            return e;
278        }
279
280        // check errors, if exceed, the reason must be eof, and begin parsing the padding chars
281        let mut index = self.error_index();
282        let len = self.read.as_u8_slice().len();
283        if index > len {
284            reason = EofWhileParsing;
285            index = len;
286        }
287        Error::syntax(reason, self.read.origin_input(), index)
288    }
289
290    // maybe error in generated in visitor, so we need fix the position.
291    #[cold]
292    pub(crate) fn fix_position(&self, err: Error) -> Error {
293        if err.line() == 0 {
294            self.error(err.error_code())
295        } else {
296            err
297        }
298    }
299
300    #[inline(always)]
301    pub fn parse_number(&mut self, first: u8) -> Result<ParserNumber> {
302        let reader = &mut self.read;
303        let neg = first == b'-';
304        let mut now = reader.index() - (!neg as usize);
305        let data = reader.as_u8_slice();
306        let ret = parse_number(data, &mut now, neg);
307        reader.set_index(now);
308        ret.map_err(|err| self.error(err.into()))
309    }
310
311    /// Parse a JSON string and visit it.
312    /// When `strbuf` is Some, copies into the buffer (owned, calls visit_str).
313    /// When `strbuf` is None, parses inplace zero-copy (calls visit_borrowed_str).
314    #[inline(always)]
315    fn parse_string_visit<V>(&mut self, vis: &mut V, strbuf: Option<&mut Vec<u8>>) -> Result<()>
316    where
317        V: JsonVisitor<'de>,
318    {
319        if let Some(strbuf) = strbuf {
320            let rs = self.parse_str(strbuf)?;
321            check_visit!(self, vis.visit_str(rs.as_ref()))
322        } else {
323            unsafe {
324                let mut src = self.read.cur_ptr();
325                let start = self.read.cur_ptr();
326                let cnt = parse_string_inplace(&mut src, self.cfg.utf8_lossy)
327                    .map_err(|e| self.error(e))?;
328                self.read.set_ptr(src);
329                let slice = from_raw_parts(start, cnt);
330                let s = from_utf8_unchecked(slice);
331                check_visit!(self, vis.visit_borrowed_str(s))
332            }
333        }
334    }
335
336    /// Parse a number. When `inplace` is true, visits as borrowed raw number.
337    #[inline(always)]
338    fn parse_number_visit<V>(&mut self, first: u8, vis: &mut V, inplace: bool) -> Result<()>
339    where
340        V: JsonVisitor<'de>,
341    {
342        if self.cfg.use_rawnumber {
343            let start = self.read.index() - 1;
344            self.skip_number(first)?;
345            let slice = self.read.slice_unchecked(start, self.read.index());
346            let ok = if inplace {
347                vis.visit_borrowed_raw_number(as_str(slice))
348            } else {
349                vis.visit_raw_number(as_str(slice))
350            };
351            check_visit!(self, ok)
352        } else {
353            let ok = match self.parse_number(first)? {
354                ParserNumber::Float(f) => vis.visit_f64(f),
355                ParserNumber::Unsigned(f) => vis.visit_u64(f),
356                ParserNumber::Signed(f) => vis.visit_i64(f),
357            };
358            check_visit!(self, ok)
359        }
360    }
361
362    fn parse_array<V>(&mut self, vis: &mut V, mut strbuf: Option<&mut Vec<u8>>) -> Result<()>
363    where
364        V: JsonVisitor<'de>,
365    {
366        check_visit!(self, vis.visit_array_start(0))?;
367
368        let mut first = match self.skip_space() {
369            Some(b']') => return check_visit!(self, vis.visit_array_end(0)),
370            first => first,
371        };
372
373        let mut count = 0;
374        loop {
375            self.dispatch_value(first, vis, &mut strbuf)?;
376            count += 1;
377            first = match self.skip_space() {
378                Some(b']') => return check_visit!(self, vis.visit_array_end(count)),
379                Some(b',') => self.skip_space(),
380                _ => return perr!(self, ExpectedArrayCommaOrEnd),
381            };
382        }
383    }
384
385    fn parse_object<V>(&mut self, vis: &mut V, mut strbuf: Option<&mut Vec<u8>>) -> Result<()>
386    where
387        V: JsonVisitor<'de>,
388    {
389        let mut count: usize = 0;
390        check_visit!(self, vis.visit_object_start(0))?;
391        match self.skip_space() {
392            Some(b'}') => return check_visit!(self, vis.visit_object_end(0)),
393            Some(b'"') => {}
394            _ => return perr!(self, ExpectObjectKeyOrEnd),
395        }
396
397        loop {
398            self.parse_string_visit(vis, strbuf.as_deref_mut())?;
399            self.parse_object_clo()?;
400            let next = self.skip_space();
401            self.dispatch_value(next, vis, &mut strbuf)?;
402            count += 1;
403            match self.skip_space() {
404                Some(b'}') => return check_visit!(self, vis.visit_object_end(count)),
405                Some(b',') => match self.skip_space() {
406                    Some(b'"') => continue,
407                    _ => return perr!(self, ExpectObjectKeyOrEnd),
408                },
409                _ => return perr!(self, ExpectedArrayCommaOrEnd),
410            }
411        }
412    }
413
414    /// Dispatch value parsing based on the peeked byte.
415    /// When `strbuf` is None, strings are parsed inplace (zero-copy borrowed).
416    /// When `strbuf` is Some, strings are parsed into the buffer (owned copy).
417    #[inline(always)]
418    fn dispatch_value<V>(
419        &mut self,
420        ch: Option<u8>,
421        vis: &mut V,
422        strbuf: &mut Option<&mut Vec<u8>>,
423    ) -> Result<()>
424    where
425        V: JsonVisitor<'de>,
426    {
427        match ch {
428            Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_visit(c, vis, strbuf.is_none()),
429            Some(b'"') => self.parse_string_visit(vis, strbuf.as_deref_mut()),
430            Some(b'{') => self.parse_object(vis, strbuf.as_deref_mut()),
431            Some(b'[') => self.parse_array(vis, strbuf.as_deref_mut()),
432            Some(first) => self.parse_literal_visit(first, vis),
433            None => perr!(self, EofWhileParsing),
434        }
435    }
436
437    #[inline(always)]
438    fn parse_literal_visit<V>(&mut self, first: u8, vis: &mut V) -> Result<()>
439    where
440        V: JsonVisitor<'de>,
441    {
442        let literal = match first {
443            b't' => "rue",
444            b'f' => "alse",
445            b'n' => "ull",
446            _ => return perr!(self, InvalidJsonValue),
447        };
448
449        let reader = &mut self.read;
450        if let Some(chunk) = reader.next_n(literal.len()) {
451            if chunk != literal.as_bytes() {
452                return perr!(self, InvalidLiteral);
453            }
454
455            let ok = match first {
456                b't' => vis.visit_bool(true),
457                b'f' => vis.visit_bool(false),
458                b'n' => vis.visit_null(),
459                _ => unreachable!(),
460            };
461            check_visit!(self, ok)
462        } else {
463            perr!(self, EofWhileParsing)
464        }
465    }
466
467    #[inline]
468    pub(crate) fn parse_array_elem_lazy(
469        &mut self,
470        first: &mut bool,
471        check: bool,
472    ) -> Result<Option<(&'de [u8], ParseStatus)>> {
473        if *first && self.skip_space() != Some(b'[') {
474            return perr!(self, ExpectedArrayStart);
475        }
476        match self.skip_space_peek() {
477            Some(b']') => {
478                self.read.eat(1);
479                return Ok(None);
480            }
481            Some(b',') if !(*first) => {
482                self.read.eat(1);
483            }
484            Some(_) if *first => {
485                *first = false;
486            }
487            _ => return perr!(self, ExpectedArrayCommaOrEnd),
488        };
489        let (raw, status) = self.skip_one(check)?;
490        Ok(Some((raw, status)))
491    }
492
493    #[inline]
494    pub(crate) fn parse_entry_lazy(
495        &mut self,
496        strbuf: &mut Vec<u8>,
497        first: &mut bool,
498        check: bool,
499    ) -> Result<Option<Pair<'de>>> {
500        if *first && self.skip_space() != Some(b'{') {
501            return perr!(self, ExpectedObjectStart);
502        }
503        match self.skip_space() {
504            Some(b'}') => return Ok(None),
505            Some(b'"') if *first => *first = false,
506            Some(b',') if !*first => {
507                if self.skip_space() != Some(b'"') {
508                    return perr!(self, ExpectObjectKeyOrEnd);
509                }
510            }
511            _ => return perr!(self, ExpectedObjectCommaOrEnd),
512        }
513
514        let parsed = self.parse_str(strbuf)?;
515        self.parse_object_clo()?;
516        let (raw, status) = self.skip_one(check)?;
517
518        Ok(Some(Pair {
519            key: parsed.into(),
520            val: raw,
521            status,
522        }))
523    }
524
525    #[inline(always)]
526    pub(crate) fn match_literal(&mut self, literal: &'static str) -> Result<bool> {
527        if let Some(chunk) = self.read.next_n(literal.len()) {
528            if chunk != literal.as_bytes() {
529                perr!(self, InvalidLiteral)
530            } else {
531                Ok(true)
532            }
533        } else {
534            perr!(self, EofWhileParsing)
535        }
536    }
537
538    #[inline(always)]
539    pub(crate) fn get_owned_lazyvalue(&mut self, strict: bool) -> Result<OwnedLazyValue> {
540        let c = self.skip_space();
541        let start = match c {
542            Some(b'"') => {
543                let start = self.read.index() - 1;
544                match self.skip_string()? {
545                    ParseStatus::None => {
546                        let slice = self.read.slice_unchecked(start, self.read.index());
547                        let raw = self.read.slice_ref(slice).as_faststr();
548                        return Ok(OwnedLazyValue::from_non_esc_str(raw));
549                    }
550                    ParseStatus::HasEscaped => {}
551                }
552                start
553            }
554            Some(b't') if self.match_literal("rue")? => return Ok(true.into()),
555            Some(b'f') if self.match_literal("alse")? => return Ok(false.into()),
556            Some(b'n') if self.match_literal("ull")? => return Ok(().into()),
557            None => return perr!(self, EofWhileParsing),
558            _ => {
559                let start = self.read.index() - 1;
560                self.read.backward(1);
561                self.skip_one(strict)?;
562                start
563            }
564        };
565        let end = self.read.index();
566        let sub = self.read.slice_unchecked(start, end);
567        let raw = self.read.slice_ref(sub).as_faststr();
568        Ok(OwnedLazyValue::new(raw.into(), HasEsc::Possible))
569    }
570
571    #[inline(always)]
572    fn parse_faststr(&mut self, strbuf: &mut Vec<u8>) -> Result<FastStr> {
573        match self.parse_str(strbuf)? {
574            Reference::Borrowed(s) => {
575                return Ok(self.read.slice_ref(s.as_bytes()).as_faststr());
576            }
577            Reference::Copied(s) => Ok(FastStr::new(s)),
578        }
579    }
580
581    #[inline(always)]
582    pub(crate) fn load_owned_lazyvalue(&mut self, strbuf: &mut Vec<u8>) -> Result<OwnedLazyValue> {
583        match self.skip_space() {
584            Some(c @ b'-' | c @ b'0'..=b'9') => {
585                let num: Number = self.parse_number(c)?.into();
586                Ok(OwnedLazyValue::from(num))
587            }
588            Some(b'"') => match self.parse_str(strbuf)? {
589                Reference::Borrowed(s) => {
590                    let raw = self.read.slice_ref(s.as_bytes()).as_faststr();
591                    Ok(OwnedLazyValue::from_faststr(raw))
592                }
593                Reference::Copied(s) => {
594                    let raw = FastStr::new(s);
595                    Ok(OwnedLazyValue::from_faststr(raw))
596                }
597            },
598            Some(b'{') => {
599                // parsing empty object
600                match self.skip_space() {
601                    Some(b'}') => return Ok(Vec::<(FastStr, OwnedLazyValue)>::new().into()),
602                    Some(b'"') => {}
603                    _ => return perr!(self, ExpectObjectKeyOrEnd),
604                }
605
606                // loop for each object key and value
607                let mut vec = Vec::with_capacity(32);
608                loop {
609                    let key = self.parse_faststr(strbuf)?;
610                    self.parse_object_clo()?;
611                    let olv = self.get_owned_lazyvalue(false)?;
612                    vec.push((key, olv));
613                    match self.skip_space() {
614                        Some(b'}') => return Ok(vec.into()),
615                        Some(b',') => match self.skip_space() {
616                            Some(b'"') => continue,
617                            _ => return perr!(self, ExpectObjectKeyOrEnd),
618                        },
619                        _ => return perr!(self, ExpectedArrayCommaOrEnd),
620                    }
621                }
622            }
623            Some(b'[') => {
624                if let Some(b']') = self.skip_space() {
625                    return Ok(Vec::<OwnedLazyValue>::new().into());
626                }
627
628                let mut vec = Vec::with_capacity(32);
629                self.read.backward(1);
630                loop {
631                    vec.push(self.get_owned_lazyvalue(false)?);
632                    match self.skip_space() {
633                        Some(b']') => return Ok(vec.into()),
634                        Some(b',') => {}
635                        _ => return perr!(self, ExpectedArrayCommaOrEnd),
636                    };
637                }
638            }
639            _ => perr!(self, InvalidJsonValue),
640        }
641    }
642
643    #[inline(always)]
644    pub(crate) fn parse_dom<V>(
645        &mut self,
646        vis: &mut V,
647        mut strbuf: Option<&mut Vec<u8>>,
648    ) -> Result<()>
649    where
650        V: JsonVisitor<'de>,
651    {
652        check_visit!(self, vis.visit_dom_start())?;
653        let ch = self.skip_space();
654        self.dispatch_value(ch, vis, &mut strbuf)?;
655        check_visit!(self, vis.visit_dom_end())
656    }
657
658    #[inline(always)]
659    pub fn parse_str<'own>(&mut self, buf: &'own mut Vec<u8>) -> Result<Reference<'de, 'own, str>> {
660        match self.parse_string_raw(buf) {
661            Ok(ParsedSlice::Copied(buf)) => {
662                if self.check_invalid_utf8(self.cfg.utf8_lossy)? {
663                    // repr the invalid utf-8
664                    let repr = String::from_utf8_lossy(buf.as_ref()).into_owned();
665                    *buf = repr.into_bytes();
666                }
667                let slice = unsafe { from_utf8_unchecked(buf.as_slice()) };
668                Ok(Reference::Copied(slice))
669            }
670            Ok(ParsedSlice::Borrowed { slice, buf }) => {
671                if self.check_invalid_utf8(self.cfg.utf8_lossy)? {
672                    // repr the invalid utf-8
673                    let repr = String::from_utf8_lossy(slice).into_owned();
674                    *buf = repr.into_bytes();
675                    let slice = unsafe { from_utf8_unchecked(buf) };
676                    Ok(Reference::Copied(slice))
677                } else {
678                    Ok(Reference::Borrowed(unsafe { from_utf8_unchecked(slice) }))
679                }
680            }
681            Err(e) => Err(e),
682        }
683    }
684
685    pub(crate) fn check_invalid_utf8(&mut self, allowed: bool) -> Result<bool> {
686        // the invalid UTF-8 before the string, must have been checked before.
687        let invalid = self.read.next_invalid_utf8();
688        if invalid >= self.read.index() {
689            return Ok(false);
690        }
691
692        if !allowed {
693            Err(Error::syntax(
694                ErrorCode::InvalidUTF8,
695                self.read.origin_input(),
696                invalid,
697            ))
698        } else {
699            // this space is allowed, should update the next invalid utf8 position
700            self.read.check_invalid_utf8();
701            Ok(true)
702        }
703    }
704
705    pub(crate) fn parse_escaped_utf8(&mut self) -> Result<u32> {
706        let point1 = if let Some(asc) = self.read.next_n(4) {
707            unsafe { hex_to_u32_nocheck(&*(asc.as_ptr() as *const _ as *const [u8; 4])) }
708        } else {
709            return perr!(self, EofWhileParsing);
710        };
711
712        // only check surrogate here, and we will check the code pointer later when use
713        // `codepoint_to_utf8`
714        if (0xD800..0xDC00).contains(&point1) {
715            // parse the second utf8 code point of surrogate
716            let point2 = if let Some(asc) = self.read.next_n(6) {
717                if asc[0] != b'\\' || asc[1] != b'u' {
718                    if self.cfg.utf8_lossy {
719                        // Backtrack so the non-\uXXXX bytes can be re-parsed
720                        let idx = self.read.index();
721                        self.read.set_index(idx - 6);
722                        return Ok(0xFFFD);
723                    } else {
724                        return perr!(self, InvalidSurrogateUnicodeCodePoint);
725                    }
726                }
727                unsafe { hex_to_u32_nocheck(&*(asc.as_ptr().add(2) as *const _ as *const [u8; 4])) }
728            } else if self.cfg.utf8_lossy {
729                return Ok(0xFFFD);
730            } else {
731                // invalid surrogate
732                return perr!(self, InvalidSurrogateUnicodeCodePoint);
733            };
734
735            /* calcute the real code point */
736            let low_bit = point2.wrapping_sub(0xdc00);
737            if (low_bit >> 10) != 0 {
738                if self.cfg.utf8_lossy {
739                    // point2 is not a valid low surrogate. Backtrack 6 bytes
740                    // so it can be re-parsed (e.g. \uDA51\uD83D\uDE04 → FFFD + 😄).
741                    let idx = self.read.index();
742                    self.read.set_index(idx - 6);
743                    return Ok(0xFFFD);
744                } else {
745                    return perr!(self, InvalidSurrogateUnicodeCodePoint);
746                }
747            }
748
749            Ok((((point1 - 0xd800) << 10) | low_bit).wrapping_add(0x10000))
750        } else if (0xDC00..0xE000).contains(&point1) {
751            if self.cfg.utf8_lossy {
752                Ok(0xFFFD)
753            } else {
754                // invalid surrogate
755                perr!(self, InvalidSurrogateUnicodeCodePoint)
756            }
757        } else {
758            Ok(point1)
759        }
760    }
761
762    pub(crate) unsafe fn parse_escaped_char(&mut self, buf: &mut Vec<u8>) -> Result<()> {
763        'escape: loop {
764            match self.read.next() {
765                Some(b'u') => {
766                    let code = self.parse_escaped_utf8()?;
767                    buf.reserve(4);
768                    let ptr = buf.as_mut_ptr().add(buf.len());
769                    let cnt = codepoint_to_utf8(code, ptr);
770                    if cnt == 0 {
771                        return perr!(self, InvalidUnicodeCodePoint);
772                    }
773                    buf.set_len(buf.len() + cnt);
774                }
775                Some(c) if ESCAPED_TAB[c as usize] != 0 => {
776                    buf.push(ESCAPED_TAB[c as usize]);
777                }
778                None => return perr!(self, EofWhileParsing),
779                _ => return perr!(self, InvalidEscape),
780            }
781
782            // fast path for continuous escaped chars
783            if self.read.peek() == Some(b'\\') {
784                self.read.eat(1);
785                continue 'escape;
786            }
787            break 'escape;
788        }
789        Ok(())
790    }
791
    /// Continues parsing a JSON string after its first backslash, unescaping the rest
    /// of the string into `buf`.
    ///
    /// `parse_string_raw` calls this after it has copied the bytes preceding the first
    /// backslash into `buf` and consumed that backslash. The input is processed in
    /// `StringBlock::LANES`-sized chunks while enough bytes remain, then byte by byte.
    /// On success the reader is positioned after the closing quote and
    /// `ParsedSlice::Copied(buf)` is returned.
    ///
    /// Errors: `ControlCharacterWhileParsingString` for an unescaped control character,
    /// `EofWhileParsing` if the closing quote is never found, plus any error from
    /// `parse_escaped_char`.
    ///
    /// # Safety
    /// NOTE(review): the body writes whole chunks into `buf`'s spare capacity through
    /// raw pointers and fixes the length afterwards; the caller-side contract is not
    /// documented in this chunk, so confirm it.
    pub(crate) unsafe fn parse_string_escaped<'own>(
        &mut self,
        buf: &'own mut Vec<u8>,
    ) -> Result<ParsedSlice<'de, 'own>> {
        #[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
        let mut block: StringBlock<NeonBits>;
        #[cfg(not(all(target_feature = "neon", target_arch = "aarch64")))]
        let mut block: StringBlock<u32>;

        // Decode the escape that follows the backslash the caller already consumed.
        self.parse_escaped_char(buf)?;

        while let Some(chunk) = self.read.peek_n(StringBlock::LANES) {
            // Make room for a full chunk so the unconditional write below stays
            // inside the allocation.
            buf.reserve(StringBlock::LANES);
            let v = unsafe { load(chunk.as_ptr()) };
            block = StringBlock::new(&v);

            if block.has_unescaped() {
                // Advance to the offending byte before reporting the error.
                self.read.eat(block.unescaped_index());
                return perr!(self, ControlCharacterWhileParsingString);
            }

            // write the chunk to buf, we will set new_len later
            let chunk = from_raw_parts_mut(buf.as_mut_ptr().add(buf.len()), StringBlock::LANES);
            v.write_to_slice_unaligned_unchecked(chunk);

            if block.has_quote_first() {
                // Only the bytes before the quote belong to the string.
                let cnt = block.quote_index();
                buf.set_len(buf.len() + cnt);

                // skip the right quote
                self.read.eat(cnt + 1);
                return Ok(ParsedSlice::Copied(buf));
            }

            if block.has_backslash() {
                // TODO: loop unrolling here
                let cnt = block.bs_index();
                // skip the backslash
                self.read.eat(cnt + 1);
                // Keep the bytes before the backslash, then decode the escape after them.
                buf.set_len(buf.len() + cnt);
                self.parse_escaped_char(buf)?;
            } else {
                // Plain chunk: keep all of it.
                buf.set_len(buf.len() + StringBlock::LANES);
                self.read.eat(StringBlock::LANES);
            }
        }

        // Scalar path for the remaining bytes (fewer than a full chunk).
        while let Some(c) = self.read.peek() {
            match c {
                b'"' => {
                    self.read.eat(1);
                    return Ok(ParsedSlice::Copied(buf));
                }
                b'\\' => {
                    // skip the backslash
                    self.read.eat(1);
                    self.parse_escaped_char(buf)?;
                }
                b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
                _ => {
                    buf.push(c);
                    self.read.eat(1);
                }
            }
        }

        // Input ended before the closing quote.
        perr!(self, EofWhileParsing)
    }
861
862    #[inline(always)]
863    // parse_string_raw maybe borrowed, maybe copied into buf(buf will be clear at first).
864    pub(crate) fn parse_string_raw<'own>(
865        &mut self,
866        buf: &'own mut Vec<u8>,
867    ) -> Result<ParsedSlice<'de, 'own>> {
868        // now reader is start after `"`, so we can directly skipstring
869        let start = self.read.index();
870        #[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
871        let mut block: StringBlock<NeonBits>;
872        #[cfg(not(all(target_feature = "neon", target_arch = "aarch64")))]
873        let mut block: StringBlock<u32>;
874
875        while let Some(chunk) = self.read.peek_n(StringBlock::LANES) {
876            let v = unsafe { load(chunk.as_ptr()) };
877            block = StringBlock::new(&v);
878
879            if block.has_quote_first() {
880                let cnt = block.quote_index();
881                self.read.eat(cnt + 1);
882                let slice = self.read.slice_unchecked(start, self.read.index() - 1);
883                return Ok(ParsedSlice::Borrowed { slice, buf });
884            }
885
886            if block.has_unescaped() {
887                self.read.eat(block.unescaped_index());
888                return perr!(self, ControlCharacterWhileParsingString);
889            }
890
891            if block.has_backslash() {
892                let cnt = block.bs_index();
893                // skip the backslash
894                self.read.eat(cnt + 1);
895
896                // copy unescaped parts to buf
897                buf.clear();
898                buf.extend_from_slice(&self.read.as_u8_slice()[start..self.read.index() - 1]);
899
900                return unsafe { self.parse_string_escaped(buf) };
901            }
902
903            self.read.eat(StringBlock::LANES);
904            continue;
905        }
906
907        // found quote for remaining bytes
908        while let Some(c) = self.read.peek() {
909            match c {
910                b'"' => {
911                    self.read.eat(1);
912                    let slice = self.read.slice_unchecked(start, self.read.index() - 1);
913                    return Ok(ParsedSlice::Borrowed { slice, buf });
914                }
915                b'\\' => {
916                    buf.clear();
917                    buf.extend_from_slice(self.read.slice_unchecked(start, self.read.index()));
918                    self.read.eat(1);
919                    return unsafe { self.parse_string_escaped(buf) };
920                }
921                b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
922                _ => self.read.eat(1),
923            }
924        }
925        perr!(self, EofWhileParsing)
926    }
927
928    #[inline(always)]
929    fn get_next_token<const N: usize>(&mut self, tokens: [u8; N], advance: usize) -> Option<u8> {
930        let r = &mut self.read;
931        const LANS: usize = u8x32::LANES;
932        while let Some(chunk) = r.peek_n(LANS) {
933            let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
934            let mut vor = m8x32::splat(false);
935            for t in tokens.iter().take(N) {
936                vor |= v.eq(&u8x32::splat(*t));
937            }
938            let next = vor.bitmask();
939            if next != 0 {
940                let cnt = next.trailing_zeros() as usize;
941                let ch = chunk[cnt];
942                r.eat(cnt + advance);
943                return Some(ch);
944            }
945            r.eat(LANS);
946        }
947
948        while let Some(ch) = r.peek() {
949            for t in tokens.iter().take(N) {
950                if ch == *t {
951                    r.eat(advance);
952                    return Some(ch);
953                }
954            }
955            r.eat(1)
956        }
957        None
958    }
959
960    // skip_string skips a JSON string, and return the later parts after closed quote, and the
961    // escaped status. skip_string always start with the quote marks.
962    #[inline(always)]
963    unsafe fn skip_string_unchecked(&mut self) -> Result<ParseStatus> {
964        const LANS: usize = u8x32::LANES;
965        let r = &mut self.read;
966        let mut quote_bits;
967        let mut escaped;
968        let mut prev_escaped = 0;
969        let mut status = ParseStatus::None;
970
971        while let Some(chunk) = r.peek_n(LANS) {
972            let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
973            let bs_bits = (v.eq(&u8x32::splat(b'\\'))).bitmask();
974            quote_bits = (v.eq(&u8x32::splat(b'"'))).bitmask();
975            // maybe has escaped quotes
976            if ((quote_bits.wrapping_sub(1)) & bs_bits) != 0 || prev_escaped != 0 {
977                escaped = get_escaped_branchless_u32(&mut prev_escaped, bs_bits);
978                status = ParseStatus::HasEscaped;
979                quote_bits &= !escaped;
980            }
981            // real quote bits
982            if quote_bits != 0 {
983                // eat the ending quote mark
984                r.eat(quote_bits.trailing_zeros() as usize + 1);
985                return Ok(status);
986            }
987            r.eat(LANS)
988        }
989
990        // skip the possible prev escaped quote
991        if prev_escaped != 0 {
992            r.eat(1)
993        }
994
995        // found quote for remaining bytes
996        while let Some(ch) = r.peek() {
997            if ch == b'\\' {
998                if r.remain() < 2 {
999                    break;
1000                }
1001                status = ParseStatus::HasEscaped;
1002                r.eat(2);
1003                continue;
1004            }
1005            r.eat(1);
1006            if ch == b'"' {
1007                return Ok(status);
1008            }
1009        }
1010        perr!(self, EofWhileParsing)
1011    }
1012
1013    fn skip_escaped_chars(&mut self) -> Result<()> {
1014        match self.read.peek() {
1015            Some(b'u') => {
1016                if self.read.remain() < 6 {
1017                    return perr!(self, EofWhileParsing);
1018                } else {
1019                    self.read.eat(5);
1020                }
1021            }
1022            Some(c) => {
1023                if self.read.next().is_none() {
1024                    return perr!(self, EofWhileParsing);
1025                }
1026                if ESCAPED_TAB[c as usize] == 0 {
1027                    return perr!(self, InvalidEscape);
1028                }
1029            }
1030            None => return perr!(self, EofWhileParsing),
1031        }
1032        Ok(())
1033    }
1034
1035    // skip_string skips a JSON string with validation.
1036    #[inline(always)]
1037    fn skip_string(&mut self) -> Result<ParseStatus> {
1038        const LANS: usize = u8x32::LANES;
1039
1040        let mut status = ParseStatus::None;
1041        while let Some(chunk) = self.read.peek_n(LANS) {
1042            let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
1043            let v_bs = v.eq(&u8x32::splat(b'\\'));
1044            let v_quote = v.eq(&u8x32::splat(b'"'));
1045            let v_cc = v.le(&u8x32::splat(0x1f));
1046            let mask = (v_bs | v_quote | v_cc).bitmask();
1047
1048            // check the mask
1049            if mask != 0 {
1050                let cnt = mask.trailing_zeros() as usize;
1051                self.read.eat(cnt + 1);
1052
1053                match chunk[cnt] {
1054                    b'\\' => {
1055                        self.skip_escaped_chars()?;
1056                        status = ParseStatus::HasEscaped;
1057                    }
1058                    b'\"' => return Ok(status),
1059                    0..=0x1f => return perr!(self, ControlCharacterWhileParsingString),
1060                    _ => unreachable!(),
1061                }
1062            } else {
1063                self.read.eat(LANS)
1064            }
1065        }
1066
1067        // found quote for remaining bytes
1068        while let Some(ch) = self.read.next() {
1069            match ch {
1070                b'\\' => {
1071                    self.skip_escaped_chars()?;
1072                    status = ParseStatus::HasEscaped;
1073                }
1074                b'"' => return Ok(status),
1075                0..=0x1f => return perr!(self, ControlCharacterWhileParsingString),
1076                _ => {}
1077            }
1078        }
1079        perr!(self, EofWhileParsing)
1080    }
1081
1082    // parse the Colon :
1083    #[inline(always)]
1084    pub(crate) fn parse_object_clo(&mut self) -> Result<()> {
1085        if let Some(ch) = self.read.peek() {
1086            // fast path for compact json
1087            if ch == b':' {
1088                self.read.eat(1);
1089                return Ok(());
1090            }
1091
1092            match self.skip_space() {
1093                Some(b':') => Ok(()),
1094                Some(_) => perr!(self, ExpectedColon),
1095                None => perr!(self, EofWhileParsing),
1096            }
1097        } else {
1098            perr!(self, EofWhileParsing)
1099        }
1100    }
1101
1102    // parse the Colon :
1103    #[inline(always)]
1104    pub(crate) fn parse_array_end(&mut self) -> Result<()> {
1105        match self.skip_space() {
1106            Some(b']') => Ok(()),
1107            Some(_) => perr!(self, ExpectedArrayCommaOrEnd),
1108            None => perr!(self, EofWhileParsing),
1109        }
1110    }
1111
1112    #[inline(always)]
1113    fn skip_object(&mut self) -> Result<()> {
1114        match self.skip_space() {
1115            Some(b'}') => return Ok(()),
1116            Some(b'"') => {}
1117            None => return perr!(self, EofWhileParsing),
1118            Some(_) => return perr!(self, ExpectObjectKeyOrEnd),
1119        }
1120
1121        loop {
1122            self.skip_string()?;
1123            self.parse_object_clo()?;
1124            self.skip_one(true)?;
1125
1126            match self.skip_space() {
1127                Some(b'}') => return Ok(()),
1128                Some(b',') => match self.skip_space() {
1129                    Some(b'"') => continue,
1130                    _ => return perr!(self, ExpectObjectKeyOrEnd),
1131                },
1132                None => return perr!(self, EofWhileParsing),
1133                Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
1134            }
1135        }
1136    }
1137
1138    #[inline(always)]
1139    fn skip_array(&mut self) -> Result<()> {
1140        match self.skip_space_peek() {
1141            Some(b']') => {
1142                self.read.eat(1);
1143                return Ok(());
1144            }
1145            None => return perr!(self, EofWhileParsing),
1146            _ => {}
1147        }
1148
1149        loop {
1150            self.skip_one(true)?;
1151            match self.skip_space() {
1152                Some(b']') => return Ok(()),
1153                Some(b',') => continue,
1154                None => return perr!(self, EofWhileParsing),
1155                _ => return perr!(self, ExpectedArrayCommaOrEnd),
1156            }
1157        }
1158    }
1159
1160    /// skip_container skip a object or array, and retu
1161    #[inline(always)]
1162    fn skip_container(&mut self, left: u8, right: u8) -> Result<()> {
1163        let mut prev_instring = 0;
1164        let mut prev_escaped = 0;
1165        let mut rbrace_num = 0;
1166        let mut lbrace_num = 0;
1167        let reader = &mut self.read;
1168
1169        while let Some(chunk) = reader.peek_n(64) {
1170            let input = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
1171            if let Some(count) = skip_container_loop(
1172                input,
1173                &mut prev_instring,
1174                &mut prev_escaped,
1175                &mut lbrace_num,
1176                &mut rbrace_num,
1177                left,
1178                right,
1179            ) {
1180                reader.eat(count.get() as usize);
1181                return Ok(());
1182            }
1183            reader.eat(64);
1184        }
1185
1186        let mut remain = [0u8; 64];
1187        {
1188            let n = reader.remain();
1189            debug_assert!(n <= 64);
1190            remain[..n].copy_from_slice(reader.peek_n(n).unwrap());
1191        }
1192        if let Some(count) = skip_container_loop(
1193            &remain,
1194            &mut prev_instring,
1195            &mut prev_escaped,
1196            &mut lbrace_num,
1197            &mut rbrace_num,
1198            left,
1199            right,
1200        ) {
1201            reader.eat(count.get() as usize);
1202            return Ok(());
1203        }
1204
1205        perr!(self, EofWhileParsing)
1206    }
1207
1208    #[inline(always)]
1209    pub fn skip_space(&mut self) -> Option<u8> {
1210        let reader = &mut self.read;
1211        // fast path 1: for nospace or single space
1212        // most JSON is like ` "name": "balabala" `
1213        if let Some(ch) = reader.next() {
1214            if !is_whitespace(ch) {
1215                return Some(ch);
1216            }
1217        }
1218        if let Some(ch) = reader.next() {
1219            if !is_whitespace(ch) {
1220                return Some(ch);
1221            }
1222        }
1223
1224        // fast path 2: reuse the bitmap for short key or numbers
1225        let nospace_offset = (reader.index() as isize) - self.nospace_start;
1226        if nospace_offset < 64 {
1227            let bitmap = {
1228                let mask = !((1 << nospace_offset) - 1);
1229                self.nospace_bits & mask
1230            };
1231            if bitmap != 0 {
1232                let cnt = bitmap.trailing_zeros() as usize;
1233                let ch = reader.at(self.nospace_start as usize + cnt);
1234                reader.set_index(self.nospace_start as usize + cnt + 1);
1235
1236                return Some(ch);
1237            } else {
1238                // we can still fast skip the marked space in here.
1239                reader.set_index(self.nospace_start as usize + 64);
1240            }
1241        }
1242
1243        // then we use simd to accelerate skipping space
1244        while let Some(chunk) = reader.peek_n(64) {
1245            let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
1246            let bitmap = unsafe { get_nonspace_bits(chunk) };
1247            if bitmap != 0 {
1248                self.nospace_bits = bitmap;
1249                self.nospace_start = reader.index() as isize;
1250                let cnt = bitmap.trailing_zeros() as usize;
1251                let ch = chunk[cnt];
1252                reader.eat(cnt + 1);
1253
1254                return Some(ch);
1255            }
1256            reader.eat(64)
1257        }
1258
1259        while let Some(ch) = reader.next() {
1260            if !is_whitespace(ch) {
1261                //
1262                return Some(ch);
1263            }
1264        }
1265        None
1266    }
1267
1268    #[inline(always)]
1269    pub fn skip_space_peek(&mut self) -> Option<u8> {
1270        let ret = self.skip_space()?;
1271        self.read.backward(1);
1272        Some(ret)
1273    }
1274
1275    #[inline(always)]
1276    pub fn parse_literal(&mut self, literal: &str) -> Result<()> {
1277        let reader = &mut self.read;
1278        if let Some(chunk) = reader.next_n(literal.len()) {
1279            if chunk == literal.as_bytes() {
1280                Ok(())
1281            } else {
1282                perr!(self, InvalidLiteral)
1283            }
1284        } else {
1285            perr!(self, EofWhileParsing)
1286        }
1287    }
1288
1289    #[inline(always)]
1290    fn skip_number_unsafe(&mut self) -> Result<()> {
1291        let _ = self.get_next_token([b']', b'}', b','], 0);
1292        Ok(())
1293    }
1294
1295    #[inline(always)]
1296    fn skip_exponent(&mut self) -> Result<()> {
1297        if let Some(ch) = self.read.peek() {
1298            if ch == b'-' || ch == b'+' {
1299                self.read.eat(1);
1300            }
1301        }
1302        self.skip_single_digit()?;
1303        // skip the remaining digits
1304        while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1305            self.read.eat(1);
1306        }
1307        Ok(())
1308    }
1309
1310    #[inline(always)]
1311    fn skip_single_digit(&mut self) -> Result<u8> {
1312        if let Some(ch) = self.read.next() {
1313            if !ch.is_ascii_digit() {
1314                perr!(self, InvalidNumber)
1315            } else {
1316                Ok(ch)
1317            }
1318        } else {
1319            perr!(self, EofWhileParsing)
1320        }
1321    }
1322
1323    #[inline(always)]
1324    pub fn skip_number(&mut self, first: u8) -> Result<&'de str> {
1325        let start = self.read.index() - 1;
1326        self.do_skip_number(first)?;
1327        let end = self.read.index();
1328        Ok(as_str(self.read.slice_unchecked(start, end)))
1329    }
1330
1331    #[inline(always)]
1332    pub(crate) fn do_skip_number(&mut self, mut first: u8) -> Result<()> {
1333        // check eof after the sign
1334        if first == b'-' {
1335            first = self.skip_single_digit()?;
1336        }
1337
1338        // check the leading zeros
1339        let second = self.read.peek();
1340        if first == b'0' && matches!(second, Some(b'0'..=b'9')) {
1341            return perr!(self, InvalidNumber);
1342        }
1343
1344        // fast path for the single digit
1345        let mut is_float: bool = false;
1346        match second {
1347            Some(b'0'..=b'9') => self.read.eat(1),
1348            Some(b'.') => {
1349                is_float = true;
1350                self.read.eat(1);
1351                self.skip_single_digit()?;
1352            }
1353            Some(b'e' | b'E') => {
1354                self.read.eat(1);
1355                return self.skip_exponent();
1356            }
1357            _ => return Ok(()),
1358        }
1359
1360        // SIMD path for long number
1361        const LANES: usize = i8x32::LANES;
1362        while let Some(chunk) = self.read.peek_n(LANES) {
1363            let v = unsafe { i8x32::from_slice_unaligned_unchecked(chunk) };
1364            let zero = i8x32::splat(b'0' as i8);
1365            let nine = i8x32::splat(b'9' as i8);
1366            let mut nondigits = (zero.gt(&v) | v.gt(&nine)).bitmask();
1367            if nondigits != 0 {
1368                let mut cnt = nondigits.trailing_zeros() as usize;
1369                let ch = chunk[cnt];
1370                if ch == b'.' && !is_float {
1371                    self.read.eat(cnt + 1);
1372                    // check the first digit after the dot
1373                    self.skip_single_digit()?;
1374
1375                    // check the overflow
1376                    cnt += 2;
1377                    if cnt >= LANES {
1378                        is_float = true;
1379                        continue;
1380                    }
1381
1382                    nondigits = nondigits.wrapping_shr(cnt as u32);
1383                    if nondigits != 0 {
1384                        let offset = nondigits.trailing_zeros() as usize;
1385                        let ch = chunk[cnt + offset];
1386                        if ch == b'e' || ch == b'E' {
1387                            self.read.eat(offset + 1);
1388                            return self.skip_exponent();
1389                        } else {
1390                            self.read.eat(offset);
1391                            return Ok(());
1392                        }
1393                    } else {
1394                        self.read.eat(32 - cnt);
1395                        is_float = true;
1396                        continue;
1397                    }
1398                } else if ch == b'e' || ch == b'E' {
1399                    self.read.eat(cnt + 1);
1400                    return self.skip_exponent();
1401                } else {
1402                    self.read.eat(cnt);
1403                    return Ok(());
1404                }
1405            }
1406            // long digits
1407            self.read.eat(32);
1408        }
1409
1410        // has less than 32 bytes
1411        while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1412            self.read.eat(1);
1413        }
1414
1415        match self.read.peek() {
1416            Some(b'.') if !is_float => {
1417                self.read.eat(1);
1418                self.skip_single_digit()?;
1419                while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1420                    self.read.eat(1);
1421                }
1422                match self.read.peek() {
1423                    Some(b'e' | b'E') => {
1424                        self.read.eat(1);
1425                        return self.skip_exponent();
1426                    }
1427                    _ => return Ok(()),
1428                }
1429            }
1430            Some(b'e' | b'E') => {
1431                self.read.eat(1);
1432                return self.skip_exponent();
1433            }
1434            _ => {}
1435        }
1436        Ok(())
1437    }
1438
1439    pub fn skip_one(&mut self, checked: bool) -> Result<(&'de [u8], ParseStatus)> {
1440        let ch = match self.skip_space() {
1441            Some(ch) => ch,
1442            None => return perr!(self, EofWhileParsing),
1443        };
1444        let start = self.read.index() - 1;
1445        let mut status = ParseStatus::None;
1446        match ch {
1447            c @ b'-' | c @ b'0'..=b'9' => {
1448                if checked {
1449                    self.skip_number(c)?;
1450                } else {
1451                    self.skip_number_unsafe()?;
1452                }
1453                Ok(())
1454            }
1455            b'"' => {
1456                status = if checked {
1457                    self.skip_string()?
1458                } else {
1459                    unsafe { self.skip_string_unchecked() }?
1460                };
1461                Ok(())
1462            }
1463            b'{' => {
1464                if checked {
1465                    self.skip_object()
1466                } else {
1467                    self.skip_container(b'{', b'}')
1468                }
1469            }
1470            b'[' => {
1471                if checked {
1472                    self.skip_array()
1473                } else {
1474                    self.skip_container(b'[', b']')
1475                }
1476            }
1477            b't' => self.parse_literal("rue"),
1478            b'f' => self.parse_literal("alse"),
1479            b'n' => self.parse_literal("ull"),
1480            _ => perr!(self, InvalidJsonValue),
1481        }?;
1482        let slice = self.read.slice_unchecked(start, self.read.index());
1483        Ok((slice, status))
1484    }
1485
1486    #[inline(always)]
1487    pub(crate) fn parse_trailing(&mut self) -> Result<()> {
1488        // check exceed
1489        let exceed = self.read.index() > self.read.as_u8_slice().len();
1490        if exceed {
1491            return perr!(self, EofWhileParsing);
1492        }
1493
1494        // has_main should marked before skip_space
1495        let remain = self.read.remain() > 0;
1496        if !remain {
1497            return Ok(());
1498        }
1499
1500        // note: we use padding chars `x"x` when parsing json into dom.
1501        // so, we should check the trailing chars is not the padding chars.
1502        let last = self.skip_space();
1503        let exceed = self.read.index() > self.read.as_u8_slice().len();
1504        if last.is_some() && !exceed {
1505            perr!(self, TrailingCharacters)
1506        } else {
1507            Ok(())
1508        }
1509    }
1510
1511    // get_from_object will make reader at the position after target key in JSON object.
1512    // Advance reader past the value of `target_key` in a JSON object.
1513    // When `checked` is false, uses fast-path token scanning to skip values.
1514    fn get_from_object(
1515        &mut self,
1516        target_key: &str,
1517        temp_buf: &mut Vec<u8>,
1518        checked: bool,
1519    ) -> Result<()> {
1520        match self.skip_space() {
1521            Some(b'{') => {}
1522            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON object")),
1523            None => return perr!(self, EofWhileParsing),
1524        }
1525
1526        // deal with the empty object
1527        match self.get_next_token([b'"', b'}'], 1) {
1528            Some(b'"') => {}
1529            Some(b'}') => return perr!(self, GetInEmptyObject),
1530            None => return perr!(self, EofWhileParsing),
1531            Some(_) => unreachable!(),
1532        }
1533
1534        loop {
1535            let key = self.parse_string_raw(temp_buf)?;
1536            self.parse_object_clo()?;
1537            if key.len() == target_key.len() && key.as_ref() == target_key.as_bytes() {
1538                return Ok(());
1539            }
1540
1541            if checked {
1542                self.skip_one(true)?;
1543                match self.skip_space() {
1544                    Some(b'}') => return perr!(self, GetUnknownKeyInObject),
1545                    Some(b',') => match self.skip_space() {
1546                        Some(b'"') => continue,
1547                        _ => return perr!(self, ExpectObjectKeyOrEnd),
1548                    },
1549                    None => return perr!(self, EofWhileParsing),
1550                    _ => return perr!(self, ExpectedObjectCommaOrEnd),
1551                };
1552            } else {
1553                // skip object,array,string at first (unchecked fast path)
1554                match self.skip_space() {
1555                    Some(b'{') => self.skip_container(b'{', b'}')?,
1556                    Some(b'[') => self.skip_container(b'[', b']')?,
1557                    Some(b'"') => unsafe {
1558                        let _ = self.skip_string_unchecked()?;
1559                    },
1560                    None => return perr!(self, EofWhileParsing),
1561                    _ => {}
1562                };
1563                // optimize: direct find the next quote of key or object ending
1564                match self.get_next_token([b'"', b'}'], 1) {
1565                    Some(b'"') => continue,
1566                    Some(b'}') => return perr!(self, GetUnknownKeyInObject),
1567                    None => return perr!(self, EofWhileParsing),
1568                    Some(_) => unreachable!(),
1569                }
1570            }
1571        }
1572    }
1573
1574    // Advance reader past `index` elements in a JSON array.
1575    // When `checked` is false, uses fast-path token scanning to skip values.
1576    fn get_from_array(&mut self, index: usize, checked: bool) -> Result<()> {
1577        let mut count = index;
1578        match self.skip_space() {
1579            Some(b'[') => {}
1580            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON array")),
1581            None => return perr!(self, EofWhileParsing),
1582        }
1583
1584        if checked {
1585            match self.skip_space_peek() {
1586                Some(b']') => return perr!(self, GetInEmptyArray),
1587                Some(_) => {}
1588                None => return perr!(self, EofWhileParsing),
1589            }
1590        }
1591
1592        while count > 0 {
1593            if checked {
1594                self.skip_one(true)?;
1595                match self.skip_space() {
1596                    Some(b']') => return perr!(self, GetIndexOutOfArray),
1597                    Some(b',') => {}
1598                    Some(_) => return perr!(self, ExpectedArrayCommaOrEnd),
1599                    None => return perr!(self, EofWhileParsing),
1600                }
1601                count -= 1;
1602                match self.skip_space_peek() {
1603                    Some(_) if count == 0 => return Ok(()),
1604                    None => return perr!(self, EofWhileParsing),
1605                    _ => continue,
1606                }
1607            } else {
1608                // skip object,array,string at first (unchecked fast path)
1609                match self.skip_space() {
1610                    Some(b'{') => self.skip_container(b'{', b'}')?,
1611                    Some(b'[') => self.skip_container(b'[', b']')?,
1612                    Some(b'"') => unsafe {
1613                        let _ = self.skip_string_unchecked()?;
1614                    },
1615                    Some(b']') => return perr!(self, GetInEmptyArray),
1616                    None => return perr!(self, EofWhileParsing),
1617                    _ => {}
1618                };
1619                // optimize: direct find the next token
1620                match self.get_next_token([b']', b','], 1) {
1621                    Some(b']') => return perr!(self, GetIndexOutOfArray),
1622                    Some(b',') => {
1623                        count -= 1;
1624                        continue;
1625                    }
1626                    None => return perr!(self, EofWhileParsing),
1627                    Some(_) => unreachable!(),
1628                }
1629            }
1630        }
1631
1632        Ok(())
1633    }
1634
1635    pub(crate) fn get_from_with_iter<P: IntoIterator>(
1636        &mut self,
1637        path: P,
1638        checked: bool,
1639    ) -> Result<(&'de [u8], ParseStatus)>
1640    where
1641        P::Item: Index,
1642    {
1643        // temp buf reused when parsing each escaped key
1644        let mut temp_buf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
1645        for jp in path.into_iter() {
1646            if let Some(key) = jp.as_key() {
1647                self.get_from_object(key, &mut temp_buf, checked)
1648            } else if let Some(index) = jp.as_index() {
1649                self.get_from_array(index, checked)
1650            } else {
1651                unreachable!();
1652            }?;
1653        }
1654        self.skip_one(true)
1655    }
1656
1657    fn get_many_rec(
1658        &mut self,
1659        node: &PointerTreeNode,
1660        out: &mut Vec<Option<LazyValue<'de>>>,
1661        strbuf: &mut Vec<u8>,
1662        remain: &mut usize,
1663        is_safe: bool,
1664    ) -> Result<()> {
1665        // all path has parsed
1666        if *remain == 0 {
1667            return Ok(());
1668        }
1669
1670        // skip the leading space
1671        let ch = self.skip_space_peek();
1672        if ch.is_none() {
1673            return perr!(self, EofWhileParsing);
1674        }
1675
1676        // need write to out, record the start position
1677        let start = self.read.index();
1678        let slice: &'de [u8];
1679
1680        let mut status = ParseStatus::None;
1681        match &node.children {
1682            PointerTreeInner::Empty => {
1683                status = self.skip_one(true)?.1;
1684            }
1685            PointerTreeInner::Index(midxs) => {
1686                self.get_many_index(midxs, strbuf, out, remain, is_safe)?
1687            }
1688            PointerTreeInner::Key(mkeys) => {
1689                self.get_many_keys(mkeys, strbuf, out, remain, is_safe)?
1690            }
1691        };
1692
1693        if !node.order.is_empty() {
1694            slice = self.read.slice_unchecked(start, self.read.index());
1695            let lv = LazyValue::new(slice.into(), status.into());
1696            for p in &node.order {
1697                out[*p] = Some(lv.clone());
1698            }
1699            *remain -= node.order.len();
1700        }
1701        Ok(())
1702    }
1703
1704    #[allow(clippy::mutable_key_type)]
1705    #[allow(clippy::mutable_key_type)]
1706    fn get_many_keys(
1707        &mut self,
1708        mkeys: &MultiKey,
1709        strbuf: &mut Vec<u8>,
1710        out: &mut Vec<Option<LazyValue<'de>>>,
1711        remain: &mut usize,
1712        checked: bool,
1713    ) -> Result<()> {
1714        debug_assert!(strbuf.is_empty());
1715        match self.skip_space() {
1716            Some(b'{') => {}
1717            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON object")),
1718            None => return perr!(self, EofWhileParsing),
1719        }
1720
1721        // deal with the empty object
1722        if checked {
1723            match self.skip_space() {
1724                Some(b'"') => {}
1725                Some(b'}') => return perr!(self, GetInEmptyObject),
1726                _ => return perr!(self, ExpectObjectKeyOrEnd),
1727            }
1728        } else {
1729            match self.get_next_token([b'"', b'}'], 1) {
1730                Some(b'"') => {}
1731                Some(b'}') => return perr!(self, GetInEmptyObject),
1732                None => return perr!(self, EofWhileParsing),
1733                Some(_) => unreachable!(),
1734            }
1735        }
1736
1737        loop {
1738            let key = self.parse_str(strbuf)?;
1739            self.parse_object_clo()?;
1740            if let Some(val) = mkeys.get(key.deref()) {
1741                self.get_many_rec(val, out, strbuf, remain, checked)?;
1742                if *remain == 0 {
1743                    break;
1744                }
1745            } else if checked {
1746                self.skip_one(true)?;
1747            } else {
1748                // skip object,array,string at first (unchecked fast path)
1749                match self.skip_space() {
1750                    Some(b'{') => self.skip_container(b'{', b'}')?,
1751                    Some(b'[') => self.skip_container(b'[', b']')?,
1752                    Some(b'"') => unsafe {
1753                        let _ = self.skip_string_unchecked()?;
1754                    },
1755                    None => return perr!(self, EofWhileParsing),
1756                    _ => {}
1757                };
1758            }
1759
1760            if checked {
1761                match self.skip_space() {
1762                    Some(b',') if self.skip_space() == Some(b'"') => continue,
1763                    Some(b',') => return perr!(self, ExpectObjectKeyOrEnd),
1764                    Some(b'}') => break,
1765                    Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
1766                    None => return perr!(self, EofWhileParsing),
1767                }
1768            } else {
1769                // optimize: direct find the next quote of key. or object ending
1770                match self.get_next_token([b'"', b'}'], 1) {
1771                    Some(b'"') => {}
1772                    Some(b'}') => break,
1773                    None => return perr!(self, EofWhileParsing),
1774                    Some(_) => unreachable!(),
1775                }
1776            }
1777        }
1778
1779        Ok(())
1780    }
1781
1782    #[cfg(test)]
1783    #[allow(dead_code)]
1784    pub(crate) fn remain_str(&self) -> &'de str {
1785        as_str(self.remain_u8_slice())
1786    }
1787
1788    #[cfg(test)]
1789    #[allow(dead_code)]
1790    pub(crate) fn remain_u8_slice(&self) -> &'de [u8] {
1791        let reader = &self.read;
1792        let start = reader.index();
1793        reader.slice_unchecked(start, start + reader.remain())
1794    }
1795
1796    fn get_many_index(
1797        &mut self,
1798        midx: &MultiIndex,
1799        strbuf: &mut Vec<u8>,
1800        out: &mut Vec<Option<LazyValue<'de>>>,
1801        remain: &mut usize,
1802        checked: bool,
1803    ) -> Result<()> {
1804        match self.skip_space() {
1805            Some(b'[') => {}
1806            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON array")),
1807            None => return perr!(self, EofWhileParsing),
1808        }
1809        let mut index = 0;
1810        let mut visited = 0;
1811
1812        match self.skip_space_peek() {
1813            Some(b']') => return perr!(self, GetInEmptyArray),
1814            Some(_) => {}
1815            None => return perr!(self, EofWhileParsing),
1816        }
1817
1818        loop {
1819            if let Some(val) = midx.get(&index) {
1820                self.get_many_rec(val, out, strbuf, remain, checked)?;
1821                visited += 1;
1822                if *remain == 0 {
1823                    break;
1824                }
1825            } else if checked {
1826                self.skip_one(true)?;
1827            } else {
1828                // skip object,array,string at first (unchecked fast path)
1829                match self.skip_space() {
1830                    Some(b'{') => self.skip_container(b'{', b'}')?,
1831                    Some(b'[') => self.skip_container(b'[', b']')?,
1832                    Some(b'"') => unsafe {
1833                        let _ = self.skip_string_unchecked()?;
1834                    },
1835                    None => return perr!(self, EofWhileParsing),
1836                    _ => {}
1837                };
1838            }
1839
1840            if checked {
1841                match self.skip_space() {
1842                    Some(b']') => break,
1843                    Some(b',') => {
1844                        index += 1;
1845                        continue;
1846                    }
1847                    Some(_) => return perr!(self, ExpectedArrayCommaOrEnd),
1848                    None => return perr!(self, EofWhileParsing),
1849                }
1850            } else {
1851                // optimize: direct find the next token
1852                match self.get_next_token([b']', b','], 1) {
1853                    Some(b']') => break,
1854                    Some(b',') => {
1855                        index += 1;
1856                        continue;
1857                    }
1858                    None => return perr!(self, EofWhileParsing),
1859                    Some(_) => unreachable!(),
1860                }
1861            }
1862        }
1863
1864        // check whether remaining unknown keys
1865        if visited < midx.len() {
1866            perr!(self, GetIndexOutOfArray)
1867        } else {
1868            Ok(())
1869        }
1870    }
1871
1872    pub(crate) fn get_many(
1873        &mut self,
1874        tree: &PointerTree,
1875        is_safe: bool,
1876    ) -> Result<Vec<Option<LazyValue<'de>>>> {
1877        let mut strbuf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
1878        let mut remain = tree.size();
1879        let mut out: Vec<Option<LazyValue<'de>>> = Vec::with_capacity(tree.size());
1880        out.resize(tree.size(), Option::default());
1881        let cur = &tree.root;
1882        self.get_many_rec(cur, &mut out, &mut strbuf, &mut remain, is_safe)?;
1883        Ok(out)
1884    }
1885
1886    #[cold]
1887    pub fn peek_invalid_type(&mut self, peek: u8, exp: &dyn Expected) -> Error {
1888        let err = match peek {
1889            b'n' => {
1890                if let Err(err) = self.parse_literal("ull") {
1891                    return err;
1892                }
1893                de::Error::invalid_type(Unexpected::Unit, exp)
1894            }
1895            b't' => {
1896                if let Err(err) = self.parse_literal("rue") {
1897                    return err;
1898                }
1899                de::Error::invalid_type(Unexpected::Bool(true), exp)
1900            }
1901            b'f' => {
1902                if let Err(err) = self.parse_literal("alse") {
1903                    return err;
1904                }
1905                de::Error::invalid_type(Unexpected::Bool(false), exp)
1906            }
1907            c @ b'-' | c @ b'0'..=b'9' => match self.parse_number(c) {
1908                Ok(n) => invalid_type_number(&n, exp),
1909                Err(err) => return err,
1910            },
1911            b'"' => {
1912                let mut scratch = Vec::new();
1913                match self.parse_str(&mut scratch) {
1914                    Ok(s) if std::str::from_utf8(s.as_bytes()).is_ok() => {
1915                        de::Error::invalid_type(Unexpected::Str(&s), exp)
1916                    }
1917                    Ok(s) => de::Error::invalid_type(Unexpected::Bytes(s.as_bytes()), exp),
1918                    Err(err) => return err,
1919                }
1920            }
1921            // for correctness, we will parse the whole object or array.
1922            b'[' => {
1923                self.read.backward(1);
1924
1925                match self.skip_one(true) {
1926                    Ok(_) => de::Error::invalid_type(Unexpected::Seq, exp),
1927                    Err(err) => return err,
1928                }
1929            }
1930            b'{' => {
1931                self.read.backward(1);
1932                match self.skip_one(true) {
1933                    Ok(_) => de::Error::invalid_type(Unexpected::Map, exp),
1934                    Err(err) => return err,
1935                }
1936            }
1937            _ => self.error(ErrorCode::InvalidJsonValue),
1938        };
1939        self.fix_position(err)
1940    }
1941}
1942
1943impl<'de, R> Parser<R>
1944where
1945    R: Reader<'de>,
1946{
1947    pub fn get_by_schema(&mut self, schema: &mut crate::Value) -> Result<()> {
1948        if !schema.is_object() {
1949            return perr!(
1950                self,
1951                Message(std::borrow::Cow::Borrowed("The schema must be an object"))
1952            );
1953        }
1954
1955        let mut strbuf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
1956        self.get_by_schema_rec(schema, &mut strbuf)
1957    }
1958
1959    fn get_by_schema_rec(&mut self, schema: &mut crate::Value, strbuf: &mut Vec<u8>) -> Result<()> {
1960        let ch = self.skip_space_peek();
1961        if ch.is_none() {
1962            return perr!(self, EofWhileParsing);
1963        }
1964
1965        let mut should_replace = true;
1966        let start = self.read.index();
1967
1968        match (schema.as_object_mut(), ch) {
1969            (Some(object), Some(b'{')) => {
1970                let mut key_values = HashMap::new();
1971                for (key, value) in object.iter_mut() {
1972                    key_values.insert(key, value);
1973                }
1974
1975                // We should replace the schema object if the object is empty
1976                should_replace = key_values.is_empty();
1977                if should_replace {
1978                    self.skip_one(true)?;
1979                } else {
1980                    self.read.eat(1);
1981                    match self.skip_space() {
1982                        Some(b'"') => {}
1983                        Some(b'}') => return Ok(()),
1984                        _ => {
1985                            return perr!(self, ExpectObjectKeyOrEnd);
1986                        }
1987                    }
1988
1989                    loop {
1990                        let key = self.parse_str(strbuf)?;
1991                        self.parse_object_clo()?;
1992                        if let Some(val) = key_values.get_mut(key.deref()) {
1993                            self.get_by_schema_rec(val, strbuf)?;
1994                        } else {
1995                            self.skip_one(true)?;
1996                        }
1997
1998                        match self.skip_space() {
1999                            Some(b',') => match self.skip_space() {
2000                                Some(b'"') => continue,
2001                                _ => return perr!(self, ExpectObjectKeyOrEnd),
2002                            },
2003                            Some(b'}') => break,
2004                            Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
2005                            None => return perr!(self, EofWhileParsing),
2006                        }
2007                    }
2008                }
2009            }
2010            _ => {
2011                self.skip_one(true)?;
2012            }
2013        }
2014
2015        let end = self.read.index();
2016        if should_replace && start < end {
2017            let slice = self.read.slice_unchecked(start, end);
2018            *schema = crate::from_slice(slice)?;
2019        }
2020        Ok(())
2021    }
2022}