sonic_rs/
parser.rs

1use std::{
2    borrow::Cow,
3    collections::HashMap,
4    fmt::Debug,
5    num::NonZeroU8,
6    ops::Deref,
7    slice::{from_raw_parts, from_raw_parts_mut},
8    str::from_utf8_unchecked,
9};
10
11use faststr::FastStr;
12use serde::de::{self, Expected, Unexpected};
13use sonic_number::{parse_number, ParserNumber};
14#[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
15use sonic_simd::bits::NeonBits;
16use sonic_simd::{i8x32, m8x32, u8x32, u8x64, Mask, Simd};
17
18use crate::{
19    config::DeserializeCfg,
20    error::{
21        Error,
22        ErrorCode::{self, *},
23        Result,
24    },
25    index::Index,
26    lazyvalue::value::HasEsc,
27    pointer::{
28        tree::{MultiIndex, MultiKey, PointerTreeInner, PointerTreeNode},
29        PointerTree,
30    },
31    reader::Reader,
32    serde::de::invalid_type_number,
33    util::{
34        arch::{get_nonspace_bits, prefix_xor},
35        string::*,
36        unicode::{codepoint_to_utf8, hex_to_u32_nocheck},
37    },
38    value::visitor::JsonVisitor,
39    JsonValueMutTrait, JsonValueTrait, LazyValue, Number, OwnedLazyValue,
40};
41
/// A reference handed out by the parser that records where the data lives.
///
/// `Borrowed` carries the lifetime `'b` and `Copied` carries the lifetime `'c`.
/// In this file, `Parser::parse_str` returns `Borrowed` for text that still
/// points at the original input slice and `Copied` for text that had to be
/// materialized in the caller's scratch buffer.
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// Data that can be used for the whole `'b` lifetime without copying.
    Borrowed(&'b T),
    /// Data living in a temporary buffer that is only valid for `'c`.
    Copied(&'c T),
}

impl<'b, 'c> From<Reference<'b, 'c, str>> for Cow<'b, str> {
    /// Converts the reference into a `Cow` bound to the `'b` lifetime.
    ///
    /// A `Borrowed` reference already lives for `'b`, so it is passed through
    /// as `Cow::Borrowed` without allocating. A `Copied` reference only lives
    /// for `'c`, so its contents must be cloned into an owned `String`.
    fn from(value: Reference<'b, 'c, str>) -> Self {
        match value {
            Reference::Borrowed(b) => Cow::Borrowed(b),
            Reference::Copied(c) => Cow::Owned(c.to_owned()),
        }
    }
}
59
60impl<'b, 'c, T: Debug + ?Sized + 'static> Debug for Reference<'b, 'c, T> {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        match self {
63            Self::Borrowed(c) => write!(f, "Borrowed({c:?})"),
64            Self::Copied(c) => write!(f, "Copied({c:?})"),
65        }
66    }
67}
68
69impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
70where
71    T: ?Sized + 'static,
72{
73    type Target = T;
74
75    fn deref(&self) -> &Self::Target {
76        match *self {
77            Reference::Borrowed(b) => b,
78            Reference::Copied(c) => c,
79        }
80    }
81}
82
/// Raw bytes of a parsed string together with the scratch buffer it may use.
///
/// Both variants keep the mutable scratch buffer so that a later step can
/// still write into it (see `Parser::parse_str`, which overwrites the buffer
/// when it has to repair invalid UTF-8).
pub(crate) enum ParsedSlice<'b, 'c> {
    /// The string bytes are `slice`; `buf` is carried along unused so far.
    Borrowed {
        slice: &'b [u8],
        buf: &'c mut Vec<u8>,
    },
    /// The string bytes are the current contents of the scratch buffer.
    Copied(&'c mut Vec<u8>),
}

impl<'b, 'c> Deref for ParsedSlice<'b, 'c> {
    type Target = [u8];

    /// Returns the string bytes, wherever they are stored.
    fn deref(&self) -> &[u8] {
        match self {
            Self::Copied(owned) => owned.as_slice(),
            Self::Borrowed { slice, .. } => slice,
        }
    }
}
101
/// Default capacity (128) for key scratch buffers, going by the name; the
/// code that consumes this constant is outside this part of the file.
pub(crate) const DEFAULT_KEY_BUF_CAPACITY: usize = 128;
/// Reinterprets `data` as a `&str` without validating it.
///
/// No UTF-8 check is performed, so callers must only pass bytes that are
/// already known to be valid UTF-8.
pub(crate) fn as_str(data: &[u8]) -> &str {
    // SAFETY: not checked here; validity is the caller's responsibility.
    let text: &str = unsafe { from_utf8_unchecked(data) };
    text
}
106
/// Computes, without branching, which of 32 positions are escaped by a
/// preceding backslash.
///
/// * `backslash` - bitmask with bit `i` set when position `i` is a backslash.
/// * `prev_escaped` - on entry, 1 if the previous block ended with a backslash
///   that escapes position 0 of this block, else 0. On exit it holds the same
///   carry for the next block.
///
/// Returns a bitmask with bit `i` set when position `i` is escaped.
#[inline(always)]
fn get_escaped_branchless_u32(prev_escaped: &mut u32, backslash: u32) -> u32 {
    const EVEN_BITS: u32 = 0x5555_5555;
    const ODD_BITS: u32 = !EVEN_BITS;

    let carried_in = *prev_escaped;
    // A backslash that is itself escaped by the previous block starts nothing.
    let live = backslash & !carried_in;
    let after_backslash = (live << 1) | carried_in;
    // Runs of backslashes that begin on an odd position.
    let odd_run_starts = live & ODD_BITS & !after_backslash;
    // The add ripples a carry through each run; overflow means a run reached
    // the end of the block and continues into the next one.
    let (even_run_ends, carried_out) = odd_run_starts.overflowing_add(live);
    *prev_escaped = u32::from(carried_out);
    (EVEN_BITS ^ (even_run_ends << 1)) & after_backslash
}
119
/// Builds `Err(parser.error(code))` for the given parser identifier, so a
/// failure carries the position computed by the parser's `error` method.
macro_rules! perr {
    ($parser:ident, $code:expr) => {{
        Err($parser.error($code))
    }};
}
126
/// Turns the boolean result of a visitor callback into a `Result`: `Ok(())`
/// when the callback returned `true`, otherwise an `UnexpectedVisitType`
/// error produced through `perr!`.
macro_rules! check_visit {
    ($parser:ident, $accepted:expr $(,)?) => {
        if $accepted {
            Ok(())
        } else {
            perr!($parser, UnexpectedVisitType)
        }
    };
}
136
/// Computes, without branching, which of 64 positions are escaped by a
/// preceding backslash.
///
/// * `backslash` - bitmask with bit `i` set when byte `i` of the block is a
///   backslash (this is how `get_string_bits` builds it).
/// * `prev_escaped` - on entry, 1 if the previous block ended with a backslash
///   that escapes byte 0 of this block, else 0. On exit it holds the same
///   carry for the next block.
///
/// Returns a bitmask with bit `i` set when byte `i` is escaped.
#[inline(always)]
fn get_escaped_branchless_u64(prev_escaped: &mut u64, backslash: u64) -> u64 {
    const EVEN_BITS: u64 = 0x5555_5555_5555_5555;
    const ODD_BITS: u64 = !EVEN_BITS;

    let carried_in = *prev_escaped;
    // A backslash that is itself escaped by the previous block starts nothing.
    let live = backslash & !carried_in;
    let after_backslash = (live << 1) | carried_in;
    // Runs of backslashes that begin on an odd position.
    let odd_run_starts = live & ODD_BITS & !after_backslash;
    // The add ripples a carry through each run; overflow means a run reached
    // the end of the block and continues into the next one.
    let (even_run_ends, carried_out) = odd_run_starts.overflowing_add(live);
    *prev_escaped = u64::from(carried_out);
    (EVEN_BITS ^ (even_run_ends << 1)) & after_backslash
}
149
/// Returns `true` when `ch` is one of the four JSON whitespace bytes:
/// space, `\r`, `\n` or `\t`.
#[inline(always)]
pub(crate) fn is_whitespace(ch: u8) -> bool {
    // NOTE: the compiler does not turn a plain comparison chain into a lookup,
    // so the membership test is hard-coded as a 64-bit bitmap.
    const SPACE_MASK: u64 = (1u64 << b' ') | (1u64 << b'\r') | (1u64 << b'\n') | (1u64 << b'\t');
    // Bytes >= 64 cannot be in the bitmap; the check also keeps the shift in range.
    ch < 64 && (SPACE_MASK & (1u64 << ch)) != 0
}
157
158#[inline(always)]
159fn get_string_bits(data: &[u8; 64], prev_instring: &mut u64, prev_escaped: &mut u64) -> u64 {
160    let v = unsafe { u8x64::from_slice_unaligned_unchecked(data) };
161
162    let bs_bits = (v.eq(&u8x64::splat(b'\\'))).bitmask();
163    let escaped: u64;
164    if bs_bits != 0 {
165        escaped = get_escaped_branchless_u64(prev_escaped, bs_bits);
166    } else {
167        escaped = *prev_escaped;
168        *prev_escaped = 0;
169    }
170    let quote_bits = (v.eq(&u8x64::splat(b'"'))).bitmask() & !escaped;
171    let in_string = unsafe { prefix_xor(quote_bits) ^ *prev_instring };
172    *prev_instring = (in_string as i64 >> 63) as u64;
173    in_string
174}
175
176#[inline(always)]
177fn skip_container_loop(
178    input: &[u8; 64],        /* a 64-bytes slice from json */
179    prev_instring: &mut u64, /* the bitmap of last string */
180    prev_escaped: &mut u64,
181    lbrace_num: &mut usize,
182    rbrace_num: &mut usize,
183    left: u8,
184    right: u8,
185) -> Option<NonZeroU8> {
186    // get the bitmao
187    let instring = get_string_bits(input, prev_instring, prev_escaped);
188    // #Safety
189    // the input is 64 bytes, so the v is always valid.
190    let v = unsafe { u8x64::from_slice_unaligned_unchecked(input) };
191    let last_lbrace_num = *lbrace_num;
192    let mut rbrace = (v.eq(&u8x64::splat(right))).bitmask() & !instring;
193    let lbrace = (v.eq(&u8x64::splat(left))).bitmask() & !instring;
194    while rbrace != 0 {
195        *rbrace_num += 1;
196        *lbrace_num = last_lbrace_num + (lbrace & (rbrace - 1)).count_ones() as usize;
197        let is_closed = lbrace_num < rbrace_num;
198        if is_closed {
199            debug_assert_eq!(*rbrace_num, *lbrace_num + 1);
200            let cnt = rbrace.trailing_zeros() + 1;
201            return unsafe { Some(NonZeroU8::new_unchecked(cnt as u8)) };
202        }
203        rbrace &= rbrace - 1;
204    }
205    *lbrace_num = last_lbrace_num + lbrace.count_ones() as usize;
206    None
207}
208
209pub(crate) struct Pair<'de> {
210    pub key: Cow<'de, str>,
211    pub val: &'de [u8],
212    pub status: ParseStatus,
213}
214
215pub struct Parser<R> {
216    pub read: R,
217    error_index: usize,   // mark the error position
218    nospace_bits: u64,    // SIMD marked nospace bitmap
219    nospace_start: isize, // the start position of nospace_bits
220    pub(crate) cfg: DeserializeCfg,
221}
222
/// Records the parse status of a skipped or parsed value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ParseStatus {
    /// Nothing special was recorded.
    None,
    /// The value was found to contain escapes.
    HasEscaped,
}
229
230impl From<ParseStatus> for HasEsc {
231    fn from(value: ParseStatus) -> Self {
232        match value {
233            ParseStatus::None => HasEsc::None,
234            ParseStatus::HasEscaped => HasEsc::Yes,
235        }
236    }
237}
238
239impl<'de, R> Parser<R>
240where
241    R: Reader<'de>,
242{
243    pub fn new(read: R) -> Self {
244        Self {
245            read,
246            error_index: usize::MAX,
247            nospace_bits: 0,
248            nospace_start: -128,
249            cfg: DeserializeCfg::default(),
250        }
251    }
252
253    pub fn offset(&self) -> usize {
254        self.read.index()
255    }
256
257    pub(crate) fn with_config(mut self, cfg: DeserializeCfg) -> Self {
258        self.cfg = cfg;
259        self
260    }
261
262    #[inline(always)]
263    fn error_index(&self) -> usize {
264        // when parsing strings , we need record the error position.
265        // it must be smaller than reader.index().
266        std::cmp::min(self.error_index, self.read.index().saturating_sub(1))
267    }
268
269    /// Error caused by a byte from next_char().
270    #[cold]
271    pub fn error(&self, mut reason: ErrorCode) -> Error {
272        // check invalid utf8 here at first
273        // FIXME: maybe has invalid utf8 when deserializing into byte, and just bytes has other
274        // errors?
275        if let Err(e) = self.read.check_utf8_final() {
276            return e;
277        }
278
279        // check errors, if exceed, the reason must be eof, and begin parsing the padding chars
280        let mut index = self.error_index();
281        let len = self.read.as_u8_slice().len();
282        if index > len {
283            reason = EofWhileParsing;
284            index = len;
285        }
286        Error::syntax(reason, self.read.origin_input(), index)
287    }
288
289    // maybe error in generated in visitor, so we need fix the position.
290    #[cold]
291    pub(crate) fn fix_position(&self, err: Error) -> Error {
292        if err.line() == 0 {
293            self.error(err.error_code())
294        } else {
295            err
296        }
297    }
298
299    #[inline(always)]
300    pub fn parse_number(&mut self, first: u8) -> Result<ParserNumber> {
301        let reader = &mut self.read;
302        let neg = first == b'-';
303        let mut now = reader.index() - (!neg as usize);
304        let data = reader.as_u8_slice();
305        let ret = parse_number(data, &mut now, neg);
306        reader.set_index(now);
307        ret.map_err(|err| self.error(err.into()))
308    }
309
310    // TODO: optimize me, avoid clone twice.
311    #[inline(always)]
312    fn parse_string_owned<V>(&mut self, vis: &mut V, strbuf: &mut Vec<u8>) -> Result<()>
313    where
314        V: JsonVisitor<'de>,
315    {
316        let rs = self.parse_str(strbuf)?;
317        check_visit!(self, vis.visit_str(rs.as_ref()))
318    }
319
320    #[inline(always)]
321    fn parse_string_inplace<V: JsonVisitor<'de>>(&mut self, vis: &mut V) -> Result<()> {
322        unsafe {
323            let mut src = self.read.cur_ptr();
324            let start = self.read.cur_ptr();
325            let cnt =
326                parse_string_inplace(&mut src, self.cfg.utf8_lossy).map_err(|e| self.error(e))?;
327            self.read.set_ptr(src);
328            let slice = from_raw_parts(start, cnt);
329            let s = from_utf8_unchecked(slice);
330            check_visit!(self, vis.visit_borrowed_str(s))
331        }
332    }
333
334    #[inline(always)]
335    fn parse_number_visit<V>(&mut self, first: u8, vis: &mut V) -> Result<()>
336    where
337        V: JsonVisitor<'de>,
338    {
339        if self.cfg.use_rawnumber {
340            let start = self.read.index() - 1;
341            self.skip_number(first)?;
342            let slice = self.read.slice_unchecked(start, self.read.index());
343            check_visit!(self, vis.visit_raw_number(as_str(slice)))
344        } else {
345            let ok = match self.parse_number(first)? {
346                ParserNumber::Float(f) => vis.visit_f64(f),
347                ParserNumber::Unsigned(f) => vis.visit_u64(f),
348                ParserNumber::Signed(f) => vis.visit_i64(f),
349            };
350            check_visit!(self, ok)
351        }
352    }
353
354    #[inline(always)]
355    fn parse_number_inplace<V>(&mut self, first: u8, vis: &mut V) -> Result<()>
356    where
357        V: JsonVisitor<'de>,
358    {
359        if self.cfg.use_rawnumber {
360            let start = self.read.index() - 1;
361            self.skip_number(first)?;
362            let slice = self.read.slice_unchecked(start, self.read.index());
363            check_visit!(self, vis.visit_borrowed_raw_number(as_str(slice)))
364        } else {
365            let ok = match self.parse_number(first)? {
366                ParserNumber::Float(f) => vis.visit_f64(f),
367                ParserNumber::Unsigned(f) => vis.visit_u64(f),
368                ParserNumber::Signed(f) => vis.visit_i64(f),
369            };
370            check_visit!(self, ok)
371        }
372    }
373
374    #[inline(always)]
375    fn parse_array<V>(&mut self, vis: &mut V) -> Result<()>
376    where
377        V: JsonVisitor<'de>,
378    {
379        // parsing empty array
380        check_visit!(self, vis.visit_array_start(0))?;
381
382        let mut first = match self.skip_space() {
383            Some(b']') => return check_visit!(self, vis.visit_array_end(0)),
384            first => first,
385        };
386
387        let mut count = 0;
388        loop {
389            match first {
390                Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_inplace(c, vis),
391                Some(b'"') => self.parse_string_inplace(vis),
392                Some(b'{') => self.parse_object(vis),
393                Some(b'[') => self.parse_array(vis),
394                Some(first) => self.parse_literal_visit(first, vis),
395                None => perr!(self, EofWhileParsing),
396            }?;
397            count += 1;
398            first = match self.skip_space() {
399                Some(b']') => return check_visit!(self, vis.visit_array_end(count)),
400                Some(b',') => self.skip_space(),
401                _ => return perr!(self, ExpectedArrayCommaOrEnd),
402            };
403        }
404    }
405
406    #[inline(always)]
407    fn parse_object<V>(&mut self, vis: &mut V) -> Result<()>
408    where
409        V: JsonVisitor<'de>,
410    {
411        // parsing empty object
412        let mut count: usize = 0;
413        check_visit!(self, vis.visit_object_start(0))?;
414        match self.skip_space() {
415            Some(b'}') => return check_visit!(self, vis.visit_object_end(0)),
416            Some(b'"') => {}
417            _ => return perr!(self, ExpectObjectKeyOrEnd),
418        }
419
420        // loop for each object key and value
421        loop {
422            self.parse_string_inplace(vis)?;
423            self.parse_object_clo()?;
424            self.parse_value(vis)?;
425            count += 1;
426            match self.skip_space() {
427                Some(b'}') => return check_visit!(self, vis.visit_object_end(count)),
428                Some(b',') => match self.skip_space() {
429                    Some(b'"') => continue,
430                    _ => return perr!(self, ExpectObjectKeyOrEnd),
431                },
432                _ => return perr!(self, ExpectedArrayCommaOrEnd),
433            }
434        }
435    }
436
437    #[inline(always)]
438    fn parse_literal_visit<V>(&mut self, first: u8, vis: &mut V) -> Result<()>
439    where
440        V: JsonVisitor<'de>,
441    {
442        let literal = match first {
443            b't' => "rue",
444            b'f' => "alse",
445            b'n' => "ull",
446            _ => return perr!(self, InvalidJsonValue),
447        };
448
449        let reader = &mut self.read;
450        if let Some(chunk) = reader.next_n(literal.len()) {
451            if chunk != literal.as_bytes() {
452                return perr!(self, InvalidLiteral);
453            }
454
455            let ok = match first {
456                b't' => vis.visit_bool(true),
457                b'f' => vis.visit_bool(false),
458                b'n' => vis.visit_null(),
459                _ => unreachable!(),
460            };
461            check_visit!(self, ok)
462        } else {
463            perr!(self, EofWhileParsing)
464        }
465    }
466
467    #[inline]
468    pub(crate) fn parse_array_elem_lazy(
469        &mut self,
470        first: &mut bool,
471        check: bool,
472    ) -> Result<Option<(&'de [u8], ParseStatus)>> {
473        if *first && self.skip_space() != Some(b'[') {
474            return perr!(self, ExpectedArrayStart);
475        }
476        match self.skip_space_peek() {
477            Some(b']') => {
478                self.read.eat(1);
479                return Ok(None);
480            }
481            Some(b',') if !(*first) => {
482                self.read.eat(1);
483            }
484            Some(_) if *first => {
485                *first = false;
486            }
487            _ => return perr!(self, ExpectedArrayCommaOrEnd),
488        };
489        let (raw, status) = if check {
490            self.skip_one()
491        } else {
492            self.skip_one_unchecked()
493        }?;
494        Ok(Some((raw, status)))
495    }
496
497    #[inline]
498    pub(crate) fn parse_entry_lazy(
499        &mut self,
500        strbuf: &mut Vec<u8>,
501        first: &mut bool,
502        check: bool,
503    ) -> Result<Option<Pair<'de>>> {
504        if *first && self.skip_space() != Some(b'{') {
505            return perr!(self, ExpectedObjectStart);
506        }
507        match self.skip_space() {
508            Some(b'}') => return Ok(None),
509            Some(b'"') if *first => *first = false,
510            Some(b',') if !*first => {
511                if self.skip_space() != Some(b'"') {
512                    return perr!(self, ExpectObjectKeyOrEnd);
513                }
514            }
515            _ => return perr!(self, ExpectedObjectCommaOrEnd),
516        }
517
518        let parsed = self.parse_str(strbuf)?;
519        self.parse_object_clo()?;
520        let (raw, status) = if check {
521            self.skip_one()
522        } else {
523            self.skip_one_unchecked()
524        }?;
525
526        Ok(Some(Pair {
527            key: parsed.into(),
528            val: raw,
529            status,
530        }))
531    }
532
533    // Not use non-recurse version here, because it maybe 5% slower than recurse version.
534    #[inline(always)]
535    pub(crate) fn parse_value<V>(&mut self, visitor: &mut V) -> Result<()>
536    where
537        V: JsonVisitor<'de>,
538    {
539        match self.skip_space() {
540            Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_inplace(c, visitor),
541            Some(b'"') => self.parse_string_inplace(visitor),
542            Some(b'{') => self.parse_object(visitor),
543            Some(b'[') => self.parse_array(visitor),
544            Some(first) => self.parse_literal_visit(first, visitor),
545            None => return perr!(self, EofWhileParsing),
546        }?;
547        Ok(())
548    }
549
550    #[inline(always)]
551    pub(crate) fn match_literal(&mut self, literal: &'static str) -> Result<bool> {
552        if let Some(chunk) = self.read.next_n(literal.len()) {
553            if chunk != literal.as_bytes() {
554                perr!(self, InvalidLiteral)
555            } else {
556                Ok(true)
557            }
558        } else {
559            perr!(self, EofWhileParsing)
560        }
561    }
562
563    #[inline(always)]
564    pub(crate) fn get_owned_lazyvalue(&mut self, strict: bool) -> Result<OwnedLazyValue> {
565        let c = self.skip_space();
566        let start = match c {
567            Some(b'"') => {
568                let start = self.read.index() - 1;
569                match self.skip_string()? {
570                    ParseStatus::None => {
571                        let slice = self.read.slice_unchecked(start, self.read.index());
572                        let raw = unsafe { self.read.slice_ref(slice).as_faststr() };
573                        return Ok(OwnedLazyValue::from_non_esc_str(raw));
574                    }
575                    ParseStatus::HasEscaped => {}
576                }
577                start
578            }
579            Some(b't') if self.match_literal("rue")? => return Ok(true.into()),
580            Some(b'f') if self.match_literal("alse")? => return Ok(false.into()),
581            Some(b'n') if self.match_literal("ull")? => return Ok(().into()),
582            None => return perr!(self, EofWhileParsing),
583            _ => {
584                let start = self.read.index() - 1;
585                self.read.backward(1);
586                if strict {
587                    self.skip_one()?;
588                } else {
589                    self.skip_one_unchecked()?;
590                }
591                start
592            }
593        };
594        let end = self.read.index();
595        let sub = self.read.slice_unchecked(start, end);
596        let raw = unsafe { self.read.slice_ref(sub).as_faststr() };
597        Ok(OwnedLazyValue::new(raw.into(), HasEsc::Possible))
598    }
599
600    #[inline(always)]
601    fn parse_faststr(&mut self, strbuf: &mut Vec<u8>) -> Result<FastStr> {
602        match self.parse_str(strbuf)? {
603            Reference::Borrowed(s) => {
604                return Ok(unsafe { self.read.slice_ref(s.as_bytes()).as_faststr() });
605            }
606            Reference::Copied(s) => Ok(FastStr::new(s)),
607        }
608    }
609
610    #[inline(always)]
611    pub(crate) fn load_owned_lazyvalue(&mut self, strbuf: &mut Vec<u8>) -> Result<OwnedLazyValue> {
612        match self.skip_space() {
613            Some(c @ b'-' | c @ b'0'..=b'9') => {
614                let num: Number = self.parse_number(c)?.into();
615                Ok(OwnedLazyValue::from(num))
616            }
617            Some(b'"') => match self.parse_str(strbuf)? {
618                Reference::Borrowed(s) => {
619                    let raw = unsafe { self.read.slice_ref(s.as_bytes()).as_faststr() };
620                    Ok(OwnedLazyValue::from_faststr(raw))
621                }
622                Reference::Copied(s) => {
623                    let raw = FastStr::new(s);
624                    Ok(OwnedLazyValue::from_faststr(raw))
625                }
626            },
627            Some(b'{') => {
628                // parsing empty object
629                match self.skip_space() {
630                    Some(b'}') => return Ok(Vec::<(FastStr, OwnedLazyValue)>::new().into()),
631                    Some(b'"') => {}
632                    _ => return perr!(self, ExpectObjectKeyOrEnd),
633                }
634
635                // loop for each object key and value
636                let mut vec = Vec::with_capacity(32);
637                loop {
638                    let key = self.parse_faststr(strbuf)?;
639                    self.parse_object_clo()?;
640                    let olv = self.get_owned_lazyvalue(false)?;
641                    vec.push((key, olv));
642                    match self.skip_space() {
643                        Some(b'}') => return Ok(vec.into()),
644                        Some(b',') => match self.skip_space() {
645                            Some(b'"') => continue,
646                            _ => return perr!(self, ExpectObjectKeyOrEnd),
647                        },
648                        _ => return perr!(self, ExpectedArrayCommaOrEnd),
649                    }
650                }
651            }
652            Some(b'[') => {
653                if let Some(b']') = self.skip_space() {
654                    return Ok(Vec::<OwnedLazyValue>::new().into());
655                }
656
657                let mut vec = Vec::with_capacity(32);
658                self.read.backward(1);
659                loop {
660                    vec.push(self.get_owned_lazyvalue(false)?);
661                    match self.skip_space() {
662                        Some(b']') => return Ok(vec.into()),
663                        Some(b',') => {}
664                        _ => return perr!(self, ExpectedArrayCommaOrEnd),
665                    };
666                }
667            }
668            _ => perr!(self, InvalidJsonValue),
669        }
670    }
671
672    #[inline(always)]
673    pub(crate) fn parse_dom<V>(&mut self, vis: &mut V) -> Result<()>
674    where
675        V: JsonVisitor<'de>,
676    {
677        check_visit!(self, vis.visit_dom_start())?;
678        self.parse_value(vis)?;
679        check_visit!(self, vis.visit_dom_end())
680    }
681
682    #[inline(always)]
683    pub(crate) fn parse_dom2<V>(&mut self, vis: &mut V, strbuf: &mut Vec<u8>) -> Result<()>
684    where
685        V: JsonVisitor<'de>,
686    {
687        check_visit!(self, vis.visit_dom_start())?;
688        self.parse_value2(vis, strbuf)?;
689        check_visit!(self, vis.visit_dom_end())
690    }
691
692    pub(crate) fn parse_value2<V: JsonVisitor<'de>>(
693        &mut self,
694        vis: &mut V,
695        strbuf: &mut Vec<u8>,
696    ) -> Result<()> {
697        match self.skip_space() {
698            Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_visit(c, vis),
699            Some(b'"') => self.parse_string_owned(vis, strbuf),
700            Some(b'{') => self.parse_object2(vis, strbuf),
701            Some(b'[') => self.parse_array2(vis, strbuf),
702            Some(first) => self.parse_literal_visit(first, vis),
703            None => perr!(self, EofWhileParsing),
704        }
705    }
706
707    pub(crate) fn parse_object2<V: JsonVisitor<'de>>(
708        &mut self,
709        vis: &mut V,
710        strbuf: &mut Vec<u8>,
711    ) -> Result<()> {
712        // parsing empty object
713        let mut count: usize = 0;
714        check_visit!(self, vis.visit_object_start(0))?;
715        match self.skip_space() {
716            Some(b'}') => return check_visit!(self, vis.visit_object_end(0)),
717            Some(b'"') => {}
718            _ => return perr!(self, ExpectObjectKeyOrEnd),
719        }
720
721        // loop for each object key and value
722        loop {
723            self.parse_string_owned(vis, strbuf)?;
724            self.parse_object_clo()?;
725            self.parse_value2(vis, strbuf)?;
726            count += 1;
727            match self.skip_space() {
728                Some(b'}') => return check_visit!(self, vis.visit_object_end(count)),
729                Some(b',') => match self.skip_space() {
730                    Some(b'"') => continue,
731                    _ => return perr!(self, ExpectObjectKeyOrEnd),
732                },
733                _ => return perr!(self, ExpectedArrayCommaOrEnd),
734            }
735        }
736    }
737
738    pub(crate) fn parse_array2<V: JsonVisitor<'de>>(
739        &mut self,
740        visitor: &mut V,
741        strbuf: &mut Vec<u8>,
742    ) -> Result<()> {
743        // parsing empty array
744        check_visit!(self, visitor.visit_array_start(0))?;
745
746        let mut first = match self.skip_space() {
747            Some(b']') => return check_visit!(self, visitor.visit_array_end(0)),
748            first => first,
749        };
750
751        let mut count = 0;
752        loop {
753            match first {
754                Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_visit(c, visitor),
755                Some(b'"') => self.parse_string_owned(visitor, strbuf),
756                Some(b'{') => self.parse_object2(visitor, strbuf),
757                Some(b'[') => self.parse_array2(visitor, strbuf),
758                Some(first) => self.parse_literal_visit(first, visitor),
759                None => perr!(self, EofWhileParsing),
760            }?;
761            count += 1;
762            first = match self.skip_space() {
763                Some(b']') => return check_visit!(self, visitor.visit_array_end(count)),
764                Some(b',') => self.skip_space(),
765                _ => return perr!(self, ExpectedArrayCommaOrEnd),
766            };
767        }
768    }
769
770    #[inline(always)]
771    pub fn parse_str<'own>(&mut self, buf: &'own mut Vec<u8>) -> Result<Reference<'de, 'own, str>> {
772        match self.parse_string_raw(buf) {
773            Ok(ParsedSlice::Copied(buf)) => {
774                if self.check_invalid_utf8(self.cfg.utf8_lossy)? {
775                    // repr the invalid utf-8
776                    let repr = String::from_utf8_lossy(buf.as_ref()).into_owned();
777                    *buf = repr.into_bytes();
778                }
779                let slice = unsafe { from_utf8_unchecked(buf.as_slice()) };
780                Ok(Reference::Copied(slice))
781            }
782            Ok(ParsedSlice::Borrowed { slice, buf }) => {
783                if self.check_invalid_utf8(self.cfg.utf8_lossy)? {
784                    // repr the invalid utf-8
785                    let repr = String::from_utf8_lossy(slice).into_owned();
786                    *buf = repr.into_bytes();
787                    let slice = unsafe { from_utf8_unchecked(buf) };
788                    Ok(Reference::Copied(slice))
789                } else {
790                    Ok(Reference::Borrowed(unsafe { from_utf8_unchecked(slice) }))
791                }
792            }
793            Err(e) => Err(e),
794        }
795    }
796
797    pub(crate) fn check_invalid_utf8(&mut self, allowed: bool) -> Result<bool> {
798        // the invalid UTF-8 before the string, must have been checked before.
799        let invalid = self.read.next_invalid_utf8();
800        if invalid >= self.read.index() {
801            return Ok(false);
802        }
803
804        if !allowed {
805            Err(Error::syntax(
806                ErrorCode::InvalidUTF8,
807                self.read.origin_input(),
808                invalid,
809            ))
810        } else {
811            // this space is allowed, should update the next invalid utf8 position
812            self.read.check_invalid_utf8();
813            Ok(true)
814        }
815    }
816
817    pub(crate) fn parse_escaped_utf8(&mut self) -> Result<u32> {
818        let point1 = if let Some(asc) = self.read.next_n(4) {
819            unsafe { hex_to_u32_nocheck(&*(asc.as_ptr() as *const _ as *const [u8; 4])) }
820        } else {
821            return perr!(self, EofWhileParsing);
822        };
823
824        // only check surrogate here, and we will check the code pointer later when use
825        // `codepoint_to_utf8`
826        if (0xD800..0xDC00).contains(&point1) {
827            // parse the second utf8 code point of surrogate
828            let point2 = if let Some(asc) = self.read.next_n(6) {
829                if asc[0] != b'\\' || asc[1] != b'u' {
830                    if self.cfg.utf8_lossy {
831                        return Ok(0xFFFD);
832                    } else {
833                        // invalid surrogate
834                        return perr!(self, InvalidSurrogateUnicodeCodePoint);
835                    }
836                }
837                unsafe { hex_to_u32_nocheck(&*(asc.as_ptr().add(2) as *const _ as *const [u8; 4])) }
838            } else if self.cfg.utf8_lossy {
839                return Ok(0xFFFD);
840            } else {
841                // invalid surrogate
842                return perr!(self, InvalidSurrogateUnicodeCodePoint);
843            };
844
845            /* calcute the real code point */
846            let low_bit = point2.wrapping_sub(0xdc00);
847            if (low_bit >> 10) != 0 {
848                if self.cfg.utf8_lossy {
849                    return Ok(0xFFFD);
850                } else {
851                    // invalid surrogate
852                    return perr!(self, InvalidSurrogateUnicodeCodePoint);
853                }
854            }
855
856            Ok((((point1 - 0xd800) << 10) | low_bit).wrapping_add(0x10000))
857        } else if (0xDC00..0xE000).contains(&point1) {
858            if self.cfg.utf8_lossy {
859                Ok(0xFFFD)
860            } else {
861                // invalid surrogate
862                perr!(self, InvalidSurrogateUnicodeCodePoint)
863            }
864        } else {
865            Ok(point1)
866        }
867    }
868
869    pub(crate) unsafe fn parse_escaped_char(&mut self, buf: &mut Vec<u8>) -> Result<()> {
870        'escape: loop {
871            match self.read.next() {
872                Some(b'u') => {
873                    let code = self.parse_escaped_utf8()?;
874                    buf.reserve(4);
875                    let ptr = buf.as_mut_ptr().add(buf.len());
876                    let cnt = codepoint_to_utf8(code, ptr);
877                    if cnt == 0 {
878                        return perr!(self, InvalidUnicodeCodePoint);
879                    }
880                    buf.set_len(buf.len() + cnt);
881                }
882                Some(c) if ESCAPED_TAB[c as usize] != 0 => {
883                    buf.push(ESCAPED_TAB[c as usize]);
884                }
885                None => return perr!(self, EofWhileParsing),
886                _ => return perr!(self, InvalidEscape),
887            }
888
889            // fast path for continuous escaped chars
890            if self.read.peek() == Some(b'\\') {
891                self.read.eat(1);
892                continue 'escape;
893            }
894            break 'escape;
895        }
896        Ok(())
897    }
898
899    pub(crate) unsafe fn parse_string_escaped<'own>(
900        &mut self,
901        buf: &'own mut Vec<u8>,
902    ) -> Result<ParsedSlice<'de, 'own>> {
903        #[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
904        let mut block: StringBlock<NeonBits>;
905        #[cfg(not(all(target_feature = "neon", target_arch = "aarch64")))]
906        let mut block: StringBlock<u32>;
907
908        self.parse_escaped_char(buf)?;
909
910        while let Some(chunk) = self.read.peek_n(StringBlock::LANES) {
911            buf.reserve(StringBlock::LANES);
912            let v = unsafe { load(chunk.as_ptr()) };
913            block = StringBlock::new(&v);
914
915            if block.has_unescaped() {
916                self.read.eat(block.unescaped_index());
917                return perr!(self, ControlCharacterWhileParsingString);
918            }
919
920            // write the chunk to buf, we will set new_len later
921            let chunk = from_raw_parts_mut(buf.as_mut_ptr().add(buf.len()), StringBlock::LANES);
922            v.write_to_slice_unaligned_unchecked(chunk);
923
924            if block.has_quote_first() {
925                let cnt = block.quote_index();
926                buf.set_len(buf.len() + cnt);
927
928                // skip the right quote
929                self.read.eat(cnt + 1);
930                return Ok(ParsedSlice::Copied(buf));
931            }
932
933            if block.has_backslash() {
934                // TODO: loop unrooling here
935                let cnt = block.bs_index();
936                // skip the backslash
937                self.read.eat(cnt + 1);
938                buf.set_len(buf.len() + cnt);
939                self.parse_escaped_char(buf)?;
940            } else {
941                buf.set_len(buf.len() + StringBlock::LANES);
942                self.read.eat(StringBlock::LANES);
943            }
944        }
945
946        // scalar codes
947        while let Some(c) = self.read.peek() {
948            match c {
949                b'"' => {
950                    self.read.eat(1);
951                    return Ok(ParsedSlice::Copied(buf));
952                }
953                b'\\' => {
954                    // skip the backslash
955                    self.read.eat(1);
956                    self.parse_escaped_char(buf)?;
957                }
958                b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
959                _ => {
960                    buf.push(c);
961                    self.read.eat(1);
962                }
963            }
964        }
965
966        perr!(self, EofWhileParsing)
967    }
968
969    #[inline(always)]
970    // parse_string_raw maybe borrowed, maybe copied into buf(buf will be clear at first).
971    pub(crate) fn parse_string_raw<'own>(
972        &mut self,
973        buf: &'own mut Vec<u8>,
974    ) -> Result<ParsedSlice<'de, 'own>> {
975        // now reader is start after `"`, so we can directly skipstring
976        let start = self.read.index();
977        #[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
978        let mut block: StringBlock<NeonBits>;
979        #[cfg(not(all(target_feature = "neon", target_arch = "aarch64")))]
980        let mut block: StringBlock<u32>;
981
982        while let Some(chunk) = self.read.peek_n(StringBlock::LANES) {
983            let v = unsafe { load(chunk.as_ptr()) };
984            block = StringBlock::new(&v);
985
986            if block.has_quote_first() {
987                let cnt = block.quote_index();
988                self.read.eat(cnt + 1);
989                let slice = self.read.slice_unchecked(start, self.read.index() - 1);
990                return Ok(ParsedSlice::Borrowed { slice, buf });
991            }
992
993            if block.has_unescaped() {
994                self.read.eat(block.unescaped_index());
995                return perr!(self, ControlCharacterWhileParsingString);
996            }
997
998            if block.has_backslash() {
999                let cnt = block.bs_index();
1000                // skip the backslash
1001                self.read.eat(cnt + 1);
1002
1003                // copy unescaped parts to buf
1004                buf.clear();
1005                buf.extend_from_slice(&self.read.as_u8_slice()[start..self.read.index() - 1]);
1006
1007                return unsafe { self.parse_string_escaped(buf) };
1008            }
1009
1010            self.read.eat(StringBlock::LANES);
1011            continue;
1012        }
1013
1014        // found quote for remaining bytes
1015        while let Some(c) = self.read.peek() {
1016            match c {
1017                b'"' => {
1018                    self.read.eat(1);
1019                    let slice = self.read.slice_unchecked(start, self.read.index() - 1);
1020                    return Ok(ParsedSlice::Borrowed { slice, buf });
1021                }
1022                b'\\' => {
1023                    buf.clear();
1024                    buf.extend_from_slice(self.read.slice_unchecked(start, self.read.index()));
1025                    self.read.eat(1);
1026                    return unsafe { self.parse_string_escaped(buf) };
1027                }
1028                b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
1029                _ => self.read.eat(1),
1030            }
1031        }
1032        perr!(self, EofWhileParsing)
1033    }
1034
1035    #[inline(always)]
1036    fn get_next_token<const N: usize>(&mut self, tokens: [u8; N], advance: usize) -> Option<u8> {
1037        let r = &mut self.read;
1038        const LANS: usize = u8x32::LANES;
1039        while let Some(chunk) = r.peek_n(LANS) {
1040            let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
1041            let mut vor = m8x32::splat(false);
1042            for t in tokens.iter().take(N) {
1043                vor |= v.eq(&u8x32::splat(*t));
1044            }
1045            let next = vor.bitmask();
1046            if next != 0 {
1047                let cnt = next.trailing_zeros() as usize;
1048                let ch = chunk[cnt];
1049                r.eat(cnt + advance);
1050                return Some(ch);
1051            }
1052            r.eat(LANS);
1053        }
1054
1055        while let Some(ch) = r.peek() {
1056            for t in tokens.iter().take(N) {
1057                if ch == *t {
1058                    r.eat(advance);
1059                    return Some(ch);
1060                }
1061            }
1062            r.eat(1)
1063        }
1064        None
1065    }
1066
1067    #[inline(always)]
1068    unsafe fn skip_string_unchecked2(&mut self) -> Result<()> {
1069        let _ = self.skip_string_unchecked()?;
1070        Ok(())
1071    }
1072
1073    // skip_string skips a JSON string, and return the later parts after closed quote, and the
1074    // escaped status. skip_string always start with the quote marks.
1075    #[inline(always)]
1076    unsafe fn skip_string_unchecked(&mut self) -> Result<ParseStatus> {
1077        const LANS: usize = u8x32::LANES;
1078        let r = &mut self.read;
1079        let mut quote_bits;
1080        let mut escaped;
1081        let mut prev_escaped = 0;
1082        let mut status = ParseStatus::None;
1083
1084        while let Some(chunk) = r.peek_n(LANS) {
1085            let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
1086            let bs_bits = (v.eq(&u8x32::splat(b'\\'))).bitmask();
1087            quote_bits = (v.eq(&u8x32::splat(b'"'))).bitmask();
1088            // maybe has escaped quotes
1089            if ((quote_bits.wrapping_sub(1)) & bs_bits) != 0 || prev_escaped != 0 {
1090                escaped = get_escaped_branchless_u32(&mut prev_escaped, bs_bits);
1091                status = ParseStatus::HasEscaped;
1092                quote_bits &= !escaped;
1093            }
1094            // real quote bits
1095            if quote_bits != 0 {
1096                // eat the ending quote mark
1097                r.eat(quote_bits.trailing_zeros() as usize + 1);
1098                return Ok(status);
1099            }
1100            r.eat(LANS)
1101        }
1102
1103        // skip the possible prev escaped quote
1104        if prev_escaped != 0 {
1105            r.eat(1)
1106        }
1107
1108        // found quote for remaining bytes
1109        while let Some(ch) = r.peek() {
1110            if ch == b'\\' {
1111                if r.remain() < 2 {
1112                    break;
1113                }
1114                status = ParseStatus::HasEscaped;
1115                r.eat(2);
1116                continue;
1117            }
1118            r.eat(1);
1119            if ch == b'"' {
1120                return Ok(status);
1121            }
1122        }
1123        perr!(self, EofWhileParsing)
1124    }
1125
1126    fn skip_escaped_chars(&mut self) -> Result<()> {
1127        match self.read.peek() {
1128            Some(b'u') => {
1129                if self.read.remain() < 6 {
1130                    return perr!(self, EofWhileParsing);
1131                } else {
1132                    self.read.eat(5);
1133                }
1134            }
1135            Some(c) => {
1136                if self.read.next().is_none() {
1137                    return perr!(self, EofWhileParsing);
1138                }
1139                if ESCAPED_TAB[c as usize] == 0 {
1140                    return perr!(self, InvalidEscape);
1141                }
1142            }
1143            None => return perr!(self, EofWhileParsing),
1144        }
1145        Ok(())
1146    }
1147
1148    // skip_string skips a JSON string with validation.
1149    #[inline(always)]
1150    fn skip_string(&mut self) -> Result<ParseStatus> {
1151        const LANS: usize = u8x32::LANES;
1152
1153        let mut status = ParseStatus::None;
1154        while let Some(chunk) = self.read.peek_n(LANS) {
1155            let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
1156            let v_bs = v.eq(&u8x32::splat(b'\\'));
1157            let v_quote = v.eq(&u8x32::splat(b'"'));
1158            let v_cc = v.le(&u8x32::splat(0x1f));
1159            let mask = (v_bs | v_quote | v_cc).bitmask();
1160
1161            // check the mask
1162            if mask != 0 {
1163                let cnt = mask.trailing_zeros() as usize;
1164                self.read.eat(cnt + 1);
1165
1166                match chunk[cnt] {
1167                    b'\\' => {
1168                        self.skip_escaped_chars()?;
1169                        status = ParseStatus::HasEscaped;
1170                    }
1171                    b'\"' => return Ok(status),
1172                    0..=0x1f => return perr!(self, ControlCharacterWhileParsingString),
1173                    _ => unreachable!(),
1174                }
1175            } else {
1176                self.read.eat(LANS)
1177            }
1178        }
1179
1180        // found quote for remaining bytes
1181        while let Some(ch) = self.read.next() {
1182            match ch {
1183                b'\\' => {
1184                    self.skip_escaped_chars()?;
1185                    status = ParseStatus::HasEscaped;
1186                }
1187                b'"' => return Ok(status),
1188                0..=0x1f => return perr!(self, ControlCharacterWhileParsingString),
1189                _ => {}
1190            }
1191        }
1192        perr!(self, EofWhileParsing)
1193    }
1194
1195    // parse the Colon :
1196    #[inline(always)]
1197    pub(crate) fn parse_object_clo(&mut self) -> Result<()> {
1198        if let Some(ch) = self.read.peek() {
1199            // fast path for compact json
1200            if ch == b':' {
1201                self.read.eat(1);
1202                return Ok(());
1203            }
1204
1205            match self.skip_space() {
1206                Some(b':') => Ok(()),
1207                Some(_) => perr!(self, ExpectedColon),
1208                None => perr!(self, EofWhileParsing),
1209            }
1210        } else {
1211            perr!(self, EofWhileParsing)
1212        }
1213    }
1214
1215    // parse the Colon :
1216    #[inline(always)]
1217    pub(crate) fn parse_array_end(&mut self) -> Result<()> {
1218        match self.skip_space() {
1219            Some(b']') => Ok(()),
1220            Some(_) => perr!(self, ExpectedArrayCommaOrEnd),
1221            None => perr!(self, EofWhileParsing),
1222        }
1223    }
1224
1225    #[inline(always)]
1226    fn skip_object(&mut self) -> Result<()> {
1227        match self.skip_space() {
1228            Some(b'}') => return Ok(()),
1229            Some(b'"') => {}
1230            None => return perr!(self, EofWhileParsing),
1231            Some(_) => return perr!(self, ExpectObjectKeyOrEnd),
1232        }
1233
1234        loop {
1235            self.skip_string()?;
1236            self.parse_object_clo()?;
1237            self.skip_one()?;
1238
1239            match self.skip_space() {
1240                Some(b'}') => return Ok(()),
1241                Some(b',') => match self.skip_space() {
1242                    Some(b'"') => continue,
1243                    _ => return perr!(self, ExpectObjectKeyOrEnd),
1244                },
1245                None => return perr!(self, EofWhileParsing),
1246                Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
1247            }
1248        }
1249    }
1250
1251    #[inline(always)]
1252    fn skip_array(&mut self) -> Result<()> {
1253        match self.skip_space_peek() {
1254            Some(b']') => {
1255                self.read.eat(1);
1256                return Ok(());
1257            }
1258            None => return perr!(self, EofWhileParsing),
1259            _ => {}
1260        }
1261
1262        loop {
1263            self.skip_one()?;
1264            match self.skip_space() {
1265                Some(b']') => return Ok(()),
1266                Some(b',') => continue,
1267                None => return perr!(self, EofWhileParsing),
1268                _ => return perr!(self, ExpectedArrayCommaOrEnd),
1269            }
1270        }
1271    }
1272
1273    /// skip_container skip a object or array, and retu
1274    #[inline(always)]
1275    fn skip_container(&mut self, left: u8, right: u8) -> Result<()> {
1276        let mut prev_instring = 0;
1277        let mut prev_escaped = 0;
1278        let mut rbrace_num = 0;
1279        let mut lbrace_num = 0;
1280        let reader = &mut self.read;
1281
1282        while let Some(chunk) = reader.peek_n(64) {
1283            let input = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
1284            if let Some(count) = skip_container_loop(
1285                input,
1286                &mut prev_instring,
1287                &mut prev_escaped,
1288                &mut lbrace_num,
1289                &mut rbrace_num,
1290                left,
1291                right,
1292            ) {
1293                reader.eat(count.get() as usize);
1294                return Ok(());
1295            }
1296            reader.eat(64);
1297        }
1298
1299        let mut remain = [0u8; 64];
1300        unsafe {
1301            let n = reader.remain();
1302            remain[..n].copy_from_slice(reader.peek_n(n).unwrap_unchecked());
1303        }
1304        if let Some(count) = skip_container_loop(
1305            &remain,
1306            &mut prev_instring,
1307            &mut prev_escaped,
1308            &mut lbrace_num,
1309            &mut rbrace_num,
1310            left,
1311            right,
1312        ) {
1313            reader.eat(count.get() as usize);
1314            return Ok(());
1315        }
1316
1317        perr!(self, EofWhileParsing)
1318    }
1319
1320    #[inline(always)]
1321    pub fn skip_space(&mut self) -> Option<u8> {
1322        let reader = &mut self.read;
1323        // fast path 1: for nospace or single space
1324        // most JSON is like ` "name": "balabala" `
1325        if let Some(ch) = reader.next() {
1326            if !is_whitespace(ch) {
1327                return Some(ch);
1328            }
1329        }
1330        if let Some(ch) = reader.next() {
1331            if !is_whitespace(ch) {
1332                return Some(ch);
1333            }
1334        }
1335
1336        // fast path 2: reuse the bitmap for short key or numbers
1337        let nospace_offset = (reader.index() as isize) - self.nospace_start;
1338        if nospace_offset < 64 {
1339            let bitmap = {
1340                let mask = !((1 << nospace_offset) - 1);
1341                self.nospace_bits & mask
1342            };
1343            if bitmap != 0 {
1344                let cnt = bitmap.trailing_zeros() as usize;
1345                let ch = reader.at(self.nospace_start as usize + cnt);
1346                reader.set_index(self.nospace_start as usize + cnt + 1);
1347
1348                return Some(ch);
1349            } else {
1350                // we can still fast skip the marked space in here.
1351                reader.set_index(self.nospace_start as usize + 64);
1352            }
1353        }
1354
1355        // then we use simd to accelerate skipping space
1356        while let Some(chunk) = reader.peek_n(64) {
1357            let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
1358            let bitmap = unsafe { get_nonspace_bits(chunk) };
1359            if bitmap != 0 {
1360                self.nospace_bits = bitmap;
1361                self.nospace_start = reader.index() as isize;
1362                let cnt = bitmap.trailing_zeros() as usize;
1363                let ch = chunk[cnt];
1364                reader.eat(cnt + 1);
1365
1366                return Some(ch);
1367            }
1368            reader.eat(64)
1369        }
1370
1371        while let Some(ch) = reader.next() {
1372            if !is_whitespace(ch) {
1373                //
1374                return Some(ch);
1375            }
1376        }
1377        None
1378    }
1379
1380    #[inline(always)]
1381    pub fn skip_space_peek(&mut self) -> Option<u8> {
1382        let ret = self.skip_space()?;
1383        self.read.backward(1);
1384        Some(ret)
1385    }
1386
1387    #[inline(always)]
1388    pub fn parse_literal(&mut self, literal: &str) -> Result<()> {
1389        let reader = &mut self.read;
1390        if let Some(chunk) = reader.next_n(literal.len()) {
1391            if chunk == literal.as_bytes() {
1392                Ok(())
1393            } else {
1394                perr!(self, InvalidLiteral)
1395            }
1396        } else {
1397            perr!(self, EofWhileParsing)
1398        }
1399    }
1400
1401    #[inline(always)]
1402    fn skip_number_unsafe(&mut self) -> Result<()> {
1403        let _ = self.get_next_token([b']', b'}', b','], 0);
1404        Ok(())
1405    }
1406
1407    #[inline(always)]
1408    fn skip_exponent(&mut self) -> Result<()> {
1409        if let Some(ch) = self.read.peek() {
1410            if ch == b'-' || ch == b'+' {
1411                self.read.eat(1);
1412            }
1413        }
1414        self.skip_single_digit()?;
1415        // skip the remaining digits
1416        while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1417            self.read.eat(1);
1418        }
1419        Ok(())
1420    }
1421
1422    #[inline(always)]
1423    fn skip_single_digit(&mut self) -> Result<u8> {
1424        if let Some(ch) = self.read.next() {
1425            if !ch.is_ascii_digit() {
1426                perr!(self, InvalidNumber)
1427            } else {
1428                Ok(ch)
1429            }
1430        } else {
1431            perr!(self, EofWhileParsing)
1432        }
1433    }
1434
1435    #[inline(always)]
1436    pub fn skip_number(&mut self, first: u8) -> Result<&'de str> {
1437        let start = self.read.index() - 1;
1438        self.do_skip_number(first)?;
1439        let end = self.read.index();
1440        Ok(as_str(self.read.slice_unchecked(start, end)))
1441    }
1442
1443    #[inline(always)]
1444    pub(crate) fn do_skip_number(&mut self, mut first: u8) -> Result<()> {
1445        // check eof after the sign
1446        if first == b'-' {
1447            first = self.skip_single_digit()?;
1448        }
1449
1450        // check the leading zeros
1451        let second = self.read.peek();
1452        if first == b'0' && matches!(second, Some(b'0'..=b'9')) {
1453            return perr!(self, InvalidNumber);
1454        }
1455
1456        // fast path for the single digit
1457        let mut is_float: bool = false;
1458        match second {
1459            Some(b'0'..=b'9') => self.read.eat(1),
1460            Some(b'.') => {
1461                is_float = true;
1462                self.read.eat(1);
1463                self.skip_single_digit()?;
1464            }
1465            Some(b'e' | b'E') => {
1466                self.read.eat(1);
1467                return self.skip_exponent();
1468            }
1469            _ => return Ok(()),
1470        }
1471
1472        // SIMD path for long number
1473        const LANES: usize = i8x32::LANES;
1474        while let Some(chunk) = self.read.peek_n(LANES) {
1475            let v = unsafe { i8x32::from_slice_unaligned_unchecked(chunk) };
1476            let zero = i8x32::splat(b'0' as i8);
1477            let nine = i8x32::splat(b'9' as i8);
1478            let mut nondigits = (zero.gt(&v) | v.gt(&nine)).bitmask();
1479            if nondigits != 0 {
1480                let mut cnt = nondigits.trailing_zeros() as usize;
1481                let ch = chunk[cnt];
1482                if ch == b'.' && !is_float {
1483                    self.read.eat(cnt + 1);
1484                    // check the first digit after the dot
1485                    self.skip_single_digit()?;
1486
1487                    // check the overflow
1488                    cnt += 2;
1489                    if cnt >= LANES {
1490                        is_float = true;
1491                        continue;
1492                    }
1493
1494                    nondigits = nondigits.wrapping_shr(cnt as u32);
1495                    if nondigits != 0 {
1496                        let offset = nondigits.trailing_zeros() as usize;
1497                        let ch = chunk[cnt + offset];
1498                        if ch == b'e' || ch == b'E' {
1499                            self.read.eat(offset + 1);
1500                            return self.skip_exponent();
1501                        } else {
1502                            self.read.eat(offset);
1503                            return Ok(());
1504                        }
1505                    } else {
1506                        self.read.eat(32 - cnt);
1507                        is_float = true;
1508                        continue;
1509                    }
1510                } else if ch == b'e' || ch == b'E' {
1511                    self.read.eat(cnt + 1);
1512                    return self.skip_exponent();
1513                } else {
1514                    self.read.eat(cnt);
1515                    return Ok(());
1516                }
1517            }
1518            // long digits
1519            self.read.eat(32);
1520        }
1521
1522        // has less than 32 bytes
1523        while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1524            self.read.eat(1);
1525        }
1526
1527        match self.read.peek() {
1528            Some(b'.') if !is_float => {
1529                self.read.eat(1);
1530                self.skip_single_digit()?;
1531                while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1532                    self.read.eat(1);
1533                }
1534                match self.read.peek() {
1535                    Some(b'e' | b'E') => {
1536                        self.read.eat(1);
1537                        return self.skip_exponent();
1538                    }
1539                    _ => return Ok(()),
1540                }
1541            }
1542            Some(b'e' | b'E') => {
1543                self.read.eat(1);
1544                return self.skip_exponent();
1545            }
1546            _ => {}
1547        }
1548        Ok(())
1549    }
1550
1551    #[inline(always)]
1552    pub fn skip_one(&mut self) -> Result<(&'de [u8], ParseStatus)> {
1553        let ch = self.skip_space();
1554        let start = self.read.index() - 1;
1555        let mut status = ParseStatus::None;
1556        match ch {
1557            Some(c @ b'-' | c @ b'0'..=b'9') => {
1558                self.skip_number(c)?;
1559                Ok(())
1560            }
1561            Some(b'"') => {
1562                status = self.skip_string()?;
1563                Ok(())
1564            }
1565            Some(b'{') => self.skip_object(),
1566            Some(b'[') => self.skip_array(),
1567            Some(b't') => self.parse_literal("rue"),
1568            Some(b'f') => self.parse_literal("alse"),
1569            Some(b'n') => self.parse_literal("ull"),
1570            Some(_) => perr!(self, InvalidJsonValue),
1571            None => perr!(self, EofWhileParsing),
1572        }?;
1573        let slice = self.read.slice_unchecked(start, self.read.index());
1574        Ok((slice, status))
1575    }
1576
1577    #[inline(always)]
1578    pub fn skip_one_unchecked(&mut self) -> Result<(&'de [u8], ParseStatus)> {
1579        let ch = self.skip_space();
1580        let start = self.read.index() - 1;
1581        let mut status = ParseStatus::None;
1582        match ch {
1583            Some(b'-' | b'0'..=b'9') => self.skip_number_unsafe(),
1584            Some(b'"') => {
1585                status = unsafe { self.skip_string_unchecked() }?;
1586                Ok(())
1587            }
1588            Some(b'{') => self.skip_container(b'{', b'}'),
1589            Some(b'[') => self.skip_container(b'[', b']'),
1590            Some(b't') => self.parse_literal("rue"),
1591            Some(b'f') => self.parse_literal("alse"),
1592            Some(b'n') => self.parse_literal("ull"),
1593            Some(_) => perr!(self, InvalidJsonValue),
1594            None => perr!(self, EofWhileParsing),
1595        }?;
1596        let slice = self.read.slice_unchecked(start, self.read.index());
1597        Ok((slice, status))
1598    }
1599
1600    #[inline(always)]
1601    pub(crate) fn parse_trailing(&mut self) -> Result<()> {
1602        // check exceed
1603        let exceed = self.read.index() > self.read.as_u8_slice().len();
1604        if exceed {
1605            return perr!(self, EofWhileParsing);
1606        }
1607
1608        // has_main should marked before skip_space
1609        let remain = self.read.remain() > 0;
1610        if !remain {
1611            return Ok(());
1612        }
1613
1614        // note: we use padding chars `x"x` when parsing json into dom.
1615        // so, we should check the trailing chars is not the padding chars.
1616        let last = self.skip_space();
1617        let exceed = self.read.index() > self.read.as_u8_slice().len();
1618        if last.is_some() && !exceed {
1619            perr!(self, TrailingCharacters)
1620        } else {
1621            Ok(())
1622        }
1623    }
1624
1625    // get_from_object will make reader at the position after target key in JSON object.
1626    #[inline(always)]
1627    fn get_from_object(&mut self, target_key: &str, temp_buf: &mut Vec<u8>) -> Result<()> {
1628        match self.skip_space() {
1629            Some(b'{') => {}
1630            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON object")),
1631            None => return perr!(self, EofWhileParsing),
1632        }
1633
1634        // deal with the empty object
1635        match self.get_next_token([b'"', b'}'], 1) {
1636            Some(b'"') => {}
1637            Some(b'}') => return perr!(self, GetInEmptyObject),
1638            None => return perr!(self, EofWhileParsing),
1639            Some(_) => unreachable!(),
1640        }
1641
1642        loop {
1643            let key = self.parse_string_raw(temp_buf)?;
1644            self.parse_object_clo()?;
1645            if key.len() == target_key.len() && key.as_ref() == target_key.as_bytes() {
1646                return Ok(());
1647            }
1648
1649            // skip object,array,string at first
1650            match self.skip_space() {
1651                Some(b'{') => self.skip_container(b'{', b'}')?,
1652                Some(b'[') => self.skip_container(b'[', b']')?,
1653                Some(b'"') => unsafe {
1654                    let _ = self.skip_string_unchecked()?;
1655                },
1656                None => return perr!(self, EofWhileParsing),
1657                _ => {}
1658            };
1659
1660            // optimize: direct find the next quote of key. or object ending
1661            match self.get_next_token([b'"', b'}'], 1) {
1662                Some(b'"') => continue,
1663                Some(b'}') => return perr!(self, GetUnknownKeyInObject),
1664                None => return perr!(self, EofWhileParsing),
1665                Some(_) => unreachable!(),
1666            }
1667        }
1668    }
1669
1670    // get_from_object will make reader at the position after target key in JSON object.
1671    #[inline(always)]
1672    fn get_from_object_checked(&mut self, target_key: &str, temp_buf: &mut Vec<u8>) -> Result<()> {
1673        match self.skip_space() {
1674            Some(b'{') => {}
1675            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON object")),
1676            None => return perr!(self, EofWhileParsing),
1677        }
1678
1679        // deal with the empty object
1680        match self.get_next_token([b'"', b'}'], 1) {
1681            Some(b'"') => {}
1682            Some(b'}') => return perr!(self, GetInEmptyObject),
1683            None => return perr!(self, EofWhileParsing),
1684            Some(_) => unreachable!(),
1685        }
1686
1687        loop {
1688            let key = self.parse_string_raw(temp_buf)?;
1689            self.parse_object_clo()?;
1690            if key.len() == target_key.len() && key.as_ref() == target_key.as_bytes() {
1691                return Ok(());
1692            }
1693
1694            self.skip_one()?;
1695
1696            match self.skip_space() {
1697                Some(b'}') => return perr!(self, GetUnknownKeyInObject),
1698                Some(b',') => match self.skip_space() {
1699                    Some(b'"') => continue,
1700                    _ => return perr!(self, ExpectObjectKeyOrEnd),
1701                },
1702                None => return perr!(self, EofWhileParsing),
1703                _ => return perr!(self, ExpectedObjectCommaOrEnd),
1704            };
1705        }
1706    }
1707
1708    #[inline(always)]
1709    fn get_from_array_checked(&mut self, index: usize) -> Result<()> {
1710        let mut count = index;
1711        match self.skip_space() {
1712            Some(b'[') => {}
1713            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON array")),
1714            None => return perr!(self, EofWhileParsing),
1715        }
1716
1717        match self.skip_space_peek() {
1718            Some(b']') => return perr!(self, GetInEmptyArray),
1719            Some(_) => {}
1720            None => return perr!(self, EofWhileParsing),
1721        }
1722
1723        while count > 0 {
1724            self.skip_one()?;
1725
1726            match self.skip_space() {
1727                Some(b']') => return perr!(self, GetIndexOutOfArray),
1728                Some(b',') => {}
1729                Some(_) => return perr!(self, ExpectedArrayCommaOrEnd),
1730                None => return perr!(self, EofWhileParsing),
1731            }
1732
1733            count -= 1;
1734            match self.skip_space_peek() {
1735                Some(_) if count == 0 => return Ok(()),
1736                None => return perr!(self, EofWhileParsing),
1737                _ => continue,
1738            }
1739        }
1740
1741        // index is 0, just skipped '[' and return
1742        Ok(())
1743    }
1744
1745    // get_from_array will make reader at the position after target index in JSON array.
1746    #[inline(always)]
1747    fn get_from_array(&mut self, index: usize) -> Result<()> {
1748        let mut count = index;
1749        match self.skip_space() {
1750            Some(b'[') => {}
1751            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON array")),
1752            None => return perr!(self, EofWhileParsing),
1753        }
1754        while count > 0 {
1755            // skip object,array,string at first
1756            match self.skip_space() {
1757                Some(b'{') => self.skip_container(b'{', b'}')?,
1758                Some(b'[') => self.skip_container(b'[', b']')?,
1759                Some(b'"') => unsafe { self.skip_string_unchecked2() }?,
1760                Some(b']') => return perr!(self, GetInEmptyArray),
1761                None => return perr!(self, EofWhileParsing),
1762                _ => {}
1763            };
1764
1765            // optimize: direct find the next token
1766            match self.get_next_token([b']', b','], 1) {
1767                Some(b']') => return perr!(self, GetIndexOutOfArray),
1768                Some(b',') => {
1769                    count -= 1;
1770                    continue;
1771                }
1772                None => return perr!(self, EofWhileParsing),
1773                Some(_) => unreachable!(),
1774            }
1775        }
1776        // special case: `[]` will report error when skip one later.
1777        Ok(())
1778    }
1779
1780    pub(crate) fn get_from_with_iter_unchecked<P: IntoIterator>(
1781        &mut self,
1782        path: P,
1783    ) -> Result<(&'de [u8], ParseStatus)>
1784    where
1785        P::Item: Index,
1786    {
1787        // temp buf reused when parsing each escaped key
1788        let mut temp_buf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
1789        for jp in path.into_iter() {
1790            if let Some(key) = jp.as_key() {
1791                self.get_from_object(key, &mut temp_buf)
1792            } else if let Some(index) = jp.as_index() {
1793                self.get_from_array(index)
1794            } else {
1795                unreachable!();
1796            }?;
1797        }
1798        self.skip_one()
1799    }
1800
1801    pub(crate) fn get_from_with_iter<P: IntoIterator>(
1802        &mut self,
1803        path: P,
1804    ) -> Result<(&'de [u8], ParseStatus)>
1805    where
1806        P::Item: Index,
1807    {
1808        // temp buf reused when parsing each escaped key
1809        let mut temp_buf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
1810        for jp in path.into_iter() {
1811            if let Some(key) = jp.as_key() {
1812                self.get_from_object_checked(key, &mut temp_buf)
1813            } else if let Some(index) = jp.as_index() {
1814                self.get_from_array_checked(index)
1815            } else {
1816                unreachable!();
1817            }?;
1818        }
1819        self.skip_one()
1820    }
1821
1822    fn get_many_rec(
1823        &mut self,
1824        node: &PointerTreeNode,
1825        out: &mut Vec<Option<LazyValue<'de>>>,
1826        strbuf: &mut Vec<u8>,
1827        remain: &mut usize,
1828        is_safe: bool,
1829    ) -> Result<()> {
1830        // all path has parsed
1831        if *remain == 0 {
1832            return Ok(());
1833        }
1834
1835        // skip the leading space
1836        let ch = self.skip_space_peek();
1837        if ch.is_none() {
1838            return perr!(self, EofWhileParsing);
1839        }
1840
1841        // need write to out, record the start position
1842        let start = self.read.index();
1843        let slice: &'de [u8];
1844
1845        let mut status = ParseStatus::None;
1846        match &node.children {
1847            PointerTreeInner::Empty => {
1848                status = self.skip_one()?.1;
1849            }
1850            PointerTreeInner::Index(midxs) => {
1851                if is_safe {
1852                    self.get_many_index(midxs, strbuf, out, remain)?
1853                } else {
1854                    self.get_many_index_unchecked(midxs, strbuf, out, remain)?
1855                }
1856            }
1857            PointerTreeInner::Key(mkeys) => {
1858                if is_safe {
1859                    self.get_many_keys(mkeys, strbuf, out, remain)?
1860                } else {
1861                    self.get_many_keys_unchecked(mkeys, strbuf, out, remain)?
1862                }
1863            }
1864        };
1865
1866        if !node.order.is_empty() {
1867            slice = self.read.slice_unchecked(start, self.read.index());
1868            let lv = LazyValue::new(slice.into(), status.into());
1869            for p in &node.order {
1870                out[*p] = Some(lv.clone());
1871            }
1872            *remain -= node.order.len();
1873        }
1874        Ok(())
1875    }
1876
1877    #[allow(clippy::mutable_key_type)]
1878    fn get_many_keys_unchecked(
1879        &mut self,
1880        mkeys: &MultiKey,
1881        strbuf: &mut Vec<u8>,
1882        out: &mut Vec<Option<LazyValue<'de>>>,
1883        remain: &mut usize,
1884    ) -> Result<()> {
1885        debug_assert!(strbuf.is_empty());
1886        match self.skip_space() {
1887            Some(b'{') => {}
1888            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON object")),
1889            None => return perr!(self, EofWhileParsing),
1890        }
1891
1892        // deal with the empty object
1893        match self.get_next_token([b'"', b'}'], 1) {
1894            Some(b'"') => {}
1895            Some(b'}') => return perr!(self, GetInEmptyObject),
1896            None => return perr!(self, EofWhileParsing),
1897            Some(_) => unreachable!(),
1898        }
1899
1900        loop {
1901            let key = self.parse_str(strbuf)?;
1902            self.parse_object_clo()?;
1903            if let Some(val) = mkeys.get(key.deref()) {
1904                self.get_many_rec(val, out, strbuf, remain, false)?;
1905                if *remain == 0 {
1906                    break;
1907                }
1908            } else {
1909                // skip object,array,string at first
1910                match self.skip_space() {
1911                    Some(b'{') => self.skip_container(b'{', b'}')?,
1912                    Some(b'[') => self.skip_container(b'[', b']')?,
1913                    Some(b'"') => unsafe { self.skip_string_unchecked2() }?,
1914                    None => return perr!(self, EofWhileParsing),
1915                    _ => {}
1916                };
1917            }
1918
1919            // optimize: direct find the next quote of key. or object ending
1920            match self.get_next_token([b'"', b'}'], 1) {
1921                Some(b'"') => {}
1922                Some(b'}') => break,
1923                None => return perr!(self, EofWhileParsing),
1924                Some(_) => unreachable!(),
1925            }
1926        }
1927
1928        Ok(())
1929    }
1930
1931    #[allow(clippy::mutable_key_type)]
1932    fn get_many_keys(
1933        &mut self,
1934        mkeys: &MultiKey,
1935        strbuf: &mut Vec<u8>,
1936        out: &mut Vec<Option<LazyValue<'de>>>,
1937        remain: &mut usize,
1938    ) -> Result<()> {
1939        debug_assert!(strbuf.is_empty());
1940        match self.skip_space() {
1941            Some(b'{') => {}
1942            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON object")),
1943            None => return perr!(self, EofWhileParsing),
1944        }
1945
1946        // deal with the empty object
1947        match self.skip_space() {
1948            Some(b'"') => {}
1949            Some(b'}') => return perr!(self, GetInEmptyObject),
1950            _ => {
1951                return perr!(self, ExpectObjectKeyOrEnd);
1952            }
1953        }
1954
1955        loop {
1956            let key = self.parse_str(strbuf)?;
1957            self.parse_object_clo()?;
1958            if let Some(val) = mkeys.get(key.deref()) {
1959                // parse the child point tree
1960                self.get_many_rec(val, out, strbuf, remain, true)?;
1961                if *remain == 0 {
1962                    break;
1963                }
1964            } else {
1965                self.skip_one()?;
1966            }
1967
1968            match self.skip_space() {
1969                Some(b',') if self.skip_space() == Some(b'"') => continue,
1970                Some(b',') => return perr!(self, ExpectObjectKeyOrEnd),
1971                Some(b'}') => break,
1972                Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
1973                None => return perr!(self, EofWhileParsing),
1974            }
1975        }
1976
1977        Ok(())
1978    }
1979
1980    #[cfg(test)]
1981    #[allow(dead_code)]
1982    pub(crate) fn remain_str(&self) -> &'de str {
1983        as_str(self.remain_u8_slice())
1984    }
1985
1986    #[cfg(test)]
1987    #[allow(dead_code)]
1988    pub(crate) fn remain_u8_slice(&self) -> &'de [u8] {
1989        let reader = &self.read;
1990        let start = reader.index();
1991        reader.slice_unchecked(start, start + reader.remain())
1992    }
1993
1994    fn get_many_index_unchecked(
1995        &mut self,
1996        midx: &MultiIndex,
1997        strbuf: &mut Vec<u8>,
1998        out: &mut Vec<Option<LazyValue<'de>>>,
1999        remain: &mut usize,
2000    ) -> Result<()> {
2001        match self.skip_space() {
2002            Some(b'[') => {}
2003            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON array")),
2004            None => return perr!(self, EofWhileParsing),
2005        }
2006        let mut index = 0;
2007        let mut visited = 0;
2008
2009        match self.skip_space_peek() {
2010            Some(b']') => return perr!(self, GetInEmptyArray),
2011            None => return perr!(self, EofWhileParsing),
2012            _ => {}
2013        };
2014
2015        loop {
2016            if let Some(val) = midx.get(&index) {
2017                self.get_many_rec(val, out, strbuf, remain, false)?;
2018                visited += 1;
2019                if *remain == 0 {
2020                    break;
2021                }
2022            } else {
2023                // skip object,array,string at first
2024                match self.skip_space() {
2025                    Some(b'{') => self.skip_container(b'{', b'}')?,
2026                    Some(b'[') => self.skip_container(b'[', b']')?,
2027                    Some(b'"') => unsafe { self.skip_string_unchecked2() }?,
2028                    None => return perr!(self, EofWhileParsing),
2029                    _ => {}
2030                };
2031            }
2032
2033            // optimize: direct find the next token
2034            match self.get_next_token([b']', b','], 1) {
2035                Some(b']') => break,
2036                Some(b',') => {
2037                    index += 1;
2038                    continue;
2039                }
2040                None => return perr!(self, EofWhileParsing),
2041                Some(_) => unreachable!(),
2042            }
2043        }
2044
2045        // check whether remaining unknown keys
2046        if visited < midx.len() {
2047            perr!(self, GetIndexOutOfArray)
2048        } else {
2049            Ok(())
2050        }
2051    }
2052
2053    fn get_many_index(
2054        &mut self,
2055        midx: &MultiIndex,
2056        strbuf: &mut Vec<u8>,
2057        out: &mut Vec<Option<LazyValue<'de>>>,
2058        remain: &mut usize,
2059    ) -> Result<()> {
2060        match self.skip_space() {
2061            Some(b'[') => {}
2062            Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON array")),
2063            None => return perr!(self, EofWhileParsing),
2064        }
2065        let mut index = 0;
2066        let mut visited = 0;
2067
2068        // check empty array
2069        match self.skip_space_peek() {
2070            Some(b']') => return perr!(self, GetInEmptyArray),
2071            Some(_) => {}
2072            None => return perr!(self, EofWhileParsing),
2073        }
2074
2075        loop {
2076            if let Some(val) = midx.get(&index) {
2077                self.get_many_rec(val, out, strbuf, remain, true)?;
2078                visited += 1;
2079                if *remain == 0 {
2080                    break;
2081                }
2082            } else {
2083                self.skip_one()?;
2084            }
2085
2086            match self.skip_space() {
2087                Some(b']') => break,
2088                Some(b',') => {
2089                    index += 1;
2090                    continue;
2091                }
2092                Some(_) => return perr!(self, ExpectedArrayCommaOrEnd),
2093                None => return perr!(self, EofWhileParsing),
2094            }
2095        }
2096
2097        // check whether remaining unknown keys
2098        if visited < midx.len() {
2099            perr!(self, GetIndexOutOfArray)
2100        } else {
2101            Ok(())
2102        }
2103    }
2104
2105    pub(crate) fn get_many(
2106        &mut self,
2107        tree: &PointerTree,
2108        is_safe: bool,
2109    ) -> Result<Vec<Option<LazyValue<'de>>>> {
2110        let mut strbuf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
2111        let mut remain = tree.size();
2112        let mut out: Vec<Option<LazyValue<'de>>> = Vec::with_capacity(tree.size());
2113        out.resize(tree.size(), Option::default());
2114        let cur = &tree.root;
2115        self.get_many_rec(cur, &mut out, &mut strbuf, &mut remain, is_safe)?;
2116        Ok(out)
2117    }
2118
2119    #[cold]
2120    pub fn peek_invalid_type(&mut self, peek: u8, exp: &dyn Expected) -> Error {
2121        let err = match peek {
2122            b'n' => {
2123                if let Err(err) = self.parse_literal("ull") {
2124                    return err;
2125                }
2126                de::Error::invalid_type(Unexpected::Unit, exp)
2127            }
2128            b't' => {
2129                if let Err(err) = self.parse_literal("rue") {
2130                    return err;
2131                }
2132                de::Error::invalid_type(Unexpected::Bool(true), exp)
2133            }
2134            b'f' => {
2135                if let Err(err) = self.parse_literal("alse") {
2136                    return err;
2137                }
2138                de::Error::invalid_type(Unexpected::Bool(false), exp)
2139            }
2140            c @ b'-' | c @ b'0'..=b'9' => match self.parse_number(c) {
2141                Ok(n) => invalid_type_number(&n, exp),
2142                Err(err) => return err,
2143            },
2144            b'"' => {
2145                let mut scratch = Vec::new();
2146                match self.parse_str(&mut scratch) {
2147                    Ok(s) if std::str::from_utf8(s.as_bytes()).is_ok() => {
2148                        de::Error::invalid_type(Unexpected::Str(&s), exp)
2149                    }
2150                    Ok(s) => de::Error::invalid_type(Unexpected::Bytes(s.as_bytes()), exp),
2151                    Err(err) => return err,
2152                }
2153            }
2154            // for correctness, we will parse the whole object or array.
2155            b'[' => {
2156                self.read.backward(1);
2157
2158                match self.skip_one() {
2159                    Ok(_) => de::Error::invalid_type(Unexpected::Seq, exp),
2160                    Err(err) => return err,
2161                }
2162            }
2163            b'{' => {
2164                self.read.backward(1);
2165                match self.skip_one() {
2166                    Ok(_) => de::Error::invalid_type(Unexpected::Map, exp),
2167                    Err(err) => return err,
2168                }
2169            }
2170            _ => self.error(ErrorCode::InvalidJsonValue),
2171        };
2172        self.fix_position(err)
2173    }
2174}
2175
2176impl<'de, R> Parser<R>
2177where
2178    R: Reader<'de>,
2179{
2180    pub fn get_by_schema(&mut self, schema: &mut crate::Value) -> Result<()> {
2181        if !schema.is_object() {
2182            return perr!(
2183                self,
2184                Message(std::borrow::Cow::Borrowed("The schema must be an object"))
2185            );
2186        }
2187
2188        let mut strbuf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
2189        self.get_by_schema_rec(schema, &mut strbuf)
2190    }
2191
2192    fn get_by_schema_rec(&mut self, schema: &mut crate::Value, strbuf: &mut Vec<u8>) -> Result<()> {
2193        let ch = self.skip_space_peek();
2194        if ch.is_none() {
2195            return perr!(self, EofWhileParsing);
2196        }
2197
2198        let mut should_replace = true;
2199        let start = self.read.index();
2200
2201        match (schema.as_object_mut(), ch) {
2202            (Some(object), Some(b'{')) => {
2203                let mut key_values = HashMap::new();
2204                for (key, value) in object.iter_mut() {
2205                    key_values.insert(key, value);
2206                }
2207
2208                // We should replace the schema object if the object is empty
2209                should_replace = key_values.is_empty();
2210                if should_replace {
2211                    self.skip_one()?;
2212                } else {
2213                    self.read.eat(1);
2214                    match self.skip_space() {
2215                        Some(b'"') => {}
2216                        Some(b'}') => return Ok(()),
2217                        _ => {
2218                            return perr!(self, ExpectObjectKeyOrEnd);
2219                        }
2220                    }
2221
2222                    loop {
2223                        let key = self.parse_str(strbuf)?;
2224                        self.parse_object_clo()?;
2225                        if let Some(val) = key_values.get_mut(key.deref()) {
2226                            self.get_by_schema_rec(val, strbuf)?;
2227                        } else {
2228                            self.skip_one()?;
2229                        }
2230
2231                        match self.skip_space() {
2232                            Some(b',') => match self.skip_space() {
2233                                Some(b'"') => continue,
2234                                _ => return perr!(self, ExpectObjectKeyOrEnd),
2235                            },
2236                            Some(b'}') => break,
2237                            Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
2238                            None => return perr!(self, EofWhileParsing),
2239                        }
2240                    }
2241                }
2242            }
2243            _ => {
2244                self.skip_one()?;
2245            }
2246        }
2247
2248        let end = self.read.index();
2249        if should_replace && start < end {
2250            let slice = self.read.slice_unchecked(start, end);
2251            *schema = crate::from_slice(slice)?;
2252        }
2253        Ok(())
2254    }
2255}