1use std::{
2 borrow::Cow,
3 collections::HashMap,
4 fmt::Debug,
5 num::NonZeroU8,
6 ops::Deref,
7 slice::{from_raw_parts, from_raw_parts_mut},
8 str::{from_utf8, from_utf8_unchecked},
9};
10
11use faststr::FastStr;
12use serde::de::{self, Expected, Unexpected};
13use sonic_number::{parse_number, ParserNumber};
14#[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
15use sonic_simd::bits::NeonBits;
16use sonic_simd::{i8x32, m8x32, u8x32, u8x64, Mask, Simd};
17
18use crate::{
19 config::DeserializeCfg,
20 error::{
21 Error,
22 ErrorCode::{self, *},
23 Result,
24 },
25 index::Index,
26 lazyvalue::value::HasEsc,
27 pointer::{
28 tree::{MultiIndex, MultiKey, PointerTreeInner, PointerTreeNode},
29 PointerTree,
30 },
31 reader::Reader,
32 serde::de::invalid_type_number,
33 util::{
34 arch::{get_nonspace_bits, prefix_xor},
35 string::*,
36 unicode::{codepoint_to_utf8, hex_to_u32_nocheck},
37 },
38 value::visitor::JsonVisitor,
39 JsonValueMutTrait, JsonValueTrait, LazyValue, Number, OwnedLazyValue,
40};
41
/// A reference that is either tied to lifetime `'b` (`Borrowed`) or to
/// lifetime `'c` (`Copied`).
///
/// In this file `parse_str` returns `Borrowed` for string data sliced directly
/// out of the reader's input and `Copied` for data that had to be materialised
/// in the caller-supplied scratch buffer.
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// The value lives for `'b`.
    Borrowed(&'b T),
    /// The value lives only for `'c`.
    Copied(&'c T),
}

impl<'b, 'c> From<Reference<'b, 'c, str>> for Cow<'b, str> {
    /// Converts into a `Cow` that lives for `'b`.
    ///
    /// A `Borrowed` reference already has the required lifetime and is passed
    /// through without allocating; a `Copied` reference only lives for `'c`,
    /// so its contents are cloned into an owned `String`.
    fn from(value: Reference<'b, 'c, str>) -> Self {
        match value {
            Reference::Borrowed(b) => Cow::Borrowed(b),
            Reference::Copied(c) => Cow::Owned(c.to_string()),
        }
    }
}

impl<'b, 'c, T: Debug + ?Sized + 'static> Debug for Reference<'b, 'c, T> {
    /// Formats as `Borrowed(<value>)` or `Copied(<value>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Borrowed(c) => write!(f, "Borrowed({c:?})"),
            Self::Copied(c) => write!(f, "Copied({c:?})"),
        }
    }
}

impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    type Target = T;

    /// Returns the wrapped reference regardless of which variant holds it.
    fn deref(&self) -> &Self::Target {
        match *self {
            Reference::Borrowed(b) => b,
            Reference::Copied(c) => c,
        }
    }
}
82
/// Raw bytes of a parsed string, bundled with the scratch buffer that was
/// handed to the parser.
///
/// * `Borrowed` — the bytes are in `slice`; the scratch buffer is carried
///   along untouched so the caller can still write into it.
/// * `Copied` — the bytes are the current contents of the scratch buffer.
pub(crate) enum ParsedSlice<'b, 'c> {
    Borrowed {
        slice: &'b [u8],
        buf: &'c mut Vec<u8>,
    },
    Copied(&'c mut Vec<u8>),
}

impl<'b, 'c> Deref for ParsedSlice<'b, 'c> {
    type Target = [u8];

    /// Yields the parsed bytes, whichever variant holds them.
    fn deref(&self) -> &[u8] {
        match self {
            Self::Copied(owned) => owned.as_slice(),
            Self::Borrowed { slice, .. } => slice,
        }
    }
}
101
/// Default capacity (128) for a key buffer.
// NOTE(review): the users of this constant are outside this view; presumably it
// sizes the scratch `Vec<u8>` passed as `strbuf` when parsing object keys — confirm.
pub(crate) const DEFAULT_KEY_BUF_CAPACITY: usize = 128;
/// Reinterprets `data` as a `&str` without validating it in release builds.
///
/// Callers must pass valid UTF-8. Builds with debug assertions verify this and
/// panic with `invalid utf-8 in as_str` otherwise.
pub(crate) fn as_str(data: &[u8]) -> &str {
    if cfg!(debug_assertions) {
        assert!(from_utf8(data).is_ok(), "invalid utf-8 in as_str");
    }
    // SAFETY: the caller guarantees `data` is valid UTF-8 (checked above in debug builds).
    unsafe { from_utf8_unchecked(data) }
}
107
/// Generates a branch-free routine that, given a bitmask of backslash
/// positions in a block, returns the bitmask of positions that are *escaped*
/// (i.e. directly preceded by an odd-length run of backslashes).
///
/// `prev_escaped` is the carry between consecutive blocks: on entry it is 1
/// when the first byte of this block is escaped by the previous block; on exit
/// it is 1 when the last byte of this block escapes the first byte of the next.
macro_rules! impl_get_escaped_branchless {
    ($name:ident, $ty:ty, $even_bits:expr) => {
        #[inline(always)]
        fn $name(prev_escaped: &mut $ty, backslash: $ty) -> $ty {
            const EVEN_BITS: $ty = $even_bits;
            const ODD_BITS: $ty = !EVEN_BITS;

            let carry_in = *prev_escaped;
            // A backslash that is itself escaped by the previous block does not start a run.
            let starts = backslash & !carry_in;
            let follows_escape = (starts << 1) | carry_in;
            // Runs that begin on an odd bit position.
            let odd_starts = starts & ODD_BITS & !follows_escape;
            // The addition carries through each run; overflow means a run reaches the block end.
            let (even_seq, carry_out) = odd_starts.overflowing_add(starts);
            *prev_escaped = <$ty>::from(carry_out);
            (EVEN_BITS ^ (even_seq << 1)) & follows_escape
        }
    };
}

impl_get_escaped_branchless!(get_escaped_branchless_u32, u32, 0x5555_5555);
impl_get_escaped_branchless!(get_escaped_branchless_u64, u64, 0x5555_5555_5555_5555);
127
/// Expands to `Err(<parser>.error(<code>))`: wraps an error code in the
/// parser's position-aware error and returns it as the `Err` variant.
macro_rules! perr {
    ($self:ident, $err:expr) => {
        Err($self.error($err))
    };
}
133
/// Turns the boolean result of a visitor callback into a `Result<()>`:
/// `true` becomes `Ok(())`, `false` becomes an `UnexpectedVisitType` error
/// created through `perr!`.
macro_rules! check_visit {
    ($self:ident, $e:expr $(,)?) => {
        if $e {
            Ok(())
        } else {
            perr!($self, UnexpectedVisitType)
        }
    };
}
143
/// Returns `true` for the four JSON whitespace bytes: space, `\r`, `\n`, `\t`.
#[inline(always)]
pub(crate) fn is_whitespace(ch: u8) -> bool {
    // One bit per whitespace byte; every JSON whitespace byte is below 64.
    const SPACE_MASK: u64 = (1u64 << b' ') | (1u64 << b'\r') | (1u64 << b'\n') | (1u64 << b'\t');
    // The range guard keeps the shift amount valid and rejects all bytes >= 64.
    ch < 64 && (SPACE_MASK >> ch) & 1 != 0
}
151
152#[inline(always)]
153fn get_string_bits(data: &[u8; 64], prev_instring: &mut u64, prev_escaped: &mut u64) -> u64 {
154 let v = unsafe { u8x64::from_slice_unaligned_unchecked(data) };
155
156 let bs_bits = (v.eq(&u8x64::splat(b'\\'))).bitmask();
157 let escaped: u64;
158 if bs_bits != 0 {
159 escaped = get_escaped_branchless_u64(prev_escaped, bs_bits);
160 } else {
161 escaped = *prev_escaped;
162 *prev_escaped = 0;
163 }
164 let quote_bits = (v.eq(&u8x64::splat(b'"'))).bitmask() & !escaped;
165 let in_string = unsafe { prefix_xor(quote_bits) ^ *prev_instring };
166 *prev_instring = (in_string as i64 >> 63) as u64;
167 in_string
168}
169
/// Scans one 64-byte block for the `right` delimiter that closes the container
/// currently being skipped.
///
/// * `prev_instring` / `prev_escaped` — string and escape state carried
///   between consecutive blocks (forwarded to `get_string_bits`).
/// * `lbrace_num` / `rbrace_num` — running counts of `left` / `right` bytes
///   seen outside strings, carried between blocks.
///
/// Returns `Some(n)` with `n` the 1-based offset inside this block of the
/// closing delimiter (i.e. the number of bytes to consume), or `None` when the
/// container is not closed within this block.
#[inline(always)]
fn skip_container_loop(
    input: &[u8; 64],
    prev_instring: &mut u64,
    prev_escaped: &mut u64,
    lbrace_num: &mut usize,
    rbrace_num: &mut usize,
    left: u8,
    right: u8,
) -> Option<NonZeroU8> {
    let instring = get_string_bits(input, prev_instring, prev_escaped);
    let v = unsafe { u8x64::from_slice_unaligned_unchecked(input) };
    let last_lbrace_num = *lbrace_num;
    // Delimiters inside strings are masked out.
    let mut rbrace = (v.eq(&u8x64::splat(right))).bitmask() & !instring;
    let lbrace = (v.eq(&u8x64::splat(left))).bitmask() & !instring;
    // Visit the closing delimiters from lowest to highest bit.
    while rbrace != 0 {
        *rbrace_num += 1;
        // `rbrace - 1` selects every bit below the lowest remaining closer, so this
        // counts the openers that appear before it in this block.
        *lbrace_num = last_lbrace_num + (lbrace & (rbrace - 1)).count_ones() as usize;
        let is_closed = lbrace_num < rbrace_num;
        if is_closed {
            debug_assert_eq!(*rbrace_num, *lbrace_num + 1);
            let cnt = rbrace.trailing_zeros() + 1;
            // SAFETY: `rbrace != 0`, so `trailing_zeros()` is at most 63 and `cnt` is in 1..=64.
            return unsafe { Some(NonZeroU8::new_unchecked(cnt as u8)) };
        }
        // Clear the lowest set bit and move to the next closer.
        rbrace &= rbrace - 1;
    }
    // Not closed here: account for every opener in the block before the next call.
    *lbrace_num = last_lbrace_num + lbrace.count_ones() as usize;
    None
}
202
/// One object entry as produced by `Parser::parse_entry_lazy`.
pub(crate) struct Pair<'de> {
    /// The parsed object key.
    pub key: Cow<'de, str>,
    /// The raw bytes of the value, as returned by `skip_one`.
    pub val: &'de [u8],
    /// The status `skip_one` reported for the value.
    pub status: ParseStatus,
}
208
/// JSON parser state wrapped around a reader `R`.
pub struct Parser<R> {
    /// The underlying reader.
    pub read: R,
    /// Upper bound for the reported error position; starts at `usize::MAX`
    /// and is combined with the reader index in `error_index()`.
    error_index: usize,
    /// Bitmap of non-whitespace bytes for the most recent 64-byte chunk that
    /// `skip_space` scanned with SIMD.
    nospace_bits: u64,
    /// Reader index at which the chunk described by `nospace_bits` starts;
    /// initialised to -128 so the cached bitmap is never used before it is filled.
    nospace_start: isize,
    /// Deserialization options (e.g. `utf8_lossy`, `use_rawnumber`).
    pub(crate) cfg: DeserializeCfg,
}
216
/// Outcome flag reported when a string or value has been skipped or parsed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseStatus {
    /// No escape sequence was encountered.
    None,
    /// At least one backslash escape was encountered.
    HasEscaped,
}
223
224impl From<ParseStatus> for HasEsc {
225 fn from(value: ParseStatus) -> Self {
226 match value {
227 ParseStatus::None => HasEsc::None,
228 ParseStatus::HasEscaped => HasEsc::Yes,
229 }
230 }
231}
232
233impl<'de, R> Parser<R>
234where
235 R: Reader<'de>,
236{
237 pub fn new(read: R) -> Self {
238 Self {
239 read,
240 error_index: usize::MAX,
241 nospace_bits: 0,
242 nospace_start: -128,
243 cfg: DeserializeCfg::default(),
244 }
245 }
246
    /// Returns the reader's current index.
    pub fn offset(&self) -> usize {
        self.read.index()
    }
250
    /// Builder-style switch that turns on `cfg.utf8_lossy` and returns the parser.
    ///
    /// With the flag set, the string-parsing code in this file substitutes
    /// U+FFFD / lossy conversions where it would otherwise return an error.
    pub fn utf8_lossy(mut self) -> Self {
        self.cfg.utf8_lossy = true;
        self
    }
257
    /// Builder-style setter that replaces the whole configuration with `cfg`.
    pub(crate) fn with_config(mut self, cfg: DeserializeCfg) -> Self {
        self.cfg = cfg;
        self
    }
262
263 #[inline(always)]
264 fn error_index(&self) -> usize {
265 std::cmp::min(self.error_index, self.read.index().saturating_sub(1))
268 }
269
270 #[cold]
272 pub fn error(&self, mut reason: ErrorCode) -> Error {
273 if let Err(e) = self.read.check_utf8_final() {
277 return e;
278 }
279
280 let mut index = self.error_index();
282 let len = self.read.as_u8_slice().len();
283 if index > len {
284 reason = EofWhileParsing;
285 index = len;
286 }
287 Error::syntax(reason, self.read.origin_input(), index)
288 }
289
290 #[cold]
292 pub(crate) fn fix_position(&self, err: Error) -> Error {
293 if err.line() == 0 {
294 self.error(err.error_code())
295 } else {
296 err
297 }
298 }
299
300 #[inline(always)]
301 pub fn parse_number(&mut self, first: u8) -> Result<ParserNumber> {
302 let reader = &mut self.read;
303 let neg = first == b'-';
304 let mut now = reader.index() - (!neg as usize);
305 let data = reader.as_u8_slice();
306 let ret = parse_number(data, &mut now, neg);
307 reader.set_index(now);
308 ret.map_err(|err| self.error(err.into()))
309 }
310
311 #[inline(always)]
315 fn parse_string_visit<V>(&mut self, vis: &mut V, strbuf: Option<&mut Vec<u8>>) -> Result<()>
316 where
317 V: JsonVisitor<'de>,
318 {
319 if let Some(strbuf) = strbuf {
320 let rs = self.parse_str(strbuf)?;
321 check_visit!(self, vis.visit_str(rs.as_ref()))
322 } else {
323 unsafe {
324 let mut src = self.read.cur_ptr();
325 let start = self.read.cur_ptr();
326 let cnt = parse_string_inplace(&mut src, self.cfg.utf8_lossy)
327 .map_err(|e| self.error(e))?;
328 self.read.set_ptr(src);
329 let slice = from_raw_parts(start, cnt);
330 let s = from_utf8_unchecked(slice);
331 check_visit!(self, vis.visit_borrowed_str(s))
332 }
333 }
334 }
335
336 #[inline(always)]
338 fn parse_number_visit<V>(&mut self, first: u8, vis: &mut V, inplace: bool) -> Result<()>
339 where
340 V: JsonVisitor<'de>,
341 {
342 if self.cfg.use_rawnumber {
343 let start = self.read.index() - 1;
344 self.skip_number(first)?;
345 let slice = self.read.slice_unchecked(start, self.read.index());
346 let ok = if inplace {
347 vis.visit_borrowed_raw_number(as_str(slice))
348 } else {
349 vis.visit_raw_number(as_str(slice))
350 };
351 check_visit!(self, ok)
352 } else {
353 let ok = match self.parse_number(first)? {
354 ParserNumber::Float(f) => vis.visit_f64(f),
355 ParserNumber::Unsigned(f) => vis.visit_u64(f),
356 ParserNumber::Signed(f) => vis.visit_i64(f),
357 };
358 check_visit!(self, ok)
359 }
360 }
361
362 fn parse_array<V>(&mut self, vis: &mut V, mut strbuf: Option<&mut Vec<u8>>) -> Result<()>
363 where
364 V: JsonVisitor<'de>,
365 {
366 check_visit!(self, vis.visit_array_start(0))?;
367
368 let mut first = match self.skip_space() {
369 Some(b']') => return check_visit!(self, vis.visit_array_end(0)),
370 first => first,
371 };
372
373 let mut count = 0;
374 loop {
375 self.dispatch_value(first, vis, &mut strbuf)?;
376 count += 1;
377 first = match self.skip_space() {
378 Some(b']') => return check_visit!(self, vis.visit_array_end(count)),
379 Some(b',') => self.skip_space(),
380 _ => return perr!(self, ExpectedArrayCommaOrEnd),
381 };
382 }
383 }
384
385 fn parse_object<V>(&mut self, vis: &mut V, mut strbuf: Option<&mut Vec<u8>>) -> Result<()>
386 where
387 V: JsonVisitor<'de>,
388 {
389 let mut count: usize = 0;
390 check_visit!(self, vis.visit_object_start(0))?;
391 match self.skip_space() {
392 Some(b'}') => return check_visit!(self, vis.visit_object_end(0)),
393 Some(b'"') => {}
394 _ => return perr!(self, ExpectObjectKeyOrEnd),
395 }
396
397 loop {
398 self.parse_string_visit(vis, strbuf.as_deref_mut())?;
399 self.parse_object_clo()?;
400 let next = self.skip_space();
401 self.dispatch_value(next, vis, &mut strbuf)?;
402 count += 1;
403 match self.skip_space() {
404 Some(b'}') => return check_visit!(self, vis.visit_object_end(count)),
405 Some(b',') => match self.skip_space() {
406 Some(b'"') => continue,
407 _ => return perr!(self, ExpectObjectKeyOrEnd),
408 },
409 _ => return perr!(self, ExpectedArrayCommaOrEnd),
410 }
411 }
412 }
413
414 #[inline(always)]
418 fn dispatch_value<V>(
419 &mut self,
420 ch: Option<u8>,
421 vis: &mut V,
422 strbuf: &mut Option<&mut Vec<u8>>,
423 ) -> Result<()>
424 where
425 V: JsonVisitor<'de>,
426 {
427 match ch {
428 Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_visit(c, vis, strbuf.is_none()),
429 Some(b'"') => self.parse_string_visit(vis, strbuf.as_deref_mut()),
430 Some(b'{') => self.parse_object(vis, strbuf.as_deref_mut()),
431 Some(b'[') => self.parse_array(vis, strbuf.as_deref_mut()),
432 Some(first) => self.parse_literal_visit(first, vis),
433 None => perr!(self, EofWhileParsing),
434 }
435 }
436
437 #[inline(always)]
438 fn parse_literal_visit<V>(&mut self, first: u8, vis: &mut V) -> Result<()>
439 where
440 V: JsonVisitor<'de>,
441 {
442 let literal = match first {
443 b't' => "rue",
444 b'f' => "alse",
445 b'n' => "ull",
446 _ => return perr!(self, InvalidJsonValue),
447 };
448
449 let reader = &mut self.read;
450 if let Some(chunk) = reader.next_n(literal.len()) {
451 if chunk != literal.as_bytes() {
452 return perr!(self, InvalidLiteral);
453 }
454
455 let ok = match first {
456 b't' => vis.visit_bool(true),
457 b'f' => vis.visit_bool(false),
458 b'n' => vis.visit_null(),
459 _ => unreachable!(),
460 };
461 check_visit!(self, ok)
462 } else {
463 perr!(self, EofWhileParsing)
464 }
465 }
466
467 #[inline]
468 pub(crate) fn parse_array_elem_lazy(
469 &mut self,
470 first: &mut bool,
471 check: bool,
472 ) -> Result<Option<(&'de [u8], ParseStatus)>> {
473 if *first && self.skip_space() != Some(b'[') {
474 return perr!(self, ExpectedArrayStart);
475 }
476 match self.skip_space_peek() {
477 Some(b']') => {
478 self.read.eat(1);
479 return Ok(None);
480 }
481 Some(b',') if !(*first) => {
482 self.read.eat(1);
483 }
484 Some(_) if *first => {
485 *first = false;
486 }
487 _ => return perr!(self, ExpectedArrayCommaOrEnd),
488 };
489 let (raw, status) = self.skip_one(check)?;
490 Ok(Some((raw, status)))
491 }
492
493 #[inline]
494 pub(crate) fn parse_entry_lazy(
495 &mut self,
496 strbuf: &mut Vec<u8>,
497 first: &mut bool,
498 check: bool,
499 ) -> Result<Option<Pair<'de>>> {
500 if *first && self.skip_space() != Some(b'{') {
501 return perr!(self, ExpectedObjectStart);
502 }
503 match self.skip_space() {
504 Some(b'}') => return Ok(None),
505 Some(b'"') if *first => *first = false,
506 Some(b',') if !*first => {
507 if self.skip_space() != Some(b'"') {
508 return perr!(self, ExpectObjectKeyOrEnd);
509 }
510 }
511 _ => return perr!(self, ExpectedObjectCommaOrEnd),
512 }
513
514 let parsed = self.parse_str(strbuf)?;
515 self.parse_object_clo()?;
516 let (raw, status) = self.skip_one(check)?;
517
518 Ok(Some(Pair {
519 key: parsed.into(),
520 val: raw,
521 status,
522 }))
523 }
524
525 #[inline(always)]
526 pub(crate) fn match_literal(&mut self, literal: &'static str) -> Result<bool> {
527 if let Some(chunk) = self.read.next_n(literal.len()) {
528 if chunk != literal.as_bytes() {
529 perr!(self, InvalidLiteral)
530 } else {
531 Ok(true)
532 }
533 } else {
534 perr!(self, EofWhileParsing)
535 }
536 }
537
538 #[inline(always)]
539 pub(crate) fn get_owned_lazyvalue(&mut self, strict: bool) -> Result<OwnedLazyValue> {
540 let c = self.skip_space();
541 let start = match c {
542 Some(b'"') => {
543 let start = self.read.index() - 1;
544 match self.skip_string()? {
545 ParseStatus::None => {
546 let slice = self.read.slice_unchecked(start, self.read.index());
547 let raw = self.read.slice_ref(slice).as_faststr();
548 return Ok(OwnedLazyValue::from_non_esc_str(raw));
549 }
550 ParseStatus::HasEscaped => {}
551 }
552 start
553 }
554 Some(b't') if self.match_literal("rue")? => return Ok(true.into()),
555 Some(b'f') if self.match_literal("alse")? => return Ok(false.into()),
556 Some(b'n') if self.match_literal("ull")? => return Ok(().into()),
557 None => return perr!(self, EofWhileParsing),
558 _ => {
559 let start = self.read.index() - 1;
560 self.read.backward(1);
561 self.skip_one(strict)?;
562 start
563 }
564 };
565 let end = self.read.index();
566 let sub = self.read.slice_unchecked(start, end);
567 let raw = self.read.slice_ref(sub).as_faststr();
568 Ok(OwnedLazyValue::new(raw.into(), HasEsc::Possible))
569 }
570
571 #[inline(always)]
572 fn parse_faststr(&mut self, strbuf: &mut Vec<u8>) -> Result<FastStr> {
573 match self.parse_str(strbuf)? {
574 Reference::Borrowed(s) => {
575 return Ok(self.read.slice_ref(s.as_bytes()).as_faststr());
576 }
577 Reference::Copied(s) => Ok(FastStr::new(s)),
578 }
579 }
580
581 #[inline(always)]
582 pub(crate) fn load_owned_lazyvalue(&mut self, strbuf: &mut Vec<u8>) -> Result<OwnedLazyValue> {
583 match self.skip_space() {
584 Some(c @ b'-' | c @ b'0'..=b'9') => {
585 let num: Number = self.parse_number(c)?.into();
586 Ok(OwnedLazyValue::from(num))
587 }
588 Some(b'"') => match self.parse_str(strbuf)? {
589 Reference::Borrowed(s) => {
590 let raw = self.read.slice_ref(s.as_bytes()).as_faststr();
591 Ok(OwnedLazyValue::from_faststr(raw))
592 }
593 Reference::Copied(s) => {
594 let raw = FastStr::new(s);
595 Ok(OwnedLazyValue::from_faststr(raw))
596 }
597 },
598 Some(b'{') => {
599 match self.skip_space() {
601 Some(b'}') => return Ok(Vec::<(FastStr, OwnedLazyValue)>::new().into()),
602 Some(b'"') => {}
603 _ => return perr!(self, ExpectObjectKeyOrEnd),
604 }
605
606 let mut vec = Vec::with_capacity(32);
608 loop {
609 let key = self.parse_faststr(strbuf)?;
610 self.parse_object_clo()?;
611 let olv = self.get_owned_lazyvalue(false)?;
612 vec.push((key, olv));
613 match self.skip_space() {
614 Some(b'}') => return Ok(vec.into()),
615 Some(b',') => match self.skip_space() {
616 Some(b'"') => continue,
617 _ => return perr!(self, ExpectObjectKeyOrEnd),
618 },
619 _ => return perr!(self, ExpectedArrayCommaOrEnd),
620 }
621 }
622 }
623 Some(b'[') => {
624 if let Some(b']') = self.skip_space() {
625 return Ok(Vec::<OwnedLazyValue>::new().into());
626 }
627
628 let mut vec = Vec::with_capacity(32);
629 self.read.backward(1);
630 loop {
631 vec.push(self.get_owned_lazyvalue(false)?);
632 match self.skip_space() {
633 Some(b']') => return Ok(vec.into()),
634 Some(b',') => {}
635 _ => return perr!(self, ExpectedArrayCommaOrEnd),
636 };
637 }
638 }
639 _ => perr!(self, InvalidJsonValue),
640 }
641 }
642
643 #[inline(always)]
644 pub(crate) fn parse_dom<V>(
645 &mut self,
646 vis: &mut V,
647 mut strbuf: Option<&mut Vec<u8>>,
648 ) -> Result<()>
649 where
650 V: JsonVisitor<'de>,
651 {
652 check_visit!(self, vis.visit_dom_start())?;
653 let ch = self.skip_space();
654 self.dispatch_value(ch, vis, &mut strbuf)?;
655 check_visit!(self, vis.visit_dom_end())
656 }
657
658 #[inline(always)]
659 pub fn parse_str<'own>(&mut self, buf: &'own mut Vec<u8>) -> Result<Reference<'de, 'own, str>> {
660 match self.parse_string_raw(buf) {
661 Ok(ParsedSlice::Copied(buf)) => {
662 if self.check_invalid_utf8(self.cfg.utf8_lossy)? {
663 let repr = String::from_utf8_lossy(buf.as_ref()).into_owned();
665 *buf = repr.into_bytes();
666 }
667 let slice = unsafe { from_utf8_unchecked(buf.as_slice()) };
668 Ok(Reference::Copied(slice))
669 }
670 Ok(ParsedSlice::Borrowed { slice, buf }) => {
671 if self.check_invalid_utf8(self.cfg.utf8_lossy)? {
672 let repr = String::from_utf8_lossy(slice).into_owned();
674 *buf = repr.into_bytes();
675 let slice = unsafe { from_utf8_unchecked(buf) };
676 Ok(Reference::Copied(slice))
677 } else {
678 Ok(Reference::Borrowed(unsafe { from_utf8_unchecked(slice) }))
679 }
680 }
681 Err(e) => Err(e),
682 }
683 }
684
685 pub(crate) fn check_invalid_utf8(&mut self, allowed: bool) -> Result<bool> {
686 let invalid = self.read.next_invalid_utf8();
688 if invalid >= self.read.index() {
689 return Ok(false);
690 }
691
692 if !allowed {
693 Err(Error::syntax(
694 ErrorCode::InvalidUTF8,
695 self.read.origin_input(),
696 invalid,
697 ))
698 } else {
699 self.read.check_invalid_utf8();
701 Ok(true)
702 }
703 }
704
    /// Decodes the code point of a `\uXXXX` escape; the reader must be
    /// positioned just after the `u`.
    ///
    /// A high surrogate (`0xD800..0xDC00`) must be followed by a second
    /// `\uXXXX` escape holding a low surrogate; the two are combined into one
    /// code point. With `cfg.utf8_lossy` enabled, malformed surrogates yield
    /// U+FFFD instead of an `InvalidSurrogateUnicodeCodePoint` error.
    pub(crate) fn parse_escaped_utf8(&mut self) -> Result<u32> {
        // NOTE(review): judging by its name, `hex_to_u32_nocheck` does not validate
        // the four bytes as hex digits — confirm against its definition.
        let point1 = if let Some(asc) = self.read.next_n(4) {
            unsafe { hex_to_u32_nocheck(&*(asc.as_ptr() as *const _ as *const [u8; 4])) }
        } else {
            return perr!(self, EofWhileParsing);
        };

        if (0xD800..0xDC00).contains(&point1) {
            // High surrogate: the next six bytes should be `\uXXXX`.
            let point2 = if let Some(asc) = self.read.next_n(6) {
                if asc[0] != b'\\' || asc[1] != b'u' {
                    if self.cfg.utf8_lossy {
                        // Rewind the six bytes just read so they are parsed again as
                        // ordinary string content.
                        let idx = self.read.index();
                        self.read.set_index(idx - 6);
                        return Ok(0xFFFD);
                    } else {
                        return perr!(self, InvalidSurrogateUnicodeCodePoint);
                    }
                }
                unsafe { hex_to_u32_nocheck(&*(asc.as_ptr().add(2) as *const _ as *const [u8; 4])) }
            } else if self.cfg.utf8_lossy {
                return Ok(0xFFFD);
            } else {
                return perr!(self, InvalidSurrogateUnicodeCodePoint);
            };

            // A valid low surrogate lies in 0xDC00..0xE000, i.e. the offset fits in 10 bits.
            let low_bit = point2.wrapping_sub(0xdc00);
            if (low_bit >> 10) != 0 {
                if self.cfg.utf8_lossy {
                    // Rewind so the second escape is decoded on its own.
                    let idx = self.read.index();
                    self.read.set_index(idx - 6);
                    return Ok(0xFFFD);
                } else {
                    return perr!(self, InvalidSurrogateUnicodeCodePoint);
                }
            }

            // Combine the surrogate pair into a supplementary-plane code point.
            Ok((((point1 - 0xd800) << 10) | low_bit).wrapping_add(0x10000))
        } else if (0xDC00..0xE000).contains(&point1) {
            // A low surrogate without a preceding high surrogate.
            if self.cfg.utf8_lossy {
                Ok(0xFFFD)
            } else {
                perr!(self, InvalidSurrogateUnicodeCodePoint)
            }
        } else {
            Ok(point1)
        }
    }
761
762 pub(crate) unsafe fn parse_escaped_char(&mut self, buf: &mut Vec<u8>) -> Result<()> {
763 'escape: loop {
764 match self.read.next() {
765 Some(b'u') => {
766 let code = self.parse_escaped_utf8()?;
767 buf.reserve(4);
768 let ptr = buf.as_mut_ptr().add(buf.len());
769 let cnt = codepoint_to_utf8(code, ptr);
770 if cnt == 0 {
771 return perr!(self, InvalidUnicodeCodePoint);
772 }
773 buf.set_len(buf.len() + cnt);
774 }
775 Some(c) if ESCAPED_TAB[c as usize] != 0 => {
776 buf.push(ESCAPED_TAB[c as usize]);
777 }
778 None => return perr!(self, EofWhileParsing),
779 _ => return perr!(self, InvalidEscape),
780 }
781
782 if self.read.peek() == Some(b'\\') {
784 self.read.eat(1);
785 continue 'escape;
786 }
787 break 'escape;
788 }
789 Ok(())
790 }
791
    /// Continues parsing a string after its first backslash, unescaping into `buf`.
    ///
    /// On entry the reader is just past the first backslash and `buf` already
    /// holds the bytes that preceded it. On success the closing quote has been
    /// consumed and `ParsedSlice::Copied(buf)` is returned.
    ///
    /// # Safety
    /// Marked `unsafe` by the original author; the body writes whole SIMD
    /// blocks into `buf`'s spare capacity through raw pointers.
    pub(crate) unsafe fn parse_string_escaped<'own>(
        &mut self,
        buf: &'own mut Vec<u8>,
    ) -> Result<ParsedSlice<'de, 'own>> {
        #[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
        let mut block: StringBlock<NeonBits>;
        #[cfg(not(all(target_feature = "neon", target_arch = "aarch64")))]
        let mut block: StringBlock<u32>;

        // Decode the escape that brought us here.
        self.parse_escaped_char(buf)?;

        // Block path: handle `StringBlock::LANES` bytes per iteration.
        while let Some(chunk) = self.read.peek_n(StringBlock::LANES) {
            // Guarantee room for the full-block write below.
            buf.reserve(StringBlock::LANES);
            let v = unsafe { load(chunk.as_ptr()) };
            block = StringBlock::new(&v);

            if block.has_unescaped() {
                self.read.eat(block.unescaped_index());
                return perr!(self, ControlCharacterWhileParsingString);
            }

            // Copy the whole block into the spare capacity; the length is set
            // afterwards to cover only the bytes that belong to the string.
            let chunk = from_raw_parts_mut(buf.as_mut_ptr().add(buf.len()), StringBlock::LANES);
            v.write_to_slice_unaligned_unchecked(chunk);

            if block.has_quote_first() {
                let cnt = block.quote_index();
                buf.set_len(buf.len() + cnt);

                // Consume the string bytes plus the closing quote.
                self.read.eat(cnt + 1);
                return Ok(ParsedSlice::Copied(buf));
            }

            if block.has_backslash() {
                let cnt = block.bs_index();
                // Consume the bytes before the backslash and the backslash itself.
                self.read.eat(cnt + 1);
                buf.set_len(buf.len() + cnt);
                self.parse_escaped_char(buf)?;
            } else {
                buf.set_len(buf.len() + StringBlock::LANES);
                self.read.eat(StringBlock::LANES);
            }
        }

        // Byte-by-byte path for the tail shorter than one block.
        while let Some(c) = self.read.peek() {
            match c {
                b'"' => {
                    self.read.eat(1);
                    return Ok(ParsedSlice::Copied(buf));
                }
                b'\\' => {
                    self.read.eat(1);
                    self.parse_escaped_char(buf)?;
                }
                b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
                _ => {
                    buf.push(c);
                    self.read.eat(1);
                }
            }
        }

        perr!(self, EofWhileParsing)
    }
861
    /// Parses a string whose opening quote has already been consumed and
    /// returns its raw bytes.
    ///
    /// A string without backslashes is returned as `ParsedSlice::Borrowed`,
    /// pointing into the input (closing quote excluded). At the first
    /// backslash the bytes seen so far are copied into `buf` and the rest is
    /// handled by `parse_string_escaped`.
    ///
    /// # Errors
    /// `ControlCharacterWhileParsingString` for a byte in `0x00..=0x1f`,
    /// `EofWhileParsing` when the closing quote is missing.
    #[inline(always)]
    pub(crate) fn parse_string_raw<'own>(
        &mut self,
        buf: &'own mut Vec<u8>,
    ) -> Result<ParsedSlice<'de, 'own>> {
        // Index of the first byte after the opening quote.
        let start = self.read.index();
        #[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
        let mut block: StringBlock<NeonBits>;
        #[cfg(not(all(target_feature = "neon", target_arch = "aarch64")))]
        let mut block: StringBlock<u32>;

        // Block path: inspect `StringBlock::LANES` bytes per iteration.
        while let Some(chunk) = self.read.peek_n(StringBlock::LANES) {
            let v = unsafe { load(chunk.as_ptr()) };
            block = StringBlock::new(&v);

            if block.has_quote_first() {
                let cnt = block.quote_index();
                self.read.eat(cnt + 1);
                // Exclude the closing quote from the returned slice.
                let slice = self.read.slice_unchecked(start, self.read.index() - 1);
                return Ok(ParsedSlice::Borrowed { slice, buf });
            }

            if block.has_unescaped() {
                self.read.eat(block.unescaped_index());
                return perr!(self, ControlCharacterWhileParsingString);
            }

            if block.has_backslash() {
                let cnt = block.bs_index();
                // Consume everything up to and including the backslash.
                self.read.eat(cnt + 1);

                // Seed the scratch buffer with the bytes before the backslash.
                buf.clear();
                buf.extend_from_slice(&self.read.as_u8_slice()[start..self.read.index() - 1]);

                return unsafe { self.parse_string_escaped(buf) };
            }

            self.read.eat(StringBlock::LANES);
            continue;
        }

        // Byte-by-byte path for the tail shorter than one block.
        while let Some(c) = self.read.peek() {
            match c {
                b'"' => {
                    self.read.eat(1);
                    let slice = self.read.slice_unchecked(start, self.read.index() - 1);
                    return Ok(ParsedSlice::Borrowed { slice, buf });
                }
                b'\\' => {
                    // The backslash is not consumed yet, so the copy ends at the current index.
                    buf.clear();
                    buf.extend_from_slice(self.read.slice_unchecked(start, self.read.index()));
                    self.read.eat(1);
                    return unsafe { self.parse_string_escaped(buf) };
                }
                b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
                _ => self.read.eat(1),
            }
        }
        perr!(self, EofWhileParsing)
    }
927
928 #[inline(always)]
929 fn get_next_token<const N: usize>(&mut self, tokens: [u8; N], advance: usize) -> Option<u8> {
930 let r = &mut self.read;
931 const LANS: usize = u8x32::LANES;
932 while let Some(chunk) = r.peek_n(LANS) {
933 let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
934 let mut vor = m8x32::splat(false);
935 for t in tokens.iter().take(N) {
936 vor |= v.eq(&u8x32::splat(*t));
937 }
938 let next = vor.bitmask();
939 if next != 0 {
940 let cnt = next.trailing_zeros() as usize;
941 let ch = chunk[cnt];
942 r.eat(cnt + advance);
943 return Some(ch);
944 }
945 r.eat(LANS);
946 }
947
948 while let Some(ch) = r.peek() {
949 for t in tokens.iter().take(N) {
950 if ch == *t {
951 r.eat(advance);
952 return Some(ch);
953 }
954 }
955 r.eat(1)
956 }
957 None
958 }
959
    /// Skips to just past the closing quote of a string whose opening quote
    /// has already been consumed, without checking for control characters or
    /// validating escape sequences.
    ///
    /// Returns `ParseStatus::HasEscaped` if a backslash had to be taken into
    /// account, `ParseStatus::None` otherwise.
    ///
    /// # Safety
    /// NOTE(review): the function is declared `unsafe` although no contract is
    /// visible here; presumably the caller must guarantee well-formed input — confirm.
    #[inline(always)]
    unsafe fn skip_string_unchecked(&mut self) -> Result<ParseStatus> {
        const LANS: usize = u8x32::LANES;
        let r = &mut self.read;
        let mut quote_bits;
        let mut escaped;
        // Carry: 1 when the last byte of the previous block escapes the first byte of this one.
        let mut prev_escaped = 0;
        let mut status = ParseStatus::None;

        while let Some(chunk) = r.peek_n(LANS) {
            let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
            let bs_bits = (v.eq(&u8x32::splat(b'\\'))).bitmask();
            quote_bits = (v.eq(&u8x32::splat(b'"'))).bitmask();
            // `quote_bits - 1` selects the bits below the first quote (all bits when
            // there is none), so escapes are resolved only if a backslash precedes the
            // first quote or an escape carries over from the previous block.
            if ((quote_bits.wrapping_sub(1)) & bs_bits) != 0 || prev_escaped != 0 {
                escaped = get_escaped_branchless_u32(&mut prev_escaped, bs_bits);
                status = ParseStatus::HasEscaped;
                quote_bits &= !escaped;
            }
            if quote_bits != 0 {
                // Consume up to and including the closing quote.
                r.eat(quote_bits.trailing_zeros() as usize + 1);
                return Ok(status);
            }
            r.eat(LANS)
        }

        // The first byte of the tail is escaped by the last block: skip it.
        if prev_escaped != 0 {
            r.eat(1)
        }

        // Byte-by-byte path for the tail shorter than one block.
        while let Some(ch) = r.peek() {
            if ch == b'\\' {
                // A trailing backslash with nothing after it: fall through to the EOF error.
                if r.remain() < 2 {
                    break;
                }
                status = ParseStatus::HasEscaped;
                // Skip the backslash together with the byte it escapes.
                r.eat(2);
                continue;
            }
            r.eat(1);
            if ch == b'"' {
                return Ok(status);
            }
        }
        perr!(self, EofWhileParsing)
    }
1012
1013 fn skip_escaped_chars(&mut self) -> Result<()> {
1014 match self.read.peek() {
1015 Some(b'u') => {
1016 if self.read.remain() < 6 {
1017 return perr!(self, EofWhileParsing);
1018 } else {
1019 self.read.eat(5);
1020 }
1021 }
1022 Some(c) => {
1023 if self.read.next().is_none() {
1024 return perr!(self, EofWhileParsing);
1025 }
1026 if ESCAPED_TAB[c as usize] == 0 {
1027 return perr!(self, InvalidEscape);
1028 }
1029 }
1030 None => return perr!(self, EofWhileParsing),
1031 }
1032 Ok(())
1033 }
1034
1035 #[inline(always)]
1037 fn skip_string(&mut self) -> Result<ParseStatus> {
1038 const LANS: usize = u8x32::LANES;
1039
1040 let mut status = ParseStatus::None;
1041 while let Some(chunk) = self.read.peek_n(LANS) {
1042 let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) };
1043 let v_bs = v.eq(&u8x32::splat(b'\\'));
1044 let v_quote = v.eq(&u8x32::splat(b'"'));
1045 let v_cc = v.le(&u8x32::splat(0x1f));
1046 let mask = (v_bs | v_quote | v_cc).bitmask();
1047
1048 if mask != 0 {
1050 let cnt = mask.trailing_zeros() as usize;
1051 self.read.eat(cnt + 1);
1052
1053 match chunk[cnt] {
1054 b'\\' => {
1055 self.skip_escaped_chars()?;
1056 status = ParseStatus::HasEscaped;
1057 }
1058 b'\"' => return Ok(status),
1059 0..=0x1f => return perr!(self, ControlCharacterWhileParsingString),
1060 _ => unreachable!(),
1061 }
1062 } else {
1063 self.read.eat(LANS)
1064 }
1065 }
1066
1067 while let Some(ch) = self.read.next() {
1069 match ch {
1070 b'\\' => {
1071 self.skip_escaped_chars()?;
1072 status = ParseStatus::HasEscaped;
1073 }
1074 b'"' => return Ok(status),
1075 0..=0x1f => return perr!(self, ControlCharacterWhileParsingString),
1076 _ => {}
1077 }
1078 }
1079 perr!(self, EofWhileParsing)
1080 }
1081
1082 #[inline(always)]
1084 pub(crate) fn parse_object_clo(&mut self) -> Result<()> {
1085 if let Some(ch) = self.read.peek() {
1086 if ch == b':' {
1088 self.read.eat(1);
1089 return Ok(());
1090 }
1091
1092 match self.skip_space() {
1093 Some(b':') => Ok(()),
1094 Some(_) => perr!(self, ExpectedColon),
1095 None => perr!(self, EofWhileParsing),
1096 }
1097 } else {
1098 perr!(self, EofWhileParsing)
1099 }
1100 }
1101
1102 #[inline(always)]
1104 pub(crate) fn parse_array_end(&mut self) -> Result<()> {
1105 match self.skip_space() {
1106 Some(b']') => Ok(()),
1107 Some(_) => perr!(self, ExpectedArrayCommaOrEnd),
1108 None => perr!(self, EofWhileParsing),
1109 }
1110 }
1111
1112 #[inline(always)]
1113 fn skip_object(&mut self) -> Result<()> {
1114 match self.skip_space() {
1115 Some(b'}') => return Ok(()),
1116 Some(b'"') => {}
1117 None => return perr!(self, EofWhileParsing),
1118 Some(_) => return perr!(self, ExpectObjectKeyOrEnd),
1119 }
1120
1121 loop {
1122 self.skip_string()?;
1123 self.parse_object_clo()?;
1124 self.skip_one(true)?;
1125
1126 match self.skip_space() {
1127 Some(b'}') => return Ok(()),
1128 Some(b',') => match self.skip_space() {
1129 Some(b'"') => continue,
1130 _ => return perr!(self, ExpectObjectKeyOrEnd),
1131 },
1132 None => return perr!(self, EofWhileParsing),
1133 Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
1134 }
1135 }
1136 }
1137
1138 #[inline(always)]
1139 fn skip_array(&mut self) -> Result<()> {
1140 match self.skip_space_peek() {
1141 Some(b']') => {
1142 self.read.eat(1);
1143 return Ok(());
1144 }
1145 None => return perr!(self, EofWhileParsing),
1146 _ => {}
1147 }
1148
1149 loop {
1150 self.skip_one(true)?;
1151 match self.skip_space() {
1152 Some(b']') => return Ok(()),
1153 Some(b',') => continue,
1154 None => return perr!(self, EofWhileParsing),
1155 _ => return perr!(self, ExpectedArrayCommaOrEnd),
1156 }
1157 }
1158 }
1159
1160 #[inline(always)]
1162 fn skip_container(&mut self, left: u8, right: u8) -> Result<()> {
1163 let mut prev_instring = 0;
1164 let mut prev_escaped = 0;
1165 let mut rbrace_num = 0;
1166 let mut lbrace_num = 0;
1167 let reader = &mut self.read;
1168
1169 while let Some(chunk) = reader.peek_n(64) {
1170 let input = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
1171 if let Some(count) = skip_container_loop(
1172 input,
1173 &mut prev_instring,
1174 &mut prev_escaped,
1175 &mut lbrace_num,
1176 &mut rbrace_num,
1177 left,
1178 right,
1179 ) {
1180 reader.eat(count.get() as usize);
1181 return Ok(());
1182 }
1183 reader.eat(64);
1184 }
1185
1186 let mut remain = [0u8; 64];
1187 {
1188 let n = reader.remain();
1189 debug_assert!(n <= 64);
1190 remain[..n].copy_from_slice(reader.peek_n(n).unwrap());
1191 }
1192 if let Some(count) = skip_container_loop(
1193 &remain,
1194 &mut prev_instring,
1195 &mut prev_escaped,
1196 &mut lbrace_num,
1197 &mut rbrace_num,
1198 left,
1199 right,
1200 ) {
1201 reader.eat(count.get() as usize);
1202 return Ok(());
1203 }
1204
1205 perr!(self, EofWhileParsing)
1206 }
1207
    /// Skips whitespace and consumes and returns the next non-whitespace
    /// byte, or `None` at end of input.
    ///
    /// Three strategies are tried in order: two plain byte reads, a cached
    /// non-whitespace bitmap from an earlier SIMD scan, and a fresh SIMD scan
    /// over 64-byte chunks. Remaining bytes are handled one at a time.
    #[inline(always)]
    pub fn skip_space(&mut self) -> Option<u8> {
        let reader = &mut self.read;
        // Fast path 1: the next byte, or the one after it, is not whitespace.
        if let Some(ch) = reader.next() {
            if !is_whitespace(ch) {
                return Some(ch);
            }
        }
        if let Some(ch) = reader.next() {
            if !is_whitespace(ch) {
                return Some(ch);
            }
        }

        // Fast path 2: the current index still lies within the 64-byte chunk
        // whose non-whitespace bitmap was cached by an earlier call.
        let nospace_offset = (reader.index() as isize) - self.nospace_start;
        if nospace_offset < 64 {
            let bitmap = {
                // Keep only the bits at or after the current position.
                let mask = !((1 << nospace_offset) - 1);
                self.nospace_bits & mask
            };
            if bitmap != 0 {
                let cnt = bitmap.trailing_zeros() as usize;
                let ch = reader.at(self.nospace_start as usize + cnt);
                // Move just past the byte being returned.
                reader.set_index(self.nospace_start as usize + cnt + 1);

                return Some(ch);
            } else {
                // The rest of the cached chunk is whitespace: jump to its end.
                reader.set_index(self.nospace_start as usize + 64);
            }
        }

        // SIMD path: scan 64 bytes at a time and cache the bitmap of the
        // first chunk that contains a non-whitespace byte.
        while let Some(chunk) = reader.peek_n(64) {
            let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
            let bitmap = unsafe { get_nonspace_bits(chunk) };
            if bitmap != 0 {
                self.nospace_bits = bitmap;
                self.nospace_start = reader.index() as isize;
                let cnt = bitmap.trailing_zeros() as usize;
                let ch = chunk[cnt];
                reader.eat(cnt + 1);

                return Some(ch);
            }
            reader.eat(64)
        }

        // Tail shorter than 64 bytes: byte by byte.
        while let Some(ch) = reader.next() {
            if !is_whitespace(ch) {
                return Some(ch);
            }
        }
        None
    }
1267
1268 #[inline(always)]
1269 pub fn skip_space_peek(&mut self) -> Option<u8> {
1270 let ret = self.skip_space()?;
1271 self.read.backward(1);
1272 Some(ret)
1273 }
1274
1275 #[inline(always)]
1276 pub fn parse_literal(&mut self, literal: &str) -> Result<()> {
1277 let reader = &mut self.read;
1278 if let Some(chunk) = reader.next_n(literal.len()) {
1279 if chunk == literal.as_bytes() {
1280 Ok(())
1281 } else {
1282 perr!(self, InvalidLiteral)
1283 }
1284 } else {
1285 perr!(self, EofWhileParsing)
1286 }
1287 }
1288
1289 #[inline(always)]
1290 fn skip_number_unsafe(&mut self) -> Result<()> {
1291 let _ = self.get_next_token([b']', b'}', b','], 0);
1292 Ok(())
1293 }
1294
1295 #[inline(always)]
1296 fn skip_exponent(&mut self) -> Result<()> {
1297 if let Some(ch) = self.read.peek() {
1298 if ch == b'-' || ch == b'+' {
1299 self.read.eat(1);
1300 }
1301 }
1302 self.skip_single_digit()?;
1303 while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1305 self.read.eat(1);
1306 }
1307 Ok(())
1308 }
1309
1310 #[inline(always)]
1311 fn skip_single_digit(&mut self) -> Result<u8> {
1312 if let Some(ch) = self.read.next() {
1313 if !ch.is_ascii_digit() {
1314 perr!(self, InvalidNumber)
1315 } else {
1316 Ok(ch)
1317 }
1318 } else {
1319 perr!(self, EofWhileParsing)
1320 }
1321 }
1322
1323 #[inline(always)]
1324 pub fn skip_number(&mut self, first: u8) -> Result<&'de str> {
1325 let start = self.read.index() - 1;
1326 self.do_skip_number(first)?;
1327 let end = self.read.index();
1328 Ok(as_str(self.read.slice_unchecked(start, end)))
1329 }
1330
1331 #[inline(always)]
1332 pub(crate) fn do_skip_number(&mut self, mut first: u8) -> Result<()> {
1333 if first == b'-' {
1335 first = self.skip_single_digit()?;
1336 }
1337
1338 let second = self.read.peek();
1340 if first == b'0' && matches!(second, Some(b'0'..=b'9')) {
1341 return perr!(self, InvalidNumber);
1342 }
1343
1344 let mut is_float: bool = false;
1346 match second {
1347 Some(b'0'..=b'9') => self.read.eat(1),
1348 Some(b'.') => {
1349 is_float = true;
1350 self.read.eat(1);
1351 self.skip_single_digit()?;
1352 }
1353 Some(b'e' | b'E') => {
1354 self.read.eat(1);
1355 return self.skip_exponent();
1356 }
1357 _ => return Ok(()),
1358 }
1359
1360 const LANES: usize = i8x32::LANES;
1362 while let Some(chunk) = self.read.peek_n(LANES) {
1363 let v = unsafe { i8x32::from_slice_unaligned_unchecked(chunk) };
1364 let zero = i8x32::splat(b'0' as i8);
1365 let nine = i8x32::splat(b'9' as i8);
1366 let mut nondigits = (zero.gt(&v) | v.gt(&nine)).bitmask();
1367 if nondigits != 0 {
1368 let mut cnt = nondigits.trailing_zeros() as usize;
1369 let ch = chunk[cnt];
1370 if ch == b'.' && !is_float {
1371 self.read.eat(cnt + 1);
1372 self.skip_single_digit()?;
1374
1375 cnt += 2;
1377 if cnt >= LANES {
1378 is_float = true;
1379 continue;
1380 }
1381
1382 nondigits = nondigits.wrapping_shr(cnt as u32);
1383 if nondigits != 0 {
1384 let offset = nondigits.trailing_zeros() as usize;
1385 let ch = chunk[cnt + offset];
1386 if ch == b'e' || ch == b'E' {
1387 self.read.eat(offset + 1);
1388 return self.skip_exponent();
1389 } else {
1390 self.read.eat(offset);
1391 return Ok(());
1392 }
1393 } else {
1394 self.read.eat(32 - cnt);
1395 is_float = true;
1396 continue;
1397 }
1398 } else if ch == b'e' || ch == b'E' {
1399 self.read.eat(cnt + 1);
1400 return self.skip_exponent();
1401 } else {
1402 self.read.eat(cnt);
1403 return Ok(());
1404 }
1405 }
1406 self.read.eat(32);
1408 }
1409
1410 while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1412 self.read.eat(1);
1413 }
1414
1415 match self.read.peek() {
1416 Some(b'.') if !is_float => {
1417 self.read.eat(1);
1418 self.skip_single_digit()?;
1419 while matches!(self.read.peek(), Some(b'0'..=b'9')) {
1420 self.read.eat(1);
1421 }
1422 match self.read.peek() {
1423 Some(b'e' | b'E') => {
1424 self.read.eat(1);
1425 return self.skip_exponent();
1426 }
1427 _ => return Ok(()),
1428 }
1429 }
1430 Some(b'e' | b'E') => {
1431 self.read.eat(1);
1432 return self.skip_exponent();
1433 }
1434 _ => {}
1435 }
1436 Ok(())
1437 }
1438
1439 pub fn skip_one(&mut self, checked: bool) -> Result<(&'de [u8], ParseStatus)> {
1440 let ch = match self.skip_space() {
1441 Some(ch) => ch,
1442 None => return perr!(self, EofWhileParsing),
1443 };
1444 let start = self.read.index() - 1;
1445 let mut status = ParseStatus::None;
1446 match ch {
1447 c @ b'-' | c @ b'0'..=b'9' => {
1448 if checked {
1449 self.skip_number(c)?;
1450 } else {
1451 self.skip_number_unsafe()?;
1452 }
1453 Ok(())
1454 }
1455 b'"' => {
1456 status = if checked {
1457 self.skip_string()?
1458 } else {
1459 unsafe { self.skip_string_unchecked() }?
1460 };
1461 Ok(())
1462 }
1463 b'{' => {
1464 if checked {
1465 self.skip_object()
1466 } else {
1467 self.skip_container(b'{', b'}')
1468 }
1469 }
1470 b'[' => {
1471 if checked {
1472 self.skip_array()
1473 } else {
1474 self.skip_container(b'[', b']')
1475 }
1476 }
1477 b't' => self.parse_literal("rue"),
1478 b'f' => self.parse_literal("alse"),
1479 b'n' => self.parse_literal("ull"),
1480 _ => perr!(self, InvalidJsonValue),
1481 }?;
1482 let slice = self.read.slice_unchecked(start, self.read.index());
1483 Ok((slice, status))
1484 }
1485
1486 #[inline(always)]
1487 pub(crate) fn parse_trailing(&mut self) -> Result<()> {
1488 let exceed = self.read.index() > self.read.as_u8_slice().len();
1490 if exceed {
1491 return perr!(self, EofWhileParsing);
1492 }
1493
1494 let remain = self.read.remain() > 0;
1496 if !remain {
1497 return Ok(());
1498 }
1499
1500 let last = self.skip_space();
1503 let exceed = self.read.index() > self.read.as_u8_slice().len();
1504 if last.is_some() && !exceed {
1505 perr!(self, TrailingCharacters)
1506 } else {
1507 Ok(())
1508 }
1509 }
1510
1511 fn get_from_object(
1515 &mut self,
1516 target_key: &str,
1517 temp_buf: &mut Vec<u8>,
1518 checked: bool,
1519 ) -> Result<()> {
1520 match self.skip_space() {
1521 Some(b'{') => {}
1522 Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON object")),
1523 None => return perr!(self, EofWhileParsing),
1524 }
1525
1526 match self.get_next_token([b'"', b'}'], 1) {
1528 Some(b'"') => {}
1529 Some(b'}') => return perr!(self, GetInEmptyObject),
1530 None => return perr!(self, EofWhileParsing),
1531 Some(_) => unreachable!(),
1532 }
1533
1534 loop {
1535 let key = self.parse_string_raw(temp_buf)?;
1536 self.parse_object_clo()?;
1537 if key.len() == target_key.len() && key.as_ref() == target_key.as_bytes() {
1538 return Ok(());
1539 }
1540
1541 if checked {
1542 self.skip_one(true)?;
1543 match self.skip_space() {
1544 Some(b'}') => return perr!(self, GetUnknownKeyInObject),
1545 Some(b',') => match self.skip_space() {
1546 Some(b'"') => continue,
1547 _ => return perr!(self, ExpectObjectKeyOrEnd),
1548 },
1549 None => return perr!(self, EofWhileParsing),
1550 _ => return perr!(self, ExpectedObjectCommaOrEnd),
1551 };
1552 } else {
1553 match self.skip_space() {
1555 Some(b'{') => self.skip_container(b'{', b'}')?,
1556 Some(b'[') => self.skip_container(b'[', b']')?,
1557 Some(b'"') => unsafe {
1558 let _ = self.skip_string_unchecked()?;
1559 },
1560 None => return perr!(self, EofWhileParsing),
1561 _ => {}
1562 };
1563 match self.get_next_token([b'"', b'}'], 1) {
1565 Some(b'"') => continue,
1566 Some(b'}') => return perr!(self, GetUnknownKeyInObject),
1567 None => return perr!(self, EofWhileParsing),
1568 Some(_) => unreachable!(),
1569 }
1570 }
1571 }
1572 }
1573
1574 fn get_from_array(&mut self, index: usize, checked: bool) -> Result<()> {
1577 let mut count = index;
1578 match self.skip_space() {
1579 Some(b'[') => {}
1580 Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON array")),
1581 None => return perr!(self, EofWhileParsing),
1582 }
1583
1584 if checked {
1585 match self.skip_space_peek() {
1586 Some(b']') => return perr!(self, GetInEmptyArray),
1587 Some(_) => {}
1588 None => return perr!(self, EofWhileParsing),
1589 }
1590 }
1591
1592 while count > 0 {
1593 if checked {
1594 self.skip_one(true)?;
1595 match self.skip_space() {
1596 Some(b']') => return perr!(self, GetIndexOutOfArray),
1597 Some(b',') => {}
1598 Some(_) => return perr!(self, ExpectedArrayCommaOrEnd),
1599 None => return perr!(self, EofWhileParsing),
1600 }
1601 count -= 1;
1602 match self.skip_space_peek() {
1603 Some(_) if count == 0 => return Ok(()),
1604 None => return perr!(self, EofWhileParsing),
1605 _ => continue,
1606 }
1607 } else {
1608 match self.skip_space() {
1610 Some(b'{') => self.skip_container(b'{', b'}')?,
1611 Some(b'[') => self.skip_container(b'[', b']')?,
1612 Some(b'"') => unsafe {
1613 let _ = self.skip_string_unchecked()?;
1614 },
1615 Some(b']') => return perr!(self, GetInEmptyArray),
1616 None => return perr!(self, EofWhileParsing),
1617 _ => {}
1618 };
1619 match self.get_next_token([b']', b','], 1) {
1621 Some(b']') => return perr!(self, GetIndexOutOfArray),
1622 Some(b',') => {
1623 count -= 1;
1624 continue;
1625 }
1626 None => return perr!(self, EofWhileParsing),
1627 Some(_) => unreachable!(),
1628 }
1629 }
1630 }
1631
1632 Ok(())
1633 }
1634
1635 pub(crate) fn get_from_with_iter<P: IntoIterator>(
1636 &mut self,
1637 path: P,
1638 checked: bool,
1639 ) -> Result<(&'de [u8], ParseStatus)>
1640 where
1641 P::Item: Index,
1642 {
1643 let mut temp_buf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
1645 for jp in path.into_iter() {
1646 if let Some(key) = jp.as_key() {
1647 self.get_from_object(key, &mut temp_buf, checked)
1648 } else if let Some(index) = jp.as_index() {
1649 self.get_from_array(index, checked)
1650 } else {
1651 unreachable!();
1652 }?;
1653 }
1654 self.skip_one(true)
1655 }
1656
1657 fn get_many_rec(
1658 &mut self,
1659 node: &PointerTreeNode,
1660 out: &mut Vec<Option<LazyValue<'de>>>,
1661 strbuf: &mut Vec<u8>,
1662 remain: &mut usize,
1663 is_safe: bool,
1664 ) -> Result<()> {
1665 if *remain == 0 {
1667 return Ok(());
1668 }
1669
1670 let ch = self.skip_space_peek();
1672 if ch.is_none() {
1673 return perr!(self, EofWhileParsing);
1674 }
1675
1676 let start = self.read.index();
1678 let slice: &'de [u8];
1679
1680 let mut status = ParseStatus::None;
1681 match &node.children {
1682 PointerTreeInner::Empty => {
1683 status = self.skip_one(true)?.1;
1684 }
1685 PointerTreeInner::Index(midxs) => {
1686 self.get_many_index(midxs, strbuf, out, remain, is_safe)?
1687 }
1688 PointerTreeInner::Key(mkeys) => {
1689 self.get_many_keys(mkeys, strbuf, out, remain, is_safe)?
1690 }
1691 };
1692
1693 if !node.order.is_empty() {
1694 slice = self.read.slice_unchecked(start, self.read.index());
1695 let lv = LazyValue::new(slice.into(), status.into());
1696 for p in &node.order {
1697 out[*p] = Some(lv.clone());
1698 }
1699 *remain -= node.order.len();
1700 }
1701 Ok(())
1702 }
1703
1704 #[allow(clippy::mutable_key_type)]
1705 #[allow(clippy::mutable_key_type)]
1706 fn get_many_keys(
1707 &mut self,
1708 mkeys: &MultiKey,
1709 strbuf: &mut Vec<u8>,
1710 out: &mut Vec<Option<LazyValue<'de>>>,
1711 remain: &mut usize,
1712 checked: bool,
1713 ) -> Result<()> {
1714 debug_assert!(strbuf.is_empty());
1715 match self.skip_space() {
1716 Some(b'{') => {}
1717 Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON object")),
1718 None => return perr!(self, EofWhileParsing),
1719 }
1720
1721 if checked {
1723 match self.skip_space() {
1724 Some(b'"') => {}
1725 Some(b'}') => return perr!(self, GetInEmptyObject),
1726 _ => return perr!(self, ExpectObjectKeyOrEnd),
1727 }
1728 } else {
1729 match self.get_next_token([b'"', b'}'], 1) {
1730 Some(b'"') => {}
1731 Some(b'}') => return perr!(self, GetInEmptyObject),
1732 None => return perr!(self, EofWhileParsing),
1733 Some(_) => unreachable!(),
1734 }
1735 }
1736
1737 loop {
1738 let key = self.parse_str(strbuf)?;
1739 self.parse_object_clo()?;
1740 if let Some(val) = mkeys.get(key.deref()) {
1741 self.get_many_rec(val, out, strbuf, remain, checked)?;
1742 if *remain == 0 {
1743 break;
1744 }
1745 } else if checked {
1746 self.skip_one(true)?;
1747 } else {
1748 match self.skip_space() {
1750 Some(b'{') => self.skip_container(b'{', b'}')?,
1751 Some(b'[') => self.skip_container(b'[', b']')?,
1752 Some(b'"') => unsafe {
1753 let _ = self.skip_string_unchecked()?;
1754 },
1755 None => return perr!(self, EofWhileParsing),
1756 _ => {}
1757 };
1758 }
1759
1760 if checked {
1761 match self.skip_space() {
1762 Some(b',') if self.skip_space() == Some(b'"') => continue,
1763 Some(b',') => return perr!(self, ExpectObjectKeyOrEnd),
1764 Some(b'}') => break,
1765 Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
1766 None => return perr!(self, EofWhileParsing),
1767 }
1768 } else {
1769 match self.get_next_token([b'"', b'}'], 1) {
1771 Some(b'"') => {}
1772 Some(b'}') => break,
1773 None => return perr!(self, EofWhileParsing),
1774 Some(_) => unreachable!(),
1775 }
1776 }
1777 }
1778
1779 Ok(())
1780 }
1781
/// Test helper: the unread part of the input, viewed as a string via `as_str`.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn remain_str(&self) -> &'de str {
    let bytes = self.remain_u8_slice();
    as_str(bytes)
}
1787
/// Test helper: the unread part of the input, from the current index up to
/// index + `remain()`.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn remain_u8_slice(&self) -> &'de [u8] {
    let from = self.read.index();
    let to = from + self.read.remain();
    self.read.slice_unchecked(from, to)
}
1795
1796 fn get_many_index(
1797 &mut self,
1798 midx: &MultiIndex,
1799 strbuf: &mut Vec<u8>,
1800 out: &mut Vec<Option<LazyValue<'de>>>,
1801 remain: &mut usize,
1802 checked: bool,
1803 ) -> Result<()> {
1804 match self.skip_space() {
1805 Some(b'[') => {}
1806 Some(peek) => return Err(self.peek_invalid_type(peek, &"a JSON array")),
1807 None => return perr!(self, EofWhileParsing),
1808 }
1809 let mut index = 0;
1810 let mut visited = 0;
1811
1812 match self.skip_space_peek() {
1813 Some(b']') => return perr!(self, GetInEmptyArray),
1814 Some(_) => {}
1815 None => return perr!(self, EofWhileParsing),
1816 }
1817
1818 loop {
1819 if let Some(val) = midx.get(&index) {
1820 self.get_many_rec(val, out, strbuf, remain, checked)?;
1821 visited += 1;
1822 if *remain == 0 {
1823 break;
1824 }
1825 } else if checked {
1826 self.skip_one(true)?;
1827 } else {
1828 match self.skip_space() {
1830 Some(b'{') => self.skip_container(b'{', b'}')?,
1831 Some(b'[') => self.skip_container(b'[', b']')?,
1832 Some(b'"') => unsafe {
1833 let _ = self.skip_string_unchecked()?;
1834 },
1835 None => return perr!(self, EofWhileParsing),
1836 _ => {}
1837 };
1838 }
1839
1840 if checked {
1841 match self.skip_space() {
1842 Some(b']') => break,
1843 Some(b',') => {
1844 index += 1;
1845 continue;
1846 }
1847 Some(_) => return perr!(self, ExpectedArrayCommaOrEnd),
1848 None => return perr!(self, EofWhileParsing),
1849 }
1850 } else {
1851 match self.get_next_token([b']', b','], 1) {
1853 Some(b']') => break,
1854 Some(b',') => {
1855 index += 1;
1856 continue;
1857 }
1858 None => return perr!(self, EofWhileParsing),
1859 Some(_) => unreachable!(),
1860 }
1861 }
1862 }
1863
1864 if visited < midx.len() {
1866 perr!(self, GetIndexOutOfArray)
1867 } else {
1868 Ok(())
1869 }
1870 }
1871
1872 pub(crate) fn get_many(
1873 &mut self,
1874 tree: &PointerTree,
1875 is_safe: bool,
1876 ) -> Result<Vec<Option<LazyValue<'de>>>> {
1877 let mut strbuf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
1878 let mut remain = tree.size();
1879 let mut out: Vec<Option<LazyValue<'de>>> = Vec::with_capacity(tree.size());
1880 out.resize(tree.size(), Option::default());
1881 let cur = &tree.root;
1882 self.get_many_rec(cur, &mut out, &mut strbuf, &mut remain, is_safe)?;
1883 Ok(out)
1884 }
1885
1886 #[cold]
1887 pub fn peek_invalid_type(&mut self, peek: u8, exp: &dyn Expected) -> Error {
1888 let err = match peek {
1889 b'n' => {
1890 if let Err(err) = self.parse_literal("ull") {
1891 return err;
1892 }
1893 de::Error::invalid_type(Unexpected::Unit, exp)
1894 }
1895 b't' => {
1896 if let Err(err) = self.parse_literal("rue") {
1897 return err;
1898 }
1899 de::Error::invalid_type(Unexpected::Bool(true), exp)
1900 }
1901 b'f' => {
1902 if let Err(err) = self.parse_literal("alse") {
1903 return err;
1904 }
1905 de::Error::invalid_type(Unexpected::Bool(false), exp)
1906 }
1907 c @ b'-' | c @ b'0'..=b'9' => match self.parse_number(c) {
1908 Ok(n) => invalid_type_number(&n, exp),
1909 Err(err) => return err,
1910 },
1911 b'"' => {
1912 let mut scratch = Vec::new();
1913 match self.parse_str(&mut scratch) {
1914 Ok(s) if std::str::from_utf8(s.as_bytes()).is_ok() => {
1915 de::Error::invalid_type(Unexpected::Str(&s), exp)
1916 }
1917 Ok(s) => de::Error::invalid_type(Unexpected::Bytes(s.as_bytes()), exp),
1918 Err(err) => return err,
1919 }
1920 }
1921 b'[' => {
1923 self.read.backward(1);
1924
1925 match self.skip_one(true) {
1926 Ok(_) => de::Error::invalid_type(Unexpected::Seq, exp),
1927 Err(err) => return err,
1928 }
1929 }
1930 b'{' => {
1931 self.read.backward(1);
1932 match self.skip_one(true) {
1933 Ok(_) => de::Error::invalid_type(Unexpected::Map, exp),
1934 Err(err) => return err,
1935 }
1936 }
1937 _ => self.error(ErrorCode::InvalidJsonValue),
1938 };
1939 self.fix_position(err)
1940 }
1941}
1942
1943impl<'de, R> Parser<R>
1944where
1945 R: Reader<'de>,
1946{
1947 pub fn get_by_schema(&mut self, schema: &mut crate::Value) -> Result<()> {
1948 if !schema.is_object() {
1949 return perr!(
1950 self,
1951 Message(std::borrow::Cow::Borrowed("The schema must be an object"))
1952 );
1953 }
1954
1955 let mut strbuf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY);
1956 self.get_by_schema_rec(schema, &mut strbuf)
1957 }
1958
1959 fn get_by_schema_rec(&mut self, schema: &mut crate::Value, strbuf: &mut Vec<u8>) -> Result<()> {
1960 let ch = self.skip_space_peek();
1961 if ch.is_none() {
1962 return perr!(self, EofWhileParsing);
1963 }
1964
1965 let mut should_replace = true;
1966 let start = self.read.index();
1967
1968 match (schema.as_object_mut(), ch) {
1969 (Some(object), Some(b'{')) => {
1970 let mut key_values = HashMap::new();
1971 for (key, value) in object.iter_mut() {
1972 key_values.insert(key, value);
1973 }
1974
1975 should_replace = key_values.is_empty();
1977 if should_replace {
1978 self.skip_one(true)?;
1979 } else {
1980 self.read.eat(1);
1981 match self.skip_space() {
1982 Some(b'"') => {}
1983 Some(b'}') => return Ok(()),
1984 _ => {
1985 return perr!(self, ExpectObjectKeyOrEnd);
1986 }
1987 }
1988
1989 loop {
1990 let key = self.parse_str(strbuf)?;
1991 self.parse_object_clo()?;
1992 if let Some(val) = key_values.get_mut(key.deref()) {
1993 self.get_by_schema_rec(val, strbuf)?;
1994 } else {
1995 self.skip_one(true)?;
1996 }
1997
1998 match self.skip_space() {
1999 Some(b',') => match self.skip_space() {
2000 Some(b'"') => continue,
2001 _ => return perr!(self, ExpectObjectKeyOrEnd),
2002 },
2003 Some(b'}') => break,
2004 Some(_) => return perr!(self, ExpectedObjectCommaOrEnd),
2005 None => return perr!(self, EofWhileParsing),
2006 }
2007 }
2008 }
2009 }
2010 _ => {
2011 self.skip_one(true)?;
2012 }
2013 }
2014
2015 let end = self.read.index();
2016 if should_replace && start < end {
2017 let slice = self.read.slice_unchecked(start, end);
2018 *schema = crate::from_slice(slice)?;
2019 }
2020 Ok(())
2021 }
2022}