Skip to main content

sonic_rs/serde/
de.rs

1//! Deserialize JSON data to a Rust data structure.
2
// The code is adapted from [serde_json](https://github.com/serde-rs/json), with the
// necessary parts modified.
4use std::{marker::PhantomData, mem::ManuallyDrop, ptr::slice_from_raw_parts, sync::Arc};
5
6use serde::{
7    de::{self, Expected, Unexpected},
8    forward_to_deserialize_any,
9};
10use sonic_number::ParserNumber;
11
12use crate::{
13    error::{
14        Error,
15        ErrorCode::{self, EofWhileParsing, RecursionLimitExceeded},
16        Result,
17    },
18    parser::{as_str, ParseStatus, ParsedSlice, Parser, Reference},
19    reader::{Read, Reader},
20    value::{node::Value, shared::Shared},
21    JsonInput, OwnedLazyValue,
22};
/// Initial nesting budget given to every new [`Deserializer`] (the `u8` maximum).
const MAX_ALLOWED_DEPTH: u8 = u8::MAX;
24
25//////////////////////////////////////////////////////////////////////////////
26
27/// A structure that deserializes JSON into Rust values.
28pub struct Deserializer<R> {
29    pub(crate) parser: Parser<R>,
30    scratch: Vec<u8>,
31    remaining_depth: u8,
32    shared: Option<Arc<Shared>>, // the shared allocator for `Value`
33}
34
35// some functions only used for struct visitors.
36impl<'de, R: Reader<'de>> Deserializer<R> {
37    /// Create a new deserializer.
38    pub fn new(read: R) -> Self {
39        Self {
40            parser: Parser::new(read),
41            scratch: Vec::new(),
42            remaining_depth: MAX_ALLOWED_DEPTH,
43            shared: Option::None,
44        }
45    }
46
47    /// Parse all number as [`crate::RawNumber`].
48    ///
49    /// # Example
50    /// ```
51    /// use sonic_rs::{Deserializer, Value};
52    /// let json = r#"{"a":1.2345678901234567890123}"#;
53    /// let mut de = Deserializer::from_str(json).use_rawnumber();
54    /// let value: Value = de.deserialize().unwrap();
55    /// let out = sonic_rs::to_string(&value).unwrap();
56    /// assert_eq!(json, out);
57    /// ```
58    pub fn use_rawnumber(mut self) -> Self {
59        self.parser.cfg.use_rawnumber = true;
60        self
61    }
62
63    /// Allow to parse JSON with invalid UTF-8 and UTF-16 characters. Will replace them with
64    /// `\uFFFD` (displayed as �).
65    ///
66    /// # Example
67    /// ```
68    /// use sonic_rs::{Deserializer, Value};
69    /// let data = [
70    ///     &[b'\"', 0xff, b'\"'][..],         // invalid UTF8 char in string
71    ///     br#"{"a":"\ud800","b":"\udc00"}"#, // invalid UTF16 surrogate pair
72    /// ];
73    /// let expect = [r#""�""#, r#"{"a":"�","b":"�"}"#];
74    ///
75    /// let mut exp = expect.iter();
76    /// for json in data {
77    ///     let mut de = Deserializer::from_slice(json).utf8_lossy();
78    ///     let value: Value = de.deserialize().unwrap();
79    ///     let out = sonic_rs::to_string(&value).unwrap();
80    ///     assert_eq!(&out, exp.next().unwrap());
81    /// }
82    /// ```
83    pub fn utf8_lossy(mut self) -> Self {
84        self.parser.cfg.utf8_lossy = true;
85        self
86    }
87
88    /// Deserialize a JSON stream to a Rust data structure.
89    ///
90    /// It can be used repeatedly and we do not check trailing chars after deserilalized.
91    ///
92    /// # Example
93    ///
94    /// ```
95    /// # use sonic_rs::{prelude::*, Value};
96    ///
97    /// use sonic_rs::Deserializer;
98    ///
99    /// let multiple_json = r#"{"a": 123, "b": "foo"} true [1, 2, 3] wrong chars"#;
100    ///
101    /// let mut deserializer = Deserializer::from_json(multiple_json);
102    ///
103    /// let val: Value = deserializer.deserialize().unwrap();
104    /// assert_eq!(val["a"].as_i64().unwrap(), 123);
105    /// assert_eq!(val["b"].as_str().unwrap(), "foo");
106    ///
107    /// let val: bool = deserializer.deserialize().unwrap();
108    /// assert_eq!(val, true);
109    ///
110    /// let val: Vec<u8> = deserializer.deserialize().unwrap();
111    /// assert_eq!(val, &[1, 2, 3]);
112    ///
113    /// // encounter the wrong chars in json
114    /// assert!(deserializer.deserialize::<Value>().is_err());
115    /// ```
116    pub fn deserialize<T>(&mut self) -> Result<T>
117    where
118        T: de::Deserialize<'de>,
119    {
120        de::Deserialize::deserialize(self)
121    }
122
123    /// Convert Deserializer to a [`StreamDeserializer`].
124    pub fn into_stream<T>(self) -> StreamDeserializer<'de, T, R> {
125        StreamDeserializer {
126            de: self,
127            data: PhantomData,
128            lifetime: PhantomData,
129            is_ending: false,
130        }
131    }
132
133    /// The `Deserializer::end` method should be called after a value has been fully deserialized.
134    /// This allows the `Deserializer` to validate that the input stream is at the end or that it
135    /// only has trailing whitespace.
136    pub fn end(&mut self) -> Result<()> {
137        tri!(self.parser.parse_trailing());
138        Ok(())
139    }
140}
141
142impl<'de> Deserializer<Read<'de>> {
143    /// Create a new deserializer from a json input [`JsonInput`].
144    pub fn from_json<I: JsonInput<'de>>(input: I) -> Self {
145        Self::new(Read::from(input))
146    }
147
148    /// Create a new deserializer from a string.
149    #[allow(clippy::should_implement_trait)]
150    pub fn from_str(s: &'de str) -> Self {
151        Self::new(Read::from(s))
152    }
153
154    /// Create a new deserializer from a string slice.
155    pub fn from_slice(s: &'de [u8]) -> Self {
156        Self::new(Read::from(s))
157    }
158}
159
160/// An iterator that deserializes a json stream into multiple `T` values.
161///
162/// # Example
163///
164/// ```
165/// use sonic_rs::{prelude::*, Deserializer, Value};
166///
167/// let multiple_json = r#"{"a": 123, "b": "foo"} true [1, 2, 3] wrong chars"#;
168///
169/// let mut stream = Deserializer::from_json(multiple_json).into_stream::<Value>();
170///
171/// let val = stream.next().unwrap().unwrap();
172/// assert_eq!(val["a"].as_i64().unwrap(), 123);
173/// assert_eq!(val["b"].as_str().unwrap(), "foo");
174///
175/// let val = stream.next().unwrap().unwrap();
176/// assert_eq!(val, true);
177///
178/// let val = stream.next().unwrap().unwrap();
179/// assert_eq!(val, &[1, 2, 3]);
180///
181/// // encounter the wrong chars in json
182/// assert!(stream.next().unwrap().is_err());
183/// ```
184pub struct StreamDeserializer<'de, T, R> {
185    de: Deserializer<R>,
186    data: PhantomData<T>,
187    lifetime: PhantomData<&'de R>,
188    is_ending: bool,
189}
190
191impl<'de, T, R> Iterator for StreamDeserializer<'de, T, R>
192where
193    T: de::Deserialize<'de>,
194    R: Reader<'de>,
195{
196    type Item = Result<T>;
197
198    fn next(&mut self) -> Option<Self::Item> {
199        if self.is_ending {
200            return None;
201        }
202        let val: Result<T> = self.de.deserialize();
203        if val.is_err() {
204            self.is_ending = true;
205        }
206        Some(val)
207    }
208}
209
// Only this crate's own error type is propagated, so the `From` conversion performed by the
// standard `?` / `try!` is unnecessary. This reduces lines of LLVM IR by 4%.
macro_rules! tri {
    ($e:expr $(,)?) => {
        match $e {
            Ok(value) => value,
            Err(error) => return Err(error),
        }
    };
}
222
223pub(crate) use tri;
224
225impl<'de, R: Reader<'de>> Deserializer<R> {
226    /// Ensures recursion depth limit; calls `f` with `self` and restores depth on return.
227    #[inline]
228    fn with_depth_limit<F, T>(&mut self, f: F) -> Result<T>
229    where
230        F: FnOnce(&mut Self) -> Result<T>,
231    {
232        self.remaining_depth -= 1;
233        if self.remaining_depth == 0 {
234            return Err(self.parser.error(RecursionLimitExceeded));
235        }
236        let result = f(self);
237        self.remaining_depth += 1;
238        result
239    }
240}
241
242fn visit_number<'de, V>(num: &ParserNumber, visitor: V) -> Result<V::Value>
243where
244    V: de::Visitor<'de>,
245{
246    match *num {
247        ParserNumber::Float(x) => visitor.visit_f64(x),
248        ParserNumber::Unsigned(x) => visitor.visit_u64(x),
249        ParserNumber::Signed(x) => visitor.visit_i64(x),
250    }
251}
252
253pub(crate) fn invalid_type_number(num: &ParserNumber, exp: &dyn Expected) -> Error {
254    match *num {
255        ParserNumber::Float(x) => de::Error::invalid_type(Unexpected::Float(x), exp),
256        ParserNumber::Unsigned(x) => de::Error::invalid_type(Unexpected::Unsigned(x), exp),
257        ParserNumber::Signed(x) => de::Error::invalid_type(Unexpected::Signed(x), exp),
258    }
259}
260
// Generates a `de::Deserializer` number method named `$name` that simply forwards to
// `Deserializer::deserialize_number`.
macro_rules! impl_deserialize_number {
    ($name:ident) => {
        fn $name<V: de::Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
            self.deserialize_number(visitor)
        }
    };
}
271
272// some functions only used for struct visitors.
273impl<'de, R: Reader<'de>> Deserializer<R> {
274    /// Fix error position for deserialized results.
275    #[inline]
276    fn fix_position<T>(&self, result: Result<T>) -> Result<T> {
277        result.map_err(|err| self.parser.fix_position(err))
278    }
279
280    pub(crate) fn deserialize_number<V>(&mut self, visitor: V) -> Result<V::Value>
281    where
282        V: de::Visitor<'de>,
283    {
284        let Some(peek) = self.parser.skip_space() else {
285            return Err(self.parser.error(EofWhileParsing));
286        };
287
288        let value = match peek {
289            c @ b'-' | c @ b'0'..=b'9' => visit_number(&tri!(self.parser.parse_number(c)), visitor),
290            _ => Err(self.peek_invalid_type(peek, &visitor)),
291        };
292
293        // fixed error position if not matched type
294        self.fix_position(value)
295    }
296
297    #[cold]
298    fn peek_invalid_type(&mut self, peek: u8, exp: &dyn Expected) -> Error {
299        self.parser.peek_invalid_type(peek, exp)
300    }
301
302    pub fn end_seq(&mut self) -> Result<()> {
303        self.parser.parse_array_end()
304    }
305
306    pub fn end_map(&mut self) -> Result<()> {
307        match self.parser.skip_space() {
308            Some(b'}') => Ok(()),
309            Some(b',') => Err(self.parser.error(ErrorCode::TrailingComma)),
310            Some(_) => Err(self.parser.error(ErrorCode::ExpectedObjectCommaOrEnd)),
311            None => Err(self.parser.error(ErrorCode::EofWhileParsing)),
312        }
313    }
314
315    fn scan_integer128(&mut self, buf: &mut String) -> Result<()> {
316        match self.parser.read.peek() {
317            Some(b'0') => {
318                buf.push('0');
319                self.parser.read.eat(1);
320                // There can be only one leading '0'.
321                if let Some(ch) = self.parser.read.peek() {
322                    if ch.is_ascii_digit() {
323                        return Err(self.parser.error(ErrorCode::InvalidNumber));
324                    }
325                }
326                Ok(())
327            }
328            Some(c) if c.is_ascii_digit() => {
329                buf.push(c as char);
330                self.parser.read.eat(1);
331                while let c @ b'0'..=b'9' = self.parser.read.peek().unwrap_or_default() {
332                    self.parser.read.eat(1);
333                    buf.push(c as char);
334                }
335                Ok(())
336            }
337            _ => Err(self.parser.error(ErrorCode::InvalidNumber)),
338        }
339    }
340
341    fn deserialize_lazyvalue<V>(&mut self, visitor: V) -> Result<V::Value>
342    where
343        V: de::Visitor<'de>,
344    {
345        let (raw, status) = self.parser.skip_one(true)?;
346        if status == ParseStatus::HasEscaped {
347            visitor.visit_str(as_str(raw))
348        } else {
349            visitor.visit_borrowed_str(as_str(raw))
350        }
351    }
352
353    fn deserialize_owned_lazyvalue<V>(&mut self, visitor: V) -> Result<V::Value>
354    where
355        V: de::Visitor<'de>,
356    {
357        let val = ManuallyDrop::new(self.parser.get_owned_lazyvalue(true)?);
358        // #Safety
359        // the json is validate before parsing json, and we pass the document using visit_bytes
360        // here.
361        unsafe {
362            let binary = &*slice_from_raw_parts(
363                &val as *const _ as *const u8,
364                std::mem::size_of::<OwnedLazyValue>(),
365            );
366            visitor.visit_bytes(binary)
367        }
368    }
369
370    fn deserialize_value<V>(&mut self, visitor: V) -> Result<V::Value>
371    where
372        V: de::Visitor<'de>,
373    {
374        let mut val = Value::new();
375        if self.parser.read.index() == 0 {
376            // will parse the JSON inplace
377            let cfg = self.parser.cfg;
378            let json = self.parser.read.as_u8_slice();
379
380            // get n to check trailing characters in later
381            let n = if cfg.utf8_lossy && self.parser.read.next_invalid_utf8() != usize::MAX {
382                // repr the invalid utf8, not need to care about the invalid UTF8 char in non-string
383                // parts, it will cause errors when parsing.
384                val.parse_with_padding(String::from_utf8_lossy(json).as_bytes(), cfg)?
385            } else {
386                val.parse_with_padding(json, cfg)?
387            };
388            self.parser.read.eat(n);
389        } else {
390            let shared = unsafe {
391                if self.shared.is_none() {
392                    self.shared = Some(Arc::new(Shared::default()));
393                }
394                let shared = self.shared.as_mut().unwrap();
395                let ptr = Arc::as_ptr(shared);
396                // Expose Arc allocation provenance for pack_shared's
397                // Arc::increment_strong_count (needs access via with_exposed_provenance).
398                ptr.expose_provenance();
399                &mut *(ptr as *mut _)
400            };
401            // deserialize some json parts into `Value`, not use padding buffer, avoid the memory
402            // copy
403            val.parse_without_padding(shared, &mut self.scratch, &mut self.parser)?
404        };
405
406        let val = ManuallyDrop::new(val);
407        // #Safety
408        // the json is validate before parsing json, and we pass the document using visit_bytes
409        // here.
410        unsafe {
411            let binary =
412                &*slice_from_raw_parts(&val as *const _ as *const u8, std::mem::size_of::<Value>());
413            visitor.visit_bytes(binary)
414        }
415    }
416
417    // we deserialize json number from string or number types
418    fn deserialize_rawnumber<V>(&mut self, visitor: V) -> Result<V::Value>
419    where
420        V: de::Visitor<'de>,
421    {
422        let raw = match self.parser.skip_space_peek() {
423            Some(c @ b'-' | c @ b'0'..=b'9') => {
424                self.parser.read.eat(1);
425                self.parser.skip_number(c)?
426            }
427            Some(b'"') => {
428                self.parser.read.eat(1);
429                let start = self.parser.read.index();
430                match self.parser.read.next() {
431                    Some(c @ b'-' | c @ b'0'..=b'9') => {
432                        self.parser.skip_number(c)?;
433                    }
434                    _ => return Err(self.parser.error(ErrorCode::InvalidNumber)),
435                }
436                let end = self.parser.read.index();
437                let raw = as_str(self.parser.read.slice_unchecked(start, end));
438                // match the right quote
439                if self.parser.read.next() != Some(b'"') {
440                    return Err(self.parser.error(ErrorCode::InvalidNumber));
441                }
442                raw
443            }
444            _ => return Err(self.parser.error(ErrorCode::InvalidNumber)),
445        };
446
447        visitor.visit_borrowed_str(raw)
448    }
449}
450
451impl<'de, 'a, R: Reader<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> {
452    type Error = Error;
453    #[inline]
454    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
455    where
456        V: de::Visitor<'de>,
457    {
458        let Some(peek) = self.parser.skip_space() else {
459            return Err(self.parser.error(EofWhileParsing));
460        };
461
462        let value = match peek {
463            b'n' => {
464                tri!(self.parser.parse_literal("ull"));
465                visitor.visit_unit()
466            }
467            b't' => {
468                tri!(self.parser.parse_literal("rue"));
469                visitor.visit_bool(true)
470            }
471            b'f' => {
472                tri!(self.parser.parse_literal("alse"));
473                visitor.visit_bool(false)
474            }
475            c @ b'-' | c @ b'0'..=b'9' => visit_number(&tri!(self.parser.parse_number(c)), visitor),
476            b'"' => match tri!(self.parser.parse_str(&mut self.scratch)) {
477                Reference::Borrowed(s) => visitor.visit_borrowed_str(s),
478                Reference::Copied(s) => visitor.visit_str(s),
479            },
480            b'[' => {
481                let ret = self.with_depth_limit(|de| visitor.visit_seq(SeqAccess::new(de)));
482                match (ret, self.end_seq()) {
483                    (Ok(ret), Ok(())) => Ok(ret),
484                    (Err(err), _) | (_, Err(err)) => Err(err),
485                }
486            }
487            b'{' => {
488                let ret = self.with_depth_limit(|de| visitor.visit_map(MapAccess::new(de)));
489                match (ret, self.end_map()) {
490                    (Ok(ret), Ok(())) => Ok(ret),
491                    (Err(err), _) | (_, Err(err)) => Err(err),
492                }
493            }
494            _ => Err(self.parser.error(ErrorCode::InvalidJsonValue)),
495        };
496
497        match value {
498            Ok(value) => Ok(value),
499            // The de::Error impl creates errors with unknown line and column.
500            // Fill in the position here by looking at the current index in the
501            // input. There is no way to tell whether this should call `error`
502            // or `error` so pick the one that seems correct more often.
503            // Worst case, the position is off by one character.
504            Err(err) => Err(self.parser.fix_position(err)),
505        }
506    }
507
508    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
509    where
510        V: de::Visitor<'de>,
511    {
512        let Some(peek) = self.parser.skip_space() else {
513            return Err(self.parser.error(ErrorCode::EofWhileParsing));
514        };
515
516        let value = match peek {
517            b't' => {
518                tri!(self.parser.parse_literal("rue"));
519                visitor.visit_bool(true)
520            }
521            b'f' => {
522                tri!(self.parser.parse_literal("alse"));
523                visitor.visit_bool(false)
524            }
525            _ => Err(self.peek_invalid_type(peek, &visitor)),
526        };
527
528        self.fix_position(value)
529    }
530
531    impl_deserialize_number!(deserialize_i8);
532    impl_deserialize_number!(deserialize_i16);
533    impl_deserialize_number!(deserialize_i32);
534    impl_deserialize_number!(deserialize_i64);
535    impl_deserialize_number!(deserialize_u8);
536    impl_deserialize_number!(deserialize_u16);
537    impl_deserialize_number!(deserialize_u32);
538    impl_deserialize_number!(deserialize_u64);
539    impl_deserialize_number!(deserialize_f32);
540    impl_deserialize_number!(deserialize_f64);
541
542    fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value>
543    where
544        V: de::Visitor<'de>,
545    {
546        let mut buf = String::new();
547        match self.parser.skip_space_peek() {
548            Some(b'-') => {
549                buf.push('-');
550                self.parser.read.eat(1);
551            }
552            Some(_) => {}
553            None => {
554                return Err(self.parser.error(ErrorCode::EofWhileParsing));
555            }
556        };
557
558        tri!(self.scan_integer128(&mut buf));
559
560        let value = match buf.parse() {
561            Ok(int) => visitor.visit_i128(int),
562            Err(_) => {
563                return Err(self.parser.error(ErrorCode::NumberOutOfRange));
564            }
565        };
566
567        self.fix_position(value)
568    }
569
570    fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value>
571    where
572        V: de::Visitor<'de>,
573    {
574        match self.parser.skip_space_peek() {
575            Some(b'-') => {
576                return Err(self.parser.error(ErrorCode::NumberOutOfRange));
577            }
578            Some(_) => {}
579            None => {
580                return Err(self.parser.error(ErrorCode::EofWhileParsing));
581            }
582        }
583
584        let mut buf = String::new();
585        tri!(self.scan_integer128(&mut buf));
586
587        let value = match buf.parse() {
588            Ok(int) => visitor.visit_u128(int),
589            Err(_) => {
590                return Err(self.parser.error(ErrorCode::NumberOutOfRange));
591            }
592        };
593
594        self.fix_position(value)
595    }
596
597    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value>
598    where
599        V: de::Visitor<'de>,
600    {
601        self.deserialize_str(visitor)
602    }
603
604    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value>
605    where
606        V: de::Visitor<'de>,
607    {
608        let Some(peek) = self.parser.skip_space() else {
609            return Err(self.parser.error(ErrorCode::EofWhileParsing));
610        };
611
612        let value = match peek {
613            b'"' => match tri!(self.parser.parse_str(&mut self.scratch)) {
614                Reference::Borrowed(s) => visitor.visit_borrowed_str(s),
615                Reference::Copied(s) => visitor.visit_str(s),
616            },
617            _ => Err(self.peek_invalid_type(peek, &visitor)),
618        };
619
620        self.fix_position(value)
621    }
622
623    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
624    where
625        V: de::Visitor<'de>,
626    {
627        self.deserialize_str(visitor)
628    }
629
630    /// Parses a JSON string as bytes. Note that this function does not check
631    /// whether the bytes represent a valid UTF-8 string.
632    ///
633    /// Followed as `serde_json`.
634    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
635    where
636        V: de::Visitor<'de>,
637    {
638        let Some(peek) = self.parser.skip_space() else {
639            return Err(self.parser.error(ErrorCode::EofWhileParsing));
640        };
641
642        let value = match peek {
643            b'"' => match tri!(self.parser.parse_string_raw(&mut self.scratch)) {
644                ParsedSlice::Borrowed { slice: b, buf: _ } => visitor.visit_borrowed_bytes(b),
645                ParsedSlice::Copied(b) => visitor.visit_bytes(b),
646            },
647            b'[' => {
648                self.parser.read.backward(1);
649                self.deserialize_seq(visitor)
650            }
651            _ => Err(self.peek_invalid_type(peek, &visitor)),
652        };
653
654        // check invalid utf8 with allow space here
655        let _ = self.parser.check_invalid_utf8(true)?;
656        self.fix_position(value)
657    }
658
659    #[inline]
660    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
661    where
662        V: de::Visitor<'de>,
663    {
664        self.deserialize_bytes(visitor)
665    }
666
667    /// Parses a `null` as a None, and any other values as a `Some(...)`.
668    #[inline]
669    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
670    where
671        V: de::Visitor<'de>,
672    {
673        match self.parser.skip_space_peek() {
674            Some(b'n') => {
675                self.parser.read.eat(1);
676                tri!(self.parser.parse_literal("ull"));
677                visitor.visit_none()
678            }
679            _ => visitor.visit_some(self),
680        }
681    }
682
683    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value>
684    where
685        V: de::Visitor<'de>,
686    {
687        let Some(peek) = self.parser.skip_space() else {
688            return Err(self.parser.error(ErrorCode::EofWhileParsing));
689        };
690
691        let value = match peek {
692            b'n' => {
693                tri!(self.parser.parse_literal("ull"));
694                visitor.visit_unit()
695            }
696            _ => Err(self.peek_invalid_type(peek, &visitor)),
697        };
698
699        self.fix_position(value)
700    }
701
702    fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
703    where
704        V: de::Visitor<'de>,
705    {
706        self.deserialize_unit(visitor)
707    }
708
709    /// Parses a newtype struct as the underlying value.
710    #[inline]
711    fn deserialize_newtype_struct<V>(self, name: &'static str, visitor: V) -> Result<V::Value>
712    where
713        V: de::Visitor<'de>,
714    {
715        {
716            if name == crate::serde::rawnumber::TOKEN {
717                return self.deserialize_rawnumber(visitor);
718            } else if name == crate::lazyvalue::TOKEN {
719                return self.deserialize_lazyvalue(visitor);
720            } else if name == crate::lazyvalue::OWNED_LAZY_VALUE_TOKEN {
721                return self.deserialize_owned_lazyvalue(visitor);
722            } else if name == crate::value::de::TOKEN {
723                return self.deserialize_value(visitor);
724            }
725        }
726
727        let _ = name;
728        visitor.visit_newtype_struct(self)
729    }
730
731    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
732    where
733        V: de::Visitor<'de>,
734    {
735        let Some(peek) = self.parser.skip_space() else {
736            return Err(self.parser.error(ErrorCode::EofWhileParsing));
737        };
738
739        let value = match peek {
740            b'[' => {
741                let ret = self.with_depth_limit(|de| visitor.visit_seq(SeqAccess::new(de)));
742                match (ret, self.end_seq()) {
743                    (Ok(ret), Ok(())) => Ok(ret),
744                    (Err(err), _) | (_, Err(err)) => Err(err),
745                }
746            }
747            _ => return Err(self.peek_invalid_type(peek, &visitor)),
748        };
749        self.fix_position(value)
750    }
751
752    fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value>
753    where
754        V: de::Visitor<'de>,
755    {
756        self.deserialize_seq(visitor)
757    }
758
759    fn deserialize_tuple_struct<V>(
760        self,
761        _name: &'static str,
762        _len: usize,
763        visitor: V,
764    ) -> Result<V::Value>
765    where
766        V: de::Visitor<'de>,
767    {
768        self.deserialize_seq(visitor)
769    }
770
771    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value>
772    where
773        V: de::Visitor<'de>,
774    {
775        let Some(peek) = self.parser.skip_space() else {
776            return Err(self.parser.error(ErrorCode::EofWhileParsing));
777        };
778
779        let value = match peek {
780            b'{' => {
781                let ret = self.with_depth_limit(|de| visitor.visit_map(MapAccess::new(de)));
782                match (ret, self.end_map()) {
783                    (Ok(ret), Ok(())) => Ok(ret),
784                    (Err(err), _) | (_, Err(err)) => Err(err),
785                }
786            }
787            _ => return Err(self.peek_invalid_type(peek, &visitor)),
788        };
789        self.fix_position(value)
790    }
791
792    fn deserialize_struct<V>(
793        self,
794        _name: &'static str,
795        _fields: &'static [&'static str],
796        visitor: V,
797    ) -> Result<V::Value>
798    where
799        V: de::Visitor<'de>,
800    {
801        let Some(peek) = self.parser.skip_space() else {
802            return Err(self.parser.error(ErrorCode::EofWhileParsing));
803        };
804
805        let value = match peek {
806            b'[' => {
807                let ret = self.with_depth_limit(|de| visitor.visit_seq(SeqAccess::new(de)));
808                match (ret, self.end_seq()) {
809                    (Ok(ret), Ok(())) => Ok(ret),
810                    (Err(err), _) | (_, Err(err)) => Err(err),
811                }
812            }
813            b'{' => {
814                let ret = self.with_depth_limit(|de| visitor.visit_map(MapAccess::new(de)));
815                match (ret, self.end_map()) {
816                    (Ok(ret), Ok(())) => Ok(ret),
817                    (Err(err), _) | (_, Err(err)) => Err(err),
818                }
819            }
820            _ => return Err(self.peek_invalid_type(peek, &visitor)),
821        };
822
823        self.fix_position(value)
824    }
825
826    /// Parses an enum as an object like `{"$KEY":$VALUE}`, where $VALUE is either a straight
827    /// value, a `[..]`, or a `{..}`.
828    #[inline]
829    fn deserialize_enum<V>(
830        self,
831        _name: &str,
832        _variants: &'static [&'static str],
833        visitor: V,
834    ) -> Result<V::Value>
835    where
836        V: de::Visitor<'de>,
837    {
838        match self.parser.skip_space_peek() {
839            Some(b'{') => {
840                self.parser.read.eat(1);
841                let value =
842                    self.with_depth_limit(|de| visitor.visit_enum(VariantAccess::new(de)))?;
843
844                match self.parser.skip_space() {
845                    Some(b'}') => Ok(value),
846                    Some(_) => Err(self.parser.error(ErrorCode::InvalidJsonValue)),
847                    None => Err(self.parser.error(ErrorCode::EofWhileParsing)),
848                }
849            }
850            Some(b'"') => visitor.visit_enum(UnitVariantAccess::new(self)),
851            Some(_) => Err(self.parser.error(ErrorCode::InvalidJsonValue)),
852            None => Err(self.parser.error(ErrorCode::EofWhileParsing)),
853        }
854    }
855
856    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value>
857    where
858        V: de::Visitor<'de>,
859    {
860        self.deserialize_str(visitor)
861    }
862
863    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
864    where
865        V: de::Visitor<'de>,
866    {
867        // Skip the ignored value with full validation.
868        tri!(self.parser.skip_one(true));
869        visitor.visit_unit()
870    }
871}
872
873pub struct SeqAccess<'a, R: 'a> {
874    de: &'a mut Deserializer<R>,
875    first: bool, // first is marked as
876}
877
878impl<'a, R: 'a> SeqAccess<'a, R> {
879    pub fn new(de: &'a mut Deserializer<R>) -> Self {
880        SeqAccess { de, first: true }
881    }
882}
883
884impl<'de, 'a, R: Reader<'de> + 'a> de::SeqAccess<'de> for SeqAccess<'a, R> {
885    type Error = Error;
886
887    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
888    where
889        T: de::DeserializeSeed<'de>,
890    {
891        match self.de.parser.skip_space_peek() {
892            Some(b']') => Ok(None), // we will check the ending brace after `visit_seq`
893            Some(b',') if !self.first => {
894                self.de.parser.read.eat(1);
895                Ok(Some(tri!(seed.deserialize(&mut *self.de))))
896            }
897            Some(_) => {
898                if self.first {
899                    self.first = false;
900                    Ok(Some(tri!(seed.deserialize(&mut *self.de))))
901                } else {
902                    self.de.parser.read.eat(1); // makes the error position is correct
903                    Err(self.de.parser.error(ErrorCode::ExpectedArrayCommaOrEnd))
904                }
905            }
906            None => Err(self.de.parser.error(ErrorCode::EofWhileParsing)),
907        }
908    }
909}
910
911pub struct MapAccess<'a, R: 'a> {
912    de: &'a mut Deserializer<R>,
913    first: bool,
914}
915
916impl<'a, R: 'a> MapAccess<'a, R> {
917    pub fn new(de: &'a mut Deserializer<R>) -> Self {
918        MapAccess { de, first: true }
919    }
920}
921
922impl<'de, 'a, R: Reader<'de> + 'a> de::MapAccess<'de> for MapAccess<'a, R> {
923    type Error = Error;
924
925    #[inline(always)]
926    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
927    where
928        K: de::DeserializeSeed<'de>,
929    {
930        let peek = match self.de.parser.skip_space_peek() {
931            Some(b'}') => {
932                return Ok(None);
933            }
934            Some(b',') if !self.first => {
935                self.de.parser.read.eat(1);
936                self.de.parser.skip_space()
937            }
938            Some(b) => {
939                self.de.parser.read.eat(1);
940                if self.first {
941                    self.first = false;
942                    Some(b)
943                } else {
944                    return Err(self.de.parser.error(ErrorCode::ExpectedObjectCommaOrEnd));
945                }
946            }
947            None => {
948                return Err(self.de.parser.error(ErrorCode::EofWhileParsing));
949            }
950        };
951
952        match peek {
953            Some(b'"') => seed.deserialize(MapKey { de: &mut *self.de }).map(Some),
954            Some(b'}') => Err(self.de.parser.error(ErrorCode::TrailingComma)),
955            Some(_) => Err(self.de.parser.error(ErrorCode::ExpectObjectKeyOrEnd)),
956            None => Err(self.de.parser.error(ErrorCode::EofWhileParsing)),
957        }
958    }
959
960    #[inline(always)]
961    fn next_value<V>(&mut self) -> Result<V>
962    where
963        V: de::Deserialize<'de>,
964    {
965        use std::marker::PhantomData;
966        self.next_value_seed(PhantomData)
967    }
968
969    #[inline(always)]
970    fn next_entry<K, V>(&mut self) -> Result<Option<(K, V)>>
971    where
972        K: de::Deserialize<'de>,
973        V: de::Deserialize<'de>,
974    {
975        use std::marker::PhantomData;
976        self.next_entry_seed(PhantomData, PhantomData)
977    }
978
979    #[inline(always)]
980    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
981    where
982        V: de::DeserializeSeed<'de>,
983    {
984        tri!(self.de.parser.parse_object_clo());
985        seed.deserialize(&mut *self.de)
986    }
987}
988
989struct VariantAccess<'a, R: 'a> {
990    de: &'a mut Deserializer<R>,
991}
992
993impl<'a, R: 'a> VariantAccess<'a, R> {
994    fn new(de: &'a mut Deserializer<R>) -> Self {
995        VariantAccess { de }
996    }
997}
998
999impl<'de, 'a, R: Reader<'de> + 'a> de::EnumAccess<'de> for VariantAccess<'a, R> {
1000    type Error = Error;
1001    type Variant = Self;
1002
1003    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self)>
1004    where
1005        V: de::DeserializeSeed<'de>,
1006    {
1007        let val = tri!(seed.deserialize(&mut *self.de));
1008        tri!(self.de.parser.parse_object_clo());
1009        Ok((val, self))
1010    }
1011}
1012
1013impl<'de, 'a, R: Reader<'de> + 'a> de::VariantAccess<'de> for VariantAccess<'a, R> {
1014    type Error = Error;
1015
1016    fn unit_variant(self) -> Result<()> {
1017        de::Deserialize::deserialize(self.de)
1018    }
1019
1020    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
1021    where
1022        T: de::DeserializeSeed<'de>,
1023    {
1024        seed.deserialize(self.de)
1025    }
1026
1027    fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
1028    where
1029        V: de::Visitor<'de>,
1030    {
1031        de::Deserializer::deserialize_seq(self.de, visitor)
1032    }
1033
1034    fn struct_variant<V>(self, fields: &'static [&'static str], visitor: V) -> Result<V::Value>
1035    where
1036        V: de::Visitor<'de>,
1037    {
1038        de::Deserializer::deserialize_struct(self.de, "", fields, visitor)
1039    }
1040}
1041
1042struct UnitVariantAccess<'a, R: 'a> {
1043    de: &'a mut Deserializer<R>,
1044}
1045
1046impl<'a, R: 'a> UnitVariantAccess<'a, R> {
1047    fn new(de: &'a mut Deserializer<R>) -> Self {
1048        UnitVariantAccess { de }
1049    }
1050}
1051
1052impl<'de, 'a, R: Reader<'de> + 'a> de::EnumAccess<'de> for UnitVariantAccess<'a, R> {
1053    type Error = Error;
1054    type Variant = Self;
1055
1056    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self)>
1057    where
1058        V: de::DeserializeSeed<'de>,
1059    {
1060        let variant = tri!(seed.deserialize(&mut *self.de));
1061        Ok((variant, self))
1062    }
1063}
1064
1065impl<'de, 'a, R: Reader<'de> + 'a> de::VariantAccess<'de> for UnitVariantAccess<'a, R> {
1066    type Error = Error;
1067
1068    fn unit_variant(self) -> Result<()> {
1069        Ok(())
1070    }
1071
1072    fn newtype_variant_seed<T>(self, _seed: T) -> Result<T::Value>
1073    where
1074        T: de::DeserializeSeed<'de>,
1075    {
1076        Err(de::Error::invalid_type(
1077            Unexpected::UnitVariant,
1078            &"newtype variant",
1079        ))
1080    }
1081
1082    fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value>
1083    where
1084        V: de::Visitor<'de>,
1085    {
1086        Err(de::Error::invalid_type(
1087            Unexpected::UnitVariant,
1088            &"tuple variant",
1089        ))
1090    }
1091
1092    fn struct_variant<V>(self, _fields: &'static [&'static str], _visitor: V) -> Result<V::Value>
1093    where
1094        V: de::Visitor<'de>,
1095    {
1096        Err(de::Error::invalid_type(
1097            Unexpected::UnitVariant,
1098            &"struct variant",
1099        ))
1100    }
1101}
1102
1103/// Only deserialize from this after peeking a '"' byte! Otherwise it may
1104/// deserialize invalid JSON successfully./// Only deserialize from this after peeking a '"' byte!
1105/// Otherwise it may deserialize invalid JSON successfully.
1106struct MapKey<'a, R: 'a> {
1107    de: &'a mut Deserializer<R>,
1108}
1109
/// Generates a `MapKey` method that reads a number written inside a quoted
/// object key and then requires the closing `"`.
///
/// - `deserialize_numeric_key!(method)` parses through `deserialize_number`.
/// - `deserialize_numeric_key!(method, delegate)` first checks that the key
///   starts with a digit or `-`, then parses through `delegate`.
macro_rules! deserialize_numeric_key {
    ($method:ident) => {
        fn $method<V: de::Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
            let value = tri!(self.de.deserialize_number(visitor));
            // The number must be followed immediately by the key's closing quote.
            match self.de.parser.read.next() {
                Some(b'"') => Ok(value),
                _ => Err(self.de.parser.error(ErrorCode::ExpectedQuote)),
            }
        }
    };

    ($method:ident, $delegate:ident) => {
        fn $method<V: de::Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
            // Reject keys that cannot start a number before delegating.
            if !matches!(self.de.parser.read.peek(), Some(b'0'..=b'9' | b'-')) {
                return Err(self.de.parser.error(ErrorCode::ExpectedNumericKey));
            }

            let value = tri!(self.de.$delegate(visitor));

            // The number must be followed immediately by the key's closing quote.
            match self.de.parser.read.next() {
                Some(b'"') => Ok(value),
                _ => Err(self.de.parser.error(ErrorCode::ExpectedQuote)),
            }
        }
    };
}
1145
1146impl<'de, 'a, R> de::Deserializer<'de> for MapKey<'a, R>
1147where
1148    R: Reader<'de>,
1149{
1150    type Error = Error;
1151
1152    #[inline]
1153    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
1154    where
1155        V: de::Visitor<'de>,
1156    {
1157        self.de.scratch.clear();
1158        match tri!(self.de.parser.parse_str(&mut self.de.scratch)) {
1159            Reference::Borrowed(s) => visitor.visit_borrowed_str(s),
1160            Reference::Copied(s) => visitor.visit_str(s),
1161        }
1162    }
1163
1164    deserialize_numeric_key!(deserialize_i8);
1165    deserialize_numeric_key!(deserialize_i16);
1166    deserialize_numeric_key!(deserialize_i32);
1167    deserialize_numeric_key!(deserialize_i64);
1168    deserialize_numeric_key!(deserialize_i128, deserialize_i128);
1169    deserialize_numeric_key!(deserialize_u8);
1170    deserialize_numeric_key!(deserialize_u16);
1171    deserialize_numeric_key!(deserialize_u32);
1172    deserialize_numeric_key!(deserialize_u64);
1173    deserialize_numeric_key!(deserialize_u128, deserialize_u128);
1174    deserialize_numeric_key!(deserialize_f32);
1175    deserialize_numeric_key!(deserialize_f64);
1176
1177    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
1178    where
1179        V: de::Visitor<'de>,
1180    {
1181        let mut value = match self.de.parser.read.next() {
1182            Some(b't') => {
1183                tri!(self.de.parser.parse_literal("rue"));
1184                visitor.visit_bool(true)
1185            }
1186            Some(b'f') => {
1187                tri!(self.de.parser.parse_literal("alse"));
1188                visitor.visit_bool(false)
1189            }
1190            None => Err(self.de.parser.error(ErrorCode::EofWhileParsing)),
1191            Some(peek) => Err(self.de.peek_invalid_type(peek, &visitor)),
1192        };
1193
1194        if self.de.parser.read.next() != Some(b'"') {
1195            value = Err(self.de.parser.error(ErrorCode::ExpectedQuote));
1196        }
1197
1198        match value {
1199            Ok(value) => Ok(value),
1200            Err(err) => Err(self.de.parser.fix_position(err)),
1201        }
1202    }
1203
1204    #[inline]
1205    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
1206    where
1207        V: de::Visitor<'de>,
1208    {
1209        // Map keys cannot be null.
1210        visitor.visit_some(self)
1211    }
1212
1213    #[inline]
1214    fn deserialize_newtype_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
1215    where
1216        V: de::Visitor<'de>,
1217    {
1218        visitor.visit_newtype_struct(self)
1219    }
1220
1221    #[inline]
1222    fn deserialize_enum<V>(
1223        self,
1224        name: &'static str,
1225        variants: &'static [&'static str],
1226        visitor: V,
1227    ) -> Result<V::Value>
1228    where
1229        V: de::Visitor<'de>,
1230    {
1231        self.de.parser.read.backward(1);
1232        self.de.deserialize_enum(name, variants, visitor)
1233    }
1234
1235    #[inline]
1236    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
1237    where
1238        V: de::Visitor<'de>,
1239    {
1240        self.de.parser.read.backward(1);
1241        self.de.deserialize_bytes(visitor)
1242    }
1243
1244    #[inline]
1245    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
1246    where
1247        V: de::Visitor<'de>,
1248    {
1249        self.de.parser.read.backward(1);
1250        self.de.deserialize_bytes(visitor)
1251    }
1252
1253    forward_to_deserialize_any! {
1254        char str string unit unit_struct seq tuple tuple_struct map struct
1255        identifier ignored_any
1256    }
1257}
1258//////////////////////////////////////////////////////////////////////////////
1259
1260fn from_trait<'de, R, T>(read: R) -> Result<T>
1261where
1262    R: Reader<'de>,
1263    T: de::Deserialize<'de>,
1264{
1265    // check JSON size, because the design of `sonic_rs::Value`, parsing JSON larger than 4 GB is
1266    // not supported
1267    let len = read.as_u8_slice().len();
1268    if len > u32::MAX as _ {
1269        return Err(crate::error::make_error(format!(
1270            "Only support JSON less than 4 GB, the input JSON is too large here, len is {len}"
1271        )));
1272    }
1273
1274    let mut de = Deserializer::new(read);
1275    #[cfg(feature = "arbitrary_precision")]
1276    {
1277        de = de.use_rawnumber();
1278    }
1279
1280    #[cfg(feature = "utf8_lossy")]
1281    {
1282        de = de.utf8_lossy();
1283    }
1284
1285    let value = tri!(de::Deserialize::deserialize(&mut de));
1286
1287    // Make sure the whole stream has been consumed.
1288    tri!(de.parser.parse_trailing());
1289
1290    // check invalid utf8
1291    tri!(de.parser.read.check_utf8_final());
1292    Ok(value)
1293}
1294
1295/// Deserialize an instance of type `T` from bytes of JSON text.
1296/// If user can guarantee the JSON is valid UTF-8, recommend to use `from_slice_unchecked` instead.
1297pub fn from_slice<'a, T>(json: &'a [u8]) -> Result<T>
1298where
1299    T: de::Deserialize<'a>,
1300{
1301    from_trait(Read::new(json, true))
1302}
1303
1304/// Deserialize an instance of type `T` from bytes of JSON text.
1305///
1306/// # Safety
1307/// The json passed in must be valid UTF-8.
1308pub unsafe fn from_slice_unchecked<'a, T>(json: &'a [u8]) -> Result<T>
1309where
1310    T: de::Deserialize<'a>,
1311{
1312    from_trait(Read::new(json, false))
1313}
1314
1315/// Deserialize an instance of type `T` from a string of JSON text.
1316pub fn from_str<'a, T>(s: &'a str) -> Result<T>
1317where
1318    T: de::Deserialize<'a>,
1319{
1320    from_trait(Read::new(s.as_bytes(), false))
1321}
1322
1323/// Deserialize an instance of type `T` from a Reader
1324pub fn from_reader<R, T>(mut reader: R) -> Result<T>
1325where
1326    R: std::io::Read,
1327    T: de::DeserializeOwned,
1328{
1329    let mut data = Vec::new();
1330    if let Err(e) = reader.read_to_end(&mut data) {
1331        return Err(Error::io(e));
1332    };
1333    from_slice(data.as_slice())
1334}
1335
#[cfg(test)]
mod test {
    use crate::{object, Value};

    /// Nesting one level deeper than the limit must fail cleanly instead of
    /// overflowing the stack.
    #[cfg(not(target_family = "wasm"))]
    #[test]
    fn test_recursion_depth_limit() {
        // MAX_ALLOWED_DEPTH is 255; nesting 256 levels returns RecursionLimitExceeded.
        // Use serde_json::Value so we go through the recursive path
        // (sonic_rs::Value may use a fast path when index==0).
        let worker = std::thread::Builder::new()
            .name("test_recursion_depth_limit".to_string())
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                const DEPTH: usize = 256;
                let mut src = "[".repeat(DEPTH);
                src.push_str(&"]".repeat(DEPTH));

                let err = crate::from_str::<serde_json::Value>(&src).unwrap_err();
                assert!(matches!(
                    err.error_code(),
                    crate::error::ErrorCode::RecursionLimitExceeded
                ));
            })
            .expect("failed to spawn test thread");

        worker.join().expect("test thread panicked");
    }

    /// `deserialize` stops right after the first value; `end` then decides
    /// whether what follows is acceptable.
    #[test]
    fn test_value_as_deserializer() {
        // (input, whether `end()` should succeed after reading the object)
        let cases = [
            (r#"{"a": 1, "b": 2}"#, true),
            (r#"{"a": 1, "b": 2}123"#, false),
        ];

        for (json, clean_end) in cases {
            let mut de = crate::Deserializer::new(crate::Read::from(json));

            let parsed: Value = de.deserialize().unwrap();
            assert_eq!(parsed, object! { "a": 1, "b": 2 });
            // The object itself is 16 bytes long in both inputs.
            assert_eq!(de.parser.read.index, 16);

            assert_eq!(de.end().is_ok(), clean_end);
        }
    }
}