sonic_rs/serde/
de.rs

1//! Deserialize JSON data to a Rust data structure.
2
// The code is cloned from [serde_json](https://github.com/serde-rs/json), with the necessary parts modified.
4use std::{marker::PhantomData, mem::ManuallyDrop, ptr::slice_from_raw_parts, sync::Arc};
5
6use serde::{
7    de::{self, Expected, Unexpected},
8    forward_to_deserialize_any,
9};
10use sonic_number::ParserNumber;
11
12use crate::{
13    error::{
14        Error,
15        ErrorCode::{self, EofWhileParsing, RecursionLimitExceeded},
16        Result,
17    },
18    parser::{as_str, ParseStatus, ParsedSlice, Parser, Reference},
19    reader::{Read, Reader},
20    value::{node::Value, shared::Shared},
21    JsonInput, OwnedLazyValue,
22};
/// Initial nesting budget stored in `Deserializer::remaining_depth` by `Deserializer::new`.
const MAX_ALLOWED_DEPTH: u8 = u8::MAX;
24
25//////////////////////////////////////////////////////////////////////////////
26
/// A structure that deserializes JSON into Rust values.
pub struct Deserializer<R> {
    /// The parser wrapping the reader `R`; all input access goes through it.
    pub(crate) parser: Parser<R>,
    /// Scratch buffer passed to the parser's string-parsing routines.
    scratch: Vec<u8>,
    /// Remaining nesting budget; `DepthGuard` decrements it on entry and restores it on drop.
    remaining_depth: u8,
    shared: Option<Arc<Shared>>, // the shared allocator for `Value`
}
34
35// some functions only used for struct visitors.
36impl<'de, R: Reader<'de>> Deserializer<R> {
37    /// Create a new deserializer.
38    pub fn new(read: R) -> Self {
39        Self {
40            parser: Parser::new(read),
41            scratch: Vec::new(),
42            remaining_depth: MAX_ALLOWED_DEPTH,
43            shared: Option::None,
44        }
45    }
46
    /// Parse all numbers as [`crate::RawNumber`].
48    ///
49    /// # Example
50    /// ```
51    /// use sonic_rs::{Deserializer, Value};
52    /// let json = r#"{"a":1.2345678901234567890123}"#;
53    /// let mut de = Deserializer::from_str(json).use_rawnumber();
54    /// let value: Value = de.deserialize().unwrap();
55    /// let out = sonic_rs::to_string(&value).unwrap();
56    /// assert_eq!(json, out);
57    /// ```
58    pub fn use_rawnumber(mut self) -> Self {
59        self.parser.cfg.use_rawnumber = true;
60        self
61    }
62
63    /// Allow to parse JSON with invalid UTF-8 and UTF-16 characters. Will replace them with
64    /// `\uFFFD` (displayed as �).
65    ///
66    /// # Example
67    /// ```
68    /// use sonic_rs::{Deserializer, Value};
69    /// let data = [
70    ///     &[b'\"', 0xff, b'\"'][..],         // invalid UTF8 char in string
71    ///     br#"{"a":"\ud800","b":"\udc00"}"#, // invalid UTF16 surrogate pair
72    /// ];
73    /// let expect = [r#""�""#, r#"{"a":"�","b":"�"}"#];
74    ///
75    /// let mut exp = expect.iter();
76    /// for json in data {
77    ///     let mut de = Deserializer::from_slice(json).utf8_lossy();
78    ///     let value: Value = de.deserialize().unwrap();
79    ///     let out = sonic_rs::to_string(&value).unwrap();
80    ///     assert_eq!(&out, exp.next().unwrap());
81    /// }
82    /// ```
83    pub fn utf8_lossy(mut self) -> Self {
84        self.parser.cfg.utf8_lossy = true;
85        self
86    }
87
88    /// Deserialize a JSON stream to a Rust data structure.
89    ///
    /// It can be used repeatedly, and trailing characters are not checked after a value is deserialized.
91    ///
92    /// # Example
93    ///
94    /// ```
95    /// # use sonic_rs::{prelude::*, Value};
96    ///
97    /// use sonic_rs::Deserializer;
98    ///
99    /// let multiple_json = r#"{"a": 123, "b": "foo"} true [1, 2, 3] wrong chars"#;
100    ///
101    /// let mut deserializer = Deserializer::from_json(multiple_json);
102    ///
103    /// let val: Value = deserializer.deserialize().unwrap();
104    /// assert_eq!(val["a"].as_i64().unwrap(), 123);
105    /// assert_eq!(val["b"].as_str().unwrap(), "foo");
106    ///
107    /// let val: bool = deserializer.deserialize().unwrap();
108    /// assert_eq!(val, true);
109    ///
110    /// let val: Vec<u8> = deserializer.deserialize().unwrap();
111    /// assert_eq!(val, &[1, 2, 3]);
112    ///
113    /// // encounter the wrong chars in json
114    /// assert!(deserializer.deserialize::<Value>().is_err());
115    /// ```
116    pub fn deserialize<T>(&mut self) -> Result<T>
117    where
118        T: de::Deserialize<'de>,
119    {
120        de::Deserialize::deserialize(self)
121    }
122
123    /// Convert Deserializer to a [`StreamDeserializer`].
124    pub fn into_stream<T>(self) -> StreamDeserializer<'de, T, R> {
125        StreamDeserializer {
126            de: self,
127            data: PhantomData,
128            lifetime: PhantomData,
129            is_ending: false,
130        }
131    }
132
133    /// The `Deserializer::end` method should be called after a value has been fully deserialized.
134    /// This allows the `Deserializer` to validate that the input stream is at the end or that it
135    /// only has trailing whitespace.
136    pub fn end(&mut self) -> Result<()> {
137        tri!(self.parser.parse_trailing());
138        Ok(())
139    }
140}
141
142impl<'de> Deserializer<Read<'de>> {
143    /// Create a new deserializer from a json input [`JsonInput`].
144    pub fn from_json<I: JsonInput<'de>>(input: I) -> Self {
145        Self::new(Read::from(input))
146    }
147
148    /// Create a new deserializer from a string.
149    #[allow(clippy::should_implement_trait)]
150    pub fn from_str(s: &'de str) -> Self {
151        Self::new(Read::from(s))
152    }
153
154    /// Create a new deserializer from a string slice.
155    pub fn from_slice(s: &'de [u8]) -> Self {
156        Self::new(Read::from(s))
157    }
158}
159
160/// An iterator that deserializes a json stream into multiple `T` values.
161///
162/// # Example
163///
164/// ```
165/// use sonic_rs::{prelude::*, Deserializer, Value};
166///
167/// let multiple_json = r#"{"a": 123, "b": "foo"} true [1, 2, 3] wrong chars"#;
168///
169/// let mut stream = Deserializer::from_json(multiple_json).into_stream::<Value>();
170///
171/// let val = stream.next().unwrap().unwrap();
172/// assert_eq!(val["a"].as_i64().unwrap(), 123);
173/// assert_eq!(val["b"].as_str().unwrap(), "foo");
174///
175/// let val = stream.next().unwrap().unwrap();
176/// assert_eq!(val, true);
177///
178/// let val = stream.next().unwrap().unwrap();
179/// assert_eq!(val, &[1, 2, 3]);
180///
181/// // encounter the wrong chars in json
182/// assert!(stream.next().unwrap().is_err());
183/// ```
pub struct StreamDeserializer<'de, T, R> {
    // the wrapped deserializer that produces every item
    de: Deserializer<R>,
    // marker for the item type `T`; no `T` is stored
    data: PhantomData<T>,
    // marker carrying the `'de` input lifetime
    lifetime: PhantomData<&'de R>,
    // set after an item fails to deserialize; `next` returns `None` from then on
    is_ending: bool,
}
190
191impl<'de, T, R> Iterator for StreamDeserializer<'de, T, R>
192where
193    T: de::Deserialize<'de>,
194    R: Reader<'de>,
195{
196    type Item = Result<T>;
197
198    fn next(&mut self) -> Option<Self::Item> {
199        if self.is_ending {
200            return None;
201        }
202        let val: Result<T> = self.de.deserialize();
203        if val.is_err() {
204            self.is_ending = true;
205        }
206        Some(val)
207    }
208}
209
210// We only use our own error type; no need for From conversions provided by the
211// standard library's try! macro. This reduces lines of LLVM IR by 4%.
macro_rules! tri {
    // Unwrap an `Ok`, or return the `Err` unchanged from the enclosing function.
    // A trailing comma after the expression is accepted.
    ($result:expr $(,)?) => {
        match $result {
            Err(error) => return Err(error),
            Ok(value) => value,
        }
    };
}
222
223pub(crate) use tri;
224
/// Scope guard for one nesting level: `guard` takes a unit from
/// `Deserializer::remaining_depth`, and dropping the guard gives it back.
struct DepthGuard<'a, R> {
    // the deserializer whose depth budget this guard holds a unit of
    de: &'a mut Deserializer<R>,
}
228
229impl<'a, 'de, R: Reader<'de>> DepthGuard<'a, R> {
230    fn guard(de: &'a mut Deserializer<R>) -> Result<Self> {
231        de.remaining_depth -= 1;
232        if de.remaining_depth == 0 {
233            return Err(de.parser.error(RecursionLimitExceeded));
234        }
235        Ok(Self { de })
236    }
237}
238
239impl<'a, R> Drop for DepthGuard<'a, R> {
240    fn drop(&mut self) {
241        self.de.remaining_depth += 1;
242    }
243}
244
245fn visit_number<'de, V>(num: &ParserNumber, visitor: V) -> Result<V::Value>
246where
247    V: de::Visitor<'de>,
248{
249    match *num {
250        ParserNumber::Float(x) => visitor.visit_f64(x),
251        ParserNumber::Unsigned(x) => visitor.visit_u64(x),
252        ParserNumber::Signed(x) => visitor.visit_i64(x),
253    }
254}
255
256pub(crate) fn invalid_type_number(num: &ParserNumber, exp: &dyn Expected) -> Error {
257    match *num {
258        ParserNumber::Float(x) => de::Error::invalid_type(Unexpected::Float(x), exp),
259        ParserNumber::Unsigned(x) => de::Error::invalid_type(Unexpected::Unsigned(x), exp),
260        ParserNumber::Signed(x) => de::Error::invalid_type(Unexpected::Signed(x), exp),
261    }
262}
263
// Generates a `de::Deserializer` method named `$name` that forwards to the inherent
// `deserialize_number`.
macro_rules! impl_deserialize_number {
    ($name:ident) => {
        fn $name<V>(self, visitor: V) -> Result<V::Value>
        where
            V: de::Visitor<'de>,
        {
            self.deserialize_number(visitor)
        }
    };
}
274
275// some functions only used for struct visitors.
276impl<'de, R: Reader<'de>> Deserializer<R> {
277    pub(crate) fn deserialize_number<V>(&mut self, visitor: V) -> Result<V::Value>
278    where
279        V: de::Visitor<'de>,
280    {
281        let Some(peek) = self.parser.skip_space() else {
282            return Err(self.parser.error(EofWhileParsing));
283        };
284
285        let value = match peek {
286            c @ b'-' | c @ b'0'..=b'9' => visit_number(&tri!(self.parser.parse_number(c)), visitor),
287            _ => Err(self.peek_invalid_type(peek, &visitor)),
288        };
289
290        // fixed error position if not matched type
291        match value {
292            Ok(value) => Ok(value),
293            Err(err) => Err(self.parser.fix_position(err)),
294        }
295    }
296
297    #[cold]
298    fn peek_invalid_type(&mut self, peek: u8, exp: &dyn Expected) -> Error {
299        self.parser.peek_invalid_type(peek, exp)
300    }
301
    /// Finishes a sequence by delegating to the parser's `parse_array_end`.
    pub fn end_seq(&mut self) -> Result<()> {
        self.parser.parse_array_end()
    }
305
306    pub fn end_map(&mut self) -> Result<()> {
307        match self.parser.skip_space() {
308            Some(b'}') => Ok(()),
309            Some(b',') => Err(self.parser.error(ErrorCode::TrailingComma)),
310            Some(_) => Err(self.parser.error(ErrorCode::ExpectedObjectCommaOrEnd)),
311            None => Err(self.parser.error(ErrorCode::EofWhileParsing)),
312        }
313    }
314
315    fn scan_integer128(&mut self, buf: &mut String) -> Result<()> {
316        match self.parser.read.peek() {
317            Some(b'0') => {
318                buf.push('0');
319                self.parser.read.eat(1);
320                // There can be only one leading '0'.
321                if let Some(ch) = self.parser.read.peek() {
322                    if ch.is_ascii_digit() {
323                        return Err(self.parser.error(ErrorCode::InvalidNumber));
324                    }
325                }
326                Ok(())
327            }
328            Some(c) if c.is_ascii_digit() => {
329                buf.push(c as char);
330                self.parser.read.eat(1);
331                while let c @ b'0'..=b'9' = self.parser.read.peek().unwrap_or_default() {
332                    self.parser.read.eat(1);
333                    buf.push(c as char);
334                }
335                Ok(())
336            }
337            _ => Err(self.parser.error(ErrorCode::InvalidNumber)),
338        }
339    }
340
341    fn deserialize_lazyvalue<V>(&mut self, visitor: V) -> Result<V::Value>
342    where
343        V: de::Visitor<'de>,
344    {
345        let (raw, status) = self.parser.skip_one()?;
346        if status == ParseStatus::HasEscaped {
347            visitor.visit_str(as_str(raw))
348        } else {
349            visitor.visit_borrowed_str(as_str(raw))
350        }
351    }
352
    /// Reads the next value as an `OwnedLazyValue` and passes its in-memory bytes to
    /// `visitor.visit_bytes`.
    fn deserialize_owned_lazyvalue<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: de::Visitor<'de>,
    {
        // `ManuallyDrop` stops the value from being dropped here after its bytes are
        // exposed.
        // NOTE(review): presumably the visitor copies these bytes back into an
        // `OwnedLazyValue` and takes ownership — confirm against the visitor.
        let val = ManuallyDrop::new(self.parser.get_owned_lazyvalue(true)?);
        // SAFETY: the slice spans `size_of::<OwnedLazyValue>()` bytes starting at the local
        // `val`, which stays alive for the whole `visit_bytes` call.
        // NOTE(review): this views a struct as `&[u8]`; confirm it has no padding or
        // uninitialized bytes.
        unsafe {
            let binary = &*slice_from_raw_parts(
                &val as *const _ as *const u8,
                std::mem::size_of::<OwnedLazyValue>(),
            );
            visitor.visit_bytes(binary)
        }
    }
369
    /// Parses the next JSON document into a `Value` and passes its in-memory bytes to
    /// `visitor.visit_bytes`.
    ///
    /// Two paths exist: when nothing has been read yet the whole input is parsed with
    /// `parse_with_padding`; otherwise the value is parsed from the current parser
    /// position with `parse_without_padding`, using the lazily created `shared` allocator.
    fn deserialize_value<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: de::Visitor<'de>,
    {
        let mut val = Value::new();
        if self.parser.read.index() == 0 {
            // Nothing consumed yet: parse the whole input in one go.
            let cfg = self.parser.cfg;
            let json = self.parser.read.as_u8_slice();

            // `n` is passed to `eat` below so the reader moves past the parsed document;
            // trailing characters can then be checked later.
            // NOTE(review): on the lossy path `n` comes from parsing the lossy copy, whose
            // length can differ from the original bytes — confirm `eat(n)` is still right.
            let n = if cfg.utf8_lossy && self.parser.read.next_invalid_utf8() != usize::MAX {
                // Replace the invalid UTF-8 first. Invalid bytes outside strings need no
                // special care here: they make parsing fail anyway.
                val.parse_with_padding(String::from_utf8_lossy(json).as_bytes(), cfg)?
            } else {
                val.parse_with_padding(json, cfg)?
            };
            self.parser.read.eat(n);
        } else {
            // NOTE(review): this produces a `&mut Shared` from the pointer inside an `Arc`;
            // it is only sound while no other reference to that `Shared` is in use —
            // confirm.
            let shared = unsafe {
                if self.shared.is_none() {
                    self.shared = Some(Arc::new(Shared::default()));
                }
                let shared = self.shared.as_mut().unwrap();
                &mut *(Arc::as_ptr(shared) as *mut _)
            };
            // Parse only the next value straight from the parser, without the padded
            // buffer, to avoid a memory copy.
            val.parse_without_padding(shared, &mut self.scratch, &mut self.parser)?
        };

        // `ManuallyDrop` stops `val` from being dropped here after its bytes are exposed.
        // NOTE(review): presumably the visitor rebuilds a `Value` from these bytes and
        // takes ownership — confirm against the visitor.
        let val = ManuallyDrop::new(val);
        // SAFETY: the slice spans `size_of::<Value>()` bytes starting at the local `val`,
        // which stays alive for the whole `visit_bytes` call.
        unsafe {
            let binary =
                &*slice_from_raw_parts(&val as *const _ as *const u8, std::mem::size_of::<Value>());
            visitor.visit_bytes(binary)
        }
    }
412
413    // we deserialize json number from string or number types
414    fn deserialize_rawnumber<V>(&mut self, visitor: V) -> Result<V::Value>
415    where
416        V: de::Visitor<'de>,
417    {
418        let raw = match self.parser.skip_space_peek() {
419            Some(c @ b'-' | c @ b'0'..=b'9') => {
420                self.parser.read.eat(1);
421                self.parser.skip_number(c)?
422            }
423            Some(b'"') => {
424                self.parser.read.eat(1);
425                let start = self.parser.read.index();
426                match self.parser.read.next() {
427                    Some(c @ b'-' | c @ b'0'..=b'9') => {
428                        self.parser.skip_number(c)?;
429                    }
430                    _ => return Err(self.parser.error(ErrorCode::InvalidNumber)),
431                }
432                let end = self.parser.read.index();
433                let raw = as_str(self.parser.read.slice_unchecked(start, end));
434                // match the right quote
435                if self.parser.read.next() != Some(b'"') {
436                    return Err(self.parser.error(ErrorCode::InvalidNumber));
437                }
438                raw
439            }
440            _ => return Err(self.parser.error(ErrorCode::InvalidNumber)),
441        };
442
443        visitor.visit_borrowed_str(raw)
444    }
445}
446
447impl<'de, 'a, R: Reader<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> {
448    type Error = Error;
449    #[inline]
450    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
451    where
452        V: de::Visitor<'de>,
453    {
454        let Some(peek) = self.parser.skip_space() else {
455            return Err(self.parser.error(EofWhileParsing));
456        };
457
458        let value = match peek {
459            b'n' => {
460                tri!(self.parser.parse_literal("ull"));
461                visitor.visit_unit()
462            }
463            b't' => {
464                tri!(self.parser.parse_literal("rue"));
465                visitor.visit_bool(true)
466            }
467            b'f' => {
468                tri!(self.parser.parse_literal("alse"));
469                visitor.visit_bool(false)
470            }
471            c @ b'-' | c @ b'0'..=b'9' => visit_number(&tri!(self.parser.parse_number(c)), visitor),
472            b'"' => match tri!(self.parser.parse_str(&mut self.scratch)) {
473                Reference::Borrowed(s) => visitor.visit_borrowed_str(s),
474                Reference::Copied(s) => visitor.visit_str(s),
475            },
476            b'[' => {
477                let ret = {
478                    let _ = DepthGuard::guard(self);
479                    visitor.visit_seq(SeqAccess::new(self))
480                };
481                match (ret, self.end_seq()) {
482                    (Ok(ret), Ok(())) => Ok(ret),
483                    (Err(err), _) | (_, Err(err)) => Err(err),
484                }
485            }
486            b'{' => {
487                let ret = {
488                    let _ = DepthGuard::guard(self);
489                    visitor.visit_map(MapAccess::new(self))
490                };
491                match (ret, self.end_map()) {
492                    (Ok(ret), Ok(())) => Ok(ret),
493                    (Err(err), _) | (_, Err(err)) => Err(err),
494                }
495            }
496            _ => Err(self.parser.error(ErrorCode::InvalidJsonValue)),
497        };
498
499        match value {
500            Ok(value) => Ok(value),
501            // The de::Error impl creates errors with unknown line and column.
502            // Fill in the position here by looking at the current index in the
503            // input. There is no way to tell whether this should call `error`
504            // or `error` so pick the one that seems correct more often.
505            // Worst case, the position is off by one character.
506            Err(err) => Err(self.parser.fix_position(err)),
507        }
508    }
509
510    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
511    where
512        V: de::Visitor<'de>,
513    {
514        let Some(peek) = self.parser.skip_space() else {
515            return Err(self.parser.error(ErrorCode::EofWhileParsing));
516        };
517
518        let value = match peek {
519            b't' => {
520                tri!(self.parser.parse_literal("rue"));
521                visitor.visit_bool(true)
522            }
523            b'f' => {
524                tri!(self.parser.parse_literal("alse"));
525                visitor.visit_bool(false)
526            }
527            _ => Err(self.peek_invalid_type(peek, &visitor)),
528        };
529
530        match value {
531            Ok(value) => Ok(value),
532            Err(err) => Err(self.parser.fix_position(err)),
533        }
534    }
535
    // Each fixed-width integer and float entry point expands to a method that forwards
    // to the inherent `deserialize_number`.
    impl_deserialize_number!(deserialize_i8);
    impl_deserialize_number!(deserialize_i16);
    impl_deserialize_number!(deserialize_i32);
    impl_deserialize_number!(deserialize_i64);
    impl_deserialize_number!(deserialize_u8);
    impl_deserialize_number!(deserialize_u16);
    impl_deserialize_number!(deserialize_u32);
    impl_deserialize_number!(deserialize_u64);
    impl_deserialize_number!(deserialize_f32);
    impl_deserialize_number!(deserialize_f64);
546
547    fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value>
548    where
549        V: de::Visitor<'de>,
550    {
551        let mut buf = String::new();
552        match self.parser.skip_space_peek() {
553            Some(b'-') => {
554                buf.push('-');
555                self.parser.read.eat(1);
556            }
557            Some(_) => {}
558            None => {
559                return Err(self.parser.error(ErrorCode::EofWhileParsing));
560            }
561        };
562
563        tri!(self.scan_integer128(&mut buf));
564
565        let value = match buf.parse() {
566            Ok(int) => visitor.visit_i128(int),
567            Err(_) => {
568                return Err(self.parser.error(ErrorCode::NumberOutOfRange));
569            }
570        };
571
572        match value {
573            Ok(value) => Ok(value),
574            Err(err) => Err(self.parser.fix_position(err)),
575        }
576    }
577
578    fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value>
579    where
580        V: de::Visitor<'de>,
581    {
582        match self.parser.skip_space_peek() {
583            Some(b'-') => {
584                return Err(self.parser.error(ErrorCode::NumberOutOfRange));
585            }
586            Some(_) => {}
587            None => {
588                return Err(self.parser.error(ErrorCode::EofWhileParsing));
589            }
590        }
591
592        let mut buf = String::new();
593        tri!(self.scan_integer128(&mut buf));
594
595        let value = match buf.parse() {
596            Ok(int) => visitor.visit_u128(int),
597            Err(_) => {
598                return Err(self.parser.error(ErrorCode::NumberOutOfRange));
599            }
600        };
601
602        match value {
603            Ok(value) => Ok(value),
604            Err(err) => Err(self.parser.fix_position(err)),
605        }
606    }
607
608    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value>
609    where
610        V: de::Visitor<'de>,
611    {
612        self.deserialize_str(visitor)
613    }
614
615    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value>
616    where
617        V: de::Visitor<'de>,
618    {
619        let Some(peek) = self.parser.skip_space() else {
620            return Err(self.parser.error(ErrorCode::EofWhileParsing));
621        };
622
623        let value = match peek {
624            b'"' => match tri!(self.parser.parse_str(&mut self.scratch)) {
625                Reference::Borrowed(s) => visitor.visit_borrowed_str(s),
626                Reference::Copied(s) => visitor.visit_str(s),
627            },
628            _ => Err(self.peek_invalid_type(peek, &visitor)),
629        };
630
631        match value {
632            Ok(value) => Ok(value),
633            Err(err) => Err(self.parser.fix_position(err)),
634        }
635    }
636
637    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
638    where
639        V: de::Visitor<'de>,
640    {
641        self.deserialize_str(visitor)
642    }
643
    /// Parses a JSON string as bytes. Note that this function does not check
    /// whether the bytes represent a valid UTF-8 string.
    ///
    /// A JSON array is also accepted and is handled by `deserialize_seq`.
    ///
    /// Followed as `serde_json`.
    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
    where
        V: de::Visitor<'de>,
    {
        let Some(peek) = self.parser.skip_space() else {
            return Err(self.parser.error(ErrorCode::EofWhileParsing));
        };

        let value = match peek {
            b'"' => match tri!(self.parser.parse_string_raw(&mut self.scratch)) {
                ParsedSlice::Borrowed { slice: b, buf: _ } => visitor.visit_borrowed_bytes(b),
                ParsedSlice::Copied(b) => visitor.visit_bytes(b),
            },
            b'[' => {
                // `skip_space` already consumed the `[`; step back so `deserialize_seq`
                // sees it again.
                self.parser.read.backward(1);
                self.deserialize_seq(visitor)
            }
            _ => Err(self.peek_invalid_type(peek, &visitor)),
        };

        // check invalid utf8 with allow space here
        // NOTE: this runs before `value` is inspected, so an error from this check is
        // returned even when `value` already holds an error.
        let _ = self.parser.check_invalid_utf8(true)?;
        match value {
            Ok(value) => Ok(value),
            Err(err) => Err(self.parser.fix_position(err)),
        }
    }
675
676    #[inline]
677    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
678    where
679        V: de::Visitor<'de>,
680    {
681        self.deserialize_bytes(visitor)
682    }
683
684    /// Parses a `null` as a None, and any other values as a `Some(...)`.
685    #[inline]
686    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
687    where
688        V: de::Visitor<'de>,
689    {
690        match self.parser.skip_space_peek() {
691            Some(b'n') => {
692                self.parser.read.eat(1);
693                tri!(self.parser.parse_literal("ull"));
694                visitor.visit_none()
695            }
696            _ => visitor.visit_some(self),
697        }
698    }
699
700    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value>
701    where
702        V: de::Visitor<'de>,
703    {
704        let Some(peek) = self.parser.skip_space() else {
705            return Err(self.parser.error(ErrorCode::EofWhileParsing));
706        };
707
708        let value = match peek {
709            b'n' => {
710                tri!(self.parser.parse_literal("ull"));
711                visitor.visit_unit()
712            }
713            _ => Err(self.peek_invalid_type(peek, &visitor)),
714        };
715
716        match value {
717            Ok(value) => Ok(value),
718            Err(err) => Err(self.parser.fix_position(err)),
719        }
720    }
721
722    fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
723    where
724        V: de::Visitor<'de>,
725    {
726        self.deserialize_unit(visitor)
727    }
728
729    /// Parses a newtype struct as the underlying value.
730    #[inline]
731    fn deserialize_newtype_struct<V>(self, name: &'static str, visitor: V) -> Result<V::Value>
732    where
733        V: de::Visitor<'de>,
734    {
735        {
736            if name == crate::serde::rawnumber::TOKEN {
737                return self.deserialize_rawnumber(visitor);
738            } else if name == crate::lazyvalue::TOKEN {
739                return self.deserialize_lazyvalue(visitor);
740            } else if name == crate::lazyvalue::OWNED_LAZY_VALUE_TOKEN {
741                return self.deserialize_owned_lazyvalue(visitor);
742            } else if name == crate::value::de::TOKEN {
743                return self.deserialize_value(visitor);
744            }
745        }
746
747        let _ = name;
748        visitor.visit_newtype_struct(self)
749    }
750
751    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
752    where
753        V: de::Visitor<'de>,
754    {
755        let Some(peek) = self.parser.skip_space() else {
756            return Err(self.parser.error(ErrorCode::EofWhileParsing));
757        };
758
759        let value = match peek {
760            b'[' => {
761                let ret = {
762                    let _ = DepthGuard::guard(self);
763                    visitor.visit_seq(SeqAccess::new(self))
764                };
765                match (ret, self.end_seq()) {
766                    (Ok(ret), Ok(())) => Ok(ret),
767                    (Err(err), _) | (_, Err(err)) => Err(err),
768                }
769            }
770            _ => return Err(self.peek_invalid_type(peek, &visitor)),
771        };
772        match value {
773            Ok(value) => Ok(value),
774            Err(err) => Err(self.parser.fix_position(err)),
775        }
776    }
777
778    fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value>
779    where
780        V: de::Visitor<'de>,
781    {
782        self.deserialize_seq(visitor)
783    }
784
785    fn deserialize_tuple_struct<V>(
786        self,
787        _name: &'static str,
788        _len: usize,
789        visitor: V,
790    ) -> Result<V::Value>
791    where
792        V: de::Visitor<'de>,
793    {
794        self.deserialize_seq(visitor)
795    }
796
797    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value>
798    where
799        V: de::Visitor<'de>,
800    {
801        let Some(peek) = self.parser.skip_space() else {
802            return Err(self.parser.error(ErrorCode::EofWhileParsing));
803        };
804
805        let value = match peek {
806            b'{' => {
807                let ret = {
808                    let _ = DepthGuard::guard(self);
809                    visitor.visit_map(MapAccess::new(self))
810                };
811                match (ret, self.end_map()) {
812                    (Ok(ret), Ok(())) => Ok(ret),
813                    (Err(err), _) | (_, Err(err)) => Err(err),
814                }
815            }
816            _ => return Err(self.peek_invalid_type(peek, &visitor)),
817        };
818        match value {
819            Ok(value) => Ok(value),
820            Err(err) => Err(self.parser.fix_position(err)),
821        }
822    }
823
824    fn deserialize_struct<V>(
825        self,
826        _name: &'static str,
827        _fields: &'static [&'static str],
828        visitor: V,
829    ) -> Result<V::Value>
830    where
831        V: de::Visitor<'de>,
832    {
833        let Some(peek) = self.parser.skip_space() else {
834            return Err(self.parser.error(ErrorCode::EofWhileParsing));
835        };
836
837        let value = match peek {
838            b'[' => {
839                let ret = {
840                    let _ = DepthGuard::guard(self);
841                    visitor.visit_seq(SeqAccess::new(self))
842                };
843                match (ret, self.end_seq()) {
844                    (Ok(ret), Ok(())) => Ok(ret),
845                    (Err(err), _) | (_, Err(err)) => Err(err),
846                }
847            }
848            b'{' => {
849                let ret = {
850                    let _ = DepthGuard::guard(self);
851                    visitor.visit_map(MapAccess::new(self))
852                };
853                match (ret, self.end_map()) {
854                    (Ok(ret), Ok(())) => Ok(ret),
855                    (Err(err), _) | (_, Err(err)) => Err(err),
856                }
857            }
858            _ => return Err(self.peek_invalid_type(peek, &visitor)),
859        };
860
861        match value {
862            Ok(value) => Ok(value),
863            Err(err) => Err(self.parser.fix_position(err)),
864        }
865    }
866
867    /// Parses an enum as an object like `{"$KEY":$VALUE}`, where $VALUE is either a straight
868    /// value, a `[..]`, or a `{..}`.
869    #[inline]
870    fn deserialize_enum<V>(
871        self,
872        _name: &str,
873        _variants: &'static [&'static str],
874        visitor: V,
875    ) -> Result<V::Value>
876    where
877        V: de::Visitor<'de>,
878    {
879        match self.parser.skip_space_peek() {
880            Some(b'{') => {
881                self.parser.read.eat(1);
882                let value = {
883                    let _ = DepthGuard::guard(self);
884                    tri!(visitor.visit_enum(VariantAccess::new(self)))
885                };
886
887                match self.parser.skip_space() {
888                    Some(b'}') => Ok(value),
889                    Some(_) => Err(self.parser.error(ErrorCode::InvalidJsonValue)),
890                    None => Err(self.parser.error(ErrorCode::EofWhileParsing)),
891                }
892            }
893            Some(b'"') => visitor.visit_enum(UnitVariantAccess::new(self)),
894            Some(_) => Err(self.parser.error(ErrorCode::InvalidJsonValue)),
895            None => Err(self.parser.error(ErrorCode::EofWhileParsing)),
896        }
897    }
898
899    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value>
900    where
901        V: de::Visitor<'de>,
902    {
903        self.deserialize_str(visitor)
904    }
905
906    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
907    where
908        V: de::Visitor<'de>,
909    {
910        // NOTE: we use faster skip, and will not validate the skipped parts.
911        tri!(self.parser.skip_one());
912        visitor.visit_unit()
913    }
914}
915
/// Sequence accessor passed to `Visitor::visit_seq` while reading a JSON array.
pub struct SeqAccess<'a, R: 'a> {
    de: &'a mut Deserializer<R>,
    first: bool, // true until the first element is read; later elements need a leading `,`
}
920
921impl<'a, R: 'a> SeqAccess<'a, R> {
922    pub fn new(de: &'a mut Deserializer<R>) -> Self {
923        SeqAccess { de, first: true }
924    }
925}
926
927impl<'de, 'a, R: Reader<'de> + 'a> de::SeqAccess<'de> for SeqAccess<'a, R> {
928    type Error = Error;
929
930    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
931    where
932        T: de::DeserializeSeed<'de>,
933    {
934        match self.de.parser.skip_space_peek() {
935            Some(b']') => Ok(None), // we will check the ending brace after `visit_seq`
936            Some(b',') if !self.first => {
937                self.de.parser.read.eat(1);
938                Ok(Some(tri!(seed.deserialize(&mut *self.de))))
939            }
940            Some(_) => {
941                if self.first {
942                    self.first = false;
943                    Ok(Some(tri!(seed.deserialize(&mut *self.de))))
944                } else {
945                    self.de.parser.read.eat(1); // makes the error position is correct
946                    Err(self.de.parser.error(ErrorCode::ExpectedArrayCommaOrEnd))
947                }
948            }
949            None => Err(self.de.parser.error(ErrorCode::EofWhileParsing)),
950        }
951    }
952}
953
954pub struct MapAccess<'a, R: 'a> {
955    de: &'a mut Deserializer<R>,
956    first: bool,
957}
958
959impl<'a, R: 'a> MapAccess<'a, R> {
960    pub fn new(de: &'a mut Deserializer<R>) -> Self {
961        MapAccess { de, first: true }
962    }
963}
964
965impl<'de, 'a, R: Reader<'de> + 'a> de::MapAccess<'de> for MapAccess<'a, R> {
966    type Error = Error;
967
968    #[inline(always)]
969    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
970    where
971        K: de::DeserializeSeed<'de>,
972    {
973        let peek = match self.de.parser.skip_space_peek() {
974            Some(b'}') => {
975                return Ok(None);
976            }
977            Some(b',') if !self.first => {
978                self.de.parser.read.eat(1);
979                self.de.parser.skip_space()
980            }
981            Some(b) => {
982                self.de.parser.read.eat(1);
983                if self.first {
984                    self.first = false;
985                    Some(b)
986                } else {
987                    return Err(self.de.parser.error(ErrorCode::ExpectedObjectCommaOrEnd));
988                }
989            }
990            None => {
991                return Err(self.de.parser.error(ErrorCode::EofWhileParsing));
992            }
993        };
994
995        match peek {
996            Some(b'"') => seed.deserialize(MapKey { de: &mut *self.de }).map(Some),
997            Some(b'}') => Err(self.de.parser.error(ErrorCode::TrailingComma)),
998            Some(_) => Err(self.de.parser.error(ErrorCode::ExpectObjectKeyOrEnd)),
999            None => Err(self.de.parser.error(ErrorCode::EofWhileParsing)),
1000        }
1001    }
1002
1003    #[inline(always)]
1004    fn next_value<V>(&mut self) -> Result<V>
1005    where
1006        V: de::Deserialize<'de>,
1007    {
1008        use std::marker::PhantomData;
1009        self.next_value_seed(PhantomData)
1010    }
1011
1012    #[inline(always)]
1013    fn next_entry<K, V>(&mut self) -> Result<Option<(K, V)>>
1014    where
1015        K: de::Deserialize<'de>,
1016        V: de::Deserialize<'de>,
1017    {
1018        use std::marker::PhantomData;
1019        self.next_entry_seed(PhantomData, PhantomData)
1020    }
1021
1022    #[inline(always)]
1023    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
1024    where
1025        V: de::DeserializeSeed<'de>,
1026    {
1027        tri!(self.de.parser.parse_object_clo());
1028        seed.deserialize(&mut *self.de)
1029    }
1030}
1031
1032struct VariantAccess<'a, R: 'a> {
1033    de: &'a mut Deserializer<R>,
1034}
1035
1036impl<'a, R: 'a> VariantAccess<'a, R> {
1037    fn new(de: &'a mut Deserializer<R>) -> Self {
1038        VariantAccess { de }
1039    }
1040}
1041
1042impl<'de, 'a, R: Reader<'de> + 'a> de::EnumAccess<'de> for VariantAccess<'a, R> {
1043    type Error = Error;
1044    type Variant = Self;
1045
1046    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self)>
1047    where
1048        V: de::DeserializeSeed<'de>,
1049    {
1050        let val = tri!(seed.deserialize(&mut *self.de));
1051        tri!(self.de.parser.parse_object_clo());
1052        Ok((val, self))
1053    }
1054}
1055
1056impl<'de, 'a, R: Reader<'de> + 'a> de::VariantAccess<'de> for VariantAccess<'a, R> {
1057    type Error = Error;
1058
1059    fn unit_variant(self) -> Result<()> {
1060        de::Deserialize::deserialize(self.de)
1061    }
1062
1063    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
1064    where
1065        T: de::DeserializeSeed<'de>,
1066    {
1067        seed.deserialize(self.de)
1068    }
1069
1070    fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
1071    where
1072        V: de::Visitor<'de>,
1073    {
1074        de::Deserializer::deserialize_seq(self.de, visitor)
1075    }
1076
1077    fn struct_variant<V>(self, fields: &'static [&'static str], visitor: V) -> Result<V::Value>
1078    where
1079        V: de::Visitor<'de>,
1080    {
1081        de::Deserializer::deserialize_struct(self.de, "", fields, visitor)
1082    }
1083}
1084
1085struct UnitVariantAccess<'a, R: 'a> {
1086    de: &'a mut Deserializer<R>,
1087}
1088
1089impl<'a, R: 'a> UnitVariantAccess<'a, R> {
1090    fn new(de: &'a mut Deserializer<R>) -> Self {
1091        UnitVariantAccess { de }
1092    }
1093}
1094
1095impl<'de, 'a, R: Reader<'de> + 'a> de::EnumAccess<'de> for UnitVariantAccess<'a, R> {
1096    type Error = Error;
1097    type Variant = Self;
1098
1099    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self)>
1100    where
1101        V: de::DeserializeSeed<'de>,
1102    {
1103        let variant = tri!(seed.deserialize(&mut *self.de));
1104        Ok((variant, self))
1105    }
1106}
1107
1108impl<'de, 'a, R: Reader<'de> + 'a> de::VariantAccess<'de> for UnitVariantAccess<'a, R> {
1109    type Error = Error;
1110
1111    fn unit_variant(self) -> Result<()> {
1112        Ok(())
1113    }
1114
1115    fn newtype_variant_seed<T>(self, _seed: T) -> Result<T::Value>
1116    where
1117        T: de::DeserializeSeed<'de>,
1118    {
1119        Err(de::Error::invalid_type(
1120            Unexpected::UnitVariant,
1121            &"newtype variant",
1122        ))
1123    }
1124
1125    fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value>
1126    where
1127        V: de::Visitor<'de>,
1128    {
1129        Err(de::Error::invalid_type(
1130            Unexpected::UnitVariant,
1131            &"tuple variant",
1132        ))
1133    }
1134
1135    fn struct_variant<V>(self, _fields: &'static [&'static str], _visitor: V) -> Result<V::Value>
1136    where
1137        V: de::Visitor<'de>,
1138    {
1139        Err(de::Error::invalid_type(
1140            Unexpected::UnitVariant,
1141            &"struct variant",
1142        ))
1143    }
1144}
1145
1146/// Only deserialize from this after peeking a '"' byte! Otherwise it may
1147/// deserialize invalid JSON successfully./// Only deserialize from this after peeking a '"' byte!
1148/// Otherwise it may deserialize invalid JSON successfully.
1149struct MapKey<'a, R: 'a> {
1150    de: &'a mut Deserializer<R>,
1151}
1152
/// Generates a `MapKey` method that reads a number written inside a quoted object key.
///
/// In both forms the byte right after the number must be the closing `"`; otherwise
/// `ExpectedQuote` is returned.
macro_rules! deserialize_numeric_key {
    // `$method` parses the number with `deserialize_number` of the wrapped deserializer.
    ($method:ident) => {
        fn $method<V>(self, visitor: V) -> Result<V::Value>
        where
            V: de::Visitor<'de>,
        {
            let number = tri!(self.de.deserialize_number(visitor));

            match self.de.parser.read.next() {
                Some(b'"') => Ok(number),
                _ => Err(self.de.parser.error(ErrorCode::ExpectedQuote)),
            }
        }
    };

    // `$method` first checks that the key starts with a digit or `-` (`ExpectedNumericKey`
    // otherwise) and then parses it with `$delegate` of the wrapped deserializer.
    ($method:ident, $delegate:ident) => {
        fn $method<V>(self, visitor: V) -> Result<V::Value>
        where
            V: de::Visitor<'de>,
        {
            if !matches!(self.de.parser.read.peek(), Some(b'0'..=b'9' | b'-')) {
                return Err(self.de.parser.error(ErrorCode::ExpectedNumericKey));
            }

            let number = tri!(self.de.$delegate(visitor));

            match self.de.parser.read.next() {
                Some(b'"') => Ok(number),
                _ => Err(self.de.parser.error(ErrorCode::ExpectedQuote)),
            }
        }
    };
}
1188
1189impl<'de, 'a, R> de::Deserializer<'de> for MapKey<'a, R>
1190where
1191    R: Reader<'de>,
1192{
1193    type Error = Error;
1194
1195    #[inline]
1196    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
1197    where
1198        V: de::Visitor<'de>,
1199    {
1200        self.de.scratch.clear();
1201        match tri!(self.de.parser.parse_str(&mut self.de.scratch)) {
1202            Reference::Borrowed(s) => visitor.visit_borrowed_str(s),
1203            Reference::Copied(s) => visitor.visit_str(s),
1204        }
1205    }
1206
1207    deserialize_numeric_key!(deserialize_i8);
1208    deserialize_numeric_key!(deserialize_i16);
1209    deserialize_numeric_key!(deserialize_i32);
1210    deserialize_numeric_key!(deserialize_i64);
1211    deserialize_numeric_key!(deserialize_i128, deserialize_i128);
1212    deserialize_numeric_key!(deserialize_u8);
1213    deserialize_numeric_key!(deserialize_u16);
1214    deserialize_numeric_key!(deserialize_u32);
1215    deserialize_numeric_key!(deserialize_u64);
1216    deserialize_numeric_key!(deserialize_u128, deserialize_u128);
1217    deserialize_numeric_key!(deserialize_f32);
1218    deserialize_numeric_key!(deserialize_f64);
1219
1220    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
1221    where
1222        V: de::Visitor<'de>,
1223    {
1224        let mut value = match self.de.parser.read.next() {
1225            Some(b't') => {
1226                tri!(self.de.parser.parse_literal("rue"));
1227                visitor.visit_bool(true)
1228            }
1229            Some(b'f') => {
1230                tri!(self.de.parser.parse_literal("alse"));
1231                visitor.visit_bool(false)
1232            }
1233            None => Err(self.de.parser.error(ErrorCode::EofWhileParsing)),
1234            Some(peek) => Err(self.de.peek_invalid_type(peek, &visitor)),
1235        };
1236
1237        if self.de.parser.read.next() != Some(b'"') {
1238            value = Err(self.de.parser.error(ErrorCode::ExpectedQuote));
1239        }
1240
1241        match value {
1242            Ok(value) => Ok(value),
1243            Err(err) => Err(self.de.parser.fix_position(err)),
1244        }
1245    }
1246
1247    #[inline]
1248    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
1249    where
1250        V: de::Visitor<'de>,
1251    {
1252        // Map keys cannot be null.
1253        visitor.visit_some(self)
1254    }
1255
1256    #[inline]
1257    fn deserialize_newtype_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
1258    where
1259        V: de::Visitor<'de>,
1260    {
1261        visitor.visit_newtype_struct(self)
1262    }
1263
1264    #[inline]
1265    fn deserialize_enum<V>(
1266        self,
1267        name: &'static str,
1268        variants: &'static [&'static str],
1269        visitor: V,
1270    ) -> Result<V::Value>
1271    where
1272        V: de::Visitor<'de>,
1273    {
1274        self.de.parser.read.backward(1);
1275        self.de.deserialize_enum(name, variants, visitor)
1276    }
1277
1278    #[inline]
1279    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
1280    where
1281        V: de::Visitor<'de>,
1282    {
1283        self.de.parser.read.backward(1);
1284        self.de.deserialize_bytes(visitor)
1285    }
1286
1287    #[inline]
1288    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
1289    where
1290        V: de::Visitor<'de>,
1291    {
1292        self.de.parser.read.backward(1);
1293        self.de.deserialize_bytes(visitor)
1294    }
1295
1296    forward_to_deserialize_any! {
1297        char str string unit unit_struct seq tuple tuple_struct map struct
1298        identifier ignored_any
1299    }
1300}
1301//////////////////////////////////////////////////////////////////////////////
1302
1303fn from_trait<'de, R, T>(read: R) -> Result<T>
1304where
1305    R: Reader<'de>,
1306    T: de::Deserialize<'de>,
1307{
1308    // check JSON size, because the design of `sonic_rs::Value`, parsing JSON larger than 4 GB is
1309    // not supported
1310    let len = read.as_u8_slice().len();
1311    if len > u32::MAX as _ {
1312        return Err(crate::error::make_error(format!(
1313            "Only support JSON less than 4 GB, the input JSON is too large here, len is {len}"
1314        )));
1315    }
1316
1317    let mut de = Deserializer::new(read);
1318    #[cfg(feature = "arbitrary_precision")]
1319    {
1320        de = de.use_rawnumber();
1321    }
1322
1323    #[cfg(feature = "utf8_lossy")]
1324    {
1325        de = de.utf8_lossy();
1326    }
1327
1328    let value = tri!(de::Deserialize::deserialize(&mut de));
1329
1330    // Make sure the whole stream has been consumed.
1331    tri!(de.parser.parse_trailing());
1332
1333    // check invalid utf8
1334    tri!(de.parser.read.check_utf8_final());
1335    Ok(value)
1336}
1337
1338/// Deserialize an instance of type `T` from bytes of JSON text.
1339/// If user can guarantee the JSON is valid UTF-8, recommend to use `from_slice_unchecked` instead.
1340pub fn from_slice<'a, T>(json: &'a [u8]) -> Result<T>
1341where
1342    T: de::Deserialize<'a>,
1343{
1344    from_trait(Read::new(json, true))
1345}
1346
1347/// Deserialize an instance of type `T` from bytes of JSON text.
1348///
1349/// # Safety
1350/// The json passed in must be valid UTF-8.
1351pub unsafe fn from_slice_unchecked<'a, T>(json: &'a [u8]) -> Result<T>
1352where
1353    T: de::Deserialize<'a>,
1354{
1355    from_trait(Read::new(json, false))
1356}
1357
1358/// Deserialize an instance of type `T` from a string of JSON text.
1359pub fn from_str<'a, T>(s: &'a str) -> Result<T>
1360where
1361    T: de::Deserialize<'a>,
1362{
1363    from_trait(Read::new(s.as_bytes(), false))
1364}
1365
1366/// Deserialize an instance of type `T` from a Reader
1367pub fn from_reader<R, T>(mut reader: R) -> Result<T>
1368where
1369    R: std::io::Read,
1370    T: de::DeserializeOwned,
1371{
1372    let mut data = Vec::new();
1373    if let Err(e) = reader.read_to_end(&mut data) {
1374        return Err(Error::io(e));
1375    };
1376    from_slice(data.as_slice())
1377}
1378
#[cfg(test)]
mod test {
    use crate::{object, Value};

    /// After `deserialize` the reader sits right behind the value (index 16); `end`
    /// succeeds only when nothing else follows it.
    #[test]
    fn test_value_as_deserializer() {
        let cases = [
            (r#"{"a": 1, "b": 2}"#, true),
            (r#"{"a": 1, "b": 2}123"#, false),
        ];

        for (json, end_is_ok) in cases {
            let mut de = crate::Deserializer::new(crate::Read::from(json));

            let res: Value = de.deserialize().unwrap();
            assert_eq!(res, object! { "a": 1, "b": 2 });
            assert_eq!(de.parser.read.index, 16);

            let res = de.end();
            assert_eq!(res.is_ok(), end_is_ok);
        }
    }
}