// sonic_rs/lazyvalue/iterator.rs

1use std::borrow::Cow;
2
3use crate::{
4    error::Result,
5    input::{JsonInput, JsonSlice},
6    lazyvalue::LazyValue,
7    parser::{Pair, Parser, DEFAULT_KEY_BUF_CAPACITY},
8    reader::{Read, Reader},
9};
10/// A lazied iterator for JSON object text. It will parse the JSON when iterating.
11///
12/// The item of the iterator is [`Result<LazyValue>`][`crate::LazyValue`].
13///
14/// # Examples
15///```
16/// use faststr::FastStr;
17/// use sonic_rs::{to_object_iter, JsonValueTrait};
18///
19/// let json = FastStr::from(r#"{"a": null, "b":[1, 2, 3]}"#);
20/// let iter = to_object_iter(&json);
21///
22/// for ret in iter {
23///     // deal with errors
24///     if ret.is_err() {
25///         println!("{}", ret.unwrap_err());
26///         return;
27///     }
28///     let (k, v) = ret.unwrap();
29///     if k == "a" {
30///         assert!(v.is_null());
31///     } else if k == "b" {
32///         assert_eq!(v.as_raw_str(), "[1, 2, 3]");
33///     }
34/// }
35/// ```
36pub struct ObjectJsonIter<'de> {
37    parser: Parser<Read<'de>>,
38    strbuf: Vec<u8>,
39    first: bool,
40    ending: bool,
41    skip_strict: bool,
42}
43
44/// A lazied iterator for JSON array text. It will parse the JSON when iterating.
45///
46/// The item of the iterator is [`Result<LazyValue>`][`crate::LazyValue`].
47///
48/// # Examples
49/// ```
50/// use sonic_rs::{to_array_iter, JsonValueTrait};
51///
52/// let iter = to_array_iter(r#"[0, 1, 2, 3, 4, 5, 6]"#);
53/// for (i, ret) in iter.enumerate() {
54///     let lv = ret.unwrap(); // get lazyvalue
55///     assert_eq!(i.to_string(), lv.as_raw_str()); // lv is not parsed
56///     assert_eq!(i, lv.as_u64().unwrap() as usize);
57/// }
58///
59/// let iter = to_array_iter(r#"[1, 2, 3, 4, 5, 6"#);
60/// for elem in iter {
61///     // do something for each elem
62///     // deal with errors when invalid json
63///     if elem.is_err() {
64///         assert!(elem
65///             .unwrap_err()
66///             .to_string()
67///             .contains("Expected this character to be either a ',' or a ']'"));
68///     }
69/// }
70/// ```
71pub struct ArrayJsonIter<'de> {
72    parser: Parser<Read<'de>>,
73    first: bool,
74    ending: bool,
75    skip_strict: bool,
76}
77
78impl<'de> ObjectJsonIter<'de> {
79    // input is inner json, expected always be validated and well-formed
80    pub(crate) fn new_inner(input: JsonSlice<'de>) -> Self {
81        Self {
82            parser: Parser::new(Read::new_in(input, false)),
83            strbuf: Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY),
84            first: true,
85            ending: false,
86            skip_strict: false,
87        }
88    }
89
90    pub(crate) fn new<I: JsonInput<'de>>(input: I, skip_strict: bool) -> Self {
91        let validate_utf8 = if skip_strict {
92            input.need_utf8_valid()
93        } else {
94            Default::default()
95        };
96
97        Self {
98            parser: Parser::new(Read::new_in(input.to_json_slice(), validate_utf8)),
99            strbuf: Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY),
100            first: true,
101            ending: false,
102            skip_strict,
103        }
104    }
105
106    fn next_entry_impl(&mut self) -> Option<Result<(Cow<'de, str>, LazyValue<'de>)>> {
107        if self.ending {
108            return None;
109        }
110
111        if self.first {
112            // check invalid utf8
113            if let Err(err) = self.parser.read.check_utf8_final() {
114                self.ending = true;
115                return Some(Err(err));
116            }
117        }
118
119        match self
120            .parser
121            .parse_entry_lazy(&mut self.strbuf, &mut self.first, self.skip_strict)
122        {
123            Ok(ret) => {
124                if let Some(Pair { key, val, status }) = ret {
125                    let val = self.parser.read.slice_ref(val);
126                    Some(Ok(LazyValue::new(val, status.into())).map(|v| (key, v)))
127                } else {
128                    self.ending = true;
129                    None
130                }
131            }
132            Err(err) => {
133                self.ending = true;
134                Some(Err(err))
135            }
136        }
137    }
138}
139
140impl<'de> ArrayJsonIter<'de> {
141    // input is inner json, expected always be validated and well-formed
142    pub(crate) fn new_inner(input: JsonSlice<'de>) -> Self {
143        Self {
144            parser: Parser::new(Read::new_in(input, false)),
145            first: true,
146            ending: false,
147            skip_strict: false,
148        }
149    }
150
151    pub(crate) fn new<I: JsonInput<'de>>(input: I, skip_strict: bool) -> Self {
152        let validate_utf8 = if skip_strict {
153            input.need_utf8_valid()
154        } else {
155            Default::default()
156        };
157
158        Self {
159            parser: Parser::new(Read::new_in(input.to_json_slice(), validate_utf8)),
160            first: true,
161            ending: false,
162            skip_strict,
163        }
164    }
165
166    fn next_elem_impl(&mut self) -> Option<Result<LazyValue<'de>>> {
167        if self.ending {
168            return None;
169        }
170
171        if self.first {
172            // check invalid utf8
173            if let Err(err) = self.parser.read.check_utf8_final() {
174                self.ending = true;
175                return Some(Err(err));
176            }
177        }
178
179        match self
180            .parser
181            .parse_array_elem_lazy(&mut self.first, self.skip_strict)
182        {
183            Ok(ret) => {
184                if let Some((val, status)) = ret {
185                    let val = self.parser.read.slice_ref(val);
186                    Some(Ok(LazyValue::new(val, status.into())))
187                } else {
188                    self.ending = true;
189                    None
190                }
191            }
192            Err(err) => {
193                self.ending = true;
194                Some(Err(err))
195            }
196        }
197    }
198}
199
200/// Traverse the JSON object text through a lazy iterator. The JSON parsing will doing when
201/// iterating.
202///
203/// The item of the iterator is a key-value pair: ([FastStr][`faststr::FastStr`],
204/// [`Result<LazyValue>`][`crate::LazyValue`]).
205///
206/// # Errors
207///
208/// If the JSON is empty, not a object or parse error, the result will be Err and the `next()` will
209/// return `None`.
210///
211/// # Examples
212///
213/// ```
214/// # use sonic_rs::to_object_iter;
215/// use faststr::FastStr;
216/// use sonic_rs::JsonValueTrait;
217///
218/// let json = FastStr::from(r#"{"a": null, "b":[1, 2, 3]}"#);
219/// for ret in to_object_iter(&json) {
220///     assert!(ret.is_ok());
221///     let (k, v) = ret.unwrap();
222///     if k == "a" {
223///         assert!(v.is_null());
224///     } else if k == "b" {
225///         assert_eq!(v.as_raw_str(), "[1, 2, 3]");
226///     }
227/// }
228///
229/// // the JSON is invalid, will report error when encountering the error
230/// for (i, ret) in to_object_iter(r#"{"a": null, "b":[1, 2, 3"#).enumerate() {
231///     if i == 0 {
232///         assert!(ret.is_ok());
233///     }
234///     if i == 1 {
235///         assert!(ret.is_err());
236///     }
237/// }
238/// ```
239pub fn to_object_iter<'de, I: JsonInput<'de>>(json: I) -> ObjectJsonIter<'de> {
240    ObjectJsonIter::new(json, true)
241}
242
243/// Traverse the JSON array text through a lazy iterator. The JSON parsing will doing when
244/// iterating.
245///
246/// The item of the iterator is [`Result<LazyValue>`][`crate::LazyValue`].
247///
248/// # Errors
249///
250/// If the JSON is empty, not array or parse error, it will return Err and `next()` will return
251/// `None`.
252///
253/// # Examples
254///
255/// ```
256/// # use sonic_rs::to_array_iter;
257/// use sonic_rs::JsonValueTrait;
258///
259/// for (i, ret) in to_array_iter(r#"[0, 1, 2, 3, 4, 5, 6]"#).enumerate() {
260///     let lv = ret.unwrap(); // get lazyvalue
261///     assert_eq!(i.to_string(), lv.as_raw_str()); // lv is not parsed
262///     assert_eq!(i, lv.as_u64().unwrap() as usize);
263/// }
264///
265/// for elem in to_array_iter(r#"[1, 2, 3, 4, 5, 6"#) {
266///     // do something for each elem
267///     // deal with errors when invalid json
268///     if elem.is_err() {
269///         assert!(elem
270///             .unwrap_err()
271///             .to_string()
272///             .contains("Expected this character to be either a ',' or a ']'"));
273///     }
274/// }
275/// ```
276pub fn to_array_iter<'de, I: JsonInput<'de>>(json: I) -> ArrayJsonIter<'de> {
277    ArrayJsonIter::new(json, true)
278}
279
280/// Traverse the JSON text through a lazy object iterator. The JSON parsing will doing when
281/// iterating.
282///
283/// The item of the iterator is a key-value pair: ([FastStr][`faststr::FastStr`],
284/// [`Result<LazyValue>`][`crate::LazyValue`]).
285///
286/// # Errors
287///
288/// If the JSON is empty, or not a object, the result will be Err and the `next()` will return
289/// `None`.
290///
291/// # Safety
292///
293/// If the json is invalid, the result is undefined.
294///
295/// # Examples
296///
297/// ```
298/// # use sonic_rs::to_object_iter_unchecked;
299/// use faststr::FastStr;
300/// use sonic_rs::JsonValueTrait;
301///
302/// let json = FastStr::from(r#"{"a": null, "b":[1, 2, 3]}"#);
303/// for ret in unsafe { to_object_iter_unchecked(&json) } {
304///     assert!(ret.is_ok());
305///     let (k, v) = ret.unwrap();
306///     if k == "a" {
307///         assert!(v.is_null());
308///     } else if k == "b" {
309///         assert_eq!(v.as_raw_str(), "[1, 2, 3]");
310///     }
311/// }
312/// ```
313pub unsafe fn to_object_iter_unchecked<'de, I: JsonInput<'de>>(json: I) -> ObjectJsonIter<'de> {
314    ObjectJsonIter::new(json, false)
315}
316
317/// Traverse the JSON text through a lazy object iterator. The JSON parsing will doing when
318/// iterating.
319///
320/// The item of the iterator is [`Result<LazyValue>`][`crate::LazyValue`].
321///
322/// # Errors
323///
324/// If the JSON is empty, or not a array, the result will be Err and the `next()` will return
325/// `None`.
326///
327/// # Safety
328///
329/// If the json is invalid, the result is undefined.
330///
331/// # Examples
332/// ```
333/// # use sonic_rs::to_array_iter_unchecked;
334/// use sonic_rs::JsonValueTrait;
335///
336/// for (i, ret) in unsafe { to_array_iter_unchecked(r#"[0, 1, 2, 3, 4, 5, 6]"#) }.enumerate() {
337///     let lv = ret.unwrap(); // get lazyvalue
338///     assert_eq!(i.to_string(), lv.as_raw_str()); // lv is not parsed
339///     assert_eq!(i, lv.as_u64().unwrap() as usize);
340/// }
341///
342/// // the JSON is empty
343/// for elem in unsafe { to_array_iter_unchecked("") } {
344///     assert!(elem.is_err());
345/// }
346/// ```
347pub unsafe fn to_array_iter_unchecked<'de, I: JsonInput<'de>>(json: I) -> ArrayJsonIter<'de> {
348    ArrayJsonIter::new(json, false)
349}
350
351impl<'de> Iterator for ObjectJsonIter<'de> {
352    type Item = Result<(Cow<'de, str>, LazyValue<'de>)>;
353
354    fn next(&mut self) -> Option<Self::Item> {
355        self.next_entry_impl()
356    }
357}
358
359impl<'de> Iterator for ArrayJsonIter<'de> {
360    type Item = Result<LazyValue<'de>>;
361
362    fn next(&mut self) -> Option<Self::Item> {
363        self.next_elem_impl()
364    }
365}
366
#[cfg(test)]
mod test {
    use bytes::Bytes;

    use super::*;
    use crate::{value::JsonValueTrait, JsonType};

    // Walks one object with the checked and the unchecked iterator in lock-step and compares
    // every entry against a table of expected (key, raw text, type) triples.
    #[test]
    fn test_object_iter() {
        let json = Bytes::from(
            r#"{
            "string": "Hello, world!",
            "number": 42,
            "boolean": true,
            "null": null,
            "array": ["foo","bar","baz"],
            "object": {"name": "Alice"},
            "empty": {},
            "": [],
            "escaped\"": "\"\"",
            "\t": "\n",
            "\u0000": "\u0001"
        }"#,
        );
        // Sanity check: the fixture itself must be valid JSON.
        let _v: serde_json::Value = serde_json::from_slice(json.as_ref()).unwrap();
        let mut checked = to_object_iter(&json);
        let mut unchecked = unsafe { to_object_iter_unchecked(&json) };

        let expected = [
            ("string", r#""Hello, world!""#, JsonType::String),
            ("number", "42", JsonType::Number),
            ("boolean", "true", JsonType::Boolean),
            ("null", "null", JsonType::Null),
            ("array", r#"["foo","bar","baz"]"#, JsonType::Array),
            ("object", r#"{"name": "Alice"}"#, JsonType::Object),
            ("empty", r#"{}"#, JsonType::Object),
            ("", r#"[]"#, JsonType::Array),
            ("escaped\"", r#""\"\"""#, JsonType::String),
            ("\t", r#""\n""#, JsonType::String),
            ("\x00", r#""\u0001""#, JsonType::String),
        ];

        for (key, val, typ) in expected.iter() {
            let entry = checked.next().unwrap().unwrap();
            assert_eq!(entry.0.as_ref(), *key);
            assert_eq!(
                entry.1.as_raw_str().as_bytes(),
                val.as_bytes(),
                "key is {key} ",
            );
            assert_eq!(entry.1.get_type(), *typ);

            let entry = unchecked.next().unwrap().unwrap();
            assert_eq!(entry.0.as_ref(), *key);
            assert_eq!(
                entry.1.as_raw_str().as_bytes(),
                val.as_bytes(),
                "key is {key} ",
            );
            assert_eq!(entry.1.get_type(), *typ);
        }
        // Exhausted iterators keep returning `None`.
        assert!(checked.next().is_none());
        assert!(checked.next().is_none());

        let empty = Bytes::from("{}");
        let mut it = to_object_iter(&empty);
        assert!(it.next().is_none());
        assert!(it.next().is_none());
        assert!(it.next().is_none());

        // A parse error is reported once, then the iterator is finished.
        let broken = Bytes::from("{xxxxxx");
        let mut it = to_object_iter(&broken);
        assert!(it.next().unwrap().is_err());
        assert!(it.next().is_none());
    }

    // Same lock-step comparison as above, for array elements.
    #[test]
    fn test_array_iter() {
        let json = Bytes::from(
            r#"[
            "",
            "\\\"\"",
            "{\"a\":null}",
            "Hello, world!",
            0,
            1,
            11,
            1000,
            42,
            42.0,
            42e-1,
            4.2e+1,
            2333.2e+1,
            0.0000000999e8,
            true,
            null,
            ["foo","bar","baz"],
            {"name": "Alice"},
            [],
            {}
        ]"#,
        );
        let mut checked = to_array_iter(&json);
        let mut unchecked = unsafe { to_array_iter_unchecked(&json) };

        let expected = [
            (r#""""#, JsonType::String),
            (r#""\\\"\"""#, JsonType::String),
            (r#""{\"a\":null}""#, JsonType::String),
            (r#""Hello, world!""#, JsonType::String),
            ("0", JsonType::Number),
            ("1", JsonType::Number),
            ("11", JsonType::Number),
            ("1000", JsonType::Number),
            ("42", JsonType::Number),
            ("42.0", JsonType::Number),
            ("42e-1", JsonType::Number),
            ("4.2e+1", JsonType::Number),
            ("2333.2e+1", JsonType::Number),
            ("0.0000000999e8", JsonType::Number),
            ("true", JsonType::Boolean),
            ("null", JsonType::Null),
            (r#"["foo","bar","baz"]"#, JsonType::Array),
            (r#"{"name": "Alice"}"#, JsonType::Object),
            (r#"[]"#, JsonType::Array),
            (r#"{}"#, JsonType::Object),
        ];

        for (val, typ) in expected.iter() {
            let elem: LazyValue<'_> = checked.next().unwrap().unwrap();
            assert_eq!(elem.as_raw_str(), *val);
            assert_eq!(elem.get_type(), *typ);

            let elem = unchecked.next().unwrap().unwrap();
            assert_eq!(elem.as_raw_str().as_bytes(), val.as_bytes());
            assert_eq!(elem.get_type(), *typ);
        }
        // Exhausted iterators keep returning `None`.
        assert!(checked.next().is_none());
        assert!(checked.next().is_none());

        let empty = Bytes::from("[]");
        let mut it = to_array_iter(&empty);
        assert!(it.next().is_none());
        assert!(it.next().is_none());
        assert!(it.next().is_none());

        // A parse error is reported once, then the iterator is finished.
        let broken = Bytes::from("[xxxxxx");
        let mut it = to_array_iter(&broken);
        assert!(it.next().unwrap().is_err());
        assert!(it.next().is_none());
    }

    // Elements produced by the iterator can be fed back into the deserializer.
    #[test]
    fn test_iter_deserialize() {
        let numbers = Bytes::from(r#"[1, 2, 3, 4, 5, 6]"#);
        let decoded: Vec<u8> = to_array_iter(&numbers)
            .flatten()
            .map(|lv| crate::from_str::<u8>(lv.as_raw_str()).unwrap_or_default())
            .collect();
        assert_eq!(decoded.as_slice(), &[1, 2, 3, 4, 5, 6]);

        let mixed = Bytes::from(r#"[1, true, "hello", null, 5, 6]"#);
        let out: Vec<JsonType> = to_array_iter(&mixed).map(|item| item.get_type()).collect();
        println!("array elem type is {out:?}");
    }

    // Numbers with an exponent must be iterable without error.
    #[test]
    fn test_num_iter() {
        for elem in to_array_iter("[6,-9E6]") {
            println!("{:?}", elem.unwrap().as_raw_str());
        }
    }

    // Invalid UTF-8 inside the input is reported with its position by both iterators.
    #[test]
    fn test_json_iter_for_utf8() {
        let data = [b'[', b'"', 0, 0, 0, 0x80, 0x90, b'"', b']'];
        for item in to_array_iter(&data[..]) {
            assert_eq!(
                item.err().unwrap().to_string(),
                "Invalid UTF-8 characters in json at line 1 column \
                 6\n\n\t[\"\0\0\0��\"]\n\t.....^...\n"
            );
        }

        let data = [
            b'{', b'"', 0, 0, 0, 0x80, 0x90, b'"', b':', b'"', b'"', b'}',
        ];
        for item in to_object_iter(&data[..]) {
            assert_eq!(
                item.err().unwrap().to_string(),
                "Invalid UTF-8 characters in json at line 1 column \
                 6\n\n\t{\"\0\0\0��\":\"\"}\n\t.....^......\n"
            );
        }
    }

    // Regression test for issue 182: a key must stay usable after its iterator is dropped.
    #[test]
    fn test_issue_182_uaf() {
        let json = r#"{"key": "value"}"#;
        let root: LazyValue = crate::from_str(json).unwrap();
        let key = {
            let mut entries = root.into_object_iter().unwrap();
            let (first_key, _) = entries.next().unwrap().unwrap();
            first_key
        };

        // the asan will report uaf here if uaf happened
        assert_eq!(key, "key");
    }
}