streamson_lib/
streamer.rs

//! Streams significant points of JSON from input data
2
3use crate::{
4    error,
5    path::{Element, Path},
6};
7use std::{
8    collections::{vec_deque::Drain, VecDeque},
9    str::from_utf8,
10};
11
/// Kind of JSON value a [`Token`](enum.Token.html) refers to.
///
/// The type is a plain field-less enum, so besides `Clone`/`Copy`/`PartialEq`
/// it also implements `Eq` and `Hash`, which allows it to be used as a key in
/// hash-based collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ParsedKind {
    /// Object e.g. {}
    Obj,
    /// Array e.g. []
    Arr,
    /// String e.g. ""
    Str,
    /// Number e.g. 0
    Num,
    /// Null e.g. null
    Null,
    /// Boolean e.g. false
    Bool,
}
28
/// Structure which contains further info about matched data
///
/// Every index carried by a variant is an offset counted from the first byte
/// which was ever fed to the streamer.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    /// Path starts here
    ///
    /// Holds the index of the first byte of the value and the kind of the value.
    Start(usize, ParsedKind),
    /// Path ends here
    ///
    /// Holds the index right after the last byte of the value and the kind of
    /// the value.
    End(usize, ParsedKind),
    /// Element separator idx (idx of `,` between array/object elements)
    Separator(usize),
    /// Needs more data
    Pending,
}
41
42impl Token {
43    pub fn is_end(&self) -> bool {
44        matches!(self, Self::End(_, _))
45    }
46}
47
/// Key parsing states
#[derive(Debug)]
enum ObjectKeyState {
    /// The key has not started yet (opening `"` or closing `}` is expected)
    Init,
    /// The opening `"` was consumed and the content of the key is being read
    Parse(StringState),
}
54
/// Parsing string states
#[derive(Debug, PartialEq)]
enum StringState {
    /// The next byte is interpreted normally
    Normal,
    /// The previous byte was an unescaped `\`, so the next byte is taken
    /// literally (e.g. `"` does not terminate the string)
    Escaped,
}
61
/// JSON processing states
///
/// The states are kept on a stack inside the streamer; the state on the top
/// of the stack is processed first.
#[derive(Debug)]
enum States {
    /// A value is expected; the optional element is pushed to the current
    /// path once the value starts
    Value(Option<Element>),
    /// Inside of a string value
    Str(StringState),
    /// Inside of a number value
    Number,
    /// Inside of a bool value
    Bool,
    /// Inside of a null value
    Null,
    /// Inside of an array, `,` or `]` is expected; holds the index of the
    /// latest array element
    Array(usize),
    /// Inside of an object, `,` or `}` is expected
    Object,
    /// A key of an object is being read
    ObjectKey(ObjectKeyState),
    /// `:` between a key and its value is expected
    Colon,
    /// Whitespace characters are skipped
    RemoveWhitespaces,
}
76
/// Reads parts of UTF-8 json input and emits paths
/// e.g. reading of
/// ```json
/// {
///     "People": [
///         {"Height": 180, "Age": 33},
///         {"Height": 175, "Age": 24}
///     ]
/// }
/// ```
/// should emit (index and path)
/// ```text
/// Start( 0, ParsedKind::Obj) // Streamer.path == ""
/// Start( 1, ParsedKind::Arr) // Streamer.path == "{\"People\"}"
/// Start( 3, ParsedKind::Obj) // Streamer.path == "{\"People\"}[0]"
/// Start( 4, ParsedKind::Num) // Streamer.path == "{\"People\"}[0]{\"Height\"}"
/// End(   5, ParsedKind::Num)
/// Start( 6, ParsedKind::Num) // Streamer.path == "{\"People\"}[0]{\"Age\"}"
/// End(   7, ParsedKind::Num)
/// End(   8, ParsedKind::Obj)
/// Start( 9, ParsedKind::Obj) // Streamer.path == "{\"People\"}[1]"
/// Start(10, ParsedKind::Num) // Streamer.path == "{\"People\"}[1]{\"Height\"}"
/// End(  11, ParsedKind::Num)
/// Start(12, ParsedKind::Num) // Streamer.path == "{\"People\"}[1]{\"Age\"}"
/// End(  13, ParsedKind::Num)
/// End(  14, ParsedKind::Obj)
/// End(  15, ParsedKind::Arr)
/// End(  16, ParsedKind::Obj)
/// ```
///
/// NOTE(review): the numbers in the listing above do not look like real byte
/// offsets of the example document - the emitted tokens carry offsets counted
/// from the first byte of the input.
#[derive(Debug)]
pub struct Streamer {
    /// Path stack
    path: Path,
    /// Parsing elements stack (the last element is processed first)
    states: Vec<States>,
    /// Pending buffer (bytes which were fed, but not dropped yet)
    pending: VecDeque<u8>,
    /// Position of the cursor inside the pending buffer
    pending_idx: usize,
    /// Total index of the first byte of the pending buffer against the first
    /// byte passed to input
    total_idx: usize,
    /// Indicator whether to pop path in the next read
    pop_path: bool,
}
121
122impl Default for Streamer {
123    fn default() -> Self {
124        Self {
125            path: Path::default(),
126            states: vec![States::Value(None), States::RemoveWhitespaces],
127            pending: VecDeque::new(),
128            pending_idx: 0,
129            total_idx: 0,
130            pop_path: false,
131        }
132    }
133}
134
135impl Streamer {
136    /// Creates a new instance of streamer
137    pub fn new() -> Self {
138        Self::default()
139    }
140
141    /// Returns current path
142    pub fn current_path(&mut self) -> &mut Path {
143        &mut self.path
144    }
145
146    /// Returns currently processed byte
147    ///
148    /// # Returns
149    /// * None - needs more data
150    /// * Some(byte) - current byte
151    ///
152    fn peek(&mut self) -> Option<u8> {
153        if self.pending.len() > self.pending_idx {
154            Some(self.pending[self.pending_idx])
155        } else {
156            None
157        }
158    }
159
160    /// Moves current curser character forward
161    ///
162    fn forward(&mut self) {
163        if self.peek().is_some() {
164            self.pending_idx += 1;
165        }
166    }
167
168    /// Moves pending buffer forward (reallocates data)
169    fn advance(&mut self) -> Drain<u8> {
170        let to_remove = self.pending_idx;
171        if self.pending_idx > 0 {
172            self.total_idx += self.pending_idx;
173            self.pending_idx = 0;
174        }
175        self.pending.drain(0..to_remove)
176    }
177
178    /// Feed streamer with data
179    pub fn feed(&mut self, input: &[u8]) {
180        self.pending.extend(input);
181    }
182
183    /// Moves cursor forward while characters are whitespace
184    fn process_remove_whitespace(&mut self) -> Option<Token> {
185        while let Some(byte) = self.peek() {
186            if !byte.is_ascii_whitespace() {
187                self.advance();
188                return None;
189            }
190            self.forward();
191        }
192        self.states.push(States::RemoveWhitespaces);
193        Some(Token::Pending)
194    }
195
196    /// Processes value which type will be determined later
197    fn process_value(&mut self, element: Option<Element>) -> Result<Option<Token>, error::General> {
198        if let Some(byte) = self.peek() {
199            match byte {
200                b'"' => {
201                    self.states.push(States::Str(StringState::Normal));
202                    self.advance();
203                    self.forward();
204                    if let Some(element) = element {
205                        self.path.push(element);
206                    }
207                    Ok(Some(Token::Start(self.total_idx, ParsedKind::Str)))
208                }
209                b'0'..=b'9' => {
210                    self.states.push(States::Number);
211                    self.advance();
212                    if let Some(element) = element {
213                        self.path.push(element);
214                    }
215                    Ok(Some(Token::Start(self.total_idx, ParsedKind::Num)))
216                }
217                b't' | b'f' => {
218                    self.states.push(States::Bool);
219                    self.advance();
220                    if let Some(element) = element {
221                        self.path.push(element);
222                    }
223                    Ok(Some(Token::Start(self.total_idx, ParsedKind::Bool)))
224                }
225                b'n' => {
226                    self.states.push(States::Null);
227                    self.advance();
228                    if let Some(element) = element {
229                        self.path.push(element);
230                    }
231                    Ok(Some(Token::Start(self.total_idx, ParsedKind::Null)))
232                }
233                b'[' => {
234                    self.states.push(States::Array(0));
235                    self.states.push(States::RemoveWhitespaces);
236                    self.states.push(States::Value(Some(Element::Index(0))));
237                    self.states.push(States::RemoveWhitespaces);
238                    self.advance();
239                    self.forward();
240                    if let Some(element) = element {
241                        self.path.push(element);
242                    }
243                    Ok(Some(Token::Start(self.total_idx, ParsedKind::Arr)))
244                }
245                b'{' => {
246                    self.states.push(States::Object);
247                    self.states.push(States::RemoveWhitespaces);
248                    self.states.push(States::ObjectKey(ObjectKeyState::Init));
249                    self.states.push(States::RemoveWhitespaces);
250                    self.advance();
251                    self.forward();
252                    if let Some(element) = element {
253                        self.path.push(element);
254                    }
255                    Ok(Some(Token::Start(self.total_idx, ParsedKind::Obj)))
256                }
257                b']' | b'}' => {
258                    // End of an array or object -> no value matched
259                    Ok(None)
260                }
261                byte => {
262                    Err(error::IncorrectInput::new(byte, self.total_idx + self.pending_idx).into())
263                }
264            }
265        } else {
266            self.states.push(States::Value(element));
267            Ok(Some(Token::Pending))
268        }
269    }
270
271    /// Processes string on the input
272    fn process_str(&mut self, state: StringState) -> Option<Token> {
273        if let Some(byte) = self.peek() {
274            match byte {
275                b'"' => {
276                    if state == StringState::Normal {
277                        self.forward();
278                        self.advance();
279                        Some(Token::End(self.total_idx, ParsedKind::Str))
280                    } else {
281                        self.forward();
282                        self.states.push(States::Str(StringState::Normal));
283                        None
284                    }
285                }
286                b'\\' => {
287                    self.forward();
288                    let new_state = match state {
289                        StringState::Escaped => StringState::Normal,
290                        StringState::Normal => StringState::Escaped,
291                    };
292                    self.states.push(States::Str(new_state));
293                    None
294                }
295                _ => {
296                    self.forward();
297                    self.states.push(States::Str(StringState::Normal));
298                    None
299                }
300            }
301        } else {
302            self.states.push(States::Str(state));
303            Some(Token::Pending)
304        }
305    }
306
307    /// Processes the number
308    fn process_number(&mut self) -> Option<Token> {
309        if let Some(byte) = self.peek() {
310            if byte.is_ascii_digit() || byte == b'.' {
311                self.forward();
312                self.states.push(States::Number);
313                None
314            } else {
315                self.advance();
316                Some(Token::End(self.total_idx, ParsedKind::Num))
317            }
318        } else {
319            self.states.push(States::Number);
320            Some(Token::Pending)
321        }
322    }
323
324    /// Processes bool
325    fn process_bool(&mut self) -> Option<Token> {
326        if let Some(byte) = self.peek() {
327            if byte.is_ascii_alphabetic() {
328                self.forward();
329                self.states.push(States::Bool);
330                None
331            } else {
332                self.advance();
333                Some(Token::End(self.total_idx, ParsedKind::Bool))
334            }
335        } else {
336            self.states.push(States::Bool);
337            Some(Token::Pending)
338        }
339    }
340
341    /// Processes null
342    fn process_null(&mut self) -> Option<Token> {
343        if let Some(byte) = self.peek() {
344            if byte.is_ascii_alphabetic() {
345                self.forward();
346                self.states.push(States::Null);
347                None
348            } else {
349                self.advance();
350                Some(Token::End(self.total_idx, ParsedKind::Null))
351            }
352        } else {
353            self.states.push(States::Null);
354            Some(Token::Pending)
355        }
356    }
357
358    /// Processes an array
359    fn process_array(&mut self, idx: usize) -> Result<Option<Token>, error::General> {
360        if let Some(byte) = self.peek() {
361            match byte {
362                b']' => {
363                    self.forward();
364                    self.advance();
365                    Ok(Some(Token::End(self.total_idx, ParsedKind::Arr)))
366                }
367                b',' => {
368                    self.forward();
369                    self.states.push(States::Array(idx + 1));
370                    self.states.push(States::RemoveWhitespaces);
371                    self.states
372                        .push(States::Value(Some(Element::Index(idx + 1))));
373                    self.states.push(States::RemoveWhitespaces);
374                    Ok(Some(Token::Separator(self.total_idx)))
375                }
376                byte => {
377                    Err(error::IncorrectInput::new(byte, self.total_idx + self.pending_idx).into())
378                }
379            }
380        } else {
381            self.states.push(States::Array(idx));
382            Ok(Some(Token::Pending))
383        }
384    }
385
386    /// Processes and object
387    fn process_object(&mut self) -> Result<Option<Token>, error::General> {
388        if let Some(byte) = self.peek() {
389            match byte {
390                b'}' => {
391                    self.forward();
392                    self.advance();
393                    Ok(Some(Token::End(self.total_idx, ParsedKind::Obj)))
394                }
395                b',' => {
396                    self.forward();
397                    self.states.push(States::Object);
398                    self.states.push(States::RemoveWhitespaces);
399                    self.states.push(States::ObjectKey(ObjectKeyState::Init));
400                    self.states.push(States::RemoveWhitespaces);
401                    Ok(Some(Token::Separator(self.total_idx)))
402                }
403                byte => {
404                    Err(error::IncorrectInput::new(byte, self.total_idx + self.pending_idx).into())
405                }
406            }
407        } else {
408            self.states.push(States::Object);
409            Ok(Some(Token::Pending))
410        }
411    }
412
413    /// Processes object key
414    fn process_object_key(
415        &mut self,
416        state: ObjectKeyState,
417    ) -> Result<Option<Token>, error::General> {
418        match state {
419            ObjectKeyState::Init => {
420                if let Some(byte) = self.peek() {
421                    match byte {
422                        b'"' => {
423                            self.advance(); // move cursor to the start
424                            self.forward();
425                            self.states.push(States::ObjectKey(ObjectKeyState::Parse(
426                                StringState::Normal,
427                            )));
428                            Ok(None)
429                        }
430                        b'}' => Ok(None), // end has been reached to Object
431
432                        byte => Err(error::IncorrectInput::new(
433                            byte,
434                            self.total_idx + self.pending_idx,
435                        )
436                        .into()), // keys are strings in JSON
437                    }
438                } else {
439                    self.states.push(States::ObjectKey(state));
440                    Ok(Some(Token::Pending))
441                }
442            }
443            ObjectKeyState::Parse(string_state) => {
444                if let Some(byte) = self.peek() {
445                    self.forward();
446                    match string_state {
447                        StringState::Normal => match byte {
448                            b'\"' => {
449                                let idx = self.pending_idx;
450                                let slice = &self.advance().collect::<Vec<u8>>()[1..idx - 1];
451                                let key = from_utf8(slice)?.to_string();
452                                self.states.push(States::Value(Some(Element::Key(key))));
453                                self.states.push(States::RemoveWhitespaces);
454                                self.states.push(States::Colon);
455                                self.states.push(States::RemoveWhitespaces);
456                                Ok(None)
457                            }
458                            b'\\' => {
459                                self.states.push(States::ObjectKey(ObjectKeyState::Parse(
460                                    StringState::Escaped,
461                                )));
462                                Ok(None)
463                            }
464                            _ => {
465                                self.states.push(States::ObjectKey(ObjectKeyState::Parse(
466                                    StringState::Normal,
467                                )));
468                                Ok(None)
469                            }
470                        },
471                        StringState::Escaped => {
472                            self.states.push(States::ObjectKey(ObjectKeyState::Parse(
473                                StringState::Normal,
474                            )));
475                            Ok(None)
476                        }
477                    }
478                } else {
479                    self.states
480                        .push(States::ObjectKey(ObjectKeyState::Parse(string_state)));
481                    Ok(Some(Token::Pending))
482                }
483            }
484        }
485    }
486
487    /// Processes a single colon
488    fn process_colon(&mut self) -> Result<Option<Token>, error::General> {
489        if let Some(byte) = self.peek() {
490            if byte != b':' {
491                return Err(
492                    error::IncorrectInput::new(byte, self.total_idx + self.pending_idx).into(),
493                );
494            }
495            self.forward();
496            Ok(None)
497        } else {
498            self.states.push(States::Colon);
499            Ok(Some(Token::Pending))
500        }
501    }
502
503    /// Reads data from streamer and emits [Token](enum.Token.html) struct
504    ///
505    /// # Errors
506    ///
507    /// If invalid JSON is passed and error may be emitted.
508    /// Note that validity of input JSON is not checked.
509    pub fn read(&mut self) -> Result<Token, error::General> {
510        loop {
511            while let Some(state) = self.states.pop() {
512                if self.pop_path {
513                    self.path.pop();
514                    self.pop_path = false;
515                }
516
517                match state {
518                    States::RemoveWhitespaces => {
519                        if let Some(output) = self.process_remove_whitespace() {
520                            return Ok(output);
521                        }
522                    }
523                    States::Value(element) => {
524                        if let Some(output) = self.process_value(element)? {
525                            return Ok(output);
526                        }
527                        if self.states.is_empty() {
528                            return Ok(Token::Pending);
529                        }
530                    }
531                    States::Str(state) => {
532                        if let Some(output) = self.process_str(state) {
533                            self.pop_path = output.is_end();
534                            return Ok(output);
535                        }
536                    }
537                    States::Number => {
538                        if let Some(output) = self.process_number() {
539                            self.pop_path = output.is_end();
540                            return Ok(output);
541                        }
542                    }
543                    States::Bool => {
544                        if let Some(output) = self.process_bool() {
545                            self.pop_path = output.is_end();
546                            return Ok(output);
547                        }
548                    }
549                    States::Null => {
550                        if let Some(output) = self.process_null() {
551                            self.pop_path = output.is_end();
552                            return Ok(output);
553                        }
554                    }
555                    States::Array(idx) => {
556                        if let Some(output) = self.process_array(idx)? {
557                            self.pop_path = output.is_end();
558                            return Ok(output);
559                        }
560                    }
561                    States::Object => {
562                        if let Some(output) = self.process_object()? {
563                            self.pop_path = output.is_end();
564                            return Ok(output);
565                        }
566                    }
567                    States::ObjectKey(state) => {
568                        if let Some(output) = self.process_object_key(state)? {
569                            return Ok(output);
570                        }
571                    }
572                    States::Colon => {
573                        if let Some(output) = self.process_colon()? {
574                            return Ok(output);
575                        }
576                    }
577                }
578            }
579            self.states.push(States::Value(None));
580            self.states.push(States::RemoveWhitespaces);
581        }
582    }
583}
584
585#[cfg(test)]
586mod test {
587    use super::{ParsedKind, Streamer, Token};
588    use crate::path::Path;
589    use std::convert::TryFrom;
590
    /// Converts the string representation of a path to `Path`.
    ///
    /// Panics when the conversion fails.
    fn make_path(path: &str) -> Path {
        Path::try_from(path).unwrap()
    }
594
    /// Input which consists only of whitespace makes the streamer ask for more data.
    #[test]
    fn test_spaces() {
        let mut streamer = Streamer::new();
        streamer.feed(br#"  "#);
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
601
    /// Top-level strings (including escaped quotes and backslashes) emit
    /// `Start` and `End` tokens while the path stays empty.
    #[test]
    fn test_string() {
        let mut streamer = Streamer::new();
        streamer.feed(br#"  "test string \" \\\" [ ] {} , :\\""#);
        assert_eq!(streamer.read().unwrap(), Token::Start(2, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::End(36, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);

        let mut streamer = Streamer::new();
        streamer.feed(br#"" another one " "#);
        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::End(15, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
620
    /// A top-level decimal number surrounded by spaces emits `Start` and `End` tokens.
    #[test]
    fn test_number() {
        let mut streamer = Streamer::new();
        streamer.feed(br#" 3.24 "#);
        assert_eq!(streamer.read().unwrap(), Token::Start(1, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::End(5, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
631
    /// A top-level bool surrounded by spaces emits `Start` and `End` tokens.
    #[test]
    fn test_bool() {
        let mut streamer = Streamer::new();
        streamer.feed(br#"  true  "#);
        assert_eq!(streamer.read().unwrap(), Token::Start(2, ParsedKind::Bool));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::End(6, ParsedKind::Bool));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
642
    /// A null is terminated only by a following byte: without it the streamer
    /// stays pending, with trailing spaces the `End` token is emitted.
    #[test]
    fn test_null() {
        let mut streamer = Streamer::new();
        // TODO think of some better way to terminate the nulls/bools/numbers
        streamer.feed(br#"null"#);
        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);

        let mut streamer = Streamer::new();
        streamer.feed(br#"null  "#);
        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::End(4, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
660
    /// A flat array emits tokens for the array itself, for each of its
    /// elements and for the separators; the path holds the index of the
    /// element which is being processed.
    #[test]
    fn test_array() {
        let mut streamer = Streamer::new();
        streamer.feed(br#"[ null, 33, "string" ]"#);
        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Start(2, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path("[0]"));
        assert_eq!(streamer.read().unwrap(), Token::End(6, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path("[0]"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(6));
        assert_eq!(streamer.read().unwrap(), Token::Start(8, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path("[1]"));
        assert_eq!(streamer.read().unwrap(), Token::End(10, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path("[1]"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(10));
        assert_eq!(streamer.read().unwrap(), Token::Start(12, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path("[2]"));
        assert_eq!(streamer.read().unwrap(), Token::End(20, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path("[2]"));
        assert_eq!(streamer.read().unwrap(), Token::End(22, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
685
    /// The same array as in `test_array` fed in three parts: the streamer
    /// returns `Pending` when it runs out of data and continues with the same
    /// indexes once more data are fed.
    #[test]
    fn test_array_pending() {
        let mut streamer = Streamer::new();
        streamer.feed(br#"[ null, 3"#);
        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Start(2, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path("[0]"));
        assert_eq!(streamer.read().unwrap(), Token::End(6, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path("[0]"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(6));
        assert_eq!(streamer.read().unwrap(), Token::Start(8, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path("[1]"));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
        assert_eq!(streamer.current_path(), &make_path("[1]"));
        streamer.feed(br#"3,"#);
        assert_eq!(streamer.read().unwrap(), Token::End(10, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path("[1]"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(10));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
        assert_eq!(streamer.current_path(), &make_path(""));
        streamer.feed(br#" "string" ]"#);
        assert_eq!(streamer.read().unwrap(), Token::Start(12, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path("[2]"));
        assert_eq!(streamer.read().unwrap(), Token::End(20, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path("[2]"));
        assert_eq!(streamer.read().unwrap(), Token::End(22, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
716
    /// An empty array emits only its `Start` and `End` tokens.
    #[test]
    fn test_empty_array() {
        let mut streamer = Streamer::new();
        streamer.feed(br#"[]"#);
        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::End(2, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
727
    /// Nested arrays (including empty ones) extend the path by one index per
    /// level of nesting.
    #[test]
    fn test_array_in_array() {
        let mut streamer = Streamer::new();
        streamer.feed(br#"[ [], 33, ["string" , 44], [  ]]"#);
        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Start(2, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path("[0]"));
        assert_eq!(streamer.read().unwrap(), Token::End(4, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path("[0]"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(4));
        assert_eq!(streamer.read().unwrap(), Token::Start(6, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path("[1]"));
        assert_eq!(streamer.read().unwrap(), Token::End(8, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path("[1]"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(8));
        assert_eq!(streamer.read().unwrap(), Token::Start(10, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path("[2]"));
        assert_eq!(streamer.read().unwrap(), Token::Start(11, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path("[2][0]"));
        assert_eq!(streamer.read().unwrap(), Token::End(19, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path("[2][0]"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(20));
        assert_eq!(streamer.read().unwrap(), Token::Start(22, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path("[2][1]"));
        assert_eq!(streamer.read().unwrap(), Token::End(24, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path("[2][1]"));
        assert_eq!(streamer.read().unwrap(), Token::End(25, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path("[2]"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(25));
        assert_eq!(streamer.read().unwrap(), Token::Start(27, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path("[3]"));
        assert_eq!(streamer.read().unwrap(), Token::End(31, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path("[3]"));
        assert_eq!(streamer.read().unwrap(), Token::End(32, ParsedKind::Arr));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
766
    /// A flat object emits tokens for the object itself, for each of its
    /// values and for the separators; the path holds the key of the value
    /// which is being processed (escape sequences in the key are kept).
    #[test]
    fn test_object() {
        let mut streamer = Streamer::new();
        streamer.feed(br#"{"a":"a", "b" :  true , "c": null, " \" \\\" \\": 33}"#);
        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Obj));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Start(5, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path("{\"a\"}"));
        assert_eq!(streamer.read().unwrap(), Token::End(8, ParsedKind::Str));
        assert_eq!(streamer.current_path(), &make_path("{\"a\"}"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(8));
        assert_eq!(streamer.read().unwrap(), Token::Start(17, ParsedKind::Bool));
        assert_eq!(streamer.current_path(), &make_path("{\"b\"}"));
        assert_eq!(streamer.read().unwrap(), Token::End(21, ParsedKind::Bool));
        assert_eq!(streamer.current_path(), &make_path("{\"b\"}"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(22));
        assert_eq!(streamer.read().unwrap(), Token::Start(29, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path("{\"c\"}"));
        assert_eq!(streamer.read().unwrap(), Token::End(33, ParsedKind::Null));
        assert_eq!(streamer.current_path(), &make_path("{\"c\"}"));
        assert_eq!(streamer.read().unwrap(), Token::Separator(33));
        assert_eq!(streamer.read().unwrap(), Token::Start(50, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path(r#"{" \" \\\" \\"}"#));
        assert_eq!(streamer.read().unwrap(), Token::End(52, ParsedKind::Num));
        assert_eq!(streamer.current_path(), &make_path(r#"{" \" \\\" \\"}"#));
        assert_eq!(streamer.read().unwrap(), Token::End(53, ParsedKind::Obj));
        assert_eq!(streamer.current_path(), &make_path(""));
        assert_eq!(streamer.read().unwrap(), Token::Pending);
    }
796
797    #[test]
798    fn test_empty_object() {
799        let mut streamer = Streamer::new();
800        streamer.feed(br#"{}"#);
801        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Obj));
802        assert_eq!(streamer.current_path(), &make_path(""));
803        assert_eq!(streamer.read().unwrap(), Token::End(2, ParsedKind::Obj));
804        assert_eq!(streamer.current_path(), &make_path(""));
805        assert_eq!(streamer.read().unwrap(), Token::Pending);
806    }
807
808    #[test]
809    fn test_object_in_object() {
810        let mut streamer = Streamer::new();
811        streamer.feed(br#" {"u": {}, "j": {"x": {  }, "y": 10}} "#);
812        assert_eq!(streamer.read().unwrap(), Token::Start(1, ParsedKind::Obj));
813        assert_eq!(streamer.current_path(), &make_path(""));
814        assert_eq!(streamer.read().unwrap(), Token::Start(7, ParsedKind::Obj));
815        assert_eq!(streamer.current_path(), &make_path("{\"u\"}"));
816        assert_eq!(streamer.read().unwrap(), Token::End(9, ParsedKind::Obj));
817        assert_eq!(streamer.current_path(), &make_path("{\"u\"}"));
818        assert_eq!(streamer.read().unwrap(), Token::Separator(9));
819        assert_eq!(streamer.read().unwrap(), Token::Start(16, ParsedKind::Obj));
820        assert_eq!(streamer.current_path(), &make_path("{\"j\"}"));
821        assert_eq!(streamer.read().unwrap(), Token::Start(22, ParsedKind::Obj));
822        assert_eq!(streamer.current_path(), &make_path("{\"j\"}{\"x\"}"));
823        assert_eq!(streamer.read().unwrap(), Token::End(26, ParsedKind::Obj));
824        assert_eq!(streamer.current_path(), &make_path("{\"j\"}{\"x\"}"));
825        assert_eq!(streamer.read().unwrap(), Token::Separator(26));
826        assert_eq!(streamer.read().unwrap(), Token::Start(33, ParsedKind::Num));
827        assert_eq!(streamer.current_path(), &make_path("{\"j\"}{\"y\"}"));
828        assert_eq!(streamer.read().unwrap(), Token::End(35, ParsedKind::Num));
829        assert_eq!(streamer.current_path(), &make_path("{\"j\"}{\"y\"}"));
830        assert_eq!(streamer.read().unwrap(), Token::End(36, ParsedKind::Obj));
831        assert_eq!(streamer.current_path(), &make_path("{\"j\"}"));
832        assert_eq!(streamer.read().unwrap(), Token::End(37, ParsedKind::Obj));
833        assert_eq!(streamer.current_path(), &make_path(""));
834        assert_eq!(streamer.read().unwrap(), Token::Pending);
835    }
836
837    #[test]
838    fn test_complex_with_pending() {
839        const COMPLEX_DATA: &[u8] = br#" [{"aha y": {}, "j": {"x": [{  }, [ {}, null ]], "y" : 10}}, null, 43, [ {"a": false} ] ]"#;
840
841        // Split complex data into parts
842        for i in 0..COMPLEX_DATA.len() {
843            let start_data = &COMPLEX_DATA[0..i];
844            let end_data = &COMPLEX_DATA[i..];
845            let mut streamer = Streamer::new();
846
847            // feed the first part
848            streamer.feed(start_data);
849
850            let mut terminate = false;
851            // Gets next item and feed the rest of the data when pending
852            let mut get_item = |path: Option<&str>| loop {
853                match streamer.read() {
854                    Ok(Token::Pending) => {
855                        if terminate {
856                            break Token::Pending;
857                        } else {
858                            terminate = true;
859                            streamer.feed(end_data);
860                        }
861                        continue;
862                    }
863                    Ok(e) => {
864                        if let Some(pth) = path {
865                            assert_eq!(streamer.current_path(), &make_path(pth));
866                        }
867                        return e;
868                    }
869                    Err(_) => panic!("Error occured"),
870                }
871            };
872
873            assert_eq!(get_item(Some("")), Token::Start(1, ParsedKind::Arr));
874            assert_eq!(get_item(Some("[0]")), Token::Start(2, ParsedKind::Obj));
875            assert_eq!(
876                get_item(Some("[0]{\"aha y\"}")),
877                Token::Start(12, ParsedKind::Obj)
878            );
879            assert_eq!(
880                get_item(Some("[0]{\"aha y\"}")),
881                Token::End(14, ParsedKind::Obj)
882            );
883            assert_eq!(get_item(None), Token::Separator(14));
884            assert_eq!(
885                get_item(Some("[0]{\"j\"}")),
886                Token::Start(21, ParsedKind::Obj)
887            );
888            assert_eq!(
889                get_item(Some("[0]{\"j\"}{\"x\"}")),
890                Token::Start(27, ParsedKind::Arr)
891            );
892            assert_eq!(
893                get_item(Some("[0]{\"j\"}{\"x\"}[0]")),
894                Token::Start(28, ParsedKind::Obj)
895            );
896            assert_eq!(
897                get_item(Some("[0]{\"j\"}{\"x\"}[0]")),
898                Token::End(32, ParsedKind::Obj)
899            );
900            assert_eq!(get_item(None), Token::Separator(32));
901            assert_eq!(
902                get_item(Some("[0]{\"j\"}{\"x\"}[1]")),
903                Token::Start(34, ParsedKind::Arr)
904            );
905            assert_eq!(
906                get_item(Some("[0]{\"j\"}{\"x\"}[1][0]")),
907                Token::Start(36, ParsedKind::Obj)
908            );
909            assert_eq!(
910                get_item(Some("[0]{\"j\"}{\"x\"}[1][0]")),
911                Token::End(38, ParsedKind::Obj)
912            );
913            assert_eq!(get_item(None), Token::Separator(38));
914            assert_eq!(
915                get_item(Some("[0]{\"j\"}{\"x\"}[1][1]")),
916                Token::Start(40, ParsedKind::Null)
917            );
918            assert_eq!(
919                get_item(Some("[0]{\"j\"}{\"x\"}[1][1]")),
920                Token::End(44, ParsedKind::Null)
921            );
922            assert_eq!(
923                get_item(Some("[0]{\"j\"}{\"x\"}[1]")),
924                Token::End(46, ParsedKind::Arr)
925            );
926            assert_eq!(
927                get_item(Some("[0]{\"j\"}{\"x\"}")),
928                Token::End(47, ParsedKind::Arr)
929            );
930            assert_eq!(get_item(None), Token::Separator(47));
931            assert_eq!(
932                get_item(Some("[0]{\"j\"}{\"y\"}")),
933                Token::Start(55, ParsedKind::Num)
934            );
935            assert_eq!(
936                get_item(Some("[0]{\"j\"}{\"y\"}")),
937                Token::End(57, ParsedKind::Num)
938            );
939            assert_eq!(
940                get_item(Some("[0]{\"j\"}")),
941                Token::End(58, ParsedKind::Obj)
942            );
943            assert_eq!(get_item(Some("[0]")), Token::End(59, ParsedKind::Obj));
944            assert_eq!(get_item(None), Token::Separator(59));
945            assert_eq!(get_item(Some("[1]")), Token::Start(61, ParsedKind::Null));
946            assert_eq!(get_item(Some("[1]")), Token::End(65, ParsedKind::Null));
947            assert_eq!(get_item(None), Token::Separator(65));
948            assert_eq!(get_item(Some("[2]")), Token::Start(67, ParsedKind::Num));
949            assert_eq!(get_item(Some("[2]")), Token::End(69, ParsedKind::Num));
950            assert_eq!(get_item(None), Token::Separator(69));
951            assert_eq!(get_item(Some("[3]")), Token::Start(71, ParsedKind::Arr));
952            assert_eq!(get_item(Some("[3][0]")), Token::Start(73, ParsedKind::Obj));
953            assert_eq!(
954                get_item(Some("[3][0]{\"a\"}")),
955                Token::Start(79, ParsedKind::Bool)
956            );
957            assert_eq!(
958                get_item(Some("[3][0]{\"a\"}")),
959                Token::End(84, ParsedKind::Bool)
960            );
961            assert_eq!(get_item(Some("[3][0]")), Token::End(85, ParsedKind::Obj));
962            assert_eq!(get_item(Some("[3]")), Token::End(87, ParsedKind::Arr));
963            assert_eq!(get_item(Some("")), Token::End(89, ParsedKind::Arr));
964            assert_eq!(get_item(None), Token::Pending);
965        }
966    }
967
968    #[test]
969    fn test_utf8() {
970        // try to cover all utf8 character lengths
971        let utf8_data: Vec<u8> = r#"[{"š𐍈€": "€š𐍈"}, "𐍈€š"]"#.to_string().into_bytes();
972        for i in 0..utf8_data.len() {
973            let start_data = &utf8_data[0..i];
974            let end_data = &utf8_data[i..];
975            let mut streamer = Streamer::new();
976
977            // feed the first part
978            streamer.feed(start_data);
979
980            let mut terminate = false;
981            // Gets next item and feed the rest of the data when pending
982            let mut get_item = |path: Option<&str>| loop {
983                match streamer.read() {
984                    Ok(Token::Pending) => {
985                        if terminate {
986                            break Token::Pending;
987                        } else {
988                            terminate = true;
989                            streamer.feed(end_data);
990                        }
991                        continue;
992                    }
993                    Ok(e) => {
994                        if let Some(pth) = path {
995                            assert_eq!(streamer.current_path(), &make_path(pth));
996                        }
997                        return e;
998                    }
999                    Err(_) => panic!("Error occured"),
1000                }
1001            };
1002
1003            assert_eq!(get_item(Some("")), Token::Start(0, ParsedKind::Arr));
1004            assert_eq!(get_item(Some("[0]")), Token::Start(1, ParsedKind::Obj));
1005            assert_eq!(
1006                get_item(Some("[0]{\"š𐍈€\"}")),
1007                Token::Start(15, ParsedKind::Str)
1008            );
1009            assert_eq!(
1010                get_item(Some("[0]{\"š𐍈€\"}")),
1011                Token::End(26, ParsedKind::Str)
1012            );
1013            assert_eq!(get_item(Some("[0]")), Token::End(27, ParsedKind::Obj));
1014            assert_eq!(get_item(None), Token::Separator(27));
1015            assert_eq!(get_item(Some("[1]")), Token::Start(29, ParsedKind::Str));
1016            assert_eq!(get_item(Some("[1]")), Token::End(40, ParsedKind::Str));
1017            assert_eq!(get_item(Some("")), Token::End(41, ParsedKind::Arr));
1018            assert_eq!(get_item(None), Token::Pending);
1019        }
1020    }
1021
1022    #[test]
1023    fn test_multiple_input_flat() {
1024        let mut streamer = Streamer::new();
1025        streamer.feed(br#""first" "second""third""#);
1026        assert_eq!(streamer.read().unwrap(), Token::Start(0, ParsedKind::Str));
1027        assert_eq!(streamer.current_path(), &make_path(""));
1028        assert_eq!(streamer.read().unwrap(), Token::End(7, ParsedKind::Str));
1029        assert_eq!(streamer.current_path(), &make_path(""));
1030        assert_eq!(streamer.read().unwrap(), Token::Start(8, ParsedKind::Str));
1031        assert_eq!(streamer.current_path(), &make_path(""));
1032        assert_eq!(streamer.read().unwrap(), Token::End(16, ParsedKind::Str));
1033        assert_eq!(streamer.current_path(), &make_path(""));
1034        assert_eq!(streamer.read().unwrap(), Token::Start(16, ParsedKind::Str));
1035        assert_eq!(streamer.current_path(), &make_path(""));
1036        assert_eq!(streamer.read().unwrap(), Token::End(23, ParsedKind::Str));
1037        assert_eq!(streamer.current_path(), &make_path(""));
1038        assert_eq!(streamer.read().unwrap(), Token::Pending);
1039    }
1040
1041    #[test]
1042    fn test_newlines() {
1043        let mut streamer = Streamer::new();
1044        streamer.feed(
1045            br#" {
1046                "u": {},
1047                "j": {
1048                    "x": {  } ,
1049                    "y":10
1050                }
1051            } "#,
1052        );
1053        assert_eq!(streamer.read().unwrap(), Token::Start(1, ParsedKind::Obj));
1054        assert_eq!(streamer.current_path(), &make_path(""));
1055        assert_eq!(streamer.read().unwrap(), Token::Start(24, ParsedKind::Obj));
1056        assert_eq!(streamer.current_path(), &make_path("{\"u\"}"));
1057        assert_eq!(streamer.read().unwrap(), Token::End(26, ParsedKind::Obj));
1058        assert_eq!(streamer.current_path(), &make_path("{\"u\"}"));
1059        assert_eq!(streamer.read().unwrap(), Token::Separator(26));
1060        assert_eq!(streamer.read().unwrap(), Token::Start(49, ParsedKind::Obj));
1061        assert_eq!(streamer.current_path(), &make_path("{\"j\"}"));
1062        assert_eq!(streamer.read().unwrap(), Token::Start(76, ParsedKind::Obj));
1063        assert_eq!(streamer.current_path(), &make_path("{\"j\"}{\"x\"}"));
1064        assert_eq!(streamer.read().unwrap(), Token::End(80, ParsedKind::Obj));
1065        assert_eq!(streamer.current_path(), &make_path("{\"j\"}{\"x\"}"));
1066        assert_eq!(streamer.read().unwrap(), Token::Separator(81));
1067        assert_eq!(streamer.read().unwrap(), Token::Start(107, ParsedKind::Num));
1068        assert_eq!(streamer.current_path(), &make_path("{\"j\"}{\"y\"}"));
1069        assert_eq!(streamer.read().unwrap(), Token::End(109, ParsedKind::Num));
1070        assert_eq!(streamer.current_path(), &make_path("{\"j\"}{\"y\"}"));
1071        assert_eq!(streamer.read().unwrap(), Token::End(127, ParsedKind::Obj));
1072        assert_eq!(streamer.current_path(), &make_path("{\"j\"}"));
1073        assert_eq!(streamer.read().unwrap(), Token::End(141, ParsedKind::Obj));
1074        assert_eq!(streamer.current_path(), &make_path(""));
1075        assert_eq!(streamer.read().unwrap(), Token::Pending);
1076    }
1077}