json_session/
session.rs

1use crate::JsonParseError;
2
3use super::tokenizer::{JsonParseResult, JsonToken, JsonTokenizer, Location};
4
5/// A [`JsonFragment`] paired with a [`LocationSpan`].
6#[derive(Debug, Clone)]
7pub struct JsonFragmentWithSpan {
8    pub fragment: JsonFragment,
9    pub span: LocationSpan,
10}
11
12/// A fragment of JSON. This is a bit more high-level than a token.
13#[derive(Debug, Clone, PartialEq)]
14pub enum JsonFragment {
15    /// Corresponds to `{`. Always followed by [`JsonFragment::ObjectProperty`] or [`JsonFragment::EndObject`].
16    ///
17    /// The span includes only the single opening brace byte.
18    BeginObject,
19
20    /// The name of the current property. Always followed by the value of this property.
21    ///
22    /// The span goes from the starting quote to after the colon.
23    ObjectProperty(String),
24
25    /// Corresponds to `}`.
26    ///
27    /// The span includes only the single closing brace byte.
28    EndObject,
29
30    /// Corresponds to `[`. Always followed by the first element or by [`JsonFragment::EndArray`].
31    ///
32    /// The span includes only the single opening bracket byte.
33    BeginArray,
34
35    /// Corresponds to `]`.
36    ///
37    /// The span includes only the single closing bracket byte.
38    EndArray,
39
40    /// Any JSON value that's not an object or an array.
41    ///
42    /// The span encloses the value.
43    PrimitiveValue(JsonPrimitiveValue),
44}
45
46impl JsonFragment {
47    fn with_span(self, span: LocationSpan) -> JsonFragmentWithSpan {
48        JsonFragmentWithSpan {
49            fragment: self,
50            span,
51        }
52    }
53}
54
55/// The start and end [`Location`] of a fragment.
56#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
57pub struct LocationSpan {
58    pub start: Location,
59    pub end: Location,
60}
61
62impl LocationSpan {
63    pub fn new(start: Location, end: Location) -> Self {
64        Self { start, end }
65    }
66}
67
/// A JSON value which is not an object or an array.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonPrimitiveValue {
    /// A JSON number, represented as an `f64`.
    Number(f64),
    /// The JSON literal `true` or `false`.
    Boolean(bool),
    /// A JSON string.
    String(String),
    /// The JSON literal `null`.
    Null,
}
76
77/// A pull-based JSON parser which consumes an iterator over bytes and yields
78/// a valid sequence of [`JsonFragmentWithSpan`] values.
79///
80/// This API allows gathering statistics about the contents of large JSON documents without ever
81/// holding the entire document in memory.
82///
83/// [`JsonSession`] checks that the input is valid JSON. If an invalid sequence of tokens
84/// is detected, [`JsonSession::next`] yields an error. As a user of [`JsonSession`], you can
85/// rely on the fact that the yielded fragments will always describe a well-formed JSON document,
86/// at least the part of the document that has been consumed so far. (To clarify, there is no
87/// pre-pass which validates the entire document. Validation happens as you go, so
88/// [`JsonSession::next`] will happily return fragments as long as it hasn't arrived at the error yet.)
89///
90/// When the optional feature `fallible-iterator` is used, [`JsonSession`] implements
91/// `fallible_iterator::FallibleIterator`.
92///
93/// # Example
94///
95/// ```
96/// use json_session::{JsonSession, JsonFragment, JsonPrimitiveValue};
97///
98/// # fn main() {
99/// let input_str = r#"{"key1": 1234, "key2": [true], "key3": "value" }"#;
100/// let expected = &[
101///     JsonFragment::BeginObject,
102///     JsonFragment::ObjectProperty(String::from("key1")),
103///     JsonFragment::PrimitiveValue(JsonPrimitiveValue::Number(1234.0)),
104///     JsonFragment::ObjectProperty(String::from("key2")),
105///     JsonFragment::BeginArray,
106///     JsonFragment::PrimitiveValue(JsonPrimitiveValue::Boolean(true)),
107///     JsonFragment::EndArray,
108///     JsonFragment::ObjectProperty(String::from("key3")),
109///     JsonFragment::PrimitiveValue(JsonPrimitiveValue::String(String::from("value"))),
110///     JsonFragment::EndObject,
111/// ];
112/// let mut session = JsonSession::new(input_str.as_bytes().iter().cloned());
113/// for expected_fragment in expected {
114///     let fragment = session.next().unwrap().unwrap().fragment;
115///     assert_eq!(fragment, *expected_fragment);
116/// }
117/// assert!(session.next().unwrap().is_none());
118/// # }
119/// ```
120pub struct JsonSession<I: Iterator<Item = u8>> {
121    tokenizer: JsonTokenizer<I>,
122    state_stack: Vec<StateStackEntry>,
123}
124
125#[derive(Debug, Clone)]
126enum StateStackEntry {
127    BeforeAnyValue,
128    BeforeAnyValueWithToken {
129        location: Location,
130        token: JsonToken,
131    },
132    AfterObjectOpen,
133    BeforeObjectPropertyKeyWithToken {
134        location: Location,
135        token: JsonToken,
136    },
137    AfterObjectPropertyValue,
138    AfterArrayOpen,
139    AfterArrayItem,
140}
141
142impl<I: Iterator<Item = u8>> JsonSession<I> {
143    /// Create a new [`JsonSession`] from an iterator over bytes.
144    pub fn new(it: I) -> Self {
145        JsonSession {
146            tokenizer: JsonTokenizer::new(it),
147            state_stack: vec![StateStackEntry::BeforeAnyValue],
148        }
149    }
150
151    /// Get the next [`JsonFragmentWithSpan`].
152    ///
153    /// Returns:
154    ///
155    /// - `Ok(Some(...))` in the regular case, with the next fragment.
156    /// - `Ok(None)` if the JSON document is complete and the end of the input has been reached.
157    /// - `Err(...)` if a invalid JSON is detected.
158    pub fn next(&mut self) -> JsonParseResult<Option<JsonFragmentWithSpan>> {
159        while let Some(entry) = self.state_stack.last().cloned() {
160            match entry {
161                StateStackEntry::BeforeAnyValue => {
162                    let (token, location) = self.tokenizer.next_token_and_location()?;
163                    *self.state_stack.last_mut().unwrap() =
164                        StateStackEntry::BeforeAnyValueWithToken { token, location };
165                }
166                StateStackEntry::BeforeAnyValueWithToken { location, token } => {
167                    let span = LocationSpan::new(location, self.tokenizer.location());
168                    let value = match token {
169                        JsonToken::Number(num) => JsonPrimitiveValue::Number(num),
170                        JsonToken::True => JsonPrimitiveValue::Boolean(true),
171                        JsonToken::False => JsonPrimitiveValue::Boolean(false),
172                        JsonToken::String(s) => JsonPrimitiveValue::String(s),
173                        JsonToken::Null => JsonPrimitiveValue::Null,
174                        JsonToken::ArrayOpen => {
175                            *self.state_stack.last_mut().unwrap() = StateStackEntry::AfterArrayOpen;
176                            return Ok(Some(JsonFragment::BeginArray.with_span(span)));
177                        }
178                        JsonToken::ObjOpen => {
179                            *self.state_stack.last_mut().unwrap() =
180                                StateStackEntry::AfterObjectOpen;
181                            return Ok(Some(JsonFragment::BeginObject.with_span(span)));
182                        }
183                        t @ JsonToken::Comma
184                        | t @ JsonToken::ArrayClose
185                        | t @ JsonToken::Colon
186                        | t @ JsonToken::ObjClose => {
187                            return Err(JsonParseError::new(
188                                format!("Unexpected token {t:?}"),
189                                location,
190                            ));
191                        }
192                    };
193                    self.state_stack.pop();
194                    return Ok(Some(JsonFragment::PrimitiveValue(value).with_span(span)));
195                }
196                StateStackEntry::AfterObjectOpen => {
197                    let (token, location) = self.tokenizer.next_token_and_location()?;
198                    if matches!(token, JsonToken::ObjClose) {
199                        self.state_stack.pop();
200                        let span = LocationSpan::new(location, self.tokenizer.location());
201                        return Ok(Some(JsonFragment::EndObject.with_span(span)));
202                    }
203
204                    *self.state_stack.last_mut().unwrap() =
205                        StateStackEntry::BeforeObjectPropertyKeyWithToken { location, token };
206                }
207                StateStackEntry::BeforeObjectPropertyKeyWithToken { location, token } => {
208                    let key = match token {
209                        JsonToken::String(s) => s,
210                        other_token => {
211                            return Err(JsonParseError::new(
212                                format!("Key of object must be string but found {other_token:?}"),
213                                location,
214                            ))
215                        }
216                    };
217
218                    let (token, colon_location) = self.tokenizer.next_token_and_location()?;
219                    if token != JsonToken::Colon {
220                        return Err(JsonParseError::new(
221                            format!(
222                                "':' is expected after key of object but actually found '{token:?}'",
223                            ),
224                            colon_location,
225                        ));
226                    }
227
228                    *self.state_stack.last_mut().unwrap() =
229                        StateStackEntry::AfterObjectPropertyValue;
230                    self.state_stack.push(StateStackEntry::BeforeAnyValue);
231                    let span = LocationSpan::new(location, self.tokenizer.location());
232                    return Ok(Some(JsonFragment::ObjectProperty(key).with_span(span)));
233                }
234                StateStackEntry::AfterObjectPropertyValue => {
235                    let (token, location) = self.tokenizer.next_token_and_location()?;
236                    match token {
237                        JsonToken::Comma => {}
238                        JsonToken::ObjClose => {
239                            let span = LocationSpan::new(location, self.tokenizer.location());
240                            self.state_stack.pop();
241                            return Ok(Some(JsonFragment::EndObject.with_span(span)));
242                        }
243                        token => {
244                            return Err(JsonParseError::new(
245                                format!(
246                                "',' or '}}' is expected for object but actually found '{token:?}'",
247                            ),
248                                location,
249                            ))
250                        }
251                    }
252
253                    let (token, location) = self.tokenizer.next_token_and_location()?;
254                    *self.state_stack.last_mut().unwrap() =
255                        StateStackEntry::BeforeObjectPropertyKeyWithToken { location, token };
256                }
257                StateStackEntry::AfterArrayOpen => {
258                    let (token, location) = self.tokenizer.next_token_and_location()?;
259
260                    if token == JsonToken::ArrayClose {
261                        self.state_stack.pop();
262                        let span = LocationSpan::new(location, self.tokenizer.location());
263                        return Ok(Some(JsonFragment::EndArray.with_span(span)));
264                    }
265
266                    *self.state_stack.last_mut().unwrap() = StateStackEntry::AfterArrayItem;
267                    self.state_stack
268                        .push(StateStackEntry::BeforeAnyValueWithToken { token, location });
269                }
270                StateStackEntry::AfterArrayItem => {
271                    let (token, location) = self.tokenizer.next_token_and_location()?;
272                    match token {
273                        JsonToken::Comma => {}
274                        JsonToken::ArrayClose => {
275                            self.state_stack.pop();
276                            let span = LocationSpan::new(location, self.tokenizer.location());
277                            return Ok(Some(JsonFragment::EndArray.with_span(span)));
278                        }
279                        token => {
280                            return Err(JsonParseError::new(
281                                format!(
282                                "',' or ']' is expected for array but actually found '{token:?}'",
283                            ),
284                                location,
285                            ))
286                        }
287                    }
288
289                    *self.state_stack.last_mut().unwrap() = StateStackEntry::AfterArrayItem;
290                    self.state_stack.push(StateStackEntry::BeforeAnyValue);
291                }
292            }
293        }
294
295        self.tokenizer.expect_eof()?;
296
297        Ok(None)
298    }
299}
300
/// Exposes [`JsonSession`] through the `fallible_iterator::FallibleIterator` interface when the
/// `fallible-iterator` feature is enabled.
#[cfg(feature = "fallible-iterator")]
impl<I: Iterator<Item = u8>> fallible_iterator::FallibleIterator for JsonSession<I> {
    type Item = JsonFragmentWithSpan;
    type Error = JsonParseError;

    /// Forwards to the inherent [`JsonSession::next`].
    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error> {
        // Call the inherent method by path so that it is obvious this delegates to the parser
        // rather than recursing into this trait method.
        JsonSession::next(self)
    }
}
310
#[cfg(test)]
mod test {
    use super::*;

    /// Runs a session over `s` until it finishes or fails. Returns every fragment yielded
    /// before that point, plus the error if the session failed.
    fn get(s: &str) -> (Vec<JsonFragmentWithSpan>, Option<JsonParseError>) {
        let mut session = JsonSession::new(s.as_bytes().iter().cloned());
        let mut v = Vec::new();
        loop {
            match session.next() {
                Ok(Some(ev)) => v.push(ev),
                Ok(None) => return (v, None),
                Err(e) => return (v, Some(e)),
            }
        }
    }

    /// Mirrors the example in the [`JsonSession`] documentation.
    #[test]
    fn test_doc() {
        let s = r#"{"key1": 1234, "key2": [true], "key3": "value" }"#;
        let expected = &[
            JsonFragment::BeginObject,
            JsonFragment::ObjectProperty(String::from("key1")),
            JsonFragment::PrimitiveValue(JsonPrimitiveValue::Number(1234.0)),
            JsonFragment::ObjectProperty(String::from("key2")),
            JsonFragment::BeginArray,
            JsonFragment::PrimitiveValue(JsonPrimitiveValue::Boolean(true)),
            JsonFragment::EndArray,
            JsonFragment::ObjectProperty(String::from("key3")),
            JsonFragment::PrimitiveValue(JsonPrimitiveValue::String(String::from("value"))),
            JsonFragment::EndObject,
        ];
        let mut session = JsonSession::new(s.as_bytes().iter().cloned());
        for expected_fragment in expected {
            let fragment = session.next().unwrap().unwrap().fragment;
            assert_eq!(fragment, *expected_fragment);
        }
        assert!(session.next().unwrap().is_none());
    }

    /// Empty containers, nested containers and all primitive kinds in one document.
    #[test]
    fn test_basic() {
        let s =
            r#"{"propertyKey": 1234, "arr": [], "obj": {}, "arr2": [null, false, true, -0.54e2] }"#;
        let (v, e) = get(s);
        assert_eq!(v.len(), 17);
        assert!(e.is_none());
    }

    /// A `]` directly after a `,` is not a value, so the session must fail after having
    /// yielded the opening bracket and the first element.
    #[test]
    fn test_trailing_comma_in_array_is_rejected() {
        let (v, e) = get("[1,]");
        assert_eq!(v.len(), 2);
        assert_eq!(v[0].fragment, JsonFragment::BeginArray);
        assert_eq!(
            v[1].fragment,
            JsonFragment::PrimitiveValue(JsonPrimitiveValue::Number(1.0))
        );
        assert!(e.is_some());
    }

    /// A `}` directly after a `,` is not a property key, so the session must fail after
    /// having yielded the opening brace, the key and its value.
    #[test]
    fn test_trailing_comma_in_object_is_rejected() {
        let (v, e) = get(r#"{"a": 1,}"#);
        assert_eq!(v.len(), 3);
        assert_eq!(v[0].fragment, JsonFragment::BeginObject);
        assert_eq!(v[1].fragment, JsonFragment::ObjectProperty(String::from("a")));
        assert!(e.is_some());
    }
}