core_json/
deserializer.rs

1use crate::*;
2
3/// Advance the reader until there's a non-whitespace character.
4#[inline(always)]
5fn advance_whitespace<'read, R: Read<'read>, S: Stack>(
6  reader: &mut PeekableRead<'read, R>,
7) -> Result<(), JsonError<'read, R, S>> {
8  let mut next;
9  while {
10    next = reader.peek();
11    // https://datatracker.ietf.org/doc/html/rfc8259#section-2 defines whitespace as follows
12    matches!(next, b'\x20' | b'\x09' | b'\x0A' | b'\x0D')
13  } {
14    reader.read_byte().map_err(JsonError::ReadError)?;
15  }
16  Ok(())
17}
18
19/// Advance past a colon.
20#[inline(always)]
21pub(super) fn advance_past_colon<'read, R: Read<'read>, S: Stack>(
22  reader: &mut PeekableRead<'read, R>,
23) -> Result<(), JsonError<'read, R, S>> {
24  advance_whitespace(reader)?;
25  match reader.read_byte().map_err(JsonError::ReadError)? {
26    b':' => advance_whitespace(reader)?,
27    _ => Err(JsonError::InvalidKeyValueDelimiter)?,
28  }
29  Ok(())
30}
31
32/// Advance past a comma, or to the close of the structure.
33#[inline(always)]
34pub(super) fn advance_past_comma_or_to_close<'read, R: Read<'read>, S: Stack>(
35  reader: &mut PeekableRead<'read, R>,
36) -> Result<(), JsonError<'read, R, S>> {
37  advance_whitespace(reader)?;
38  match reader.peek() {
39    b',' => {
40      reader.read_byte().map_err(JsonError::ReadError)?;
41      advance_whitespace(reader)?;
42      if matches!(reader.peek(), b']' | b'}') {
43        Err(JsonError::TrailingComma)?;
44      }
45    }
46    b']' | b'}' => {}
47    _ => Err(JsonError::InvalidValue)?,
48  }
49  Ok(())
50}
51
52/// The result from a single step of the deserializer, if within an object.
53pub(super) enum SingleStepObjectResult {
54  /// A field within the object was advanced to (past the opening quote of its key).
55  Field,
56  /// The object was closed.
57  Closed,
58}
59
60/// The result from a single step of the deserializer, if within an array.
61pub(super) enum SingleStepArrayResult {
62  /// A value within the array was advanced to, yet has not itself been read.
63  Value,
64  /// The array was closed.
65  Closed,
66}
67
68/// The result from a single step of the deserializer, if handling an unknown value.
69pub(super) enum SingleStepUnknownResult {
70  /// An object was opened.
71  ObjectOpened,
72  /// An array was opened.
73  ArrayOpened,
74  /// A string was opened, with only its opening quote having been advanced past.
75  String,
76  /// A number was read.
77  Number(Number),
78  /// A boolean value was advanced past.
79  Bool(bool),
80  /// Null was advanced past.
81  Null,
82}
83
84/// The result from a single step of the deserializer, tagged by the state which was stepped.
85pub(super) enum SingleStepResult {
86  /// The result if the step occurred within an object.
87  Object(SingleStepObjectResult),
88  /// The result if the step occurred within an array.
89  Array(SingleStepArrayResult),
90  /// The result if the step handled an unknown value.
91  Unknown(SingleStepUnknownResult),
92}
93
94/// Step the deserializer forwards.
95///
96/// This assumes there is no leading whitespace present in `reader` and will advance past any
97/// whitespace present before the next logical unit.
98fn single_step<'read, 'parent, R: Read<'read>, S: Stack>(
99  reader: &'parent mut PeekableRead<'read, R>,
100  stack: &'parent mut S,
101) -> Result<SingleStepResult, JsonError<'read, R, S>> {
102  match stack.peek().ok_or(JsonError::InternalError)? {
103    State::Object => {
104      let next = reader.peek();
105
106      // Check if the object terminates
107      if next == b'}' {
108        stack.pop().ok_or(JsonError::InternalError)?;
109
110        // If this isn't the outer object, advance past the comma after
111        if stack.depth() != 0 {
112          // Advance past the '}'
113          /*
114            We only do this when the object *isn't* closing to prevent reading past the boundary of
115            the object, as the '}' was already internally read (consumed from the underlying
116            reader) by `PeekableRead`.
117          */
118          reader.read_byte().map_err(JsonError::ReadError)?;
119          advance_past_comma_or_to_close(reader)?;
120        }
121
122        return Ok(SingleStepResult::Object(SingleStepObjectResult::Closed));
123      }
124
125      // Read the name of this field
126      if next != b'"' {
127        Err(JsonError::InvalidKey)?;
128      }
129      // Advance past the '"'
130      reader.read_byte().map_err(JsonError::ReadError)?;
131
132      // Push how we're reading a value of an unknown type onto the stack, for the value
133      stack.push(State::Unknown).map_err(JsonError::StackError)?;
134      Ok(SingleStepResult::Object(SingleStepObjectResult::Field))
135    }
136    State::Array => {
137      // Check if the array terminates
138      if reader.peek() == b']' {
139        stack.pop().ok_or(JsonError::InternalError)?;
140
141        // If this isn't the outer object, advance past the comma after
142        if stack.depth() != 0 {
143          reader.read_byte().map_err(JsonError::ReadError)?;
144          advance_past_comma_or_to_close(reader)?;
145        }
146
147        return Ok(SingleStepResult::Array(SingleStepArrayResult::Closed));
148      }
149
150      // Since the array doesn't terminate, read the next value
151      stack.push(State::Unknown).map_err(JsonError::StackError)?;
152      Ok(SingleStepResult::Array(SingleStepArrayResult::Value))
153    }
154    State::Unknown => {
155      stack.pop().ok_or(JsonError::InternalError)?;
156
157      let result = match kind(reader) {
158        // Handle if this opens an object
159        Type::Object => {
160          reader.read_byte().map_err(JsonError::ReadError)?;
161          advance_whitespace(reader)?;
162          stack.push(State::Object).map_err(JsonError::StackError)?;
163          return Ok(SingleStepResult::Unknown(SingleStepUnknownResult::ObjectOpened));
164        }
165        // Handle if this opens an array
166        Type::Array => {
167          reader.read_byte().map_err(JsonError::ReadError)?;
168          advance_whitespace(reader)?;
169          stack.push(State::Array).map_err(JsonError::StackError)?;
170          return Ok(SingleStepResult::Unknown(SingleStepUnknownResult::ArrayOpened));
171        }
172        // Handle if this opens an string
173        Type::String => {
174          reader.read_byte().map_err(JsonError::ReadError)?;
175          return Ok(SingleStepResult::Unknown(SingleStepUnknownResult::String));
176        }
177        Type::Number => {
178          SingleStepResult::Unknown(SingleStepUnknownResult::Number(number::to_number_str(reader)?))
179        }
180        Type::Bool => {
181          let mut bool_string = [0; 4];
182          reader.read_exact_into_non_empty_slice(&mut bool_string).map_err(JsonError::ReadError)?;
183          let bool = if &bool_string == b"true" {
184            true
185          } else {
186            let e = reader.read_byte().map_err(JsonError::ReadError)?;
187            if !((bool_string == *b"fals") & (e == b'e')) {
188              Err(JsonError::TypeError)?;
189            }
190            false
191          };
192          SingleStepResult::Unknown(SingleStepUnknownResult::Bool(bool))
193        }
194        Type::Null => {
195          let mut null_string = [0; 4];
196          reader.read_exact_into_non_empty_slice(&mut null_string).map_err(JsonError::ReadError)?;
197          if null_string != *b"null" {
198            Err(JsonError::InvalidValue)?;
199          }
200          SingleStepResult::Unknown(SingleStepUnknownResult::Null)
201        }
202      };
203
204      // We now have to read past the next comma, or to the next closing of a structure
205      advance_past_comma_or_to_close(reader)?;
206
207      Ok(result)
208    }
209  }
210}
211
212/// A deserializer for a JSON-encoded structure.
213pub struct Deserializer<'read, R: Read<'read>, S: Stack> {
214  pub(crate) reader: PeekableRead<'read, R>,
215  stack: S,
216  /*
217    We advance the deserializer within `Drop` which cannot return an error. Any error raised while
218    stepping (including within drop) is stored here, to be returned upon the next call to a method
219    which can return an error (if one is ever called).
220  */
221  pub(crate) error: Option<JsonError<'read, R, S>>,
222}
223
224impl<'read, R: Read<'read>, S: Stack> Deserializer<'read, R, S> {
225  #[inline(always)]
226  pub(super) fn single_step(&mut self) -> Result<SingleStepResult, JsonError<'read, R, S>> {
227    if let Some(e) = self.error {
228      Err(e)?;
229    }
230    let res = single_step(&mut self.reader, &mut self.stack);
231    if let Some(e) = res.as_ref().err() {
232      self.error = Some(*e);
233    }
234    res
235  }
236}
237
238/// A JSON value, which advances the deserializer past itself when dropped.
239// Internally, we assume whenever this is held, the top item on the stack is `State::Unknown`
240pub struct Value<'read, 'parent, R: Read<'read>, S: Stack> {
241  pub(crate) deserializer: Option<&'parent mut Deserializer<'read, R, S>>,
242}
243
244impl<'read, 'parent, R: Read<'read>, S: Stack> Drop for Value<'read, 'parent, R, S> {
245  fn drop(&mut self) {
246    /*
247      When this value is dropped, we advance the deserializer past it if it hasn't already been
248      converted into a `FieldIterator` or `ArrayIterator` (which each have their own `Drop`
249      implementations).
250    */
251    if let Some(deserializer) = self.deserializer.take() {
252      if deserializer.error.is_some() {
253        return;
254      }
255
256      let Some(current) = deserializer.stack.peek() else {
257        deserializer.error = Some(JsonError::InternalError);
258        return;
259      };
260
261      let mut depth = match current {
262        State::Object | State::Array => 1,
263        State::Unknown => {
264          let step = match deserializer.single_step() {
265            Ok(SingleStepResult::Unknown(step)) => step,
266            Ok(_) => {
267              deserializer.error = Some(JsonError::InternalError);
268              return;
269            }
270            Err(_) => return,
271          };
272          match step {
273            // We successfully advanced past this item
274            SingleStepUnknownResult::Number(_) |
275            SingleStepUnknownResult::Bool(_) |
276            SingleStepUnknownResult::Null => return,
277            // We opened a string we now have to handle
278            SingleStepUnknownResult::String => {
279              handle_string_value(deserializer);
280              return;
281            }
282            // We opened an object/array we now have to advance past
283            SingleStepUnknownResult::ObjectOpened | SingleStepUnknownResult::ArrayOpened => 1,
284          }
285        }
286      };
287
288      // Since our object isn't a unit, step the deserializer until it's advanced past
289      while depth != 0 {
290        let Ok(step) = deserializer.single_step() else { return };
291        match step {
292          SingleStepResult::Unknown(SingleStepUnknownResult::String) => {
293            handle_string_value(deserializer);
294          }
295          SingleStepResult::Object(SingleStepObjectResult::Field) => {
296            handle_field(deserializer);
297          }
298          SingleStepResult::Object(SingleStepObjectResult::Closed) |
299          SingleStepResult::Array(SingleStepArrayResult::Closed) => depth -= 1,
300          SingleStepResult::Unknown(
301            SingleStepUnknownResult::ObjectOpened | SingleStepUnknownResult::ArrayOpened,
302          ) => depth += 1,
303          _ => {}
304        }
305      }
306    }
307  }
308}
309
310impl<'read, R: Read<'read>, S: Stack> Deserializer<'read, R, S> {
311  /// Create a new deserializer.
312  ///
313  /// This will advance past any whitespace present at the start of the reader, per RFC 8259's
314  /// definition of whitespace.
315  ///
316  /// If `reader` is aligned to valid JSON, this will read past the immediately present structure
317  /// yet no further. If `reader` is not aligned to valid JSON, the state of `reader` is undefined
318  /// after this.
319  #[inline(always)]
320  pub fn new(reader: R) -> Result<Self, JsonError<'read, R, S>> {
321    let mut reader = PeekableRead::try_from(reader).map_err(JsonError::ReadError)?;
322    advance_whitespace(&mut reader)?;
323
324    let mut stack = S::empty();
325    stack.push(State::Unknown).map_err(JsonError::StackError)?;
326
327    Ok(Deserializer { reader, stack, error: None })
328  }
329
330  /// Obtain the `Value` representing the serialized structure.
331  ///
332  /// This takes a mutable reference as `Deserializer` is the owned object representing the
333  /// deserializer's state. However, this is not eligible to be called more than once, even after
334  /// the initial mutable borrow is dropped. Multiple calls to this function will cause an error to
335  /// be returned.
336  #[inline(always)]
337  pub fn value(&mut self) -> Result<Value<'read, '_, R, S>, JsonError<'read, R, S>> {
338    if (self.stack.depth() != 1) || self.error.is_some() {
339      Err(JsonError::ReusedDeserializer)?;
340    }
341    let mut result = Value { deserializer: Some(self) };
342    if !matches!(result.kind()?, Type::Object | Type::Array) {
343      Err(JsonError::TypeError)?;
344    }
345    Ok(result)
346  }
347}