ocpi_tariffs/json/parser.rs
1//! Hand-rolled single-pass recursive-descent JSON parser.
2//!
3//! # Responsibilities
4//!
5//! The parser is responsible for structural correctness only: balanced delimiters,
6//! valid top-level values, and well-formed numbers. String content is captured as
7//! [`RawStr`] slices of the source — escape sequences and control characters are
8//! left untouched. Validation of string content is the responsibility of
9//! [`crate::json::decode`].
10//!
11//! # Output
12//!
13//! [`parse`] returns a [`Document`] that wraps the root [`Element`] and the shared
14//! [`DocumentInner`]. Every element carries an `Rc<DocumentInner>` so it can resolve
15//! its own path after the [`Document`] has been dropped.
16//!
17//! # Limits
18//!
19//! - **Nesting depth**: capped at [`MAX_DEPTH`] (128 levels). Inputs that exceed this
20//! are rejected with [`ErrorKind::DepthLimitExceeded`].
//! - **Element count**: capped at `u32::MAX` by the [`ElemId`] counter, enforced via
//!   [`ErrorKind::MaxElements`]. In practice, the 5-megabyte [`string::ReasonableLen`] gate
//!   currently makes this limit unreachable.
24//!
25//! # Two-phase construction
26//!
27//! The [`Parser`] builds a private [`RawElement`] tree and a [`PathTable`] in one
28//! pass. Once the full tree is available, [`into_element`] threads the shared
29//! `Rc<DocumentInner>` through every node to produce the public [`Element`] tree.
30//! This keeps the hot parsing loop free of reference-counting overhead.
31
32#![expect(
33 clippy::arithmetic_side_effects,
34 reason = "pos is bounded by source.len() and only advances after a successful byte read; arithmetic is safe within parser state machine invariants"
35)]
36#![expect(
37 clippy::as_conversions,
38 reason = "byte position casts between usize and u32 are safe: usize->u32 is bounded by available memory, u32->usize always fits"
39)]
40#![expect(
41 clippy::cast_possible_truncation,
42 reason = "source length is bounded by available memory, so byte positions always fit in u32"
43)]
44#![expect(
45 clippy::string_slice,
46 reason = "span boundaries are always at ASCII JSON token boundaries, so slices are valid UTF-8"
47)]
48
49#[cfg(test)]
50mod test;
51
52#[cfg(test)]
53mod test_basics;
54
55#[cfg(test)]
56mod test_parser;
57
58#[cfg(test)]
59mod test_type_sizes;
60
61use std::rc::Rc;
62
63use crate::{string, warning};
64
65use super::{
66 Document, DocumentInner, ElemId, Element, Field, Location, PathEntry, PathTable, RawStr, Span,
67 Value,
68};
69
/// Maximum nesting depth for arrays and objects.
///
/// RFC 8259 s9 permits an implementation to limit the maximum depth of nesting;
/// this parser accepts up to 128 nested containers.
/// Inputs that exceed this limit are rejected with [`ErrorKind::DepthLimitExceeded`].
const MAX_DEPTH: usize = 128;

// JSON insignificant-whitespace bytes `RFC 8259 s2`; these are what `Parser::skip_ws` skips.
const SPACE: u8 = b' ';
const TAB: u8 = b'\t';
const LF: u8 = b'\n';
const CR: u8 = b'\r';

// Structural characters `RFC 8259 s2`, plus the string delimiter and escape introducer.
const QUOTE: u8 = b'"';
const BACKSLASH: u8 = b'\\';
const COMMA: u8 = b',';
const COLON: u8 = b':';
const ARRAY_OPEN: u8 = b'[';
const ARRAY_CLOSE: u8 = b']';
const OBJECT_OPEN: u8 = b'{';
const OBJECT_CLOSE: u8 = b'}';

// Number-grammar characters `RFC 8259 s6`.
// `DIGIT_0`/`DIGIT_1`/`DIGIT_9` are used as range bounds: `DIGIT_0..=DIGIT_9` matches any
// digit, `DIGIT_1..=DIGIT_9` matches the non-zero digits that may start a multi-digit integer.
const MINUS: u8 = b'-';
const PLUS: u8 = b'+';
const DECIMAL_POINT: u8 = b'.';
const EXP_LOWER: u8 = b'e';
const EXP_UPPER: u8 = b'E';
const DIGIT_0: u8 = b'0';
const DIGIT_1: u8 = b'1';
const DIGIT_9: u8 = b'9';

// JSON keyword literals `RFC 8259 s3`; matched byte-for-byte by `Parser::expect_literal`.
const NULL: &str = "null";
const TRUE: &str = "true";
const FALSE: &str = "false";

// UTF-8 BOM (`U+FEFF`, encoded as 0xEF 0xBB 0xBF); `parse` skips it when the input starts with it.
const BOM: &[u8; 3] = b"\xEF\xBB\xBF";
109
110/// Parse a JSON document from `source`.
111///
112/// All string content in the returned tree borrows from `source`.
113/// Call `element.path()` on any element to obtain its RFC 9535 path.
114pub(crate) fn parse(source: string::ReasonableLen<'_>) -> Result<Document<'_>, Error> {
115 let mut p = Parser::new(source.into_inner());
116 // Skip a UTF-8 BOM (`U+FEFF` encoded as 0xEF 0xBB 0xBF) if present.
117 if p.bytes.starts_with(BOM) {
118 p.pos = BOM.len();
119 }
120 let raw_root = p.parse_value(PathEntry::Root)?;
121 p.skip_ws();
122 if p.pos < p.bytes.len() {
123 return Err(p.error(ErrorKind::TrailingContent));
124 }
125 let inner = Rc::new(DocumentInner {
126 source: source.into_inner(),
127 paths: p.table,
128 });
129 let root = into_element(raw_root, &inner);
130 Ok(Document { inner, root })
131}
132
/// A parse error produced when the input is not well-formed JSON.
///
/// Carries the byte offset and line/column position of the failure, and an
/// [`ErrorKind`] that describes what was wrong. Instances are built by the
/// parser's `error` helper from its read position at the time of the failure.
#[derive(Debug)]
pub struct Error {
    /// Byte offset of the error location, counted from the start of the source text.
    byte_offset: usize,
    /// The same location expressed as line and column.
    position: Location,
    /// The details about the error that occurred.
    kind: ErrorKind,
}
146
147impl Error {
148 /// Byte offset of the error location.
149 pub fn byte_offset(&self) -> usize {
150 self.byte_offset
151 }
152
153 /// Return a reference to the details about the error that occurred.
154 pub fn kind(&self) -> &ErrorKind {
155 &self.kind
156 }
157
158 /// Consume the `Error` and return the details about the error that occurred.
159 pub fn into_kind(self) -> ErrorKind {
160 self.kind
161 }
162
163 /// Consume the `Error` and return the byte offset of the error location and the details of the
164 /// error that occurred.
165 pub fn into_parts(self) -> (usize, ErrorKind) {
166 (self.byte_offset, self.kind)
167 }
168}
169
/// The specific reason a [`parse`] call failed.
#[derive(Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// The number grammar required a digit at this position but a different byte was found.
    ExpectedNumeral,
    /// A character that cannot start a JSON value was encountered.
    ExpectedStart,
    /// The input diverged from a keyword literal (`null`, `true` or `false`);
    /// `expected` is the full keyword that was being matched.
    ExpectedLiteral { expected: &'static str },
    /// An array wasn't terminated correctly: an element was followed by something other
    /// than `,` or `]`, or a `,` was directly followed by `]`.
    ExpectedEndArray,
    /// An object wasn't terminated correctly: a field was followed by something other
    /// than `,` or `}`, or a `,` was directly followed by `}`.
    ExpectedEndObject,
    /// A specific character was required at this position but a different one was found;
    /// `expected` is the required character, not the one that was encountered.
    UnexpectedChar { expected: char },
    /// The input ended before the value was complete.
    UnexpectedEOF,
    /// Non-whitespace bytes follow the root value.
    TrailingContent,
    /// The input exceeds the maximum supported nesting depth.
    DepthLimitExceeded,
    /// The input contains more than [`u32::MAX`] JSON elements.
    // NOTE(review): `Parser::alloc_id` raises this when its `usize` counter overflows;
    // confirm that this matches the documented `u32::MAX` bound.
    MaxElements,
}
194
195impl crate::Warning for Error {
196 fn id(&self) -> warning::Id {
197 let s = match self.kind {
198 ErrorKind::ExpectedNumeral => "expected_numeral",
199 ErrorKind::ExpectedStart => "expected_start",
200 ErrorKind::ExpectedLiteral { .. } => "expected_literal",
201 ErrorKind::ExpectedEndArray => "expected_end_array",
202 ErrorKind::ExpectedEndObject => "expected_end_object",
203 ErrorKind::UnexpectedChar { .. } => "unexpected_char",
204 ErrorKind::UnexpectedEOF => "unexpected_eof",
205 ErrorKind::TrailingContent => "trailing_content",
206 ErrorKind::DepthLimitExceeded => "depth_limit_exceeded",
207 ErrorKind::MaxElements => "max_elements",
208 };
209
210 warning::Id::from_static(s)
211 }
212}
213
214impl std::fmt::Display for Error {
215 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216 let Self {
217 byte_offset,
218 position,
219 kind,
220 } = self;
221
222 match kind {
223 ErrorKind::ExpectedLiteral { expected } => {
224 write!(
225 f,
226 "unexpected literal found at line: `{position}`, byte `{byte_offset}`; expected: `{expected:?}`"
227 )
228 }
229 ErrorKind::ExpectedNumeral => {
230 write!(
231 f,
232 "unexpected numeral found at line: `{position}`, byte `{byte_offset}`; expected: `0-9`"
233 )
234 }
235 ErrorKind::ExpectedStart => {
236 write!(
237 f,
238 "unexpected start character found at line: `{position}`, byte `{byte_offset}`; expected one of: `[n, t, f, \", -, 0-9, [, {{]`"
239 )
240 }
241 ErrorKind::ExpectedEndArray => {
242 write!(
243 f,
244 "unexpected character found at line: `{position}`, byte `{byte_offset}`; expected: `,` or `]`"
245 )
246 }
247 ErrorKind::ExpectedEndObject => {
248 write!(
249 f,
250 "unexpected character found at line: `{position}`, byte `{byte_offset}`; expected: `,` or `}}`"
251 )
252 }
253 ErrorKind::UnexpectedChar { expected } => {
254 write!(
255 f,
256 "unexpected character `{expected}` found at line: `{position}`, byte `{byte_offset}``"
257 )
258 }
259 ErrorKind::UnexpectedEOF => write!(
260 f,
261 "unexpected end of input found at line: `{position}`, byte `{byte_offset}`"
262 ),
263 ErrorKind::TrailingContent => write!(
264 f,
265 "trailing content found at line: `{position}`, byte `{byte_offset}`"
266 ),
267 ErrorKind::DepthLimitExceeded => {
268 write!(f, "nesting depth exceeds the {MAX_DEPTH}-level limit")
269 }
270 ErrorKind::MaxElements => write!(f, "document exceeds {} JSON elements", u32::MAX),
271 }
272 }
273}
274
// Empty impl: `Error` relies on the trait's default methods, so it reports no underlying
// source error; `Debug` and `Display` above supply the required supertraits.
impl std::error::Error for Error {}
276
/// Parser-private element tree; carries no reference to [`DocumentInner`].
///
/// Produced by [`Parser::parse_value`] and converted to the public [`Element`] tree by
/// [`into_element`] after [`DocumentInner`] is constructed. The fields map one-to-one onto
/// the `id`, `span`, `full_span_end` and `value` fields of [`Element`].
struct RawElement<'buf> {
    /// Unique identifier within the document; sequentially assigned depth-first.
    id: ElemId,
    /// Byte range of the value only; use for replacement edits.
    span: Span,
    /// End of the value plus any trailing comma and whitespace; use for removal edits.
    /// Equal to `span.end` when there is no trailing comma (root element, or last sibling).
    /// Initialised to `span.end` by `parse_value`; the enclosing array/object parser
    /// overwrites it once it has consumed the separator.
    full_span_end: u32,
    /// Parsed value, borrowing from the source `&str`.
    value: RawValue<'buf>,
}
292
/// The parsed content of a [`RawElement`], mirroring [`Value`] but without [`DocumentInner`].
///
/// Strings are kept as [`RawStr`] slices of the source; numbers are kept as
/// raw `&str` slices. [`into_element`] passes scalars through unchanged and rebuilds
/// the container variants around converted children.
enum RawValue<'buf> {
    /// JSON `null` literal.
    Null,
    /// JSON `true` literal.
    True,
    /// JSON `false` literal.
    False,
    /// String content with quotes removed; escape sequences are not decoded.
    String(RawStr<'buf>),
    /// Raw number text exactly as it appears in the source (sign, fraction and exponent
    /// included); not guaranteed to fit any specific numeric type.
    Number(&'buf str),
    /// Ordered list of child elements, in source order.
    Array(Vec<RawElement<'buf>>),
    /// Ordered list of key-value fields, in source order.
    Object(Vec<RawField<'buf>>),
}
313
/// A key-value pair inside a JSON object, mirroring [`Field`] but without [`DocumentInner`].
///
/// `key_span` covers the quoted key bytes in the source, including the surrounding
/// double-quotes. The value is stored as a [`RawElement`]. The key text itself is not
/// stored here; `parse_object` records it in the path entry pushed for the value.
struct RawField<'buf> {
    /// Span of the key token, including surrounding `"` delimiters.
    key_span: Span,
    /// The value element; its path ends with the key from `key_span`.
    element: RawElement<'buf>,
}
324
/// Single-pass recursive-descent JSON parser.
///
/// Holds all mutable state for one parse: the source string, a byte cursor, an
/// [`ElemId`] counter, the [`PathTable`] being built, and a nesting-depth guard.
///
/// Call [`Parser::new`] to create an instance, then [`Parser::parse_value`] to
/// drive the parse. The result is a [`RawElement`] tree; pass it together with
/// the completed [`PathTable`] to [`into_element`] to obtain the public
/// [`Element`] tree with shared [`DocumentInner`] attached.
struct Parser<'buf> {
    /// The full source string; all span byte positions are relative to this.
    source: &'buf str,
    /// Byte view of `source`; used for index-based reads without UTF-8 overhead.
    bytes: &'buf [u8],
    /// Current read position in bytes; an index into both `bytes` and `source`.
    pos: usize,
    /// Counter for assigning sequential [`ElemId`]s depth-first.
    // NOTE(review): the module docs describe a `u32::MAX` element cap, but this counter is a
    // `usize` and `alloc_id` only fails when the `usize` addition overflows; confirm which
    // bound is intended.
    next_id: usize,
    /// Path table being built as elements are parsed; one entry is pushed per element,
    /// immediately after its id is allocated.
    table: PathTable<'buf>,
    /// Current nesting depth; checked against [`MAX_DEPTH`] on each container open.
    depth: usize,
}
348
349impl<'buf> Parser<'buf> {
350 /// Creates a `Parser` that will read from `source`.
351 fn new(source: &'buf str) -> Self {
352 Self {
353 source,
354 bytes: source.as_bytes(),
355 pos: 0,
356 next_id: 0,
357 table: PathTable::default(),
358 depth: 0,
359 }
360 }
361
362 /// Allocates the next sequential [`ElemId`] and advances the counter.
363 fn alloc_id(&mut self) -> Result<ElemId, Error> {
364 let id = ElemId(self.next_id);
365 self.next_id = self
366 .next_id
367 .checked_add(1)
368 .ok_or_else(|| self.error(ErrorKind::MaxElements))?;
369 Ok(id)
370 }
371
372 /// Advances past any JSON whitespace (`space`, `tab`, `CR`, `LF`) at the current position.
373 fn skip_ws(&mut self) {
374 while matches!(self.bytes.get(self.pos), Some(&SPACE | &TAB | &LF | &CR)) {
375 self.chomp();
376 }
377 }
378
379 /// Returns the byte at the current position without advancing, or `None` at end of input.
380 fn peek(&self) -> Option<u8> {
381 self.bytes.get(self.pos).copied()
382 }
383
    /// Advances the cursor by one byte.
    ///
    /// Performs no bounds check of its own; every call site in this file invokes it only
    /// after a successful read of the byte at the current position.
    #[inline]
    fn chomp(&mut self) {
        self.pos += 1;
    }
389
390 /// Create and return an `Error`.
391 fn error(&self, kind: ErrorKind) -> Error {
392 let parsed = &self.source[..self.pos];
393 Error {
394 byte_offset: parsed.len(),
395 position: super::line_col(parsed),
396 kind,
397 }
398 }
399
400 /// Returns the byte at the current position and advances past it, or `None` at end of input.
401 fn advance(&mut self) -> Option<u8> {
402 let b = self.bytes.get(self.pos).copied();
403 if b.is_some() {
404 self.chomp();
405 }
406 b
407 }
408
409 /// Asserts that the next byte equals `byte` and advances past it.
410 ///
411 /// Returns [`ErrorKind::UnexpectedChar`] if a different byte is present, or
412 /// [`ErrorKind::UnexpectedEOF`] if the input is exhausted.
413 fn expect_byte(&mut self, byte: u8) -> Result<(), Error> {
414 match self.bytes.get(self.pos) {
415 Some(&b) if b == byte => {
416 self.chomp();
417 Ok(())
418 }
419 Some(_) => Err(self.error(ErrorKind::UnexpectedChar {
420 expected: char::from(byte),
421 })),
422 None => Err(self.error(ErrorKind::UnexpectedEOF)),
423 }
424 }
425
426 /// Asserts that the next bytes match `literal` byte-for-byte, advancing past them.
427 ///
428 /// Used for JSON keywords (`null`, `true`, `false`). The caller dispatches via
429 /// [`Self::peek`] without consuming the first byte, so `literal` must include it.
430 fn expect_literal(&mut self, literal: &'static str) -> Result<(), Error> {
431 for &expected in literal.as_bytes() {
432 match self.advance() {
433 Some(b) if b == expected => {}
434 Some(_) => {
435 self.pos -= 1;
436 return Err(self.error(ErrorKind::ExpectedLiteral { expected: literal }));
437 }
438 None => return Err(self.error(ErrorKind::UnexpectedEOF)),
439 }
440 }
441 Ok(())
442 }
443
444 /// Parses one JSON value preceded by optional whitespace and returns a fully-formed [`Element`].
445 ///
446 /// Allocates an [`ElemId`] and records the path `entry` before dispatching to
447 /// [`Self::parse_value_kind`], so child elements produced during that call
448 /// already find this element's id in the table as their parent.
449 fn parse_value(&mut self, entry: PathEntry<'buf>) -> Result<RawElement<'buf>, Error> {
450 self.skip_ws();
451 // ID and table entry are registered before recursing so that child elements
452 // produced by parse_value_kind see this id as their parent.
453 let id = self.alloc_id()?;
454 self.table.push(entry);
455 let start = self.pos;
456 let value = self.parse_value_kind(id)?;
457 let span = Span::new(start as u32, self.pos as u32);
458 Ok(RawElement {
459 id,
460 span,
461 // The parent container extends this past the trailing comma and whitespace
462 // when it exists, so that the element's removal span covers its own separator.
463 full_span_end: span.end,
464 value,
465 })
466 }
467
468 /// Dispatches to the type-specific parser based on the first byte of the value.
469 ///
470 /// `id` is this element's own [`ElemId`], threaded down to [`Self::parse_array`]
471 /// and [`Self::parse_object`] so they can record it as the parent of their children.
472 fn parse_value_kind(&mut self, id: ElemId) -> Result<RawValue<'buf>, Error> {
473 match self
474 .peek()
475 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
476 {
477 b'n' => {
478 self.expect_literal(NULL)?;
479 Ok(RawValue::Null)
480 }
481 b't' => {
482 self.expect_literal(TRUE)?;
483 Ok(RawValue::True)
484 }
485 b'f' => {
486 self.expect_literal(FALSE)?;
487 Ok(RawValue::False)
488 }
489 QUOTE => Ok(RawValue::String(self.parse_raw_str()?)),
490 MINUS | DIGIT_0..=DIGIT_9 => Ok(RawValue::Number(self.parse_number_str()?)),
491 ARRAY_OPEN => self.parse_array(id),
492 OBJECT_OPEN => self.parse_object(id),
493 _ => Err(self.error(ErrorKind::ExpectedStart)),
494 }
495 }
496
497 /// Parses a JSON number and returns the raw source slice.
498 ///
499 /// Grammar `RFC 8259 s6`:
500 /// ```text
501 /// number = [ '-' ] int [ frac ] [ exp ]
502 /// int = '0' | [1-9] DIGIT*
503 /// frac = '.' DIGIT+
504 /// exp = ('e'|'E') ['+'|'-'] DIGIT+
505 /// ```
506 fn parse_number_str(&mut self) -> Result<&'buf str, Error> {
507 let start = self.pos;
508
509 if self.peek() == Some(MINUS) {
510 self.chomp();
511 }
512
513 match self
514 .peek()
515 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
516 {
517 // A lone '0' is the only valid integer starting with zero; more digits
518 // after it would be a leading-zero violation (e.g. "01" is invalid JSON).
519 DIGIT_0 => self.chomp(),
520 DIGIT_1..=DIGIT_9 => {
521 while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
522 self.chomp();
523 }
524 }
525 _ => return Err(self.error(ErrorKind::ExpectedNumeral)),
526 }
527
528 if self.peek() == Some(DECIMAL_POINT) {
529 self.chomp();
530 // At least one digit is required after the decimal point.
531 if !matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
532 return Err(match self.peek() {
533 Some(_) => self.error(ErrorKind::ExpectedNumeral),
534 None => self.error(ErrorKind::UnexpectedEOF),
535 });
536 }
537 while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
538 self.chomp();
539 }
540 }
541
542 if matches!(self.peek(), Some(EXP_LOWER | EXP_UPPER)) {
543 self.chomp();
544 if matches!(self.peek(), Some(PLUS | MINUS)) {
545 self.chomp();
546 }
547 // At least one digit is required after the exponent indicator (and optional sign).
548 if !matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
549 return Err(match self.peek() {
550 Some(_) => self.error(ErrorKind::ExpectedNumeral),
551 None => self.error(ErrorKind::UnexpectedEOF),
552 });
553 }
554 while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
555 self.chomp();
556 }
557 }
558
559 Ok(&self.source[start..self.pos])
560 }
561
562 /// Parses a JSON string and returns a [`RawStr`] with quotes stripped.
563 ///
564 /// Scans for the closing `"` delimiter, skipping one byte after every `\`
565 /// so that `\"` does not terminate the string. Escape sequences and control
566 /// characters are not validated here; callers use [`RawStr::decode_escapes`].
567 fn parse_raw_str(&mut self) -> Result<RawStr<'buf>, Error> {
568 self.expect_byte(QUOTE)?;
569 let content_start = self.pos; // First byte after the opening `"`.
570
571 loop {
572 match self
573 .advance()
574 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
575 {
576 QUOTE => break,
577 BACKSLASH => {
578 // Consume whatever follows so that `\"` does not close the string.
579 self.advance()
580 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?;
581 }
582 _ => {}
583 }
584 }
585
586 // `advance()` left `pos` one past the closing '"', so `pos-1` is the '"' itself;
587 // `content_start..pos-1` therefore captures content without either delimiter.
588 Ok(RawStr(&self.source[content_start..self.pos - 1]))
589 }
590
591 /// Parses a JSON array `[...]` and returns [`Value::Array`].
592 ///
593 /// Increments the depth counter before consuming `[` and returns
594 /// [`ErrorKind::DepthLimitExceeded`] if the limit is exceeded.
595 fn parse_array(&mut self, parent_id: ElemId) -> Result<RawValue<'buf>, Error> {
596 self.depth += 1;
597 if self.depth > MAX_DEPTH {
598 return Err(self.error(ErrorKind::DepthLimitExceeded));
599 }
600 self.expect_byte(ARRAY_OPEN)?;
601 self.skip_ws();
602 let mut elements: Vec<RawElement<'buf>> = Vec::new();
603
604 if self.peek() != Some(ARRAY_CLOSE) {
605 loop {
606 let entry = PathEntry::Item {
607 parent: parent_id,
608 index: elements.len() as u32,
609 };
610 let mut elem = self.parse_value(entry)?;
611 self.skip_ws();
612 match self
613 .peek()
614 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
615 {
616 COMMA => {
617 self.chomp();
618 self.skip_ws();
619 if self.peek() == Some(ARRAY_CLOSE) {
620 return Err(self.error(ErrorKind::ExpectedEndArray));
621 }
622 // Extend past the comma and leading whitespace of the next sibling
623 // so that removing this element also removes its own separator.
624 elem.full_span_end = self.pos as u32;
625 elements.push(elem);
626 }
627 ARRAY_CLOSE => {
628 elements.push(elem);
629 break;
630 }
631 _ => return Err(self.error(ErrorKind::ExpectedEndArray)),
632 }
633 }
634 }
635
636 self.expect_byte(ARRAY_CLOSE)?;
637 self.depth -= 1;
638 Ok(RawValue::Array(elements))
639 }
640
641 /// Parses a JSON object `{...}` and returns [`Value::Object`].
642 ///
643 /// Increments the depth counter before consuming `{` and returns
644 /// [`ErrorKind::DepthLimitExceeded`] if the limit is exceeded.
645 fn parse_object(&mut self, parent_id: ElemId) -> Result<RawValue<'buf>, Error> {
646 self.depth += 1;
647 if self.depth > MAX_DEPTH {
648 return Err(self.error(ErrorKind::DepthLimitExceeded));
649 }
650 self.expect_byte(OBJECT_OPEN)?;
651 self.skip_ws();
652 let mut fields: Vec<RawField<'buf>> = Vec::new();
653
654 if self.peek() != Some(OBJECT_CLOSE) {
655 loop {
656 let key_start = self.pos;
657 let key = self.parse_raw_str()?;
658 let key_span = Span::new(key_start as u32, self.pos as u32);
659 self.skip_ws();
660 self.expect_byte(COLON)?;
661 let entry = PathEntry::Field {
662 parent: parent_id,
663 key,
664 };
665 let mut elem = self.parse_value(entry)?;
666 self.skip_ws();
667 match self
668 .peek()
669 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
670 {
671 COMMA => {
672 self.chomp();
673 self.skip_ws();
674 if self.peek() == Some(OBJECT_CLOSE) {
675 return Err(self.error(ErrorKind::ExpectedEndObject));
676 }
677 // Same as in parse_array: extend past the comma and whitespace
678 // so that removing this field also removes its own separator.
679 elem.full_span_end = self.pos as u32;
680 fields.push(RawField {
681 key_span,
682 element: elem,
683 });
684 }
685 OBJECT_CLOSE => {
686 fields.push(RawField {
687 key_span,
688 element: elem,
689 });
690 break;
691 }
692 _ => return Err(self.error(ErrorKind::ExpectedEndObject)),
693 }
694 }
695 }
696
697 self.expect_byte(OBJECT_CLOSE)?;
698 self.depth -= 1;
699 Ok(RawValue::Object(fields))
700 }
701}
702
703/// Converts a [`RawElement`] tree into the public [`Element`] tree, loading every
704/// node with a clone of `inner` at construction time.
705///
706/// Uses an explicit work stack instead of recursion. Children are pushed in
707/// reverse so they are processed in order; each `Build*` task then pops its
708/// children off `done` and assembles the parent.
709fn into_element<'buf>(raw: RawElement<'buf>, inner: &Rc<DocumentInner<'buf>>) -> Element<'buf> {
710 enum Task<'buf> {
711 Process(RawElement<'buf>),
712 BuildArray {
713 id: ElemId,
714 span: Span,
715 full_span_end: u32,
716 count: usize,
717 },
718 BuildObject {
719 id: ElemId,
720 span: Span,
721 full_span_end: u32,
722 key_spans: Vec<Span>,
723 },
724 }
725
726 let mut work: Vec<Task<'buf>> = vec![Task::Process(raw)];
727 let mut done: Vec<Element<'buf>> = Vec::new();
728
729 while let Some(task) = work.pop() {
730 match task {
731 Task::Process(raw) => {
732 let value = match raw.value {
733 RawValue::Null => Value::Null,
734 RawValue::True => Value::True,
735 RawValue::False => Value::False,
736 RawValue::String(s) => Value::String(s),
737 RawValue::Number(n) => Value::Number(n),
738 RawValue::Array(items) => {
739 work.push(Task::BuildArray {
740 id: raw.id,
741 span: raw.span,
742 full_span_end: raw.full_span_end,
743 count: items.len(),
744 });
745 for item in items.into_iter().rev() {
746 work.push(Task::Process(item));
747 }
748 continue;
749 }
750 RawValue::Object(fields) => {
751 let key_spans = fields.iter().map(|f| f.key_span).collect();
752 work.push(Task::BuildObject {
753 id: raw.id,
754 span: raw.span,
755 full_span_end: raw.full_span_end,
756 key_spans,
757 });
758 for field in fields.into_iter().rev() {
759 work.push(Task::Process(field.element));
760 }
761 continue;
762 }
763 };
764 done.push(Element {
765 doc: Rc::clone(inner),
766 id: raw.id,
767 span: raw.span,
768 full_span_end: raw.full_span_end,
769 value,
770 });
771 }
772 Task::BuildArray {
773 id,
774 span,
775 full_span_end,
776 count,
777 } => {
778 let start = done.len() - count;
779 let items: Vec<Element<'buf>> = done.drain(start..).collect();
780 done.push(Element {
781 doc: Rc::clone(inner),
782 id,
783 span,
784 full_span_end,
785 value: Value::Array(items),
786 });
787 }
788 Task::BuildObject {
789 id,
790 span,
791 full_span_end,
792 key_spans,
793 } => {
794 let count = key_spans.len();
795 let start = done.len() - count;
796 let elements: Vec<Element<'buf>> = done.drain(start..).collect();
797 let fields = key_spans
798 .into_iter()
799 .zip(elements)
800 .map(|(key_span, element)| Field { key_span, element })
801 .collect();
802 done.push(Element {
803 doc: Rc::clone(inner),
804 id,
805 span,
806 full_span_end,
807 value: Value::Object(fields),
808 });
809 }
810 }
811 }
812
813 // Each Process task produces exactly one element in `done`, either directly
814 // (scalars) or via a Build task (containers). Starting with one root Process
815 // task guarantees exactly one element remains here.
816 done.swap_remove(0)
817}