Skip to main content

ocpi_tariffs/json/
parser.rs

1//! Hand-rolled single-pass recursive-descent JSON parser.
2//!
3//! # Responsibilities
4//!
5//! The parser is responsible for structural correctness only: balanced delimiters,
6//! valid top-level values, and well-formed numbers. String content is captured as
7//! [`RawStr`] slices of the source — escape sequences and control characters are
8//! left untouched. Validation of string content is the responsibility of
9//! [`crate::json::decode`].
10//!
11//! # Output
12//!
13//! [`parse`] returns a [`Document`] that wraps the root [`Element`] and the shared
14//! [`DocumentInner`]. Every element carries an `Rc<DocumentInner>` so it can resolve
15//! its own path after the [`Document`] has been dropped.
16//!
17//! # Limits
18//!
19//! - **Nesting depth**: capped at [`MAX_DEPTH`] (128 levels). Inputs that exceed this
20//!   are rejected with [`ErrorKind::DepthLimitExceeded`].
//! - **Element count**: capped at `u32::MAX` by the [`ElemId`] counter, enforced via
//!   [`ErrorKind::MaxElements`]. In practice, the 5-megabyte [`string::ReasonableLen`] gate
//!   currently makes this limit unreachable.
24//!
25//! # Two-phase construction
26//!
27//! The [`Parser`] builds a private [`RawElement`] tree and a [`PathTable`] in one
28//! pass. Once the full tree is available, [`into_element`] threads the shared
29//! `Rc<DocumentInner>` through every node to produce the public [`Element`] tree.
30//! This keeps the hot parsing loop free of reference-counting overhead.
31
32#![expect(
33    clippy::arithmetic_side_effects,
34    reason = "pos is bounded by source.len() and only advances after a successful byte read; arithmetic is safe within parser state machine invariants"
35)]
36#![expect(
37    clippy::as_conversions,
38    reason = "byte position casts between usize and u32 are safe: usize->u32 is bounded by available memory, u32->usize always fits"
39)]
40#![expect(
41    clippy::cast_possible_truncation,
42    reason = "source length is bounded by available memory, so byte positions always fit in u32"
43)]
44#![expect(
45    clippy::string_slice,
46    reason = "span boundaries are always at ASCII JSON token boundaries, so slices are valid UTF-8"
47)]
48
49#[cfg(test)]
50mod test;
51
52#[cfg(test)]
53mod test_basics;
54
55#[cfg(test)]
56mod test_parser;
57
58#[cfg(test)]
59mod test_type_sizes;
60
61use std::rc::Rc;
62
63use crate::{string, warning};
64
65use super::{
66    Document, DocumentInner, ElemId, Element, Field, Location, PathEntry, PathTable, RawStr, Span,
67    Value,
68};
69
/// Maximum nesting depth for arrays and objects.
///
/// RFC 8259 s9 permits an implementation to limit the maximum depth of nesting but
/// does not prescribe a value; 128 levels is the limit chosen by this parser.
/// Inputs that exceed this limit are rejected with [`ErrorKind::DepthLimitExceeded`].
const MAX_DEPTH: usize = 128;

// JSON whitespace characters `RFC 8259 s2`.
const SPACE: u8 = b' ';
const TAB: u8 = b'\t';
const LF: u8 = b'\n';
const CR: u8 = b'\r';

// String delimiter and escape introducer `RFC 8259 s7`, followed by the six
// structural characters `RFC 8259 s2`.
const QUOTE: u8 = b'"';
const BACKSLASH: u8 = b'\\';
const COMMA: u8 = b',';
const COLON: u8 = b':';
const ARRAY_OPEN: u8 = b'[';
const ARRAY_CLOSE: u8 = b']';
const OBJECT_OPEN: u8 = b'{';
const OBJECT_CLOSE: u8 = b'}';

// Number-grammar characters `RFC 8259 s6`.
// `DIGIT_0`, `DIGIT_1` and `DIGIT_9` are used as range bounds (`DIGIT_0..=DIGIT_9`,
// `DIGIT_1..=DIGIT_9`) when matching digits.
const MINUS: u8 = b'-';
const PLUS: u8 = b'+';
const DECIMAL_POINT: u8 = b'.';
const EXP_LOWER: u8 = b'e';
const EXP_UPPER: u8 = b'E';
const DIGIT_0: u8 = b'0';
const DIGIT_1: u8 = b'1';
const DIGIT_9: u8 = b'9';

// JSON keyword literals `RFC 8259 s3`.
// Each includes its first byte because the parser dispatches on a peeked byte
// without consuming it.
const NULL: &str = "null";
const TRUE: &str = "true";
const FALSE: &str = "false";

// UTF-8 BOM (`U+FEFF`, encoded as 0xEF 0xBB 0xBF); skipped by `parse` when it
// prefixes the input.
const BOM: &[u8; 3] = b"\xEF\xBB\xBF";
109
110/// Parse a JSON document from `source`.
111///
112/// All string content in the returned tree borrows from `source`.
113/// Call `element.path()` on any element to obtain its RFC 9535 path.
114pub(crate) fn parse(source: string::ReasonableLen<'_>) -> Result<Document<'_>, Error> {
115    let mut p = Parser::new(source.into_inner());
116    // Skip a UTF-8 BOM (`U+FEFF` encoded as 0xEF 0xBB 0xBF) if present.
117    if p.bytes.starts_with(BOM) {
118        p.pos = BOM.len();
119    }
120    let raw_root = p.parse_value(PathEntry::Root)?;
121    p.skip_ws();
122    if p.pos < p.bytes.len() {
123        return Err(p.error(ErrorKind::TrailingContent));
124    }
125    let inner = Rc::new(DocumentInner {
126        source: source.into_inner(),
127        paths: p.table,
128    });
129    let root = into_element(raw_root, &inner);
130    Ok(Document { inner, root })
131}
132
/// A parse error produced when the input is not well-formed JSON.
///
/// Carries the byte offset and line/column position of the failure, and an
/// [`ErrorKind`] that describes what was wrong. Values are created by
/// `Parser::error`, which records the parser's cursor at the moment of failure.
#[derive(Debug)]
pub struct Error {
    /// Byte offset into the source at which the error was raised.
    byte_offset: usize,
    /// The same location expressed as line and column (computed by `super::line_col`
    /// over the source text preceding `byte_offset`).
    position: Location,
    /// The details about the error that occurred.
    kind: ErrorKind,
}
146
147impl Error {
148    /// Byte offset of the error location.
149    pub fn byte_offset(&self) -> usize {
150        self.byte_offset
151    }
152
153    /// Return a reference to the details about the error that occurred.
154    pub fn kind(&self) -> &ErrorKind {
155        &self.kind
156    }
157
158    /// Consume the `Error` and return the details about the error that occurred.
159    pub fn into_kind(self) -> ErrorKind {
160        self.kind
161    }
162
163    /// Consume the `Error` and return the byte offset of the error location and the details of the
164    /// error that occurred.
165    pub fn into_parts(self) -> (usize, ErrorKind) {
166        (self.byte_offset, self.kind)
167    }
168}
169
/// The specific reason a [`parse`] call failed.
#[derive(Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// A digit was required while reading a number (integer part, fraction, or
    /// exponent) but a different character was found.
    ExpectedNumeral,
    /// A character that cannot start a JSON value was encountered.
    ExpectedStart,
    /// A keyword (`null`, `true`, `false`) was started but a later byte did not match.
    /// `expected` is the complete keyword that was being read.
    ExpectedLiteral { expected: &'static str },
    /// An array wasn't terminated correctly: after an element something other than
    /// `,` or `]` was found, or a `,` was directly followed by `]` (trailing comma).
    ExpectedEndArray,
    /// An object wasn't terminated correctly: after a field value something other than
    /// `,` or `}` was found, or a `,` was directly followed by `}` (trailing comma).
    ExpectedEndObject,
    /// A specific character was required at this position but a different one was
    /// found. `expected` is the character that was required, not the one found.
    UnexpectedChar { expected: char },
    /// The input ended before the value was complete.
    UnexpectedEOF,
    /// Non-whitespace bytes follow the root value.
    TrailingContent,
    /// The input exceeds the maximum supported nesting depth.
    DepthLimitExceeded,
    /// The input contains more than [`u32::MAX`] JSON elements.
    ///
    /// NOTE(review): `Parser::alloc_id` raises this when its `usize` counter
    /// overflows; confirm that this matches the `u32::MAX` cap stated here.
    MaxElements,
}
194
195impl crate::Warning for Error {
196    fn id(&self) -> warning::Id {
197        let s = match self.kind {
198            ErrorKind::ExpectedNumeral => "expected_numeral",
199            ErrorKind::ExpectedStart => "expected_start",
200            ErrorKind::ExpectedLiteral { .. } => "expected_literal",
201            ErrorKind::ExpectedEndArray => "expected_end_array",
202            ErrorKind::ExpectedEndObject => "expected_end_object",
203            ErrorKind::UnexpectedChar { .. } => "unexpected_char",
204            ErrorKind::UnexpectedEOF => "unexpected_eof",
205            ErrorKind::TrailingContent => "trailing_content",
206            ErrorKind::DepthLimitExceeded => "depth_limit_exceeded",
207            ErrorKind::MaxElements => "max_elements",
208        };
209
210        warning::Id::from_static(s)
211    }
212}
213
214impl std::fmt::Display for Error {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        let Self {
217            byte_offset,
218            position,
219            kind,
220        } = self;
221
222        match kind {
223            ErrorKind::ExpectedLiteral { expected } => {
224                write!(
225                    f,
226                    "unexpected literal found at line: `{position}`, byte `{byte_offset}`; expected: `{expected:?}`"
227                )
228            }
229            ErrorKind::ExpectedNumeral => {
230                write!(
231                    f,
232                    "unexpected numeral found at line: `{position}`, byte `{byte_offset}`; expected: `0-9`"
233                )
234            }
235            ErrorKind::ExpectedStart => {
236                write!(
237                    f,
238                    "unexpected start character found at line: `{position}`, byte `{byte_offset}`; expected one of: `[n, t, f, \", -, 0-9, [, {{]`"
239                )
240            }
241            ErrorKind::ExpectedEndArray => {
242                write!(
243                    f,
244                    "unexpected character found at line: `{position}`, byte `{byte_offset}`; expected: `,` or `]`"
245                )
246            }
247            ErrorKind::ExpectedEndObject => {
248                write!(
249                    f,
250                    "unexpected character found at line: `{position}`, byte `{byte_offset}`; expected: `,` or `}}`"
251                )
252            }
253            ErrorKind::UnexpectedChar { expected } => {
254                write!(
255                    f,
256                    "unexpected character `{expected}` found at line: `{position}`, byte `{byte_offset}``"
257                )
258            }
259            ErrorKind::UnexpectedEOF => write!(
260                f,
261                "unexpected end of input found at line: `{position}`, byte `{byte_offset}`"
262            ),
263            ErrorKind::TrailingContent => write!(
264                f,
265                "trailing content found at line: `{position}`, byte `{byte_offset}`"
266            ),
267            ErrorKind::DepthLimitExceeded => {
268                write!(f, "nesting depth exceeds the {MAX_DEPTH}-level limit")
269            }
270            ErrorKind::MaxElements => write!(f, "document exceeds {} JSON elements", u32::MAX),
271        }
272    }
273}
274
// No methods are overridden: `Error` wraps no underlying error, so the trait's
// default `source()` (which returns `None`) applies.
impl std::error::Error for Error {}
276
/// Parser-private element tree; carries no reference to [`DocumentInner`].
///
/// Produced by `Parser::parse_value` and converted to the public [`Element`] tree
/// by [`into_element`] after [`DocumentInner`] is constructed.
struct RawElement<'buf> {
    /// Unique identifier within the document; sequentially assigned depth-first.
    /// The id is allocated before the element's children are parsed, so a
    /// container's id always precedes the ids of its children.
    id: ElemId,
    /// Byte range of the value only; use for replacement edits.
    span: Span,
    /// End of the value plus any trailing comma and the whitespace that follows it;
    /// use for removal edits.
    /// Equal to `span.end` when there is no trailing comma (root element, or last sibling).
    full_span_end: u32,
    /// Parsed value, borrowing from the source `&str`.
    value: RawValue<'buf>,
}
292
/// The parsed content of a [`RawElement`], mirroring [`Value`] but without [`DocumentInner`].
///
/// Strings are kept as [`RawStr`] slices of the source; numbers are kept as
/// raw `&str` slices. [`into_element`] passes both through unchanged into the
/// corresponding [`Value`] variants, and rebuilds containers around converted children.
enum RawValue<'buf> {
    /// JSON `null` literal.
    Null,
    /// JSON `true` literal.
    True,
    /// JSON `false` literal.
    False,
    /// String content with quotes removed; escape sequences are not decoded.
    String(RawStr<'buf>),
    /// Raw number text exactly as it appears in the source. It has been checked
    /// against the JSON number grammar but is not guaranteed to fit any specific
    /// numeric type.
    Number(&'buf str),
    /// Ordered list of child elements.
    Array(Vec<RawElement<'buf>>),
    /// Ordered list of key-value fields, in source order.
    Object(Vec<RawField<'buf>>),
}
313
/// A key-value pair inside a JSON object, mirroring [`Field`] but without [`DocumentInner`].
///
/// `key_span` covers the quoted key bytes in the source, including the surrounding
/// double-quotes. The value is stored as a [`RawElement`]. The key text itself is
/// not stored here; the parser records it in the path entry pushed for the value.
struct RawField<'buf> {
    /// Span of the key token, including surrounding `"` delimiters.
    key_span: Span,
    /// The value element; its path ends with the key from `key_span`.
    element: RawElement<'buf>,
}
324
/// Single-pass recursive-descent JSON parser.
///
/// Holds all mutable state for one parse: the source string, a byte cursor, an
/// [`ElemId`] counter, the [`PathTable`] being built, and a nesting-depth guard.
///
/// Call [`Parser::new`] to create an instance, then [`Parser::parse_value`] to
/// drive the parse. The result is a [`RawElement`] tree. [`parse`] then moves the
/// completed `table` into a [`DocumentInner`] and hands the raw tree plus the
/// shared `Rc<DocumentInner>` to [`into_element`] to obtain the public
/// [`Element`] tree.
struct Parser<'buf> {
    /// The full source string; all span byte positions are relative to this.
    source: &'buf str,
    /// Byte view of `source`; used for index-based reads without UTF-8 overhead.
    bytes: &'buf [u8],
    /// Current read position in bytes (an index into `bytes`).
    pos: usize,
    /// Counter for assigning sequential [`ElemId`]s depth-first; holds the id that
    /// the next call to `alloc_id` will hand out.
    next_id: usize,
    /// Path table being built as elements are parsed; one entry is pushed per
    /// element, immediately after its id is allocated.
    table: PathTable<'buf>,
    /// Current nesting depth; checked against [`MAX_DEPTH`] on each container open.
    depth: usize,
}
348
349impl<'buf> Parser<'buf> {
350    /// Creates a `Parser` that will read from `source`.
351    fn new(source: &'buf str) -> Self {
352        Self {
353            source,
354            bytes: source.as_bytes(),
355            pos: 0,
356            next_id: 0,
357            table: PathTable::default(),
358            depth: 0,
359        }
360    }
361
362    /// Allocates the next sequential [`ElemId`] and advances the counter.
363    fn alloc_id(&mut self) -> Result<ElemId, Error> {
364        let id = ElemId(self.next_id);
365        self.next_id = self
366            .next_id
367            .checked_add(1)
368            .ok_or_else(|| self.error(ErrorKind::MaxElements))?;
369        Ok(id)
370    }
371
372    /// Advances past any JSON whitespace (`space`, `tab`, `CR`, `LF`) at the current position.
373    fn skip_ws(&mut self) {
374        while matches!(self.bytes.get(self.pos), Some(&SPACE | &TAB | &LF | &CR)) {
375            self.chomp();
376        }
377    }
378
379    /// Returns the byte at the current position without advancing, or `None` at end of input.
380    fn peek(&self) -> Option<u8> {
381        self.bytes.get(self.pos).copied()
382    }
383
    /// Advances the cursor by one byte.
    ///
    /// Performs no bounds check of its own: every caller in this parser invokes it
    /// only after observing a byte at the current position, which keeps
    /// `pos <= bytes.len()`.
    #[inline]
    fn chomp(&mut self) {
        self.pos += 1;
    }
389
390    /// Create and return an `Error`.
391    fn error(&self, kind: ErrorKind) -> Error {
392        let parsed = &self.source[..self.pos];
393        Error {
394            byte_offset: parsed.len(),
395            position: super::line_col(parsed),
396            kind,
397        }
398    }
399
400    /// Returns the byte at the current position and advances past it, or `None` at end of input.
401    fn advance(&mut self) -> Option<u8> {
402        let b = self.bytes.get(self.pos).copied();
403        if b.is_some() {
404            self.chomp();
405        }
406        b
407    }
408
409    /// Asserts that the next byte equals `byte` and advances past it.
410    ///
411    /// Returns [`ErrorKind::UnexpectedChar`] if a different byte is present, or
412    /// [`ErrorKind::UnexpectedEOF`] if the input is exhausted.
413    fn expect_byte(&mut self, byte: u8) -> Result<(), Error> {
414        match self.bytes.get(self.pos) {
415            Some(&b) if b == byte => {
416                self.chomp();
417                Ok(())
418            }
419            Some(_) => Err(self.error(ErrorKind::UnexpectedChar {
420                expected: char::from(byte),
421            })),
422            None => Err(self.error(ErrorKind::UnexpectedEOF)),
423        }
424    }
425
426    /// Asserts that the next bytes match `literal` byte-for-byte, advancing past them.
427    ///
428    /// Used for JSON keywords (`null`, `true`, `false`). The caller dispatches via
429    /// [`Self::peek`] without consuming the first byte, so `literal` must include it.
430    fn expect_literal(&mut self, literal: &'static str) -> Result<(), Error> {
431        for &expected in literal.as_bytes() {
432            match self.advance() {
433                Some(b) if b == expected => {}
434                Some(_) => {
435                    self.pos -= 1;
436                    return Err(self.error(ErrorKind::ExpectedLiteral { expected: literal }));
437                }
438                None => return Err(self.error(ErrorKind::UnexpectedEOF)),
439            }
440        }
441        Ok(())
442    }
443
444    /// Parses one JSON value preceded by optional whitespace and returns a fully-formed [`Element`].
445    ///
446    /// Allocates an [`ElemId`] and records the path `entry` before dispatching to
447    /// [`Self::parse_value_kind`], so child elements produced during that call
448    /// already find this element's id in the table as their parent.
449    fn parse_value(&mut self, entry: PathEntry<'buf>) -> Result<RawElement<'buf>, Error> {
450        self.skip_ws();
451        // ID and table entry are registered before recursing so that child elements
452        // produced by parse_value_kind see this id as their parent.
453        let id = self.alloc_id()?;
454        self.table.push(entry);
455        let start = self.pos;
456        let value = self.parse_value_kind(id)?;
457        let span = Span::new(start as u32, self.pos as u32);
458        Ok(RawElement {
459            id,
460            span,
461            // The parent container extends this past the trailing comma and whitespace
462            // when it exists, so that the element's removal span covers its own separator.
463            full_span_end: span.end,
464            value,
465        })
466    }
467
468    /// Dispatches to the type-specific parser based on the first byte of the value.
469    ///
470    /// `id` is this element's own [`ElemId`], threaded down to [`Self::parse_array`]
471    /// and [`Self::parse_object`] so they can record it as the parent of their children.
472    fn parse_value_kind(&mut self, id: ElemId) -> Result<RawValue<'buf>, Error> {
473        match self
474            .peek()
475            .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
476        {
477            b'n' => {
478                self.expect_literal(NULL)?;
479                Ok(RawValue::Null)
480            }
481            b't' => {
482                self.expect_literal(TRUE)?;
483                Ok(RawValue::True)
484            }
485            b'f' => {
486                self.expect_literal(FALSE)?;
487                Ok(RawValue::False)
488            }
489            QUOTE => Ok(RawValue::String(self.parse_raw_str()?)),
490            MINUS | DIGIT_0..=DIGIT_9 => Ok(RawValue::Number(self.parse_number_str()?)),
491            ARRAY_OPEN => self.parse_array(id),
492            OBJECT_OPEN => self.parse_object(id),
493            _ => Err(self.error(ErrorKind::ExpectedStart)),
494        }
495    }
496
497    /// Parses a JSON number and returns the raw source slice.
498    ///
499    /// Grammar `RFC 8259 s6`:
500    /// ```text
501    /// number = [ '-' ] int [ frac ] [ exp ]
502    /// int    = '0' | [1-9] DIGIT*
503    /// frac   = '.' DIGIT+
504    /// exp    = ('e'|'E') ['+'|'-'] DIGIT+
505    /// ```
506    fn parse_number_str(&mut self) -> Result<&'buf str, Error> {
507        let start = self.pos;
508
509        if self.peek() == Some(MINUS) {
510            self.chomp();
511        }
512
513        match self
514            .peek()
515            .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
516        {
517            // A lone '0' is the only valid integer starting with zero; more digits
518            // after it would be a leading-zero violation (e.g. "01" is invalid JSON).
519            DIGIT_0 => self.chomp(),
520            DIGIT_1..=DIGIT_9 => {
521                while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
522                    self.chomp();
523                }
524            }
525            _ => return Err(self.error(ErrorKind::ExpectedNumeral)),
526        }
527
528        if self.peek() == Some(DECIMAL_POINT) {
529            self.chomp();
530            // At least one digit is required after the decimal point.
531            if !matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
532                return Err(match self.peek() {
533                    Some(_) => self.error(ErrorKind::ExpectedNumeral),
534                    None => self.error(ErrorKind::UnexpectedEOF),
535                });
536            }
537            while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
538                self.chomp();
539            }
540        }
541
542        if matches!(self.peek(), Some(EXP_LOWER | EXP_UPPER)) {
543            self.chomp();
544            if matches!(self.peek(), Some(PLUS | MINUS)) {
545                self.chomp();
546            }
547            // At least one digit is required after the exponent indicator (and optional sign).
548            if !matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
549                return Err(match self.peek() {
550                    Some(_) => self.error(ErrorKind::ExpectedNumeral),
551                    None => self.error(ErrorKind::UnexpectedEOF),
552                });
553            }
554            while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
555                self.chomp();
556            }
557        }
558
559        Ok(&self.source[start..self.pos])
560    }
561
562    /// Parses a JSON string and returns a [`RawStr`] with quotes stripped.
563    ///
564    /// Scans for the closing `"` delimiter, skipping one byte after every `\`
565    /// so that `\"` does not terminate the string. Escape sequences and control
566    /// characters are not validated here; callers use [`RawStr::decode_escapes`].
567    fn parse_raw_str(&mut self) -> Result<RawStr<'buf>, Error> {
568        self.expect_byte(QUOTE)?;
569        let content_start = self.pos; // First byte after the opening `"`.
570
571        loop {
572            match self
573                .advance()
574                .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
575            {
576                QUOTE => break,
577                BACKSLASH => {
578                    // Consume whatever follows so that `\"` does not close the string.
579                    self.advance()
580                        .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?;
581                }
582                _ => {}
583            }
584        }
585
586        // `advance()` left `pos` one past the closing '"', so `pos-1` is the '"' itself;
587        // `content_start..pos-1` therefore captures content without either delimiter.
588        Ok(RawStr(&self.source[content_start..self.pos - 1]))
589    }
590
591    /// Parses a JSON array `[...]` and returns [`Value::Array`].
592    ///
593    /// Increments the depth counter before consuming `[` and returns
594    /// [`ErrorKind::DepthLimitExceeded`] if the limit is exceeded.
595    fn parse_array(&mut self, parent_id: ElemId) -> Result<RawValue<'buf>, Error> {
596        self.depth += 1;
597        if self.depth > MAX_DEPTH {
598            return Err(self.error(ErrorKind::DepthLimitExceeded));
599        }
600        self.expect_byte(ARRAY_OPEN)?;
601        self.skip_ws();
602        let mut elements: Vec<RawElement<'buf>> = Vec::new();
603
604        if self.peek() != Some(ARRAY_CLOSE) {
605            loop {
606                let entry = PathEntry::Item {
607                    parent: parent_id,
608                    index: elements.len() as u32,
609                };
610                let mut elem = self.parse_value(entry)?;
611                self.skip_ws();
612                match self
613                    .peek()
614                    .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
615                {
616                    COMMA => {
617                        self.chomp();
618                        self.skip_ws();
619                        if self.peek() == Some(ARRAY_CLOSE) {
620                            return Err(self.error(ErrorKind::ExpectedEndArray));
621                        }
622                        // Extend past the comma and leading whitespace of the next sibling
623                        // so that removing this element also removes its own separator.
624                        elem.full_span_end = self.pos as u32;
625                        elements.push(elem);
626                    }
627                    ARRAY_CLOSE => {
628                        elements.push(elem);
629                        break;
630                    }
631                    _ => return Err(self.error(ErrorKind::ExpectedEndArray)),
632                }
633            }
634        }
635
636        self.expect_byte(ARRAY_CLOSE)?;
637        self.depth -= 1;
638        Ok(RawValue::Array(elements))
639    }
640
641    /// Parses a JSON object `{...}` and returns [`Value::Object`].
642    ///
643    /// Increments the depth counter before consuming `{` and returns
644    /// [`ErrorKind::DepthLimitExceeded`] if the limit is exceeded.
645    fn parse_object(&mut self, parent_id: ElemId) -> Result<RawValue<'buf>, Error> {
646        self.depth += 1;
647        if self.depth > MAX_DEPTH {
648            return Err(self.error(ErrorKind::DepthLimitExceeded));
649        }
650        self.expect_byte(OBJECT_OPEN)?;
651        self.skip_ws();
652        let mut fields: Vec<RawField<'buf>> = Vec::new();
653
654        if self.peek() != Some(OBJECT_CLOSE) {
655            loop {
656                let key_start = self.pos;
657                let key = self.parse_raw_str()?;
658                let key_span = Span::new(key_start as u32, self.pos as u32);
659                self.skip_ws();
660                self.expect_byte(COLON)?;
661                let entry = PathEntry::Field {
662                    parent: parent_id,
663                    key,
664                };
665                let mut elem = self.parse_value(entry)?;
666                self.skip_ws();
667                match self
668                    .peek()
669                    .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
670                {
671                    COMMA => {
672                        self.chomp();
673                        self.skip_ws();
674                        if self.peek() == Some(OBJECT_CLOSE) {
675                            return Err(self.error(ErrorKind::ExpectedEndObject));
676                        }
677                        // Same as in parse_array: extend past the comma and whitespace
678                        // so that removing this field also removes its own separator.
679                        elem.full_span_end = self.pos as u32;
680                        fields.push(RawField {
681                            key_span,
682                            element: elem,
683                        });
684                    }
685                    OBJECT_CLOSE => {
686                        fields.push(RawField {
687                            key_span,
688                            element: elem,
689                        });
690                        break;
691                    }
692                    _ => return Err(self.error(ErrorKind::ExpectedEndObject)),
693                }
694            }
695        }
696
697        self.expect_byte(OBJECT_CLOSE)?;
698        self.depth -= 1;
699        Ok(RawValue::Object(fields))
700    }
701}
702
703/// Converts a [`RawElement`] tree into the public [`Element`] tree, loading every
704/// node with a clone of `inner` at construction time.
705///
706/// Uses an explicit work stack instead of recursion. Children are pushed in
707/// reverse so they are processed in order; each `Build*` task then pops its
708/// children off `done` and assembles the parent.
709fn into_element<'buf>(raw: RawElement<'buf>, inner: &Rc<DocumentInner<'buf>>) -> Element<'buf> {
710    enum Task<'buf> {
711        Process(RawElement<'buf>),
712        BuildArray {
713            id: ElemId,
714            span: Span,
715            full_span_end: u32,
716            count: usize,
717        },
718        BuildObject {
719            id: ElemId,
720            span: Span,
721            full_span_end: u32,
722            key_spans: Vec<Span>,
723        },
724    }
725
726    let mut work: Vec<Task<'buf>> = vec![Task::Process(raw)];
727    let mut done: Vec<Element<'buf>> = Vec::new();
728
729    while let Some(task) = work.pop() {
730        match task {
731            Task::Process(raw) => {
732                let value = match raw.value {
733                    RawValue::Null => Value::Null,
734                    RawValue::True => Value::True,
735                    RawValue::False => Value::False,
736                    RawValue::String(s) => Value::String(s),
737                    RawValue::Number(n) => Value::Number(n),
738                    RawValue::Array(items) => {
739                        work.push(Task::BuildArray {
740                            id: raw.id,
741                            span: raw.span,
742                            full_span_end: raw.full_span_end,
743                            count: items.len(),
744                        });
745                        for item in items.into_iter().rev() {
746                            work.push(Task::Process(item));
747                        }
748                        continue;
749                    }
750                    RawValue::Object(fields) => {
751                        let key_spans = fields.iter().map(|f| f.key_span).collect();
752                        work.push(Task::BuildObject {
753                            id: raw.id,
754                            span: raw.span,
755                            full_span_end: raw.full_span_end,
756                            key_spans,
757                        });
758                        for field in fields.into_iter().rev() {
759                            work.push(Task::Process(field.element));
760                        }
761                        continue;
762                    }
763                };
764                done.push(Element {
765                    doc: Rc::clone(inner),
766                    id: raw.id,
767                    span: raw.span,
768                    full_span_end: raw.full_span_end,
769                    value,
770                });
771            }
772            Task::BuildArray {
773                id,
774                span,
775                full_span_end,
776                count,
777            } => {
778                let start = done.len() - count;
779                let items: Vec<Element<'buf>> = done.drain(start..).collect();
780                done.push(Element {
781                    doc: Rc::clone(inner),
782                    id,
783                    span,
784                    full_span_end,
785                    value: Value::Array(items),
786                });
787            }
788            Task::BuildObject {
789                id,
790                span,
791                full_span_end,
792                key_spans,
793            } => {
794                let count = key_spans.len();
795                let start = done.len() - count;
796                let elements: Vec<Element<'buf>> = done.drain(start..).collect();
797                let fields = key_spans
798                    .into_iter()
799                    .zip(elements)
800                    .map(|(key_span, element)| Field { key_span, element })
801                    .collect();
802                done.push(Element {
803                    doc: Rc::clone(inner),
804                    id,
805                    span,
806                    full_span_end,
807                    value: Value::Object(fields),
808                });
809            }
810        }
811    }
812
813    // Each Process task produces exactly one element in `done`, either directly
814    // (scalars) or via a Build task (containers). Starting with one root Process
815    // task guarantees exactly one element remains here.
816    done.swap_remove(0)
817}