Skip to main content

ocpi_tariffs/json/
parser.rs

1//! Hand-rolled single-pass recursive-descent JSON parser.
2//!
3//! # Responsibilities
4//!
5//! The parser is responsible for structural correctness only: balanced delimiters,
6//! valid top-level values, and well-formed numbers. String content is captured as
7//! [`RawStr`] slices of the source — escape sequences and control characters are
8//! left untouched. Validation of string content is the responsibility of
9//! [`crate::json::decode`].
10//!
11//! # Output
12//!
13//! [`parse`] returns a [`Document`] that wraps the root [`Element`] and the shared
14//! [`DocumentInner`]. Every element carries an `Rc<DocumentInner>` so it can resolve
15//! its own path after the [`Document`] has been dropped.
16//!
17//! # Limits
18//!
19//! - **Nesting depth**: capped at [`MAX_DEPTH`] (128 levels). Inputs that exceed this
20//!   are rejected with [`ErrorKind::DepthLimitExceeded`].
//! - **Element count**: capped at `u32::MAX` by the [`ElemId`] counter, enforced via
//!   [`ErrorKind::MaxElements`]. In practice, the 5-megabyte [`string::ReasonableLen`] gate
//!   makes this limit unreachable at the moment.
24//!
25//! # Two-phase construction
26//!
27//! The [`Parser`] builds a private [`RawElement`] tree and a [`PathTable`] in one
28//! pass. Once the full tree is available, [`into_element`] threads the shared
29//! `Rc<DocumentInner>` through every node to produce the public [`Element`] tree.
30//! This keeps the hot parsing loop free of reference-counting overhead.
31
32#![expect(
33    clippy::arithmetic_side_effects,
34    reason = "pos is bounded by source.len() and only advances after a successful byte read; arithmetic is safe within parser state machine invariants"
35)]
36#![expect(
37    clippy::as_conversions,
38    reason = "byte position casts between usize and u32 are safe: usize->u32 is bounded by available memory, u32->usize always fits"
39)]
40#![expect(
41    clippy::cast_possible_truncation,
42    reason = "source length is bounded by available memory, so byte positions always fit in u32"
43)]
44#![expect(
45    clippy::string_slice,
46    reason = "span boundaries are always at ASCII JSON token boundaries, so slices are valid UTF-8"
47)]
48
49#[cfg(test)]
50mod test;
51
52#[cfg(test)]
53mod test_basics;
54
55#[cfg(test)]
56mod test_parser;
57
58#[cfg(test)]
59mod test_type_sizes;
60
61use std::rc::Rc;
62
63use crate::{string, warning};
64
65use super::{
66    Document, DocumentInner, ElemId, Element, Field, Location, PathEntry, PathTable, RawStr, Span,
67    Value,
68};
69
/// Deepest array/object nesting the parser accepts.
///
/// RFC 8259 s9 permits an implementation to limit the maximum nesting depth; this
/// parser picks 128 levels. Inputs that nest deeper are rejected with
/// [`ErrorKind::DepthLimitExceeded`].
const MAX_DEPTH: usize = 128;

// Insignificant whitespace allowed around structural characters (`RFC 8259 s2`).
const SPACE: u8 = b' ';
const TAB: u8 = b'\t';
const LF: u8 = b'\n';
const CR: u8 = b'\r';

// String delimiter and escape introducer (`RFC 8259 s7`).
const QUOTE: u8 = b'"';
const BACKSLASH: u8 = b'\\';

// Separators and container delimiters (`RFC 8259 s2`).
const COMMA: u8 = b',';
const COLON: u8 = b':';
const ARRAY_OPEN: u8 = b'[';
const ARRAY_CLOSE: u8 = b']';
const OBJECT_OPEN: u8 = b'{';
const OBJECT_CLOSE: u8 = b'}';

// Bytes that make up the number grammar (`RFC 8259 s6`).
const MINUS: u8 = b'-';
const PLUS: u8 = b'+';
const DECIMAL_POINT: u8 = b'.';
const EXP_LOWER: u8 = b'e';
const EXP_UPPER: u8 = b'E';
const DIGIT_0: u8 = b'0';
const DIGIT_1: u8 = b'1';
const DIGIT_9: u8 = b'9';

// The three keyword literals (`RFC 8259 s3`), matched byte-for-byte.
const NULL: &[u8] = b"null";
const TRUE: &[u8] = b"true";
const FALSE: &[u8] = b"false";

// UTF-8 encoding of the byte order mark `U+FEFF`.
const BOM: &[u8; 3] = &[0xEF, 0xBB, 0xBF];
109
110/// Parse a JSON document from `source`.
111///
112/// All string content in the returned tree borrows from `source`.
113/// Call `element.path()` on any element to obtain its RFC 9535 path.
114pub(crate) fn parse(source: string::ReasonableLen<'_>) -> Result<Document<'_>, Error> {
115    let mut p = Parser::new(source.into_inner());
116    // Skip a UTF-8 BOM (`U+FEFF` encoded as 0xEF 0xBB 0xBF) if present.
117    if p.bytes.starts_with(BOM) {
118        p.pos = BOM.len();
119    }
120    let raw_root = p.parse_value(PathEntry::Root)?;
121    p.skip_ws();
122    if p.pos < p.bytes.len() {
123        return Err(p.error(ErrorKind::TrailingContent));
124    }
125    let inner = Rc::new(DocumentInner {
126        source: source.into_inner(),
127        paths: p.table,
128    });
129    let root = into_element(raw_root, &inner);
130    Ok(Document { inner, root })
131}
132
133/// A parse error produced when the input is not well-formed JSON.
134///
135/// Carries the byte offset and line/column position of the failure, and an
136/// [`ErrorKind`] that describes what was wrong.
137#[derive(Debug)]
138pub struct Error {
139    /// Byte offset of the error location.
140    byte_offset: usize,
141    /// A file location expressed as line and column.
142    position: Location,
143    /// The details about the error that occurred.
144    kind: ErrorKind,
145}
146
147impl Error {
148    /// Byte offset of the error location.
149    pub fn byte_offset(&self) -> usize {
150        self.byte_offset
151    }
152
153    /// Return a reference to the details about the error that occurred.
154    pub fn kind(&self) -> &ErrorKind {
155        &self.kind
156    }
157
158    /// Consume the `Error` and return the details about the error that occurred.
159    pub fn into_kind(self) -> ErrorKind {
160        self.kind
161    }
162
163    /// Consume the `Error` and return the byte offset of the error location and the details of the
164    /// error that occurred.
165    pub fn into_parts(self) -> (usize, ErrorKind) {
166        (self.byte_offset, self.kind)
167    }
168}
169
/// The specific reason a [`parse`] call failed.
#[derive(Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// A digit was required by the number grammar (at the start of the integer part,
    /// after `.`, or after the exponent marker) but a different byte was found.
    ExpectedNumeral,
    /// The first byte of a value is not one that can start any JSON value.
    ExpectedStart,
    /// A keyword (`null`, `true`, `false`) started but a later byte did not match.
    /// `expected` holds the complete keyword that was being matched.
    ExpectedLiteral { expected: &'static [u8] },
    /// After an array element neither `,` nor `]` followed, or `]` directly
    /// followed a `,`.
    ExpectedEndArray,
    /// After an object field neither `,` nor `}` followed, or `}` directly
    /// followed a `,`.
    ExpectedEndObject,
    /// A specific byte was required at this position but a different one was found.
    /// `expected` is the byte that was required.
    UnexpectedChar { expected: u8 },
    /// The input ended before the value was complete.
    UnexpectedEOF,
    /// Non-whitespace bytes follow the root value.
    TrailingContent,
    /// The input exceeds the maximum supported nesting depth.
    DepthLimitExceeded,
    /// The input contains more than [`u32::MAX`] JSON elements.
    MaxElements,
}
194
195impl crate::Warning for Error {
196    fn id(&self) -> warning::Id {
197        let s = match self.kind {
198            ErrorKind::ExpectedNumeral => "expected_numeral",
199            ErrorKind::ExpectedStart => "expected_start",
200            ErrorKind::ExpectedLiteral { .. } => "expected_literal",
201            ErrorKind::ExpectedEndArray => "expected_end_array",
202            ErrorKind::ExpectedEndObject => "expected_end_object",
203            ErrorKind::UnexpectedChar { .. } => "unexpected_char",
204            ErrorKind::UnexpectedEOF => "unexpected_eof",
205            ErrorKind::TrailingContent => "trailing_content",
206            ErrorKind::DepthLimitExceeded => "depth_limit_exceeded",
207            ErrorKind::MaxElements => "max_elements",
208        };
209
210        warning::Id::from_static(s)
211    }
212}
213
214impl std::fmt::Display for Error {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        let Self {
217            byte_offset,
218            position,
219            kind,
220        } = self;
221
222        match kind {
223            ErrorKind::ExpectedLiteral { expected } => {
224                write!(
225                    f,
226                    "unexpected literal found at line: `{position}`, byte `{byte_offset}`; expected: `{expected:?}`"
227                )
228            }
229            ErrorKind::ExpectedNumeral => {
230                write!(
231                    f,
232                    "unexpected numeral found at line: `{position}`, byte `{byte_offset}`; expected: `0-9`"
233                )
234            }
235            ErrorKind::ExpectedStart => {
236                write!(
237                    f,
238                    "unexpected start character found at line: `{position}`, byte `{byte_offset}`; expected one of: `[n, t, f, \", -, 0-9, [, {{]`"
239                )
240            }
241            ErrorKind::ExpectedEndArray => {
242                write!(
243                    f,
244                    "unexpected character found at line: `{position}`, byte `{byte_offset}`; expected: `,` or `]`"
245                )
246            }
247            ErrorKind::ExpectedEndObject => {
248                write!(
249                    f,
250                    "unexpected character found at line: `{position}`, byte `{byte_offset}`; expected: `,` or `}}`"
251                )
252            }
253            ErrorKind::UnexpectedChar { expected } => {
254                write!(
255                    f,
256                    "unexpected character `{expected}` found at line: `{position}`, byte `{byte_offset}``"
257                )
258            }
259            ErrorKind::UnexpectedEOF => write!(
260                f,
261                "unexpected end of input found at line: `{position}`, byte `{byte_offset}`"
262            ),
263            ErrorKind::TrailingContent => write!(
264                f,
265                "trailing content found at line: `{position}`, byte `{byte_offset}`"
266            ),
267            ErrorKind::DepthLimitExceeded => {
268                write!(f, "nesting depth exceeds the {MAX_DEPTH}-level limit")
269            }
270            ErrorKind::MaxElements => write!(f, "document exceeds {} JSON elements", u32::MAX),
271        }
272    }
273}
274
275impl std::error::Error for Error {}
276
277/// Parser-private element tree; carries no reference to [`DocumentInner`].
278///
279/// Converted to the public [`Element`] tree by [`into_element`] after
280/// [`DocumentInner`] is constructed.
281struct RawElement<'buf> {
282    /// Unique identifier within the document; sequentially assigned depth-first.
283    id: ElemId,
284    /// Byte range of the value only; use for replacement edits.
285    span: Span,
286    /// End of the value plus any trailing comma and whitespace; use for removal edits.
287    /// Equal to `span.end` when there is no trailing comma (root element, or last sibling).
288    full_span_end: u32,
289    /// Parsed value, borrowing from the source `&str`.
290    value: RawValue<'buf>,
291}
292
293/// The parsed content of a [`RawElement`], mirroring [`Value`] but without [`DocumentInner`].
294///
295/// Strings are kept as [`RawStr`] slices of the source; numbers are kept as
296/// raw `&str` slices. Both are converted to their public forms by [`into_element`].
297enum RawValue<'buf> {
298    /// JSON `null` literal.
299    Null,
300    /// JSON `true` literal.
301    True,
302    /// JSON `false` literal.
303    False,
304    /// String content with quotes removed; escape sequences are not decoded.
305    String(RawStr<'buf>),
306    /// Raw number text; not guaranteed to fit any specific numeric type.
307    Number(&'buf str),
308    /// Ordered list of child elements.
309    Array(Vec<RawElement<'buf>>),
310    /// Ordered list of key-value fields.
311    Object(Vec<RawField<'buf>>),
312}
313
314/// A key-value pair inside a JSON object, mirroring [`Field`] but without [`DocumentInner`].
315///
316/// `key_span` covers the quoted key bytes in the source, including the surrounding
317/// double-quotes. The value is stored as a [`RawElement`].
318struct RawField<'buf> {
319    /// Span of the key token, including surrounding `"` delimiters.
320    key_span: Span,
321    /// The value element; its path ends with the key from `key_span`.
322    element: RawElement<'buf>,
323}
324
325/// Single-pass recursive-descent JSON parser.
326///
327/// Holds all mutable state for one parse: the source string, a byte cursor, an
328/// [`ElemId`] counter, the [`PathTable`] being built, and a nesting-depth guard.
329///
330/// Call [`Parser::new`] to create an instance, then [`Parser::parse_value`] to
331/// drive the parse. The result is a [`RawElement`] tree; pass it together with
332/// the completed [`PathTable`] to [`into_element`] to obtain the public
333/// [`Element`] tree with shared [`DocumentInner`] attached.
334struct Parser<'buf> {
335    /// The full source string; all span byte positions are relative to this.
336    source: &'buf str,
337    /// Byte view of `source`; used for index-based reads without UTF-8 overhead.
338    bytes: &'buf [u8],
339    /// Current read position in bytes.
340    pos: usize,
341    /// Counter for assigning sequential [`ElemId`]s depth-first.
342    next_id: usize,
343    /// Path table being built as elements are parsed.
344    table: PathTable<'buf>,
345    /// Current nesting depth; checked against [`MAX_DEPTH`] on each container open.
346    depth: usize,
347}
348
349impl<'buf> Parser<'buf> {
350    /// Creates a `Parser` that will read from `source`.
351    fn new(source: &'buf str) -> Self {
352        Self {
353            source,
354            bytes: source.as_bytes(),
355            pos: 0,
356            next_id: 0,
357            table: PathTable::default(),
358            depth: 0,
359        }
360    }
361
362    /// Allocates the next sequential [`ElemId`] and advances the counter.
363    fn alloc_id(&mut self) -> Result<ElemId, Error> {
364        let id = ElemId(self.next_id);
365        self.next_id = self
366            .next_id
367            .checked_add(1)
368            .ok_or_else(|| self.error(ErrorKind::MaxElements))?;
369        Ok(id)
370    }
371
372    /// Advances past any JSON whitespace (`space`, `tab`, `CR`, `LF`) at the current position.
373    fn skip_ws(&mut self) {
374        while matches!(self.bytes.get(self.pos), Some(&SPACE | &TAB | &LF | &CR)) {
375            self.chomp();
376        }
377    }
378
379    /// Returns the byte at the current position without advancing, or `None` at end of input.
380    fn peek(&self) -> Option<u8> {
381        self.bytes.get(self.pos).copied()
382    }
383
384    /// Advances the cursor by one byte.
385    #[inline]
386    fn chomp(&mut self) {
387        self.pos += 1;
388    }
389
390    /// Create and return an `Error`.
391    fn error(&self, kind: ErrorKind) -> Error {
392        let parsed = &self.source[..self.pos];
393        Error {
394            byte_offset: parsed.len(),
395            position: super::line_col(parsed),
396            kind,
397        }
398    }
399
400    /// Returns the byte at the current position and advances past it, or `None` at end of input.
401    fn advance(&mut self) -> Option<u8> {
402        let b = self.bytes.get(self.pos).copied();
403        if b.is_some() {
404            self.chomp();
405        }
406        b
407    }
408
409    /// Asserts that the next byte equals `byte` and advances past it.
410    ///
411    /// Returns [`ErrorKind::UnexpectedChar`] if a different byte is present, or
412    /// [`ErrorKind::UnexpectedEOF`] if the input is exhausted.
413    fn expect_byte(&mut self, byte: u8) -> Result<(), Error> {
414        match self.bytes.get(self.pos) {
415            Some(&b) if b == byte => {
416                self.chomp();
417                Ok(())
418            }
419            Some(_) => Err(self.error(ErrorKind::UnexpectedChar { expected: byte })),
420            None => Err(self.error(ErrorKind::UnexpectedEOF)),
421        }
422    }
423
424    /// Asserts that the next bytes match `literal` byte-for-byte, advancing past them.
425    ///
426    /// Used for JSON keywords (`null`, `true`, `false`). The caller dispatches via
427    /// [`Self::peek`] without consuming the first byte, so `literal` must include it.
428    fn expect_literal(&mut self, literal: &'static [u8]) -> Result<(), Error> {
429        for &expected in literal {
430            match self.advance() {
431                Some(b) if b == expected => {}
432                Some(_) => {
433                    self.pos -= 1;
434                    return Err(self.error(ErrorKind::ExpectedLiteral { expected: literal }));
435                }
436                None => return Err(self.error(ErrorKind::UnexpectedEOF)),
437            }
438        }
439        Ok(())
440    }
441
442    /// Parses one JSON value preceded by optional whitespace and returns a fully-formed [`Element`].
443    ///
444    /// Allocates an [`ElemId`] and records the path `entry` before dispatching to
445    /// [`Self::parse_value_kind`], so child elements produced during that call
446    /// already find this element's id in the table as their parent.
447    fn parse_value(&mut self, entry: PathEntry<'buf>) -> Result<RawElement<'buf>, Error> {
448        self.skip_ws();
449        // ID and table entry are registered before recursing so that child elements
450        // produced by parse_value_kind see this id as their parent.
451        let id = self.alloc_id()?;
452        self.table.push(entry);
453        let start = self.pos;
454        let value = self.parse_value_kind(id)?;
455        let span = Span::new(start as u32, self.pos as u32);
456        Ok(RawElement {
457            id,
458            span,
459            // The parent container extends this past the trailing comma and whitespace
460            // when it exists, so that the element's removal span covers its own separator.
461            full_span_end: span.end,
462            value,
463        })
464    }
465
466    /// Dispatches to the type-specific parser based on the first byte of the value.
467    ///
468    /// `id` is this element's own [`ElemId`], threaded down to [`Self::parse_array`]
469    /// and [`Self::parse_object`] so they can record it as the parent of their children.
470    fn parse_value_kind(&mut self, id: ElemId) -> Result<RawValue<'buf>, Error> {
471        match self
472            .peek()
473            .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
474        {
475            b'n' => {
476                self.expect_literal(NULL)?;
477                Ok(RawValue::Null)
478            }
479            b't' => {
480                self.expect_literal(TRUE)?;
481                Ok(RawValue::True)
482            }
483            b'f' => {
484                self.expect_literal(FALSE)?;
485                Ok(RawValue::False)
486            }
487            QUOTE => Ok(RawValue::String(self.parse_raw_str()?)),
488            MINUS | DIGIT_0..=DIGIT_9 => Ok(RawValue::Number(self.parse_number_str()?)),
489            ARRAY_OPEN => self.parse_array(id),
490            OBJECT_OPEN => self.parse_object(id),
491            _ => Err(self.error(ErrorKind::ExpectedStart)),
492        }
493    }
494
495    /// Parses a JSON number and returns the raw source slice.
496    ///
497    /// Grammar `RFC 8259 s6`:
498    /// ```text
499    /// number = [ '-' ] int [ frac ] [ exp ]
500    /// int    = '0' | [1-9] DIGIT*
501    /// frac   = '.' DIGIT+
502    /// exp    = ('e'|'E') ['+'|'-'] DIGIT+
503    /// ```
504    fn parse_number_str(&mut self) -> Result<&'buf str, Error> {
505        let start = self.pos;
506
507        if self.peek() == Some(MINUS) {
508            self.chomp();
509        }
510
511        match self
512            .peek()
513            .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
514        {
515            // A lone '0' is the only valid integer starting with zero; more digits
516            // after it would be a leading-zero violation (e.g. "01" is invalid JSON).
517            DIGIT_0 => self.chomp(),
518            DIGIT_1..=DIGIT_9 => {
519                while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
520                    self.chomp();
521                }
522            }
523            _ => return Err(self.error(ErrorKind::ExpectedNumeral)),
524        }
525
526        if self.peek() == Some(DECIMAL_POINT) {
527            self.chomp();
528            // At least one digit is required after the decimal point.
529            if !matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
530                return Err(match self.peek() {
531                    Some(_) => self.error(ErrorKind::ExpectedNumeral),
532                    None => self.error(ErrorKind::UnexpectedEOF),
533                });
534            }
535            while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
536                self.chomp();
537            }
538        }
539
540        if matches!(self.peek(), Some(EXP_LOWER | EXP_UPPER)) {
541            self.chomp();
542            if matches!(self.peek(), Some(PLUS | MINUS)) {
543                self.chomp();
544            }
545            // At least one digit is required after the exponent indicator (and optional sign).
546            if !matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
547                return Err(match self.peek() {
548                    Some(_) => self.error(ErrorKind::ExpectedNumeral),
549                    None => self.error(ErrorKind::UnexpectedEOF),
550                });
551            }
552            while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
553                self.chomp();
554            }
555        }
556
557        Ok(&self.source[start..self.pos])
558    }
559
560    /// Parses a JSON string and returns a [`RawStr`] with quotes stripped.
561    ///
562    /// Scans for the closing `"` delimiter, skipping one byte after every `\`
563    /// so that `\"` does not terminate the string. Escape sequences and control
564    /// characters are not validated here; callers use [`RawStr::decode_escapes`].
565    fn parse_raw_str(&mut self) -> Result<RawStr<'buf>, Error> {
566        self.expect_byte(QUOTE)?;
567        let content_start = self.pos; // First byte after the opening `"`.
568
569        loop {
570            match self
571                .advance()
572                .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
573            {
574                QUOTE => break,
575                BACKSLASH => {
576                    // Consume whatever follows so that `\"` does not close the string.
577                    self.advance()
578                        .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?;
579                }
580                _ => {}
581            }
582        }
583
584        // `advance()` left `pos` one past the closing '"', so `pos-1` is the '"' itself;
585        // `content_start..pos-1` therefore captures content without either delimiter.
586        Ok(RawStr(&self.source[content_start..self.pos - 1]))
587    }
588
589    /// Parses a JSON array `[...]` and returns [`Value::Array`].
590    ///
591    /// Increments the depth counter before consuming `[` and returns
592    /// [`ErrorKind::DepthLimitExceeded`] if the limit is exceeded.
593    fn parse_array(&mut self, parent_id: ElemId) -> Result<RawValue<'buf>, Error> {
594        self.depth += 1;
595        if self.depth > MAX_DEPTH {
596            return Err(self.error(ErrorKind::DepthLimitExceeded));
597        }
598        self.expect_byte(ARRAY_OPEN)?;
599        self.skip_ws();
600        let mut elements: Vec<RawElement<'buf>> = Vec::new();
601
602        if self.peek() != Some(ARRAY_CLOSE) {
603            loop {
604                let entry = PathEntry::Item {
605                    parent: parent_id,
606                    index: elements.len() as u32,
607                };
608                let mut elem = self.parse_value(entry)?;
609                self.skip_ws();
610                match self
611                    .peek()
612                    .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
613                {
614                    COMMA => {
615                        self.chomp();
616                        self.skip_ws();
617                        if self.peek() == Some(ARRAY_CLOSE) {
618                            return Err(self.error(ErrorKind::ExpectedEndArray));
619                        }
620                        // Extend past the comma and leading whitespace of the next sibling
621                        // so that removing this element also removes its own separator.
622                        elem.full_span_end = self.pos as u32;
623                        elements.push(elem);
624                    }
625                    ARRAY_CLOSE => {
626                        elements.push(elem);
627                        break;
628                    }
629                    _ => return Err(self.error(ErrorKind::ExpectedEndArray)),
630                }
631            }
632        }
633
634        self.expect_byte(ARRAY_CLOSE)?;
635        self.depth -= 1;
636        Ok(RawValue::Array(elements))
637    }
638
639    /// Parses a JSON object `{...}` and returns [`Value::Object`].
640    ///
641    /// Increments the depth counter before consuming `{` and returns
642    /// [`ErrorKind::DepthLimitExceeded`] if the limit is exceeded.
643    fn parse_object(&mut self, parent_id: ElemId) -> Result<RawValue<'buf>, Error> {
644        self.depth += 1;
645        if self.depth > MAX_DEPTH {
646            return Err(self.error(ErrorKind::DepthLimitExceeded));
647        }
648        self.expect_byte(OBJECT_OPEN)?;
649        self.skip_ws();
650        let mut fields: Vec<RawField<'buf>> = Vec::new();
651
652        if self.peek() != Some(OBJECT_CLOSE) {
653            loop {
654                let key_start = self.pos;
655                let key = self.parse_raw_str()?;
656                let key_span = Span::new(key_start as u32, self.pos as u32);
657                self.skip_ws();
658                self.expect_byte(COLON)?;
659                let entry = PathEntry::Field {
660                    parent: parent_id,
661                    key,
662                };
663                let mut elem = self.parse_value(entry)?;
664                self.skip_ws();
665                match self
666                    .peek()
667                    .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
668                {
669                    COMMA => {
670                        self.chomp();
671                        self.skip_ws();
672                        if self.peek() == Some(OBJECT_CLOSE) {
673                            return Err(self.error(ErrorKind::ExpectedEndObject));
674                        }
675                        // Same as in parse_array: extend past the comma and whitespace
676                        // so that removing this field also removes its own separator.
677                        elem.full_span_end = self.pos as u32;
678                        fields.push(RawField {
679                            key_span,
680                            element: elem,
681                        });
682                    }
683                    OBJECT_CLOSE => {
684                        fields.push(RawField {
685                            key_span,
686                            element: elem,
687                        });
688                        break;
689                    }
690                    _ => return Err(self.error(ErrorKind::ExpectedEndObject)),
691                }
692            }
693        }
694
695        self.expect_byte(OBJECT_CLOSE)?;
696        self.depth -= 1;
697        Ok(RawValue::Object(fields))
698    }
699}
700
701/// Converts a [`RawElement`] tree into the public [`Element`] tree, loading every
702/// node with a clone of `inner` at construction time.
703///
704/// Uses an explicit work stack instead of recursion. Children are pushed in
705/// reverse so they are processed in order; each `Build*` task then pops its
706/// children off `done` and assembles the parent.
707fn into_element<'buf>(raw: RawElement<'buf>, inner: &Rc<DocumentInner<'buf>>) -> Element<'buf> {
708    enum Task<'buf> {
709        Process(RawElement<'buf>),
710        BuildArray {
711            id: ElemId,
712            span: Span,
713            full_span_end: u32,
714            count: usize,
715        },
716        BuildObject {
717            id: ElemId,
718            span: Span,
719            full_span_end: u32,
720            key_spans: Vec<Span>,
721        },
722    }
723
724    let mut work: Vec<Task<'buf>> = vec![Task::Process(raw)];
725    let mut done: Vec<Element<'buf>> = Vec::new();
726
727    while let Some(task) = work.pop() {
728        match task {
729            Task::Process(raw) => {
730                let value = match raw.value {
731                    RawValue::Null => Value::Null,
732                    RawValue::True => Value::True,
733                    RawValue::False => Value::False,
734                    RawValue::String(s) => Value::String(s),
735                    RawValue::Number(n) => Value::Number(n),
736                    RawValue::Array(items) => {
737                        work.push(Task::BuildArray {
738                            id: raw.id,
739                            span: raw.span,
740                            full_span_end: raw.full_span_end,
741                            count: items.len(),
742                        });
743                        for item in items.into_iter().rev() {
744                            work.push(Task::Process(item));
745                        }
746                        continue;
747                    }
748                    RawValue::Object(fields) => {
749                        let key_spans = fields.iter().map(|f| f.key_span).collect();
750                        work.push(Task::BuildObject {
751                            id: raw.id,
752                            span: raw.span,
753                            full_span_end: raw.full_span_end,
754                            key_spans,
755                        });
756                        for field in fields.into_iter().rev() {
757                            work.push(Task::Process(field.element));
758                        }
759                        continue;
760                    }
761                };
762                done.push(Element {
763                    doc: Rc::clone(inner),
764                    id: raw.id,
765                    span: raw.span,
766                    full_span_end: raw.full_span_end,
767                    value,
768                });
769            }
770            Task::BuildArray {
771                id,
772                span,
773                full_span_end,
774                count,
775            } => {
776                let start = done.len() - count;
777                let items: Vec<Element<'buf>> = done.drain(start..).collect();
778                done.push(Element {
779                    doc: Rc::clone(inner),
780                    id,
781                    span,
782                    full_span_end,
783                    value: Value::Array(items),
784                });
785            }
786            Task::BuildObject {
787                id,
788                span,
789                full_span_end,
790                key_spans,
791            } => {
792                let count = key_spans.len();
793                let start = done.len() - count;
794                let elements: Vec<Element<'buf>> = done.drain(start..).collect();
795                let fields = key_spans
796                    .into_iter()
797                    .zip(elements)
798                    .map(|(key_span, element)| Field { key_span, element })
799                    .collect();
800                done.push(Element {
801                    doc: Rc::clone(inner),
802                    id,
803                    span,
804                    full_span_end,
805                    value: Value::Object(fields),
806                });
807            }
808        }
809    }
810
811    // Each Process task produces exactly one element in `done`, either directly
812    // (scalars) or via a Build task (containers). Starting with one root Process
813    // task guarantees exactly one element remains here.
814    done.swap_remove(0)
815}