ocpi_tariffs/json/parser.rs
1//! Hand-rolled single-pass recursive-descent JSON parser.
2//!
3//! # Responsibilities
4//!
5//! The parser is responsible for structural correctness only: balanced delimiters,
6//! valid top-level values, and well-formed numbers. String content is captured as
7//! [`RawStr`] slices of the source — escape sequences and control characters are
8//! left untouched. Validation of string content is the responsibility of
9//! [`crate::json::decode`].
10//!
11//! # Output
12//!
13//! [`parse`] returns a [`Document`] that wraps the root [`Element`] and the shared
14//! [`DocumentInner`]. Every element carries an `Rc<DocumentInner>` so it can resolve
15//! its own path after the [`Document`] has been dropped.
16//!
17//! # Limits
18//!
19//! - **Nesting depth**: capped at [`MAX_DEPTH`] (128 levels). Inputs that exceed this
20//! are rejected with [`ErrorKind::DepthLimitExceeded`].
21//! - **Element count**: capped at `u32::MAX` by the [`ElemId`] counter, enforced via
//!   [`ErrorKind::MaxElements`]. In practice, the 5-megabyte [`string::ReasonableLen`] gate
//!   currently makes this limit unreachable.
24//!
25//! # Two-phase construction
26//!
27//! The [`Parser`] builds a private [`RawElement`] tree and a [`PathTable`] in one
28//! pass. Once the full tree is available, [`into_element`] threads the shared
29//! `Rc<DocumentInner>` through every node to produce the public [`Element`] tree.
30//! This keeps the hot parsing loop free of reference-counting overhead.
31
32#![expect(
33 clippy::arithmetic_side_effects,
34 reason = "pos is bounded by source.len() and only advances after a successful byte read; arithmetic is safe within parser state machine invariants"
35)]
36#![expect(
37 clippy::as_conversions,
38 reason = "byte position casts between usize and u32 are safe: usize->u32 is bounded by available memory, u32->usize always fits"
39)]
40#![expect(
41 clippy::cast_possible_truncation,
42 reason = "source length is bounded by available memory, so byte positions always fit in u32"
43)]
44#![expect(
45 clippy::string_slice,
46 reason = "span boundaries are always at ASCII JSON token boundaries, so slices are valid UTF-8"
47)]
48
49#[cfg(test)]
50mod test;
51
52#[cfg(test)]
53mod test_basics;
54
55#[cfg(test)]
56mod test_parser;
57
58#[cfg(test)]
59mod test_type_sizes;
60
61use std::rc::Rc;
62
63use crate::{string, warning};
64
65use super::{
66 Document, DocumentInner, ElemId, Element, Field, Location, PathEntry, PathTable, RawStr, Span,
67 Value,
68};
69
/// Maximum nesting depth for arrays and objects.
///
/// RFC 8259 s9 allows an implementation to limit the maximum depth of nesting;
/// 128 levels is the limit chosen here. The depth counter is incremented on every
/// container open, and inputs that push it past this value are rejected with
/// [`ErrorKind::DepthLimitExceeded`].
const MAX_DEPTH: usize = 128;

// JSON whitespace characters `RFC 8259 s2`.
// These are the only bytes skipped between tokens.
const SPACE: u8 = b' ';
const TAB: u8 = b'\t';
const LF: u8 = b'\n';
const CR: u8 = b'\r';

// Structural characters `RFC 8259 s2`, plus the string delimiter and the
// escape introducer used while scanning string content.
const QUOTE: u8 = b'"';
const BACKSLASH: u8 = b'\\';
const COMMA: u8 = b',';
const COLON: u8 = b':';
const ARRAY_OPEN: u8 = b'[';
const ARRAY_CLOSE: u8 = b']';
const OBJECT_OPEN: u8 = b'{';
const OBJECT_CLOSE: u8 = b'}';

// Number-grammar characters `RFC 8259 s6`.
// `DIGIT_1` exists because a multi-digit integer part may not start with `0`.
const MINUS: u8 = b'-';
const PLUS: u8 = b'+';
const DECIMAL_POINT: u8 = b'.';
const EXP_LOWER: u8 = b'e';
const EXP_UPPER: u8 = b'E';
const DIGIT_0: u8 = b'0';
const DIGIT_1: u8 = b'1';
const DIGIT_9: u8 = b'9';

// JSON keyword literals `RFC 8259 s3`.
// Each includes its first byte, because the dispatcher only peeks at it.
const NULL: &[u8] = b"null";
const TRUE: &[u8] = b"true";
const FALSE: &[u8] = b"false";

// UTF-8 BOM (`U+FEFF`, encoded as 0xEF 0xBB 0xBF).
// Skipped by `parse` when it is the very first thing in the input.
const BOM: &[u8; 3] = b"\xEF\xBB\xBF";
109
110/// Parse a JSON document from `source`.
111///
112/// All string content in the returned tree borrows from `source`.
113/// Call `element.path()` on any element to obtain its RFC 9535 path.
114pub(crate) fn parse(source: string::ReasonableLen<'_>) -> Result<Document<'_>, Error> {
115 let mut p = Parser::new(source.into_inner());
116 // Skip a UTF-8 BOM (`U+FEFF` encoded as 0xEF 0xBB 0xBF) if present.
117 if p.bytes.starts_with(BOM) {
118 p.pos = BOM.len();
119 }
120 let raw_root = p.parse_value(PathEntry::Root)?;
121 p.skip_ws();
122 if p.pos < p.bytes.len() {
123 return Err(p.error(ErrorKind::TrailingContent));
124 }
125 let inner = Rc::new(DocumentInner {
126 source: source.into_inner(),
127 paths: p.table,
128 });
129 let root = into_element(raw_root, &inner);
130 Ok(Document { inner, root })
131}
132
133/// A parse error produced when the input is not well-formed JSON.
134///
135/// Carries the byte offset and line/column position of the failure, and an
136/// [`ErrorKind`] that describes what was wrong.
137#[derive(Debug)]
138pub struct Error {
139 /// Byte offset of the error location.
140 byte_offset: usize,
141 /// A file location expressed as line and column.
142 position: Location,
143 /// The details about the error that occurred.
144 kind: ErrorKind,
145}
146
147impl Error {
148 /// Byte offset of the error location.
149 pub fn byte_offset(&self) -> usize {
150 self.byte_offset
151 }
152
153 /// Return a reference to the details about the error that occurred.
154 pub fn kind(&self) -> &ErrorKind {
155 &self.kind
156 }
157
158 /// Consume the `Error` and return the details about the error that occurred.
159 pub fn into_kind(self) -> ErrorKind {
160 self.kind
161 }
162
163 /// Consume the `Error` and return the byte offset of the error location and the details of the
164 /// error that occurred.
165 pub fn into_parts(self) -> (usize, ErrorKind) {
166 (self.byte_offset, self.kind)
167 }
168}
169
/// The specific reason a [`parse`] call failed.
///
/// Variants carry only what the parser knew was required at the failure point;
/// the location of the failure lives on the enclosing [`Error`].
#[derive(Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// A character that cannot be a JSON number was encountered.
    ///
    /// Raised while scanning a number when a digit was required but another
    /// byte was found.
    ExpectedNumeral,
    /// A character that cannot start a JSON value was encountered.
    ExpectedStart,
    /// A literal that cannot start or continue a JSON value was encountered.
    ExpectedLiteral {
        /// The complete keyword (`null`, `true` or `false`) the parser was matching.
        expected: &'static [u8],
    },
    /// An array wasn't terminated correctly.
    ///
    /// Also raised for a trailing comma directly before `]`.
    ExpectedEndArray,
    /// An object wasn't terminated correctly.
    ///
    /// Also raised for a trailing comma directly before `}`.
    ExpectedEndObject,
    /// A character that cannot continue a JSON value was encountered.
    UnexpectedChar {
        /// The byte the parser required at this position (not the byte it found).
        expected: u8,
    },
    /// The input ended before the value was complete.
    UnexpectedEOF,
    /// Non-whitespace bytes follow the root value.
    TrailingContent,
    /// The input exceeds the maximum supported nesting depth.
    DepthLimitExceeded,
    /// The input contains more than [`u32::MAX`] JSON elements.
    MaxElements,
}
194
195impl crate::Warning for Error {
196 fn id(&self) -> warning::Id {
197 let s = match self.kind {
198 ErrorKind::ExpectedNumeral => "expected_numeral",
199 ErrorKind::ExpectedStart => "expected_start",
200 ErrorKind::ExpectedLiteral { .. } => "expected_literal",
201 ErrorKind::ExpectedEndArray => "expected_end_array",
202 ErrorKind::ExpectedEndObject => "expected_end_object",
203 ErrorKind::UnexpectedChar { .. } => "unexpected_char",
204 ErrorKind::UnexpectedEOF => "unexpected_eof",
205 ErrorKind::TrailingContent => "trailing_content",
206 ErrorKind::DepthLimitExceeded => "depth_limit_exceeded",
207 ErrorKind::MaxElements => "max_elements",
208 };
209
210 warning::Id::from_static(s)
211 }
212}
213
214impl std::fmt::Display for Error {
215 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216 let Self {
217 byte_offset,
218 position,
219 kind,
220 } = self;
221
222 match kind {
223 ErrorKind::ExpectedLiteral { expected } => {
224 write!(
225 f,
226 "unexpected literal found at line: `{position}`, byte `{byte_offset}`; expected: `{expected:?}`"
227 )
228 }
229 ErrorKind::ExpectedNumeral => {
230 write!(
231 f,
232 "unexpected numeral found at line: `{position}`, byte `{byte_offset}`; expected: `0-9`"
233 )
234 }
235 ErrorKind::ExpectedStart => {
236 write!(
237 f,
238 "unexpected start character found at line: `{position}`, byte `{byte_offset}`; expected one of: `[n, t, f, \", -, 0-9, [, {{]`"
239 )
240 }
241 ErrorKind::ExpectedEndArray => {
242 write!(
243 f,
244 "unexpected character found at line: `{position}`, byte `{byte_offset}`; expected: `,` or `]`"
245 )
246 }
247 ErrorKind::ExpectedEndObject => {
248 write!(
249 f,
250 "unexpected character found at line: `{position}`, byte `{byte_offset}`; expected: `,` or `}}`"
251 )
252 }
253 ErrorKind::UnexpectedChar { expected } => {
254 write!(
255 f,
256 "unexpected character `{expected}` found at line: `{position}`, byte `{byte_offset}``"
257 )
258 }
259 ErrorKind::UnexpectedEOF => write!(
260 f,
261 "unexpected end of input found at line: `{position}`, byte `{byte_offset}`"
262 ),
263 ErrorKind::TrailingContent => write!(
264 f,
265 "trailing content found at line: `{position}`, byte `{byte_offset}`"
266 ),
267 ErrorKind::DepthLimitExceeded => {
268 write!(f, "nesting depth exceeds the {MAX_DEPTH}-level limit")
269 }
270 ErrorKind::MaxElements => write!(f, "document exceeds {} JSON elements", u32::MAX),
271 }
272 }
273}
274
// No `source()` override: `Error` holds no underlying cause, only a location and a kind.
impl std::error::Error for Error {}
276
/// Parser-private element tree; carries no reference to [`DocumentInner`].
///
/// Produced by [`Parser::parse_value`] and converted to the public [`Element`]
/// tree by [`into_element`] after [`DocumentInner`] is constructed.
struct RawElement<'buf> {
    /// Unique identifier within the document; sequentially assigned depth-first.
    /// A container's id is allocated before any of its children are parsed.
    id: ElemId,
    /// Byte range of the value only; use for replacement edits.
    span: Span,
    /// End of the value plus any trailing comma and whitespace; use for removal edits.
    /// Equal to `span.end` when there is no trailing comma (root element, or last sibling).
    /// Starts out as `span.end`; the enclosing array/object parser extends it when a
    /// comma follows the element.
    full_span_end: u32,
    /// Parsed value, borrowing from the source `&str`.
    value: RawValue<'buf>,
}
292
/// The parsed content of a [`RawElement`], mirroring [`Value`] but without [`DocumentInner`].
///
/// Strings are kept as [`RawStr`] slices of the source; numbers are kept as
/// raw `&str` slices. [`into_element`] maps every variant onto the [`Value`]
/// variant of the same name, converting child elements along the way.
enum RawValue<'buf> {
    /// JSON `null` literal.
    Null,
    /// JSON `true` literal.
    True,
    /// JSON `false` literal.
    False,
    /// String content with quotes removed; escape sequences are not decoded.
    String(RawStr<'buf>),
    /// Raw number text; not guaranteed to fit any specific numeric type.
    /// The text has been checked against the RFC 8259 number grammar only.
    Number(&'buf str),
    /// Ordered list of child elements, in source order.
    Array(Vec<RawElement<'buf>>),
    /// Ordered list of key-value fields, in source order.
    Object(Vec<RawField<'buf>>),
}
313
/// A key-value pair inside a JSON object, mirroring [`Field`] but without [`DocumentInner`].
///
/// `key_span` covers the quoted key bytes in the source, including the surrounding
/// double-quotes. The value is stored as a [`RawElement`]. The key text itself is
/// not stored here; the object parser records it in the path entry of the value.
struct RawField<'buf> {
    /// Span of the key token, including surrounding `"` delimiters.
    key_span: Span,
    /// The value element; its path ends with the key from `key_span`.
    element: RawElement<'buf>,
}
324
/// Single-pass recursive-descent JSON parser.
///
/// Holds all mutable state for one parse: the source string, a byte cursor, an
/// [`ElemId`] counter, the [`PathTable`] being built, and a nesting-depth guard.
///
/// Call [`Parser::new`] to create an instance, then [`Parser::parse_value`] to
/// drive the parse. The result is a [`RawElement`] tree; pass it together with
/// the completed [`PathTable`] to [`into_element`] to obtain the public
/// [`Element`] tree with shared [`DocumentInner`] attached.
///
/// A `Parser` is not reusable after an error: the cursor and depth counter are
/// left wherever the failure occurred.
struct Parser<'buf> {
    /// The full source string; all span byte positions are relative to this.
    source: &'buf str,
    /// Byte view of `source`; used for index-based reads without UTF-8 overhead.
    bytes: &'buf [u8],
    /// Current read position in bytes; an index into `bytes` (equal to its length at end of input).
    pos: usize,
    /// Counter for assigning sequential [`ElemId`]s depth-first; holds the next id to hand out.
    next_id: usize,
    /// Path table being built as elements are parsed; one entry is pushed per element.
    table: PathTable<'buf>,
    /// Current nesting depth; checked against [`MAX_DEPTH`] on each container open.
    depth: usize,
}
348
349impl<'buf> Parser<'buf> {
350 /// Creates a `Parser` that will read from `source`.
351 fn new(source: &'buf str) -> Self {
352 Self {
353 source,
354 bytes: source.as_bytes(),
355 pos: 0,
356 next_id: 0,
357 table: PathTable::default(),
358 depth: 0,
359 }
360 }
361
362 /// Allocates the next sequential [`ElemId`] and advances the counter.
363 fn alloc_id(&mut self) -> Result<ElemId, Error> {
364 let id = ElemId(self.next_id);
365 self.next_id = self
366 .next_id
367 .checked_add(1)
368 .ok_or_else(|| self.error(ErrorKind::MaxElements))?;
369 Ok(id)
370 }
371
372 /// Advances past any JSON whitespace (`space`, `tab`, `CR`, `LF`) at the current position.
373 fn skip_ws(&mut self) {
374 while matches!(self.bytes.get(self.pos), Some(&SPACE | &TAB | &LF | &CR)) {
375 self.chomp();
376 }
377 }
378
379 /// Returns the byte at the current position without advancing, or `None` at end of input.
380 fn peek(&self) -> Option<u8> {
381 self.bytes.get(self.pos).copied()
382 }
383
    /// Advances the cursor by one byte.
    ///
    /// Performs no bounds check of its own: every caller in this file invokes it
    /// only after observing a byte at the current position.
    #[inline]
    fn chomp(&mut self) {
        self.pos += 1;
    }
389
390 /// Create and return an `Error`.
391 fn error(&self, kind: ErrorKind) -> Error {
392 let parsed = &self.source[..self.pos];
393 Error {
394 byte_offset: parsed.len(),
395 position: super::line_col(parsed),
396 kind,
397 }
398 }
399
400 /// Returns the byte at the current position and advances past it, or `None` at end of input.
401 fn advance(&mut self) -> Option<u8> {
402 let b = self.bytes.get(self.pos).copied();
403 if b.is_some() {
404 self.chomp();
405 }
406 b
407 }
408
409 /// Asserts that the next byte equals `byte` and advances past it.
410 ///
411 /// Returns [`ErrorKind::UnexpectedChar`] if a different byte is present, or
412 /// [`ErrorKind::UnexpectedEOF`] if the input is exhausted.
413 fn expect_byte(&mut self, byte: u8) -> Result<(), Error> {
414 match self.bytes.get(self.pos) {
415 Some(&b) if b == byte => {
416 self.chomp();
417 Ok(())
418 }
419 Some(_) => Err(self.error(ErrorKind::UnexpectedChar { expected: byte })),
420 None => Err(self.error(ErrorKind::UnexpectedEOF)),
421 }
422 }
423
424 /// Asserts that the next bytes match `literal` byte-for-byte, advancing past them.
425 ///
426 /// Used for JSON keywords (`null`, `true`, `false`). The caller dispatches via
427 /// [`Self::peek`] without consuming the first byte, so `literal` must include it.
428 fn expect_literal(&mut self, literal: &'static [u8]) -> Result<(), Error> {
429 for &expected in literal {
430 match self.advance() {
431 Some(b) if b == expected => {}
432 Some(_) => {
433 self.pos -= 1;
434 return Err(self.error(ErrorKind::ExpectedLiteral { expected: literal }));
435 }
436 None => return Err(self.error(ErrorKind::UnexpectedEOF)),
437 }
438 }
439 Ok(())
440 }
441
442 /// Parses one JSON value preceded by optional whitespace and returns a fully-formed [`Element`].
443 ///
444 /// Allocates an [`ElemId`] and records the path `entry` before dispatching to
445 /// [`Self::parse_value_kind`], so child elements produced during that call
446 /// already find this element's id in the table as their parent.
447 fn parse_value(&mut self, entry: PathEntry<'buf>) -> Result<RawElement<'buf>, Error> {
448 self.skip_ws();
449 // ID and table entry are registered before recursing so that child elements
450 // produced by parse_value_kind see this id as their parent.
451 let id = self.alloc_id()?;
452 self.table.push(entry);
453 let start = self.pos;
454 let value = self.parse_value_kind(id)?;
455 let span = Span::new(start as u32, self.pos as u32);
456 Ok(RawElement {
457 id,
458 span,
459 // The parent container extends this past the trailing comma and whitespace
460 // when it exists, so that the element's removal span covers its own separator.
461 full_span_end: span.end,
462 value,
463 })
464 }
465
466 /// Dispatches to the type-specific parser based on the first byte of the value.
467 ///
468 /// `id` is this element's own [`ElemId`], threaded down to [`Self::parse_array`]
469 /// and [`Self::parse_object`] so they can record it as the parent of their children.
470 fn parse_value_kind(&mut self, id: ElemId) -> Result<RawValue<'buf>, Error> {
471 match self
472 .peek()
473 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
474 {
475 b'n' => {
476 self.expect_literal(NULL)?;
477 Ok(RawValue::Null)
478 }
479 b't' => {
480 self.expect_literal(TRUE)?;
481 Ok(RawValue::True)
482 }
483 b'f' => {
484 self.expect_literal(FALSE)?;
485 Ok(RawValue::False)
486 }
487 QUOTE => Ok(RawValue::String(self.parse_raw_str()?)),
488 MINUS | DIGIT_0..=DIGIT_9 => Ok(RawValue::Number(self.parse_number_str()?)),
489 ARRAY_OPEN => self.parse_array(id),
490 OBJECT_OPEN => self.parse_object(id),
491 _ => Err(self.error(ErrorKind::ExpectedStart)),
492 }
493 }
494
495 /// Parses a JSON number and returns the raw source slice.
496 ///
497 /// Grammar `RFC 8259 s6`:
498 /// ```text
499 /// number = [ '-' ] int [ frac ] [ exp ]
500 /// int = '0' | [1-9] DIGIT*
501 /// frac = '.' DIGIT+
502 /// exp = ('e'|'E') ['+'|'-'] DIGIT+
503 /// ```
504 fn parse_number_str(&mut self) -> Result<&'buf str, Error> {
505 let start = self.pos;
506
507 if self.peek() == Some(MINUS) {
508 self.chomp();
509 }
510
511 match self
512 .peek()
513 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
514 {
515 // A lone '0' is the only valid integer starting with zero; more digits
516 // after it would be a leading-zero violation (e.g. "01" is invalid JSON).
517 DIGIT_0 => self.chomp(),
518 DIGIT_1..=DIGIT_9 => {
519 while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
520 self.chomp();
521 }
522 }
523 _ => return Err(self.error(ErrorKind::ExpectedNumeral)),
524 }
525
526 if self.peek() == Some(DECIMAL_POINT) {
527 self.chomp();
528 // At least one digit is required after the decimal point.
529 if !matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
530 return Err(match self.peek() {
531 Some(_) => self.error(ErrorKind::ExpectedNumeral),
532 None => self.error(ErrorKind::UnexpectedEOF),
533 });
534 }
535 while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
536 self.chomp();
537 }
538 }
539
540 if matches!(self.peek(), Some(EXP_LOWER | EXP_UPPER)) {
541 self.chomp();
542 if matches!(self.peek(), Some(PLUS | MINUS)) {
543 self.chomp();
544 }
545 // At least one digit is required after the exponent indicator (and optional sign).
546 if !matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
547 return Err(match self.peek() {
548 Some(_) => self.error(ErrorKind::ExpectedNumeral),
549 None => self.error(ErrorKind::UnexpectedEOF),
550 });
551 }
552 while matches!(self.peek(), Some(DIGIT_0..=DIGIT_9)) {
553 self.chomp();
554 }
555 }
556
557 Ok(&self.source[start..self.pos])
558 }
559
560 /// Parses a JSON string and returns a [`RawStr`] with quotes stripped.
561 ///
562 /// Scans for the closing `"` delimiter, skipping one byte after every `\`
563 /// so that `\"` does not terminate the string. Escape sequences and control
564 /// characters are not validated here; callers use [`RawStr::decode_escapes`].
565 fn parse_raw_str(&mut self) -> Result<RawStr<'buf>, Error> {
566 self.expect_byte(QUOTE)?;
567 let content_start = self.pos; // First byte after the opening `"`.
568
569 loop {
570 match self
571 .advance()
572 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
573 {
574 QUOTE => break,
575 BACKSLASH => {
576 // Consume whatever follows so that `\"` does not close the string.
577 self.advance()
578 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?;
579 }
580 _ => {}
581 }
582 }
583
584 // `advance()` left `pos` one past the closing '"', so `pos-1` is the '"' itself;
585 // `content_start..pos-1` therefore captures content without either delimiter.
586 Ok(RawStr(&self.source[content_start..self.pos - 1]))
587 }
588
589 /// Parses a JSON array `[...]` and returns [`Value::Array`].
590 ///
591 /// Increments the depth counter before consuming `[` and returns
592 /// [`ErrorKind::DepthLimitExceeded`] if the limit is exceeded.
593 fn parse_array(&mut self, parent_id: ElemId) -> Result<RawValue<'buf>, Error> {
594 self.depth += 1;
595 if self.depth > MAX_DEPTH {
596 return Err(self.error(ErrorKind::DepthLimitExceeded));
597 }
598 self.expect_byte(ARRAY_OPEN)?;
599 self.skip_ws();
600 let mut elements: Vec<RawElement<'buf>> = Vec::new();
601
602 if self.peek() != Some(ARRAY_CLOSE) {
603 loop {
604 let entry = PathEntry::Item {
605 parent: parent_id,
606 index: elements.len() as u32,
607 };
608 let mut elem = self.parse_value(entry)?;
609 self.skip_ws();
610 match self
611 .peek()
612 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
613 {
614 COMMA => {
615 self.chomp();
616 self.skip_ws();
617 if self.peek() == Some(ARRAY_CLOSE) {
618 return Err(self.error(ErrorKind::ExpectedEndArray));
619 }
620 // Extend past the comma and leading whitespace of the next sibling
621 // so that removing this element also removes its own separator.
622 elem.full_span_end = self.pos as u32;
623 elements.push(elem);
624 }
625 ARRAY_CLOSE => {
626 elements.push(elem);
627 break;
628 }
629 _ => return Err(self.error(ErrorKind::ExpectedEndArray)),
630 }
631 }
632 }
633
634 self.expect_byte(ARRAY_CLOSE)?;
635 self.depth -= 1;
636 Ok(RawValue::Array(elements))
637 }
638
639 /// Parses a JSON object `{...}` and returns [`Value::Object`].
640 ///
641 /// Increments the depth counter before consuming `{` and returns
642 /// [`ErrorKind::DepthLimitExceeded`] if the limit is exceeded.
643 fn parse_object(&mut self, parent_id: ElemId) -> Result<RawValue<'buf>, Error> {
644 self.depth += 1;
645 if self.depth > MAX_DEPTH {
646 return Err(self.error(ErrorKind::DepthLimitExceeded));
647 }
648 self.expect_byte(OBJECT_OPEN)?;
649 self.skip_ws();
650 let mut fields: Vec<RawField<'buf>> = Vec::new();
651
652 if self.peek() != Some(OBJECT_CLOSE) {
653 loop {
654 let key_start = self.pos;
655 let key = self.parse_raw_str()?;
656 let key_span = Span::new(key_start as u32, self.pos as u32);
657 self.skip_ws();
658 self.expect_byte(COLON)?;
659 let entry = PathEntry::Field {
660 parent: parent_id,
661 key,
662 };
663 let mut elem = self.parse_value(entry)?;
664 self.skip_ws();
665 match self
666 .peek()
667 .ok_or_else(|| self.error(ErrorKind::UnexpectedEOF))?
668 {
669 COMMA => {
670 self.chomp();
671 self.skip_ws();
672 if self.peek() == Some(OBJECT_CLOSE) {
673 return Err(self.error(ErrorKind::ExpectedEndObject));
674 }
675 // Same as in parse_array: extend past the comma and whitespace
676 // so that removing this field also removes its own separator.
677 elem.full_span_end = self.pos as u32;
678 fields.push(RawField {
679 key_span,
680 element: elem,
681 });
682 }
683 OBJECT_CLOSE => {
684 fields.push(RawField {
685 key_span,
686 element: elem,
687 });
688 break;
689 }
690 _ => return Err(self.error(ErrorKind::ExpectedEndObject)),
691 }
692 }
693 }
694
695 self.expect_byte(OBJECT_CLOSE)?;
696 self.depth -= 1;
697 Ok(RawValue::Object(fields))
698 }
699}
700
701/// Converts a [`RawElement`] tree into the public [`Element`] tree, loading every
702/// node with a clone of `inner` at construction time.
703///
704/// Uses an explicit work stack instead of recursion. Children are pushed in
705/// reverse so they are processed in order; each `Build*` task then pops its
706/// children off `done` and assembles the parent.
707fn into_element<'buf>(raw: RawElement<'buf>, inner: &Rc<DocumentInner<'buf>>) -> Element<'buf> {
708 enum Task<'buf> {
709 Process(RawElement<'buf>),
710 BuildArray {
711 id: ElemId,
712 span: Span,
713 full_span_end: u32,
714 count: usize,
715 },
716 BuildObject {
717 id: ElemId,
718 span: Span,
719 full_span_end: u32,
720 key_spans: Vec<Span>,
721 },
722 }
723
724 let mut work: Vec<Task<'buf>> = vec![Task::Process(raw)];
725 let mut done: Vec<Element<'buf>> = Vec::new();
726
727 while let Some(task) = work.pop() {
728 match task {
729 Task::Process(raw) => {
730 let value = match raw.value {
731 RawValue::Null => Value::Null,
732 RawValue::True => Value::True,
733 RawValue::False => Value::False,
734 RawValue::String(s) => Value::String(s),
735 RawValue::Number(n) => Value::Number(n),
736 RawValue::Array(items) => {
737 work.push(Task::BuildArray {
738 id: raw.id,
739 span: raw.span,
740 full_span_end: raw.full_span_end,
741 count: items.len(),
742 });
743 for item in items.into_iter().rev() {
744 work.push(Task::Process(item));
745 }
746 continue;
747 }
748 RawValue::Object(fields) => {
749 let key_spans = fields.iter().map(|f| f.key_span).collect();
750 work.push(Task::BuildObject {
751 id: raw.id,
752 span: raw.span,
753 full_span_end: raw.full_span_end,
754 key_spans,
755 });
756 for field in fields.into_iter().rev() {
757 work.push(Task::Process(field.element));
758 }
759 continue;
760 }
761 };
762 done.push(Element {
763 doc: Rc::clone(inner),
764 id: raw.id,
765 span: raw.span,
766 full_span_end: raw.full_span_end,
767 value,
768 });
769 }
770 Task::BuildArray {
771 id,
772 span,
773 full_span_end,
774 count,
775 } => {
776 let start = done.len() - count;
777 let items: Vec<Element<'buf>> = done.drain(start..).collect();
778 done.push(Element {
779 doc: Rc::clone(inner),
780 id,
781 span,
782 full_span_end,
783 value: Value::Array(items),
784 });
785 }
786 Task::BuildObject {
787 id,
788 span,
789 full_span_end,
790 key_spans,
791 } => {
792 let count = key_spans.len();
793 let start = done.len() - count;
794 let elements: Vec<Element<'buf>> = done.drain(start..).collect();
795 let fields = key_spans
796 .into_iter()
797 .zip(elements)
798 .map(|(key_span, element)| Field { key_span, element })
799 .collect();
800 done.push(Element {
801 doc: Rc::clone(inner),
802 id,
803 span,
804 full_span_end,
805 value: Value::Object(fields),
806 });
807 }
808 }
809 }
810
811 // Each Process task produces exactly one element in `done`, either directly
812 // (scalars) or via a Build task (containers). Starting with one root Process
813 // task guarantees exactly one element remains here.
814 done.swap_remove(0)
815}