Skip to main content

facet_json/
parser.rs

1extern crate alloc;
2
3use alloc::{borrow::Cow, collections::VecDeque, format, vec::Vec};
4
5use facet_core::Facet as _;
6use facet_format::{
7    ContainerKind, DeserializeErrorKind, FieldKey, FieldLocationHint, FormatParser, ParseError,
8    ParseEvent, ParseEventKind, SavePoint, ScalarValue,
9};
10use facet_reflect::Span;
11
12use crate::scanner::{self, ParsedNumber, ScanError, ScanErrorKind, Scanner, Token as ScanToken};
13
14/// Convert a ScanError to a ParseError.
15fn scan_error_to_parse_error(err: ScanError) -> ParseError {
16    let kind = match err.kind {
17        ScanErrorKind::UnexpectedChar(ch) => DeserializeErrorKind::UnexpectedChar {
18            ch,
19            expected: "valid JSON token",
20        },
21        ScanErrorKind::UnexpectedEof(expected) => DeserializeErrorKind::UnexpectedEof { expected },
22        ScanErrorKind::InvalidUtf8 => DeserializeErrorKind::InvalidUtf8 {
23            context: [0u8; 16],
24            context_len: 0,
25        },
26    };
27    ParseError::new(err.span, kind)
28}
29
/// Materialized token ready for the parser.
///
/// Produced by `JsonParser::consume_token`, which decodes the scanner's raw
/// token (string slices, number text) into a ready-to-use payload.
#[derive(Debug, Clone)]
pub struct MaterializedToken<'de> {
    /// What the token is, including any decoded string or number payload.
    pub kind: TokenKind<'de>,
    /// Source span of the token, copied from the scanner's result.
    pub span: Span,
}
36
/// The kind of a [`MaterializedToken`], with payloads already decoded.
#[derive(Debug, Clone)]
pub enum TokenKind<'de> {
    /// `{`
    ObjectStart,
    /// `}`
    ObjectEnd,
    /// `[`
    ArrayStart,
    /// `]`
    ArrayEnd,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// The `null` literal.
    Null,
    /// The `true` literal.
    True,
    /// The `false` literal.
    False,
    /// A decoded string: borrowed from the input when the scanner reported no
    /// escapes, owned otherwise.
    String(Cow<'de, str>),
    /// A number the scanner's number parser classified as `u64`.
    U64(u64),
    /// A number the scanner's number parser classified as `i64`.
    I64(i64),
    /// A number classified as `u128`; the parser emits it as a string scalar.
    U128(u128),
    /// A number classified as `i128`; the parser emits it as a string scalar.
    I128(i128),
    /// A number classified as `f64`.
    F64(f64),
    /// End of input.
    Eof,
}
56
/// Mutable parser state that can be saved and restored.
///
/// `save` clones this whole struct and `restore` swaps the clone back in, so
/// everything needed to resume parsing from a save point must live here.
#[derive(Clone)]
struct ParserState<'de> {
    /// Stack tracking nested containers (innermost container last).
    stack: Vec<ContextState>,
    /// Cached event for `peek_event`.
    event_peek: Option<ParseEvent<'de>>,
    /// Start offset of the peeked event's first token (for capture_raw).
    /// Set together with `event_peek` and cleared whenever the peek is consumed.
    peek_start_offset: Option<usize>,
    /// Whether the root value has started.
    root_started: bool,
    /// Whether the root value has fully completed.
    root_complete: bool,
    /// Offset of the last token's start (span.offset).
    /// Only updated by `consume_token`, not by the skip helpers.
    last_token_start: usize,
    /// Scanner position (for save/restore).
    /// Refreshed from `scanner.pos()` after every token that is scanned.
    scanner_pos: usize,
}
75
/// JSON parser using Scanner directly (no adapter layer).
///
/// The const generic `TRUSTED_UTF8` controls UTF-8 validation:
/// - `TRUSTED_UTF8=true`: skip UTF-8 validation (input came from `&str`)
/// - `TRUSTED_UTF8=false`: validate UTF-8 (input came from `&[u8]`)
pub struct JsonParser<'de, const TRUSTED_UTF8: bool = false> {
    /// The complete input buffer; every scanner call receives this same slice.
    input: &'de [u8],
    /// Tokenizer that walks `input`; replaced wholesale on `restore`.
    scanner: Scanner,
    /// Current mutable state (container stack, peek cache, offsets).
    state: ParserState<'de>,
    /// Counter for save points.
    /// Incremented on every `save`, so each save point gets a fresh id.
    save_counter: u64,
    /// Saved states for restore functionality.
    /// Each entry pairs a save-point id with the state cloned at that time.
    saved_states: Vec<(u64, ParserState<'de>)>,
}
90
/// One entry on the parser's container stack: which kind of container is open
/// and what the parser expects next inside it.
#[derive(Debug, Clone)]
enum ContextState {
    /// Inside a `{ ... }` object.
    Object(ObjectState),
    /// Inside a `[ ... ]` array.
    Array(ArrayState),
}
96
/// Position within an object, as tracked by the parser's state machine.
#[derive(Debug, Clone, Copy)]
enum ObjectState {
    /// Expecting a field name or the closing `}`.
    KeyOrEnd,
    /// A field name and its `:` were consumed; expecting the field's value.
    Value,
    /// A value was completed; expecting `,` or the closing `}`.
    CommaOrEnd,
}
103
/// Position within an array, as tracked by the parser's state machine.
#[derive(Debug, Clone, Copy)]
enum ArrayState {
    /// Expecting an element value or the closing `]`.
    ValueOrEnd,
    /// An element was completed; expecting `,` or the closing `]`.
    CommaOrEnd,
}
109
/// Kind of container delimiter, used by `skip_container` to check that every
/// closing bracket matches the innermost open one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DelimKind {
    /// `{` ... `}`
    Object,
    /// `[` ... `]`
    Array,
}
115
/// What `produce_event` should do next, derived by `determine_action` from the
/// top of the container stack (or the root flags when the stack is empty).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NextAction {
    /// In an object: read a field name or the closing `}`.
    ObjectKey,
    /// In an object: read the value for the field name just emitted.
    ObjectValue,
    /// In an object: read `,` or the closing `}`.
    ObjectComma,
    /// In an array: read an element or the closing `]`.
    ArrayValue,
    /// In an array: read `,` or the closing `]`.
    ArrayComma,
    /// No container open and the root value is not complete: read the root value.
    RootValue,
    /// No container open and the root value is complete: no more events.
    RootFinished,
}
126
127impl<'de, const TRUSTED_UTF8: bool> JsonParser<'de, TRUSTED_UTF8> {
128    pub fn new(input: &'de [u8]) -> Self {
129        Self {
130            input,
131            scanner: Scanner::new(),
132            state: ParserState {
133                stack: Vec::new(),
134                event_peek: None,
135                peek_start_offset: None,
136                root_started: false,
137                root_complete: false,
138                last_token_start: 0,
139                scanner_pos: 0,
140            },
141            save_counter: 0,
142            saved_states: Vec::new(),
143        }
144    }
145
146    /// Create a JSONC parser that accepts `//` and `/* */` comments.
147    pub fn new_jsonc(input: &'de [u8]) -> Self {
148        Self {
149            input,
150            scanner: Scanner::new_with_comments(),
151            state: ParserState {
152                stack: Vec::new(),
153                event_peek: None,
154                peek_start_offset: None,
155                root_started: false,
156                root_complete: false,
157                last_token_start: 0,
158                scanner_pos: 0,
159            },
160            save_counter: 0,
161            saved_states: Vec::new(),
162        }
163    }
164
165    /// Scan and materialize the next token directly.
166    #[inline]
167    fn consume_token(&mut self) -> Result<MaterializedToken<'de>, ParseError> {
168        let mut spanned = self
169            .scanner
170            .next_token(self.input)
171            .map_err(scan_error_to_parse_error)?;
172
173        // Handle NeedMore by finalizing - we have full input so this is true EOF
174        if matches!(spanned.token, ScanToken::NeedMore { .. }) {
175            spanned = self
176                .scanner
177                .finalize_at_eof(self.input)
178                .map_err(scan_error_to_parse_error)?;
179        }
180
181        self.state.last_token_start = spanned.span.offset as usize;
182        self.state.scanner_pos = self.scanner.pos();
183
184        let kind = match spanned.token {
185            ScanToken::ObjectStart => TokenKind::ObjectStart,
186            ScanToken::ObjectEnd => TokenKind::ObjectEnd,
187            ScanToken::ArrayStart => TokenKind::ArrayStart,
188            ScanToken::ArrayEnd => TokenKind::ArrayEnd,
189            ScanToken::Colon => TokenKind::Colon,
190            ScanToken::Comma => TokenKind::Comma,
191            ScanToken::Null => TokenKind::Null,
192            ScanToken::True => TokenKind::True,
193            ScanToken::False => TokenKind::False,
194            ScanToken::String {
195                start,
196                end,
197                has_escapes,
198            } => {
199                let s = if !has_escapes {
200                    if TRUSTED_UTF8 {
201                        // SAFETY: Caller guarantees input is valid UTF-8
202                        unsafe { scanner::decode_string_borrowed_unchecked(self.input, start, end) }
203                            .map(Cow::Borrowed)
204                            .ok_or_else(|| {
205                                ParseError::new(
206                                    spanned.span,
207                                    DeserializeErrorKind::InvalidUtf8 {
208                                        context: [0u8; 16],
209                                        context_len: 0,
210                                    },
211                                )
212                            })?
213                    } else {
214                        scanner::decode_string_borrowed(self.input, start, end)
215                            .map(Cow::Borrowed)
216                            .ok_or_else(|| {
217                                ParseError::new(
218                                    spanned.span,
219                                    DeserializeErrorKind::InvalidUtf8 {
220                                        context: [0u8; 16],
221                                        context_len: 0,
222                                    },
223                                )
224                            })?
225                    }
226                } else if TRUSTED_UTF8 {
227                    // SAFETY: Caller guarantees input is valid UTF-8
228                    Cow::Owned(
229                        unsafe { scanner::decode_string_owned_unchecked(self.input, start, end) }
230                            .map_err(scan_error_to_parse_error)?,
231                    )
232                } else {
233                    Cow::Owned(
234                        scanner::decode_string_owned(self.input, start, end)
235                            .map_err(scan_error_to_parse_error)?,
236                    )
237                };
238                TokenKind::String(s)
239            }
240            ScanToken::Number { start, end, hint } => {
241                let parsed = if TRUSTED_UTF8 {
242                    // SAFETY: Input came from &str, so it's valid UTF-8
243                    unsafe { scanner::parse_number_unchecked(self.input, start, end, hint) }
244                } else {
245                    scanner::parse_number(self.input, start, end, hint)
246                }
247                .map_err(scan_error_to_parse_error)?;
248                match parsed {
249                    ParsedNumber::U64(n) => TokenKind::U64(n),
250                    ParsedNumber::I64(n) => TokenKind::I64(n),
251                    ParsedNumber::U128(n) => TokenKind::U128(n),
252                    ParsedNumber::I128(n) => TokenKind::I128(n),
253                    ParsedNumber::F64(n) => TokenKind::F64(n),
254                }
255            }
256            ScanToken::Eof => TokenKind::Eof,
257            ScanToken::NeedMore { .. } => unreachable!("handled above"),
258        };
259
260        Ok(MaterializedToken {
261            kind,
262            span: spanned.span,
263        })
264    }
265
266    fn expect_colon(&mut self) -> Result<(), ParseError> {
267        let token = self.consume_token()?;
268        if !matches!(token.kind, TokenKind::Colon) {
269            return Err(self.unexpected(&token, "':'"));
270        }
271        Ok(())
272    }
273
274    fn parse_value_start_with_token(
275        &mut self,
276        first: Option<MaterializedToken<'de>>,
277    ) -> Result<ParseEvent<'de>, ParseError> {
278        let token = match first {
279            Some(tok) => tok,
280            None => self.consume_token()?,
281        };
282
283        self.state.root_started = true;
284
285        let span = token.span;
286        match token.kind {
287            TokenKind::ObjectStart => {
288                self.state
289                    .stack
290                    .push(ContextState::Object(ObjectState::KeyOrEnd));
291                Ok(ParseEvent::new(
292                    ParseEventKind::StructStart(ContainerKind::Object),
293                    span,
294                ))
295            }
296            TokenKind::ArrayStart => {
297                self.state
298                    .stack
299                    .push(ContextState::Array(ArrayState::ValueOrEnd));
300                Ok(ParseEvent::new(
301                    ParseEventKind::SequenceStart(ContainerKind::Array),
302                    span,
303                ))
304            }
305            TokenKind::String(s) => {
306                let event = ParseEvent::new(ParseEventKind::Scalar(ScalarValue::Str(s)), span);
307                self.finish_value_in_parent();
308                Ok(event)
309            }
310            TokenKind::True => {
311                self.finish_value_in_parent();
312                Ok(ParseEvent::new(
313                    ParseEventKind::Scalar(ScalarValue::Bool(true)),
314                    span,
315                ))
316            }
317            TokenKind::False => {
318                self.finish_value_in_parent();
319                Ok(ParseEvent::new(
320                    ParseEventKind::Scalar(ScalarValue::Bool(false)),
321                    span,
322                ))
323            }
324            TokenKind::Null => {
325                self.finish_value_in_parent();
326                Ok(ParseEvent::new(
327                    ParseEventKind::Scalar(ScalarValue::Null),
328                    span,
329                ))
330            }
331            TokenKind::U64(n) => {
332                self.finish_value_in_parent();
333                Ok(ParseEvent::new(
334                    ParseEventKind::Scalar(ScalarValue::U64(n)),
335                    span,
336                ))
337            }
338            TokenKind::I64(n) => {
339                self.finish_value_in_parent();
340                Ok(ParseEvent::new(
341                    ParseEventKind::Scalar(ScalarValue::I64(n)),
342                    span,
343                ))
344            }
345            TokenKind::U128(n) => {
346                self.finish_value_in_parent();
347                Ok(ParseEvent::new(
348                    ParseEventKind::Scalar(ScalarValue::Str(Cow::Owned(n.to_string()))),
349                    span,
350                ))
351            }
352            TokenKind::I128(n) => {
353                self.finish_value_in_parent();
354                Ok(ParseEvent::new(
355                    ParseEventKind::Scalar(ScalarValue::Str(Cow::Owned(n.to_string()))),
356                    span,
357                ))
358            }
359            TokenKind::F64(n) => {
360                self.finish_value_in_parent();
361                Ok(ParseEvent::new(
362                    ParseEventKind::Scalar(ScalarValue::F64(n)),
363                    span,
364                ))
365            }
366            TokenKind::ObjectEnd | TokenKind::ArrayEnd => Err(self.unexpected(&token, "value")),
367            TokenKind::Comma | TokenKind::Colon => Err(self.unexpected(&token, "value")),
368            TokenKind::Eof => Err(ParseError::new(
369                span,
370                DeserializeErrorKind::UnexpectedEof { expected: "value" },
371            )),
372        }
373    }
374
375    fn finish_value_in_parent(&mut self) {
376        if let Some(context) = self.state.stack.last_mut() {
377            match context {
378                ContextState::Object(state) => *state = ObjectState::CommaOrEnd,
379                ContextState::Array(state) => *state = ArrayState::CommaOrEnd,
380            }
381        } else if self.state.root_started {
382            self.state.root_complete = true;
383        }
384    }
385
386    fn unexpected(&self, token: &MaterializedToken<'de>, expected: &'static str) -> ParseError {
387        ParseError::new(
388            token.span,
389            DeserializeErrorKind::UnexpectedToken {
390                got: format!("{:?}", token.kind).into(),
391                expected,
392            },
393        )
394    }
395
396    /// Skip a JSON value by scanning tokens without full materialization.
397    fn skip_value_tokens(&mut self) -> Result<Span, ParseError> {
398        let first = self
399            .scanner
400            .next_token(self.input)
401            .map_err(scan_error_to_parse_error)?;
402        let start = first.span.offset as usize;
403        self.state.scanner_pos = self.scanner.pos();
404
405        match first.token {
406            ScanToken::ObjectStart => self.skip_container(DelimKind::Object)?,
407            ScanToken::ArrayStart => self.skip_container(DelimKind::Array)?,
408            ScanToken::String { .. }
409            | ScanToken::Number { .. }
410            | ScanToken::True
411            | ScanToken::False
412            | ScanToken::Null => {}
413            ScanToken::ObjectEnd | ScanToken::ArrayEnd | ScanToken::Comma | ScanToken::Colon => {
414                return Err(ParseError::new(
415                    first.span,
416                    DeserializeErrorKind::UnexpectedToken {
417                        got: format!("{:?}", first.token).into(),
418                        expected: "value",
419                    },
420                ));
421            }
422            ScanToken::Eof => {
423                return Err(ParseError::new(
424                    first.span,
425                    DeserializeErrorKind::UnexpectedEof { expected: "value" },
426                ));
427            }
428            ScanToken::NeedMore { .. } => {
429                return Err(ParseError::new(
430                    first.span,
431                    DeserializeErrorKind::UnexpectedEof {
432                        expected: "more data",
433                    },
434                ));
435            }
436        }
437
438        let end = self.scanner.pos();
439        Ok(Span::new(start, end - start))
440    }
441
442    fn skip_container(&mut self, start_kind: DelimKind) -> Result<(), ParseError> {
443        let mut stack = alloc::vec![start_kind];
444        while let Some(current) = stack.last().copied() {
445            let spanned = self
446                .scanner
447                .next_token(self.input)
448                .map_err(scan_error_to_parse_error)?;
449            self.state.scanner_pos = self.scanner.pos();
450
451            match spanned.token {
452                ScanToken::ObjectStart => stack.push(DelimKind::Object),
453                ScanToken::ArrayStart => stack.push(DelimKind::Array),
454                ScanToken::ObjectEnd => {
455                    if current != DelimKind::Object {
456                        return Err(ParseError::new(
457                            spanned.span,
458                            DeserializeErrorKind::UnexpectedToken {
459                                got: "'}'".into(),
460                                expected: "']'",
461                            },
462                        ));
463                    }
464                    stack.pop();
465                }
466                ScanToken::ArrayEnd => {
467                    if current != DelimKind::Array {
468                        return Err(ParseError::new(
469                            spanned.span,
470                            DeserializeErrorKind::UnexpectedToken {
471                                got: "']'".into(),
472                                expected: "'}'",
473                            },
474                        ));
475                    }
476                    stack.pop();
477                }
478                ScanToken::Eof => {
479                    return Err(ParseError::new(
480                        spanned.span,
481                        DeserializeErrorKind::UnexpectedEof { expected: "value" },
482                    ));
483                }
484                ScanToken::NeedMore { .. } => {
485                    return Err(ParseError::new(
486                        spanned.span,
487                        DeserializeErrorKind::UnexpectedEof {
488                            expected: "more data",
489                        },
490                    ));
491                }
492                _ => {}
493            }
494        }
495        Ok(())
496    }
497
498    fn determine_action(&self) -> NextAction {
499        if let Some(context) = self.state.stack.last() {
500            match context {
501                ContextState::Object(state) => match state {
502                    ObjectState::KeyOrEnd => NextAction::ObjectKey,
503                    ObjectState::Value => NextAction::ObjectValue,
504                    ObjectState::CommaOrEnd => NextAction::ObjectComma,
505                },
506                ContextState::Array(state) => match state {
507                    ArrayState::ValueOrEnd => NextAction::ArrayValue,
508                    ArrayState::CommaOrEnd => NextAction::ArrayComma,
509                },
510            }
511        } else if self.state.root_complete {
512            NextAction::RootFinished
513        } else {
514            NextAction::RootValue
515        }
516    }
517
518    fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
519        loop {
520            match self.determine_action() {
521                NextAction::ObjectKey => {
522                    let token = self.consume_token()?;
523                    let span = token.span;
524                    match token.kind {
525                        TokenKind::ObjectEnd => {
526                            self.state.stack.pop();
527                            self.finish_value_in_parent();
528                            return Ok(Some(ParseEvent::new(ParseEventKind::StructEnd, span)));
529                        }
530                        TokenKind::String(name) => {
531                            self.expect_colon()?;
532                            if let Some(ContextState::Object(state)) = self.state.stack.last_mut() {
533                                *state = ObjectState::Value;
534                            }
535                            return Ok(Some(ParseEvent::new(
536                                ParseEventKind::FieldKey(FieldKey::new(
537                                    name,
538                                    FieldLocationHint::KeyValue,
539                                )),
540                                span,
541                            )));
542                        }
543                        TokenKind::Eof => {
544                            return Err(ParseError::new(
545                                span,
546                                DeserializeErrorKind::UnexpectedEof {
547                                    expected: "field name or '}'",
548                                },
549                            ));
550                        }
551                        _ => return Err(self.unexpected(&token, "field name or '}'")),
552                    }
553                }
554                NextAction::ObjectValue => {
555                    return self.parse_value_start_with_token(None).map(Some);
556                }
557                NextAction::ObjectComma => {
558                    let token = self.consume_token()?;
559                    let span = token.span;
560                    match token.kind {
561                        TokenKind::Comma => {
562                            if let Some(ContextState::Object(state)) = self.state.stack.last_mut() {
563                                *state = ObjectState::KeyOrEnd;
564                            }
565                            continue;
566                        }
567                        TokenKind::ObjectEnd => {
568                            self.state.stack.pop();
569                            self.finish_value_in_parent();
570                            return Ok(Some(ParseEvent::new(ParseEventKind::StructEnd, span)));
571                        }
572                        TokenKind::Eof => {
573                            return Err(ParseError::new(
574                                span,
575                                DeserializeErrorKind::UnexpectedEof {
576                                    expected: "',' or '}'",
577                                },
578                            ));
579                        }
580                        _ => return Err(self.unexpected(&token, "',' or '}'")),
581                    }
582                }
583                NextAction::ArrayValue => {
584                    let token = self.consume_token()?;
585                    let span = token.span;
586                    match token.kind {
587                        TokenKind::ArrayEnd => {
588                            self.state.stack.pop();
589                            self.finish_value_in_parent();
590                            return Ok(Some(ParseEvent::new(ParseEventKind::SequenceEnd, span)));
591                        }
592                        TokenKind::Eof => {
593                            return Err(ParseError::new(
594                                span,
595                                DeserializeErrorKind::UnexpectedEof {
596                                    expected: "value or ']'",
597                                },
598                            ));
599                        }
600                        TokenKind::Comma | TokenKind::Colon => {
601                            return Err(self.unexpected(&token, "value or ']'"));
602                        }
603                        _ => {
604                            return self.parse_value_start_with_token(Some(token)).map(Some);
605                        }
606                    }
607                }
608                NextAction::ArrayComma => {
609                    let token = self.consume_token()?;
610                    let span = token.span;
611                    match token.kind {
612                        TokenKind::Comma => {
613                            if let Some(ContextState::Array(state)) = self.state.stack.last_mut() {
614                                *state = ArrayState::ValueOrEnd;
615                            }
616                            continue;
617                        }
618                        TokenKind::ArrayEnd => {
619                            self.state.stack.pop();
620                            self.finish_value_in_parent();
621                            return Ok(Some(ParseEvent::new(ParseEventKind::SequenceEnd, span)));
622                        }
623                        TokenKind::Eof => {
624                            return Err(ParseError::new(
625                                span,
626                                DeserializeErrorKind::UnexpectedEof {
627                                    expected: "',' or ']'",
628                                },
629                            ));
630                        }
631                        _ => return Err(self.unexpected(&token, "',' or ']'")),
632                    }
633                }
634                NextAction::RootValue => {
635                    return self.parse_value_start_with_token(None).map(Some);
636                }
637                NextAction::RootFinished => {
638                    return Ok(None);
639                }
640            }
641        }
642    }
643
    /// Get current position in input.
    ///
    /// This is the cached `scanner_pos` from the parser state, which is
    /// refreshed from `scanner.pos()` after each token is scanned.
    fn current_offset(&self) -> usize {
        self.state.scanner_pos
    }
648}
649
650impl<'de, const TRUSTED_UTF8: bool> FormatParser<'de> for JsonParser<'de, TRUSTED_UTF8> {
    /// Report `crate::RawJson`'s shape as this parser's raw-capture shape.
    // NOTE(review): presumably the deserializer uses this to decide when to
    // call `capture_raw` — confirm against the `FormatParser` trait docs.
    fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
        Some(crate::RawJson::SHAPE)
    }
654
    /// Return the complete input buffer this parser was constructed with.
    fn input(&self) -> Option<&'de [u8]> {
        Some(self.input)
    }
658
659    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
660        if let Some(event) = self.state.event_peek.take() {
661            self.state.peek_start_offset = None;
662            return Ok(Some(event));
663        }
664        self.produce_event()
665    }
666
667    fn next_events(
668        &mut self,
669        buf: &mut VecDeque<ParseEvent<'de>>,
670        limit: usize,
671    ) -> Result<usize, ParseError> {
672        if limit == 0 {
673            return Ok(0);
674        }
675
676        let mut count = 0;
677
678        // First, drain any peeked event
679        if let Some(event) = self.state.event_peek.take() {
680            self.state.peek_start_offset = None;
681            buf.push_back(event);
682            count += 1;
683        }
684
685        // Simple implementation: just call produce_event in a loop
686        while count < limit {
687            match self.produce_event()? {
688                Some(event) => {
689                    buf.push_back(event);
690                    count += 1;
691                }
692                None => break,
693            }
694        }
695
696        Ok(count)
697    }
698
699    fn save(&mut self) -> SavePoint {
700        self.save_counter += 1;
701        self.saved_states
702            .push((self.save_counter, self.state.clone()));
703        SavePoint(self.save_counter)
704    }
705
706    fn restore(&mut self, save_point: SavePoint) {
707        // Find and remove the saved state
708        if let Some(pos) = self
709            .saved_states
710            .iter()
711            .position(|(id, _)| *id == save_point.0)
712        {
713            let (_, saved) = self.saved_states.remove(pos);
714            self.state = saved;
715            // Reset the scanner to the saved position
716            self.scanner = Scanner::at_position(self.state.scanner_pos);
717        }
718    }
719
720    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
721        if let Some(event) = self.state.event_peek.clone() {
722            return Ok(Some(event));
723        }
724        let event = self.produce_event()?;
725        if let Some(ref e) = event {
726            self.state.event_peek = Some(e.clone());
727            // Use the offset of the last token consumed (which is the value's first token)
728            // For values, produce_event ultimately calls parse_value_start_with_token
729            // which consumes the first token and sets last_token_start.
730            self.state.peek_start_offset = Some(self.state.last_token_start);
731        }
732        Ok(event)
733    }
734
735    fn skip_value(&mut self) -> Result<(), ParseError> {
736        // Handle the case where peek_event was called before skip_value
737        if let Some(event) = self.state.event_peek.take() {
738            self.state.peek_start_offset = None;
739
740            // Based on the peeked event, we may need to skip the rest of a container.
741            // Note: When peeking a StructStart/SequenceStart, the parser already pushed
742            // to self.state.stack. We need to pop it after skipping the container.
743            match event.kind {
744                ParseEventKind::StructStart(_) => {
745                    let res = self.skip_container(DelimKind::Object);
746                    // Pop the stack entry that was pushed during peek, even if skip_container errored
747                    self.state.stack.pop();
748                    res?;
749                    // Update the parent's state after skipping the container
750                    self.finish_value_in_parent();
751                }
752                ParseEventKind::SequenceStart(_) => {
753                    let res = self.skip_container(DelimKind::Array);
754                    // Pop the stack entry that was pushed during peek, even if skip_container errored
755                    self.state.stack.pop();
756                    res?;
757                    // Update the parent's state after skipping the container
758                    self.finish_value_in_parent();
759                }
760                _ => {
761                    // Scalar or end event - already consumed during peek.
762                    // parse_value_start_with_token already called finish_value_in_parent
763                    // for scalars, so we don't call it again here.
764                }
765            }
766        } else {
767            self.skip_value_tokens()?;
768            self.finish_value_in_parent();
769        }
770        Ok(())
771    }
772
773    fn capture_raw(&mut self) -> Result<Option<&'de str>, ParseError> {
774        // Handle the case where peek_event was called before capture_raw.
775        // This happens when deserialize_option peeks to check for null.
776        let start_offset = if let Some(event) = self.state.event_peek.take() {
777            let start = self
778                .state
779                .peek_start_offset
780                .take()
781                .expect("peek_start_offset should be set when event_peek is set");
782
783            // Based on the peeked event, we may need to skip the rest of a container.
784            // Note: When peeking a StructStart/SequenceStart, the parser already pushed
785            // to self.state.stack. We need to pop it after skipping the container.
786            match event.kind {
787                ParseEventKind::StructStart(_) => {
788                    let res = self.skip_container(DelimKind::Object);
789                    // Pop the stack entry that was pushed during peek, even if skip_container errored
790                    self.state.stack.pop();
791                    res?;
792                }
793                ParseEventKind::SequenceStart(_) => {
794                    let res = self.skip_container(DelimKind::Array);
795                    // Pop the stack entry that was pushed during peek, even if skip_container errored
796                    self.state.stack.pop();
797                    res?;
798                }
799                ParseEventKind::StructEnd | ParseEventKind::SequenceEnd => {
800                    // This shouldn't happen in valid usage, but handle gracefully
801                    return Err(ParseError::new(
802                        Span::new(start, 0),
803                        DeserializeErrorKind::InvalidValue {
804                            message: "unexpected end event in capture_raw".into(),
805                        },
806                    ));
807                }
808                _ => {
809                    // Scalar value - already fully consumed during peek
810                }
811            }
812
813            start
814        } else {
815            // Normal path: no peek, consume the first token
816            let first = self
817                .scanner
818                .next_token(self.input)
819                .map_err(scan_error_to_parse_error)?;
820            let start = first.span.offset as usize;
821            self.state.scanner_pos = self.scanner.pos();
822
823            // Skip the rest of the value if it's a container
824            match first.token {
825                ScanToken::ObjectStart => self.skip_container(DelimKind::Object)?,
826                ScanToken::ArrayStart => self.skip_container(DelimKind::Array)?,
827                ScanToken::ObjectEnd
828                | ScanToken::ArrayEnd
829                | ScanToken::Comma
830                | ScanToken::Colon => {
831                    return Err(ParseError::new(
832                        first.span,
833                        DeserializeErrorKind::UnexpectedToken {
834                            got: format!("{:?}", first.token).into(),
835                            expected: "value",
836                        },
837                    ));
838                }
839                ScanToken::Eof => {
840                    return Err(ParseError::new(
841                        first.span,
842                        DeserializeErrorKind::UnexpectedEof { expected: "value" },
843                    ));
844                }
845                ScanToken::NeedMore { .. } => {
846                    return Err(ParseError::new(
847                        first.span,
848                        DeserializeErrorKind::UnexpectedEof {
849                            expected: "more data",
850                        },
851                    ));
852                }
853                _ => {
854                    // Simple value - already consumed
855                }
856            }
857
858            start
859        };
860
861        // Get end position
862        let end_offset = self.current_offset();
863
864        // Extract the raw slice and convert to str
865        let raw_bytes = &self.input[start_offset..end_offset];
866        let raw_str = core::str::from_utf8(raw_bytes).map_err(|e| {
867            ParseError::new(
868                Span::new(start_offset, end_offset - start_offset),
869                DeserializeErrorKind::InvalidValue {
870                    message: format!("invalid UTF-8 in raw JSON: {}", e).into(),
871                },
872            )
873        })?;
874
875        self.finish_value_in_parent();
876        Ok(Some(raw_str))
877    }
878
879    fn format_namespace(&self) -> Option<&'static str> {
880        Some("json")
881    }
882
883    fn current_span(&self) -> Option<Span> {
884        // Return the span of the most recently consumed token
885        // This is used by metadata containers to track source locations
886        let offset = self.state.last_token_start;
887        let len = self.current_offset().saturating_sub(offset);
888        Some(Span::new(offset, len))
889    }
890}