Skip to main content

facet_json/
parser.rs

1extern crate alloc;
2
3use alloc::{borrow::Cow, collections::VecDeque, format, vec::Vec};
4
5use facet_core::Facet as _;
6use facet_format::{
7    ContainerKind, DeserializeErrorKind, FieldKey, FieldLocationHint, FormatParser, ParseError,
8    ParseEvent, ParseEventKind, SavePoint, ScalarValue,
9};
10use facet_reflect::Span;
11
12use crate::scanner::{self, ParsedNumber, ScanError, ScanErrorKind, Scanner, Token as ScanToken};
13
14/// Convert a ScanError to a ParseError.
15fn scan_error_to_parse_error(err: ScanError) -> ParseError {
16    let kind = match err.kind {
17        ScanErrorKind::UnexpectedChar(ch) => DeserializeErrorKind::UnexpectedChar {
18            ch,
19            expected: "valid JSON token",
20        },
21        ScanErrorKind::UnexpectedEof(expected) => DeserializeErrorKind::UnexpectedEof { expected },
22        ScanErrorKind::InvalidUtf8 => DeserializeErrorKind::InvalidUtf8 {
23            context: [0u8; 16],
24            context_len: 0,
25        },
26    };
27    ParseError::new(err.span, kind)
28}
29
/// Materialized token ready for the parser.
///
/// Built by `JsonParser::consume_token`, which turns a scanner token into a
/// [`TokenKind`] (decoding strings and parsing numbers) and keeps the span the
/// scanner reported for it.
#[derive(Debug, Clone)]
pub struct MaterializedToken<'de> {
    /// What the token is, including any decoded payload.
    pub kind: TokenKind<'de>,
    /// Span reported by the scanner for this token.
    pub span: Span,
}
36
/// Kind (and payload) of a [`MaterializedToken`].
///
/// Structural and literal variants mirror the scanner's tokens one to one;
/// the numeric variants mirror the variants of the scanner's `ParsedNumber`.
#[derive(Debug, Clone)]
pub enum TokenKind<'de> {
    /// Start of an object.
    ObjectStart,
    /// End of an object (`}`).
    ObjectEnd,
    /// Start of an array.
    ArrayStart,
    /// End of an array (`]`).
    ArrayEnd,
    /// Key/value separator (`:`).
    Colon,
    /// Element separator (`,`).
    Comma,
    /// The `null` literal.
    Null,
    /// The `true` literal.
    True,
    /// The `false` literal.
    False,
    /// Decoded string: borrowed from the input when the scanner saw no
    /// escapes, owned otherwise.
    String(Cow<'de, str>),
    /// Number parsed as `u64`.
    U64(u64),
    /// Number parsed as `i64`.
    I64(i64),
    /// Number parsed as `u128`.
    U128(u128),
    /// Number parsed as `i128`.
    I128(i128),
    /// Number parsed as `f64`.
    F64(f64),
    /// End of input.
    Eof,
}
56
/// Mutable parser state that can be saved and restored.
///
/// `save` clones this whole struct and `restore` puts a clone back, so
/// everything that must rewind together lives here.
#[derive(Clone)]
struct ParserState<'de> {
    /// Stack tracking nested containers (innermost last).
    stack: Vec<ContextState>,
    /// Cached event for `peek_event`.
    event_peek: Option<ParseEvent<'de>>,
    /// Start offset of the peeked event's first token (for capture_raw).
    peek_start_offset: Option<usize>,
    /// Whether the root value has started.
    root_started: bool,
    /// Whether the root value has fully completed.
    root_complete: bool,
    /// Offset of the last token's start (span.offset).
    last_token_start: usize,
    /// Scanner position (for save/restore); refreshed from the scanner after
    /// each token and used to rebuild the scanner on restore.
    scanner_pos: usize,
}
75
/// JSON parser using Scanner directly (no adapter layer).
///
/// The const generic `TRUSTED_UTF8` controls UTF-8 validation:
/// - `TRUSTED_UTF8=true`: skip UTF-8 validation (input came from `&str`)
/// - `TRUSTED_UTF8=false`: validate UTF-8 (input came from `&[u8]`)
pub struct JsonParser<'de, const TRUSTED_UTF8: bool = false> {
    /// Complete input buffer; borrowed strings in events point into it.
    input: &'de [u8],
    /// Tokenizer over `input`; recreated at the saved position on restore.
    scanner: Scanner,
    /// Everything that `save`/`restore` snapshot and rewind.
    state: ParserState<'de>,
    /// Counter for save points; incremented by `save` and used as the id.
    save_counter: u64,
    /// Saved states for restore functionality, keyed by save-point id.
    saved_states: Vec<(u64, ParserState<'de>)>,
}
90
/// One entry of the container stack: which kind of container is open and
/// what the parser expects next inside it.
#[derive(Debug, Clone)]
enum ContextState {
    /// Inside an object, with its current sub-state.
    Object(ObjectState),
    /// Inside an array, with its current sub-state.
    Array(ArrayState),
}
96
/// What the parser expects next while inside an object.
#[derive(Debug, Clone, Copy)]
enum ObjectState {
    /// A field name or the closing `}` (right after the opening token or a comma).
    KeyOrEnd,
    /// The value for the key that was just read.
    Value,
    /// A `,` or the closing `}` (after a value finished).
    CommaOrEnd,
}
103
/// What the parser expects next while inside an array.
#[derive(Debug, Clone, Copy)]
enum ArrayState {
    /// An element or the closing `]` (right after the opening token or a comma).
    ValueOrEnd,
    /// A `,` or the closing `]` (after an element finished).
    CommaOrEnd,
}
109
/// Container kind tracked by `skip_container` to check that closing
/// delimiters match the innermost open container.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DelimKind {
    /// An object, closed by `}`.
    Object,
    /// An array, closed by `]`.
    Array,
}
115
/// The step `produce_event` should take next, as computed by
/// `determine_action` from the top of the container stack and the root flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NextAction {
    /// Innermost object is in `ObjectState::KeyOrEnd`.
    ObjectKey,
    /// Innermost object is in `ObjectState::Value`.
    ObjectValue,
    /// Innermost object is in `ObjectState::CommaOrEnd`.
    ObjectComma,
    /// Innermost array is in `ArrayState::ValueOrEnd`.
    ArrayValue,
    /// Innermost array is in `ArrayState::CommaOrEnd`.
    ArrayComma,
    /// No container is open and the root value has not completed.
    RootValue,
    /// No container is open and the root value has completed.
    RootFinished,
}
126
127impl<'de, const TRUSTED_UTF8: bool> JsonParser<'de, TRUSTED_UTF8> {
128    pub fn new(input: &'de [u8]) -> Self {
129        Self {
130            input,
131            scanner: Scanner::new(),
132            state: ParserState {
133                stack: Vec::new(),
134                event_peek: None,
135                peek_start_offset: None,
136                root_started: false,
137                root_complete: false,
138                last_token_start: 0,
139                scanner_pos: 0,
140            },
141            save_counter: 0,
142            saved_states: Vec::new(),
143        }
144    }
145
146    /// Scan and materialize the next token directly.
147    #[inline]
148    fn consume_token(&mut self) -> Result<MaterializedToken<'de>, ParseError> {
149        let mut spanned = self
150            .scanner
151            .next_token(self.input)
152            .map_err(scan_error_to_parse_error)?;
153
154        // Handle NeedMore by finalizing - we have full input so this is true EOF
155        if matches!(spanned.token, ScanToken::NeedMore { .. }) {
156            spanned = self
157                .scanner
158                .finalize_at_eof(self.input)
159                .map_err(scan_error_to_parse_error)?;
160        }
161
162        self.state.last_token_start = spanned.span.offset as usize;
163        self.state.scanner_pos = self.scanner.pos();
164
165        let kind = match spanned.token {
166            ScanToken::ObjectStart => TokenKind::ObjectStart,
167            ScanToken::ObjectEnd => TokenKind::ObjectEnd,
168            ScanToken::ArrayStart => TokenKind::ArrayStart,
169            ScanToken::ArrayEnd => TokenKind::ArrayEnd,
170            ScanToken::Colon => TokenKind::Colon,
171            ScanToken::Comma => TokenKind::Comma,
172            ScanToken::Null => TokenKind::Null,
173            ScanToken::True => TokenKind::True,
174            ScanToken::False => TokenKind::False,
175            ScanToken::String {
176                start,
177                end,
178                has_escapes,
179            } => {
180                let s = if !has_escapes {
181                    if TRUSTED_UTF8 {
182                        // SAFETY: Caller guarantees input is valid UTF-8
183                        unsafe { scanner::decode_string_borrowed_unchecked(self.input, start, end) }
184                            .map(Cow::Borrowed)
185                            .ok_or_else(|| {
186                                ParseError::new(
187                                    spanned.span,
188                                    DeserializeErrorKind::InvalidUtf8 {
189                                        context: [0u8; 16],
190                                        context_len: 0,
191                                    },
192                                )
193                            })?
194                    } else {
195                        scanner::decode_string_borrowed(self.input, start, end)
196                            .map(Cow::Borrowed)
197                            .ok_or_else(|| {
198                                ParseError::new(
199                                    spanned.span,
200                                    DeserializeErrorKind::InvalidUtf8 {
201                                        context: [0u8; 16],
202                                        context_len: 0,
203                                    },
204                                )
205                            })?
206                    }
207                } else if TRUSTED_UTF8 {
208                    // SAFETY: Caller guarantees input is valid UTF-8
209                    Cow::Owned(
210                        unsafe { scanner::decode_string_owned_unchecked(self.input, start, end) }
211                            .map_err(scan_error_to_parse_error)?,
212                    )
213                } else {
214                    Cow::Owned(
215                        scanner::decode_string_owned(self.input, start, end)
216                            .map_err(scan_error_to_parse_error)?,
217                    )
218                };
219                TokenKind::String(s)
220            }
221            ScanToken::Number { start, end, hint } => {
222                let parsed = if TRUSTED_UTF8 {
223                    // SAFETY: Input came from &str, so it's valid UTF-8
224                    unsafe { scanner::parse_number_unchecked(self.input, start, end, hint) }
225                } else {
226                    scanner::parse_number(self.input, start, end, hint)
227                }
228                .map_err(scan_error_to_parse_error)?;
229                match parsed {
230                    ParsedNumber::U64(n) => TokenKind::U64(n),
231                    ParsedNumber::I64(n) => TokenKind::I64(n),
232                    ParsedNumber::U128(n) => TokenKind::U128(n),
233                    ParsedNumber::I128(n) => TokenKind::I128(n),
234                    ParsedNumber::F64(n) => TokenKind::F64(n),
235                }
236            }
237            ScanToken::Eof => TokenKind::Eof,
238            ScanToken::NeedMore { .. } => unreachable!("handled above"),
239        };
240
241        Ok(MaterializedToken {
242            kind,
243            span: spanned.span,
244        })
245    }
246
247    fn expect_colon(&mut self) -> Result<(), ParseError> {
248        let token = self.consume_token()?;
249        if !matches!(token.kind, TokenKind::Colon) {
250            return Err(self.unexpected(&token, "':'"));
251        }
252        Ok(())
253    }
254
255    fn parse_value_start_with_token(
256        &mut self,
257        first: Option<MaterializedToken<'de>>,
258    ) -> Result<ParseEvent<'de>, ParseError> {
259        let token = match first {
260            Some(tok) => tok,
261            None => self.consume_token()?,
262        };
263
264        self.state.root_started = true;
265
266        let span = token.span;
267        match token.kind {
268            TokenKind::ObjectStart => {
269                self.state
270                    .stack
271                    .push(ContextState::Object(ObjectState::KeyOrEnd));
272                Ok(ParseEvent::new(
273                    ParseEventKind::StructStart(ContainerKind::Object),
274                    span,
275                ))
276            }
277            TokenKind::ArrayStart => {
278                self.state
279                    .stack
280                    .push(ContextState::Array(ArrayState::ValueOrEnd));
281                Ok(ParseEvent::new(
282                    ParseEventKind::SequenceStart(ContainerKind::Array),
283                    span,
284                ))
285            }
286            TokenKind::String(s) => {
287                let event = ParseEvent::new(ParseEventKind::Scalar(ScalarValue::Str(s)), span);
288                self.finish_value_in_parent();
289                Ok(event)
290            }
291            TokenKind::True => {
292                self.finish_value_in_parent();
293                Ok(ParseEvent::new(
294                    ParseEventKind::Scalar(ScalarValue::Bool(true)),
295                    span,
296                ))
297            }
298            TokenKind::False => {
299                self.finish_value_in_parent();
300                Ok(ParseEvent::new(
301                    ParseEventKind::Scalar(ScalarValue::Bool(false)),
302                    span,
303                ))
304            }
305            TokenKind::Null => {
306                self.finish_value_in_parent();
307                Ok(ParseEvent::new(
308                    ParseEventKind::Scalar(ScalarValue::Null),
309                    span,
310                ))
311            }
312            TokenKind::U64(n) => {
313                self.finish_value_in_parent();
314                Ok(ParseEvent::new(
315                    ParseEventKind::Scalar(ScalarValue::U64(n)),
316                    span,
317                ))
318            }
319            TokenKind::I64(n) => {
320                self.finish_value_in_parent();
321                Ok(ParseEvent::new(
322                    ParseEventKind::Scalar(ScalarValue::I64(n)),
323                    span,
324                ))
325            }
326            TokenKind::U128(n) => {
327                self.finish_value_in_parent();
328                Ok(ParseEvent::new(
329                    ParseEventKind::Scalar(ScalarValue::Str(Cow::Owned(n.to_string()))),
330                    span,
331                ))
332            }
333            TokenKind::I128(n) => {
334                self.finish_value_in_parent();
335                Ok(ParseEvent::new(
336                    ParseEventKind::Scalar(ScalarValue::Str(Cow::Owned(n.to_string()))),
337                    span,
338                ))
339            }
340            TokenKind::F64(n) => {
341                self.finish_value_in_parent();
342                Ok(ParseEvent::new(
343                    ParseEventKind::Scalar(ScalarValue::F64(n)),
344                    span,
345                ))
346            }
347            TokenKind::ObjectEnd | TokenKind::ArrayEnd => Err(self.unexpected(&token, "value")),
348            TokenKind::Comma | TokenKind::Colon => Err(self.unexpected(&token, "value")),
349            TokenKind::Eof => Err(ParseError::new(
350                span,
351                DeserializeErrorKind::UnexpectedEof { expected: "value" },
352            )),
353        }
354    }
355
356    fn finish_value_in_parent(&mut self) {
357        if let Some(context) = self.state.stack.last_mut() {
358            match context {
359                ContextState::Object(state) => *state = ObjectState::CommaOrEnd,
360                ContextState::Array(state) => *state = ArrayState::CommaOrEnd,
361            }
362        } else if self.state.root_started {
363            self.state.root_complete = true;
364        }
365    }
366
367    fn unexpected(&self, token: &MaterializedToken<'de>, expected: &'static str) -> ParseError {
368        ParseError::new(
369            token.span,
370            DeserializeErrorKind::UnexpectedToken {
371                got: format!("{:?}", token.kind).into(),
372                expected,
373            },
374        )
375    }
376
377    /// Skip a JSON value by scanning tokens without full materialization.
378    fn skip_value_tokens(&mut self) -> Result<Span, ParseError> {
379        let first = self
380            .scanner
381            .next_token(self.input)
382            .map_err(scan_error_to_parse_error)?;
383        let start = first.span.offset as usize;
384        self.state.scanner_pos = self.scanner.pos();
385
386        match first.token {
387            ScanToken::ObjectStart => self.skip_container(DelimKind::Object)?,
388            ScanToken::ArrayStart => self.skip_container(DelimKind::Array)?,
389            ScanToken::String { .. }
390            | ScanToken::Number { .. }
391            | ScanToken::True
392            | ScanToken::False
393            | ScanToken::Null => {}
394            ScanToken::ObjectEnd | ScanToken::ArrayEnd | ScanToken::Comma | ScanToken::Colon => {
395                return Err(ParseError::new(
396                    first.span,
397                    DeserializeErrorKind::UnexpectedToken {
398                        got: format!("{:?}", first.token).into(),
399                        expected: "value",
400                    },
401                ));
402            }
403            ScanToken::Eof => {
404                return Err(ParseError::new(
405                    first.span,
406                    DeserializeErrorKind::UnexpectedEof { expected: "value" },
407                ));
408            }
409            ScanToken::NeedMore { .. } => {
410                return Err(ParseError::new(
411                    first.span,
412                    DeserializeErrorKind::UnexpectedEof {
413                        expected: "more data",
414                    },
415                ));
416            }
417        }
418
419        let end = self.scanner.pos();
420        Ok(Span::new(start, end - start))
421    }
422
423    fn skip_container(&mut self, start_kind: DelimKind) -> Result<(), ParseError> {
424        let mut stack = alloc::vec![start_kind];
425        while let Some(current) = stack.last().copied() {
426            let spanned = self
427                .scanner
428                .next_token(self.input)
429                .map_err(scan_error_to_parse_error)?;
430            self.state.scanner_pos = self.scanner.pos();
431
432            match spanned.token {
433                ScanToken::ObjectStart => stack.push(DelimKind::Object),
434                ScanToken::ArrayStart => stack.push(DelimKind::Array),
435                ScanToken::ObjectEnd => {
436                    if current != DelimKind::Object {
437                        return Err(ParseError::new(
438                            spanned.span,
439                            DeserializeErrorKind::UnexpectedToken {
440                                got: "'}'".into(),
441                                expected: "']'",
442                            },
443                        ));
444                    }
445                    stack.pop();
446                }
447                ScanToken::ArrayEnd => {
448                    if current != DelimKind::Array {
449                        return Err(ParseError::new(
450                            spanned.span,
451                            DeserializeErrorKind::UnexpectedToken {
452                                got: "']'".into(),
453                                expected: "'}'",
454                            },
455                        ));
456                    }
457                    stack.pop();
458                }
459                ScanToken::Eof => {
460                    return Err(ParseError::new(
461                        spanned.span,
462                        DeserializeErrorKind::UnexpectedEof { expected: "value" },
463                    ));
464                }
465                ScanToken::NeedMore { .. } => {
466                    return Err(ParseError::new(
467                        spanned.span,
468                        DeserializeErrorKind::UnexpectedEof {
469                            expected: "more data",
470                        },
471                    ));
472                }
473                _ => {}
474            }
475        }
476        Ok(())
477    }
478
479    fn determine_action(&self) -> NextAction {
480        if let Some(context) = self.state.stack.last() {
481            match context {
482                ContextState::Object(state) => match state {
483                    ObjectState::KeyOrEnd => NextAction::ObjectKey,
484                    ObjectState::Value => NextAction::ObjectValue,
485                    ObjectState::CommaOrEnd => NextAction::ObjectComma,
486                },
487                ContextState::Array(state) => match state {
488                    ArrayState::ValueOrEnd => NextAction::ArrayValue,
489                    ArrayState::CommaOrEnd => NextAction::ArrayComma,
490                },
491            }
492        } else if self.state.root_complete {
493            NextAction::RootFinished
494        } else {
495            NextAction::RootValue
496        }
497    }
498
499    fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
500        loop {
501            match self.determine_action() {
502                NextAction::ObjectKey => {
503                    let token = self.consume_token()?;
504                    let span = token.span;
505                    match token.kind {
506                        TokenKind::ObjectEnd => {
507                            self.state.stack.pop();
508                            self.finish_value_in_parent();
509                            return Ok(Some(ParseEvent::new(ParseEventKind::StructEnd, span)));
510                        }
511                        TokenKind::String(name) => {
512                            self.expect_colon()?;
513                            if let Some(ContextState::Object(state)) = self.state.stack.last_mut() {
514                                *state = ObjectState::Value;
515                            }
516                            return Ok(Some(ParseEvent::new(
517                                ParseEventKind::FieldKey(FieldKey::new(
518                                    name,
519                                    FieldLocationHint::KeyValue,
520                                )),
521                                span,
522                            )));
523                        }
524                        TokenKind::Eof => {
525                            return Err(ParseError::new(
526                                span,
527                                DeserializeErrorKind::UnexpectedEof {
528                                    expected: "field name or '}'",
529                                },
530                            ));
531                        }
532                        _ => return Err(self.unexpected(&token, "field name or '}'")),
533                    }
534                }
535                NextAction::ObjectValue => {
536                    return self.parse_value_start_with_token(None).map(Some);
537                }
538                NextAction::ObjectComma => {
539                    let token = self.consume_token()?;
540                    let span = token.span;
541                    match token.kind {
542                        TokenKind::Comma => {
543                            if let Some(ContextState::Object(state)) = self.state.stack.last_mut() {
544                                *state = ObjectState::KeyOrEnd;
545                            }
546                            continue;
547                        }
548                        TokenKind::ObjectEnd => {
549                            self.state.stack.pop();
550                            self.finish_value_in_parent();
551                            return Ok(Some(ParseEvent::new(ParseEventKind::StructEnd, span)));
552                        }
553                        TokenKind::Eof => {
554                            return Err(ParseError::new(
555                                span,
556                                DeserializeErrorKind::UnexpectedEof {
557                                    expected: "',' or '}'",
558                                },
559                            ));
560                        }
561                        _ => return Err(self.unexpected(&token, "',' or '}'")),
562                    }
563                }
564                NextAction::ArrayValue => {
565                    let token = self.consume_token()?;
566                    let span = token.span;
567                    match token.kind {
568                        TokenKind::ArrayEnd => {
569                            self.state.stack.pop();
570                            self.finish_value_in_parent();
571                            return Ok(Some(ParseEvent::new(ParseEventKind::SequenceEnd, span)));
572                        }
573                        TokenKind::Eof => {
574                            return Err(ParseError::new(
575                                span,
576                                DeserializeErrorKind::UnexpectedEof {
577                                    expected: "value or ']'",
578                                },
579                            ));
580                        }
581                        TokenKind::Comma | TokenKind::Colon => {
582                            return Err(self.unexpected(&token, "value or ']'"));
583                        }
584                        _ => {
585                            return self.parse_value_start_with_token(Some(token)).map(Some);
586                        }
587                    }
588                }
589                NextAction::ArrayComma => {
590                    let token = self.consume_token()?;
591                    let span = token.span;
592                    match token.kind {
593                        TokenKind::Comma => {
594                            if let Some(ContextState::Array(state)) = self.state.stack.last_mut() {
595                                *state = ArrayState::ValueOrEnd;
596                            }
597                            continue;
598                        }
599                        TokenKind::ArrayEnd => {
600                            self.state.stack.pop();
601                            self.finish_value_in_parent();
602                            return Ok(Some(ParseEvent::new(ParseEventKind::SequenceEnd, span)));
603                        }
604                        TokenKind::Eof => {
605                            return Err(ParseError::new(
606                                span,
607                                DeserializeErrorKind::UnexpectedEof {
608                                    expected: "',' or ']'",
609                                },
610                            ));
611                        }
612                        _ => return Err(self.unexpected(&token, "',' or ']'")),
613                    }
614                }
615                NextAction::RootValue => {
616                    return self.parse_value_start_with_token(None).map(Some);
617                }
618                NextAction::RootFinished => {
619                    return Ok(None);
620                }
621            }
622        }
623    }
624
    /// Get current position in input.
    ///
    /// This is the scanner position recorded after the most recently scanned
    /// token, not a live query of the scanner.
    fn current_offset(&self) -> usize {
        self.state.scanner_pos
    }
629}
630
631impl<'de, const TRUSTED_UTF8: bool> FormatParser<'de> for JsonParser<'de, TRUSTED_UTF8> {
    /// Shape reported for raw captures: always the shape of `crate::RawJson`.
    fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
        Some(crate::RawJson::SHAPE)
    }
635
    /// The complete input buffer this parser was created over.
    fn input(&self) -> Option<&'de [u8]> {
        Some(self.input)
    }
639
640    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
641        if let Some(event) = self.state.event_peek.take() {
642            self.state.peek_start_offset = None;
643            return Ok(Some(event));
644        }
645        self.produce_event()
646    }
647
648    fn next_events(
649        &mut self,
650        buf: &mut VecDeque<ParseEvent<'de>>,
651        limit: usize,
652    ) -> Result<usize, ParseError> {
653        if limit == 0 {
654            return Ok(0);
655        }
656
657        let mut count = 0;
658
659        // First, drain any peeked event
660        if let Some(event) = self.state.event_peek.take() {
661            self.state.peek_start_offset = None;
662            buf.push_back(event);
663            count += 1;
664        }
665
666        // Simple implementation: just call produce_event in a loop
667        while count < limit {
668            match self.produce_event()? {
669                Some(event) => {
670                    buf.push_back(event);
671                    count += 1;
672                }
673                None => break,
674            }
675        }
676
677        Ok(count)
678    }
679
680    fn save(&mut self) -> SavePoint {
681        self.save_counter += 1;
682        self.saved_states
683            .push((self.save_counter, self.state.clone()));
684        SavePoint(self.save_counter)
685    }
686
687    fn restore(&mut self, save_point: SavePoint) {
688        // Find and remove the saved state
689        if let Some(pos) = self
690            .saved_states
691            .iter()
692            .position(|(id, _)| *id == save_point.0)
693        {
694            let (_, saved) = self.saved_states.remove(pos);
695            self.state = saved;
696            // Reset the scanner to the saved position
697            self.scanner = Scanner::at_position(self.state.scanner_pos);
698        }
699    }
700
701    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
702        if let Some(event) = self.state.event_peek.clone() {
703            return Ok(Some(event));
704        }
705        let event = self.produce_event()?;
706        if let Some(ref e) = event {
707            self.state.event_peek = Some(e.clone());
708            // Use the offset of the last token consumed (which is the value's first token)
709            // For values, produce_event ultimately calls parse_value_start_with_token
710            // which consumes the first token and sets last_token_start.
711            self.state.peek_start_offset = Some(self.state.last_token_start);
712        }
713        Ok(event)
714    }
715
716    fn skip_value(&mut self) -> Result<(), ParseError> {
717        // Handle the case where peek_event was called before skip_value
718        if let Some(event) = self.state.event_peek.take() {
719            self.state.peek_start_offset = None;
720
721            // Based on the peeked event, we may need to skip the rest of a container.
722            // Note: When peeking a StructStart/SequenceStart, the parser already pushed
723            // to self.state.stack. We need to pop it after skipping the container.
724            match event.kind {
725                ParseEventKind::StructStart(_) => {
726                    let res = self.skip_container(DelimKind::Object);
727                    // Pop the stack entry that was pushed during peek, even if skip_container errored
728                    self.state.stack.pop();
729                    res?;
730                    // Update the parent's state after skipping the container
731                    self.finish_value_in_parent();
732                }
733                ParseEventKind::SequenceStart(_) => {
734                    let res = self.skip_container(DelimKind::Array);
735                    // Pop the stack entry that was pushed during peek, even if skip_container errored
736                    self.state.stack.pop();
737                    res?;
738                    // Update the parent's state after skipping the container
739                    self.finish_value_in_parent();
740                }
741                _ => {
742                    // Scalar or end event - already consumed during peek.
743                    // parse_value_start_with_token already called finish_value_in_parent
744                    // for scalars, so we don't call it again here.
745                }
746            }
747        } else {
748            self.skip_value_tokens()?;
749            self.finish_value_in_parent();
750        }
751        Ok(())
752    }
753
754    fn capture_raw(&mut self) -> Result<Option<&'de str>, ParseError> {
755        // Handle the case where peek_event was called before capture_raw.
756        // This happens when deserialize_option peeks to check for null.
757        let start_offset = if let Some(event) = self.state.event_peek.take() {
758            let start = self
759                .state
760                .peek_start_offset
761                .take()
762                .expect("peek_start_offset should be set when event_peek is set");
763
764            // Based on the peeked event, we may need to skip the rest of a container.
765            // Note: When peeking a StructStart/SequenceStart, the parser already pushed
766            // to self.state.stack. We need to pop it after skipping the container.
767            match event.kind {
768                ParseEventKind::StructStart(_) => {
769                    let res = self.skip_container(DelimKind::Object);
770                    // Pop the stack entry that was pushed during peek, even if skip_container errored
771                    self.state.stack.pop();
772                    res?;
773                }
774                ParseEventKind::SequenceStart(_) => {
775                    let res = self.skip_container(DelimKind::Array);
776                    // Pop the stack entry that was pushed during peek, even if skip_container errored
777                    self.state.stack.pop();
778                    res?;
779                }
780                ParseEventKind::StructEnd | ParseEventKind::SequenceEnd => {
781                    // This shouldn't happen in valid usage, but handle gracefully
782                    return Err(ParseError::new(
783                        Span::new(start, 0),
784                        DeserializeErrorKind::InvalidValue {
785                            message: "unexpected end event in capture_raw".into(),
786                        },
787                    ));
788                }
789                _ => {
790                    // Scalar value - already fully consumed during peek
791                }
792            }
793
794            start
795        } else {
796            // Normal path: no peek, consume the first token
797            let first = self
798                .scanner
799                .next_token(self.input)
800                .map_err(scan_error_to_parse_error)?;
801            let start = first.span.offset as usize;
802            self.state.scanner_pos = self.scanner.pos();
803
804            // Skip the rest of the value if it's a container
805            match first.token {
806                ScanToken::ObjectStart => self.skip_container(DelimKind::Object)?,
807                ScanToken::ArrayStart => self.skip_container(DelimKind::Array)?,
808                ScanToken::ObjectEnd
809                | ScanToken::ArrayEnd
810                | ScanToken::Comma
811                | ScanToken::Colon => {
812                    return Err(ParseError::new(
813                        first.span,
814                        DeserializeErrorKind::UnexpectedToken {
815                            got: format!("{:?}", first.token).into(),
816                            expected: "value",
817                        },
818                    ));
819                }
820                ScanToken::Eof => {
821                    return Err(ParseError::new(
822                        first.span,
823                        DeserializeErrorKind::UnexpectedEof { expected: "value" },
824                    ));
825                }
826                ScanToken::NeedMore { .. } => {
827                    return Err(ParseError::new(
828                        first.span,
829                        DeserializeErrorKind::UnexpectedEof {
830                            expected: "more data",
831                        },
832                    ));
833                }
834                _ => {
835                    // Simple value - already consumed
836                }
837            }
838
839            start
840        };
841
842        // Get end position
843        let end_offset = self.current_offset();
844
845        // Extract the raw slice and convert to str
846        let raw_bytes = &self.input[start_offset..end_offset];
847        let raw_str = core::str::from_utf8(raw_bytes).map_err(|e| {
848            ParseError::new(
849                Span::new(start_offset, end_offset - start_offset),
850                DeserializeErrorKind::InvalidValue {
851                    message: format!("invalid UTF-8 in raw JSON: {}", e).into(),
852                },
853            )
854        })?;
855
856        self.finish_value_in_parent();
857        Ok(Some(raw_str))
858    }
859
860    fn format_namespace(&self) -> Option<&'static str> {
861        Some("json")
862    }
863
864    fn current_span(&self) -> Option<Span> {
865        // Return the span of the most recently consumed token
866        // This is used by metadata containers to track source locations
867        let offset = self.state.last_token_start;
868        let len = self.current_offset().saturating_sub(offset);
869        Some(Span::new(offset, len))
870    }
871}
872
873// =============================================================================
874// FormatJitParser Implementation (Tier-2 JIT support)
875// =============================================================================
876
#[cfg(feature = "jit")]
impl<'de> facet_format::FormatJitParser<'de> for JsonParser<'de> {
    type FormatJit = crate::jit::JsonJitFormat;

    /// Hands out the parser's input buffer.
    fn jit_input(&self) -> &'de [u8] {
        self.input
    }

    /// Returns the current offset when the parser is at a root boundary,
    /// otherwise `None`.
    ///
    /// A root boundary means all of the following hold:
    /// - no event is peeked (the position would be ambiguous),
    /// - the container stack is empty,
    /// - the root value is either not started or already complete.
    ///
    /// These checks keep `jit_set_pos` from corrupting the parser state machine.
    fn jit_pos(&self) -> Option<usize> {
        let state = &self.state;
        let has_peeked_event = state.event_peek.is_some();
        let inside_container = !state.stack.is_empty();
        let root_in_progress = state.root_started && !state.root_complete;

        if has_peeked_event || inside_container || root_in_progress {
            None
        } else {
            Some(self.current_offset())
        }
    }

    /// Repositions the parser at `pos` and marks the root value as started
    /// and complete.
    ///
    /// Per the original notes, Tier-2 JIT is expected to have parsed one
    /// complete root value, and `jit_pos` is what enforces root-only usage.
    fn jit_set_pos(&mut self, pos: usize) {
        // Move the scanner and the saved scanner position together.
        self.scanner = Scanner::at_position(pos);
        self.state.scanner_pos = pos;

        // Any peeked event (and its start offset) is dropped.
        self.state.peek_start_offset = None;
        self.state.event_peek = None;

        // The root value counts as fully parsed from here on.
        self.state.root_complete = true;
        self.state.root_started = true;

        // The stack is expected to be empty already, since jit_pos requires it.
        debug_assert!(self.state.stack.is_empty());
    }

    /// Produces the format descriptor used by the JIT.
    fn jit_format(&self) -> Self::FormatJit {
        crate::jit::JsonJitFormat
    }

    /// Translates a Tier-2 JIT error code into a `ParseError` spanning one
    /// byte at `error_pos`.
    ///
    /// Codes -100 to -103 map to specific EOF / unexpected-token errors; any
    /// other code becomes an `InvalidValue` carrying the numeric code.
    fn jit_error(&self, _input: &'de [u8], error_pos: usize, error_code: i32) -> ParseError {
        let at = Span::new(error_pos, 1);

        // Token-mismatch codes share one shape: (what was found, what was expected).
        let (got, expected) = match error_code {
            -100 => {
                return ParseError::new(
                    at,
                    DeserializeErrorKind::UnexpectedEof { expected: "value" },
                );
            }
            -101 => ("non-'['", "'['"),
            -102 => ("non-boolean", "'true' or 'false'"),
            -103 => ("unexpected token", "',' or ']'"),
            other => {
                return ParseError::new(
                    at,
                    DeserializeErrorKind::InvalidValue {
                        message: format!("Tier-2 JIT error code: {}", other).into(),
                    },
                );
            }
        };

        ParseError::new(
            at,
            DeserializeErrorKind::UnexpectedToken {
                got: got.into(),
                expected,
            },
        )
    }
}