facet_json/
parser.rs

1extern crate alloc;
2
3use alloc::{borrow::Cow, vec::Vec};
4
5use facet_core::Facet as _;
6use facet_format::{
7    ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
8    ProbeStream, ScalarValue,
9};
10
11use crate::adapter::{SliceAdapter, SpannedAdapterToken, Token as AdapterToken};
12pub use crate::error::JsonError;
13use crate::error::JsonErrorKind;
14
/// Streaming JSON parser backed by `facet-json`'s `SliceAdapter`.
///
/// Tokens are pulled from the adapter one at a time and translated into
/// `ParseEvent`s by a small state machine driven by `stack`.
pub struct JsonParser<'de> {
    /// The full input buffer; probing and raw capture slice into it by offset.
    input: &'de [u8],
    /// Token source for the main parse.
    adapter: SliceAdapter<'de, true>,
    /// One entry per currently open object/array, innermost last.
    stack: Vec<ContextState>,
    /// Cached event for `peek_event`.
    event_peek: Option<ParseEvent<'de>>,
    /// Whether the root value has started.
    root_started: bool,
    /// Whether the root value has fully completed.
    root_complete: bool,
    /// Absolute offset (in bytes) just past the most recently consumed token,
    /// i.e. where the next unread input begins.
    current_offset: usize,
}
29
/// State of one open container on the parser stack.
#[derive(Debug)]
enum ContextState {
    /// Inside a JSON object; the payload says what is expected next.
    Object(ObjectState),
    /// Inside a JSON array; the payload says what is expected next.
    Array(ArrayState),
}
35
/// What the parser expects next while inside an object.
#[derive(Debug)]
enum ObjectState {
    /// A field name or the closing `}`.
    KeyOrEnd,
    /// The value belonging to the key (and `:`) that was just consumed.
    Value,
    /// A `,` separating fields or the closing `}`.
    CommaOrEnd,
}
42
/// What the parser expects next while inside an array.
#[derive(Debug)]
enum ArrayState {
    /// An element value or the closing `]`.
    ValueOrEnd,
    /// A `,` separating elements or the closing `]`.
    CommaOrEnd,
}
48
/// Kind of container being tracked while skipping over nested values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DelimKind {
    /// A `{ ... }` container.
    Object,
    /// A `[ ... ]` container.
    Array,
}
54
/// The step `produce_event` should take next, derived from the top of the
/// container stack and the root flags by `determine_action`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NextAction {
    /// Innermost container is an object expecting a key or `}`.
    ObjectKey,
    /// Innermost container is an object expecting a field value.
    ObjectValue,
    /// Innermost container is an object expecting `,` or `}`.
    ObjectComma,
    /// Innermost container is an array expecting a value or `]`.
    ArrayValue,
    /// Innermost container is an array expecting `,` or `]`.
    ArrayComma,
    /// No container is open and the root value is not yet complete.
    RootValue,
    /// No container is open and the root value is complete.
    RootFinished,
}
65
66impl<'de> JsonParser<'de> {
67    pub fn new(input: &'de [u8]) -> Self {
68        Self {
69            input,
70            adapter: SliceAdapter::new(input),
71            stack: Vec::new(),
72            event_peek: None,
73            root_started: false,
74            root_complete: false,
75            current_offset: 0,
76        }
77    }
78
79    fn consume_token(&mut self) -> Result<SpannedAdapterToken<'de>, JsonError> {
80        let token = self.adapter.next_token().map_err(JsonError::from)?;
81        self.current_offset = token.span.offset + token.span.len;
82        Ok(token)
83    }
84
85    fn expect_colon(&mut self) -> Result<(), JsonError> {
86        let token = self.consume_token()?;
87        if !matches!(token.token, AdapterToken::Colon) {
88            return Err(self.unexpected(&token, "':'"));
89        }
90        Ok(())
91    }
92
93    fn parse_value_start_with_token(
94        &mut self,
95        first: Option<SpannedAdapterToken<'de>>,
96    ) -> Result<ParseEvent<'de>, JsonError> {
97        let token = match first {
98            Some(tok) => tok,
99            None => self.consume_token()?,
100        };
101
102        self.root_started = true;
103
104        match token.token {
105            AdapterToken::ObjectStart => {
106                self.stack.push(ContextState::Object(ObjectState::KeyOrEnd));
107                Ok(ParseEvent::StructStart(ContainerKind::Object))
108            }
109            AdapterToken::ArrayStart => {
110                self.stack.push(ContextState::Array(ArrayState::ValueOrEnd));
111                Ok(ParseEvent::SequenceStart(ContainerKind::Array))
112            }
113            AdapterToken::String(s) => {
114                let event = ParseEvent::Scalar(ScalarValue::Str(s));
115                self.finish_value_in_parent();
116                Ok(event)
117            }
118            AdapterToken::True => {
119                self.finish_value_in_parent();
120                Ok(ParseEvent::Scalar(ScalarValue::Bool(true)))
121            }
122            AdapterToken::False => {
123                self.finish_value_in_parent();
124                Ok(ParseEvent::Scalar(ScalarValue::Bool(false)))
125            }
126            AdapterToken::Null => {
127                self.finish_value_in_parent();
128                Ok(ParseEvent::Scalar(ScalarValue::Null))
129            }
130            AdapterToken::U64(n) => {
131                self.finish_value_in_parent();
132                Ok(ParseEvent::Scalar(ScalarValue::U64(n)))
133            }
134            AdapterToken::I64(n) => {
135                self.finish_value_in_parent();
136                Ok(ParseEvent::Scalar(ScalarValue::I64(n)))
137            }
138            AdapterToken::U128(n) => {
139                self.finish_value_in_parent();
140                Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
141                    n.to_string(),
142                ))))
143            }
144            AdapterToken::I128(n) => {
145                self.finish_value_in_parent();
146                Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
147                    n.to_string(),
148                ))))
149            }
150            AdapterToken::F64(n) => {
151                self.finish_value_in_parent();
152                Ok(ParseEvent::Scalar(ScalarValue::F64(n)))
153            }
154            AdapterToken::ObjectEnd | AdapterToken::ArrayEnd => {
155                Err(self.unexpected(&token, "value"))
156            }
157            AdapterToken::Comma | AdapterToken::Colon => Err(self.unexpected(&token, "value")),
158            AdapterToken::Eof => Err(JsonError::new(
159                JsonErrorKind::UnexpectedEof { expected: "value" },
160                token.span,
161            )),
162        }
163    }
164
165    fn finish_value_in_parent(&mut self) {
166        if let Some(context) = self.stack.last_mut() {
167            match context {
168                ContextState::Object(state) => *state = ObjectState::CommaOrEnd,
169                ContextState::Array(state) => *state = ArrayState::CommaOrEnd,
170            }
171        } else if self.root_started {
172            self.root_complete = true;
173        }
174    }
175
176    fn unexpected(&self, token: &SpannedAdapterToken<'de>, expected: &'static str) -> JsonError {
177        JsonError::new(
178            JsonErrorKind::UnexpectedToken {
179                got: format!("{:?}", token.token),
180                expected,
181            },
182            token.span,
183        )
184    }
185
186    fn consume_value_tokens(&mut self) -> Result<(), JsonError> {
187        let span = self.adapter.skip().map_err(JsonError::from)?;
188        self.current_offset = span.offset + span.len;
189        Ok(())
190    }
191
192    fn skip_container(&mut self, start_kind: DelimKind) -> Result<(), JsonError> {
193        let mut stack = vec![start_kind];
194        while let Some(current) = stack.last().copied() {
195            let token = self.consume_token()?;
196            match token.token {
197                AdapterToken::ObjectStart => stack.push(DelimKind::Object),
198                AdapterToken::ArrayStart => stack.push(DelimKind::Array),
199                AdapterToken::ObjectEnd => {
200                    if current != DelimKind::Object {
201                        return Err(self.unexpected(&token, "'}'"));
202                    }
203                    stack.pop();
204                    if stack.is_empty() {
205                        break;
206                    }
207                }
208                AdapterToken::ArrayEnd => {
209                    if current != DelimKind::Array {
210                        return Err(self.unexpected(&token, "']'"));
211                    }
212                    stack.pop();
213                    if stack.is_empty() {
214                        break;
215                    }
216                }
217                AdapterToken::Eof => {
218                    return Err(JsonError::new(
219                        JsonErrorKind::UnexpectedEof { expected: "value" },
220                        token.span,
221                    ));
222                }
223                _ => {}
224            }
225        }
226        Ok(())
227    }
228
229    /// Skip a container in a separate adapter (used during probing).
230    fn skip_container_in_adapter(
231        &self,
232        adapter: &mut SliceAdapter<'de, true>,
233        start_kind: DelimKind,
234    ) -> Result<(), JsonError> {
235        let mut stack = vec![start_kind];
236        while let Some(current) = stack.last().copied() {
237            let token = adapter.next_token().map_err(JsonError::from)?;
238            match token.token {
239                AdapterToken::ObjectStart => stack.push(DelimKind::Object),
240                AdapterToken::ArrayStart => stack.push(DelimKind::Array),
241                AdapterToken::ObjectEnd => {
242                    if current != DelimKind::Object {
243                        return Err(JsonError::new(
244                            JsonErrorKind::UnexpectedToken {
245                                got: format!("{:?}", token.token),
246                                expected: "'}'",
247                            },
248                            token.span,
249                        ));
250                    }
251                    stack.pop();
252                    if stack.is_empty() {
253                        break;
254                    }
255                }
256                AdapterToken::ArrayEnd => {
257                    if current != DelimKind::Array {
258                        return Err(JsonError::new(
259                            JsonErrorKind::UnexpectedToken {
260                                got: format!("{:?}", token.token),
261                                expected: "']'",
262                            },
263                            token.span,
264                        ));
265                    }
266                    stack.pop();
267                    if stack.is_empty() {
268                        break;
269                    }
270                }
271                AdapterToken::Eof => {
272                    return Err(JsonError::new(
273                        JsonErrorKind::UnexpectedEof { expected: "value" },
274                        token.span,
275                    ));
276                }
277                _ => {}
278            }
279        }
280        Ok(())
281    }
282
283    fn determine_action(&self) -> NextAction {
284        if let Some(context) = self.stack.last() {
285            match context {
286                ContextState::Object(state) => match state {
287                    ObjectState::KeyOrEnd => NextAction::ObjectKey,
288                    ObjectState::Value => NextAction::ObjectValue,
289                    ObjectState::CommaOrEnd => NextAction::ObjectComma,
290                },
291                ContextState::Array(state) => match state {
292                    ArrayState::ValueOrEnd => NextAction::ArrayValue,
293                    ArrayState::CommaOrEnd => NextAction::ArrayComma,
294                },
295            }
296        } else if self.root_complete {
297            NextAction::RootFinished
298        } else {
299            NextAction::RootValue
300        }
301    }
302
303    fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, JsonError> {
304        loop {
305            match self.determine_action() {
306                NextAction::ObjectKey => {
307                    let token = self.consume_token()?;
308                    match token.token {
309                        AdapterToken::ObjectEnd => {
310                            self.stack.pop();
311                            self.finish_value_in_parent();
312                            return Ok(Some(ParseEvent::StructEnd));
313                        }
314                        AdapterToken::String(name) => {
315                            self.expect_colon()?;
316                            if let Some(ContextState::Object(state)) = self.stack.last_mut() {
317                                *state = ObjectState::Value;
318                            }
319                            return Ok(Some(ParseEvent::FieldKey(FieldKey::new(
320                                name,
321                                FieldLocationHint::KeyValue,
322                            ))));
323                        }
324                        AdapterToken::Eof => {
325                            return Err(JsonError::new(
326                                JsonErrorKind::UnexpectedEof {
327                                    expected: "field name or '}'",
328                                },
329                                token.span,
330                            ));
331                        }
332                        _ => return Err(self.unexpected(&token, "field name or '}'")),
333                    }
334                }
335                NextAction::ObjectValue => {
336                    return self.parse_value_start_with_token(None).map(Some);
337                }
338                NextAction::ObjectComma => {
339                    let token = self.consume_token()?;
340                    match token.token {
341                        AdapterToken::Comma => {
342                            if let Some(ContextState::Object(state)) = self.stack.last_mut() {
343                                *state = ObjectState::KeyOrEnd;
344                            }
345                            continue;
346                        }
347                        AdapterToken::ObjectEnd => {
348                            self.stack.pop();
349                            self.finish_value_in_parent();
350                            return Ok(Some(ParseEvent::StructEnd));
351                        }
352                        AdapterToken::Eof => {
353                            return Err(JsonError::new(
354                                JsonErrorKind::UnexpectedEof {
355                                    expected: "',' or '}'",
356                                },
357                                token.span,
358                            ));
359                        }
360                        _ => return Err(self.unexpected(&token, "',' or '}'")),
361                    }
362                }
363                NextAction::ArrayValue => {
364                    let token = self.consume_token()?;
365                    match token.token {
366                        AdapterToken::ArrayEnd => {
367                            self.stack.pop();
368                            self.finish_value_in_parent();
369                            return Ok(Some(ParseEvent::SequenceEnd));
370                        }
371                        AdapterToken::Eof => {
372                            return Err(JsonError::new(
373                                JsonErrorKind::UnexpectedEof {
374                                    expected: "value or ']'",
375                                },
376                                token.span,
377                            ));
378                        }
379                        AdapterToken::Comma | AdapterToken::Colon => {
380                            return Err(self.unexpected(&token, "value or ']'"));
381                        }
382                        _ => {
383                            return self.parse_value_start_with_token(Some(token)).map(Some);
384                        }
385                    }
386                }
387                NextAction::ArrayComma => {
388                    let token = self.consume_token()?;
389                    match token.token {
390                        AdapterToken::Comma => {
391                            if let Some(ContextState::Array(state)) = self.stack.last_mut() {
392                                *state = ArrayState::ValueOrEnd;
393                            }
394                            continue;
395                        }
396                        AdapterToken::ArrayEnd => {
397                            self.stack.pop();
398                            self.finish_value_in_parent();
399                            return Ok(Some(ParseEvent::SequenceEnd));
400                        }
401                        AdapterToken::Eof => {
402                            return Err(JsonError::new(
403                                JsonErrorKind::UnexpectedEof {
404                                    expected: "',' or ']'",
405                                },
406                                token.span,
407                            ));
408                        }
409                        _ => return Err(self.unexpected(&token, "',' or ']'")),
410                    }
411                }
412                NextAction::RootValue => {
413                    return self.parse_value_start_with_token(None).map(Some);
414                }
415                NextAction::RootFinished => {
416                    return Ok(None);
417                }
418            }
419        }
420    }
421
422    fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, JsonError> {
423        let remaining = self.input.get(self.current_offset..).unwrap_or_default();
424        if remaining.is_empty() {
425            return Ok(Vec::new());
426        }
427
428        let mut adapter = SliceAdapter::<true>::new(remaining);
429
430        // If we've peeked a StructStart, we've already consumed the '{' so skip the check.
431        // Otherwise, expect ObjectStart as the first token.
432        let already_inside_object = matches!(self.event_peek, Some(ParseEvent::StructStart(_)));
433
434        if !already_inside_object {
435            let first = adapter.next_token().map_err(JsonError::from)?;
436            if !matches!(first.token, AdapterToken::ObjectStart) {
437                return Ok(Vec::new());
438            }
439        }
440
441        let mut evidence = Vec::new();
442        loop {
443            let token = adapter.next_token().map_err(JsonError::from)?;
444            match token.token {
445                AdapterToken::ObjectEnd => break,
446                AdapterToken::String(name) => {
447                    let colon = adapter.next_token().map_err(JsonError::from)?;
448                    if !matches!(colon.token, AdapterToken::Colon) {
449                        return Err(JsonError::new(
450                            JsonErrorKind::UnexpectedToken {
451                                got: format!("{:?}", colon.token),
452                                expected: "':'",
453                            },
454                            colon.span,
455                        ));
456                    }
457
458                    // Capture scalar values, skip complex types (objects/arrays)
459                    let value_token = adapter.next_token().map_err(JsonError::from)?;
460                    let scalar_value = match value_token.token {
461                        AdapterToken::String(s) => Some(ScalarValue::Str(s)),
462                        AdapterToken::True => Some(ScalarValue::Bool(true)),
463                        AdapterToken::False => Some(ScalarValue::Bool(false)),
464                        AdapterToken::Null => Some(ScalarValue::Null),
465                        AdapterToken::I64(n) => Some(ScalarValue::I64(n)),
466                        AdapterToken::U64(n) => Some(ScalarValue::U64(n)),
467                        AdapterToken::I128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
468                        AdapterToken::U128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
469                        AdapterToken::F64(n) => Some(ScalarValue::F64(n)),
470                        AdapterToken::ObjectStart => {
471                            // Skip the complex object
472                            self.skip_container_in_adapter(&mut adapter, DelimKind::Object)?;
473                            None
474                        }
475                        AdapterToken::ArrayStart => {
476                            // Skip the complex array
477                            self.skip_container_in_adapter(&mut adapter, DelimKind::Array)?;
478                            None
479                        }
480                        _ => None,
481                    };
482
483                    if let Some(sv) = scalar_value {
484                        evidence.push(FieldEvidence::with_scalar_value(
485                            name,
486                            FieldLocationHint::KeyValue,
487                            None,
488                            sv,
489                            None, // No namespace for JSON
490                        ));
491                    } else {
492                        evidence.push(FieldEvidence::new(
493                            name,
494                            FieldLocationHint::KeyValue,
495                            None,
496                            None, // No namespace for JSON
497                        ));
498                    }
499
500                    let sep = adapter.next_token().map_err(JsonError::from)?;
501                    match sep.token {
502                        AdapterToken::Comma => continue,
503                        AdapterToken::ObjectEnd => break,
504                        AdapterToken::Eof => {
505                            return Err(JsonError::new(
506                                JsonErrorKind::UnexpectedEof {
507                                    expected: "',' or '}'",
508                                },
509                                sep.span,
510                            ));
511                        }
512                        _ => {
513                            return Err(JsonError::new(
514                                JsonErrorKind::UnexpectedToken {
515                                    got: format!("{:?}", sep.token),
516                                    expected: "',' or '}'",
517                                },
518                                sep.span,
519                            ));
520                        }
521                    }
522                }
523                AdapterToken::Eof => {
524                    return Err(JsonError::new(
525                        JsonErrorKind::UnexpectedEof {
526                            expected: "field name or '}'",
527                        },
528                        token.span,
529                    ));
530                }
531                _ => {
532                    return Err(JsonError::new(
533                        JsonErrorKind::UnexpectedToken {
534                            got: format!("{:?}", token.token),
535                            expected: "field name or '}'",
536                        },
537                        token.span,
538                    ));
539                }
540            }
541        }
542
543        Ok(evidence)
544    }
545}
546
547impl<'de> FormatParser<'de> for JsonParser<'de> {
548    type Error = JsonError;
549    type Probe<'a>
550        = JsonProbe<'de>
551    where
552        Self: 'a;
553
554    fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
555        Some(crate::RawJson::SHAPE)
556    }
557
558    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
559        if let Some(event) = self.event_peek.take() {
560            return Ok(Some(event));
561        }
562        self.produce_event()
563    }
564
565    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
566        if let Some(event) = self.event_peek.clone() {
567            return Ok(Some(event));
568        }
569        let event = self.produce_event()?;
570        if let Some(ref e) = event {
571            self.event_peek = Some(e.clone());
572        }
573        Ok(event)
574    }
575
576    fn skip_value(&mut self) -> Result<(), Self::Error> {
577        debug_assert!(
578            self.event_peek.is_none(),
579            "skip_value called while an event is buffered"
580        );
581        self.consume_value_tokens()?;
582        self.finish_value_in_parent();
583        Ok(())
584    }
585
586    fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
587        let evidence = self.build_probe()?;
588        Ok(JsonProbe { evidence, idx: 0 })
589    }
590
591    fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
592        debug_assert!(
593            self.event_peek.is_none(),
594            "capture_raw called while an event is buffered"
595        );
596
597        // Get the first token to find the actual start offset (excludes whitespace)
598        let first = self.consume_token()?;
599        let start_offset = first.span.offset;
600
601        // Skip the rest of the value if it's a container
602        match first.token {
603            AdapterToken::ObjectStart => self.skip_container(DelimKind::Object)?,
604            AdapterToken::ArrayStart => self.skip_container(DelimKind::Array)?,
605            AdapterToken::ObjectEnd
606            | AdapterToken::ArrayEnd
607            | AdapterToken::Comma
608            | AdapterToken::Colon => return Err(self.unexpected(&first, "value")),
609            AdapterToken::Eof => {
610                return Err(JsonError::new(
611                    JsonErrorKind::UnexpectedEof { expected: "value" },
612                    first.span,
613                ));
614            }
615            _ => {
616                // Simple value - already consumed
617            }
618        }
619
620        // Get end position
621        let end_offset = self.current_offset;
622
623        // Extract the raw slice and convert to str
624        let raw_bytes = &self.input[start_offset..end_offset];
625        let raw_str = core::str::from_utf8(raw_bytes).map_err(|e| {
626            JsonError::without_span(JsonErrorKind::InvalidValue {
627                message: alloc::format!("invalid UTF-8 in raw JSON: {}", e),
628            })
629        })?;
630
631        self.finish_value_in_parent();
632        Ok(Some(raw_str))
633    }
634}
635
636// =============================================================================
637// FormatJitParser Implementation (Tier-2 JIT support)
638// =============================================================================
639
640#[cfg(feature = "jit")]
641impl<'de> facet_format::FormatJitParser<'de> for JsonParser<'de> {
642    type FormatJit = crate::jit::JsonJitFormat;
643
644    fn jit_input(&self) -> &'de [u8] {
645        self.input
646    }
647
648    fn jit_pos(&self) -> Option<usize> {
649        // Tier-2 JIT is only safe at root boundary:
650        // - No peeked event (position would be ambiguous)
651        // - Empty stack (we're at root level, not inside an object/array)
652        // - Root not yet started, OR root is complete
653        //
654        // This ensures jit_set_pos doesn't corrupt parser state machine.
655        if self.event_peek.is_some() {
656            return None;
657        }
658        if !self.stack.is_empty() {
659            return None;
660        }
661        if self.root_started && !self.root_complete {
662            // We've started parsing root but haven't finished - not safe
663            return None;
664        }
665        Some(self.current_offset)
666    }
667
668    fn jit_set_pos(&mut self, pos: usize) {
669        // Update the offset
670        self.current_offset = pos;
671
672        // Reset the adapter to start from the new position
673        // We need to create a new adapter pointing to the remaining input
674        // but preserving absolute offset semantics
675        self.adapter = SliceAdapter::new_with_offset(self.input, pos);
676
677        // Clear any peeked event
678        self.event_peek = None;
679
680        // Tier-2 JIT parsed a complete root value, so update parser state.
681        // jit_pos() already enforces root-only usage, so we know:
682        // - We started at root level with empty stack
683        // - Tier-2 successfully parsed a complete value
684        // - We're now at the position after that value
685        self.root_started = true;
686        self.root_complete = true;
687        // Stack should already be empty (jit_pos enforces this)
688        debug_assert!(self.stack.is_empty());
689    }
690
691    fn jit_format(&self) -> Self::FormatJit {
692        crate::jit::JsonJitFormat
693    }
694
695    fn jit_error(&self, _input: &'de [u8], error_pos: usize, error_code: i32) -> Self::Error {
696        use crate::error::JsonErrorKind;
697        use facet_reflect::Span;
698
699        let kind = match error_code {
700            -100 => JsonErrorKind::UnexpectedEof { expected: "value" },
701            -101 => JsonErrorKind::UnexpectedToken {
702                got: "non-'['".into(),
703                expected: "'['",
704            },
705            -102 => JsonErrorKind::UnexpectedToken {
706                got: "non-boolean".into(),
707                expected: "'true' or 'false'",
708            },
709            -103 => JsonErrorKind::UnexpectedToken {
710                got: "unexpected token".into(),
711                expected: "',' or ']'",
712            },
713            _ => JsonErrorKind::InvalidValue {
714                message: alloc::format!("Tier-2 JIT error code: {}", error_code),
715            },
716        };
717
718        JsonError::new(
719            kind,
720            Span {
721                offset: error_pos,
722                len: 1,
723            },
724        )
725    }
726}
727
/// Probe stream over field evidence collected ahead of time by
/// `JsonParser::begin_probe`.
pub struct JsonProbe<'de> {
    /// Evidence entries in the order the fields appeared in the input.
    evidence: Vec<FieldEvidence<'de>>,
    /// Index of the next entry to hand out.
    idx: usize,
}
732
733impl<'de> ProbeStream<'de> for JsonProbe<'de> {
734    type Error = JsonError;
735
736    fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
737        if self.idx >= self.evidence.len() {
738            Ok(None)
739        } else {
740            let ev = self.evidence[self.idx].clone();
741            self.idx += 1;
742            Ok(Some(ev))
743        }
744    }
745}