facet_format_json/
parser.rs

1extern crate alloc;
2
3use alloc::{borrow::Cow, vec::Vec};
4
5use facet_core::Facet as _;
6use facet_format::{
7    ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
8    ProbeStream, ScalarValue,
9};
10
11use crate::adapter::{SliceAdapter, SpannedAdapterToken, Token as AdapterToken};
12pub use crate::error::JsonError;
13use crate::error::JsonErrorKind;
14
15/// Streaming JSON parser backed by `facet-json`'s `SliceAdapter`.
16pub struct JsonParser<'de> {
17    input: &'de [u8],
18    adapter: SliceAdapter<'de, true>,
19    stack: Vec<ContextState>,
20    /// Cached event for `peek_event`.
21    event_peek: Option<ParseEvent<'de>>,
22    /// Whether the root value has started.
23    root_started: bool,
24    /// Whether the root value has fully completed.
25    root_complete: bool,
26    /// Absolute offset (in bytes) of the next unread token.
27    current_offset: usize,
28}
29
30#[derive(Debug)]
31enum ContextState {
32    Object(ObjectState),
33    Array(ArrayState),
34}
35
/// What the parser expects next while inside an object.
#[derive(Debug)]
enum ObjectState {
    /// A field name, or `}` closing the object.
    KeyOrEnd,
    /// The value belonging to the key that was just read.
    Value,
    /// A `,` before the next field, or `}` closing the object.
    CommaOrEnd,
}
42
/// What the parser expects next while inside an array.
#[derive(Debug)]
enum ArrayState {
    /// An element, or `]` closing the array.
    ValueOrEnd,
    /// A `,` before the next element, or `]` closing the array.
    CommaOrEnd,
}
48
/// Kind of bracket pair tracked while skipping over a container.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum DelimKind {
    /// A `{` ... `}` pair.
    Object,
    /// A `[` ... `]` pair.
    Array,
}
54
/// The step `produce_event` has to take next, derived from the top of the
/// container stack and the root flags.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum NextAction {
    /// In an object: read a field name or the closing `}`.
    ObjectKey,
    /// In an object: read the value for the current key.
    ObjectValue,
    /// In an object: read `,` or the closing `}`.
    ObjectComma,
    /// In an array: read an element or the closing `]`.
    ArrayValue,
    /// In an array: read `,` or the closing `]`.
    ArrayComma,
    /// No container is open and the root value has not completed yet.
    RootValue,
    /// No container is open and the root value has completed.
    RootFinished,
}
65
66impl<'de> JsonParser<'de> {
67    pub fn new(input: &'de [u8]) -> Self {
68        Self {
69            input,
70            adapter: SliceAdapter::new(input),
71            stack: Vec::new(),
72            event_peek: None,
73            root_started: false,
74            root_complete: false,
75            current_offset: 0,
76        }
77    }
78
79    fn consume_token(&mut self) -> Result<SpannedAdapterToken<'de>, JsonError> {
80        let token = self.adapter.next_token().map_err(JsonError::from)?;
81        self.current_offset = token.span.offset + token.span.len;
82        Ok(token)
83    }
84
85    fn expect_colon(&mut self) -> Result<(), JsonError> {
86        let token = self.consume_token()?;
87        if !matches!(token.token, AdapterToken::Colon) {
88            return Err(self.unexpected(&token, "':'"));
89        }
90        Ok(())
91    }
92
93    fn parse_value_start_with_token(
94        &mut self,
95        first: Option<SpannedAdapterToken<'de>>,
96    ) -> Result<ParseEvent<'de>, JsonError> {
97        let token = match first {
98            Some(tok) => tok,
99            None => self.consume_token()?,
100        };
101
102        self.root_started = true;
103
104        match token.token {
105            AdapterToken::ObjectStart => {
106                self.stack.push(ContextState::Object(ObjectState::KeyOrEnd));
107                Ok(ParseEvent::StructStart(ContainerKind::Object))
108            }
109            AdapterToken::ArrayStart => {
110                self.stack.push(ContextState::Array(ArrayState::ValueOrEnd));
111                Ok(ParseEvent::SequenceStart(ContainerKind::Array))
112            }
113            AdapterToken::String(s) => {
114                let event = ParseEvent::Scalar(ScalarValue::Str(s));
115                self.finish_value_in_parent();
116                Ok(event)
117            }
118            AdapterToken::True => {
119                self.finish_value_in_parent();
120                Ok(ParseEvent::Scalar(ScalarValue::Bool(true)))
121            }
122            AdapterToken::False => {
123                self.finish_value_in_parent();
124                Ok(ParseEvent::Scalar(ScalarValue::Bool(false)))
125            }
126            AdapterToken::Null => {
127                self.finish_value_in_parent();
128                Ok(ParseEvent::Scalar(ScalarValue::Null))
129            }
130            AdapterToken::U64(n) => {
131                self.finish_value_in_parent();
132                Ok(ParseEvent::Scalar(ScalarValue::U64(n)))
133            }
134            AdapterToken::I64(n) => {
135                self.finish_value_in_parent();
136                Ok(ParseEvent::Scalar(ScalarValue::I64(n)))
137            }
138            AdapterToken::U128(n) => {
139                self.finish_value_in_parent();
140                Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
141                    n.to_string(),
142                ))))
143            }
144            AdapterToken::I128(n) => {
145                self.finish_value_in_parent();
146                Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
147                    n.to_string(),
148                ))))
149            }
150            AdapterToken::F64(n) => {
151                self.finish_value_in_parent();
152                Ok(ParseEvent::Scalar(ScalarValue::F64(n)))
153            }
154            AdapterToken::ObjectEnd | AdapterToken::ArrayEnd => {
155                Err(self.unexpected(&token, "value"))
156            }
157            AdapterToken::Comma | AdapterToken::Colon => Err(self.unexpected(&token, "value")),
158            AdapterToken::Eof => Err(JsonError::new(
159                JsonErrorKind::UnexpectedEof { expected: "value" },
160                token.span,
161            )),
162        }
163    }
164
165    fn finish_value_in_parent(&mut self) {
166        if let Some(context) = self.stack.last_mut() {
167            match context {
168                ContextState::Object(state) => *state = ObjectState::CommaOrEnd,
169                ContextState::Array(state) => *state = ArrayState::CommaOrEnd,
170            }
171        } else if self.root_started {
172            self.root_complete = true;
173        }
174    }
175
176    fn unexpected(&self, token: &SpannedAdapterToken<'de>, expected: &'static str) -> JsonError {
177        JsonError::new(
178            JsonErrorKind::UnexpectedToken {
179                got: format!("{:?}", token.token),
180                expected,
181            },
182            token.span,
183        )
184    }
185
186    fn consume_value_tokens(&mut self) -> Result<(), JsonError> {
187        let span = self.adapter.skip().map_err(JsonError::from)?;
188        self.current_offset = span.offset + span.len;
189        Ok(())
190    }
191
192    fn skip_container(&mut self, start_kind: DelimKind) -> Result<(), JsonError> {
193        let mut stack = vec![start_kind];
194        while let Some(current) = stack.last().copied() {
195            let token = self.consume_token()?;
196            match token.token {
197                AdapterToken::ObjectStart => stack.push(DelimKind::Object),
198                AdapterToken::ArrayStart => stack.push(DelimKind::Array),
199                AdapterToken::ObjectEnd => {
200                    if current != DelimKind::Object {
201                        return Err(self.unexpected(&token, "'}'"));
202                    }
203                    stack.pop();
204                    if stack.is_empty() {
205                        break;
206                    }
207                }
208                AdapterToken::ArrayEnd => {
209                    if current != DelimKind::Array {
210                        return Err(self.unexpected(&token, "']'"));
211                    }
212                    stack.pop();
213                    if stack.is_empty() {
214                        break;
215                    }
216                }
217                AdapterToken::Eof => {
218                    return Err(JsonError::new(
219                        JsonErrorKind::UnexpectedEof { expected: "value" },
220                        token.span,
221                    ));
222                }
223                _ => {}
224            }
225        }
226        Ok(())
227    }
228
229    /// Skip a container in a separate adapter (used during probing).
230    fn skip_container_in_adapter(
231        &self,
232        adapter: &mut SliceAdapter<'de, true>,
233        start_kind: DelimKind,
234    ) -> Result<(), JsonError> {
235        let mut stack = vec![start_kind];
236        while let Some(current) = stack.last().copied() {
237            let token = adapter.next_token().map_err(JsonError::from)?;
238            match token.token {
239                AdapterToken::ObjectStart => stack.push(DelimKind::Object),
240                AdapterToken::ArrayStart => stack.push(DelimKind::Array),
241                AdapterToken::ObjectEnd => {
242                    if current != DelimKind::Object {
243                        return Err(JsonError::new(
244                            JsonErrorKind::UnexpectedToken {
245                                got: format!("{:?}", token.token),
246                                expected: "'}'",
247                            },
248                            token.span,
249                        ));
250                    }
251                    stack.pop();
252                    if stack.is_empty() {
253                        break;
254                    }
255                }
256                AdapterToken::ArrayEnd => {
257                    if current != DelimKind::Array {
258                        return Err(JsonError::new(
259                            JsonErrorKind::UnexpectedToken {
260                                got: format!("{:?}", token.token),
261                                expected: "']'",
262                            },
263                            token.span,
264                        ));
265                    }
266                    stack.pop();
267                    if stack.is_empty() {
268                        break;
269                    }
270                }
271                AdapterToken::Eof => {
272                    return Err(JsonError::new(
273                        JsonErrorKind::UnexpectedEof { expected: "value" },
274                        token.span,
275                    ));
276                }
277                _ => {}
278            }
279        }
280        Ok(())
281    }
282
283    fn determine_action(&self) -> NextAction {
284        if let Some(context) = self.stack.last() {
285            match context {
286                ContextState::Object(state) => match state {
287                    ObjectState::KeyOrEnd => NextAction::ObjectKey,
288                    ObjectState::Value => NextAction::ObjectValue,
289                    ObjectState::CommaOrEnd => NextAction::ObjectComma,
290                },
291                ContextState::Array(state) => match state {
292                    ArrayState::ValueOrEnd => NextAction::ArrayValue,
293                    ArrayState::CommaOrEnd => NextAction::ArrayComma,
294                },
295            }
296        } else if self.root_complete {
297            NextAction::RootFinished
298        } else {
299            NextAction::RootValue
300        }
301    }
302
303    fn produce_event(&mut self) -> Result<ParseEvent<'de>, JsonError> {
304        loop {
305            match self.determine_action() {
306                NextAction::ObjectKey => {
307                    let token = self.consume_token()?;
308                    match token.token {
309                        AdapterToken::ObjectEnd => {
310                            self.stack.pop();
311                            self.finish_value_in_parent();
312                            return Ok(ParseEvent::StructEnd);
313                        }
314                        AdapterToken::String(name) => {
315                            self.expect_colon()?;
316                            if let Some(ContextState::Object(state)) = self.stack.last_mut() {
317                                *state = ObjectState::Value;
318                            }
319                            return Ok(ParseEvent::FieldKey(FieldKey::new(
320                                name,
321                                FieldLocationHint::KeyValue,
322                            )));
323                        }
324                        AdapterToken::Eof => {
325                            return Err(JsonError::new(
326                                JsonErrorKind::UnexpectedEof {
327                                    expected: "field name or '}'",
328                                },
329                                token.span,
330                            ));
331                        }
332                        _ => return Err(self.unexpected(&token, "field name or '}'")),
333                    }
334                }
335                NextAction::ObjectValue => {
336                    return self.parse_value_start_with_token(None);
337                }
338                NextAction::ObjectComma => {
339                    let token = self.consume_token()?;
340                    match token.token {
341                        AdapterToken::Comma => {
342                            if let Some(ContextState::Object(state)) = self.stack.last_mut() {
343                                *state = ObjectState::KeyOrEnd;
344                            }
345                            continue;
346                        }
347                        AdapterToken::ObjectEnd => {
348                            self.stack.pop();
349                            self.finish_value_in_parent();
350                            return Ok(ParseEvent::StructEnd);
351                        }
352                        AdapterToken::Eof => {
353                            return Err(JsonError::new(
354                                JsonErrorKind::UnexpectedEof {
355                                    expected: "',' or '}'",
356                                },
357                                token.span,
358                            ));
359                        }
360                        _ => return Err(self.unexpected(&token, "',' or '}'")),
361                    }
362                }
363                NextAction::ArrayValue => {
364                    let token = self.consume_token()?;
365                    match token.token {
366                        AdapterToken::ArrayEnd => {
367                            self.stack.pop();
368                            self.finish_value_in_parent();
369                            return Ok(ParseEvent::SequenceEnd);
370                        }
371                        AdapterToken::Eof => {
372                            return Err(JsonError::new(
373                                JsonErrorKind::UnexpectedEof {
374                                    expected: "value or ']'",
375                                },
376                                token.span,
377                            ));
378                        }
379                        AdapterToken::Comma | AdapterToken::Colon => {
380                            return Err(self.unexpected(&token, "value or ']'"));
381                        }
382                        _ => {
383                            return self.parse_value_start_with_token(Some(token));
384                        }
385                    }
386                }
387                NextAction::ArrayComma => {
388                    let token = self.consume_token()?;
389                    match token.token {
390                        AdapterToken::Comma => {
391                            if let Some(ContextState::Array(state)) = self.stack.last_mut() {
392                                *state = ArrayState::ValueOrEnd;
393                            }
394                            continue;
395                        }
396                        AdapterToken::ArrayEnd => {
397                            self.stack.pop();
398                            self.finish_value_in_parent();
399                            return Ok(ParseEvent::SequenceEnd);
400                        }
401                        AdapterToken::Eof => {
402                            return Err(JsonError::new(
403                                JsonErrorKind::UnexpectedEof {
404                                    expected: "',' or ']'",
405                                },
406                                token.span,
407                            ));
408                        }
409                        _ => return Err(self.unexpected(&token, "',' or ']'")),
410                    }
411                }
412                NextAction::RootValue => {
413                    return self.parse_value_start_with_token(None);
414                }
415                NextAction::RootFinished => {
416                    return Err(JsonError::without_span(JsonErrorKind::UnexpectedToken {
417                        got: "end of input".into(),
418                        expected: "no additional JSON values",
419                    }));
420                }
421            }
422        }
423    }
424
425    fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, JsonError> {
426        let remaining = self.input.get(self.current_offset..).unwrap_or_default();
427        if remaining.is_empty() {
428            return Ok(Vec::new());
429        }
430
431        let mut adapter = SliceAdapter::<true>::new(remaining);
432
433        // If we've peeked a StructStart, we've already consumed the '{' so skip the check.
434        // Otherwise, expect ObjectStart as the first token.
435        let already_inside_object = matches!(self.event_peek, Some(ParseEvent::StructStart(_)));
436
437        if !already_inside_object {
438            let first = adapter.next_token().map_err(JsonError::from)?;
439            if !matches!(first.token, AdapterToken::ObjectStart) {
440                return Ok(Vec::new());
441            }
442        }
443
444        let mut evidence = Vec::new();
445        loop {
446            let token = adapter.next_token().map_err(JsonError::from)?;
447            match token.token {
448                AdapterToken::ObjectEnd => break,
449                AdapterToken::String(name) => {
450                    let colon = adapter.next_token().map_err(JsonError::from)?;
451                    if !matches!(colon.token, AdapterToken::Colon) {
452                        return Err(JsonError::new(
453                            JsonErrorKind::UnexpectedToken {
454                                got: format!("{:?}", colon.token),
455                                expected: "':'",
456                            },
457                            colon.span,
458                        ));
459                    }
460
461                    // Capture scalar values, skip complex types (objects/arrays)
462                    let value_token = adapter.next_token().map_err(JsonError::from)?;
463                    let scalar_value = match value_token.token {
464                        AdapterToken::String(s) => Some(ScalarValue::Str(s)),
465                        AdapterToken::True => Some(ScalarValue::Bool(true)),
466                        AdapterToken::False => Some(ScalarValue::Bool(false)),
467                        AdapterToken::Null => Some(ScalarValue::Null),
468                        AdapterToken::I64(n) => Some(ScalarValue::I64(n)),
469                        AdapterToken::U64(n) => Some(ScalarValue::U64(n)),
470                        AdapterToken::I128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
471                        AdapterToken::U128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
472                        AdapterToken::F64(n) => Some(ScalarValue::F64(n)),
473                        AdapterToken::ObjectStart => {
474                            // Skip the complex object
475                            self.skip_container_in_adapter(&mut adapter, DelimKind::Object)?;
476                            None
477                        }
478                        AdapterToken::ArrayStart => {
479                            // Skip the complex array
480                            self.skip_container_in_adapter(&mut adapter, DelimKind::Array)?;
481                            None
482                        }
483                        _ => None,
484                    };
485
486                    if let Some(sv) = scalar_value {
487                        evidence.push(FieldEvidence::with_scalar_value(
488                            name,
489                            FieldLocationHint::KeyValue,
490                            None,
491                            sv,
492                            None, // No namespace for JSON
493                        ));
494                    } else {
495                        evidence.push(FieldEvidence::new(
496                            name,
497                            FieldLocationHint::KeyValue,
498                            None,
499                            None, // No namespace for JSON
500                        ));
501                    }
502
503                    let sep = adapter.next_token().map_err(JsonError::from)?;
504                    match sep.token {
505                        AdapterToken::Comma => continue,
506                        AdapterToken::ObjectEnd => break,
507                        AdapterToken::Eof => {
508                            return Err(JsonError::new(
509                                JsonErrorKind::UnexpectedEof {
510                                    expected: "',' or '}'",
511                                },
512                                sep.span,
513                            ));
514                        }
515                        _ => {
516                            return Err(JsonError::new(
517                                JsonErrorKind::UnexpectedToken {
518                                    got: format!("{:?}", sep.token),
519                                    expected: "',' or '}'",
520                                },
521                                sep.span,
522                            ));
523                        }
524                    }
525                }
526                AdapterToken::Eof => {
527                    return Err(JsonError::new(
528                        JsonErrorKind::UnexpectedEof {
529                            expected: "field name or '}'",
530                        },
531                        token.span,
532                    ));
533                }
534                _ => {
535                    return Err(JsonError::new(
536                        JsonErrorKind::UnexpectedToken {
537                            got: format!("{:?}", token.token),
538                            expected: "field name or '}'",
539                        },
540                        token.span,
541                    ));
542                }
543            }
544        }
545
546        Ok(evidence)
547    }
548}
549
550impl<'de> FormatParser<'de> for JsonParser<'de> {
551    type Error = JsonError;
552    type Probe<'a>
553        = JsonProbe<'de>
554    where
555        Self: 'a;
556
557    fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
558        Some(crate::RawJson::SHAPE)
559    }
560
561    fn next_event(&mut self) -> Result<ParseEvent<'de>, Self::Error> {
562        if let Some(event) = self.event_peek.take() {
563            return Ok(event);
564        }
565        self.produce_event()
566    }
567
568    fn peek_event(&mut self) -> Result<ParseEvent<'de>, Self::Error> {
569        if let Some(event) = self.event_peek.clone() {
570            return Ok(event);
571        }
572        let event = self.produce_event()?;
573        self.event_peek = Some(event.clone());
574        Ok(event)
575    }
576
577    fn skip_value(&mut self) -> Result<(), Self::Error> {
578        debug_assert!(
579            self.event_peek.is_none(),
580            "skip_value called while an event is buffered"
581        );
582        self.consume_value_tokens()?;
583        self.finish_value_in_parent();
584        Ok(())
585    }
586
587    fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
588        let evidence = self.build_probe()?;
589        Ok(JsonProbe { evidence, idx: 0 })
590    }
591
592    fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
593        debug_assert!(
594            self.event_peek.is_none(),
595            "capture_raw called while an event is buffered"
596        );
597
598        // Get the first token to find the actual start offset (excludes whitespace)
599        let first = self.consume_token()?;
600        let start_offset = first.span.offset;
601
602        // Skip the rest of the value if it's a container
603        match first.token {
604            AdapterToken::ObjectStart => self.skip_container(DelimKind::Object)?,
605            AdapterToken::ArrayStart => self.skip_container(DelimKind::Array)?,
606            AdapterToken::ObjectEnd
607            | AdapterToken::ArrayEnd
608            | AdapterToken::Comma
609            | AdapterToken::Colon => return Err(self.unexpected(&first, "value")),
610            AdapterToken::Eof => {
611                return Err(JsonError::new(
612                    JsonErrorKind::UnexpectedEof { expected: "value" },
613                    first.span,
614                ));
615            }
616            _ => {
617                // Simple value - already consumed
618            }
619        }
620
621        // Get end position
622        let end_offset = self.current_offset;
623
624        // Extract the raw slice and convert to str
625        let raw_bytes = &self.input[start_offset..end_offset];
626        let raw_str = core::str::from_utf8(raw_bytes).map_err(|e| {
627            JsonError::without_span(JsonErrorKind::InvalidValue {
628                message: alloc::format!("invalid UTF-8 in raw JSON: {}", e),
629            })
630        })?;
631
632        self.finish_value_in_parent();
633        Ok(Some(raw_str))
634    }
635}
636
637pub struct JsonProbe<'de> {
638    evidence: Vec<FieldEvidence<'de>>,
639    idx: usize,
640}
641
642impl<'de> ProbeStream<'de> for JsonProbe<'de> {
643    type Error = JsonError;
644
645    fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
646        if self.idx >= self.evidence.len() {
647            Ok(None)
648        } else {
649            let ev = self.evidence[self.idx].clone();
650            self.idx += 1;
651            Ok(Some(ev))
652        }
653    }
654}