1extern crate alloc;
2
3use alloc::{borrow::Cow, vec::Vec};
4
5use facet_core::Facet as _;
6use facet_format::{
7 ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
8 ProbeStream, ScalarValue,
9};
10
11use crate::adapter::{SliceAdapter, SpannedAdapterToken, Token as AdapterToken};
12pub use crate::error::JsonError;
13use crate::error::JsonErrorKind;
14
/// Pull-style JSON parser that turns a byte slice into `ParseEvent`s.
///
/// Tokens come from a `SliceAdapter`; the parser itself only tracks which
/// containers are open and which kind of token may come next.
pub struct JsonParser<'de> {
    /// The complete input handed to `new`; `capture_raw` slices it and
    /// `build_probe` re-scans its tail.
    input: &'de [u8],
    /// Token source created from `input`.
    adapter: SliceAdapter<'de, true>,
    /// Containers that are currently open, innermost last.
    stack: Vec<ContextState>,
    /// Event buffered by `peek_event` until `next_event` takes it.
    event_peek: Option<ParseEvent<'de>>,
    /// Set as soon as the parser starts reading a value.
    root_started: bool,
    /// Set when a value finishes while no container is open.
    root_complete: bool,
    /// Offset just past the most recently consumed token or skipped value.
    current_offset: usize,
}
29
/// One entry of the parser's container stack: the kind of container that is
/// open together with the position reached inside it.
#[derive(Debug)]
enum ContextState {
    /// Inside a JSON object.
    Object(ObjectState),
    /// Inside a JSON array.
    Array(ArrayState),
}
35
/// Position inside an open object.
#[derive(Debug)]
enum ObjectState {
    /// A field name or the closing `}` comes next.
    KeyOrEnd,
    /// A key and its `:` were consumed; the field's value comes next.
    Value,
    /// A value just finished; `,` or the closing `}` comes next.
    CommaOrEnd,
}
42
/// Position inside an open array.
#[derive(Debug)]
enum ArrayState {
    /// An element or the closing `]` comes next.
    ValueOrEnd,
    /// An element just finished; `,` or the closing `]` comes next.
    CommaOrEnd,
}
48
/// Container kind tracked by the skip helpers (`skip_container`,
/// `skip_container_in_adapter`) while they balance delimiters.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DelimKind {
    /// Opened by `{`, closed by `}`.
    Object,
    /// Opened by `[`, closed by `]`.
    Array,
}
54
/// What `produce_event` has to do next, as derived by `determine_action`
/// from the top of the container stack and the root flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NextAction {
    /// Inside an object: read a field name or the closing `}`.
    ObjectKey,
    /// Inside an object: read the value of the current field.
    ObjectValue,
    /// Inside an object: read `,` or the closing `}`.
    ObjectComma,
    /// Inside an array: read an element or the closing `]`.
    ArrayValue,
    /// Inside an array: read `,` or the closing `]`.
    ArrayComma,
    /// No container is open and the root value is not complete: read it.
    RootValue,
    /// No container is open and the root value is complete.
    RootFinished,
}
65
66impl<'de> JsonParser<'de> {
67 pub fn new(input: &'de [u8]) -> Self {
68 Self {
69 input,
70 adapter: SliceAdapter::new(input),
71 stack: Vec::new(),
72 event_peek: None,
73 root_started: false,
74 root_complete: false,
75 current_offset: 0,
76 }
77 }
78
79 fn consume_token(&mut self) -> Result<SpannedAdapterToken<'de>, JsonError> {
80 let token = self.adapter.next_token().map_err(JsonError::from)?;
81 self.current_offset = token.span.offset + token.span.len;
82 Ok(token)
83 }
84
85 fn expect_colon(&mut self) -> Result<(), JsonError> {
86 let token = self.consume_token()?;
87 if !matches!(token.token, AdapterToken::Colon) {
88 return Err(self.unexpected(&token, "':'"));
89 }
90 Ok(())
91 }
92
93 fn parse_value_start_with_token(
94 &mut self,
95 first: Option<SpannedAdapterToken<'de>>,
96 ) -> Result<ParseEvent<'de>, JsonError> {
97 let token = match first {
98 Some(tok) => tok,
99 None => self.consume_token()?,
100 };
101
102 self.root_started = true;
103
104 match token.token {
105 AdapterToken::ObjectStart => {
106 self.stack.push(ContextState::Object(ObjectState::KeyOrEnd));
107 Ok(ParseEvent::StructStart(ContainerKind::Object))
108 }
109 AdapterToken::ArrayStart => {
110 self.stack.push(ContextState::Array(ArrayState::ValueOrEnd));
111 Ok(ParseEvent::SequenceStart(ContainerKind::Array))
112 }
113 AdapterToken::String(s) => {
114 let event = ParseEvent::Scalar(ScalarValue::Str(s));
115 self.finish_value_in_parent();
116 Ok(event)
117 }
118 AdapterToken::True => {
119 self.finish_value_in_parent();
120 Ok(ParseEvent::Scalar(ScalarValue::Bool(true)))
121 }
122 AdapterToken::False => {
123 self.finish_value_in_parent();
124 Ok(ParseEvent::Scalar(ScalarValue::Bool(false)))
125 }
126 AdapterToken::Null => {
127 self.finish_value_in_parent();
128 Ok(ParseEvent::Scalar(ScalarValue::Null))
129 }
130 AdapterToken::U64(n) => {
131 self.finish_value_in_parent();
132 Ok(ParseEvent::Scalar(ScalarValue::U64(n)))
133 }
134 AdapterToken::I64(n) => {
135 self.finish_value_in_parent();
136 Ok(ParseEvent::Scalar(ScalarValue::I64(n)))
137 }
138 AdapterToken::U128(n) => {
139 self.finish_value_in_parent();
140 Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
141 n.to_string(),
142 ))))
143 }
144 AdapterToken::I128(n) => {
145 self.finish_value_in_parent();
146 Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
147 n.to_string(),
148 ))))
149 }
150 AdapterToken::F64(n) => {
151 self.finish_value_in_parent();
152 Ok(ParseEvent::Scalar(ScalarValue::F64(n)))
153 }
154 AdapterToken::ObjectEnd | AdapterToken::ArrayEnd => {
155 Err(self.unexpected(&token, "value"))
156 }
157 AdapterToken::Comma | AdapterToken::Colon => Err(self.unexpected(&token, "value")),
158 AdapterToken::Eof => Err(JsonError::new(
159 JsonErrorKind::UnexpectedEof { expected: "value" },
160 token.span,
161 )),
162 }
163 }
164
165 fn finish_value_in_parent(&mut self) {
166 if let Some(context) = self.stack.last_mut() {
167 match context {
168 ContextState::Object(state) => *state = ObjectState::CommaOrEnd,
169 ContextState::Array(state) => *state = ArrayState::CommaOrEnd,
170 }
171 } else if self.root_started {
172 self.root_complete = true;
173 }
174 }
175
176 fn unexpected(&self, token: &SpannedAdapterToken<'de>, expected: &'static str) -> JsonError {
177 JsonError::new(
178 JsonErrorKind::UnexpectedToken {
179 got: format!("{:?}", token.token),
180 expected,
181 },
182 token.span,
183 )
184 }
185
186 fn consume_value_tokens(&mut self) -> Result<(), JsonError> {
187 let span = self.adapter.skip().map_err(JsonError::from)?;
188 self.current_offset = span.offset + span.len;
189 Ok(())
190 }
191
192 fn skip_container(&mut self, start_kind: DelimKind) -> Result<(), JsonError> {
193 let mut stack = vec![start_kind];
194 while let Some(current) = stack.last().copied() {
195 let token = self.consume_token()?;
196 match token.token {
197 AdapterToken::ObjectStart => stack.push(DelimKind::Object),
198 AdapterToken::ArrayStart => stack.push(DelimKind::Array),
199 AdapterToken::ObjectEnd => {
200 if current != DelimKind::Object {
201 return Err(self.unexpected(&token, "'}'"));
202 }
203 stack.pop();
204 if stack.is_empty() {
205 break;
206 }
207 }
208 AdapterToken::ArrayEnd => {
209 if current != DelimKind::Array {
210 return Err(self.unexpected(&token, "']'"));
211 }
212 stack.pop();
213 if stack.is_empty() {
214 break;
215 }
216 }
217 AdapterToken::Eof => {
218 return Err(JsonError::new(
219 JsonErrorKind::UnexpectedEof { expected: "value" },
220 token.span,
221 ));
222 }
223 _ => {}
224 }
225 }
226 Ok(())
227 }
228
229 fn skip_container_in_adapter(
231 &self,
232 adapter: &mut SliceAdapter<'de, true>,
233 start_kind: DelimKind,
234 ) -> Result<(), JsonError> {
235 let mut stack = vec![start_kind];
236 while let Some(current) = stack.last().copied() {
237 let token = adapter.next_token().map_err(JsonError::from)?;
238 match token.token {
239 AdapterToken::ObjectStart => stack.push(DelimKind::Object),
240 AdapterToken::ArrayStart => stack.push(DelimKind::Array),
241 AdapterToken::ObjectEnd => {
242 if current != DelimKind::Object {
243 return Err(JsonError::new(
244 JsonErrorKind::UnexpectedToken {
245 got: format!("{:?}", token.token),
246 expected: "'}'",
247 },
248 token.span,
249 ));
250 }
251 stack.pop();
252 if stack.is_empty() {
253 break;
254 }
255 }
256 AdapterToken::ArrayEnd => {
257 if current != DelimKind::Array {
258 return Err(JsonError::new(
259 JsonErrorKind::UnexpectedToken {
260 got: format!("{:?}", token.token),
261 expected: "']'",
262 },
263 token.span,
264 ));
265 }
266 stack.pop();
267 if stack.is_empty() {
268 break;
269 }
270 }
271 AdapterToken::Eof => {
272 return Err(JsonError::new(
273 JsonErrorKind::UnexpectedEof { expected: "value" },
274 token.span,
275 ));
276 }
277 _ => {}
278 }
279 }
280 Ok(())
281 }
282
283 fn determine_action(&self) -> NextAction {
284 if let Some(context) = self.stack.last() {
285 match context {
286 ContextState::Object(state) => match state {
287 ObjectState::KeyOrEnd => NextAction::ObjectKey,
288 ObjectState::Value => NextAction::ObjectValue,
289 ObjectState::CommaOrEnd => NextAction::ObjectComma,
290 },
291 ContextState::Array(state) => match state {
292 ArrayState::ValueOrEnd => NextAction::ArrayValue,
293 ArrayState::CommaOrEnd => NextAction::ArrayComma,
294 },
295 }
296 } else if self.root_complete {
297 NextAction::RootFinished
298 } else {
299 NextAction::RootValue
300 }
301 }
302
303 fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, JsonError> {
304 loop {
305 match self.determine_action() {
306 NextAction::ObjectKey => {
307 let token = self.consume_token()?;
308 match token.token {
309 AdapterToken::ObjectEnd => {
310 self.stack.pop();
311 self.finish_value_in_parent();
312 return Ok(Some(ParseEvent::StructEnd));
313 }
314 AdapterToken::String(name) => {
315 self.expect_colon()?;
316 if let Some(ContextState::Object(state)) = self.stack.last_mut() {
317 *state = ObjectState::Value;
318 }
319 return Ok(Some(ParseEvent::FieldKey(FieldKey::new(
320 name,
321 FieldLocationHint::KeyValue,
322 ))));
323 }
324 AdapterToken::Eof => {
325 return Err(JsonError::new(
326 JsonErrorKind::UnexpectedEof {
327 expected: "field name or '}'",
328 },
329 token.span,
330 ));
331 }
332 _ => return Err(self.unexpected(&token, "field name or '}'")),
333 }
334 }
335 NextAction::ObjectValue => {
336 return self.parse_value_start_with_token(None).map(Some);
337 }
338 NextAction::ObjectComma => {
339 let token = self.consume_token()?;
340 match token.token {
341 AdapterToken::Comma => {
342 if let Some(ContextState::Object(state)) = self.stack.last_mut() {
343 *state = ObjectState::KeyOrEnd;
344 }
345 continue;
346 }
347 AdapterToken::ObjectEnd => {
348 self.stack.pop();
349 self.finish_value_in_parent();
350 return Ok(Some(ParseEvent::StructEnd));
351 }
352 AdapterToken::Eof => {
353 return Err(JsonError::new(
354 JsonErrorKind::UnexpectedEof {
355 expected: "',' or '}'",
356 },
357 token.span,
358 ));
359 }
360 _ => return Err(self.unexpected(&token, "',' or '}'")),
361 }
362 }
363 NextAction::ArrayValue => {
364 let token = self.consume_token()?;
365 match token.token {
366 AdapterToken::ArrayEnd => {
367 self.stack.pop();
368 self.finish_value_in_parent();
369 return Ok(Some(ParseEvent::SequenceEnd));
370 }
371 AdapterToken::Eof => {
372 return Err(JsonError::new(
373 JsonErrorKind::UnexpectedEof {
374 expected: "value or ']'",
375 },
376 token.span,
377 ));
378 }
379 AdapterToken::Comma | AdapterToken::Colon => {
380 return Err(self.unexpected(&token, "value or ']'"));
381 }
382 _ => {
383 return self.parse_value_start_with_token(Some(token)).map(Some);
384 }
385 }
386 }
387 NextAction::ArrayComma => {
388 let token = self.consume_token()?;
389 match token.token {
390 AdapterToken::Comma => {
391 if let Some(ContextState::Array(state)) = self.stack.last_mut() {
392 *state = ArrayState::ValueOrEnd;
393 }
394 continue;
395 }
396 AdapterToken::ArrayEnd => {
397 self.stack.pop();
398 self.finish_value_in_parent();
399 return Ok(Some(ParseEvent::SequenceEnd));
400 }
401 AdapterToken::Eof => {
402 return Err(JsonError::new(
403 JsonErrorKind::UnexpectedEof {
404 expected: "',' or ']'",
405 },
406 token.span,
407 ));
408 }
409 _ => return Err(self.unexpected(&token, "',' or ']'")),
410 }
411 }
412 NextAction::RootValue => {
413 return self.parse_value_start_with_token(None).map(Some);
414 }
415 NextAction::RootFinished => {
416 return Ok(None);
417 }
418 }
419 }
420 }
421
422 fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, JsonError> {
423 let remaining = self.input.get(self.current_offset..).unwrap_or_default();
424 if remaining.is_empty() {
425 return Ok(Vec::new());
426 }
427
428 let mut adapter = SliceAdapter::<true>::new(remaining);
429
430 let already_inside_object = matches!(self.event_peek, Some(ParseEvent::StructStart(_)));
433
434 if !already_inside_object {
435 let first = adapter.next_token().map_err(JsonError::from)?;
436 if !matches!(first.token, AdapterToken::ObjectStart) {
437 return Ok(Vec::new());
438 }
439 }
440
441 let mut evidence = Vec::new();
442 loop {
443 let token = adapter.next_token().map_err(JsonError::from)?;
444 match token.token {
445 AdapterToken::ObjectEnd => break,
446 AdapterToken::String(name) => {
447 let colon = adapter.next_token().map_err(JsonError::from)?;
448 if !matches!(colon.token, AdapterToken::Colon) {
449 return Err(JsonError::new(
450 JsonErrorKind::UnexpectedToken {
451 got: format!("{:?}", colon.token),
452 expected: "':'",
453 },
454 colon.span,
455 ));
456 }
457
458 let value_token = adapter.next_token().map_err(JsonError::from)?;
460 let scalar_value = match value_token.token {
461 AdapterToken::String(s) => Some(ScalarValue::Str(s)),
462 AdapterToken::True => Some(ScalarValue::Bool(true)),
463 AdapterToken::False => Some(ScalarValue::Bool(false)),
464 AdapterToken::Null => Some(ScalarValue::Null),
465 AdapterToken::I64(n) => Some(ScalarValue::I64(n)),
466 AdapterToken::U64(n) => Some(ScalarValue::U64(n)),
467 AdapterToken::I128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
468 AdapterToken::U128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
469 AdapterToken::F64(n) => Some(ScalarValue::F64(n)),
470 AdapterToken::ObjectStart => {
471 self.skip_container_in_adapter(&mut adapter, DelimKind::Object)?;
473 None
474 }
475 AdapterToken::ArrayStart => {
476 self.skip_container_in_adapter(&mut adapter, DelimKind::Array)?;
478 None
479 }
480 _ => None,
481 };
482
483 if let Some(sv) = scalar_value {
484 evidence.push(FieldEvidence::with_scalar_value(
485 name,
486 FieldLocationHint::KeyValue,
487 None,
488 sv,
489 None, ));
491 } else {
492 evidence.push(FieldEvidence::new(
493 name,
494 FieldLocationHint::KeyValue,
495 None,
496 None, ));
498 }
499
500 let sep = adapter.next_token().map_err(JsonError::from)?;
501 match sep.token {
502 AdapterToken::Comma => continue,
503 AdapterToken::ObjectEnd => break,
504 AdapterToken::Eof => {
505 return Err(JsonError::new(
506 JsonErrorKind::UnexpectedEof {
507 expected: "',' or '}'",
508 },
509 sep.span,
510 ));
511 }
512 _ => {
513 return Err(JsonError::new(
514 JsonErrorKind::UnexpectedToken {
515 got: format!("{:?}", sep.token),
516 expected: "',' or '}'",
517 },
518 sep.span,
519 ));
520 }
521 }
522 }
523 AdapterToken::Eof => {
524 return Err(JsonError::new(
525 JsonErrorKind::UnexpectedEof {
526 expected: "field name or '}'",
527 },
528 token.span,
529 ));
530 }
531 _ => {
532 return Err(JsonError::new(
533 JsonErrorKind::UnexpectedToken {
534 got: format!("{:?}", token.token),
535 expected: "field name or '}'",
536 },
537 token.span,
538 ));
539 }
540 }
541 }
542
543 Ok(evidence)
544 }
545}
546
547impl<'de> FormatParser<'de> for JsonParser<'de> {
548 type Error = JsonError;
549 type Probe<'a>
550 = JsonProbe<'de>
551 where
552 Self: 'a;
553
554 fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
555 Some(crate::RawJson::SHAPE)
556 }
557
558 fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
559 if let Some(event) = self.event_peek.take() {
560 return Ok(Some(event));
561 }
562 self.produce_event()
563 }
564
565 fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
566 if let Some(event) = self.event_peek.clone() {
567 return Ok(Some(event));
568 }
569 let event = self.produce_event()?;
570 if let Some(ref e) = event {
571 self.event_peek = Some(e.clone());
572 }
573 Ok(event)
574 }
575
576 fn skip_value(&mut self) -> Result<(), Self::Error> {
577 debug_assert!(
578 self.event_peek.is_none(),
579 "skip_value called while an event is buffered"
580 );
581 self.consume_value_tokens()?;
582 self.finish_value_in_parent();
583 Ok(())
584 }
585
586 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
587 let evidence = self.build_probe()?;
588 Ok(JsonProbe { evidence, idx: 0 })
589 }
590
591 fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
592 debug_assert!(
593 self.event_peek.is_none(),
594 "capture_raw called while an event is buffered"
595 );
596
597 let first = self.consume_token()?;
599 let start_offset = first.span.offset;
600
601 match first.token {
603 AdapterToken::ObjectStart => self.skip_container(DelimKind::Object)?,
604 AdapterToken::ArrayStart => self.skip_container(DelimKind::Array)?,
605 AdapterToken::ObjectEnd
606 | AdapterToken::ArrayEnd
607 | AdapterToken::Comma
608 | AdapterToken::Colon => return Err(self.unexpected(&first, "value")),
609 AdapterToken::Eof => {
610 return Err(JsonError::new(
611 JsonErrorKind::UnexpectedEof { expected: "value" },
612 first.span,
613 ));
614 }
615 _ => {
616 }
618 }
619
620 let end_offset = self.current_offset;
622
623 let raw_bytes = &self.input[start_offset..end_offset];
625 let raw_str = core::str::from_utf8(raw_bytes).map_err(|e| {
626 JsonError::without_span(JsonErrorKind::InvalidValue {
627 message: alloc::format!("invalid UTF-8 in raw JSON: {}", e),
628 })
629 })?;
630
631 self.finish_value_in_parent();
632 Ok(Some(raw_str))
633 }
634}
635
/// JIT integration: exposes the raw input and a resumable position so
/// compiled code can take over parsing and hand control back.
#[cfg(feature = "jit")]
impl<'de> facet_format::FormatJitParser<'de> for JsonParser<'de> {
    type FormatJit = crate::jit::JsonJitFormat;

    /// The full input buffer.
    fn jit_input(&self) -> &'de [u8] {
        self.input
    }

    /// Returns the current offset, or `None` when the parser state cannot be
    /// described by an offset alone: an event is buffered, a container is
    /// open, or the root value has been started but not completed.
    fn jit_pos(&self) -> Option<usize> {
        let root_in_progress = self.root_started && !self.root_complete;
        let resumable =
            self.event_peek.is_none() && self.stack.is_empty() && !root_in_progress;
        resumable.then_some(self.current_offset)
    }

    /// Repositions the parser at `pos`: the adapter is rebuilt at that
    /// offset, any buffered event is dropped, and the root is marked both
    /// started and complete. The container stack is expected to be empty
    /// (checked in debug builds only).
    fn jit_set_pos(&mut self, pos: usize) {
        self.adapter = SliceAdapter::new_with_offset(self.input, pos);
        self.current_offset = pos;
        self.event_peek = None;
        self.root_started = true;
        self.root_complete = true;
        debug_assert!(self.stack.is_empty());
    }

    /// The format descriptor handed to the JIT.
    fn jit_format(&self) -> Self::FormatJit {
        crate::jit::JsonJitFormat
    }

    /// Translates a JIT error code into a `JsonError` with a one-byte span
    /// at `error_pos`. Codes without a dedicated mapping become an
    /// `InvalidValue` that carries the numeric code.
    fn jit_error(&self, _input: &'de [u8], error_pos: usize, error_code: i32) -> Self::Error {
        use crate::error::JsonErrorKind;
        use facet_reflect::Span;

        let unexpected = |got: &'static str, expected: &'static str| {
            JsonErrorKind::UnexpectedToken {
                got: got.into(),
                expected,
            }
        };

        let kind = match error_code {
            -100 => JsonErrorKind::UnexpectedEof { expected: "value" },
            -101 => unexpected("non-'['", "'['"),
            -102 => unexpected("non-boolean", "'true' or 'false'"),
            -103 => unexpected("unexpected token", "',' or ']'"),
            _ => JsonErrorKind::InvalidValue {
                message: alloc::format!("Tier-2 JIT error code: {}", error_code),
            },
        };

        let span = Span {
            offset: error_pos,
            len: 1,
        };
        JsonError::new(kind, span)
    }
}
727
/// Probe returned by `JsonParser::begin_probe`: a pre-collected list of
/// field evidence that is handed out one entry at a time.
pub struct JsonProbe<'de> {
    /// Evidence gathered up front, in the order the fields were scanned.
    evidence: Vec<FieldEvidence<'de>>,
    /// Index of the next entry that `next` will return.
    idx: usize,
}
732
733impl<'de> ProbeStream<'de> for JsonProbe<'de> {
734 type Error = JsonError;
735
736 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
737 if self.idx >= self.evidence.len() {
738 Ok(None)
739 } else {
740 let ev = self.evidence[self.idx].clone();
741 self.idx += 1;
742 Ok(Some(ev))
743 }
744 }
745}