1extern crate alloc;
2
3use alloc::{borrow::Cow, vec::Vec};
4
5use facet_core::Facet as _;
6use facet_format::{
7 ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
8 ProbeStream, ScalarValue,
9};
10
11use crate::adapter::{SliceAdapter, SpannedAdapterToken, Token as AdapterToken};
12pub use crate::error::JsonError;
13use crate::error::JsonErrorKind;
14
15pub struct JsonParser<'de> {
17 input: &'de [u8],
18 adapter: SliceAdapter<'de, true>,
19 stack: Vec<ContextState>,
20 event_peek: Option<ParseEvent<'de>>,
22 peek_start_offset: Option<usize>,
25 root_started: bool,
27 root_complete: bool,
29 current_offset: usize,
31 last_token_start: usize,
34}
35
36#[derive(Debug)]
37enum ContextState {
38 Object(ObjectState),
39 Array(ArrayState),
40}
41
/// Position inside an object that is currently being parsed.
#[derive(Debug)]
enum ObjectState {
    /// The next token must be a field name or the closing `}`.
    KeyOrEnd,
    /// A field name and its `:` were read; the field's value comes next.
    Value,
    /// A field value was completed; the next token must be `,` or `}`.
    CommaOrEnd,
}
48
/// Position inside an array that is currently being parsed.
#[derive(Debug)]
enum ArrayState {
    /// The next token must start an element or be the closing `]`.
    ValueOrEnd,
    /// An element was completed; the next token must be `,` or `]`.
    CommaOrEnd,
}
54
/// Kind of container delimiter tracked while skipping over nested values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DelimKind {
    /// A `{ ... }` pair.
    Object,
    /// A `[ ... ]` pair.
    Array,
}
60
/// What the parser has to do next, derived from the innermost open container
/// (or from the root flags when no container is open).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NextAction {
    /// Read a field name or the end of the current object.
    ObjectKey,
    /// Read the value belonging to the field name just parsed.
    ObjectValue,
    /// Read the `,` or `}` that follows a field value.
    ObjectComma,
    /// Read an array element or the end of the current array.
    ArrayValue,
    /// Read the `,` or `]` that follows an array element.
    ArrayComma,
    /// Read the root value.
    RootValue,
    /// The root value is complete; there are no more events.
    RootFinished,
}
71
72impl<'de> JsonParser<'de> {
73 pub fn new(input: &'de [u8]) -> Self {
74 Self {
75 input,
76 adapter: SliceAdapter::new(input),
77 stack: Vec::new(),
78 event_peek: None,
79 peek_start_offset: None,
80 root_started: false,
81 root_complete: false,
82 current_offset: 0,
83 last_token_start: 0,
84 }
85 }
86
87 fn consume_token(&mut self) -> Result<SpannedAdapterToken<'de>, JsonError> {
88 let token = self.adapter.next_token().map_err(JsonError::from)?;
89 self.last_token_start = token.span.offset;
90 self.current_offset = token.span.offset + token.span.len;
91 Ok(token)
92 }
93
94 fn expect_colon(&mut self) -> Result<(), JsonError> {
95 let token = self.consume_token()?;
96 if !matches!(token.token, AdapterToken::Colon) {
97 return Err(self.unexpected(&token, "':'"));
98 }
99 Ok(())
100 }
101
102 fn parse_value_start_with_token(
103 &mut self,
104 first: Option<SpannedAdapterToken<'de>>,
105 ) -> Result<ParseEvent<'de>, JsonError> {
106 let token = match first {
107 Some(tok) => tok,
108 None => self.consume_token()?,
109 };
110
111 self.root_started = true;
112
113 match token.token {
114 AdapterToken::ObjectStart => {
115 self.stack.push(ContextState::Object(ObjectState::KeyOrEnd));
116 Ok(ParseEvent::StructStart(ContainerKind::Object))
117 }
118 AdapterToken::ArrayStart => {
119 self.stack.push(ContextState::Array(ArrayState::ValueOrEnd));
120 Ok(ParseEvent::SequenceStart(ContainerKind::Array))
121 }
122 AdapterToken::String(s) => {
123 let event = ParseEvent::Scalar(ScalarValue::Str(s));
124 self.finish_value_in_parent();
125 Ok(event)
126 }
127 AdapterToken::True => {
128 self.finish_value_in_parent();
129 Ok(ParseEvent::Scalar(ScalarValue::Bool(true)))
130 }
131 AdapterToken::False => {
132 self.finish_value_in_parent();
133 Ok(ParseEvent::Scalar(ScalarValue::Bool(false)))
134 }
135 AdapterToken::Null => {
136 self.finish_value_in_parent();
137 Ok(ParseEvent::Scalar(ScalarValue::Null))
138 }
139 AdapterToken::U64(n) => {
140 self.finish_value_in_parent();
141 Ok(ParseEvent::Scalar(ScalarValue::U64(n)))
142 }
143 AdapterToken::I64(n) => {
144 self.finish_value_in_parent();
145 Ok(ParseEvent::Scalar(ScalarValue::I64(n)))
146 }
147 AdapterToken::U128(n) => {
148 self.finish_value_in_parent();
149 Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
150 n.to_string(),
151 ))))
152 }
153 AdapterToken::I128(n) => {
154 self.finish_value_in_parent();
155 Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
156 n.to_string(),
157 ))))
158 }
159 AdapterToken::F64(n) => {
160 self.finish_value_in_parent();
161 Ok(ParseEvent::Scalar(ScalarValue::F64(n)))
162 }
163 AdapterToken::ObjectEnd | AdapterToken::ArrayEnd => {
164 Err(self.unexpected(&token, "value"))
165 }
166 AdapterToken::Comma | AdapterToken::Colon => Err(self.unexpected(&token, "value")),
167 AdapterToken::Eof => Err(JsonError::new(
168 JsonErrorKind::UnexpectedEof { expected: "value" },
169 token.span,
170 )),
171 }
172 }
173
174 fn finish_value_in_parent(&mut self) {
175 if let Some(context) = self.stack.last_mut() {
176 match context {
177 ContextState::Object(state) => *state = ObjectState::CommaOrEnd,
178 ContextState::Array(state) => *state = ArrayState::CommaOrEnd,
179 }
180 } else if self.root_started {
181 self.root_complete = true;
182 }
183 }
184
185 fn unexpected(&self, token: &SpannedAdapterToken<'de>, expected: &'static str) -> JsonError {
186 JsonError::new(
187 JsonErrorKind::UnexpectedToken {
188 got: format!("{:?}", token.token),
189 expected,
190 },
191 token.span,
192 )
193 }
194
195 fn consume_value_tokens(&mut self) -> Result<(), JsonError> {
196 let span = self.adapter.skip().map_err(JsonError::from)?;
197 self.current_offset = span.offset + span.len;
198 Ok(())
199 }
200
201 fn skip_container(&mut self, start_kind: DelimKind) -> Result<(), JsonError> {
202 let mut stack = vec![start_kind];
203 while let Some(current) = stack.last().copied() {
204 let token = self.consume_token()?;
205 match token.token {
206 AdapterToken::ObjectStart => stack.push(DelimKind::Object),
207 AdapterToken::ArrayStart => stack.push(DelimKind::Array),
208 AdapterToken::ObjectEnd => {
209 if current != DelimKind::Object {
210 return Err(self.unexpected(&token, "'}'"));
211 }
212 stack.pop();
213 if stack.is_empty() {
214 break;
215 }
216 }
217 AdapterToken::ArrayEnd => {
218 if current != DelimKind::Array {
219 return Err(self.unexpected(&token, "']'"));
220 }
221 stack.pop();
222 if stack.is_empty() {
223 break;
224 }
225 }
226 AdapterToken::Eof => {
227 return Err(JsonError::new(
228 JsonErrorKind::UnexpectedEof { expected: "value" },
229 token.span,
230 ));
231 }
232 _ => {}
233 }
234 }
235 Ok(())
236 }
237
238 fn skip_container_in_adapter(
240 &self,
241 adapter: &mut SliceAdapter<'de, true>,
242 start_kind: DelimKind,
243 ) -> Result<(), JsonError> {
244 let mut stack = vec![start_kind];
245 while let Some(current) = stack.last().copied() {
246 let token = adapter.next_token().map_err(JsonError::from)?;
247 match token.token {
248 AdapterToken::ObjectStart => stack.push(DelimKind::Object),
249 AdapterToken::ArrayStart => stack.push(DelimKind::Array),
250 AdapterToken::ObjectEnd => {
251 if current != DelimKind::Object {
252 return Err(JsonError::new(
253 JsonErrorKind::UnexpectedToken {
254 got: format!("{:?}", token.token),
255 expected: "'}'",
256 },
257 token.span,
258 ));
259 }
260 stack.pop();
261 if stack.is_empty() {
262 break;
263 }
264 }
265 AdapterToken::ArrayEnd => {
266 if current != DelimKind::Array {
267 return Err(JsonError::new(
268 JsonErrorKind::UnexpectedToken {
269 got: format!("{:?}", token.token),
270 expected: "']'",
271 },
272 token.span,
273 ));
274 }
275 stack.pop();
276 if stack.is_empty() {
277 break;
278 }
279 }
280 AdapterToken::Eof => {
281 return Err(JsonError::new(
282 JsonErrorKind::UnexpectedEof { expected: "value" },
283 token.span,
284 ));
285 }
286 _ => {}
287 }
288 }
289 Ok(())
290 }
291
292 fn determine_action(&self) -> NextAction {
293 if let Some(context) = self.stack.last() {
294 match context {
295 ContextState::Object(state) => match state {
296 ObjectState::KeyOrEnd => NextAction::ObjectKey,
297 ObjectState::Value => NextAction::ObjectValue,
298 ObjectState::CommaOrEnd => NextAction::ObjectComma,
299 },
300 ContextState::Array(state) => match state {
301 ArrayState::ValueOrEnd => NextAction::ArrayValue,
302 ArrayState::CommaOrEnd => NextAction::ArrayComma,
303 },
304 }
305 } else if self.root_complete {
306 NextAction::RootFinished
307 } else {
308 NextAction::RootValue
309 }
310 }
311
312 fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, JsonError> {
313 loop {
314 match self.determine_action() {
315 NextAction::ObjectKey => {
316 let token = self.consume_token()?;
317 match token.token {
318 AdapterToken::ObjectEnd => {
319 self.stack.pop();
320 self.finish_value_in_parent();
321 return Ok(Some(ParseEvent::StructEnd));
322 }
323 AdapterToken::String(name) => {
324 self.expect_colon()?;
325 if let Some(ContextState::Object(state)) = self.stack.last_mut() {
326 *state = ObjectState::Value;
327 }
328 return Ok(Some(ParseEvent::FieldKey(FieldKey::new(
329 name,
330 FieldLocationHint::KeyValue,
331 ))));
332 }
333 AdapterToken::Eof => {
334 return Err(JsonError::new(
335 JsonErrorKind::UnexpectedEof {
336 expected: "field name or '}'",
337 },
338 token.span,
339 ));
340 }
341 _ => return Err(self.unexpected(&token, "field name or '}'")),
342 }
343 }
344 NextAction::ObjectValue => {
345 return self.parse_value_start_with_token(None).map(Some);
346 }
347 NextAction::ObjectComma => {
348 let token = self.consume_token()?;
349 match token.token {
350 AdapterToken::Comma => {
351 if let Some(ContextState::Object(state)) = self.stack.last_mut() {
352 *state = ObjectState::KeyOrEnd;
353 }
354 continue;
355 }
356 AdapterToken::ObjectEnd => {
357 self.stack.pop();
358 self.finish_value_in_parent();
359 return Ok(Some(ParseEvent::StructEnd));
360 }
361 AdapterToken::Eof => {
362 return Err(JsonError::new(
363 JsonErrorKind::UnexpectedEof {
364 expected: "',' or '}'",
365 },
366 token.span,
367 ));
368 }
369 _ => return Err(self.unexpected(&token, "',' or '}'")),
370 }
371 }
372 NextAction::ArrayValue => {
373 let token = self.consume_token()?;
374 match token.token {
375 AdapterToken::ArrayEnd => {
376 self.stack.pop();
377 self.finish_value_in_parent();
378 return Ok(Some(ParseEvent::SequenceEnd));
379 }
380 AdapterToken::Eof => {
381 return Err(JsonError::new(
382 JsonErrorKind::UnexpectedEof {
383 expected: "value or ']'",
384 },
385 token.span,
386 ));
387 }
388 AdapterToken::Comma | AdapterToken::Colon => {
389 return Err(self.unexpected(&token, "value or ']'"));
390 }
391 _ => {
392 return self.parse_value_start_with_token(Some(token)).map(Some);
393 }
394 }
395 }
396 NextAction::ArrayComma => {
397 let token = self.consume_token()?;
398 match token.token {
399 AdapterToken::Comma => {
400 if let Some(ContextState::Array(state)) = self.stack.last_mut() {
401 *state = ArrayState::ValueOrEnd;
402 }
403 continue;
404 }
405 AdapterToken::ArrayEnd => {
406 self.stack.pop();
407 self.finish_value_in_parent();
408 return Ok(Some(ParseEvent::SequenceEnd));
409 }
410 AdapterToken::Eof => {
411 return Err(JsonError::new(
412 JsonErrorKind::UnexpectedEof {
413 expected: "',' or ']'",
414 },
415 token.span,
416 ));
417 }
418 _ => return Err(self.unexpected(&token, "',' or ']'")),
419 }
420 }
421 NextAction::RootValue => {
422 return self.parse_value_start_with_token(None).map(Some);
423 }
424 NextAction::RootFinished => {
425 return Ok(None);
426 }
427 }
428 }
429 }
430
431 fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, JsonError> {
432 let remaining = self.input.get(self.current_offset..).unwrap_or_default();
433 if remaining.is_empty() {
434 return Ok(Vec::new());
435 }
436
437 let mut adapter = SliceAdapter::<true>::new(remaining);
438
439 let already_inside_object = matches!(self.event_peek, Some(ParseEvent::StructStart(_)));
442
443 if !already_inside_object {
444 let first = adapter.next_token().map_err(JsonError::from)?;
445 if !matches!(first.token, AdapterToken::ObjectStart) {
446 return Ok(Vec::new());
447 }
448 }
449
450 let mut evidence = Vec::new();
451 loop {
452 let token = adapter.next_token().map_err(JsonError::from)?;
453 match token.token {
454 AdapterToken::ObjectEnd => break,
455 AdapterToken::String(name) => {
456 let colon = adapter.next_token().map_err(JsonError::from)?;
457 if !matches!(colon.token, AdapterToken::Colon) {
458 return Err(JsonError::new(
459 JsonErrorKind::UnexpectedToken {
460 got: format!("{:?}", colon.token),
461 expected: "':'",
462 },
463 colon.span,
464 ));
465 }
466
467 let value_token = adapter.next_token().map_err(JsonError::from)?;
469 let scalar_value = match value_token.token {
470 AdapterToken::String(s) => Some(ScalarValue::Str(s)),
471 AdapterToken::True => Some(ScalarValue::Bool(true)),
472 AdapterToken::False => Some(ScalarValue::Bool(false)),
473 AdapterToken::Null => Some(ScalarValue::Null),
474 AdapterToken::I64(n) => Some(ScalarValue::I64(n)),
475 AdapterToken::U64(n) => Some(ScalarValue::U64(n)),
476 AdapterToken::I128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
477 AdapterToken::U128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
478 AdapterToken::F64(n) => Some(ScalarValue::F64(n)),
479 AdapterToken::ObjectStart => {
480 self.skip_container_in_adapter(&mut adapter, DelimKind::Object)?;
482 None
483 }
484 AdapterToken::ArrayStart => {
485 self.skip_container_in_adapter(&mut adapter, DelimKind::Array)?;
487 None
488 }
489 _ => None,
490 };
491
492 if let Some(sv) = scalar_value {
493 evidence.push(FieldEvidence::with_scalar_value(
494 name,
495 FieldLocationHint::KeyValue,
496 None,
497 sv,
498 ));
499 } else {
500 evidence.push(FieldEvidence::new(name, FieldLocationHint::KeyValue, None));
501 }
502
503 let sep = adapter.next_token().map_err(JsonError::from)?;
504 match sep.token {
505 AdapterToken::Comma => continue,
506 AdapterToken::ObjectEnd => break,
507 AdapterToken::Eof => {
508 return Err(JsonError::new(
509 JsonErrorKind::UnexpectedEof {
510 expected: "',' or '}'",
511 },
512 sep.span,
513 ));
514 }
515 _ => {
516 return Err(JsonError::new(
517 JsonErrorKind::UnexpectedToken {
518 got: format!("{:?}", sep.token),
519 expected: "',' or '}'",
520 },
521 sep.span,
522 ));
523 }
524 }
525 }
526 AdapterToken::Eof => {
527 return Err(JsonError::new(
528 JsonErrorKind::UnexpectedEof {
529 expected: "field name or '}'",
530 },
531 token.span,
532 ));
533 }
534 _ => {
535 return Err(JsonError::new(
536 JsonErrorKind::UnexpectedToken {
537 got: format!("{:?}", token.token),
538 expected: "field name or '}'",
539 },
540 token.span,
541 ));
542 }
543 }
544 }
545
546 Ok(evidence)
547 }
548}
549
550impl<'de> FormatParser<'de> for JsonParser<'de> {
551 type Error = JsonError;
552 type Probe<'a>
553 = JsonProbe<'de>
554 where
555 Self: 'a;
556
557 fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
558 Some(crate::RawJson::SHAPE)
559 }
560
561 fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
562 if let Some(event) = self.event_peek.take() {
563 self.peek_start_offset = None;
564 return Ok(Some(event));
565 }
566 self.produce_event()
567 }
568
569 fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
570 if let Some(event) = self.event_peek.clone() {
571 return Ok(Some(event));
572 }
573 let event = self.produce_event()?;
574 if let Some(ref e) = event {
575 self.event_peek = Some(e.clone());
576 self.peek_start_offset = Some(self.last_token_start);
580 }
581 Ok(event)
582 }
583
584 fn skip_value(&mut self) -> Result<(), Self::Error> {
585 if let Some(event) = self.event_peek.take() {
587 self.peek_start_offset = None;
588
589 match event {
593 ParseEvent::StructStart(_) => {
594 let res = self.skip_container(DelimKind::Object);
595 self.stack.pop();
597 res?;
598 self.finish_value_in_parent();
600 }
601 ParseEvent::SequenceStart(_) => {
602 let res = self.skip_container(DelimKind::Array);
603 self.stack.pop();
605 res?;
606 self.finish_value_in_parent();
608 }
609 _ => {
610 }
614 }
615 } else {
616 self.consume_value_tokens()?;
617 self.finish_value_in_parent();
618 }
619 Ok(())
620 }
621
622 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
623 let evidence = self.build_probe()?;
624 Ok(JsonProbe { evidence, idx: 0 })
625 }
626
627 fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
628 let start_offset = if let Some(event) = self.event_peek.take() {
631 let start = self
632 .peek_start_offset
633 .take()
634 .expect("peek_start_offset should be set when event_peek is set");
635
636 match event {
640 ParseEvent::StructStart(_) => {
641 let res = self.skip_container(DelimKind::Object);
642 self.stack.pop();
644 res?;
645 }
646 ParseEvent::SequenceStart(_) => {
647 let res = self.skip_container(DelimKind::Array);
648 self.stack.pop();
650 res?;
651 }
652 ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
653 return Err(JsonError::without_span(JsonErrorKind::InvalidValue {
655 message: "unexpected end event in capture_raw".to_string(),
656 }));
657 }
658 _ => {
659 }
661 }
662
663 start
664 } else {
665 let first = self.consume_token()?;
667 let start = first.span.offset;
668
669 match first.token {
671 AdapterToken::ObjectStart => self.skip_container(DelimKind::Object)?,
672 AdapterToken::ArrayStart => self.skip_container(DelimKind::Array)?,
673 AdapterToken::ObjectEnd
674 | AdapterToken::ArrayEnd
675 | AdapterToken::Comma
676 | AdapterToken::Colon => return Err(self.unexpected(&first, "value")),
677 AdapterToken::Eof => {
678 return Err(JsonError::new(
679 JsonErrorKind::UnexpectedEof { expected: "value" },
680 first.span,
681 ));
682 }
683 _ => {
684 }
686 }
687
688 start
689 };
690
691 let end_offset = self.current_offset;
693
694 let raw_bytes = &self.input[start_offset..end_offset];
696 let raw_str = core::str::from_utf8(raw_bytes).map_err(|e| {
697 JsonError::without_span(JsonErrorKind::InvalidValue {
698 message: alloc::format!("invalid UTF-8 in raw JSON: {}", e),
699 })
700 })?;
701
702 self.finish_value_in_parent();
703 Ok(Some(raw_str))
704 }
705
706 fn format_namespace(&self) -> Option<&'static str> {
707 Some("json")
708 }
709}
710
#[cfg(feature = "jit")]
impl<'de> facet_format::FormatJitParser<'de> for JsonParser<'de> {
    type FormatJit = crate::jit::JsonJitFormat;

    /// Returns the full input buffer.
    fn jit_input(&self) -> &'de [u8] {
        self.input
    }

    /// Returns the current offset, or `None` while the parser holds state
    /// beyond a plain position: a peeked event, an open container, or a root
    /// value that has started but not finished.
    fn jit_pos(&self) -> Option<usize> {
        let has_peeked_event = self.event_peek.is_some();
        let inside_container = !self.stack.is_empty();
        let root_in_progress = self.root_started && !self.root_complete;

        if has_peeked_event || inside_container || root_in_progress {
            None
        } else {
            Some(self.current_offset)
        }
    }

    /// Repositions the parser at `pos`: the tokenizer is rebuilt at that
    /// offset, any peeked event is discarded and the root is marked as
    /// started and complete.
    fn jit_set_pos(&mut self, pos: usize) {
        self.adapter = SliceAdapter::new_with_offset(self.input, pos);
        self.current_offset = pos;

        self.peek_start_offset = None;
        self.event_peek = None;

        self.root_complete = true;
        self.root_started = true;
        debug_assert!(self.stack.is_empty());
    }

    /// Returns the JIT format descriptor.
    fn jit_format(&self) -> Self::FormatJit {
        crate::jit::JsonJitFormat
    }

    /// Maps a JIT error code to a `JsonError` with a one-byte span at
    /// `error_pos`. Codes without a dedicated mapping become `InvalidValue`.
    fn jit_error(&self, _input: &'de [u8], error_pos: usize, error_code: i32) -> Self::Error {
        use crate::error::JsonErrorKind;
        use facet_reflect::Span;

        let unexpected = |got: &str, expected: &'static str| JsonErrorKind::UnexpectedToken {
            got: got.into(),
            expected,
        };

        let kind = match error_code {
            -100 => JsonErrorKind::UnexpectedEof { expected: "value" },
            -101 => unexpected("non-'['", "'['"),
            -102 => unexpected("non-boolean", "'true' or 'false'"),
            -103 => unexpected("unexpected token", "',' or ']'"),
            other => JsonErrorKind::InvalidValue {
                message: alloc::format!("Tier-2 JIT error code: {}", other),
            },
        };

        let span = Span {
            offset: error_pos,
            len: 1,
        };
        JsonError::new(kind, span)
    }
}
803
804pub struct JsonProbe<'de> {
805 evidence: Vec<FieldEvidence<'de>>,
806 idx: usize,
807}
808
809impl<'de> ProbeStream<'de> for JsonProbe<'de> {
810 type Error = JsonError;
811
812 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
813 if self.idx >= self.evidence.len() {
814 Ok(None)
815 } else {
816 let ev = self.evidence[self.idx].clone();
817 self.idx += 1;
818 Ok(Some(ev))
819 }
820 }
821}