1extern crate alloc;
2
3use alloc::{borrow::Cow, vec::Vec};
4
5use facet_core::Facet as _;
6use facet_format::{
7 ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
8 ProbeStream, ScalarValue,
9};
10
11use crate::adapter::{SliceAdapter, SpannedAdapterToken, Token as AdapterToken};
12pub use crate::error::JsonError;
13use crate::error::JsonErrorKind;
14
15pub struct JsonParser<'de> {
17 input: &'de [u8],
18 adapter: SliceAdapter<'de, true>,
19 stack: Vec<ContextState>,
20 event_peek: Option<ParseEvent<'de>>,
22 root_started: bool,
24 root_complete: bool,
26 current_offset: usize,
28}
29
30#[derive(Debug)]
31enum ContextState {
32 Object(ObjectState),
33 Array(ArrayState),
34}
35
/// What the parser expects next while inside an object.
#[derive(Debug)]
enum ObjectState {
    /// A field name or the closing `}`.
    KeyOrEnd,
    /// The value belonging to the field name that was just read.
    Value,
    /// A `,` separator or the closing `}`.
    CommaOrEnd,
}
42
/// What the parser expects next while inside an array.
#[derive(Debug)]
enum ArrayState {
    /// An element value or the closing `]`.
    ValueOrEnd,
    /// A `,` separator or the closing `]`.
    CommaOrEnd,
}
48
/// Delimiter family of a container, used to pair openers with closers while
/// skipping nested values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DelimKind {
    /// A `{` ... `}` pair.
    Object,
    /// A `[` ... `]` pair.
    Array,
}
54
/// The step the event loop has to take next, derived from the container stack
/// and the root flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NextAction {
    /// Read a field name or the end of the current object.
    ObjectKey,
    /// Read the value of the current object field.
    ObjectValue,
    /// Read a `,` or the end of the current object.
    ObjectComma,
    /// Read an element or the end of the current array.
    ArrayValue,
    /// Read a `,` or the end of the current array.
    ArrayComma,
    /// No container is open and the root value has not completed yet.
    RootValue,
    /// No container is open and the root value has completed.
    RootFinished,
}
65
66impl<'de> JsonParser<'de> {
67 pub fn new(input: &'de [u8]) -> Self {
68 Self {
69 input,
70 adapter: SliceAdapter::new(input),
71 stack: Vec::new(),
72 event_peek: None,
73 root_started: false,
74 root_complete: false,
75 current_offset: 0,
76 }
77 }
78
79 fn consume_token(&mut self) -> Result<SpannedAdapterToken<'de>, JsonError> {
80 let token = self.adapter.next_token().map_err(JsonError::from)?;
81 self.current_offset = token.span.offset + token.span.len;
82 Ok(token)
83 }
84
85 fn expect_colon(&mut self) -> Result<(), JsonError> {
86 let token = self.consume_token()?;
87 if !matches!(token.token, AdapterToken::Colon) {
88 return Err(self.unexpected(&token, "':'"));
89 }
90 Ok(())
91 }
92
93 fn parse_value_start_with_token(
94 &mut self,
95 first: Option<SpannedAdapterToken<'de>>,
96 ) -> Result<ParseEvent<'de>, JsonError> {
97 let token = match first {
98 Some(tok) => tok,
99 None => self.consume_token()?,
100 };
101
102 self.root_started = true;
103
104 match token.token {
105 AdapterToken::ObjectStart => {
106 self.stack.push(ContextState::Object(ObjectState::KeyOrEnd));
107 Ok(ParseEvent::StructStart(ContainerKind::Object))
108 }
109 AdapterToken::ArrayStart => {
110 self.stack.push(ContextState::Array(ArrayState::ValueOrEnd));
111 Ok(ParseEvent::SequenceStart(ContainerKind::Array))
112 }
113 AdapterToken::String(s) => {
114 let event = ParseEvent::Scalar(ScalarValue::Str(s));
115 self.finish_value_in_parent();
116 Ok(event)
117 }
118 AdapterToken::True => {
119 self.finish_value_in_parent();
120 Ok(ParseEvent::Scalar(ScalarValue::Bool(true)))
121 }
122 AdapterToken::False => {
123 self.finish_value_in_parent();
124 Ok(ParseEvent::Scalar(ScalarValue::Bool(false)))
125 }
126 AdapterToken::Null => {
127 self.finish_value_in_parent();
128 Ok(ParseEvent::Scalar(ScalarValue::Null))
129 }
130 AdapterToken::U64(n) => {
131 self.finish_value_in_parent();
132 Ok(ParseEvent::Scalar(ScalarValue::U64(n)))
133 }
134 AdapterToken::I64(n) => {
135 self.finish_value_in_parent();
136 Ok(ParseEvent::Scalar(ScalarValue::I64(n)))
137 }
138 AdapterToken::U128(n) => {
139 self.finish_value_in_parent();
140 Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
141 n.to_string(),
142 ))))
143 }
144 AdapterToken::I128(n) => {
145 self.finish_value_in_parent();
146 Ok(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
147 n.to_string(),
148 ))))
149 }
150 AdapterToken::F64(n) => {
151 self.finish_value_in_parent();
152 Ok(ParseEvent::Scalar(ScalarValue::F64(n)))
153 }
154 AdapterToken::ObjectEnd | AdapterToken::ArrayEnd => {
155 Err(self.unexpected(&token, "value"))
156 }
157 AdapterToken::Comma | AdapterToken::Colon => Err(self.unexpected(&token, "value")),
158 AdapterToken::Eof => Err(JsonError::new(
159 JsonErrorKind::UnexpectedEof { expected: "value" },
160 token.span,
161 )),
162 }
163 }
164
165 fn finish_value_in_parent(&mut self) {
166 if let Some(context) = self.stack.last_mut() {
167 match context {
168 ContextState::Object(state) => *state = ObjectState::CommaOrEnd,
169 ContextState::Array(state) => *state = ArrayState::CommaOrEnd,
170 }
171 } else if self.root_started {
172 self.root_complete = true;
173 }
174 }
175
176 fn unexpected(&self, token: &SpannedAdapterToken<'de>, expected: &'static str) -> JsonError {
177 JsonError::new(
178 JsonErrorKind::UnexpectedToken {
179 got: format!("{:?}", token.token),
180 expected,
181 },
182 token.span,
183 )
184 }
185
186 fn consume_value_tokens(&mut self) -> Result<(), JsonError> {
187 let span = self.adapter.skip().map_err(JsonError::from)?;
188 self.current_offset = span.offset + span.len;
189 Ok(())
190 }
191
192 fn skip_container(&mut self, start_kind: DelimKind) -> Result<(), JsonError> {
193 let mut stack = vec![start_kind];
194 while let Some(current) = stack.last().copied() {
195 let token = self.consume_token()?;
196 match token.token {
197 AdapterToken::ObjectStart => stack.push(DelimKind::Object),
198 AdapterToken::ArrayStart => stack.push(DelimKind::Array),
199 AdapterToken::ObjectEnd => {
200 if current != DelimKind::Object {
201 return Err(self.unexpected(&token, "'}'"));
202 }
203 stack.pop();
204 if stack.is_empty() {
205 break;
206 }
207 }
208 AdapterToken::ArrayEnd => {
209 if current != DelimKind::Array {
210 return Err(self.unexpected(&token, "']'"));
211 }
212 stack.pop();
213 if stack.is_empty() {
214 break;
215 }
216 }
217 AdapterToken::Eof => {
218 return Err(JsonError::new(
219 JsonErrorKind::UnexpectedEof { expected: "value" },
220 token.span,
221 ));
222 }
223 _ => {}
224 }
225 }
226 Ok(())
227 }
228
229 fn skip_container_in_adapter(
231 &self,
232 adapter: &mut SliceAdapter<'de, true>,
233 start_kind: DelimKind,
234 ) -> Result<(), JsonError> {
235 let mut stack = vec![start_kind];
236 while let Some(current) = stack.last().copied() {
237 let token = adapter.next_token().map_err(JsonError::from)?;
238 match token.token {
239 AdapterToken::ObjectStart => stack.push(DelimKind::Object),
240 AdapterToken::ArrayStart => stack.push(DelimKind::Array),
241 AdapterToken::ObjectEnd => {
242 if current != DelimKind::Object {
243 return Err(JsonError::new(
244 JsonErrorKind::UnexpectedToken {
245 got: format!("{:?}", token.token),
246 expected: "'}'",
247 },
248 token.span,
249 ));
250 }
251 stack.pop();
252 if stack.is_empty() {
253 break;
254 }
255 }
256 AdapterToken::ArrayEnd => {
257 if current != DelimKind::Array {
258 return Err(JsonError::new(
259 JsonErrorKind::UnexpectedToken {
260 got: format!("{:?}", token.token),
261 expected: "']'",
262 },
263 token.span,
264 ));
265 }
266 stack.pop();
267 if stack.is_empty() {
268 break;
269 }
270 }
271 AdapterToken::Eof => {
272 return Err(JsonError::new(
273 JsonErrorKind::UnexpectedEof { expected: "value" },
274 token.span,
275 ));
276 }
277 _ => {}
278 }
279 }
280 Ok(())
281 }
282
283 fn determine_action(&self) -> NextAction {
284 if let Some(context) = self.stack.last() {
285 match context {
286 ContextState::Object(state) => match state {
287 ObjectState::KeyOrEnd => NextAction::ObjectKey,
288 ObjectState::Value => NextAction::ObjectValue,
289 ObjectState::CommaOrEnd => NextAction::ObjectComma,
290 },
291 ContextState::Array(state) => match state {
292 ArrayState::ValueOrEnd => NextAction::ArrayValue,
293 ArrayState::CommaOrEnd => NextAction::ArrayComma,
294 },
295 }
296 } else if self.root_complete {
297 NextAction::RootFinished
298 } else {
299 NextAction::RootValue
300 }
301 }
302
303 fn produce_event(&mut self) -> Result<ParseEvent<'de>, JsonError> {
304 loop {
305 match self.determine_action() {
306 NextAction::ObjectKey => {
307 let token = self.consume_token()?;
308 match token.token {
309 AdapterToken::ObjectEnd => {
310 self.stack.pop();
311 self.finish_value_in_parent();
312 return Ok(ParseEvent::StructEnd);
313 }
314 AdapterToken::String(name) => {
315 self.expect_colon()?;
316 if let Some(ContextState::Object(state)) = self.stack.last_mut() {
317 *state = ObjectState::Value;
318 }
319 return Ok(ParseEvent::FieldKey(FieldKey::new(
320 name,
321 FieldLocationHint::KeyValue,
322 )));
323 }
324 AdapterToken::Eof => {
325 return Err(JsonError::new(
326 JsonErrorKind::UnexpectedEof {
327 expected: "field name or '}'",
328 },
329 token.span,
330 ));
331 }
332 _ => return Err(self.unexpected(&token, "field name or '}'")),
333 }
334 }
335 NextAction::ObjectValue => {
336 return self.parse_value_start_with_token(None);
337 }
338 NextAction::ObjectComma => {
339 let token = self.consume_token()?;
340 match token.token {
341 AdapterToken::Comma => {
342 if let Some(ContextState::Object(state)) = self.stack.last_mut() {
343 *state = ObjectState::KeyOrEnd;
344 }
345 continue;
346 }
347 AdapterToken::ObjectEnd => {
348 self.stack.pop();
349 self.finish_value_in_parent();
350 return Ok(ParseEvent::StructEnd);
351 }
352 AdapterToken::Eof => {
353 return Err(JsonError::new(
354 JsonErrorKind::UnexpectedEof {
355 expected: "',' or '}'",
356 },
357 token.span,
358 ));
359 }
360 _ => return Err(self.unexpected(&token, "',' or '}'")),
361 }
362 }
363 NextAction::ArrayValue => {
364 let token = self.consume_token()?;
365 match token.token {
366 AdapterToken::ArrayEnd => {
367 self.stack.pop();
368 self.finish_value_in_parent();
369 return Ok(ParseEvent::SequenceEnd);
370 }
371 AdapterToken::Eof => {
372 return Err(JsonError::new(
373 JsonErrorKind::UnexpectedEof {
374 expected: "value or ']'",
375 },
376 token.span,
377 ));
378 }
379 AdapterToken::Comma | AdapterToken::Colon => {
380 return Err(self.unexpected(&token, "value or ']'"));
381 }
382 _ => {
383 return self.parse_value_start_with_token(Some(token));
384 }
385 }
386 }
387 NextAction::ArrayComma => {
388 let token = self.consume_token()?;
389 match token.token {
390 AdapterToken::Comma => {
391 if let Some(ContextState::Array(state)) = self.stack.last_mut() {
392 *state = ArrayState::ValueOrEnd;
393 }
394 continue;
395 }
396 AdapterToken::ArrayEnd => {
397 self.stack.pop();
398 self.finish_value_in_parent();
399 return Ok(ParseEvent::SequenceEnd);
400 }
401 AdapterToken::Eof => {
402 return Err(JsonError::new(
403 JsonErrorKind::UnexpectedEof {
404 expected: "',' or ']'",
405 },
406 token.span,
407 ));
408 }
409 _ => return Err(self.unexpected(&token, "',' or ']'")),
410 }
411 }
412 NextAction::RootValue => {
413 return self.parse_value_start_with_token(None);
414 }
415 NextAction::RootFinished => {
416 return Err(JsonError::without_span(JsonErrorKind::UnexpectedToken {
417 got: "end of input".into(),
418 expected: "no additional JSON values",
419 }));
420 }
421 }
422 }
423 }
424
425 fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, JsonError> {
426 let remaining = self.input.get(self.current_offset..).unwrap_or_default();
427 if remaining.is_empty() {
428 return Ok(Vec::new());
429 }
430
431 let mut adapter = SliceAdapter::<true>::new(remaining);
432
433 let already_inside_object = matches!(self.event_peek, Some(ParseEvent::StructStart(_)));
436
437 if !already_inside_object {
438 let first = adapter.next_token().map_err(JsonError::from)?;
439 if !matches!(first.token, AdapterToken::ObjectStart) {
440 return Ok(Vec::new());
441 }
442 }
443
444 let mut evidence = Vec::new();
445 loop {
446 let token = adapter.next_token().map_err(JsonError::from)?;
447 match token.token {
448 AdapterToken::ObjectEnd => break,
449 AdapterToken::String(name) => {
450 let colon = adapter.next_token().map_err(JsonError::from)?;
451 if !matches!(colon.token, AdapterToken::Colon) {
452 return Err(JsonError::new(
453 JsonErrorKind::UnexpectedToken {
454 got: format!("{:?}", colon.token),
455 expected: "':'",
456 },
457 colon.span,
458 ));
459 }
460
461 let value_token = adapter.next_token().map_err(JsonError::from)?;
463 let scalar_value = match value_token.token {
464 AdapterToken::String(s) => Some(ScalarValue::Str(s)),
465 AdapterToken::True => Some(ScalarValue::Bool(true)),
466 AdapterToken::False => Some(ScalarValue::Bool(false)),
467 AdapterToken::Null => Some(ScalarValue::Null),
468 AdapterToken::I64(n) => Some(ScalarValue::I64(n)),
469 AdapterToken::U64(n) => Some(ScalarValue::U64(n)),
470 AdapterToken::I128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
471 AdapterToken::U128(n) => Some(ScalarValue::Str(Cow::Owned(n.to_string()))),
472 AdapterToken::F64(n) => Some(ScalarValue::F64(n)),
473 AdapterToken::ObjectStart => {
474 self.skip_container_in_adapter(&mut adapter, DelimKind::Object)?;
476 None
477 }
478 AdapterToken::ArrayStart => {
479 self.skip_container_in_adapter(&mut adapter, DelimKind::Array)?;
481 None
482 }
483 _ => None,
484 };
485
486 if let Some(sv) = scalar_value {
487 evidence.push(FieldEvidence::with_scalar_value(
488 name,
489 FieldLocationHint::KeyValue,
490 None,
491 sv,
492 None, ));
494 } else {
495 evidence.push(FieldEvidence::new(
496 name,
497 FieldLocationHint::KeyValue,
498 None,
499 None, ));
501 }
502
503 let sep = adapter.next_token().map_err(JsonError::from)?;
504 match sep.token {
505 AdapterToken::Comma => continue,
506 AdapterToken::ObjectEnd => break,
507 AdapterToken::Eof => {
508 return Err(JsonError::new(
509 JsonErrorKind::UnexpectedEof {
510 expected: "',' or '}'",
511 },
512 sep.span,
513 ));
514 }
515 _ => {
516 return Err(JsonError::new(
517 JsonErrorKind::UnexpectedToken {
518 got: format!("{:?}", sep.token),
519 expected: "',' or '}'",
520 },
521 sep.span,
522 ));
523 }
524 }
525 }
526 AdapterToken::Eof => {
527 return Err(JsonError::new(
528 JsonErrorKind::UnexpectedEof {
529 expected: "field name or '}'",
530 },
531 token.span,
532 ));
533 }
534 _ => {
535 return Err(JsonError::new(
536 JsonErrorKind::UnexpectedToken {
537 got: format!("{:?}", token.token),
538 expected: "field name or '}'",
539 },
540 token.span,
541 ));
542 }
543 }
544 }
545
546 Ok(evidence)
547 }
548}
549
550impl<'de> FormatParser<'de> for JsonParser<'de> {
551 type Error = JsonError;
552 type Probe<'a>
553 = JsonProbe<'de>
554 where
555 Self: 'a;
556
557 fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
558 Some(crate::RawJson::SHAPE)
559 }
560
561 fn next_event(&mut self) -> Result<ParseEvent<'de>, Self::Error> {
562 if let Some(event) = self.event_peek.take() {
563 return Ok(event);
564 }
565 self.produce_event()
566 }
567
568 fn peek_event(&mut self) -> Result<ParseEvent<'de>, Self::Error> {
569 if let Some(event) = self.event_peek.clone() {
570 return Ok(event);
571 }
572 let event = self.produce_event()?;
573 self.event_peek = Some(event.clone());
574 Ok(event)
575 }
576
577 fn skip_value(&mut self) -> Result<(), Self::Error> {
578 debug_assert!(
579 self.event_peek.is_none(),
580 "skip_value called while an event is buffered"
581 );
582 self.consume_value_tokens()?;
583 self.finish_value_in_parent();
584 Ok(())
585 }
586
587 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
588 let evidence = self.build_probe()?;
589 Ok(JsonProbe { evidence, idx: 0 })
590 }
591
592 fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
593 debug_assert!(
594 self.event_peek.is_none(),
595 "capture_raw called while an event is buffered"
596 );
597
598 let first = self.consume_token()?;
600 let start_offset = first.span.offset;
601
602 match first.token {
604 AdapterToken::ObjectStart => self.skip_container(DelimKind::Object)?,
605 AdapterToken::ArrayStart => self.skip_container(DelimKind::Array)?,
606 AdapterToken::ObjectEnd
607 | AdapterToken::ArrayEnd
608 | AdapterToken::Comma
609 | AdapterToken::Colon => return Err(self.unexpected(&first, "value")),
610 AdapterToken::Eof => {
611 return Err(JsonError::new(
612 JsonErrorKind::UnexpectedEof { expected: "value" },
613 first.span,
614 ));
615 }
616 _ => {
617 }
619 }
620
621 let end_offset = self.current_offset;
623
624 let raw_bytes = &self.input[start_offset..end_offset];
626 let raw_str = core::str::from_utf8(raw_bytes).map_err(|e| {
627 JsonError::without_span(JsonErrorKind::InvalidValue {
628 message: alloc::format!("invalid UTF-8 in raw JSON: {}", e),
629 })
630 })?;
631
632 self.finish_value_in_parent();
633 Ok(Some(raw_str))
634 }
635}
636
637pub struct JsonProbe<'de> {
638 evidence: Vec<FieldEvidence<'de>>,
639 idx: usize,
640}
641
642impl<'de> ProbeStream<'de> for JsonProbe<'de> {
643 type Error = JsonError;
644
645 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
646 if self.idx >= self.evidence.len() {
647 Ok(None)
648 } else {
649 let ev = self.evidence[self.idx].clone();
650 self.idx += 1;
651 Ok(Some(ev))
652 }
653 }
654}