1use alloc::borrow::Cow;
12
13use facet_reflect::Span;
14
15use crate::scanner::{self, ParsedNumber, ScanError, ScanErrorKind, Scanner, Token as ScanToken};
16
/// A fully materialized JSON token as handed out by `SliceAdapter::next_token`.
///
/// String and number payloads are already decoded; structural tokens carry no data.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'input> {
    /// Start of a JSON object (`{`).
    ObjectStart,
    /// End of a JSON object (`}`).
    ObjectEnd,
    /// Start of a JSON array (`[`).
    ArrayStart,
    /// End of a JSON array (`]`).
    ArrayEnd,
    /// Key/value separator (`:`).
    Colon,
    /// Element separator (`,`).
    Comma,
    /// The `null` literal.
    Null,
    /// The `true` literal.
    True,
    /// The `false` literal.
    False,
    /// Decoded string contents; may borrow from the input or own its data.
    String(Cow<'input, str>),
    /// Number that the scanner's number parser reported as `ParsedNumber::U64`.
    U64(u64),
    /// Number that the scanner's number parser reported as `ParsedNumber::I64`.
    I64(i64),
    /// Number that the scanner's number parser reported as `ParsedNumber::U128`.
    U128(u128),
    /// Number that the scanner's number parser reported as `ParsedNumber::I128`.
    I128(i128),
    /// Number that the scanner's number parser reported as `ParsedNumber::F64`.
    F64(f64),
    /// End of input reached.
    Eof,
}
53
/// A [`Token`] paired with the span it was produced from.
#[derive(Debug, Clone)]
pub struct SpannedAdapterToken<'input> {
    /// The materialized token.
    pub token: Token<'input>,
    /// Location of the token, expressed as an absolute offset into the adapter's input.
    pub span: Span,
}
62
/// Error produced by the adapter while tokenizing or skipping.
#[derive(Debug, Clone)]
pub struct AdapterError {
    /// What went wrong.
    pub kind: AdapterErrorKind,
    /// Where it went wrong.
    pub span: Span,
}
71
/// The category of an [`AdapterError`].
#[derive(Debug, Clone)]
pub enum AdapterErrorKind {
    /// An error reported by the underlying scanner.
    Scan(ScanErrorKind),
    /// More input would be required to make progress.
    ///
    /// NOTE(review): nothing in this file constructs this variant; the `TokenSource` impl
    /// maps it to `UnexpectedEof("need more data")`.
    NeedMore,
}
80
81impl From<ScanError> for AdapterError {
82 fn from(e: ScanError) -> Self {
83 AdapterError {
84 kind: AdapterErrorKind::Scan(e.kind),
85 span: e.span,
86 }
87 }
88}
89
/// Chunk size, in bytes, used by `SliceAdapter::new` and `SliceAdapter::new_with_offset`.
///
/// The scan window starts at this size, grows by it, and is reset to it after each token.
pub const DEFAULT_CHUNK_SIZE: usize = 4;
92
/// Token adapter over an in-memory byte slice that feeds the scanner a sliding window.
///
/// The scanner only ever sees `input[window_start..window_end]`; the adapter grows the
/// window when the scanner asks for more data and slides it forward after each token.
///
/// When `BORROW` is `true`, strings without escapes are decoded through
/// `scanner::decode_string`; otherwise strings are always decoded into owned data.
pub struct SliceAdapter<'input, const BORROW: bool> {
    /// The complete input being tokenized.
    input: &'input [u8],
    /// Absolute offset in `input` where the current window begins.
    window_start: usize,
    /// Absolute (exclusive) offset in `input` where the current window ends.
    window_end: usize,
    /// Number of bytes by which the window grows, and its size after sliding.
    chunk_size: usize,
    /// The underlying scanner; its positions are relative to `window_start`.
    scanner: Scanner,
}
116
117impl<'input, const BORROW: bool> SliceAdapter<'input, BORROW> {
    /// Creates an adapter over `input` using [`DEFAULT_CHUNK_SIZE`].
    pub fn new(input: &'input [u8]) -> Self {
        Self::with_chunk_size(input, DEFAULT_CHUNK_SIZE)
    }
122
123 pub fn with_chunk_size(input: &'input [u8], chunk_size: usize) -> Self {
125 let initial_end = chunk_size.min(input.len());
126 Self {
127 input,
128 window_start: 0,
129 window_end: initial_end,
130 chunk_size,
131 scanner: Scanner::new(),
132 }
133 }
134
135 #[cfg(feature = "jit")]
141 pub fn new_with_offset(input: &'input [u8], offset: usize) -> Self {
142 let offset = offset.min(input.len());
143 let initial_end = (offset + DEFAULT_CHUNK_SIZE).min(input.len());
144 Self {
145 input,
146 window_start: offset,
147 window_end: initial_end,
148 chunk_size: DEFAULT_CHUNK_SIZE,
149 scanner: Scanner::new(),
150 }
151 }
152
    /// Returns the part of the input currently visible to the scanner,
    /// i.e. `input[window_start..window_end]`.
    #[inline]
    fn current_window(&self) -> &'input [u8] {
        &self.input[self.window_start..self.window_end]
    }
158
    /// Extends the window end by `chunk_size` bytes, capped at the end of the input.
    /// The window start is left unchanged.
    #[inline]
    fn grow_window(&mut self) {
        self.window_end = (self.window_end + self.chunk_size).min(self.input.len());
    }
164
165 #[inline]
167 fn slide_window(&mut self, consumed_in_window: usize) {
168 self.window_start += consumed_in_window;
169 self.window_end = (self.window_start + self.chunk_size).min(self.input.len());
170 self.scanner.set_pos(0);
171 }
172
    /// Returns `true` when the window already reaches the end of the input,
    /// so growing it cannot expose any more bytes.
    #[inline]
    fn at_end_of_input(&self) -> bool {
        self.window_end >= self.input.len()
    }
178
179 pub fn next_token(&mut self) -> Result<SpannedAdapterToken<'input>, AdapterError> {
187 loop {
188 let window = self.current_window();
189 let spanned = match self.scanner.next_token(window) {
190 Ok(s) => s,
191 Err(e) => {
192 return Err(AdapterError {
194 kind: AdapterErrorKind::Scan(e.kind),
195 span: Span::new(self.window_start + e.span.offset, e.span.len),
196 });
197 }
198 };
199
200 match spanned.token {
201 ScanToken::NeedMore { .. } => {
202 if self.at_end_of_input() {
204 let window = self.current_window();
206 let finalized = match self.scanner.finalize_at_eof(window) {
207 Ok(f) => f,
208 Err(e) => {
209 return Err(AdapterError {
210 kind: AdapterErrorKind::Scan(e.kind),
211 span: Span::new(self.window_start + e.span.offset, e.span.len),
212 });
213 }
214 };
215
216 let consumed = self.scanner.pos();
218 let absolute_span = Span::new(
219 self.window_start + finalized.span.offset,
220 finalized.span.len,
221 );
222
223 let token = self.materialize_token(&finalized)?;
224 self.slide_window(consumed);
225
226 return Ok(SpannedAdapterToken {
227 token,
228 span: absolute_span,
229 });
230 }
231 self.grow_window();
232 continue;
233 }
234 ScanToken::Eof => {
235 if self.at_end_of_input() {
237 return Ok(SpannedAdapterToken {
239 token: Token::Eof,
240 span: Span::new(self.window_start + spanned.span.offset, 0),
241 });
242 }
243 self.slide_window(self.scanner.pos());
245 continue;
246 }
247 _ => {
248 let consumed = self.scanner.pos();
250 let absolute_span =
251 Span::new(self.window_start + spanned.span.offset, spanned.span.len);
252
253 let token = self.materialize_token(&spanned)?;
254
255 self.slide_window(consumed);
257
258 return Ok(SpannedAdapterToken {
259 token,
260 span: absolute_span,
261 });
262 }
263 }
264 }
265 }
266
267 fn materialize_token(
272 &self,
273 spanned: &scanner::SpannedToken,
274 ) -> Result<Token<'input>, AdapterError> {
275 match &spanned.token {
276 ScanToken::ObjectStart => Ok(Token::ObjectStart),
277 ScanToken::ObjectEnd => Ok(Token::ObjectEnd),
278 ScanToken::ArrayStart => Ok(Token::ArrayStart),
279 ScanToken::ArrayEnd => Ok(Token::ArrayEnd),
280 ScanToken::Colon => Ok(Token::Colon),
281 ScanToken::Comma => Ok(Token::Comma),
282 ScanToken::Null => Ok(Token::Null),
283 ScanToken::True => Ok(Token::True),
284 ScanToken::False => Ok(Token::False),
285 ScanToken::String {
286 start,
287 end,
288 has_escapes,
289 } => {
290 let abs_start = self.window_start + start;
292 let abs_end = self.window_start + end;
293
294 let s = if BORROW && !*has_escapes {
295 scanner::decode_string(self.input, abs_start, abs_end, false)?
297 } else {
298 Cow::Owned(scanner::decode_string_owned(
300 self.input, abs_start, abs_end,
301 )?)
302 };
303 Ok(Token::String(s))
304 }
305 ScanToken::Number { start, end, hint } => {
306 let abs_start = self.window_start + start;
308 let abs_end = self.window_start + end;
309
310 let parsed = scanner::parse_number(self.input, abs_start, abs_end, *hint)?;
311 Ok(match parsed {
312 ParsedNumber::U64(n) => Token::U64(n),
313 ParsedNumber::I64(n) => Token::I64(n),
314 ParsedNumber::U128(n) => Token::U128(n),
315 ParsedNumber::I128(n) => Token::I128(n),
316 ParsedNumber::F64(n) => Token::F64(n),
317 })
318 }
319 ScanToken::Eof | ScanToken::NeedMore { .. } => {
320 unreachable!("Eof and NeedMore handled in next_token loop")
321 }
322 }
323 }
324
325 pub fn skip(&mut self) -> Result<Span, AdapterError> {
330 let first_token = self.next_token_for_skip()?;
332 let abs_start = first_token.span.offset;
333
334 match first_token.token {
335 SkipToken::ObjectStart => {
336 let mut depth = 1;
338 let mut abs_end = first_token.span.offset + first_token.span.len;
339 while depth > 0 {
340 let t = self.next_token_for_skip()?;
341 abs_end = t.span.offset + t.span.len;
342 match t.token {
343 SkipToken::ObjectStart => depth += 1,
344 SkipToken::ObjectEnd => depth -= 1,
345 _ => {}
346 }
347 }
348 Ok(Span::new(abs_start, abs_end - abs_start))
349 }
350 SkipToken::ArrayStart => {
351 let mut depth = 1;
353 let mut abs_end = first_token.span.offset + first_token.span.len;
354 while depth > 0 {
355 let t = self.next_token_for_skip()?;
356 abs_end = t.span.offset + t.span.len;
357 match t.token {
358 SkipToken::ArrayStart => depth += 1,
359 SkipToken::ArrayEnd => depth -= 1,
360 _ => {}
361 }
362 }
363 Ok(Span::new(abs_start, abs_end - abs_start))
364 }
365 SkipToken::Scalar => Ok(first_token.span),
367 SkipToken::Invalid(ch) => Err(AdapterError {
368 kind: AdapterErrorKind::Scan(ScanErrorKind::UnexpectedChar(ch)),
369 span: first_token.span,
370 }),
371 SkipToken::Eof => Err(AdapterError {
372 kind: AdapterErrorKind::Scan(ScanErrorKind::UnexpectedEof("expected value")),
373 span: first_token.span,
374 }),
375 SkipToken::ObjectEnd => Err(AdapterError {
377 kind: AdapterErrorKind::Scan(ScanErrorKind::UnexpectedChar('}')),
378 span: first_token.span,
379 }),
380 SkipToken::ArrayEnd => Err(AdapterError {
381 kind: AdapterErrorKind::Scan(ScanErrorKind::UnexpectedChar(']')),
382 span: first_token.span,
383 }),
384 }
385 }
386
387 fn next_token_for_skip(&mut self) -> Result<SpannedSkipToken, AdapterError> {
389 loop {
390 let window = self.current_window();
391 let spanned = match self.scanner.next_token(window) {
392 Ok(s) => s,
393 Err(e) => {
394 return Err(AdapterError {
395 kind: AdapterErrorKind::Scan(e.kind),
396 span: Span::new(self.window_start + e.span.offset, e.span.len),
397 });
398 }
399 };
400
401 match spanned.token {
402 ScanToken::NeedMore { .. } => {
403 if self.at_end_of_input() {
404 let window = self.current_window();
406 let finalized = match self.scanner.finalize_at_eof(window) {
407 Ok(f) => f,
408 Err(e) => {
409 return Err(AdapterError {
410 kind: AdapterErrorKind::Scan(e.kind),
411 span: Span::new(self.window_start + e.span.offset, e.span.len),
412 });
413 }
414 };
415
416 let consumed = self.scanner.pos();
417 let abs_span = Span::new(
418 self.window_start + finalized.span.offset,
419 finalized.span.len,
420 );
421
422 let skip_token = match finalized.token {
423 ScanToken::ObjectStart => SkipToken::ObjectStart,
424 ScanToken::ObjectEnd => SkipToken::ObjectEnd,
425 ScanToken::ArrayStart => SkipToken::ArrayStart,
426 ScanToken::ArrayEnd => SkipToken::ArrayEnd,
427 ScanToken::String { .. }
428 | ScanToken::Number { .. }
429 | ScanToken::True
430 | ScanToken::False
431 | ScanToken::Null => SkipToken::Scalar,
432 ScanToken::Colon => SkipToken::Invalid(':'),
433 ScanToken::Comma => SkipToken::Invalid(','),
434 ScanToken::Eof => SkipToken::Eof,
435 ScanToken::NeedMore { .. } => unreachable!(),
436 };
437
438 self.slide_window(consumed);
439 return Ok(SpannedSkipToken {
440 token: skip_token,
441 span: abs_span,
442 });
443 }
444 self.grow_window();
445 continue;
446 }
447 ScanToken::Eof => {
448 if self.at_end_of_input() {
449 return Ok(SpannedSkipToken {
450 token: SkipToken::Eof,
451 span: Span::new(self.window_start + spanned.span.offset, 0),
452 });
453 }
454 self.slide_window(self.scanner.pos());
455 continue;
456 }
457 _ => {
458 let consumed = self.scanner.pos();
459 let abs_span =
460 Span::new(self.window_start + spanned.span.offset, spanned.span.len);
461
462 let skip_token = match spanned.token {
463 ScanToken::ObjectStart => SkipToken::ObjectStart,
464 ScanToken::ObjectEnd => SkipToken::ObjectEnd,
465 ScanToken::ArrayStart => SkipToken::ArrayStart,
466 ScanToken::ArrayEnd => SkipToken::ArrayEnd,
467 ScanToken::String { .. }
468 | ScanToken::Number { .. }
469 | ScanToken::True
470 | ScanToken::False
471 | ScanToken::Null => SkipToken::Scalar,
472 ScanToken::Colon => SkipToken::Invalid(':'),
473 ScanToken::Comma => SkipToken::Invalid(','),
474 ScanToken::Eof | ScanToken::NeedMore { .. } => unreachable!(),
475 };
476
477 self.slide_window(consumed);
478 return Ok(SpannedSkipToken {
479 token: skip_token,
480 span: abs_span,
481 });
482 }
483 }
484 }
485 }
486
    /// Returns the absolute input offset the scanner is currently positioned at
    /// (window start plus the scanner's position inside the window).
    #[allow(dead_code)]
    pub fn position(&self) -> usize {
        self.window_start + self.scanner.pos()
    }
492
    /// Returns the complete input slice this adapter was created over.
    #[allow(dead_code)]
    pub fn input(&self) -> &'input [u8] {
        self.input
    }
498}
499
/// Coarse token classification used while skipping values; carries no payload.
#[derive(Debug, Clone, Copy)]
enum SkipToken {
    /// Start of an object.
    ObjectStart,
    /// End of an object.
    ObjectEnd,
    /// Start of an array.
    ArrayStart,
    /// End of an array.
    ArrayEnd,
    /// Any string, number, `true`, `false` or `null`.
    Scalar,
    /// A token that cannot start a value (`:` or `,`), with the offending character.
    Invalid(char),
    /// End of input.
    Eof,
}
511
/// A [`SkipToken`] together with its absolute span in the input.
#[derive(Debug)]
struct SpannedSkipToken {
    /// The classified token.
    token: SkipToken,
    /// Absolute location of the token in the adapter's input.
    span: Span,
}
518
519#[cfg(feature = "streaming")]
524use crate::error::{JsonError, JsonErrorKind};
525
/// A source of JSON tokens that reports failures as `JsonError`
/// (only available with the `streaming` feature).
#[cfg(feature = "streaming")]
pub trait TokenSource<'input> {
    /// Produces the next token together with its span.
    fn next_token(&mut self) -> Result<SpannedAdapterToken<'input>, JsonError>;

    /// Skips one complete value and returns the span it covered.
    fn skip(&mut self) -> Result<Span, JsonError>;

    /// Returns the underlying input bytes, if the source has them available.
    ///
    /// The default implementation returns `None`.
    #[allow(dead_code)]
    fn input_bytes(&self) -> Option<&'input [u8]> {
        None
    }

    /// Creates a fresh source positioned at `offset`, if the source supports it.
    ///
    /// The default implementation ignores `offset` and returns `None`.
    #[allow(dead_code)]
    fn at_offset(&self, offset: usize) -> Option<Self>
    where
        Self: Sized,
    {
        let _ = offset;
        None
    }
}
565
/// [`TokenSource`] implementation that forwards to the inherent `SliceAdapter`
/// methods and converts their errors into `JsonError`.
#[cfg(feature = "streaming")]
impl<'input, const BORROW: bool> TokenSource<'input> for SliceAdapter<'input, BORROW> {
    fn next_token(&mut self) -> Result<SpannedAdapterToken<'input>, JsonError> {
        SliceAdapter::next_token(self).map_err(adapter_error_to_json)
    }

    fn skip(&mut self) -> Result<Span, JsonError> {
        SliceAdapter::skip(self).map_err(adapter_error_to_json)
    }

    fn input_bytes(&self) -> Option<&'input [u8]> {
        Some(self.input)
    }

    /// Returns a new adapter over `input[offset..]`, or `None` when `offset` lies
    /// beyond the end of the input (instead of panicking on the slice index).
    ///
    /// The new adapter treats the sub-slice as its whole input, so the spans it
    /// reports are relative to `offset`.
    fn at_offset(&self, offset: usize) -> Option<Self> {
        self.input.get(offset..).map(Self::new)
    }
}

/// Converts an [`AdapterError`] into a `JsonError`, preserving its span.
///
/// `AdapterErrorKind::NeedMore` has no scanner equivalent and is reported as an
/// unexpected end of input.
#[cfg(feature = "streaming")]
fn adapter_error_to_json(e: AdapterError) -> JsonError {
    let kind = match e.kind {
        AdapterErrorKind::Scan(s) => s,
        AdapterErrorKind::NeedMore => {
            crate::scanner::ScanErrorKind::UnexpectedEof("need more data")
        }
    };
    JsonError {
        kind: JsonErrorKind::Scan(kind),
        span: Some(e.span),
        source_code: None,
    }
}
602
603#[cfg(test)]
604mod tests {
605 use super::*;
606
607 #[test]
608 fn test_next_simple() {
609 let json = br#"{"name": "test", "value": 42}"#;
610 let mut adapter = SliceAdapter::<true>::new(json);
611
612 let t = adapter.next_token().unwrap();
614 assert!(matches!(t.token, Token::ObjectStart));
615
616 let t = adapter.next_token().unwrap();
618 assert_eq!(t.token, Token::String(Cow::Borrowed("name")));
619
620 let t = adapter.next_token().unwrap();
622 assert!(matches!(t.token, Token::Colon));
623
624 let t = adapter.next_token().unwrap();
626 assert_eq!(t.token, Token::String(Cow::Borrowed("test")));
627
628 let t = adapter.next_token().unwrap();
630 assert!(matches!(t.token, Token::Comma));
631
632 let t = adapter.next_token().unwrap();
634 assert_eq!(t.token, Token::String(Cow::Borrowed("value")));
635
636 let t = adapter.next_token().unwrap();
638 assert!(matches!(t.token, Token::Colon));
639
640 let t = adapter.next_token().unwrap();
642 assert_eq!(t.token, Token::U64(42));
643
644 let t = adapter.next_token().unwrap();
646 assert!(matches!(t.token, Token::ObjectEnd));
647
648 let t = adapter.next_token().unwrap();
650 assert!(matches!(t.token, Token::Eof));
651 }
652
    /// A string containing an escape sequence is decoded to its unescaped text.
    #[test]
    fn test_next_with_escapes() {
        let json = br#""hello\nworld""#;
        let mut adapter = SliceAdapter::<true>::new(json);

        let t = adapter.next_token().unwrap();
        // `Cow` equality compares contents, so this checks the decoded text:
        // the two-character escape `\n` must have become a real newline.
        assert_eq!(
            t.token,
            Token::String(Cow::Owned("hello\nworld".to_string()))
        );
    }
665
    /// Skipping a scalar value returns exactly its span and leaves the adapter
    /// positioned on the following token.
    #[test]
    fn test_skip_scalar() {
        let json = br#"{"skip": 12345, "keep": "value"}"#;
        let mut adapter = SliceAdapter::<true>::new(json);

        // Consume `{`, the key "skip" and `:` so the next value is the number.
        adapter.next_token().unwrap();
        adapter.next_token().unwrap();
        adapter.next_token().unwrap();

        // The returned span must cover the number's bytes in the original input.
        let span = adapter.skip().unwrap();
        assert_eq!(&json[span.offset..span.offset + span.len], b"12345");

        // Tokenizing resumes right after the skipped value.
        let t = adapter.next_token().unwrap();
        assert!(matches!(t.token, Token::Comma));

        let t = adapter.next_token().unwrap();
        assert_eq!(t.token, Token::String(Cow::Borrowed("keep")));
    }
690
    /// Skipping an object consumes all nested objects up to the matching `}`.
    #[test]
    fn test_skip_object() {
        let json = br#"{"skip": {"nested": {"deep": true}}, "keep": 1}"#;
        let mut adapter = SliceAdapter::<true>::new(json);

        // Consume `{`, the key "skip" and `:` so the next value is the nested object.
        adapter.next_token().unwrap();
        adapter.next_token().unwrap();
        adapter.next_token().unwrap();

        // The span runs from the opening brace to its matching closing brace.
        let span = adapter.skip().unwrap();
        assert_eq!(
            &json[span.offset..span.offset + span.len],
            br#"{"nested": {"deep": true}}"#
        );

        // Tokenizing resumes right after the skipped object.
        let t = adapter.next_token().unwrap();
        assert!(matches!(t.token, Token::Comma));

        let t = adapter.next_token().unwrap();
        assert_eq!(t.token, Token::String(Cow::Borrowed("keep")));
    }
718
    /// Skipping an array consumes nested arrays up to the matching `]`.
    #[test]
    fn test_skip_array() {
        let json = br#"{"skip": [1, [2, 3], 4], "keep": true}"#;
        let mut adapter = SliceAdapter::<true>::new(json);

        // Consume `{`, the key "skip" and `:` so the next value is the array.
        adapter.next_token().unwrap();
        adapter.next_token().unwrap();
        adapter.next_token().unwrap();

        // The span runs from the opening bracket to its matching closing bracket.
        let span = adapter.skip().unwrap();
        assert_eq!(
            &json[span.offset..span.offset + span.len],
            br#"[1, [2, 3], 4]"#
        );

        // The comma after the array.
        adapter.next_token().unwrap();

        let t = adapter.next_token().unwrap();
        assert_eq!(t.token, Token::String(Cow::Borrowed("keep")));

        // The colon after "keep".
        adapter.next_token().unwrap();

        let t = adapter.next_token().unwrap();
        assert!(matches!(t.token, Token::True));
    }
752
    /// Skipping a string with escapes reports the raw, still-escaped bytes;
    /// the skip path classifies the token without decoding it.
    #[test]
    fn test_skip_string_no_allocation() {
        let json = br#"{"skip": "hello\nworld\twith\rescapes", "keep": 1}"#;
        let mut adapter = SliceAdapter::<true>::new(json);

        // Consume `{`, the key "skip" and `:` so the next value is the string.
        adapter.next_token().unwrap();
        adapter.next_token().unwrap();
        adapter.next_token().unwrap();

        // The span includes both quotes and the escape sequences verbatim.
        let span = adapter.skip().unwrap();
        assert_eq!(
            &json[span.offset..span.offset + span.len],
            br#""hello\nworld\twith\rescapes""#
        );
    }
773
    /// With `BORROW = false`, even an escape-free string is returned as owned data.
    #[test]
    fn test_borrow_false_always_owned() {
        let json = br#""no escapes here""#;
        let mut adapter = SliceAdapter::<false>::new(json);

        let t = adapter.next_token().unwrap();
        assert!(matches!(t.token, Token::String(Cow::Owned(_))));
    }
783
    /// With `BORROW = true`, an escape-free string is borrowed from the input.
    #[test]
    fn test_borrow_true_borrows_when_possible() {
        let json = br#""no escapes here""#;
        let mut adapter = SliceAdapter::<true>::new(json);

        let t = adapter.next_token().unwrap();
        assert!(matches!(t.token, Token::String(Cow::Borrowed(_))));
    }
793
    /// A string longer than the default chunk size is still scanned as one token,
    /// which requires the window to grow several times.
    #[test]
    fn test_windowed_parsing_long_string() {
        // 13 bytes including the quotes, well over `DEFAULT_CHUNK_SIZE`.
        let json = br#""hello world""#;
        let mut adapter = SliceAdapter::<true>::new(json);

        let t = adapter.next_token().unwrap();
        assert_eq!(t.token, Token::String(Cow::Borrowed("hello world")));
        // The span covers the whole literal, quotes included.
        assert_eq!(t.span.offset, 0);
        assert_eq!(t.span.len, 13);
    }
807
    /// A number that runs up to the very end of the input is still produced.
    #[test]
    fn test_windowed_parsing_number_at_eof() {
        // Nothing follows the number; presumably this exercises the
        // finalize-at-end-of-input path, since no delimiter terminates it.
        let json = b"-123";
        let mut adapter = SliceAdapter::<true>::new(json);

        let t = adapter.next_token().unwrap();
        assert_eq!(t.token, Token::I64(-123));
    }
817
818 #[test]
819 fn test_windowed_parsing_complex_object() {
820 let json = br#"{"name": "hello world", "value": 12345, "nested": {"a": 1}}"#;
822 let mut adapter = SliceAdapter::<true>::new(json);
823
824 assert!(matches!(
826 adapter.next_token().unwrap().token,
827 Token::ObjectStart
828 ));
829 assert_eq!(
831 adapter.next_token().unwrap().token,
832 Token::String(Cow::Borrowed("name"))
833 );
834 assert!(matches!(adapter.next_token().unwrap().token, Token::Colon));
836 assert_eq!(
838 adapter.next_token().unwrap().token,
839 Token::String(Cow::Borrowed("hello world"))
840 );
841 assert!(matches!(adapter.next_token().unwrap().token, Token::Comma));
843 assert_eq!(
845 adapter.next_token().unwrap().token,
846 Token::String(Cow::Borrowed("value"))
847 );
848 assert!(matches!(adapter.next_token().unwrap().token, Token::Colon));
850 assert_eq!(adapter.next_token().unwrap().token, Token::U64(12345));
852 assert!(matches!(adapter.next_token().unwrap().token, Token::Comma));
854 assert_eq!(
856 adapter.next_token().unwrap().token,
857 Token::String(Cow::Borrowed("nested"))
858 );
859 assert!(matches!(adapter.next_token().unwrap().token, Token::Colon));
861 assert!(matches!(
863 adapter.next_token().unwrap().token,
864 Token::ObjectStart
865 ));
866 assert_eq!(
868 adapter.next_token().unwrap().token,
869 Token::String(Cow::Borrowed("a"))
870 );
871 assert!(matches!(adapter.next_token().unwrap().token, Token::Colon));
873 assert_eq!(adapter.next_token().unwrap().token, Token::U64(1));
875 assert!(matches!(
877 adapter.next_token().unwrap().token,
878 Token::ObjectEnd
879 ));
880 assert!(matches!(
882 adapter.next_token().unwrap().token,
883 Token::ObjectEnd
884 ));
885 assert!(matches!(adapter.next_token().unwrap().token, Token::Eof));
887 }
888}