1use core::str;
13
14use facet_reflect::Span;
15
/// A single JSON token produced by [`Scanner`].
///
/// String and number tokens do not own their text; they carry byte offsets
/// into the buffer that was passed to the scanner.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// `{`
    ObjectStart,
    /// `}`
    ObjectEnd,
    /// `[`
    ArrayStart,
    /// `]`
    ArrayEnd,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// The literal `null`.
    Null,
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// A string; the offsets delimit the content between the quotes.
    String {
        /// Offset of the first content byte (just after the opening quote).
        start: usize,
        /// Offset of the closing quote, i.e. the exclusive end of the content.
        end: usize,
        /// `true` if at least one backslash was seen while scanning.
        has_escapes: bool,
    },
    /// A number; `start..end` is the byte range of its text.
    Number {
        /// Offset of the first byte of the number (including a leading `-`).
        start: usize,
        /// Exclusive end offset of the number text.
        end: usize,
        /// Syntactic classification recorded while scanning.
        hint: NumberHint,
    },
    /// The buffer is exhausted and no token is in progress.
    Eof,
    /// The buffer ended in the middle of a token; scanning resumes on the next call.
    NeedMore {
        /// The scanner in this file always sets this to the offset at which
        /// the incomplete token starts.
        consumed: usize,
    },
}
63
/// Syntactic shape of a scanned number, used by [`parse_number`] to choose a target type.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NumberHint {
    /// No leading `-`, no fraction and no exponent.
    Unsigned,
    /// Leading `-`, no fraction and no exponent.
    Signed,
    /// A `.` fraction or an `e`/`E` exponent was seen.
    Float,
}
74
/// A [`Token`] together with the region of the buffer it was scanned from.
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedToken {
    /// The token that was recognized.
    pub token: Token,
    /// Source region of the token; built throughout this file as
    /// `Span::new(offset, length)`.
    pub span: Span,
}
83
/// An error raised while scanning or decoding, with the offending source region.
#[derive(Debug, Clone, PartialEq)]
pub struct ScanError {
    /// What went wrong.
    pub kind: ScanErrorKind,
    /// Source region the error refers to; built throughout this file as
    /// `Span::new(offset, length)`.
    pub span: Span,
}
92
/// The category of a [`ScanError`].
#[derive(Debug, Clone, PartialEq)]
pub enum ScanErrorKind {
    /// A byte that is not valid at this point. The decoding helpers in this
    /// file also use it with the placeholder `'?'` when a value cannot be parsed.
    UnexpectedChar(char),
    /// Input ended inside a construct; the payload names it (e.g. `"in string"`).
    UnexpectedEof(&'static str),
    /// Bytes that are not valid UTF-8, or a `\u` escape that does not form a
    /// valid Unicode scalar value (e.g. an unpaired surrogate).
    InvalidUtf8,
}
103
/// Result of scanning one token: a [`SpannedToken`] on success, a [`ScanError`] otherwise.
pub type ScanResult = Result<SpannedToken, ScanError>;
106
/// Resumable JSON tokenizer.
///
/// The scanner does not own the input: every call receives the buffer, and the
/// scanner only remembers its byte offset plus the token (if any) that was cut
/// off by the end of the previous buffer.
pub struct Scanner {
    /// Current byte offset into the buffer.
    pos: usize,
    /// Token in progress when the last call ran out of input, or `Ready`.
    state: ScanState,
}
117
/// What the scanner was doing when the previous buffer ended.
#[derive(Debug, Clone, Default)]
enum ScanState {
    /// No token in progress; the next call starts a fresh token.
    #[default]
    Ready,
    /// Suspended inside a string.
    InString {
        /// Offset of the opening quote.
        start: usize,
        /// Whether a backslash has been seen so far.
        has_escapes: bool,
        /// Whether the last byte seen was a backslash, so the next byte is escaped.
        escape_next: bool,
    },
    /// Suspended inside a number that reached the end of the buffer.
    InNumber { start: usize, hint: NumberHint },
    /// Suspended inside `true`, `false` or `null`.
    InLiteral {
        /// Offset of the literal's first byte.
        start: usize,
        /// The full literal being matched.
        expected: &'static [u8],
        /// How many bytes of `expected` have matched so far.
        matched: usize,
    },
}
138
139impl Scanner {
140 pub fn new() -> Self {
142 Self {
143 pos: 0,
144 state: ScanState::Ready,
145 }
146 }
147
148 pub fn at_position(pos: usize) -> Self {
150 Self {
151 pos,
152 state: ScanState::Ready,
153 }
154 }
155
156 pub fn pos(&self) -> usize {
158 self.pos
159 }
160
161 pub fn set_pos(&mut self, pos: usize) {
163 self.pos = pos;
164 }
165
166 pub fn finalize_at_eof(&mut self, buf: &[u8]) -> ScanResult {
172 match core::mem::take(&mut self.state) {
173 ScanState::Ready => {
174 Ok(SpannedToken {
176 token: Token::Eof,
177 span: Span::new(self.pos, 0),
178 })
179 }
180 ScanState::InNumber { start, hint } => {
181 let end = self.pos;
183 if end == start || (end == start + 1 && buf.get(start) == Some(&b'-')) {
184 return Err(ScanError {
185 kind: ScanErrorKind::UnexpectedEof("in number"),
186 span: Span::new(start, end - start),
187 });
188 }
189 Ok(SpannedToken {
190 token: Token::Number { start, end, hint },
191 span: Span::new(start, end - start),
192 })
193 }
194 ScanState::InString { start, .. } => {
195 Err(ScanError {
197 kind: ScanErrorKind::UnexpectedEof("in string"),
198 span: Span::new(start, self.pos - start),
199 })
200 }
201 ScanState::InLiteral {
202 start,
203 expected,
204 matched,
205 } => {
206 if matched == expected.len() {
208 let token = match expected {
209 b"true" => Token::True,
210 b"false" => Token::False,
211 b"null" => Token::Null,
212 _ => unreachable!(),
213 };
214 Ok(SpannedToken {
215 token,
216 span: Span::new(start, expected.len()),
217 })
218 } else {
219 Err(ScanError {
220 kind: ScanErrorKind::UnexpectedEof("in literal"),
221 span: Span::new(start, self.pos - start),
222 })
223 }
224 }
225 }
226 }
227
228 pub fn next_token(&mut self, buf: &[u8]) -> ScanResult {
233 match core::mem::take(&mut self.state) {
235 ScanState::Ready => {}
236 ScanState::InString {
237 start,
238 has_escapes,
239 escape_next,
240 } => {
241 return self.resume_string(buf, start, has_escapes, escape_next);
242 }
243 ScanState::InNumber { start, hint } => {
244 return self.resume_number(buf, start, hint);
245 }
246 ScanState::InLiteral {
247 start,
248 expected,
249 matched,
250 } => {
251 return self.resume_literal(buf, start, expected, matched);
252 }
253 }
254
255 self.skip_whitespace(buf);
256
257 let start = self.pos;
258 let Some(&byte) = buf.get(self.pos) else {
259 return Ok(SpannedToken {
260 token: Token::Eof,
261 span: Span::new(self.pos, 0),
262 });
263 };
264
265 match byte {
266 b'{' => {
267 self.pos += 1;
268 Ok(SpannedToken {
269 token: Token::ObjectStart,
270 span: Span::new(start, 1),
271 })
272 }
273 b'}' => {
274 self.pos += 1;
275 Ok(SpannedToken {
276 token: Token::ObjectEnd,
277 span: Span::new(start, 1),
278 })
279 }
280 b'[' => {
281 self.pos += 1;
282 Ok(SpannedToken {
283 token: Token::ArrayStart,
284 span: Span::new(start, 1),
285 })
286 }
287 b']' => {
288 self.pos += 1;
289 Ok(SpannedToken {
290 token: Token::ArrayEnd,
291 span: Span::new(start, 1),
292 })
293 }
294 b':' => {
295 self.pos += 1;
296 Ok(SpannedToken {
297 token: Token::Colon,
298 span: Span::new(start, 1),
299 })
300 }
301 b',' => {
302 self.pos += 1;
303 Ok(SpannedToken {
304 token: Token::Comma,
305 span: Span::new(start, 1),
306 })
307 }
308 b'"' => self.scan_string(buf, start),
309 b'-' | b'0'..=b'9' => self.scan_number(buf, start),
310 b't' => self.scan_literal(buf, start, b"true", Token::True),
311 b'f' => self.scan_literal(buf, start, b"false", Token::False),
312 b'n' => self.scan_literal(buf, start, b"null", Token::Null),
313 _ => Err(ScanError {
314 kind: ScanErrorKind::UnexpectedChar(byte as char),
315 span: Span::new(start, 1),
316 }),
317 }
318 }
319
320 fn skip_whitespace(&mut self, buf: &[u8]) {
321 while let Some(&b) = buf.get(self.pos) {
322 match b {
323 b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
324 _ => break,
325 }
326 }
327 }
328
329 fn scan_string(&mut self, buf: &[u8], start: usize) -> ScanResult {
331 self.pos += 1;
333 let content_start = self.pos;
334
335 self.scan_string_content(buf, start, content_start, false, false)
336 }
337
338 fn resume_string(
339 &mut self,
340 buf: &[u8],
341 start: usize,
342 has_escapes: bool,
343 escape_next: bool,
344 ) -> ScanResult {
345 let content_start = start + 1; self.scan_string_content(buf, start, content_start, has_escapes, escape_next)
347 }
348
349 fn scan_string_content(
350 &mut self,
351 buf: &[u8],
352 start: usize,
353 content_start: usize,
354 mut has_escapes: bool,
355 mut escape_next: bool,
356 ) -> ScanResult {
357 const STEP_SIZE: usize = 16;
359 type Window = u128;
360 type Chunk = [u8; STEP_SIZE];
361
362 if !escape_next {
364 loop {
365 if let Some(Ok(chunk)) = buf
366 .get(self.pos..)
367 .and_then(|s| s.get(..STEP_SIZE))
368 .map(Chunk::try_from)
369 {
370 let window = Window::from_ne_bytes(chunk);
371 let has_quote = contains_byte(window, b'"');
372 let has_backslash = contains_byte(window, b'\\');
373
374 if !has_quote && !has_backslash {
375 self.pos += STEP_SIZE;
377 continue;
378 }
379 }
380 break;
382 }
383 }
384
385 while let Some(&byte) = buf.get(self.pos) {
387 if escape_next {
388 escape_next = false;
390 self.pos += 1;
391
392 if byte == b'u' {
394 if self.pos + 4 > buf.len() {
396 self.state = ScanState::InString {
398 start,
399 has_escapes: true,
400 escape_next: false,
401 };
402 return Ok(SpannedToken {
403 token: Token::NeedMore { consumed: start },
404 span: Span::new(start, self.pos - start),
405 });
406 }
407 self.pos += 4;
408
409 if self.pos + 2 <= buf.len()
411 && buf.get(self.pos) == Some(&b'\\')
412 && buf.get(self.pos + 1) == Some(&b'u')
413 {
414 if self.pos + 6 > buf.len() {
415 self.state = ScanState::InString {
417 start,
418 has_escapes: true,
419 escape_next: false,
420 };
421 return Ok(SpannedToken {
422 token: Token::NeedMore { consumed: start },
423 span: Span::new(start, self.pos - start),
424 });
425 }
426 self.pos += 6;
428 }
429 }
430 continue;
431 }
432
433 match byte {
434 b'"' => {
435 let content_end = self.pos;
437 self.pos += 1; return Ok(SpannedToken {
440 token: Token::String {
441 start: content_start,
442 end: content_end,
443 has_escapes,
444 },
445 span: Span::new(start, self.pos - start),
446 });
447 }
448 b'\\' => {
449 has_escapes = true;
450 escape_next = true;
451 self.pos += 1;
452 }
453 _ => {
454 self.pos += 1;
455 }
456 }
457 }
458
459 if escape_next || self.pos > start {
461 self.state = ScanState::InString {
463 start,
464 has_escapes,
465 escape_next,
466 };
467 Ok(SpannedToken {
468 token: Token::NeedMore { consumed: start },
469 span: Span::new(start, self.pos - start),
470 })
471 } else {
472 Err(ScanError {
473 kind: ScanErrorKind::UnexpectedEof("in string"),
474 span: Span::new(start, self.pos - start),
475 })
476 }
477 }
478
479 fn scan_number(&mut self, buf: &[u8], start: usize) -> ScanResult {
481 let mut hint = NumberHint::Unsigned;
482
483 if buf.get(self.pos) == Some(&b'-') {
484 hint = NumberHint::Signed;
485 self.pos += 1;
486 }
487
488 self.scan_number_content(buf, start, hint)
489 }
490
491 fn resume_number(&mut self, buf: &[u8], start: usize, hint: NumberHint) -> ScanResult {
492 self.scan_number_content(buf, start, hint)
493 }
494
495 fn scan_number_content(
496 &mut self,
497 buf: &[u8],
498 start: usize,
499 mut hint: NumberHint,
500 ) -> ScanResult {
501 while let Some(&b) = buf.get(self.pos) {
503 if b.is_ascii_digit() {
504 self.pos += 1;
505 } else {
506 break;
507 }
508 }
509
510 if buf.get(self.pos) == Some(&b'.') {
512 hint = NumberHint::Float;
513 self.pos += 1;
514
515 while let Some(&b) = buf.get(self.pos) {
517 if b.is_ascii_digit() {
518 self.pos += 1;
519 } else {
520 break;
521 }
522 }
523 }
524
525 if matches!(buf.get(self.pos), Some(b'e') | Some(b'E')) {
527 hint = NumberHint::Float;
528 self.pos += 1;
529
530 if matches!(buf.get(self.pos), Some(b'+') | Some(b'-')) {
532 self.pos += 1;
533 }
534
535 while let Some(&b) = buf.get(self.pos) {
537 if b.is_ascii_digit() {
538 self.pos += 1;
539 } else {
540 break;
541 }
542 }
543 }
544
545 if self.pos == buf.len() {
548 self.state = ScanState::InNumber { start, hint };
550 return Ok(SpannedToken {
551 token: Token::NeedMore { consumed: start },
552 span: Span::new(start, self.pos - start),
553 });
554 }
555
556 let end = self.pos;
557
558 if end == start || (end == start + 1 && buf.get(start) == Some(&b'-')) {
560 return Err(ScanError {
561 kind: ScanErrorKind::UnexpectedChar(
562 buf.get(self.pos).map(|&b| b as char).unwrap_or('?'),
563 ),
564 span: Span::new(start, 1),
565 });
566 }
567
568 Ok(SpannedToken {
569 token: Token::Number { start, end, hint },
570 span: Span::new(start, end - start),
571 })
572 }
573
574 fn scan_literal(
576 &mut self,
577 buf: &[u8],
578 start: usize,
579 expected: &'static [u8],
580 token: Token,
581 ) -> ScanResult {
582 self.scan_literal_content(buf, start, expected, 0, token)
583 }
584
585 fn resume_literal(
586 &mut self,
587 buf: &[u8],
588 start: usize,
589 expected: &'static [u8],
590 matched: usize,
591 ) -> ScanResult {
592 let token = match expected {
593 b"true" => Token::True,
594 b"false" => Token::False,
595 b"null" => Token::Null,
596 _ => unreachable!(),
597 };
598 self.scan_literal_content(buf, start, expected, matched, token)
599 }
600
601 fn scan_literal_content(
602 &mut self,
603 buf: &[u8],
604 start: usize,
605 expected: &'static [u8],
606 mut matched: usize,
607 token: Token,
608 ) -> ScanResult {
609 while matched < expected.len() {
610 match buf.get(self.pos) {
611 Some(&b) if b == expected[matched] => {
612 self.pos += 1;
613 matched += 1;
614 }
615 Some(&b) => {
616 return Err(ScanError {
617 kind: ScanErrorKind::UnexpectedChar(b as char),
618 span: Span::new(self.pos, 1),
619 });
620 }
621 None => {
622 self.state = ScanState::InLiteral {
624 start,
625 expected,
626 matched,
627 };
628 return Ok(SpannedToken {
629 token: Token::NeedMore { consumed: start },
630 span: Span::new(start, self.pos - start),
631 });
632 }
633 }
634 }
635
636 Ok(SpannedToken {
637 token,
638 span: Span::new(start, expected.len()),
639 })
640 }
641}
642
643impl Default for Scanner {
644 fn default() -> Self {
645 Self::new()
646 }
647}
648
/// Returns `true` if any of the 16 bytes packed into `window` equals `byte`.
///
/// All lanes are tested at once: XOR-ing with `byte` repeated in every lane
/// turns matching lanes into zero, and the classic "has a zero byte" bit trick
/// then detects whether any lane is zero.
#[inline]
fn contains_byte(window: u128, byte: u8) -> bool {
    /// `0x01` in every byte lane.
    const LANE_ONES: u128 = u128::MAX / 0xFF;
    /// `0x80` in every byte lane.
    const LANE_HIGH_BITS: u128 = LANE_ONES << 7;

    // Broadcast `byte` into every lane; cannot overflow since 0x01..01 * 0xFF == u128::MAX.
    let broadcast = LANE_ONES * u128::from(byte);
    let diff = window ^ broadcast;

    (diff.wrapping_sub(LANE_ONES) & !diff & LANE_HIGH_BITS) != 0
}
659
660pub fn decode_string_owned(
677 buf: &[u8],
678 start: usize,
679 end: usize,
680) -> Result<alloc::string::String, ScanError> {
681 use alloc::string::String;
682
683 let slice = &buf[start..end];
684 let mut result = String::with_capacity(end - start);
685 let mut i = 0;
686
687 while i < slice.len() {
688 let byte = slice[i];
689 if byte == b'\\' {
690 i += 1;
691 if i >= slice.len() {
692 return Err(ScanError {
693 kind: ScanErrorKind::UnexpectedEof("in escape sequence"),
694 span: Span::new(start + i - 1, 1),
695 });
696 }
697
698 match slice[i] {
699 b'"' => result.push('"'),
700 b'\\' => result.push('\\'),
701 b'/' => result.push('/'),
702 b'b' => result.push('\x08'),
703 b'f' => result.push('\x0c'),
704 b'n' => result.push('\n'),
705 b'r' => result.push('\r'),
706 b't' => result.push('\t'),
707 b'u' => {
708 i += 1;
709 if i + 4 > slice.len() {
710 return Err(ScanError {
711 kind: ScanErrorKind::UnexpectedEof("in unicode escape"),
712 span: Span::new(start + i - 2, slice.len() - i + 2),
713 });
714 }
715
716 let hex = &slice[i..i + 4];
717 let hex_str = str::from_utf8(hex).map_err(|_| ScanError {
718 kind: ScanErrorKind::InvalidUtf8,
719 span: Span::new(start + i, 4),
720 })?;
721
722 let code_unit = u16::from_str_radix(hex_str, 16).map_err(|_| ScanError {
723 kind: ScanErrorKind::UnexpectedChar('?'),
724 span: Span::new(start + i, 4),
725 })?;
726
727 i += 4;
728
729 let code_point = if (0xD800..=0xDBFF).contains(&code_unit) {
731 if i + 6 > slice.len() || slice[i] != b'\\' || slice[i + 1] != b'u' {
733 return Err(ScanError {
734 kind: ScanErrorKind::InvalidUtf8,
735 span: Span::new(start + i - 6, 6),
736 });
737 }
738
739 i += 2; let low_hex = &slice[i..i + 4];
741 let low_hex_str = str::from_utf8(low_hex).map_err(|_| ScanError {
742 kind: ScanErrorKind::InvalidUtf8,
743 span: Span::new(start + i, 4),
744 })?;
745
746 let low_unit =
747 u16::from_str_radix(low_hex_str, 16).map_err(|_| ScanError {
748 kind: ScanErrorKind::UnexpectedChar('?'),
749 span: Span::new(start + i, 4),
750 })?;
751
752 i += 4;
753
754 if !(0xDC00..=0xDFFF).contains(&low_unit) {
755 return Err(ScanError {
756 kind: ScanErrorKind::InvalidUtf8,
757 span: Span::new(start + i - 4, 4),
758 });
759 }
760
761 let high = code_unit as u32;
763 let low = low_unit as u32;
764 0x10000 + ((high & 0x3FF) << 10) + (low & 0x3FF)
765 } else if (0xDC00..=0xDFFF).contains(&code_unit) {
766 return Err(ScanError {
768 kind: ScanErrorKind::InvalidUtf8,
769 span: Span::new(start + i - 4, 4),
770 });
771 } else {
772 code_unit as u32
773 };
774
775 let c = char::from_u32(code_point).ok_or_else(|| ScanError {
776 kind: ScanErrorKind::InvalidUtf8,
777 span: Span::new(start + i - 4, 4),
778 })?;
779
780 result.push(c);
781 continue; }
783 other => {
784 result.push(other as char);
786 }
787 }
788 i += 1;
789 } else {
790 if byte < 0x80 {
793 result.push(byte as char);
794 i += 1;
795 } else {
796 let remaining = &slice[i..];
798 match str::from_utf8(remaining) {
799 Ok(s) => {
800 result.push_str(s);
801 break;
802 }
803 Err(e) => {
804 let valid_len = e.valid_up_to();
806 if valid_len > 0 {
807 let valid = str::from_utf8(&remaining[..valid_len])
809 .expect("valid_up_to guarantees valid UTF-8");
810 result.push_str(valid);
811 i += valid_len;
812 } else {
813 return Err(ScanError {
814 kind: ScanErrorKind::InvalidUtf8,
815 span: Span::new(start + i, 1),
816 });
817 }
818 }
819 }
820 }
821 }
822 }
823
824 Ok(result)
825}
826
/// Borrows JSON string content straight from the buffer when no decoding is needed.
///
/// Returns `Some` only if `buf[start..end]` is valid UTF-8 and contains no
/// backslash; otherwise returns `None`.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range into `buf`.
pub fn decode_string_borrowed(buf: &[u8], start: usize, end: usize) -> Option<&str> {
    let raw = &buf[start..end];

    // A backslash means there are escapes, which cannot be represented by a plain borrow.
    str::from_utf8(raw)
        .ok()
        .filter(|text| !text.contains('\\'))
}
846
847pub fn decode_string<'a>(
851 buf: &'a [u8],
852 start: usize,
853 end: usize,
854 has_escapes: bool,
855) -> Result<alloc::borrow::Cow<'a, str>, ScanError> {
856 use alloc::borrow::Cow;
857
858 if has_escapes {
859 decode_string_owned(buf, start, end).map(Cow::Owned)
860 } else {
861 decode_string_borrowed(buf, start, end)
862 .map(Cow::Borrowed)
863 .ok_or_else(|| ScanError {
864 kind: ScanErrorKind::InvalidUtf8,
865 span: Span::new(start, end - start),
866 })
867 }
868}
869
/// A number parsed by [`parse_number`], stored in the first type that could hold it.
#[derive(Debug, Clone, PartialEq)]
pub enum ParsedNumber {
    /// Unsigned integer that fits in 64 bits.
    U64(u64),
    /// Signed integer that fits in 64 bits.
    I64(i64),
    /// Unsigned integer that did not parse as `u64` but parses as `u128`.
    U128(u128),
    /// Signed integer that did not parse as `i64` but parses as `i128`.
    I128(i128),
    /// Number with a fraction and/or exponent.
    F64(f64),
}
886
887pub fn parse_number(
889 buf: &[u8],
890 start: usize,
891 end: usize,
892 hint: NumberHint,
893) -> Result<ParsedNumber, ScanError> {
894 use lexical_parse_float::FromLexical as _;
895 use lexical_parse_integer::FromLexical as _;
896
897 let slice = &buf[start..end];
898
899 match hint {
900 NumberHint::Float => f64::from_lexical(slice)
901 .map(ParsedNumber::F64)
902 .map_err(|_| ScanError {
903 kind: ScanErrorKind::UnexpectedChar('?'),
904 span: Span::new(start, end - start),
905 }),
906 NumberHint::Signed => {
907 if let Ok(n) = i64::from_lexical(slice) {
908 Ok(ParsedNumber::I64(n))
909 } else if let Ok(n) = i128::from_lexical(slice) {
910 Ok(ParsedNumber::I128(n))
911 } else {
912 Err(ScanError {
913 kind: ScanErrorKind::UnexpectedChar('?'),
914 span: Span::new(start, end - start),
915 })
916 }
917 }
918 NumberHint::Unsigned => {
919 if let Ok(n) = u64::from_lexical(slice) {
920 Ok(ParsedNumber::U64(n))
921 } else if let Ok(n) = u128::from_lexical(slice) {
922 Ok(ParsedNumber::U128(n))
923 } else {
924 Err(ScanError {
925 kind: ScanErrorKind::UnexpectedChar('?'),
926 span: Span::new(start, end - start),
927 })
928 }
929 }
930 }
931}
932
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans the next token from `input`, panicking on a scan error.
    fn next(scanner: &mut Scanner, input: &[u8]) -> Token {
        scanner.next_token(input).unwrap().token
    }

    /// Rewinds `scanner` to offset 0 and returns the hint of the number at the start of `input`.
    fn hint_of(scanner: &mut Scanner, input: &[u8]) -> NumberHint {
        scanner.set_pos(0);
        match next(scanner, input) {
            Token::Number { hint, .. } => hint,
            other => panic!("expected a number, got {:?}", other),
        }
    }

    #[test]
    fn test_simple_tokens() {
        let input = b"{}[],:";
        let mut scanner = Scanner::new();

        let expected = [
            Token::ObjectStart,
            Token::ObjectEnd,
            Token::ArrayStart,
            Token::ArrayEnd,
            Token::Comma,
            Token::Colon,
            Token::Eof,
        ];
        for want in &expected {
            assert_eq!(&next(&mut scanner, input), want);
        }
    }

    #[test]
    fn test_string_no_escapes() {
        let mut scanner = Scanner::new();

        assert_eq!(
            next(&mut scanner, b"\"hello world\""),
            Token::String {
                start: 1,
                end: 12,
                has_escapes: false
            }
        );
    }

    #[test]
    fn test_string_with_escapes() {
        let mut scanner = Scanner::new();

        assert_eq!(
            next(&mut scanner, br#""hello\nworld""#),
            Token::String {
                start: 1,
                end: 13,
                has_escapes: true
            }
        );
    }

    #[test]
    fn test_numbers() {
        let mut scanner = Scanner::new();

        // Each input ends with a delimiter so the number is complete.
        assert_eq!(hint_of(&mut scanner, b"42,"), NumberHint::Unsigned);
        assert_eq!(hint_of(&mut scanner, b"-42]"), NumberHint::Signed);
        assert_eq!(hint_of(&mut scanner, b"3.14}"), NumberHint::Float);
        assert_eq!(hint_of(&mut scanner, b"1e10 "), NumberHint::Float);

        // A number that runs to the end of the buffer may still continue.
        scanner.set_pos(0);
        assert!(matches!(
            next(&mut scanner, b"42"),
            Token::NeedMore { .. }
        ));
    }

    #[test]
    fn test_literals() {
        let mut scanner = Scanner::new();

        assert_eq!(next(&mut scanner, b"true,"), Token::True);

        scanner.set_pos(0);
        assert_eq!(next(&mut scanner, b"false]"), Token::False);

        scanner.set_pos(0);
        assert_eq!(next(&mut scanner, b"null}"), Token::Null);
    }

    #[test]
    fn test_whitespace_handling() {
        let input = b" {\n\t\"key\" : 42 } ";
        let mut scanner = Scanner::new();
        let mut token = || scanner.next_token(input).unwrap().token;

        assert!(matches!(token(), Token::ObjectStart));
        assert!(matches!(token(), Token::String { .. }));
        assert!(matches!(token(), Token::Colon));
        assert!(matches!(token(), Token::Number { .. }));
        assert!(matches!(token(), Token::ObjectEnd));
        assert!(matches!(token(), Token::Eof));
    }

    #[test]
    fn test_decode_string_no_escapes() {
        let input = b"hello world";
        assert_eq!(
            decode_string_borrowed(input, 0, input.len()),
            Some("hello world")
        );
    }

    #[test]
    fn test_decode_string_with_escapes() {
        let input = br#"hello\nworld"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "hello\nworld");
    }

    #[test]
    fn test_decode_string_unicode() {
        // "Hello" spelled entirely with \u escapes.
        let input = br#"\u0048\u0065\u006C\u006C\u006F"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "Hello");
    }

    #[test]
    fn test_decode_string_surrogate_pair() {
        // U+1F600 encoded as a UTF-16 surrogate pair.
        let input = br#"\uD83D\uDE00"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "😀");
    }

    #[test]
    fn test_decode_cow_borrowed() {
        let input = b"simple";
        let decoded = decode_string(input, 0, input.len(), false).unwrap();
        assert!(matches!(decoded, alloc::borrow::Cow::Borrowed(_)));
        assert_eq!(&*decoded, "simple");
    }

    #[test]
    fn test_decode_cow_owned() {
        let input = br#"has\tescape"#;
        let decoded = decode_string(input, 0, input.len(), true).unwrap();
        assert!(matches!(decoded, alloc::borrow::Cow::Owned(_)));
        assert_eq!(&*decoded, "has\tescape");
    }

    #[test]
    fn test_parse_numbers() {
        let unsigned = parse_number(b"42", 0, 2, NumberHint::Unsigned).unwrap();
        assert_eq!(unsigned, ParsedNumber::U64(42));

        let signed = parse_number(b"-42", 0, 3, NumberHint::Signed).unwrap();
        assert_eq!(signed, ParsedNumber::I64(-42));

        // 3.14 is test data, not an approximation of pi.
        #[allow(clippy::approx_constant)]
        {
            let float = parse_number(b"3.14", 0, 4, NumberHint::Float).unwrap();
            assert_eq!(float, ParsedNumber::F64(3.14));
        }
    }
}
1170
#[cfg(all(test, feature = "bolero-inline-tests"))]
#[allow(clippy::while_let_loop, clippy::same_item_push)]
mod fuzz_tests {
    use super::*;
    use bolero::check;

    /// Scans `input` from the start until `Eof`, `NeedMore` or the first error.
    /// The only expectation is that the scanner neither panics nor loops forever.
    fn scan_until_stop(input: &[u8]) {
        let mut scanner = Scanner::new();
        while let Ok(spanned) = scanner.next_token(input) {
            if matches!(spanned.token, Token::Eof | Token::NeedMore { .. }) {
                break;
            }
        }
    }

    /// Arbitrary bytes fed straight into the scanner.
    #[test]
    fn fuzz_scanner_arbitrary_bytes() {
        check!().for_each(|input: &[u8]| scan_until_stop(input));
    }

    /// Arbitrary bytes wrapped in `[` ... `]` so the input looks like an array.
    #[test]
    fn fuzz_scanner_json_like() {
        check!().for_each(|input: &[u8]| {
            let mut wrapped = Vec::with_capacity(input.len() + 2);
            wrapped.push(b'[');
            wrapped.extend_from_slice(input);
            wrapped.push(b']');

            scan_until_stop(&wrapped);
        });
    }

    /// Arbitrary bytes (at least two) decoded as string content.
    #[test]
    fn fuzz_decode_string() {
        check!().for_each(|input: &[u8]| {
            if input.len() < 2 {
                return;
            }
            let _ = decode_string_owned(input, 0, input.len());
        });
    }

    /// Arbitrary bytes wrapped in quotes and scanned as a single token.
    #[test]
    fn fuzz_scanner_strings() {
        check!().for_each(|content: &[u8]| {
            let mut quoted = Vec::with_capacity(content.len() + 2);
            quoted.push(b'"');
            quoted.extend_from_slice(content);
            quoted.push(b'"');

            let _ = Scanner::new().next_token(&quoted);
        });
    }

    /// Inputs that start like a number (`-` or a digit) scanned as a single token.
    #[test]
    fn fuzz_scanner_numbers() {
        check!().for_each(|content: &[u8]| {
            let looks_numeric = match content.first() {
                Some(&first) => first.is_ascii_digit() || first == b'-',
                None => false,
            };
            if looks_numeric {
                let _ = Scanner::new().next_token(content);
            }
        });
    }

    /// Arbitrary non-empty bytes parsed under every number hint.
    #[test]
    fn fuzz_parse_number() {
        check!().for_each(|input: &[u8]| {
            if input.is_empty() {
                return;
            }
            let hints = [NumberHint::Unsigned, NumberHint::Signed, NumberHint::Float];
            for &hint in &hints {
                let _ = parse_number(input, 0, input.len(), hint);
            }
        });
    }

    /// Balanced nested arrays whose depth (0..100) is taken from the first input byte.
    #[test]
    fn fuzz_scanner_nested() {
        check!().for_each(|input: &[u8]| {
            let depth = input.first().copied().unwrap_or(0) as usize % 100;

            let mut nested = Vec::new();
            nested.resize(depth, b'[');
            nested.resize(depth * 2, b']');

            scan_until_stop(&nested);
        });
    }
}