1use core::str;
13
14use facet_reflect::Span;
15
16#[derive(Debug, Clone, PartialEq)]
18pub enum Token {
19 ObjectStart,
21 ObjectEnd,
23 ArrayStart,
25 ArrayEnd,
27 Colon,
29 Comma,
31 Null,
33 True,
35 False,
37 String {
39 start: usize,
41 end: usize,
43 has_escapes: bool,
45 },
46 Number {
48 start: usize,
50 end: usize,
52 hint: NumberHint,
54 },
55 Eof,
57 NeedMore {
59 consumed: usize,
61 },
62}
63
/// Coarse classification of a scanned number, decided from its text alone.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so the hint can be
/// used as a map key or in `Eq`-bounded generic code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberHint {
    /// No leading `-`, no fraction and no exponent.
    Unsigned,
    /// Leading `-`, no fraction and no exponent.
    Signed,
    /// Contains a `.` fraction and/or an `e`/`E` exponent.
    Float,
}
74
75#[derive(Debug, Clone, PartialEq)]
77pub struct SpannedToken {
78 pub token: Token,
80 pub span: Span,
82}
83
84#[derive(Debug, Clone, PartialEq)]
86pub struct ScanError {
87 pub kind: ScanErrorKind,
89 pub span: Span,
91}
92
/// The category of a [`ScanError`].
///
/// `Eq` is derived alongside `PartialEq`: every payload (`char`,
/// `&'static str`) has total equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScanErrorKind {
    /// A byte that cannot appear at this position, shown as a `char`
    /// (`'?'` is used where no specific byte is available).
    UnexpectedChar(char),
    /// Input ended too early; the payload names the construct being read.
    UnexpectedEof(&'static str),
    /// Bytes that are not valid UTF-8, or an invalid surrogate escape.
    InvalidUtf8,
}
103
104pub type ScanResult = Result<SpannedToken, ScanError>;
106
107pub struct Scanner {
112 pos: usize,
114 state: ScanState,
116}
117
118#[derive(Debug, Clone, Default)]
120enum ScanState {
121 #[default]
122 Ready,
123 InString {
125 start: usize,
126 has_escapes: bool,
127 escape_next: bool,
128 },
129 InNumber { start: usize, hint: NumberHint },
131 InLiteral {
133 start: usize,
134 expected: &'static [u8],
135 matched: usize,
136 },
137}
138
139impl Scanner {
140 pub fn new() -> Self {
142 Self {
143 pos: 0,
144 state: ScanState::Ready,
145 }
146 }
147
148 pub fn at_position(pos: usize) -> Self {
150 Self {
151 pos,
152 state: ScanState::Ready,
153 }
154 }
155
156 pub fn pos(&self) -> usize {
158 self.pos
159 }
160
161 pub fn set_pos(&mut self, pos: usize) {
163 self.pos = pos;
164 }
165
166 pub fn finalize_at_eof(&mut self, buf: &[u8]) -> ScanResult {
172 match core::mem::take(&mut self.state) {
173 ScanState::Ready => {
174 Ok(SpannedToken {
176 token: Token::Eof,
177 span: Span::new(self.pos, 0),
178 })
179 }
180 ScanState::InNumber { start, hint } => {
181 let end = self.pos;
183 if end == start || (end == start + 1 && buf.get(start) == Some(&b'-')) {
184 return Err(ScanError {
185 kind: ScanErrorKind::UnexpectedEof("in number"),
186 span: Span::new(start, end - start),
187 });
188 }
189 Ok(SpannedToken {
190 token: Token::Number { start, end, hint },
191 span: Span::new(start, end - start),
192 })
193 }
194 ScanState::InString { start, .. } => {
195 Err(ScanError {
197 kind: ScanErrorKind::UnexpectedEof("in string"),
198 span: Span::new(start, self.pos - start),
199 })
200 }
201 ScanState::InLiteral {
202 start,
203 expected,
204 matched,
205 } => {
206 if matched == expected.len() {
208 let token = match expected {
209 b"true" => Token::True,
210 b"false" => Token::False,
211 b"null" => Token::Null,
212 _ => unreachable!(),
213 };
214 Ok(SpannedToken {
215 token,
216 span: Span::new(start, expected.len()),
217 })
218 } else {
219 Err(ScanError {
220 kind: ScanErrorKind::UnexpectedEof("in literal"),
221 span: Span::new(start, self.pos - start),
222 })
223 }
224 }
225 }
226 }
227
228 pub fn next_token(&mut self, buf: &[u8]) -> ScanResult {
233 match core::mem::take(&mut self.state) {
235 ScanState::Ready => {}
236 ScanState::InString {
237 start,
238 has_escapes,
239 escape_next,
240 } => {
241 return self.resume_string(buf, start, has_escapes, escape_next);
242 }
243 ScanState::InNumber { start, hint } => {
244 return self.resume_number(buf, start, hint);
245 }
246 ScanState::InLiteral {
247 start,
248 expected,
249 matched,
250 } => {
251 return self.resume_literal(buf, start, expected, matched);
252 }
253 }
254
255 self.skip_whitespace(buf);
256
257 let start = self.pos;
258 let Some(&byte) = buf.get(self.pos) else {
259 return Ok(SpannedToken {
260 token: Token::Eof,
261 span: Span::new(self.pos, 0),
262 });
263 };
264
265 match byte {
266 b'{' => {
267 self.pos += 1;
268 Ok(SpannedToken {
269 token: Token::ObjectStart,
270 span: Span::new(start, 1),
271 })
272 }
273 b'}' => {
274 self.pos += 1;
275 Ok(SpannedToken {
276 token: Token::ObjectEnd,
277 span: Span::new(start, 1),
278 })
279 }
280 b'[' => {
281 self.pos += 1;
282 Ok(SpannedToken {
283 token: Token::ArrayStart,
284 span: Span::new(start, 1),
285 })
286 }
287 b']' => {
288 self.pos += 1;
289 Ok(SpannedToken {
290 token: Token::ArrayEnd,
291 span: Span::new(start, 1),
292 })
293 }
294 b':' => {
295 self.pos += 1;
296 Ok(SpannedToken {
297 token: Token::Colon,
298 span: Span::new(start, 1),
299 })
300 }
301 b',' => {
302 self.pos += 1;
303 Ok(SpannedToken {
304 token: Token::Comma,
305 span: Span::new(start, 1),
306 })
307 }
308 b'"' => self.scan_string(buf, start),
309 b'-' | b'0'..=b'9' => self.scan_number(buf, start),
310 b't' => self.scan_literal(buf, start, b"true", Token::True),
311 b'f' => self.scan_literal(buf, start, b"false", Token::False),
312 b'n' => self.scan_literal(buf, start, b"null", Token::Null),
313 _ => Err(ScanError {
314 kind: ScanErrorKind::UnexpectedChar(byte as char),
315 span: Span::new(start, 1),
316 }),
317 }
318 }
319
320 fn skip_whitespace(&mut self, buf: &[u8]) {
321 while let Some(&b) = buf.get(self.pos) {
322 match b {
323 b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
324 _ => break,
325 }
326 }
327 }
328
329 fn scan_string(&mut self, buf: &[u8], start: usize) -> ScanResult {
331 self.pos += 1;
333 let content_start = self.pos;
334
335 self.scan_string_content(buf, start, content_start, false, false)
336 }
337
338 fn resume_string(
339 &mut self,
340 buf: &[u8],
341 start: usize,
342 has_escapes: bool,
343 escape_next: bool,
344 ) -> ScanResult {
345 let content_start = start + 1; self.scan_string_content(buf, start, content_start, has_escapes, escape_next)
347 }
348
349 fn scan_string_content(
350 &mut self,
351 buf: &[u8],
352 start: usize,
353 content_start: usize,
354 mut has_escapes: bool,
355 mut escape_next: bool,
356 ) -> ScanResult {
357 const STEP_SIZE: usize = 16;
359 type Window = u128;
360 type Chunk = [u8; STEP_SIZE];
361
362 if !escape_next {
364 loop {
365 if let Some(Ok(chunk)) = buf
366 .get(self.pos..)
367 .and_then(|s| s.get(..STEP_SIZE))
368 .map(Chunk::try_from)
369 {
370 let window = Window::from_ne_bytes(chunk);
371 let has_quote = contains_byte(window, b'"');
372 let has_backslash = contains_byte(window, b'\\');
373
374 if !has_quote && !has_backslash {
375 self.pos += STEP_SIZE;
377 continue;
378 }
379 }
380 break;
382 }
383 }
384
385 while let Some(&byte) = buf.get(self.pos) {
387 if escape_next {
388 escape_next = false;
390 self.pos += 1;
391
392 if byte == b'u' {
394 if self.pos + 4 > buf.len() {
396 self.state = ScanState::InString {
398 start,
399 has_escapes: true,
400 escape_next: false,
401 };
402 return Ok(SpannedToken {
403 token: Token::NeedMore { consumed: start },
404 span: Span::new(start, self.pos - start),
405 });
406 }
407 self.pos += 4;
408
409 if self.pos + 2 <= buf.len()
411 && buf.get(self.pos) == Some(&b'\\')
412 && buf.get(self.pos + 1) == Some(&b'u')
413 {
414 if self.pos + 6 > buf.len() {
415 self.state = ScanState::InString {
417 start,
418 has_escapes: true,
419 escape_next: false,
420 };
421 return Ok(SpannedToken {
422 token: Token::NeedMore { consumed: start },
423 span: Span::new(start, self.pos - start),
424 });
425 }
426 self.pos += 6;
428 }
429 }
430 continue;
431 }
432
433 match byte {
434 b'"' => {
435 let content_end = self.pos;
437 self.pos += 1; return Ok(SpannedToken {
440 token: Token::String {
441 start: content_start,
442 end: content_end,
443 has_escapes,
444 },
445 span: Span::new(start, self.pos - start),
446 });
447 }
448 b'\\' => {
449 has_escapes = true;
450 escape_next = true;
451 self.pos += 1;
452 }
453 _ => {
454 self.pos += 1;
455 }
456 }
457 }
458
459 if escape_next || self.pos > start {
461 self.state = ScanState::InString {
463 start,
464 has_escapes,
465 escape_next,
466 };
467 Ok(SpannedToken {
468 token: Token::NeedMore { consumed: start },
469 span: Span::new(start, self.pos - start),
470 })
471 } else {
472 Err(ScanError {
473 kind: ScanErrorKind::UnexpectedEof("in string"),
474 span: Span::new(start, self.pos - start),
475 })
476 }
477 }
478
479 fn scan_number(&mut self, buf: &[u8], start: usize) -> ScanResult {
481 let mut hint = NumberHint::Unsigned;
482
483 if buf.get(self.pos) == Some(&b'-') {
484 hint = NumberHint::Signed;
485 self.pos += 1;
486 }
487
488 self.scan_number_content(buf, start, hint)
489 }
490
491 fn resume_number(&mut self, buf: &[u8], start: usize, hint: NumberHint) -> ScanResult {
492 self.scan_number_content(buf, start, hint)
493 }
494
495 fn scan_number_content(
496 &mut self,
497 buf: &[u8],
498 start: usize,
499 mut hint: NumberHint,
500 ) -> ScanResult {
501 while let Some(&b) = buf.get(self.pos) {
503 if b.is_ascii_digit() {
504 self.pos += 1;
505 } else {
506 break;
507 }
508 }
509
510 if buf.get(self.pos) == Some(&b'.') {
512 hint = NumberHint::Float;
513 self.pos += 1;
514
515 while let Some(&b) = buf.get(self.pos) {
517 if b.is_ascii_digit() {
518 self.pos += 1;
519 } else {
520 break;
521 }
522 }
523 }
524
525 if matches!(buf.get(self.pos), Some(b'e') | Some(b'E')) {
527 hint = NumberHint::Float;
528 self.pos += 1;
529
530 if matches!(buf.get(self.pos), Some(b'+') | Some(b'-')) {
532 self.pos += 1;
533 }
534
535 while let Some(&b) = buf.get(self.pos) {
537 if b.is_ascii_digit() {
538 self.pos += 1;
539 } else {
540 break;
541 }
542 }
543 }
544
545 if self.pos == buf.len() {
548 self.state = ScanState::InNumber { start, hint };
550 return Ok(SpannedToken {
551 token: Token::NeedMore { consumed: start },
552 span: Span::new(start, self.pos - start),
553 });
554 }
555
556 let end = self.pos;
557
558 if end == start || (end == start + 1 && buf.get(start) == Some(&b'-')) {
560 return Err(ScanError {
561 kind: ScanErrorKind::UnexpectedChar(
562 buf.get(self.pos).map(|&b| b as char).unwrap_or('?'),
563 ),
564 span: Span::new(start, 1),
565 });
566 }
567
568 Ok(SpannedToken {
569 token: Token::Number { start, end, hint },
570 span: Span::new(start, end - start),
571 })
572 }
573
574 fn scan_literal(
576 &mut self,
577 buf: &[u8],
578 start: usize,
579 expected: &'static [u8],
580 token: Token,
581 ) -> ScanResult {
582 self.scan_literal_content(buf, start, expected, 0, token)
583 }
584
585 fn resume_literal(
586 &mut self,
587 buf: &[u8],
588 start: usize,
589 expected: &'static [u8],
590 matched: usize,
591 ) -> ScanResult {
592 let token = match expected {
593 b"true" => Token::True,
594 b"false" => Token::False,
595 b"null" => Token::Null,
596 _ => unreachable!(),
597 };
598 self.scan_literal_content(buf, start, expected, matched, token)
599 }
600
601 fn scan_literal_content(
602 &mut self,
603 buf: &[u8],
604 start: usize,
605 expected: &'static [u8],
606 mut matched: usize,
607 token: Token,
608 ) -> ScanResult {
609 while matched < expected.len() {
610 match buf.get(self.pos) {
611 Some(&b) if b == expected[matched] => {
612 self.pos += 1;
613 matched += 1;
614 }
615 Some(&b) => {
616 return Err(ScanError {
617 kind: ScanErrorKind::UnexpectedChar(b as char),
618 span: Span::new(self.pos, 1),
619 });
620 }
621 None => {
622 self.state = ScanState::InLiteral {
624 start,
625 expected,
626 matched,
627 };
628 return Ok(SpannedToken {
629 token: Token::NeedMore { consumed: start },
630 span: Span::new(start, self.pos - start),
631 });
632 }
633 }
634 }
635
636 Ok(SpannedToken {
637 token,
638 span: Span::new(start, expected.len()),
639 })
640 }
641}
642
643impl Default for Scanner {
644 fn default() -> Self {
645 Self::new()
646 }
647}
648
/// Reports whether any of the 16 bytes packed into `window` equals `byte`.
///
/// XOR-ing with `byte` repeated 16 times turns every matching byte into
/// zero; the subtract/mask step is then non-zero exactly when some byte of
/// the XOR result is zero.
#[inline]
fn contains_byte(window: u128, byte: u8) -> bool {
    const LOW_BITS: u128 = u128::from_ne_bytes([0x01; 16]);
    const HIGH_BITS: u128 = u128::from_ne_bytes([0x80; 16]);

    let diff = window ^ u128::from_ne_bytes([byte; 16]);
    (diff.wrapping_sub(LOW_BITS) & !diff & HIGH_BITS) != 0
}
659
660pub fn decode_string_owned(
677 buf: &[u8],
678 start: usize,
679 end: usize,
680) -> Result<alloc::string::String, ScanError> {
681 use alloc::string::String;
682
683 let slice = &buf[start..end];
684 let mut result = String::with_capacity(end - start);
685 let mut i = 0;
686
687 while i < slice.len() {
688 let byte = slice[i];
689 if byte == b'\\' {
690 i += 1;
691 if i >= slice.len() {
692 return Err(ScanError {
693 kind: ScanErrorKind::UnexpectedEof("in escape sequence"),
694 span: Span::new(start + i - 1, 1),
695 });
696 }
697
698 match slice[i] {
699 b'"' => result.push('"'),
700 b'\\' => result.push('\\'),
701 b'/' => result.push('/'),
702 b'b' => result.push('\x08'),
703 b'f' => result.push('\x0c'),
704 b'n' => result.push('\n'),
705 b'r' => result.push('\r'),
706 b't' => result.push('\t'),
707 b'u' => {
708 i += 1;
709 if i + 4 > slice.len() {
710 return Err(ScanError {
711 kind: ScanErrorKind::UnexpectedEof("in unicode escape"),
712 span: Span::new(start + i - 2, slice.len() - i + 2),
713 });
714 }
715
716 let hex = &slice[i..i + 4];
717 let hex_str = str::from_utf8(hex).map_err(|_| ScanError {
718 kind: ScanErrorKind::InvalidUtf8,
719 span: Span::new(start + i, 4),
720 })?;
721
722 let code_unit = u16::from_str_radix(hex_str, 16).map_err(|_| ScanError {
723 kind: ScanErrorKind::UnexpectedChar('?'),
724 span: Span::new(start + i, 4),
725 })?;
726
727 i += 4;
728
729 let code_point = if (0xD800..=0xDBFF).contains(&code_unit) {
731 if i + 6 > slice.len() || slice[i] != b'\\' || slice[i + 1] != b'u' {
733 return Err(ScanError {
734 kind: ScanErrorKind::InvalidUtf8,
735 span: Span::new(start + i - 6, 6),
736 });
737 }
738
739 i += 2; let low_hex = &slice[i..i + 4];
741 let low_hex_str = str::from_utf8(low_hex).map_err(|_| ScanError {
742 kind: ScanErrorKind::InvalidUtf8,
743 span: Span::new(start + i, 4),
744 })?;
745
746 let low_unit =
747 u16::from_str_radix(low_hex_str, 16).map_err(|_| ScanError {
748 kind: ScanErrorKind::UnexpectedChar('?'),
749 span: Span::new(start + i, 4),
750 })?;
751
752 i += 4;
753
754 if !(0xDC00..=0xDFFF).contains(&low_unit) {
755 return Err(ScanError {
756 kind: ScanErrorKind::InvalidUtf8,
757 span: Span::new(start + i - 4, 4),
758 });
759 }
760
761 let high = code_unit as u32;
763 let low = low_unit as u32;
764 0x10000 + ((high & 0x3FF) << 10) + (low & 0x3FF)
765 } else if (0xDC00..=0xDFFF).contains(&code_unit) {
766 return Err(ScanError {
768 kind: ScanErrorKind::InvalidUtf8,
769 span: Span::new(start + i - 4, 4),
770 });
771 } else {
772 code_unit as u32
773 };
774
775 let c = char::from_u32(code_point).ok_or_else(|| ScanError {
776 kind: ScanErrorKind::InvalidUtf8,
777 span: Span::new(start + i - 4, 4),
778 })?;
779
780 result.push(c);
781 continue; }
783 other => {
784 result.push(other as char);
786 }
787 }
788 i += 1;
789 } else {
790 if byte < 0x80 {
793 result.push(byte as char);
794 i += 1;
795 } else {
796 let remaining = &slice[i..];
799 match str::from_utf8(remaining) {
800 Ok(s) => {
801 let ch = s.chars().next().expect("non-empty remaining slice");
804 result.push(ch);
805 i += ch.len_utf8();
806 }
807 Err(e) => {
808 let valid_len = e.valid_up_to();
810 if valid_len > 0 {
811 let valid = str::from_utf8(&remaining[..valid_len])
814 .expect("valid_up_to guarantees valid UTF-8");
815 result.push_str(valid);
816 i += valid_len;
817 } else {
818 return Err(ScanError {
819 kind: ScanErrorKind::InvalidUtf8,
820 span: Span::new(start + i, 1),
821 });
822 }
823 }
824 }
825 }
826 }
827 }
828
829 Ok(result)
830}
831
/// Borrows `buf[start..end]` as a `&str` when no decoding is required.
///
/// Returns `None` if the range contains a backslash (escapes must be decoded
/// into an owned string) or is not valid UTF-8.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range of `buf`.
pub fn decode_string_borrowed(buf: &[u8], start: usize, end: usize) -> Option<&str> {
    let bytes = &buf[start..end];
    let has_backslash = bytes.iter().any(|&b| b == b'\\');

    if has_backslash {
        None
    } else {
        str::from_utf8(bytes).ok()
    }
}
851
852pub fn decode_string<'a>(
856 buf: &'a [u8],
857 start: usize,
858 end: usize,
859 has_escapes: bool,
860) -> Result<alloc::borrow::Cow<'a, str>, ScanError> {
861 use alloc::borrow::Cow;
862
863 if has_escapes {
864 decode_string_owned(buf, start, end).map(Cow::Owned)
865 } else {
866 decode_string_borrowed(buf, start, end)
867 .map(Cow::Borrowed)
868 .ok_or_else(|| ScanError {
869 kind: ScanErrorKind::InvalidUtf8,
870 span: Span::new(start, end - start),
871 })
872 }
873}
874
/// A number parsed by [`parse_number`], in the narrowest type that fit.
#[derive(Clone, Debug, PartialEq)]
pub enum ParsedNumber {
    /// Unsigned integer that fits in 64 bits.
    U64(u64),
    /// Signed integer that fits in 64 bits.
    I64(i64),
    /// Unsigned integer that needed more than 64 bits.
    U128(u128),
    /// Signed integer that needed more than 64 bits.
    I128(i128),
    /// Number with a fraction and/or exponent.
    F64(f64),
}
891
892pub fn parse_number(
894 buf: &[u8],
895 start: usize,
896 end: usize,
897 hint: NumberHint,
898) -> Result<ParsedNumber, ScanError> {
899 use lexical_parse_float::FromLexical as _;
900 use lexical_parse_integer::FromLexical as _;
901
902 let slice = &buf[start..end];
903
904 match hint {
905 NumberHint::Float => f64::from_lexical(slice)
906 .map(ParsedNumber::F64)
907 .map_err(|_| ScanError {
908 kind: ScanErrorKind::UnexpectedChar('?'),
909 span: Span::new(start, end - start),
910 }),
911 NumberHint::Signed => {
912 if let Ok(n) = i64::from_lexical(slice) {
913 Ok(ParsedNumber::I64(n))
914 } else if let Ok(n) = i128::from_lexical(slice) {
915 Ok(ParsedNumber::I128(n))
916 } else {
917 Err(ScanError {
918 kind: ScanErrorKind::UnexpectedChar('?'),
919 span: Span::new(start, end - start),
920 })
921 }
922 }
923 NumberHint::Unsigned => {
924 if let Ok(n) = u64::from_lexical(slice) {
925 Ok(ParsedNumber::U64(n))
926 } else if let Ok(n) = u128::from_lexical(slice) {
927 Ok(ParsedNumber::U128(n))
928 } else {
929 Err(ScanError {
930 kind: ScanErrorKind::UnexpectedChar('?'),
931 span: Span::new(start, end - start),
932 })
933 }
934 }
935 }
936}
937
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans the next token, panicking on a scan error.
    fn next(scanner: &mut Scanner, input: &[u8]) -> Token {
        scanner.next_token(input).unwrap().token
    }

    /// Rewinds `scanner` to offset 0 and scans the first token of `input`.
    fn rescan(scanner: &mut Scanner, input: &[u8]) -> Token {
        scanner.set_pos(0);
        next(scanner, input)
    }

    #[test]
    fn test_simple_tokens() {
        let input = b"{}[],:";
        let mut scanner = Scanner::new();

        let expected = [
            Token::ObjectStart,
            Token::ObjectEnd,
            Token::ArrayStart,
            Token::ArrayEnd,
            Token::Comma,
            Token::Colon,
            Token::Eof,
        ];
        for want in &expected {
            assert_eq!(&next(&mut scanner, input), want);
        }
    }

    #[test]
    fn test_string_no_escapes() {
        let mut scanner = Scanner::new();

        assert_eq!(
            next(&mut scanner, b"\"hello world\""),
            Token::String {
                start: 1,
                end: 12,
                has_escapes: false
            }
        );
    }

    #[test]
    fn test_string_with_escapes() {
        let mut scanner = Scanner::new();

        assert_eq!(
            next(&mut scanner, br#""hello\nworld""#),
            Token::String {
                start: 1,
                end: 13,
                has_escapes: true
            }
        );
    }

    #[test]
    fn test_numbers() {
        let mut scanner = Scanner::new();

        // A terminator after the number lets the scanner finish the token.
        assert!(matches!(
            rescan(&mut scanner, b"42,"),
            Token::Number {
                hint: NumberHint::Unsigned,
                ..
            }
        ));
        assert!(matches!(
            rescan(&mut scanner, b"-42]"),
            Token::Number {
                hint: NumberHint::Signed,
                ..
            }
        ));
        assert!(matches!(
            rescan(&mut scanner, b"3.14}"),
            Token::Number {
                hint: NumberHint::Float,
                ..
            }
        ));
        assert!(matches!(
            rescan(&mut scanner, b"1e10 "),
            Token::Number {
                hint: NumberHint::Float,
                ..
            }
        ));

        // Without a terminator the number might continue in the next chunk.
        assert!(matches!(
            rescan(&mut scanner, b"42"),
            Token::NeedMore { .. }
        ));
    }

    #[test]
    fn test_literals() {
        let mut scanner = Scanner::new();

        assert_eq!(rescan(&mut scanner, b"true,"), Token::True);
        assert_eq!(rescan(&mut scanner, b"false]"), Token::False);
        assert_eq!(rescan(&mut scanner, b"null}"), Token::Null);
    }

    #[test]
    fn test_whitespace_handling() {
        let input = b" {\n\t\"key\" : 42 } ";
        let mut scanner = Scanner::new();

        assert_eq!(next(&mut scanner, input), Token::ObjectStart);
        assert!(matches!(next(&mut scanner, input), Token::String { .. }));
        assert_eq!(next(&mut scanner, input), Token::Colon);
        assert!(matches!(next(&mut scanner, input), Token::Number { .. }));
        assert_eq!(next(&mut scanner, input), Token::ObjectEnd);
        assert_eq!(next(&mut scanner, input), Token::Eof);
    }

    #[test]
    fn test_decode_string_no_escapes() {
        let input = b"hello world";
        assert_eq!(
            decode_string_borrowed(input, 0, input.len()),
            Some("hello world")
        );
    }

    #[test]
    fn test_decode_string_with_escapes() {
        let input = br#"hello\nworld"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "hello\nworld");
    }

    #[test]
    fn test_decode_string_unicode() {
        let input = br#"\u0048\u0065\u006C\u006C\u006F"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "Hello");
    }

    #[test]
    fn test_decode_string_surrogate_pair() {
        let input = br#"\uD83D\uDE00"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "😀");
    }

    #[test]
    fn test_decode_cow_borrowed() {
        let input = b"simple";
        let decoded = decode_string(input, 0, input.len(), false).unwrap();
        assert!(matches!(decoded, alloc::borrow::Cow::Borrowed(_)));
        assert_eq!(&*decoded, "simple");
    }

    #[test]
    fn test_decode_cow_owned() {
        let input = br#"has\tescape"#;
        let decoded = decode_string(input, 0, input.len(), true).unwrap();
        assert!(matches!(decoded, alloc::borrow::Cow::Owned(_)));
        assert_eq!(&*decoded, "has\tescape");
    }

    #[test]
    fn test_parse_numbers() {
        let unsigned = parse_number(b"42", 0, 2, NumberHint::Unsigned).unwrap();
        assert_eq!(unsigned, ParsedNumber::U64(42));

        let signed = parse_number(b"-42", 0, 3, NumberHint::Signed).unwrap();
        assert_eq!(signed, ParsedNumber::I64(-42));

        // 3.14 is test data here, not an approximation of pi.
        #[allow(clippy::approx_constant)]
        {
            let float = parse_number(b"3.14", 0, 4, NumberHint::Float).unwrap();
            assert_eq!(float, ParsedNumber::F64(3.14));
        }
    }
}
1175
#[cfg(all(test, feature = "bolero-inline-tests"))]
#[allow(clippy::while_let_loop, clippy::same_item_push)]
mod fuzz_tests {
    use super::*;
    use bolero::check;

    /// Runs a fresh scanner over `input` until it reports an error, `Eof`
    /// or `NeedMore`. The only expectation is that scanning never panics.
    fn scan_to_stop(input: &[u8]) {
        let mut scanner = Scanner::new();
        while let Ok(spanned) = scanner.next_token(input) {
            if matches!(spanned.token, Token::Eof | Token::NeedMore { .. }) {
                break;
            }
        }
    }

    /// Arbitrary bytes must never make the scanner panic.
    #[test]
    fn fuzz_scanner_arbitrary_bytes() {
        check!().for_each(|input: &[u8]| scan_to_stop(input));
    }

    /// Arbitrary bytes wrapped in `[` ... `]` must never make the scanner panic.
    #[test]
    fn fuzz_scanner_json_like() {
        check!().for_each(|input: &[u8]| {
            let mut wrapped = Vec::with_capacity(input.len() + 2);
            wrapped.push(b'[');
            wrapped.extend_from_slice(input);
            wrapped.push(b']');

            scan_to_stop(&wrapped);
        });
    }

    /// Decoding arbitrary bytes as string content must never panic.
    #[test]
    fn fuzz_decode_string() {
        check!().for_each(|input: &[u8]| {
            if input.len() < 2 {
                return;
            }
            let _ = decode_string_owned(input, 0, input.len());
        });
    }

    /// Arbitrary bytes between quotes must never make the scanner panic.
    #[test]
    fn fuzz_scanner_strings() {
        check!().for_each(|content: &[u8]| {
            let mut input = Vec::with_capacity(content.len() + 2);
            input.push(b'"');
            input.extend_from_slice(content);
            input.push(b'"');

            let _ = Scanner::new().next_token(&input);
        });
    }

    /// Inputs that start like a number must never make the scanner panic.
    #[test]
    fn fuzz_scanner_numbers() {
        check!().for_each(|content: &[u8]| {
            let starts_like_number = content
                .first()
                .map_or(false, |&b| b.is_ascii_digit() || b == b'-');
            if starts_like_number {
                let _ = Scanner::new().next_token(content);
            }
        });
    }

    /// Parsing arbitrary bytes under every hint must never panic.
    #[test]
    fn fuzz_parse_number() {
        check!().for_each(|input: &[u8]| {
            if input.is_empty() {
                return;
            }
            for hint in [NumberHint::Unsigned, NumberHint::Signed, NumberHint::Float] {
                let _ = parse_number(input, 0, input.len(), hint);
            }
        });
    }

    /// Up to 99 levels of nested arrays must never make the scanner panic.
    #[test]
    fn fuzz_scanner_nested() {
        check!().for_each(|input: &[u8]| {
            let depth = input.first().copied().unwrap_or(0) as usize % 100;

            // `depth` opening brackets followed by `depth` closing ones.
            let mut nested = Vec::with_capacity(depth * 2);
            nested.resize(depth, b'[');
            nested.resize(depth * 2, b']');

            scan_to_stop(&nested);
        });
    }
}