1use core::str;
13
14use facet_reflect::Span;
15
16#[derive(Debug, Clone, PartialEq)]
18pub enum Token {
19 ObjectStart,
21 ObjectEnd,
23 ArrayStart,
25 ArrayEnd,
27 Colon,
29 Comma,
31 Null,
33 True,
35 False,
37 String {
39 start: usize,
41 end: usize,
43 has_escapes: bool,
45 },
46 Number {
48 start: usize,
50 end: usize,
52 hint: NumberHint,
54 },
55 Eof,
57 NeedMore {
59 consumed: usize,
61 },
62}
63
/// Shape of a number as observed by the scanner, used to pick a parse strategy.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq`: the enum has no fields,
/// so equality is total and the type can be used as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberHint {
    /// No leading `-`, no fraction and no exponent.
    Unsigned,
    /// Leading `-`, no fraction and no exponent.
    Signed,
    /// Contains a `.` fraction and/or an `e`/`E` exponent.
    Float,
}
74
75#[derive(Debug, Clone, PartialEq)]
77pub struct SpannedToken {
78 pub token: Token,
80 pub span: Span,
82}
83
84#[derive(Debug, Clone, PartialEq)]
86pub struct ScanError {
87 pub kind: ScanErrorKind,
89 pub span: Span,
91}
92
/// The category of a [`ScanError`].
///
/// Derives `Eq` alongside `PartialEq`: every payload (`char`, `&'static str`)
/// has total equality, so there is no reason to stop at `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScanErrorKind {
    /// A byte that cannot appear at this position; carries the byte as a `char`
    /// (or `'?'` when no specific character is available).
    UnexpectedChar(char),
    /// Input ended inside a construct; the message names the construct
    /// (for example `"in string"`).
    UnexpectedEof(&'static str),
    /// The bytes are not valid UTF-8, or an escape does not describe a valid
    /// Unicode scalar value.
    InvalidUtf8,
}
103
104pub type ScanResult = Result<SpannedToken, ScanError>;
106
107pub struct Scanner {
112 pos: usize,
114 state: ScanState,
116}
117
118#[derive(Debug, Clone, Default)]
120enum ScanState {
121 #[default]
122 Ready,
123 InString {
125 start: usize,
126 has_escapes: bool,
127 escape_next: bool,
128 },
129 InNumber { start: usize, hint: NumberHint },
131 InLiteral {
133 start: usize,
134 expected: &'static [u8],
135 matched: usize,
136 },
137}
138
139impl Scanner {
140 pub fn new() -> Self {
142 Self {
143 pos: 0,
144 state: ScanState::Ready,
145 }
146 }
147
148 #[allow(dead_code)]
150 pub fn at_position(pos: usize) -> Self {
151 Self {
152 pos,
153 state: ScanState::Ready,
154 }
155 }
156
157 pub fn pos(&self) -> usize {
159 self.pos
160 }
161
162 pub fn set_pos(&mut self, pos: usize) {
164 self.pos = pos;
165 }
166
167 pub fn finalize_at_eof(&mut self, buf: &[u8]) -> ScanResult {
173 match core::mem::take(&mut self.state) {
174 ScanState::Ready => {
175 Ok(SpannedToken {
177 token: Token::Eof,
178 span: Span::new(self.pos, 0),
179 })
180 }
181 ScanState::InNumber { start, hint } => {
182 let end = self.pos;
184 if end == start || (end == start + 1 && buf.get(start) == Some(&b'-')) {
185 return Err(ScanError {
186 kind: ScanErrorKind::UnexpectedEof("in number"),
187 span: Span::new(start, end - start),
188 });
189 }
190 Ok(SpannedToken {
191 token: Token::Number { start, end, hint },
192 span: Span::new(start, end - start),
193 })
194 }
195 ScanState::InString { start, .. } => {
196 Err(ScanError {
198 kind: ScanErrorKind::UnexpectedEof("in string"),
199 span: Span::new(start, self.pos - start),
200 })
201 }
202 ScanState::InLiteral {
203 start,
204 expected,
205 matched,
206 } => {
207 if matched == expected.len() {
209 let token = match expected {
210 b"true" => Token::True,
211 b"false" => Token::False,
212 b"null" => Token::Null,
213 _ => unreachable!(),
214 };
215 Ok(SpannedToken {
216 token,
217 span: Span::new(start, expected.len()),
218 })
219 } else {
220 Err(ScanError {
221 kind: ScanErrorKind::UnexpectedEof("in literal"),
222 span: Span::new(start, self.pos - start),
223 })
224 }
225 }
226 }
227 }
228
229 pub fn next_token(&mut self, buf: &[u8]) -> ScanResult {
234 match core::mem::take(&mut self.state) {
236 ScanState::Ready => {}
237 ScanState::InString {
238 start,
239 has_escapes,
240 escape_next,
241 } => {
242 return self.resume_string(buf, start, has_escapes, escape_next);
243 }
244 ScanState::InNumber { start, hint } => {
245 return self.resume_number(buf, start, hint);
246 }
247 ScanState::InLiteral {
248 start,
249 expected,
250 matched,
251 } => {
252 return self.resume_literal(buf, start, expected, matched);
253 }
254 }
255
256 self.skip_whitespace(buf);
257
258 let start = self.pos;
259 let Some(&byte) = buf.get(self.pos) else {
260 return Ok(SpannedToken {
261 token: Token::Eof,
262 span: Span::new(self.pos, 0),
263 });
264 };
265
266 match byte {
267 b'{' => {
268 self.pos += 1;
269 Ok(SpannedToken {
270 token: Token::ObjectStart,
271 span: Span::new(start, 1),
272 })
273 }
274 b'}' => {
275 self.pos += 1;
276 Ok(SpannedToken {
277 token: Token::ObjectEnd,
278 span: Span::new(start, 1),
279 })
280 }
281 b'[' => {
282 self.pos += 1;
283 Ok(SpannedToken {
284 token: Token::ArrayStart,
285 span: Span::new(start, 1),
286 })
287 }
288 b']' => {
289 self.pos += 1;
290 Ok(SpannedToken {
291 token: Token::ArrayEnd,
292 span: Span::new(start, 1),
293 })
294 }
295 b':' => {
296 self.pos += 1;
297 Ok(SpannedToken {
298 token: Token::Colon,
299 span: Span::new(start, 1),
300 })
301 }
302 b',' => {
303 self.pos += 1;
304 Ok(SpannedToken {
305 token: Token::Comma,
306 span: Span::new(start, 1),
307 })
308 }
309 b'"' => self.scan_string(buf, start),
310 b'-' | b'0'..=b'9' => self.scan_number(buf, start),
311 b't' => self.scan_literal(buf, start, b"true", Token::True),
312 b'f' => self.scan_literal(buf, start, b"false", Token::False),
313 b'n' => self.scan_literal(buf, start, b"null", Token::Null),
314 _ => Err(ScanError {
315 kind: ScanErrorKind::UnexpectedChar(byte as char),
316 span: Span::new(start, 1),
317 }),
318 }
319 }
320
321 fn skip_whitespace(&mut self, buf: &[u8]) {
322 while let Some(&b) = buf.get(self.pos) {
323 match b {
324 b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
325 _ => break,
326 }
327 }
328 }
329
330 fn scan_string(&mut self, buf: &[u8], start: usize) -> ScanResult {
332 self.pos += 1;
334 let content_start = self.pos;
335
336 self.scan_string_content(buf, start, content_start, false, false)
337 }
338
339 fn resume_string(
340 &mut self,
341 buf: &[u8],
342 start: usize,
343 has_escapes: bool,
344 escape_next: bool,
345 ) -> ScanResult {
346 let content_start = start + 1; self.scan_string_content(buf, start, content_start, has_escapes, escape_next)
348 }
349
350 fn scan_string_content(
351 &mut self,
352 buf: &[u8],
353 start: usize,
354 content_start: usize,
355 mut has_escapes: bool,
356 mut escape_next: bool,
357 ) -> ScanResult {
358 const STEP_SIZE: usize = 16;
360 type Window = u128;
361 type Chunk = [u8; STEP_SIZE];
362
363 if !escape_next {
365 loop {
366 if let Some(Ok(chunk)) = buf
367 .get(self.pos..)
368 .and_then(|s| s.get(..STEP_SIZE))
369 .map(Chunk::try_from)
370 {
371 let window = Window::from_ne_bytes(chunk);
372 let has_quote = contains_byte(window, b'"');
373 let has_backslash = contains_byte(window, b'\\');
374
375 if !has_quote && !has_backslash {
376 self.pos += STEP_SIZE;
378 continue;
379 }
380 }
381 break;
383 }
384 }
385
386 while let Some(&byte) = buf.get(self.pos) {
388 if escape_next {
389 escape_next = false;
391 self.pos += 1;
392
393 if byte == b'u' {
395 if self.pos + 4 > buf.len() {
397 self.state = ScanState::InString {
399 start,
400 has_escapes: true,
401 escape_next: false,
402 };
403 return Ok(SpannedToken {
404 token: Token::NeedMore { consumed: start },
405 span: Span::new(start, self.pos - start),
406 });
407 }
408 self.pos += 4;
409
410 if self.pos + 2 <= buf.len()
412 && buf.get(self.pos) == Some(&b'\\')
413 && buf.get(self.pos + 1) == Some(&b'u')
414 {
415 if self.pos + 6 > buf.len() {
416 self.state = ScanState::InString {
418 start,
419 has_escapes: true,
420 escape_next: false,
421 };
422 return Ok(SpannedToken {
423 token: Token::NeedMore { consumed: start },
424 span: Span::new(start, self.pos - start),
425 });
426 }
427 self.pos += 6;
429 }
430 }
431 continue;
432 }
433
434 match byte {
435 b'"' => {
436 let content_end = self.pos;
438 self.pos += 1; return Ok(SpannedToken {
441 token: Token::String {
442 start: content_start,
443 end: content_end,
444 has_escapes,
445 },
446 span: Span::new(start, self.pos - start),
447 });
448 }
449 b'\\' => {
450 has_escapes = true;
451 escape_next = true;
452 self.pos += 1;
453 }
454 _ => {
455 self.pos += 1;
456 }
457 }
458 }
459
460 if escape_next || self.pos > start {
462 self.state = ScanState::InString {
464 start,
465 has_escapes,
466 escape_next,
467 };
468 Ok(SpannedToken {
469 token: Token::NeedMore { consumed: start },
470 span: Span::new(start, self.pos - start),
471 })
472 } else {
473 Err(ScanError {
474 kind: ScanErrorKind::UnexpectedEof("in string"),
475 span: Span::new(start, self.pos - start),
476 })
477 }
478 }
479
480 fn scan_number(&mut self, buf: &[u8], start: usize) -> ScanResult {
482 let mut hint = NumberHint::Unsigned;
483
484 if buf.get(self.pos) == Some(&b'-') {
485 hint = NumberHint::Signed;
486 self.pos += 1;
487 }
488
489 self.scan_number_content(buf, start, hint)
490 }
491
492 fn resume_number(&mut self, buf: &[u8], start: usize, hint: NumberHint) -> ScanResult {
493 self.scan_number_content(buf, start, hint)
494 }
495
496 fn scan_number_content(
497 &mut self,
498 buf: &[u8],
499 start: usize,
500 mut hint: NumberHint,
501 ) -> ScanResult {
502 while let Some(&b) = buf.get(self.pos) {
504 if b.is_ascii_digit() {
505 self.pos += 1;
506 } else {
507 break;
508 }
509 }
510
511 if buf.get(self.pos) == Some(&b'.') {
513 hint = NumberHint::Float;
514 self.pos += 1;
515
516 while let Some(&b) = buf.get(self.pos) {
518 if b.is_ascii_digit() {
519 self.pos += 1;
520 } else {
521 break;
522 }
523 }
524 }
525
526 if matches!(buf.get(self.pos), Some(b'e') | Some(b'E')) {
528 hint = NumberHint::Float;
529 self.pos += 1;
530
531 if matches!(buf.get(self.pos), Some(b'+') | Some(b'-')) {
533 self.pos += 1;
534 }
535
536 while let Some(&b) = buf.get(self.pos) {
538 if b.is_ascii_digit() {
539 self.pos += 1;
540 } else {
541 break;
542 }
543 }
544 }
545
546 if self.pos == buf.len() {
549 self.state = ScanState::InNumber { start, hint };
551 return Ok(SpannedToken {
552 token: Token::NeedMore { consumed: start },
553 span: Span::new(start, self.pos - start),
554 });
555 }
556
557 let end = self.pos;
558
559 if end == start || (end == start + 1 && buf.get(start) == Some(&b'-')) {
561 return Err(ScanError {
562 kind: ScanErrorKind::UnexpectedChar(
563 buf.get(self.pos).map(|&b| b as char).unwrap_or('?'),
564 ),
565 span: Span::new(start, 1),
566 });
567 }
568
569 Ok(SpannedToken {
570 token: Token::Number { start, end, hint },
571 span: Span::new(start, end - start),
572 })
573 }
574
575 fn scan_literal(
577 &mut self,
578 buf: &[u8],
579 start: usize,
580 expected: &'static [u8],
581 token: Token,
582 ) -> ScanResult {
583 self.scan_literal_content(buf, start, expected, 0, token)
584 }
585
586 fn resume_literal(
587 &mut self,
588 buf: &[u8],
589 start: usize,
590 expected: &'static [u8],
591 matched: usize,
592 ) -> ScanResult {
593 let token = match expected {
594 b"true" => Token::True,
595 b"false" => Token::False,
596 b"null" => Token::Null,
597 _ => unreachable!(),
598 };
599 self.scan_literal_content(buf, start, expected, matched, token)
600 }
601
602 fn scan_literal_content(
603 &mut self,
604 buf: &[u8],
605 start: usize,
606 expected: &'static [u8],
607 mut matched: usize,
608 token: Token,
609 ) -> ScanResult {
610 while matched < expected.len() {
611 match buf.get(self.pos) {
612 Some(&b) if b == expected[matched] => {
613 self.pos += 1;
614 matched += 1;
615 }
616 Some(&b) => {
617 return Err(ScanError {
618 kind: ScanErrorKind::UnexpectedChar(b as char),
619 span: Span::new(self.pos, 1),
620 });
621 }
622 None => {
623 self.state = ScanState::InLiteral {
625 start,
626 expected,
627 matched,
628 };
629 return Ok(SpannedToken {
630 token: Token::NeedMore { consumed: start },
631 span: Span::new(start, self.pos - start),
632 });
633 }
634 }
635 }
636
637 Ok(SpannedToken {
638 token,
639 span: Span::new(start, expected.len()),
640 })
641 }
642}
643
644impl Default for Scanner {
645 fn default() -> Self {
646 Self::new()
647 }
648}
649
/// Reports whether any of the 16 bytes packed into `window` equals `byte`.
///
/// XOR-ing with `byte` repeated in every lane turns matching lanes into zero;
/// the subtract-and-mask step then detects whether any lane is zero without
/// inspecting the lanes one at a time.
#[inline]
fn contains_byte(window: u128, byte: u8) -> bool {
    /// `0x01` in every byte lane.
    const ONES: u128 = u128::MAX / 0xFF;
    /// `0x80` in every byte lane.
    const HIGHS: u128 = ONES << 7;

    let lanes = window ^ (ONES * u128::from(byte));
    (lanes.wrapping_sub(ONES) & !lanes & HIGHS) != 0
}
660
661pub fn decode_string_owned(
678 buf: &[u8],
679 start: usize,
680 end: usize,
681) -> Result<alloc::string::String, ScanError> {
682 use alloc::string::String;
683
684 let slice = &buf[start..end];
685 let mut result = String::with_capacity(end - start);
686 let mut i = 0;
687
688 while i < slice.len() {
689 let byte = slice[i];
690 if byte == b'\\' {
691 i += 1;
692 if i >= slice.len() {
693 return Err(ScanError {
694 kind: ScanErrorKind::UnexpectedEof("in escape sequence"),
695 span: Span::new(start + i - 1, 1),
696 });
697 }
698
699 match slice[i] {
700 b'"' => result.push('"'),
701 b'\\' => result.push('\\'),
702 b'/' => result.push('/'),
703 b'b' => result.push('\x08'),
704 b'f' => result.push('\x0c'),
705 b'n' => result.push('\n'),
706 b'r' => result.push('\r'),
707 b't' => result.push('\t'),
708 b'u' => {
709 i += 1;
710 if i + 4 > slice.len() {
711 return Err(ScanError {
712 kind: ScanErrorKind::UnexpectedEof("in unicode escape"),
713 span: Span::new(start + i - 2, slice.len() - i + 2),
714 });
715 }
716
717 let hex = &slice[i..i + 4];
718 let hex_str = str::from_utf8(hex).map_err(|_| ScanError {
719 kind: ScanErrorKind::InvalidUtf8,
720 span: Span::new(start + i, 4),
721 })?;
722
723 let code_unit = u16::from_str_radix(hex_str, 16).map_err(|_| ScanError {
724 kind: ScanErrorKind::UnexpectedChar('?'),
725 span: Span::new(start + i, 4),
726 })?;
727
728 i += 4;
729
730 let code_point = if (0xD800..=0xDBFF).contains(&code_unit) {
732 if i + 6 > slice.len() || slice[i] != b'\\' || slice[i + 1] != b'u' {
734 return Err(ScanError {
735 kind: ScanErrorKind::InvalidUtf8,
736 span: Span::new(start + i - 6, 6),
737 });
738 }
739
740 i += 2; let low_hex = &slice[i..i + 4];
742 let low_hex_str = str::from_utf8(low_hex).map_err(|_| ScanError {
743 kind: ScanErrorKind::InvalidUtf8,
744 span: Span::new(start + i, 4),
745 })?;
746
747 let low_unit =
748 u16::from_str_radix(low_hex_str, 16).map_err(|_| ScanError {
749 kind: ScanErrorKind::UnexpectedChar('?'),
750 span: Span::new(start + i, 4),
751 })?;
752
753 i += 4;
754
755 if !(0xDC00..=0xDFFF).contains(&low_unit) {
756 return Err(ScanError {
757 kind: ScanErrorKind::InvalidUtf8,
758 span: Span::new(start + i - 4, 4),
759 });
760 }
761
762 let high = code_unit as u32;
764 let low = low_unit as u32;
765 0x10000 + ((high & 0x3FF) << 10) + (low & 0x3FF)
766 } else if (0xDC00..=0xDFFF).contains(&code_unit) {
767 return Err(ScanError {
769 kind: ScanErrorKind::InvalidUtf8,
770 span: Span::new(start + i - 4, 4),
771 });
772 } else {
773 code_unit as u32
774 };
775
776 let c = char::from_u32(code_point).ok_or_else(|| ScanError {
777 kind: ScanErrorKind::InvalidUtf8,
778 span: Span::new(start + i - 4, 4),
779 })?;
780
781 result.push(c);
782 continue; }
784 other => {
785 result.push(other as char);
787 }
788 }
789 i += 1;
790 } else {
791 if byte < 0x80 {
794 result.push(byte as char);
795 i += 1;
796 } else {
797 let remaining = &slice[i..];
799 match str::from_utf8(remaining) {
800 Ok(s) => {
801 result.push_str(s);
802 break;
803 }
804 Err(e) => {
805 let valid_len = e.valid_up_to();
807 if valid_len > 0 {
808 let valid = str::from_utf8(&remaining[..valid_len])
810 .expect("valid_up_to guarantees valid UTF-8");
811 result.push_str(valid);
812 i += valid_len;
813 } else {
814 return Err(ScanError {
815 kind: ScanErrorKind::InvalidUtf8,
816 span: Span::new(start + i, 1),
817 });
818 }
819 }
820 }
821 }
822 }
823 }
824
825 Ok(result)
826}
827
/// Borrows the string content in `buf[start..end]` without copying.
///
/// Returns `None` when the content contains a backslash (it would need
/// unescaping) or is not valid UTF-8.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
pub fn decode_string_borrowed(buf: &[u8], start: usize, end: usize) -> Option<&str> {
    let raw = &buf[start..end];
    str::from_utf8(raw)
        .ok()
        .filter(|text| !text.as_bytes().contains(&b'\\'))
}
847
848pub fn decode_string<'a>(
852 buf: &'a [u8],
853 start: usize,
854 end: usize,
855 has_escapes: bool,
856) -> Result<alloc::borrow::Cow<'a, str>, ScanError> {
857 use alloc::borrow::Cow;
858
859 if has_escapes {
860 decode_string_owned(buf, start, end).map(Cow::Owned)
861 } else {
862 decode_string_borrowed(buf, start, end)
863 .map(Cow::Borrowed)
864 .ok_or_else(|| ScanError {
865 kind: ScanErrorKind::InvalidUtf8,
866 span: Span::new(start, end - start),
867 })
868 }
869}
870
/// A number parsed from JSON text, stored in the narrowest representation
/// that `parse_number` found to fit.
#[derive(Clone, Debug, PartialEq)]
pub enum ParsedNumber {
    /// A non-negative integer that fits in 64 bits.
    U64(u64),
    /// A signed integer that fits in 64 bits.
    I64(i64),
    /// A non-negative integer that needs more than 64 bits.
    U128(u128),
    /// A signed integer that needs more than 64 bits.
    I128(i128),
    /// A number with a fraction and/or exponent.
    F64(f64),
}
887
/// Parses the number text in `buf[start..end]` using the `lexical` parsers.
///
/// `hint` selects the target type: floats become `F64`; integers are tried as
/// 64-bit first and fall back to 128-bit.
///
/// # Errors
/// Returns `UnexpectedChar('?')` spanning the whole number when no candidate
/// type can represent the text.
///
/// # Panics
/// Panics if `start..end` is not a valid range of `buf`.
#[cfg(feature = "lexical-parse")]
pub fn parse_number(
    buf: &[u8],
    start: usize,
    end: usize,
    hint: NumberHint,
) -> Result<ParsedNumber, ScanError> {
    use lexical_parse_float::FromLexical as _;
    use lexical_parse_integer::FromLexical as _;

    let digits = &buf[start..end];
    let unparsable = || ScanError {
        kind: ScanErrorKind::UnexpectedChar('?'),
        span: Span::new(start, end - start),
    };

    match hint {
        NumberHint::Float => f64::from_lexical(digits)
            .map(ParsedNumber::F64)
            .map_err(|_| unparsable()),
        NumberHint::Signed => i64::from_lexical(digits)
            .map(ParsedNumber::I64)
            .or_else(|_| i128::from_lexical(digits).map(ParsedNumber::I128))
            .map_err(|_| unparsable()),
        NumberHint::Unsigned => u64::from_lexical(digits)
            .map(ParsedNumber::U64)
            .or_else(|_| u128::from_lexical(digits).map(ParsedNumber::U128))
            .map_err(|_| unparsable()),
    }
}
934
935#[cfg(not(feature = "lexical-parse"))]
937pub fn parse_number(
938 buf: &[u8],
939 start: usize,
940 end: usize,
941 hint: NumberHint,
942) -> Result<ParsedNumber, ScanError> {
943 let slice = &buf[start..end];
944 let s = str::from_utf8(slice).map_err(|_| ScanError {
945 kind: ScanErrorKind::InvalidUtf8,
946 span: Span::new(start, end - start),
947 })?;
948
949 match hint {
950 NumberHint::Float => s
951 .parse::<f64>()
952 .map(ParsedNumber::F64)
953 .map_err(|_| ScanError {
954 kind: ScanErrorKind::UnexpectedChar('?'),
955 span: Span::new(start, end - start),
956 }),
957 NumberHint::Signed => {
958 if let Ok(n) = s.parse::<i64>() {
959 Ok(ParsedNumber::I64(n))
960 } else if let Ok(n) = s.parse::<i128>() {
961 Ok(ParsedNumber::I128(n))
962 } else {
963 Err(ScanError {
964 kind: ScanErrorKind::UnexpectedChar('?'),
965 span: Span::new(start, end - start),
966 })
967 }
968 }
969 NumberHint::Unsigned => {
970 if let Ok(n) = s.parse::<u64>() {
971 Ok(ParsedNumber::U64(n))
972 } else if let Ok(n) = s.parse::<u128>() {
973 Ok(ParsedNumber::U128(n))
974 } else {
975 Err(ScanError {
976 kind: ScanErrorKind::UnexpectedChar('?'),
977 span: Span::new(start, end - start),
978 })
979 }
980 }
981 }
982}
983
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans the next token from `input`, panicking on a scan error.
    fn next(scanner: &mut Scanner, input: &[u8]) -> Token {
        scanner.next_token(input).unwrap().token
    }

    /// Structural single-byte tokens are produced in input order, then `Eof`.
    #[test]
    fn test_simple_tokens() {
        let input = b"{}[],:";
        let mut scanner = Scanner::new();

        assert_eq!(next(&mut scanner, input), Token::ObjectStart);
        assert_eq!(next(&mut scanner, input), Token::ObjectEnd);
        assert_eq!(next(&mut scanner, input), Token::ArrayStart);
        assert_eq!(next(&mut scanner, input), Token::ArrayEnd);
        assert_eq!(next(&mut scanner, input), Token::Comma);
        assert_eq!(next(&mut scanner, input), Token::Colon);
        assert_eq!(next(&mut scanner, input), Token::Eof);
    }

    /// A plain string reports the offsets of its content and no escapes.
    #[test]
    fn test_string_no_escapes() {
        let mut scanner = Scanner::new();

        assert_eq!(
            next(&mut scanner, b"\"hello world\""),
            Token::String {
                start: 1,
                end: 12,
                has_escapes: false
            }
        );
    }

    /// A string containing a backslash is flagged as having escapes.
    #[test]
    fn test_string_with_escapes() {
        let mut scanner = Scanner::new();

        assert_eq!(
            next(&mut scanner, br#""hello\nworld""#),
            Token::String {
                start: 1,
                end: 13,
                has_escapes: true
            }
        );
    }

    /// Numbers get the right hint; a number touching the end of the buffer
    /// asks for more input.
    #[test]
    fn test_numbers() {
        let mut scanner = Scanner::new();

        let cases: [(&[u8], NumberHint); 4] = [
            (&b"42,"[..], NumberHint::Unsigned),
            (&b"-42]"[..], NumberHint::Signed),
            (&b"3.14}"[..], NumberHint::Float),
            (&b"1e10 "[..], NumberHint::Float),
        ];
        for &(input, expected) in cases.iter() {
            scanner.set_pos(0);
            match next(&mut scanner, input) {
                Token::Number { hint, .. } => assert_eq!(hint, expected),
                other => panic!("expected a number, got {:?}", other),
            }
        }

        scanner.set_pos(0);
        assert!(matches!(
            next(&mut scanner, b"42"),
            Token::NeedMore { .. }
        ));
    }

    /// The three keyword literals are recognised.
    #[test]
    fn test_literals() {
        let mut scanner = Scanner::new();

        assert_eq!(next(&mut scanner, b"true,"), Token::True);

        scanner.set_pos(0);
        assert_eq!(next(&mut scanner, b"false]"), Token::False);

        scanner.set_pos(0);
        assert_eq!(next(&mut scanner, b"null}"), Token::Null);
    }

    /// Whitespace between tokens is skipped.
    #[test]
    fn test_whitespace_handling() {
        let input = b"  {\n\t\"key\"  :  42  }  ";
        let mut scanner = Scanner::new();

        assert_eq!(next(&mut scanner, input), Token::ObjectStart);
        assert!(matches!(next(&mut scanner, input), Token::String { .. }));
        assert_eq!(next(&mut scanner, input), Token::Colon);
        assert!(matches!(next(&mut scanner, input), Token::Number { .. }));
        assert_eq!(next(&mut scanner, input), Token::ObjectEnd);
        assert_eq!(next(&mut scanner, input), Token::Eof);
    }

    #[test]
    fn test_decode_string_no_escapes() {
        let input = b"hello world";
        assert_eq!(
            decode_string_borrowed(input, 0, input.len()),
            Some("hello world")
        );
    }

    #[test]
    fn test_decode_string_with_escapes() {
        let input = br#"hello\nworld"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "hello\nworld");
    }

    #[test]
    fn test_decode_string_unicode() {
        let input = br#"\u0048\u0065\u006C\u006C\u006F"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "Hello");
    }

    #[test]
    fn test_decode_string_surrogate_pair() {
        let input = br#"\uD83D\uDE00"#;
        let decoded = decode_string_owned(input, 0, input.len()).unwrap();
        assert_eq!(decoded, "😀");
    }

    /// Without escapes the decoded string borrows from the input.
    #[test]
    fn test_decode_cow_borrowed() {
        let input = b"simple";
        let decoded = decode_string(input, 0, input.len(), false).unwrap();
        assert!(matches!(decoded, alloc::borrow::Cow::Borrowed(_)));
        assert_eq!(&*decoded, "simple");
    }

    /// With escapes the decoded string is owned.
    #[test]
    fn test_decode_cow_owned() {
        let input = br#"has\tescape"#;
        let decoded = decode_string(input, 0, input.len(), true).unwrap();
        assert!(matches!(decoded, alloc::borrow::Cow::Owned(_)));
        assert_eq!(&*decoded, "has\tescape");
    }

    #[test]
    fn test_parse_numbers() {
        assert_eq!(
            parse_number(b"42", 0, 2, NumberHint::Unsigned).unwrap(),
            ParsedNumber::U64(42)
        );
        assert_eq!(
            parse_number(b"-42", 0, 3, NumberHint::Signed).unwrap(),
            ParsedNumber::I64(-42)
        );
        // 3.14 is the literal under test, not an approximation of pi.
        #[allow(clippy::approx_constant)]
        {
            assert_eq!(
                parse_number(b"3.14", 0, 4, NumberHint::Float).unwrap(),
                ParsedNumber::F64(3.14)
            );
        }
    }
}