1use crate::{
4 Pos,
5 lexical::{
6 self, state, {Analyzer, ErrorKind, Token, Unescaped},
7 },
8 syntax,
9};
10use std::{
11 fmt,
12 ops::{Deref, Range},
13 sync::Arc,
14};
15
/// A shared view of a byte range inside a reference-counted buffer.
///
/// Cloning a `Ref` clones the `Arc` handle and the range, not the underlying bytes.
#[derive(Clone, Debug)]
struct Ref<B> {
    /// Shared handle to the buffer the range indexes into.
    buf: Arc<B>,
    /// Byte range within `buf` covered by this view.
    rng: Range<usize>,
}

impl<B: Deref<Target = [u8]>> Ref<B> {
    /// Creates a view of the bytes `rng` of `buf`.
    ///
    /// The range is not checked here; an out-of-bounds range makes [`Ref::as_str`] panic.
    fn new(buf: Arc<B>, rng: Range<usize>) -> Ref<B> {
        Ref { buf, rng }
    }

    /// Returns the viewed bytes as a string slice, without UTF-8 validation.
    ///
    /// # Panics
    ///
    /// Panics if the range does not lie within the buffer.
    fn as_str(&self) -> &str {
        let bytes: &[u8] = &self.buf[self.rng.clone()];

        // SAFETY: nothing in this function validates the bytes; soundness relies on every
        // creator of a `Ref` supplying a range that covers valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
31
/// Maximum number of bytes of token text that can be stored inline in a content value.
const INLINE_LEN: usize = 30;

/// Fixed-size byte array that backs inline content storage.
type InlineBuf = [u8; INLINE_LEN];

/// Internal representation of the text of a token.
#[derive(Clone, Debug)]
enum InnerContent<B: Deref<Target = [u8]>> {
    /// Text borrowed from a `'static` string.
    Static(&'static str),
    /// Text copied into an inline buffer; the `u8` is the number of bytes in use.
    Inline(u8, InlineBuf),
    /// Text referenced within a shared buffer and not flagged as escaped.
    NotEscaped(Ref<B>),
    /// Text referenced within a shared buffer and flagged as escaped.
    Escaped(Ref<B>),
}

/// Text content of a token, generic over the buffer type `B` that may hold the text.
///
/// The representation is private; use the accessor methods to read the text.
#[derive(Clone, Debug)]
pub struct Content<B: Deref<Target = [u8]> + fmt::Debug>(InnerContent<B>);
50
51impl Content<Vec<u8>> {
52 pub fn from_static(s: &'static str) -> Self {
78 let b = s.as_bytes();
79 match state::Machine::verify_static(b) {
80 false => Self(InnerContent::Static(s)),
81 true => Self(InnerContent::Escaped(Ref::new(
82 Arc::new(b.to_vec()),
83 0..b.len(),
84 ))),
85 }
86 }
87}
88
89impl<B: Deref<Target = [u8]> + fmt::Debug> Content<B> {
90 #[inline]
96 pub fn literal(&self) -> &str {
97 match &self.0 {
98 InnerContent::Static(s) => s,
99 InnerContent::Inline(len, buf) => Self::inline_str(*len, buf),
100 InnerContent::NotEscaped(r) | InnerContent::Escaped(r) => r.as_str(),
101 }
102 }
103
104 #[inline]
108 pub fn literal_len(&self) -> usize {
109 match &self.0 {
110 InnerContent::Static(s) => s.len(),
111 InnerContent::Inline(len, _) => *len as usize,
112 InnerContent::NotEscaped(r) | InnerContent::Escaped(r) => r.rng.end - r.rng.start,
113 }
114 }
115
116 pub fn is_escaped(&self) -> bool {
122 matches!(self.0, InnerContent::Escaped(_))
123 }
124
125 pub fn unescaped(&self) -> Unescaped<&str> {
143 match &self.0 {
144 InnerContent::Static(s) => Unescaped::Literal(s),
145 InnerContent::Inline(len, buf) => Unescaped::Literal(Self::inline_str(*len, buf)),
146 InnerContent::NotEscaped(r) => Unescaped::Literal(r.as_str()),
147 InnerContent::Escaped(r) => {
148 let mut buf = Vec::new();
149 lexical::unescape(r.as_str(), &mut buf);
150
151 let s = unsafe { String::from_utf8_unchecked(buf) };
154
155 Unescaped::Expanded(s)
156 }
157 }
158 }
159}
160
161impl<B: Deref<Target = [u8]> + fmt::Debug> Content<B> {
162 fn from_buf(buf: &Arc<B>, r: Range<usize>, escaped: bool) -> Self {
163 debug_assert!(r.start <= r.end);
164 debug_assert!(r.end <= buf.len());
165
166 let len = r.end - r.start;
167
168 if len <= INLINE_LEN && !escaped {
169 let mut inner: InlineBuf = [0; INLINE_LEN];
170 inner[..len].copy_from_slice(&buf[r]);
171
172 Self(InnerContent::Inline(len as u8, inner))
173 } else {
174 let r = Ref::new(Arc::clone(buf), r);
175
176 Self(if !escaped {
177 InnerContent::NotEscaped(r)
178 } else {
179 InnerContent::Escaped(r)
180 })
181 }
182 }
183
184 fn inline_str(len: u8, buf: &InlineBuf) -> &str {
185 unsafe { std::str::from_utf8_unchecked(&buf[0..len as usize]) }
186 }
187}
188
impl Default for Content<Vec<u8>> {
    /// Returns content holding the empty static string.
    fn default() -> Self {
        Self(InnerContent::Static(""))
    }
}

impl<B: Deref<Target = [u8]> + fmt::Debug> fmt::Display for Content<B> {
    /// Writes the literal text as-is; escape sequences are not expanded.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.literal())
    }
}

// Every trait method below forwards to the inherent method of the same name. The calls are
// spelled `Content::method(self)`, which here resolves to the inherent `Content<B>` methods.
impl<B: Deref<Target = [u8]> + fmt::Debug> super::Content for Content<B> {
    /// The literal text is handed out as a borrowed string slice.
    type Literal<'a>
        = &'a str
    where
        Self: 'a;

    /// Returns the literal text; see the inherent `literal` method.
    #[inline(always)]
    fn literal<'a>(&'a self) -> Self::Literal<'a> {
        Content::literal(self)
    }

    /// Returns the literal length in bytes; see the inherent `literal_len` method.
    #[inline(always)]
    fn literal_len(&self) -> usize {
        Content::literal_len(self)
    }

    /// Returns whether the content is flagged as escaped; see the inherent `is_escaped` method.
    #[inline(always)]
    fn is_escaped(&self) -> bool {
        Content::is_escaped(self)
    }

    /// Returns the text with escapes expanded; see the inherent `unescaped` method.
    #[inline(always)]
    fn unescaped<'a>(&'a self) -> Unescaped<Self::Literal<'a>> {
        Content::unescaped(self)
    }
}
227
// Compile-time size check for 64-bit targets: the array type annotation only matches the
// initializer when `Content<Vec<u8>>` is exactly 32 bytes, so a change in size fails the build.
#[cfg(target_pointer_width = "64")]
const _: [(); 32] = [(); std::mem::size_of::<Content<Vec<u8>>>()];
231
/// Error reported by the analyzer in this file: an error kind paired with a position.
#[derive(Copy, Clone, Debug)]
pub struct Error {
    /// Category of the error.
    kind: ErrorKind,
    /// Position recorded for the error.
    pos: Pos,
}

impl Error {
    /// Returns the kind of error.
    pub fn kind(&self) -> ErrorKind {
        self.kind
    }

    /// Returns the position recorded for the error.
    pub fn pos(&self) -> &Pos {
        &self.pos
    }
}

impl fmt::Display for Error {
    /// Formats the error by delegating to `ErrorKind::fmt_at`, passing the error's position.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.kind.fmt_at(f, Some(&self.pos))
    }
}

// No methods are overridden, so the trait's default `source()` applies.
impl std::error::Error for Error {}

// Both trait methods forward to the inherent methods of the same name.
impl lexical::Error for Error {
    /// Returns the kind of error; see the inherent `kind` method.
    fn kind(&self) -> ErrorKind {
        Error::kind(self)
    }

    /// Returns the position recorded for the error; see the inherent `pos` method.
    fn pos(&self) -> &Pos {
        Error::pos(self)
    }
}
276
/// Length and escape flag describing the content of the most recent token.
///
/// The derived `Default` is a zero length with the escape flag cleared.
#[derive(Debug, Default)]
struct StoredContent {
    /// Length of the token text in bytes.
    len: usize,
    /// Whether the token text was flagged as containing escape sequences.
    escaped: bool,
}

impl StoredContent {
    /// Creates a record with the given length and escape flag.
    #[inline(always)]
    fn new(len: usize, escaped: bool) -> Self {
        StoredContent { len, escaped }
    }
}
295
296pub struct FixedAnalyzer<B: Deref<Target = [u8]> + fmt::Debug> {
366 buf: Arc<B>,
367 content: StoredContent,
368 err: Option<Error>,
369 mach: state::Machine<state::DerefBuf<B, Arc<B>>>,
370 pos: Pos,
371}
372
373impl<B: Deref<Target = [u8]> + fmt::Debug> FixedAnalyzer<B> {
374 pub fn new(buf: B) -> Self {
388 let buf = Arc::new(buf);
389 let mach = state::Machine::new(state::DerefBuf::new(Arc::clone(&buf)));
390 let content = StoredContent::default();
391 let err = None;
392 let pos = Pos::default();
393
394 Self {
395 buf,
396 content,
397 err,
398 pos,
399 mach,
400 }
401 }
402
403 #[allow(clippy::should_implement_trait)]
418 pub fn next(&mut self) -> Token {
419 if self.err.is_some() {
420 return Token::Err;
421 }
422
423 self.pos = *self.mach.pos();
424
425 match self.mach.next() {
426 state::Next::Done(token, escaped, n) => {
427 self.content = StoredContent::new(n, escaped);
428
429 token
430 }
431 state::Next::Part(token, n) => match self.mach.end() {
432 state::End::Done => {
433 self.content = StoredContent::new(n, false);
434
435 token
436 }
437 state::End::Nil => unreachable!(),
438 state::End::Err => {
439 let kind = self.mach.err_kind().expect("there should be an error kind");
440 let pos = *self.mach.pos();
441 self.err = Some(Error { kind, pos });
442
443 Token::Err
444 }
445 },
446 state::Next::Nil => {
447 self.content = StoredContent::default();
448
449 Token::Eof
450 }
451 state::Next::Err(_) => {
452 let kind = self.mach.err_kind().expect("there should be an error kind");
453 let pos = *self.mach.pos();
454 self.err = Some(Error { kind, pos });
455
456 Token::Err
457 }
458 }
459 }
460
461 #[inline]
488 pub fn content(&self) -> Content<B> {
489 if let Ok(content) = self.try_content() {
490 content
491 } else {
492 panic!("no content: last `next()` returned `Token::Err` (use `err()` instead)");
493 }
494 }
495
496 #[inline]
523 pub fn err(&self) -> Error {
524 if let Err(err) = self.try_content() {
525 err
526 } else {
527 panic!("no error: last `next()` did not return `Token::Err` (use `content()` instead)");
528 }
529 }
530
531 #[inline(always)]
580 pub fn pos(&self) -> &Pos {
581 &self.pos
582 }
583
584 pub fn try_content(&self) -> Result<Content<B>, Error> {
612 match self.err {
613 None => {
614 let offset = self.pos.offset;
615
616 Ok(Content::from_buf(
617 &self.buf,
618 offset..offset + self.content.len,
619 self.content.escaped,
620 ))
621 }
622 Some(err) => Err(err),
623 }
624 }
625
626 pub fn into_parser(self) -> syntax::Parser<FixedAnalyzer<B>> {
649 syntax::Parser::new(self)
650 }
651}
652
// Every trait method below forwards to the inherent method of the same name. The calls are
// spelled `FixedAnalyzer::method(self)`, which here resolves to the inherent methods.
impl<B: Deref<Target = [u8]> + fmt::Debug> Analyzer for FixedAnalyzer<B> {
    type Content = Content<B>;
    type Error = Error;

    /// Scans the next token; see the inherent `next` method.
    #[inline(always)]
    fn next(&mut self) -> Token {
        FixedAnalyzer::next(self)
    }

    /// Returns the current content or the recorded error; see the inherent `try_content` method.
    #[inline(always)]
    fn try_content(&self) -> Result<Self::Content, Error> {
        FixedAnalyzer::try_content(self)
    }

    /// Returns the position captured before the most recent scan; see the inherent `pos` method.
    #[inline(always)]
    fn pos(&self) -> &Pos {
        FixedAnalyzer::pos(self)
    }
}
672
673#[cfg(test)]
674mod tests {
675 use super::*;
676 use crate::lexical::Expect;
677 use rstest::rstest;
678 use std::error::Error as _;
679
    // Inputs that are not exactly one complete token: `Content::from_static` is expected to
    // panic with a message containing "invalid JSON content".
    #[rstest]
    #[case::invalid_char("a")]
    #[case::f("f")]
    #[case::fa("fa")]
    #[case::fal("fal")]
    #[case::fals("fals")]
    #[case::false_leading_space(" false")]
    #[case::false_trailing_space("false ")]
    #[case::n("n")]
    #[case::nu("nu")]
    #[case::nul("nul")]
    #[case::null_leading_space(" null")]
    #[case::null_trailing_space("null ")]
    #[case::t("t")]
    #[case::tr("tr")]
    #[case::tru("tru")]
    #[case::true_leading_space(" true")]
    #[case::true_trailing_space("true ")]
    #[case::num_dot(".")]
    #[case::num_zero_dot("0.")]
    #[case::num_zero_zero("00")]
    #[case::num_leading_space(" 0")]
    #[case::num_trailing_space("0 ")]
    #[case::str_unterminated_0(r#"""#)]
    #[case::str_unterminated_1(r#""a"#)]
    #[case::str_leading_space(r#" "a""#)]
    #[case::str_trailing_space(r#""a" "#)]
    #[should_panic(expected = "invalid JSON content")]
    fn test_content_from_static_panic(#[case] s: &'static str) {
        let c = Content::from_static(s);

        // Reaching this point means construction succeeded; this panic's message does not
        // match the expected one, so the test fails.
        panic!("content unexpectedly created: {c:?}");
    }
713
714 #[rstest]
715 #[case::eof("", None)]
716 #[case::arr_begin("[", None)]
717 #[case::arr_end("]", None)]
718 #[case::obj_begin("{", None)]
719 #[case::obj_end("}", None)]
720 #[case::value_sep(",", None)]
721 #[case::name_sep(":", None)]
722 #[case::white_space(" ", None)]
723 #[case::white_tab("\t", None)]
724 #[case::white_lf("\n", None)]
725 #[case::white_cr("\r", None)]
726 #[case::white_lots(" \t\t \r \r\n \t\n", None)]
727 #[case::lit_false("false", None)]
728 #[case::lit_null("null", None)]
729 #[case::lit_true("true", None)]
730 #[case::num_zero("0", None)]
731 #[case::num_zero_point_one("0.1", None)]
732 #[case::num_123("123", None)]
733 #[case::num_neg_zero("-0", None)]
734 #[case::num_neg_zero_point_one("-0.1", None)]
735 #[case::num_neg_123("-123", None)]
736 #[case::num_1e1("1e1", None)]
737 #[case::num_1e_plus_1("1e+1", None)]
738 #[case::num_1e_minus_1("1e-1", None)]
739 #[case::str_empty(r#""""#, None)]
740 #[case::str_a(r#""a""#, None)]
741 #[case::str_emoji(r#""π€ ""#, None)]
742 #[case::str_escaped_nul(r#""\u0000""#, Some("\"\u{00}\""))]
743 fn test_content_from_static_ok(
744 #[case] s: &'static str,
745 #[case] unescaped: Option<&'static str>,
746 ) {
747 let c = Content::from_static(s);
748
749 assert_eq!(s, c.literal());
750 assert_eq!(s, c.to_string());
751 assert_eq!(s.len(), c.literal_len());
752 assert_eq!(s.len(), lexical::Content::literal_len(&c));
753
754 match unescaped {
755 Some(u) => assert_eq!(u, c.unescaped()),
756 None => assert!(!c.is_escaped()),
757 };
758 }
759
    // Builds a content value of each internal representation directly and checks the literal
    // text, its length (inherent and trait method), the `Display` output and the unescaped text.
    #[rstest]
    #[case::static_a(Content(InnerContent::Static("a")), "a", "a")]
    #[case::inline_empty(Content(inline_buf(b"").into()), "", "")]
    #[case::inline_a_1(Content(inline_buf(b"a").into()), "a", "a")]
    #[case::inline_a_inline_len(Content(inline_buf(&[b'a'; INLINE_LEN]).into()), "a".repeat(INLINE_LEN), "a".repeat(INLINE_LEN))]
    #[case::not_escaped_empty(Content(InnerContent::NotEscaped(Ref::new(Arc::new(b"".to_vec()), 0..0))), "", "")]
    #[case::not_escaped_a(Content(InnerContent::NotEscaped(Ref::new(Arc::new(b"a".to_vec()), 0..1))), "a", "a")]
    #[case::escaped_empty(Content(InnerContent::Escaped(Ref::new(Arc::new(b"".to_vec()), 0..0))), "", "")]
    #[case::escaped_a(Content(InnerContent::Escaped(Ref::new(Arc::new(b"a".to_vec()), 0..1))), "a", "a")]
    #[case::escaped_nl(Content(InnerContent::Escaped(Ref::new(Arc::new(b"\\n".to_vec()), 0..2))), "\\n", "\n")]
    #[case::escaped_nul(Content(InnerContent::Escaped(Ref::new(Arc::new(b"\\u0000".to_vec()), 0..6))), "\\u0000", "\u{0000}")]
    fn test_content(
        #[case] content: Content<Vec<u8>>,
        #[case] literal: impl AsRef<str>,
        #[case] unescaped: impl AsRef<str>,
    ) {
        let literal = literal.as_ref();
        let unescaped = unescaped.as_ref();

        assert_eq!(literal, content.literal());
        assert_eq!(literal, content.to_string());
        assert_eq!(literal.len(), content.literal_len());
        assert_eq!(literal.len(), lexical::Content::literal_len(&content));
        assert_eq!(unescaped, content.unescaped());
    }
785
    // Checks the `Error` accessors (inherent and trait), the absence of a source error, and
    // that the `Display` output starts with the expected message for each error kind.
    #[rstest]
    #[case(
        ErrorKind::BadSurrogate { first: 0xd800, second: Some(0xd800), },
        "bad Unicode escape sequence surrogate pair: high surrogate '\\uD800' followed by invalid low surrogate '\\uD800' at line",
    )]
    #[case(
        ErrorKind::BadUtf8ContByte { seq_len: 3, offset: 2, value: 0x80 },
        "bad UTF-8 continuation byte 0x80 in 3-byte UTF-8 sequence (byte #2) at line",
    )]
    #[case(
        ErrorKind::UnexpectedByte { token: None, expect: Expect::TokenStartChar, actual: b'e' },
        "expected token start character but got character 'e' (ASCII 0x65) at line",
    )]
    #[case(
        ErrorKind::UnexpectedEof(Token::LitNull),
        "unexpected EOF in null token at line"
    )]
    fn test_error(#[case] kind: ErrorKind, #[case] expect: &str) {
        let pos = Pos::new(10, 2, 5);
        let err = Error { kind, pos };

        assert_eq!(kind, err.kind());
        assert_eq!(kind, lexical::Error::kind(&err));
        assert_eq!(pos, *err.pos());
        assert_eq!(pos, *lexical::Error::pos(&err));
        assert!(err.source().is_none());

        // Only the prefix is compared; the position suffix after "at line" is not pinned here.
        let actual = format!("{err}");
        assert!(
            actual.starts_with(expect),
            "expected {actual:?} to start with {expect:?}"
        );
    }
819
820 #[test]
821 fn test_analyzer_initial_state_content() {
822 let an = FixedAnalyzer::new(vec![]);
823
824 for _ in 0..5 {
825 let content = an.content();
826 assert_eq!("", content.literal());
827 assert!(!content.is_escaped());
828 assert_eq!("", content.unescaped());
829
830 let content = an.try_content().unwrap();
831 assert_eq!("", content.literal());
832 assert!(!content.is_escaped());
833 assert_eq!("", content.unescaped());
834 }
835 }
836
    // Before the first `next()` no error is recorded, so `err()` must panic.
    #[test]
    #[should_panic(
        expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
    )]
    fn test_analyzer_initial_state_err() {
        let _ = FixedAnalyzer::new(vec![]).err();
    }
844
845 #[rstest]
846 #[case("", Token::Eof, None)]
847 #[case("{", Token::ObjBegin, None)]
848 #[case("}", Token::ObjEnd, None)]
849 #[case("[", Token::ArrBegin, None)]
850 #[case("]", Token::ArrEnd, None)]
851 #[case(":", Token::NameSep, None)]
852 #[case(",", Token::ValueSep, None)]
853 #[case("false", Token::LitFalse, None)]
854 #[case("null", Token::LitNull, None)]
855 #[case("true", Token::LitTrue, None)]
856 #[case("0", Token::Num, None)]
857 #[case("-0", Token::Num, None)]
858 #[case("1", Token::Num, None)]
859 #[case("-1", Token::Num, None)]
860 #[case("12", Token::Num, None)]
861 #[case("-12", Token::Num, None)]
862 #[case("0.0", Token::Num, None)]
863 #[case("-0.0", Token::Num, None)]
864 #[case("0.123456789", Token::Num, None)]
865 #[case("-123.456789", Token::Num, None)]
866 #[case("0E0", Token::Num, None)]
867 #[case("0e0", Token::Num, None)]
868 #[case("0E+0", Token::Num, None)]
869 #[case("0e+0", Token::Num, None)]
870 #[case("0E-0", Token::Num, None)]
871 #[case("0e-0", Token::Num, None)]
872 #[case("0.0E0", Token::Num, None)]
873 #[case("0.0e0", Token::Num, None)]
874 #[case("0.0E+0", Token::Num, None)]
875 #[case("0.0e+0", Token::Num, None)]
876 #[case("0.0E0", Token::Num, None)]
877 #[case("0.0e0", Token::Num, None)]
878 #[case("0E0", Token::Num, None)]
879 #[case("0e0", Token::Num, None)]
880 #[case("-0E+0", Token::Num, None)]
881 #[case("-0e+0", Token::Num, None)]
882 #[case("-0E-0", Token::Num, None)]
883 #[case("-0e-0", Token::Num, None)]
884 #[case("-0.0E0", Token::Num, None)]
885 #[case("-0.0e0", Token::Num, None)]
886 #[case("-0.0E+0", Token::Num, None)]
887 #[case("-0.0e+0", Token::Num, None)]
888 #[case("-0.0E0", Token::Num, None)]
889 #[case("-0.0e0", Token::Num, None)]
890 #[case("123E456", Token::Num, None)]
891 #[case("123e456", Token::Num, None)]
892 #[case("123.456E+7", Token::Num, None)]
893 #[case("123.456e+7", Token::Num, None)]
894 #[case("123.456E-89", Token::Num, None)]
895 #[case("123.456e-89", Token::Num, None)]
896 #[case("-123E456", Token::Num, None)]
897 #[case("-123e456", Token::Num, None)]
898 #[case("-123.456E+7", Token::Num, None)]
899 #[case("-123.456e+7", Token::Num, None)]
900 #[case("-123.456E-89", Token::Num, None)]
901 #[case("-123.456e-89", Token::Num, None)]
902 #[case(r#""""#, Token::Str, None)]
903 #[case(r#"" ""#, Token::Str, None)]
904 #[case(r#""foo""#, Token::Str, None)]
905 #[case(r#""The quick brown fox jumped over the lazy dog!""#, Token::Str, None)]
906 #[case(r#""\"""#, Token::Str, Some(r#"""""#))]
907 #[case(r#""\\""#, Token::Str, Some(r#""\""#))]
908 #[case(r#""\/""#, Token::Str, Some(r#""/""#))]
909 #[case(r#""\t""#, Token::Str, Some("\"\t\""))]
910 #[case(r#""\r""#, Token::Str, Some("\"\r\""))]
911 #[case(r#""\n""#, Token::Str, Some("\"\n\""))]
912 #[case(r#""\f""#, Token::Str, Some("\"\u{000c}\""))]
913 #[case(r#""\b""#, Token::Str, Some("\"\u{0008}\""))]
914 #[case(r#""\r\n""#, Token::Str, Some("\"\r\n\""))]
915 #[case(r#""\u0000""#, Token::Str, Some("\"\u{0000}\""))]
916 #[case(r#""\u001f""#, Token::Str, Some("\"\u{001f}\""))]
917 #[case(r#""\u0020""#, Token::Str, Some(r#"" ""#))]
918 #[case(r#""\u007E""#, Token::Str, Some(r#""~""#))]
919 #[case(r#""\u007F""#, Token::Str, Some("\"\u{007f}\""))]
920 #[case(r#""\u0080""#, Token::Str, Some("\"\u{0080}\""))]
921 #[case(r#""\u0100""#, Token::Str, Some("\"\u{0100}\""))]
922 #[case(r#""\ud7FF""#, Token::Str, Some("\"\u{d7ff}\""))]
923 #[case(r#""\uE000""#, Token::Str, Some("\"\u{e000}\""))]
924 #[case(r#""\ufDCf""#, Token::Str, Some("\"\u{fdcf}\""))]
925 #[case(r#""\uFdeF""#, Token::Str, Some("\"\u{fdef}\""))]
926 #[case(r#""\ufffd""#, Token::Str, Some("\"\u{fffd}\""))]
927 #[case(r#""\uFFFE""#, Token::Str, Some("\"\u{fffe}\""))]
928 #[case(r#""\uFFFF""#, Token::Str, Some("\"\u{ffff}\""))]
929 #[case(r#""\ud800\udc00""#, Token::Str, Some("\"\u{10000}\""))] #[case(r#""\uD800\uDFFF""#, Token::Str, Some("\"\u{103ff}\""))] #[case(r#""\uDBFF\uDC00""#, Token::Str, Some("\"\u{10fc00}\""))] #[case(r#""\udbFf\udfff""#, Token::Str, Some("\"\u{10ffff}\""))] #[case(r#""\u0061b""#, Token::Str, Some(r#""ab""#))]
934 #[case(r#""\uD800\uDC00a""#, Token::Str, Some("\"\u{10000}a\""))]
935 #[case(r#""hello\nworld""#, Token::Str, Some("\"hello\nworld\""))]
936 #[case(" ", Token::White, None)]
937 #[case("\t", Token::White, None)]
938 #[case(" ", Token::White, None)]
939 #[case("\t\t", Token::White, None)]
940 #[case(" \t \t \t \t\t", Token::White, None)]
941 fn test_analyzer_single_token(
942 #[case] input: &str,
943 #[case] expect: Token,
944 #[case] unescaped: Option<&str>,
945 ) {
946 {
948 let mut an = FixedAnalyzer::new(input.as_bytes());
949 assert_eq!(Pos::default(), *an.pos());
950
951 assert_eq!(expect, an.next());
952 assert_eq!(Pos::default(), *an.pos());
953
954 let content = an.content();
955 assert_eq!(input, content.literal());
956 assert_eq!(unescaped.is_some(), content.is_escaped());
957 if let Some(u) = unescaped {
958 assert_eq!(u, content.unescaped());
959 } else {
960 assert_eq!(input, content.unescaped());
961 }
962
963 assert_eq!(Token::Eof, an.next());
964 assert_eq!(
965 Pos {
966 offset: input.len(),
967 line: 1,
968 col: input.len() + 1
969 },
970 *an.pos()
971 );
972
973 assert_eq!(Token::Eof, an.next());
974 assert_eq!(
975 Pos {
976 offset: input.len(),
977 line: 1,
978 col: input.len() + 1
979 },
980 *an.pos()
981 );
982 }
983
984 {
986 let mut an = FixedAnalyzer::new(input.as_bytes());
987 assert_eq!(Pos::default(), *an.pos());
988
989 assert_eq!(expect, an.next());
990 assert_eq!(Pos::default(), *an.pos());
991
992 assert_eq!(Token::Eof, an.next());
993 assert_eq!(
994 Pos {
995 offset: input.len(),
996 line: 1,
997 col: input.len() + 1
998 },
999 *an.pos()
1000 );
1001
1002 assert_eq!(Token::Eof, an.next());
1003 assert_eq!(
1004 Pos {
1005 offset: input.len(),
1006 line: 1,
1007 col: input.len() + 1
1008 },
1009 *an.pos()
1010 );
1011 }
1012 }
1013
1014 #[rstest]
1015 #[case("1".repeat(INLINE_LEN-1), Token::Num, None)]
1016 #[case("2".repeat(INLINE_LEN), Token::Num, None)]
1017 #[case("3".repeat(INLINE_LEN+1), Token::Num, None)]
1018 #[case(format!(r#""{}""#, "a".repeat(INLINE_LEN-3)), Token::Str, None)]
1019 #[case(format!(r#""{}""#, "b".repeat(INLINE_LEN-2)), Token::Str, None)]
1020 #[case(format!(r#""{}""#, "c".repeat(INLINE_LEN-1)), Token::Str, None)]
1021 #[case(format!(r#""{}""#, r#"\/"#.repeat(INLINE_LEN/2)), Token::Str, Some(format!(r#""{}""#, "/".repeat(INLINE_LEN/2))))]
1022 #[case(" ".repeat(INLINE_LEN-1), Token::White, None)]
1023 #[case("\t".repeat(INLINE_LEN), Token::White, None)]
1024 #[case(" ".repeat(INLINE_LEN+1), Token::White, None)]
1025 #[case(" \t".repeat(INLINE_LEN/2+1), Token::White, None)]
1026 fn test_analyzer_single_token_inline_len_boundary(
1027 #[case] input: String,
1028 #[case] expect: Token,
1029 #[case] unescaped: Option<String>,
1030 ) {
1031 let mut an = FixedAnalyzer::new(input.as_bytes());
1032 assert_eq!(Pos::default(), *an.pos());
1033
1034 assert_eq!(expect, an.next());
1035 assert_eq!(Pos::default(), *an.pos());
1036
1037 let content = an.content();
1038 assert_eq!(input, content.literal());
1039 assert_eq!(unescaped.is_some(), content.is_escaped());
1040 if let Some(u) = unescaped {
1041 assert_eq!(u, content.unescaped());
1042 } else {
1043 assert_eq!(input, content.unescaped());
1044 }
1045
1046 assert_eq!(Token::Eof, an.next());
1047 assert_eq!(
1048 Pos {
1049 offset: input.len(),
1050 line: 1,
1051 col: input.len() + 1
1052 },
1053 *an.pos()
1054 );
1055
1056 assert_eq!(Token::Eof, an.next());
1057 assert_eq!(
1058 Pos {
1059 offset: input.len(),
1060 line: 1,
1061 col: input.len() + 1
1062 },
1063 *an.pos()
1064 );
1065 }
1066
    // After a successfully scanned token there is no recorded error, so `err()` must panic.
    #[rstest]
    #[case(r#"["#)]
    #[case(r#"]"#)]
    #[case(r#"false"#)]
    #[case(r#":"#)]
    #[case(r#"null"#)]
    #[case(r#"3.14159e+0"#)]
    #[case(r#"{"#)]
    #[case(r#"}"#)]
    #[case(r#""foo\/\u1234\/bar""#)]
    #[case(r#"true"#)]
    #[case(r#","#)]
    #[case("\n\n\n ")]
    #[should_panic(
        expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
    )]
    fn test_analyzer_single_token_panic_no_err(#[case] input: &str) {
        let mut an = FixedAnalyzer::new(input.as_bytes());

        // If this assertion fails, its message does not match the expected panic text, so the
        // test fails rather than passing by accident.
        let token = an.next();
        assert!(!token.is_terminal(), "input = {input:?}, token = {token:?}");

        let _ = an.err();
    }
1091
1092 #[rstest]
1093 #[case("\"\u{0020}\"")]
1094 #[case("\"\u{007f}\"")] #[case("\"\u{0080}\"")] #[case("\"\u{07ff}\"")] #[case("\"\u{0800}\"")] #[case("\"\u{d7ff}\"")] #[case("\"\u{e000}\"")] #[case("\"\u{ffff}\"")] #[case("\"\u{10000}\"")] #[case("\"\u{10ffff}\"")] #[case("\"\u{3f086}\"")] fn test_analyzer_utf8_char_1(#[case] input: &str) {
1105 {
1107 let mut an = FixedAnalyzer::new(input.as_bytes());
1108 assert_eq!(Pos::default(), *an.pos());
1109
1110 assert_eq!(Token::Str, an.next());
1111 assert_eq!(Pos::default(), *an.pos());
1112
1113 let content = an.content();
1114 assert_eq!(input, content.literal());
1115 assert!(!content.is_escaped());
1116 assert_eq!(input, content.unescaped());
1117
1118 assert_eq!(Token::Eof, an.next());
1119 assert_eq!(
1120 Pos {
1121 offset: input.len(),
1122 line: 1,
1123 col: 4,
1124 },
1125 *an.pos()
1126 );
1127
1128 assert_eq!(Token::Eof, an.next());
1129 assert_eq!(
1130 Pos {
1131 offset: input.len(),
1132 line: 1,
1133 col: 4,
1134 },
1135 *an.pos()
1136 );
1137 }
1138
1139 {
1141 let mut an = FixedAnalyzer::new(input.as_bytes());
1142 assert_eq!(Pos::default(), *an.pos());
1143
1144 assert_eq!(Token::Str, an.next());
1145 assert_eq!(Pos::default(), *an.pos());
1146
1147 assert_eq!(Token::Eof, an.next());
1148 assert_eq!(
1149 Pos {
1150 offset: input.len(),
1151 line: 1,
1152 col: 4,
1153 },
1154 *an.pos()
1155 );
1156
1157 assert_eq!(Token::Eof, an.next());
1158 assert_eq!(
1159 Pos {
1160 offset: input.len(),
1161 line: 1,
1162 col: 4,
1163 },
1164 *an.pos()
1165 );
1166 }
1167 }
1168
1169 #[rstest]
1170 #[case::two_1("\"\u{00e4}a\"")]
1172 #[case::two_2("\"\u{00e4}\u{00e4}\"")]
1173 #[case::two_3("\"\u{00e4}\u{3042}\"")]
1174 #[case::two_4("\"\u{00e4}\u{10000}\"")]
1175 #[case::three_1("\"\u{3042}a\"")]
1177 #[case::three_2("\"\u{3042}\u{00e4}\"")]
1178 #[case::three_3("\"\u{3042}\u{3042}\"")]
1179 #[case::three_4("\"\u{3042}\u{10000}\"")]
1180 #[case::four_1("\"\u{10000}a\"")]
1182 #[case::four_2("\"\u{10000}\u{00e4}\"")]
1183 #[case::four_3("\"\u{10000}\u{3042}\"")]
1184 #[case::four_4("\"\u{10000}\u{10000}\"")]
1185 fn test_analyzer_utf8_char_2(#[case] input: &str) {
1186 {
1188 let mut an = FixedAnalyzer::new(input.as_bytes());
1189 assert_eq!(Pos::default(), *an.pos());
1190
1191 assert_eq!(Token::Str, an.next());
1192 assert_eq!(Pos::default(), *an.pos());
1193
1194 let content = an.content();
1195 assert_eq!(input, content.literal());
1196 assert!(!content.is_escaped());
1197 assert_eq!(input, content.unescaped());
1198
1199 assert_eq!(Token::Eof, an.next());
1200 assert_eq!(
1201 Pos {
1202 offset: input.len(),
1203 line: 1,
1204 col: 5,
1205 },
1206 *an.pos()
1207 );
1208
1209 assert_eq!(Token::Eof, an.next());
1210 assert_eq!(
1211 Pos {
1212 offset: input.len(),
1213 line: 1,
1214 col: 5,
1215 },
1216 *an.pos()
1217 );
1218 }
1219
1220 {
1222 let mut an = FixedAnalyzer::new(input.as_bytes());
1223 assert_eq!(Pos::default(), *an.pos());
1224
1225 assert_eq!(Token::Str, an.next());
1226 assert_eq!(Pos::default(), *an.pos());
1227
1228 assert_eq!(Token::Eof, an.next());
1229 assert_eq!(
1230 Pos {
1231 offset: input.len(),
1232 line: 1,
1233 col: 5,
1234 },
1235 *an.pos()
1236 );
1237
1238 assert_eq!(Token::Eof, an.next());
1239 assert_eq!(
1240 Pos {
1241 offset: input.len(),
1242 line: 1,
1243 col: 5,
1244 },
1245 *an.pos()
1246 );
1247 }
1248 }
1249
1250 #[rstest]
1251 #[case("\n", 2, 1)]
1252 #[case("\n\n", 3, 1)]
1253 #[case("\r", 2, 1)]
1254 #[case("\r\r", 3, 1)]
1255 #[case("\r\n", 2, 1)]
1256 #[case("\n\r", 3, 1)]
1257 #[case("\n\n\r\r", 5, 1)]
1258 #[case("\r\n\r", 3, 1)]
1259 #[case("\n\r\n", 3, 1)]
1260 #[case(" \n", 2, 1)]
1261 #[case("\n ", 2, 2)]
1262 #[case(" \r", 2, 1)]
1263 #[case("\r ", 2, 2)]
1264 #[case("\t\n", 2, 1)]
1265 #[case("\n ", 2, 2)]
1266 #[case("\t\r", 2, 1)]
1267 #[case("\r\t", 2, 2)]
1268 fn test_analyzer_whitespace_multiline(
1269 #[case] input: &str,
1270 #[case] line: usize,
1271 #[case] col: usize,
1272 ) {
1273 {
1275 let mut an = FixedAnalyzer::new(input.as_bytes());
1276 assert_eq!(Pos::default(), *an.pos());
1277
1278 assert_eq!(Token::White, an.next());
1279 assert_eq!(Pos::default(), *an.pos());
1280
1281 let content = an.content();
1282 assert_eq!(input, content.literal());
1283 assert!(!content.is_escaped());
1284 assert_eq!(input, content.unescaped());
1285
1286 assert_eq!(Token::Eof, an.next());
1287 assert_eq!(
1288 Pos {
1289 offset: input.len(),
1290 line,
1291 col
1292 },
1293 *an.pos()
1294 );
1295 }
1296
1297 {
1299 let mut an = FixedAnalyzer::new(input.as_bytes());
1300 assert_eq!(Pos::default(), *an.pos());
1301
1302 assert_eq!(Token::White, an.next());
1303 assert_eq!(Pos::default(), *an.pos());
1304
1305 assert_eq!(Token::Eof, an.next());
1306 assert_eq!(
1307 Pos {
1308 offset: input.len(),
1309 line,
1310 col
1311 },
1312 *an.pos()
1313 );
1314 }
1315 }
1316
1317 #[rstest]
1318 #[case("", T::t(Token::Eof, ""), T::t(Token::Eof, ""), 1, 1)]
1319 #[case("{{", T::t(Token::ObjBegin, "{"), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1323 #[case("{}", T::t(Token::ObjBegin, "{"), T::t(Token::ObjEnd, "}").pos(1, 1, 2), 1, 3)]
1324 #[case("{[", T::t(Token::ObjBegin, "{"), T::t(Token::ArrBegin, "[").pos(1, 1, 2), 1, 3)]
1325 #[case("{]", T::t(Token::ObjBegin, "{"), T::t(Token::ArrEnd, "]").pos(1, 1, 2), 1, 3)]
1326 #[case("{:", T::t(Token::ObjBegin, "{"), T::t(Token::NameSep, ":").pos(1, 1, 2), 1, 3)]
1327 #[case("{,", T::t(Token::ObjBegin, "{"), T::t(Token::ValueSep, ",").pos(1, 1, 2), 1, 3)]
1328 #[case("{false", T::t(Token::ObjBegin, "{"), T::t(Token::LitFalse, "false").pos(1, 1, 2), 1, 7)]
1329 #[case("{null", T::t(Token::ObjBegin, "{"), T::t(Token::LitNull, "null").pos(1, 1, 2), 1, 6)]
1330 #[case("{true", T::t(Token::ObjBegin, "{"), T::t(Token::LitTrue, "true").pos(1, 1, 2), 1, 6)]
1331 #[case("{0", T::t(Token::ObjBegin, "{"), T::t(Token::Num, "0").pos(1, 1, 2), 1, 3)]
1332 #[case("{-1.9", T::t(Token::ObjBegin, "{"), T::t(Token::Num, "-1.9").pos(1, 1, 2), 1, 6)]
1333 #[case("{3.14e+0", T::t(Token::ObjBegin, "{"), T::t(Token::Num, "3.14e+0").pos(1, 1, 2), 1, 9)]
1334 #[case(r#"{"""#, T::t(Token::ObjBegin, "{"), T::t(Token::Str, r#""""#).pos(1, 1, 2), 1, 4)]
1335 #[case(r#"{"foo""#, T::t(Token::ObjBegin, "{"), T::t(Token::Str, r#""foo""#).pos(1, 1, 2), 1, 7)]
1336 #[case(r#"{"hello\u0020there,\nworld!""#, T::t(Token::ObjBegin, "{"), T::t(Token::Str, r#""hello\u0020there,\nworld!""#).pos(1, 1, 2).unescaped("\"hello there,\nworld!\""), 1,29)]
1337 #[case("{ ", T::t(Token::ObjBegin, "{"), T::t(Token::White, " ").pos(1, 1, 2), 1, 3)]
1338 #[case("{\t\t\r\n\n ", T::t(Token::ObjBegin, "{"), T::t(Token::White, "\t\t\r\n\n ").pos(1, 1, 2), 3, 2)]
1339 #[case("}{", T::t(Token::ObjEnd, "}"), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1343 #[case("}}", T::t(Token::ObjEnd, "}"), T::t(Token::ObjEnd, "}").pos(1, 1, 2), 1, 3)]
1344 #[case("}[", T::t(Token::ObjEnd, "}"), T::t(Token::ArrBegin, "[").pos(1, 1, 2), 1, 3)]
1345 #[case("}]", T::t(Token::ObjEnd, "}"), T::t(Token::ArrEnd, "]").pos(1, 1, 2), 1, 3)]
1346 #[case("}:", T::t(Token::ObjEnd, "}"), T::t(Token::NameSep, ":").pos(1, 1, 2), 1, 3)]
1347 #[case("},", T::t(Token::ObjEnd, "}"), T::t(Token::ValueSep, ",").pos(1, 1, 2), 1, 3)]
1348 #[case("}false", T::t(Token::ObjEnd, "}"), T::t(Token::LitFalse, "false").pos(1, 1, 2), 1, 7)]
1349 #[case("}null", T::t(Token::ObjEnd, "}"), T::t(Token::LitNull, "null").pos(1, 1, 2), 1, 6)]
1350 #[case("}true", T::t(Token::ObjEnd, "}"), T::t(Token::LitTrue, "true").pos(1, 1, 2), 1, 6)]
1351 #[case("}0", T::t(Token::ObjEnd, "}"), T::t(Token::Num, "0").pos(1, 1, 2), 1, 3)]
1352 #[case("}-1.9", T::t(Token::ObjEnd, "}"), T::t(Token::Num, "-1.9").pos(1, 1, 2), 1, 6)]
1353 #[case("}3.14e+0", T::t(Token::ObjEnd, "}"), T::t(Token::Num, "3.14e+0").pos(1, 1, 2), 1, 9)]
1354 #[case(r#"}"""#, T::t(Token::ObjEnd, "}"), T::t(Token::Str, r#""""#).pos(1, 1, 2), 1, 4)]
1355 #[case(r#"}"foo""#, T::t(Token::ObjEnd, "}"), T::t(Token::Str, r#""foo""#).pos(1, 1, 2), 1, 7)]
1356 #[case(r#"}"hello\u0020there,\nworld!""#, T::t(Token::ObjEnd, "}"), T::t(Token::Str, r#""hello\u0020there,\nworld!""#).pos(1, 1, 2).unescaped("\"hello there,\nworld!\""), 1,29)]
1357 #[case("} ", T::t(Token::ObjEnd, "}"), T::t(Token::White, " ").pos(1, 1, 2), 1, 3)]
1358 #[case("}\t\t\r\n\n ", T::t(Token::ObjEnd, "}"), T::t(Token::White, "\t\t\r\n\n ").pos(1, 1, 2), 3, 2)]
1359 #[case("[{", T::t(Token::ArrBegin, "["), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1363 #[case("[}", T::t(Token::ArrBegin, "["), T::t(Token::ObjEnd, "}").pos(1, 1, 2), 1, 3)]
1364 #[case("[[", T::t(Token::ArrBegin, "["), T::t(Token::ArrBegin, "[").pos(1, 1, 2), 1, 3)]
1365 #[case("[]", T::t(Token::ArrBegin, "["), T::t(Token::ArrEnd, "]").pos(1, 1, 2), 1, 3)]
1366 #[case("[:", T::t(Token::ArrBegin, "["), T::t(Token::NameSep, ":").pos(1, 1, 2), 1, 3)]
1367 #[case("[,", T::t(Token::ArrBegin, "["), T::t(Token::ValueSep, ",").pos(1, 1, 2), 1, 3)]
1368 #[case("[false", T::t(Token::ArrBegin, "["), T::t(Token::LitFalse, "false").pos(1, 1, 2), 1, 7)]
1369 #[case("[null", T::t(Token::ArrBegin, "["), T::t(Token::LitNull, "null").pos(1, 1, 2), 1, 6)]
1370 #[case("[true", T::t(Token::ArrBegin, "["), T::t(Token::LitTrue, "true").pos(1, 1, 2), 1, 6)]
1371 #[case("[0", T::t(Token::ArrBegin, "["), T::t(Token::Num, "0").pos(1, 1, 2), 1, 3)]
1372 #[case("[-1.9", T::t(Token::ArrBegin, "["), T::t(Token::Num, "-1.9").pos(1, 1, 2), 1, 6)]
1373 #[case("[3.14e+0", T::t(Token::ArrBegin, "["), T::t(Token::Num, "3.14e+0").pos(1, 1, 2), 1, 9)]
1374 #[case(r#"["""#, T::t(Token::ArrBegin, "["), T::t(Token::Str, r#""""#).pos(1, 1, 2), 1, 4)]
1375 #[case(r#"["foo""#, T::t(Token::ArrBegin, "["), T::t(Token::Str, r#""foo""#).pos(1, 1, 2), 1, 7)]
1376 #[case(r#"["hello\u0020there,\nworld!""#, T::t(Token::ArrBegin, "["), T::t(Token::Str, r#""hello\u0020there,\nworld!""#).pos(1, 1, 2).unescaped("\"hello there,\nworld!\""), 1,29)]
1377 #[case("[ ", T::t(Token::ArrBegin, "["), T::t(Token::White, " ").pos(1, 1, 2), 1, 3)]
1378 #[case("[\t\t\r\n\n ", T::t(Token::ArrBegin, "["), T::t(Token::White, "\t\t\r\n\n ").pos(1, 1, 2), 3, 2)]
1379 #[case("]{", T::t(Token::ArrEnd, "]"), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1383 #[case("]}", T::t(Token::ArrEnd, "]"), T::t(Token::ObjEnd, "}").pos(1, 1, 2), 1, 3)]
1384 #[case("][", T::t(Token::ArrEnd, "]"), T::t(Token::ArrBegin, "[").pos(1, 1, 2), 1, 3)]
1385 #[case("]]", T::t(Token::ArrEnd, "]"), T::t(Token::ArrEnd, "]").pos(1, 1, 2), 1, 3)]
1386 #[case("]:", T::t(Token::ArrEnd, "]"), T::t(Token::NameSep, ":").pos(1, 1, 2), 1, 3)]
1387 #[case("],", T::t(Token::ArrEnd, "]"), T::t(Token::ValueSep, ",").pos(1, 1, 2), 1, 3)]
1388 #[case("]false", T::t(Token::ArrEnd, "]"), T::t(Token::LitFalse, "false").pos(1, 1, 2), 1, 7)]
1389 #[case("]null", T::t(Token::ArrEnd, "]"), T::t(Token::LitNull, "null").pos(1, 1, 2), 1, 6)]
1390 #[case("]true", T::t(Token::ArrEnd, "]"), T::t(Token::LitTrue, "true").pos(1, 1, 2), 1, 6)]
1391 #[case("]0", T::t(Token::ArrEnd, "]"), T::t(Token::Num, "0").pos(1, 1, 2), 1, 3)]
1392 #[case("]-1.9", T::t(Token::ArrEnd, "]"), T::t(Token::Num, "-1.9").pos(1, 1, 2), 1, 6)]
1393 #[case("]31.4e-1", T::t(Token::ArrEnd, "]"), T::t(Token::Num, "31.4e-1").pos(1, 1, 2), 1, 9)]
1394 #[case(r#"]"""#, T::t(Token::ArrEnd, "]"), T::t(Token::Str, r#""""#).pos(1, 1, 2), 1, 4)]
1395 #[case(r#"]"foo""#, T::t(Token::ArrEnd, "]"), T::t(Token::Str, r#""foo""#).pos(1, 1, 2), 1, 7)]
1396 #[case(r#"]"hello\u0020there,\nworld!""#, T::t(Token::ArrEnd, "]"), T::t(Token::Str, r#""hello\u0020there,\nworld!""#).pos(1, 1, 2).unescaped("\"hello there,\nworld!\""), 1,29)]
1397 #[case("] ", T::t(Token::ArrEnd, "]"), T::t(Token::White, " ").pos(1, 1, 2), 1, 3)]
1398 #[case("]\t\t\r\n\n ", T::t(Token::ArrEnd, "]"), T::t(Token::White, "\t\t\r\n\n ").pos(1, 1, 2), 3, 2)]
1399 #[case(":{", T::t(Token::NameSep, ":"), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1403 #[case(":}", T::t(Token::NameSep, ":"), T::t(Token::ObjEnd, "}").pos(1, 1, 2), 1, 3)]
1404 #[case(":[", T::t(Token::NameSep, ":"), T::t(Token::ArrBegin, "[").pos(1, 1, 2), 1, 3)]
1405 #[case(":]", T::t(Token::NameSep, ":"), T::t(Token::ArrEnd, "]").pos(1, 1, 2), 1, 3)]
1406 #[case("::", T::t(Token::NameSep, ":"), T::t(Token::NameSep, ":").pos(1, 1, 2), 1, 3)]
1407 #[case(":,", T::t(Token::NameSep, ":"), T::t(Token::ValueSep, ",").pos(1, 1, 2), 1, 3)]
1408 #[case(":false", T::t(Token::NameSep, ":"), T::t(Token::LitFalse, "false").pos(1, 1, 2), 1, 7)]
1409 #[case(":null", T::t(Token::NameSep, ":"), T::t(Token::LitNull, "null").pos(1, 1, 2), 1, 6)]
1410 #[case(":true", T::t(Token::NameSep, ":"), T::t(Token::LitTrue, "true").pos(1, 1, 2), 1, 6)]
1411 #[case(":0", T::t(Token::NameSep, ":"), T::t(Token::Num, "0").pos(1, 1, 2), 1, 3)]
1412 #[case(":-1.9", T::t(Token::NameSep, ":"), T::t(Token::Num, "-1.9").pos(1, 1, 2), 1, 6)]
1413 #[case(":31.4e-1", T::t(Token::NameSep, ":"), T::t(Token::Num, "31.4e-1").pos(1, 1, 2), 1, 9)]
1414 #[case(r#":"""#, T::t(Token::NameSep, ":"), T::t(Token::Str, r#""""#).pos(1, 1, 2), 1, 4)]
1415 #[case(r#":"foo""#, T::t(Token::NameSep, ":"), T::t(Token::Str, r#""foo""#).pos(1, 1, 2), 1, 7)]
1416 #[case(r#":"hello\u0020there,\nworld!""#, T::t(Token::NameSep, ":"), T::t(Token::Str, r#""hello\u0020there,\nworld!""#).pos(1, 1, 2).unescaped("\"hello there,\nworld!\""), 1,29)]
1417 #[case(": ", T::t(Token::NameSep, ":"), T::t(Token::White, " ").pos(1, 1, 2), 1, 3)]
1418 #[case(":\t\t\r\n\n ", T::t(Token::NameSep, ":"), T::t(Token::White, "\t\t\r\n\n ").pos(1, 1, 2), 3, 2)]
1419 #[case(",{", T::t(Token::ValueSep, ","), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1423 #[case(",}", T::t(Token::ValueSep, ","), T::t(Token::ObjEnd, "}").pos(1, 1, 2), 1, 3)]
1424 #[case(",[", T::t(Token::ValueSep, ","), T::t(Token::ArrBegin, "[").pos(1, 1, 2), 1, 3)]
1425 #[case(",]", T::t(Token::ValueSep, ","), T::t(Token::ArrEnd, "]").pos(1, 1, 2), 1, 3)]
1426 #[case(",:", T::t(Token::ValueSep, ","), T::t(Token::NameSep, ":").pos(1, 1, 2), 1, 3)]
1427 #[case(",,", T::t(Token::ValueSep, ","), T::t(Token::ValueSep, ",").pos(1, 1, 2), 1, 3)]
1428 #[case(",false", T::t(Token::ValueSep, ","), T::t(Token::LitFalse, "false").pos(1, 1, 2), 1, 7)]
1429 #[case(",null", T::t(Token::ValueSep, ","), T::t(Token::LitNull, "null").pos(1, 1, 2), 1, 6)]
1430 #[case(",true", T::t(Token::ValueSep, ","), T::t(Token::LitTrue, "true").pos(1, 1, 2), 1, 6)]
1431 #[case(",-0.0", T::t(Token::ValueSep, ","), T::t(Token::Num, "-0.0").pos(1, 1, 2), 1, 6)]
1432 #[case(",1.9", T::t(Token::ValueSep, ","), T::t(Token::Num, "1.9").pos(1, 1, 2), 1, 5)]
1433 #[case(",314e-2", T::t(Token::ValueSep, ","), T::t(Token::Num, "314e-2").pos(1, 1, 2), 1, 8)]
1434 #[case(r#","""#, T::t(Token::ValueSep, ","), T::t(Token::Str, r#""""#).pos(1, 1, 2), 1, 4)]
1435 #[case(r#","foo""#, T::t(Token::ValueSep, ","), T::t(Token::Str, r#""foo""#).pos(1, 1, 2), 1, 7)]
1436 #[case(r#","hello\u0020there,\nworld!""#, T::t(Token::ValueSep, ","), T::t(Token::Str, r#""hello\u0020there,\nworld!""#).pos(1, 1, 2).unescaped("\"hello there,\nworld!\""), 1,29)]
1437 #[case(", ", T::t(Token::ValueSep, ","), T::t(Token::White, " ").pos(1, 1, 2), 1, 3)]
1438 #[case(",\t\t\r\n\n ", T::t(Token::ValueSep, ","), T::t(Token::White, "\t\t\r\n\n ").pos(1, 1, 2), 3, 2)]
1439 #[case("false{", T::t(Token::LitFalse, "false"), T::t(Token::ObjBegin, "{").pos(5, 1, 6), 1, 7)]
1443 #[case("false}", T::t(Token::LitFalse, "false"), T::t(Token::ObjEnd, "}").pos(5, 1, 6), 1, 7)]
1444 #[case("false[", T::t(Token::LitFalse, "false"), T::t(Token::ArrBegin, "[").pos(5, 1, 6), 1, 7)]
1445 #[case("false]", T::t(Token::LitFalse, "false"), T::t(Token::ArrEnd, "]").pos(5, 1, 6), 1, 7)]
1446 #[case("false:", T::t(Token::LitFalse, "false"), T::t(Token::NameSep, ":").pos(5, 1, 6), 1, 7)]
1447 #[case("false,", T::t(Token::LitFalse, "false"), T::t(Token::ValueSep, ",").pos(5, 1, 6), 1, 7)]
1448 #[case("false ", T::t(Token::LitFalse, "false"), T::t(Token::White, " ").pos(5, 1, 6), 1, 7)]
1449 #[case("false\t", T::t(Token::LitFalse, "false"), T::t(Token::White, "\t").pos(5, 1, 6), 1, 7)]
1450 #[case("false\r", T::t(Token::LitFalse, "false"), T::t(Token::White, "\r").pos(5, 1, 6), 2, 1)]
1451 #[case("false\n", T::t(Token::LitFalse, "false"), T::t(Token::White, "\n").pos(5, 1, 6), 2, 1)]
1452 #[case("false\r\n", T::t(Token::LitFalse, "false"), T::t(Token::White, "\r\n").pos(5, 1, 6), 2, 1)]
1453 #[case("false\n\r", T::t(Token::LitFalse, "false"), T::t(Token::White, "\n\r").pos(5, 1, 6), 3, 1)]
1454 #[case("false\r\n ", T::t(Token::LitFalse, "false"), T::t(Token::White, "\r\n ").pos(5, 1, 6), 2, 2)]
1455 #[case("false\n\r ", T::t(Token::LitFalse, "false"), T::t(Token::White, "\n\r ").pos(5, 1, 6), 3, 2)]
1456 #[case("false \r\n", T::t(Token::LitFalse, "false"), T::t(Token::White, " \r\n").pos(5, 1, 6), 2, 1)]
1457 #[case("false \n\r", T::t(Token::LitFalse, "false"), T::t(Token::White, " \n\r").pos(5, 1, 6), 3, 1)]
1458 #[case(r#"false"""#, T::t(Token::LitFalse, "false"), T::t(Token::Str, r#""""#).pos(5, 1, 6), 1, 8)]
1459 #[case("null{", T::t(Token::LitNull, "null"), T::t(Token::ObjBegin, "{").pos(4, 1, 5), 1, 6)]
1463 #[case("null}", T::t(Token::LitNull, "null"), T::t(Token::ObjEnd, "}").pos(4, 1, 5), 1, 6)]
1464 #[case("null[", T::t(Token::LitNull, "null"), T::t(Token::ArrBegin, "[").pos(4, 1, 5), 1, 6)]
1465 #[case("null]", T::t(Token::LitNull, "null"), T::t(Token::ArrEnd, "]").pos(4, 1, 5), 1, 6)]
1466 #[case("null:", T::t(Token::LitNull, "null"), T::t(Token::NameSep, ":").pos(4, 1, 5), 1, 6)]
1467 #[case("null,", T::t(Token::LitNull, "null"), T::t(Token::ValueSep, ",").pos(4, 1, 5), 1, 6)]
1468 #[case("null ", T::t(Token::LitNull, "null"), T::t(Token::White, " ").pos(4, 1, 5), 1, 6)]
1469 #[case("null\t", T::t(Token::LitNull, "null"), T::t(Token::White, "\t").pos(4, 1, 5), 1, 6)]
1470 #[case("null\r", T::t(Token::LitNull, "null"), T::t(Token::White, "\r").pos(4, 1, 5), 2, 1)]
1471 #[case("null\n", T::t(Token::LitNull, "null"), T::t(Token::White, "\n").pos(4, 1, 5), 2, 1)]
1472 #[case(r#"null"x""#, T::t(Token::LitNull, "null"), T::t(Token::Str, r#""x""#).pos(4, 1, 5), 1, 8)]
1473 #[case("true{", T::t(Token::LitTrue, "true"), T::t(Token::ObjBegin, "{").pos(4, 1, 5), 1, 6)]
1477 #[case("true}", T::t(Token::LitTrue, "true"), T::t(Token::ObjEnd, "}").pos(4, 1, 5), 1, 6)]
1478 #[case("true[", T::t(Token::LitTrue, "true"), T::t(Token::ArrBegin, "[").pos(4, 1, 5), 1, 6)]
1479 #[case("true]", T::t(Token::LitTrue, "true"), T::t(Token::ArrEnd, "]").pos(4, 1, 5), 1, 6)]
1480 #[case("true:", T::t(Token::LitTrue, "true"), T::t(Token::NameSep, ":").pos(4, 1, 5), 1, 6)]
1481 #[case("true,", T::t(Token::LitTrue, "true"), T::t(Token::ValueSep, ",").pos(4, 1, 5), 1, 6)]
1482 #[case("true ", T::t(Token::LitTrue, "true"), T::t(Token::White, " ").pos(4, 1, 5), 1, 6)]
1483 #[case("true\t", T::t(Token::LitTrue, "true"), T::t(Token::White, "\t").pos(4, 1, 5), 1, 6)]
1484 #[case("true\r", T::t(Token::LitTrue, "true"), T::t(Token::White, "\r").pos(4, 1, 5), 2, 1)]
1485 #[case("true\n", T::t(Token::LitTrue, "true"), T::t(Token::White, "\n").pos(4, 1, 5), 2, 1)]
1486 #[case(r#"true"π§Ά""#, T::t(Token::LitTrue, "true"), T::t(Token::Str, r#""π§Ά""#).pos(4, 1, 5), 1, 8)]
1487 #[case("0{", T::t(Token::Num, "0"), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1491 #[case("0}", T::t(Token::Num, "0"), T::t(Token::ObjEnd, "}").pos(1, 1, 2), 1, 3)]
1492 #[case("0[", T::t(Token::Num, "0"), T::t(Token::ArrBegin, "[").pos(1, 1, 2), 1, 3)]
1493 #[case("0]", T::t(Token::Num, "0"), T::t(Token::ArrEnd, "]").pos(1, 1, 2), 1, 3)]
1494 #[case("0:", T::t(Token::Num, "0"), T::t(Token::NameSep, ":").pos(1, 1, 2), 1, 3)]
1495 #[case("0,", T::t(Token::Num, "0"), T::t(Token::ValueSep, ",").pos(1, 1, 2), 1, 3)]
1496 #[case("0 ", T::t(Token::Num, "0"), T::t(Token::White, " ").pos(1, 1, 2), 1, 3)]
1497 #[case("0\t", T::t(Token::Num, "0"), T::t(Token::White, "\t").pos(1, 1, 2), 1, 3)]
1498 #[case("0\r", T::t(Token::Num, "0"), T::t(Token::White, "\r").pos(1, 1, 2), 2, 1)]
1499 #[case("0\n", T::t(Token::Num, "0"), T::t(Token::White, "\n").pos(1, 1, 2), 2, 1)]
1500 #[case(r#"0"""#, T::t(Token::Num, "0"), T::t(Token::Str, r#""""#).pos(1, 1, 2), 1, 4)]
1501 #[case("1{", T::t(Token::Num, "1"), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1502 #[case("-9}", T::t(Token::Num, "-9"), T::t(Token::ObjEnd, "}").pos(2, 1, 3), 1, 4)]
1503 #[case("0.0[", T::t(Token::Num, "0.0"), T::t(Token::ArrBegin, "[").pos(3, 1, 4), 1, 5)]
1504 #[case("-0]", T::t(Token::Num, "-0"), T::t(Token::ArrEnd, "]").pos(2, 1, 3), 1, 4)]
1505 #[case(r#"-0"a""#, T::t(Token::Num, "-0"), T::t(Token::Str, r#""a""#).pos(2, 1, 3), 1, 6)]
1506 #[case("-0.0123456789:", T::t(Token::Num, "-0.0123456789"), T::t(Token::NameSep, ":").pos(13, 1, 14), 1, 15)]
1507 #[case("123456789e10,", T::t(Token::Num, "123456789e10"), T::t(Token::ValueSep, ",").pos(12, 1, 13), 1, 14)]
1508 #[case("0e-1 ", T::t(Token::Num, "0e-1"), T::t(Token::White, " ").pos(4, 1, 5), 1, 6)]
1509 #[case("2e+3\t", T::t(Token::Num, "2e+3"), T::t(Token::White, "\t").pos(4, 1, 5), 1, 6)]
1510 #[case("-5e6\r", T::t(Token::Num, "-5e6"), T::t(Token::White, "\r").pos(4, 1, 5), 2, 1)]
1511 #[case("6.7e89\n", T::t(Token::Num, "6.7e89"), T::t(Token::White, "\n").pos(6, 1, 7), 2, 1)]
1512 #[case(r#"1"a""#, T::t(Token::Num, "1"), T::t(Token::Str, r#""a""#).pos(1, 1, 2), 1, 5)]
1513 #[case(r#"2.5"a""#, T::t(Token::Num, "2.5"), T::t(Token::Str, r#""a""#).pos(3, 1, 4), 1, 7)]
1514 #[case(r#"3e4"a""#, T::t(Token::Num, "3e4"), T::t(Token::Str, r#""a""#).pos(3, 1, 4), 1, 7)]
1515 #[case(r#"""{"#, T::t(Token::Str, r#""""#), T::t(Token::ObjBegin, "{").pos(2, 1, 3), 1, 4)]
1519 #[case(r#"""}"#, T::t(Token::Str, r#""""#), T::t(Token::ObjEnd, "}").pos(2, 1, 3), 1, 4)]
1520 #[case(r#"""["#, T::t(Token::Str, r#""""#), T::t(Token::ArrBegin, "[").pos(2, 1, 3), 1, 4)]
1521 #[case(r#"""]"#, T::t(Token::Str, r#""""#), T::t(Token::ArrEnd, "]").pos(2, 1, 3), 1, 4)]
1522 #[case(r#""":"#, T::t(Token::Str, r#""""#), T::t(Token::NameSep, ":").pos(2, 1, 3), 1, 4)]
1523 #[case(r#""","#, T::t(Token::Str, r#""""#), T::t(Token::ValueSep, ",").pos(2, 1, 3), 1, 4)]
1524 #[case(r#""" "#, T::t(Token::Str, r#""""#), T::t(Token::White, " ").pos(2, 1, 3), 1, 4)]
1525 #[case("\"\"\t", T::t(Token::Str, r#""""#), T::t(Token::White, "\t").pos(2, 1, 3), 1, 4)]
1526 #[case("\"\"\r", T::t(Token::Str, r#""""#), T::t(Token::White, "\r").pos(2, 1, 3), 2, 1)]
1527 #[case("\"\"\n", T::t(Token::Str, r#""""#), T::t(Token::White, "\n").pos(2, 1, 3), 2, 1)]
1528 #[case(r#""x"}"#, T::t(Token::Str, r#""x""#), T::t(Token::ObjEnd, "}").pos(3, 1, 4), 1, 5)]
1529 #[case(r#""foo bar"]"#, T::t(Token::Str, r#""foo bar""#), T::t(Token::ArrEnd, "]").pos(9, 1, 10), 1, 11)]
1530 #[case(r#""π§Ά":"#, T::t(Token::Str, r#""π§Ά""#), T::t(Token::NameSep, ":").pos(6, 1, 4), 1, 5)]
1531 #[case(r#""\"\t\r\n\\\/\u0020\"","#, T::t(Token::Str, r#""\"\t\r\n\\\/\u0020\"""#).unescaped("\"\"\t\r\n\\/ \"\""), T::t(Token::ValueSep, ",").pos(22, 1, 23), 1, 24)]
1532 #[case(r#""treble \uD834\uDD1E""clef""#, T::t(Token::Str, r#""treble \uD834\uDD1E""#).unescaped("\"treble π\""), T::t(Token::Str, r#""clef""#).pos(21, 1, 22), 1, 28)]
1533 #[case(r#""a"0e+12"#, T::t(Token::Str, r#""a""#), T::t(Token::Num, "0e+12").pos(3, 1, 4), 1, 9)]
1534 #[case(r#""β€π"-0"#, T::t(Token::Str, r#""β€π""#), T::t(Token::Num, "-0").pos(9, 1, 5), 1, 7)]
1535 #[case(r#""β€οΈπ"1"#, T::t(Token::Str, r#""β€οΈπ""#), T::t(Token::Num, "1").pos(12, 1, 6), 1, 7)]
1536 #[case(r#""""a""#, T::t(Token::Str, r#""""#), T::t(Token::Str, r#""a""#).pos(2, 1, 3), 1, 6)]
1537 #[case(r#""""cafΓ©""#, T::t(Token::Str, r#""""#), T::t(Token::Str, r#""cafΓ©""#).pos(2, 1, 3), 1, 9)]
1538 #[case(r#""a""""#, T::t(Token::Str, r#""a""#), T::t(Token::Str, r#""""#).pos(3, 1, 4), 1, 6)]
1539 #[case(r#""β¬10""""#, T::t(Token::Str, r#""β¬10""#), T::t(Token::Str, r#""""#).pos(7, 1, 6), 1, 8)]
1540 #[case(" {", T::t(Token::White, " "), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1544 #[case(" }", T::t(Token::White, " "), T::t(Token::ObjEnd, "}").pos(1, 1, 2), 1, 3)]
1545 #[case(" [", T::t(Token::White, " "), T::t(Token::ArrBegin, "[").pos(1, 1, 2), 1, 3)]
1546 #[case(" ]", T::t(Token::White, " "), T::t(Token::ArrEnd, "]").pos(1, 1, 2), 1, 3)]
1547 #[case(" :", T::t(Token::White, " "), T::t(Token::NameSep, ":").pos(1, 1, 2), 1, 3)]
1548 #[case(" ,", T::t(Token::White, " "), T::t(Token::ValueSep, ",").pos(1, 1, 2), 1, 3)]
1549 #[case(" false", T::t(Token::White, " "), T::t(Token::LitFalse, "false").pos(1, 1, 2), 1, 7)]
1550 #[case(" null", T::t(Token::White, " "), T::t(Token::LitNull, "null").pos(1, 1, 2), 1, 6)]
1551 #[case(" true", T::t(Token::White, " "), T::t(Token::LitTrue, "true").pos(1, 1, 2), 1, 6)]
1552 #[case(" 0", T::t(Token::White, " "), T::t(Token::Num, "0").pos(1, 1, 2), 1, 3)]
1553 #[case(" -0", T::t(Token::White, " "), T::t(Token::Num, "-0").pos(1, 1, 2), 1, 4)]
1554 #[case(r#" """#, T::t(Token::White, " "), T::t(Token::Str, r#""""#).pos(1, 1, 2), 1, 4)]
1555 #[case("\t{", T::t(Token::White, "\t"), T::t(Token::ObjBegin, "{").pos(1, 1, 2), 1, 3)]
1556 #[case(" {", T::t(Token::White, " "), T::t(Token::ObjBegin, "{").pos(2, 1, 3), 1, 4)]
1557 #[case("\n{", T::t(Token::White, "\n"), T::t(Token::ObjBegin, "{").pos(1, 2, 1), 2, 2)]
1558 #[case("\r{", T::t(Token::White, "\r"), T::t(Token::ObjBegin, "{").pos(1, 2, 1), 2, 2)]
1559 #[case("\r\n{", T::t(Token::White, "\r\n"), T::t(Token::ObjBegin, "{").pos(2, 2, 1), 2, 2)]
1560 #[case("\n\r{", T::t(Token::White, "\n\r"), T::t(Token::ObjBegin, "{").pos(2, 3, 1), 3, 2)]
1561 #[case("\n\n{", T::t(Token::White, "\n\n"), T::t(Token::ObjBegin, "{").pos(2, 3, 1), 3, 2)]
1562 #[case("\r\r{", T::t(Token::White, "\r\r"), T::t(Token::ObjBegin, "{").pos(2, 3, 1), 3, 2)]
1563 fn test_analyzer_two_tokens(
1564 #[case] input: &str,
1565 #[case] t1: T,
1566 #[case] t2: T,
1567 #[case] line: usize,
1568 #[case] col: usize,
1569 ) {
1570 {
1572 let mut an = FixedAnalyzer::new(input.as_bytes());
1573 assert_eq!(Pos::default(), *an.pos());
1574
1575 assert_eq!(t1.token, an.next());
1576 assert_eq!(t1.pos, *an.pos());
1577
1578 let content1 = an.content();
1579 assert_eq!(t1.literal, content1.literal());
1580 assert_eq!(t1.is_escaped(), content1.is_escaped());
1581 assert_eq!(t1.unescaped, content1.unescaped());
1582
1583 assert_eq!(t2.token, an.next());
1584 assert_eq!(t2.pos, *an.pos());
1585
1586 let content2 = an.content();
1587 assert_eq!(t2.literal, content2.literal());
1588 assert_eq!(t2.is_escaped(), content2.is_escaped());
1589 assert_eq!(t2.unescaped, content2.unescaped());
1590
1591 assert_eq!(Token::Eof, an.next());
1592 assert_eq!(
1593 Pos {
1594 offset: input.len(),
1595 line,
1596 col
1597 },
1598 *an.pos()
1599 );
1600 }
1601
1602 {
1604 let mut an = FixedAnalyzer::new(input.as_bytes());
1605 assert_eq!(Pos::default(), *an.pos());
1606
1607 assert_eq!(t1.token, an.next());
1608 assert_eq!(t1.pos, *an.pos());
1609
1610 assert_eq!(t2.token, an.next());
1611 assert_eq!(t2.pos, *an.pos());
1612
1613 assert_eq!(Token::Eof, an.next());
1614 assert_eq!(
1615 Pos {
1616 offset: input.len(),
1617 line,
1618 col
1619 },
1620 *an.pos()
1621 );
1622 }
1623 }
1624
1625 #[derive(Debug)]
1626 struct T {
1627 token: Token,
1628 pos: Pos,
1629 literal: &'static str,
1630 unescaped: &'static str,
1631 }
1632
1633 impl T {
1634 fn t(token: Token, literal: &'static str) -> Self {
1635 Self {
1636 token,
1637 pos: Pos::default(),
1638 literal,
1639 unescaped: literal,
1640 }
1641 }
1642
1643 fn pos(mut self, offset: usize, line: usize, col: usize) -> Self {
1644 self.pos = Pos { offset, line, col };
1645 self
1646 }
1647
1648 fn unescaped(mut self, unescaped: &'static str) -> Self {
1649 self.unescaped = unescaped;
1650 self
1651 }
1652
1653 fn is_escaped(&self) -> bool {
1654 self.unescaped != self.literal
1655 }
1656 }
1657
1658 #[rstest]
1659 #[case(r#""\uDC00""#, 0xdc00, None, 3)]
1660 #[case(r#""\udc00""#, 0xdc00, None, 3)]
1661 #[case(r#""\uDFFF""#, 0xdfff, None, 3)]
1662 #[case(r#""\udfff""#, 0xdfff, None, 3)]
1663 #[case(r#""\uD800""#, 0xd800, None, 3)]
1664 #[case(r#""\ud800""#, 0xd800, None, 3)]
1665 #[case(r#""\uDBFF""#, 0xdbff, None, 3)]
1666 #[case(r#""\udbff""#, 0xdbff, None, 3)]
1667 #[case(r#""\uD800x""#, 0xd800, None, 3)]
1668 #[case(r#""\ud800x""#, 0xd800, None, 3)]
1669 #[case(r#""\uDBFFx""#, 0xdbff, None, 3)]
1670 #[case(r#""\udbffx""#, 0xdbff, None, 3)]
1671 #[case(r#""\uD800\""#, 0xd800, None, 3)]
1672 #[case(r#""\ud800\""#, 0xd800, None, 3)]
1673 #[case(r#""\uDBFF\""#, 0xdbff, None, 3)]
1674 #[case(r#""\udbff\""#, 0xdbff, None, 3)]
1675 #[case(r#""\uD800\/""#, 0xd800, None, 3)]
1676 #[case(r#""\ud800\t""#, 0xd800, None, 3)]
1677 #[case(r#""\uDBFF\r""#, 0xdbff, None, 3)]
1678 #[case(r#""\udbff\n""#, 0xdbff, None, 3)]
1679 #[case(r#""\uD800\ud800""#, 0xd800, Some(0xd800), 9)]
1680 #[case(r#""\uD800\uDBFF""#, 0xd800, Some(0xdbff), 9)]
1681 #[case(r#""\udbff\ue000""#, 0xdbff, Some(0xe000), 9)]
1682 #[case(r#""\udbff\u0000""#, 0xdbff, Some(0x0000), 9)]
1683 fn test_analyzer_single_error_bad_surrogate(
1684 #[case] input: &str,
1685 #[case] first: u16,
1686 #[case] second: Option<u16>,
1687 #[case] pos_offset: usize,
1688 ) {
1689 {
1691 let mut an = FixedAnalyzer::new(input.as_bytes());
1692 assert_eq!(Pos::default(), *an.pos());
1693
1694 assert_eq!(Token::Err, an.next());
1695 assert_eq!(Pos::default(), *an.pos());
1696
1697 let err = an.err();
1698 assert_eq!(ErrorKind::BadSurrogate { first, second }, err.kind());
1699 assert_eq!(
1700 Pos {
1701 offset: pos_offset,
1702 line: 1,
1703 col: pos_offset + 1
1704 },
1705 *err.pos()
1706 );
1707
1708 assert_eq!(Token::Err, an.next());
1709 assert_eq!(Pos::default(), *an.pos());
1710 }
1711
1712 {
1714 let mut an = FixedAnalyzer::new(input.as_bytes());
1715 assert_eq!(Pos::default(), *an.pos());
1716
1717 assert_eq!(Token::Err, an.next());
1718 assert_eq!(Pos::default(), *an.pos());
1719
1720 assert_eq!(Token::Err, an.next());
1721 assert_eq!(Pos::default(), *an.pos());
1722 }
1723 }
1724
1725 #[rstest]
1726 #[case(&[0xc2, 0xc0], 1)]
1727 #[case(&[0xdf, 0xd0], 1)]
1728 #[case(&[0xe0, 0x7f, 0x80], 1)]
1729 #[case(&[0xe0, 0x80, 0x80], 1)]
1730 #[case(&[0xe0, 0xc0, 0x80], 1)]
1731 #[case(&[0xed, 0xa0, 0x80], 1)]
1732 #[case(&[0xed, 0xa0, 0xbf], 1)]
1733 #[case(&[0xed, 0xb0, 0x80], 1)]
1734 #[case(&[0xed, 0xb0, 0xbf], 1)]
1735 #[case(&[0xef, 0x7f, 0x80], 1)]
1736 #[case(&[0xef, 0xc0, 0x80], 1)]
1737 #[case(&[0xe0, 0x80, 0x7f], 2)]
1738 #[case(&[0xe0, 0x80, 0xc0], 2)]
1739 #[case(&[0xe0, 0xbf, 0x7f], 2)]
1740 #[case(&[0xe0, 0xbf, 0xc0], 2)]
1741 #[case(&[0xf0, 0x7f, 0x80, 0x80], 1)]
1742 #[case(&[0xf0, 0x80, 0x80, 0x80], 1)]
1743 #[case(&[0xf0, 0xc0, 0x80, 0x80], 1)]
1744 #[case(&[0xf4, 0x7f, 0x80, 0x80], 1)]
1745 #[case(&[0xf4, 0xc0, 0x80, 0x80], 1)]
1746 #[case(&[0xf4, 0x90, 0x80, 0x80], 1)]
1747 #[case(&[0xf0, 0x80, 0x7f, 0x80], 2)]
1748 #[case(&[0xf0, 0x80, 0xc0, 0x80], 2)]
1749 #[case(&[0xf0, 0xbf, 0x7f, 0x80], 2)]
1750 #[case(&[0xf0, 0xbf, 0xc0, 0x80], 2)]
1751 #[case(&[0xf0, 0x80, 0x80, 0x7f], 3)]
1752 #[case(&[0xf0, 0x80, 0x80, 0xc0], 3)]
1753 #[case(&[0xf0, 0xbf, 0xbf, 0x7f], 3)]
1754 #[case(&[0xf0, 0xbf, 0xbf, 0xc0], 3)]
1755 fn test_analyzer_single_error_bad_utf8_cont_byte(#[case] input: &[u8], #[case] offset: u8) {
1756 let mut buf = Vec::with_capacity(input.len() + 1);
1758 buf.push(b'"');
1759 buf.extend_from_slice(input);
1760
1761 {
1763 let mut an = FixedAnalyzer::new(buf.clone());
1764 assert_eq!(Pos::default(), *an.pos());
1765
1766 assert_eq!(Token::Err, an.next());
1767 assert_eq!(Pos::default(), *an.pos());
1768
1769 let err = an.err();
1770 assert_eq!(
1771 ErrorKind::BadUtf8ContByte {
1772 seq_len: input.len() as u8,
1773 offset,
1774 value: input[offset as usize]
1775 },
1776 err.kind()
1777 );
1778 assert_eq!(
1779 Pos {
1780 offset: 1,
1781 line: 1,
1782 col: 2, },
1784 *err.pos()
1785 );
1786
1787 assert_eq!(Token::Err, an.next());
1788 assert_eq!(Pos::default(), *an.pos());
1789 }
1790
1791 {
1793 let mut an = FixedAnalyzer::new(buf.clone());
1794 assert_eq!(Pos::default(), *an.pos());
1795
1796 assert_eq!(Token::Err, an.next());
1797 assert_eq!(Pos::default(), *an.pos());
1798
1799 assert_eq!(Token::Err, an.next());
1800 assert_eq!(Pos::default(), *an.pos());
1801 }
1802 }
1803
1804 #[rstest]
1805 #[case(0x80)]
1809 #[case(0x81)]
1810 #[case(0x82)]
1811 #[case(0x83)]
1812 #[case(0x84)]
1813 #[case(0x85)]
1814 #[case(0x86)]
1815 #[case(0x87)]
1816 #[case(0x88)]
1817 #[case(0x89)]
1818 #[case(0x8a)]
1819 #[case(0x8b)]
1820 #[case(0x8c)]
1821 #[case(0x8d)]
1822 #[case(0x8e)]
1823 #[case(0x8f)]
1824 #[case(0x90)]
1825 #[case(0x91)]
1826 #[case(0x92)]
1827 #[case(0x93)]
1828 #[case(0x94)]
1829 #[case(0x95)]
1830 #[case(0x96)]
1831 #[case(0x97)]
1832 #[case(0x98)]
1833 #[case(0x99)]
1834 #[case(0x9a)]
1835 #[case(0x9b)]
1836 #[case(0x9c)]
1837 #[case(0x9d)]
1838 #[case(0x9e)]
1839 #[case(0x9f)]
1840 #[case(0xa0)]
1841 #[case(0xa1)]
1842 #[case(0xa2)]
1843 #[case(0xa3)]
1844 #[case(0xa4)]
1845 #[case(0xa5)]
1846 #[case(0xa6)]
1847 #[case(0xa7)]
1848 #[case(0xa8)]
1849 #[case(0xa9)]
1850 #[case(0xaa)]
1851 #[case(0xab)]
1852 #[case(0xac)]
1853 #[case(0xad)]
1854 #[case(0xae)]
1855 #[case(0xaf)]
1856 #[case(0xb0)]
1857 #[case(0xb1)]
1858 #[case(0xb2)]
1859 #[case(0xb3)]
1860 #[case(0xb4)]
1861 #[case(0xb5)]
1862 #[case(0xb6)]
1863 #[case(0xb7)]
1864 #[case(0xb8)]
1865 #[case(0xb9)]
1866 #[case(0xba)]
1867 #[case(0xbb)]
1868 #[case(0xbc)]
1869 #[case(0xbd)]
1870 #[case(0xbe)]
1871 #[case(0xbf)]
1872 #[case(0xc0)]
1876 #[case(0xc1)]
1877 #[case(0xf5)]
1881 #[case(0xf6)]
1882 #[case(0xf7)]
1883 #[case(0xf8)]
1884 #[case(0xf9)]
1885 #[case(0xfa)]
1886 #[case(0xfb)]
1887 #[case(0xfc)]
1888 #[case(0xfd)]
1889 #[case(0xfe)]
1890 #[case(0xff)]
1891 fn test_analyzer_single_error_bad_utf8_start_byte(#[case] b: u8) {
1892 let mut buf = Vec::with_capacity(2);
1894 buf.push(b'"');
1895 buf.push(b);
1896
1897 {
1899 let mut an = FixedAnalyzer::new(buf.clone());
1900 assert_eq!(Pos::default(), *an.pos());
1901
1902 assert_eq!(Token::Err, an.next());
1903 assert_eq!(Pos::default(), *an.pos());
1904
1905 let err = an.err();
1906 assert_eq!(
1907 ErrorKind::UnexpectedByte {
1908 token: Some(Token::Str),
1909 expect: Expect::StrChar,
1910 actual: b,
1911 },
1912 err.kind()
1913 );
1914 assert_eq!(
1915 Pos {
1916 offset: 1,
1917 line: 1,
1918 col: 2,
1919 },
1920 *err.pos()
1921 );
1922
1923 assert_eq!(Token::Err, an.next());
1924 assert_eq!(Pos::default(), *an.pos());
1925 }
1926
1927 {
1929 let mut an = FixedAnalyzer::new(buf.clone());
1930 assert_eq!(Pos::default(), *an.pos());
1931
1932 assert_eq!(Token::Err, an.next());
1933 assert_eq!(Pos::default(), *an.pos());
1934
1935 assert_eq!(Token::Err, an.next());
1936 assert_eq!(Pos::default(), *an.pos());
1937 }
1938 }
1939
1940 #[rstest]
1941 #[case("-}", Expect::Digit)]
1945 #[case("-]", Expect::Digit)]
1946 #[case("-a", Expect::Digit)]
1947 #[case("- ", Expect::Digit)]
1948 #[case("00", Expect::DotExpOrBoundary)]
1952 #[case("01", Expect::DotExpOrBoundary)]
1953 #[case("02", Expect::DotExpOrBoundary)]
1954 #[case("03", Expect::DotExpOrBoundary)]
1955 #[case("04", Expect::DotExpOrBoundary)]
1956 #[case("05", Expect::DotExpOrBoundary)]
1957 #[case("06", Expect::DotExpOrBoundary)]
1958 #[case("07", Expect::DotExpOrBoundary)]
1959 #[case("08", Expect::DotExpOrBoundary)]
1960 #[case("09", Expect::DotExpOrBoundary)]
1961 #[case("-00", Expect::DotExpOrBoundary)]
1962 #[case("-01", Expect::DotExpOrBoundary)]
1963 #[case("-02", Expect::DotExpOrBoundary)]
1964 #[case("-03", Expect::DotExpOrBoundary)]
1965 #[case("-04", Expect::DotExpOrBoundary)]
1966 #[case("-05", Expect::DotExpOrBoundary)]
1967 #[case("-06", Expect::DotExpOrBoundary)]
1968 #[case("-07", Expect::DotExpOrBoundary)]
1969 #[case("-08", Expect::DotExpOrBoundary)]
1970 #[case("-09", Expect::DotExpOrBoundary)]
1971 #[case("0x", Expect::DotExpOrBoundary)]
1975 #[case("1x", Expect::DigitDotExpOrBoundary)]
1976 #[case("9/", Expect::DigitDotExpOrBoundary)]
1977 #[case("13456789000a", Expect::DigitDotExpOrBoundary)]
1978 #[case("0E,", Expect::DigitOrExpSign)]
1982 #[case("0e:", Expect::DigitOrExpSign)]
1983 #[case("1E ", Expect::DigitOrExpSign)]
1984 #[case("9ex", Expect::DigitOrExpSign)]
1985 #[case("0.a", Expect::Digit)]
1989 #[case("0.{", Expect::Digit)]
1990 #[case("0.:", Expect::Digit)]
1991 #[case("0.-", Expect::Digit)]
1992 #[case("-0.a", Expect::Digit)]
1993 #[case("-0.{", Expect::Digit)]
1994 #[case("-0.:", Expect::Digit)]
1995 #[case("-0.-", Expect::Digit)]
1996 #[case("1.E", Expect::Digit)]
1997 #[case("2.e", Expect::Digit)]
1998 #[case("3.a", Expect::Digit)]
1999 #[case("4.a", Expect::Digit)]
2000 #[case("5.a", Expect::Digit)]
2001 #[case("6.a", Expect::Digit)]
2002 #[case("7.a", Expect::Digit)]
2003 #[case("8.a", Expect::Digit)]
2004 #[case("9.a", Expect::Digit)]
2005 #[case("-1.E", Expect::Digit)]
2006 #[case("-2.e", Expect::Digit)]
2007 #[case("-3.a", Expect::Digit)]
2008 #[case("-4.a", Expect::Digit)]
2009 #[case("-5.a", Expect::Digit)]
2010 #[case("-6.a", Expect::Digit)]
2011 #[case("-7.a", Expect::Digit)]
2012 #[case("-8.a", Expect::Digit)]
2013 #[case("-9.a", Expect::Digit)]
2014 #[case("10.E", Expect::Digit)]
2015 #[case("20.e", Expect::Digit)]
2016 #[case("30.a", Expect::Digit)]
2017 #[case("40.a", Expect::Digit)]
2018 #[case("50.a", Expect::Digit)]
2019 #[case("60.a", Expect::Digit)]
2020 #[case("70.a", Expect::Digit)]
2021 #[case("80.a", Expect::Digit)]
2022 #[case("90.a", Expect::Digit)]
2023 #[case("-10.E", Expect::Digit)]
2024 #[case("-20.e", Expect::Digit)]
2025 #[case("-30.a", Expect::Digit)]
2026 #[case("-40.a", Expect::Digit)]
2027 #[case("-50.a", Expect::Digit)]
2028 #[case("-60.a", Expect::Digit)]
2029 #[case("-70.a", Expect::Digit)]
2030 #[case("-80.a", Expect::Digit)]
2031 #[case("-90.a", Expect::Digit)]
2032 #[case("0.0|", Expect::DigitExpOrBoundary)]
2036 #[case("-0.0-", Expect::DigitExpOrBoundary)]
2037 #[case("1.0D", Expect::DigitExpOrBoundary)]
2038 #[case("-1.5d", Expect::DigitExpOrBoundary)]
2039 #[case("9.01F", Expect::DigitExpOrBoundary)]
2040 #[case("-9.001f", Expect::DigitExpOrBoundary)]
2041 #[case("100.001x", Expect::DigitExpOrBoundary)]
2042 #[case("0Ee", Expect::DigitOrExpSign)]
2046 #[case("-0e.", Expect::DigitOrExpSign)]
2047 #[case("1Ee", Expect::DigitOrExpSign)]
2048 #[case("-1e.", Expect::DigitOrExpSign)]
2049 #[case("2.0Ef", Expect::DigitOrExpSign)]
2050 #[case("-2.0ef", Expect::DigitOrExpSign)]
2051 #[case("3.01e.", Expect::DigitOrExpSign)]
2052 #[case("-456789.10111213141516171819E\"", Expect::DigitOrExpSign)]
2053 #[case("0E++", Expect::Digit)]
2057 #[case("0e--", Expect::Digit)]
2058 #[case("1E+x", Expect::Digit)]
2059 #[case("2e+\"", Expect::Digit)]
2060 #[case("3E+:", Expect::Digit)]
2061 #[case("4e+,", Expect::Digit)]
2062 #[case("5E+{", Expect::Digit)]
2063 #[case("6e-}", Expect::Digit)]
2064 #[case("7E-[", Expect::Digit)]
2065 #[case("8e-]", Expect::Digit)]
2066 #[case("9E- ", Expect::Digit)]
2067 #[case("-0E+\t", Expect::Digit)]
2068 #[case("-0e-e", Expect::Digit)]
2069 #[case("-1E+E", Expect::Digit)]
2070 #[case("-2e+.", Expect::Digit)]
2071 #[case("-3E+!", Expect::Digit)]
2072 #[case("-4e+@", Expect::Digit)]
2073 #[case("-5E+#", Expect::Digit)]
2074 #[case("-6e-$", Expect::Digit)]
2075 #[case("-7E-%", Expect::Digit)]
2076 #[case("-8e-^", Expect::Digit)]
2077 #[case("-9E-&", Expect::Digit)]
2078 #[case("0.1E++", Expect::Digit)]
2079 #[case("0.1e--", Expect::Digit)]
2080 #[case("1.1E+x", Expect::Digit)]
2081 #[case("2.1e+\"", Expect::Digit)]
2082 #[case("3.1E+:", Expect::Digit)]
2083 #[case("4.1e+,", Expect::Digit)]
2084 #[case("5.1E+{", Expect::Digit)]
2085 #[case("6.1e-}", Expect::Digit)]
2086 #[case("7.1E-[", Expect::Digit)]
2087 #[case("8.1e-]", Expect::Digit)]
2088 #[case("9.1E- ", Expect::Digit)]
2089 #[case("-0.234E+\t", Expect::Digit)]
2090 #[case("-0.234e-e", Expect::Digit)]
2091 #[case("-1.234E+E", Expect::Digit)]
2092 #[case("-2.234e+.", Expect::Digit)]
2093 #[case("-3.234E+!", Expect::Digit)]
2094 #[case("-4.234e+@", Expect::Digit)]
2095 #[case("-5.234E+#", Expect::Digit)]
2096 #[case("-6.234e-$", Expect::Digit)]
2097 #[case("-7.234E-%", Expect::Digit)]
2098 #[case("-8.234e-^", Expect::Digit)]
2099 #[case("-9.234E-&", Expect::Digit)]
2100 #[case("0E0e", Expect::DigitOrBoundary)]
2104 #[case("0E+0e", Expect::DigitOrBoundary)]
2105 #[case("0E-0e", Expect::DigitOrBoundary)]
2106 #[case("0.0e0e", Expect::DigitOrBoundary)]
2107 #[case("0.00e00e", Expect::DigitOrBoundary)]
2108 #[case("1.1E+1e", Expect::DigitOrBoundary)]
2109 #[case("11.11E+11e", Expect::DigitOrBoundary)]
2110 #[case("99.999E-999e", Expect::DigitOrBoundary)]
2111 fn test_analyzer_single_error_bad_number(#[case] input: &str, #[case] expect: Expect) {
2112 let mut an = FixedAnalyzer::new(input.as_bytes());
2113
2114 assert_eq!(Token::Err, an.next());
2115 assert_eq!(Pos::default(), *an.pos());
2116
2117 let err = an.err();
2118 assert_eq!(
2119 ErrorKind::UnexpectedByte {
2120 token: Some(Token::Num),
2121 expect,
2122 actual: *input.as_bytes().last().unwrap(),
2123 },
2124 err.kind(),
2125 "input={input:?}"
2126 );
2127 assert_eq!(
2128 Pos {
2129 offset: input.len() - 1,
2130 line: 1,
2131 col: input.len(),
2132 },
2133 *err.pos(),
2134 "input={input:?}"
2135 );
2136
2137 assert_eq!(Token::Err, an.next(), "input={input:?}");
2138 assert_eq!(Pos::default(), *an.pos(), "input={input:?}");
2139 }
2140
2141 #[rstest]
2142 #[case(r#"\0"#, Expect::EscChar)]
2143 #[case(r#"\a"#, Expect::EscChar)]
2144 #[case(r#"\v"#, Expect::EscChar)]
2145 #[case(r#"\x"#, Expect::EscChar)]
2146 #[case(r#"\uG"#, Expect::UnicodeEscHexDigit)]
2147 #[case(r#"\u:"#, Expect::UnicodeEscHexDigit)]
2148 #[case(r#"\u_"#, Expect::UnicodeEscHexDigit)]
2149 #[case(r#"\u0G"#, Expect::UnicodeEscHexDigit)]
2150 #[case(r#"\u1:"#, Expect::UnicodeEscHexDigit)]
2151 #[case(r#"\u2,"#, Expect::UnicodeEscHexDigit)]
2152 #[case(r#"\u3["#, Expect::UnicodeEscHexDigit)]
2153 #[case(r#"\u4]"#, Expect::UnicodeEscHexDigit)]
2154 #[case(r#"\u5{"#, Expect::UnicodeEscHexDigit)]
2155 #[case(r#"\u6}"#, Expect::UnicodeEscHexDigit)]
2156 #[case(r#"\u7."#, Expect::UnicodeEscHexDigit)]
2157 #[case(r#"\u8""#, Expect::UnicodeEscHexDigit)]
2158 #[case(r#"\u9g"#, Expect::UnicodeEscHexDigit)]
2159 #[case(r#"\uAG"#, Expect::UnicodeEscHexDigit)]
2160 #[case(r#"\ua_"#, Expect::UnicodeEscHexDigit)]
2161 #[case(r#"\uB_"#, Expect::UnicodeEscHexDigit)]
2162 #[case(r#"\ub_"#, Expect::UnicodeEscHexDigit)]
2163 #[case(r#"\uC_"#, Expect::UnicodeEscHexDigit)]
2164 #[case(r#"\uc_"#, Expect::UnicodeEscHexDigit)]
2165 #[case(r#"\uD_"#, Expect::UnicodeEscHexDigit)]
2166 #[case(r#"\ud_"#, Expect::UnicodeEscHexDigit)]
2167 #[case(r#"\uE_"#, Expect::UnicodeEscHexDigit)]
2168 #[case(r#"\ue_"#, Expect::UnicodeEscHexDigit)]
2169 #[case(r#"\uF_"#, Expect::UnicodeEscHexDigit)]
2170 #[case(r#"\uf_"#, Expect::UnicodeEscHexDigit)]
2171 #[case(r#"\u1a_"#, Expect::UnicodeEscHexDigit)]
2172 #[case(r#"\ub2C_"#, Expect::UnicodeEscHexDigit)]
2173 #[case(r#"\ud800\ug"#, Expect::UnicodeEscHexDigit)]
2174 #[case(r#"\ud800\u0:"#, Expect::UnicodeEscHexDigit)]
2175 #[case(r#"\ud800\u00:"#, Expect::UnicodeEscHexDigit)]
2176 #[case(r#"\ud800\u000:"#, Expect::UnicodeEscHexDigit)]
2177 fn test_analyzer_single_error_bad_escape(#[case] input: &str, #[case] expect: Expect) {
2178 let mut s = String::with_capacity(1 + input.len());
2179 s.push('"');
2180 s.push_str(input);
2181
2182 let mut an = FixedAnalyzer::new(s.as_bytes());
2183
2184 assert_eq!(Token::Err, an.next());
2185 assert_eq!(Pos::default(), *an.pos());
2186
2187 let err = an.err();
2188 assert_eq!(
2189 ErrorKind::UnexpectedByte {
2190 token: Some(Token::Str),
2191 expect,
2192 actual: *input.as_bytes().last().unwrap(),
2193 },
2194 err.kind(),
2195 "input={input:?}"
2196 );
2197 assert_eq!(
2198 Pos {
2199 offset: s.len() - 1,
2200 line: 1,
2201 col: s.len(),
2202 },
2203 *err.pos(),
2204 "input={input:?}"
2205 );
2206
2207 assert_eq!(Token::Err, an.next(), "input={input:?}");
2208 assert_eq!(Pos::default(), *an.pos(), "input={input:?}");
2209 }
2210
2211 #[rstest]
2212 #[case::nul(0x00)]
2213 #[case::soh(0x01)]
2214 #[case::stx(0x02)]
2215 #[case::etx(0x03)]
2216 #[case::eot(0x04)]
2217 #[case::enq(0x05)]
2218 #[case::ack(0x06)]
2219 #[case::bel(0x07)]
2220 #[case::bs(0x08)]
2221 #[case::ht(0x09)]
2222 #[case::lf(0x0A)]
2223 #[case::vt(0x0B)]
2224 #[case::ff(0x0C)]
2225 #[case::cr(0x0D)]
2226 #[case::so(0x0E)]
2227 #[case::si(0x0F)]
2228 #[case::dle(0x10)]
2229 #[case::dc1(0x11)]
2230 #[case::dc2(0x12)]
2231 #[case::dc3(0x13)]
2232 #[case::dc4(0x14)]
2233 #[case::nak(0x15)]
2234 #[case::syn(0x16)]
2235 #[case::etb(0x17)]
2236 #[case::can(0x18)]
2237 #[case::em(0x19)]
2238 #[case::sub(0x1A)]
2239 #[case::esc(0x1B)]
2240 #[case::fs(0x1C)]
2241 #[case::gs(0x1D)]
2242 #[case::rs(0x1E)]
2243 #[case::us(0x1F)]
2244 fn test_analyzer_single_error_control_char(#[case] ctrl: u8) {
2245 static PREFIXES: [&str; 6] = ["", "a", r#"\u1234"#, "cafΓ©", "π", "π§Ά"];
2246 static COLS: [usize; 6] = [0, 1, 6, 4, 1, 1];
2247 let mut s: String = '"'.into();
2248
2249 for (prefix, cols) in PREFIXES.iter().zip(COLS.iter().copied()) {
2250 s.truncate(1);
2251 s.push_str(prefix);
2252 s.push(ctrl as char);
2253
2254 let mut an = FixedAnalyzer::new(s.as_bytes());
2255
2256 assert_eq!(Token::Err, an.next());
2257 assert_eq!(Pos::default(), *an.pos());
2258
2259 let err = an.err();
2260 assert_eq!(
2261 ErrorKind::UnexpectedByte {
2262 token: Some(Token::Str),
2263 expect: Expect::StrChar,
2264 actual: ctrl,
2265 },
2266 err.kind(),
2267 "s={s:?}"
2268 );
2269 assert_eq!(
2270 Pos {
2271 offset: s.len() - 1,
2272 line: 1,
2273 col: 2 + cols,
2274 },
2275 *err.pos(),
2276 "s={s:?}"
2277 );
2278 }
2279 }
2280
2281 #[rstest]
2282 #[case("f", 'a', Token::LitFalse)]
2283 #[case("fa", 'l', Token::LitFalse)]
2284 #[case("fal", 's', Token::LitFalse)]
2285 #[case("fals", 'e', Token::LitFalse)]
2286 #[case("n", 'u', Token::LitNull)]
2287 #[case("nu", 'l', Token::LitNull)]
2288 #[case("nul", 'l', Token::LitNull)]
2289 #[case("t", 'r', Token::LitTrue)]
2290 #[case("tr", 'u', Token::LitTrue)]
2291 #[case("tru", 'e', Token::LitTrue)]
2292 fn test_analyzer_single_error_expect_char(
2293 #[case] input: &str,
2294 #[case] expect: char,
2295 #[case] expect_token: Token,
2296 ) {
2297 let bad_chars = &[
2298 b'[', b']', b':', b'{', b'}', b',', b'"', b'\\', b'$', b' ', b'\0', b'\t', b'A', b'x',
2299 b'X', b'0', b'9',
2300 ];
2301 let mut buf = Vec::with_capacity(input.len() + 1);
2302 buf.extend_from_slice(input.as_bytes());
2303 buf.push(b'_');
2304
2305 for (i, actual) in bad_chars.into_iter().enumerate() {
2306 buf[input.len()] = *actual;
2307
2308 let mut an = FixedAnalyzer::new(buf.clone());
2309
2310 assert_eq!(Token::Err, an.next());
2311 assert_eq!(Pos::default(), *an.pos());
2312
2313 let err = an.err();
2314 assert_eq!(
2315 ErrorKind::UnexpectedByte {
2316 token: Some(expect_token),
2317 expect: Expect::Char(expect),
2318 actual: *actual,
2319 },
2320 err.kind(),
2321 "input={input:?}, i={i}, actual={actual:02x}"
2322 );
2323 assert_eq!(
2324 Pos {
2325 offset: input.len(),
2326 line: 1,
2327 col: buf.len(),
2328 },
2329 *err.pos(),
2330 "input={input:?}, i={i}, actual={actual:02x}"
2331 );
2332
2333 assert_eq!(
2334 Token::Err,
2335 an.next(),
2336 "input={input:?}, i={i}, actual={actual:02x}"
2337 );
2338 assert_eq!(
2339 Pos::default(),
2340 *an.pos(),
2341 "input={input:?}, i={i}, actual={actual:02x}"
2342 );
2343 }
2344 }
2345
2346 #[rstest]
2347 #[case("falsep", Token::LitFalse)]
2348 #[case("nullE", Token::LitNull)]
2349 #[case("true0", Token::LitTrue)]
2350 fn test_analyzer_single_error_expect_boundary(
2351 #[case] input: &str,
2352 #[case] expect_token: Token,
2353 ) {
2354 let actual = input.as_bytes().last().copied().unwrap();
2355 let mut an = FixedAnalyzer::new(input.as_bytes());
2356
2357 assert_eq!(Token::Err, an.next());
2358 assert_eq!(Pos::default(), *an.pos());
2359
2360 let err = an.err();
2361 assert_eq!(
2362 ErrorKind::UnexpectedByte {
2363 token: Some(expect_token),
2364 expect: Expect::Boundary,
2365 actual,
2366 },
2367 err.kind(),
2368 );
2369 assert_eq!(
2370 Pos {
2371 offset: input.len() - 1,
2372 line: 1,
2373 col: input.len(),
2374 },
2375 *err.pos(),
2376 );
2377 }
2378
2379 #[rstest]
2380 #[case(r#"f"#, Token::LitFalse)]
2381 #[case(r#"fa"#, Token::LitFalse)]
2382 #[case(r#"fal"#, Token::LitFalse)]
2383 #[case(r#"n"#, Token::LitNull)]
2384 #[case(r#"nu"#, Token::LitNull)]
2385 #[case(r#"nul"#, Token::LitNull)]
2386 #[case(r#"-"#, Token::Num)]
2387 #[case(r#"0."#, Token::Num)]
2388 #[case(r#"1."#, Token::Num)]
2389 #[case(r#"2."#, Token::Num)]
2390 #[case(r#"3."#, Token::Num)]
2391 #[case(r#"4."#, Token::Num)]
2392 #[case(r#"5."#, Token::Num)]
2393 #[case(r#"6."#, Token::Num)]
2394 #[case(r#"7."#, Token::Num)]
2395 #[case(r#"8."#, Token::Num)]
2396 #[case(r#"9."#, Token::Num)]
2397 #[case(r#"10."#, Token::Num)]
2398 #[case(r#"0E"#, Token::Num)]
2399 #[case(r#"0E+"#, Token::Num)]
2400 #[case(r#"0E-"#, Token::Num)]
2401 #[case(r#"0e"#, Token::Num)]
2402 #[case(r#"0e+"#, Token::Num)]
2403 #[case(r#"0e-"#, Token::Num)]
2404 #[case(r#"1.0E"#, Token::Num)]
2405 #[case(r#"1.0E+"#, Token::Num)]
2406 #[case(r#"1.0E-"#, Token::Num)]
2407 #[case(r#"1.0e"#, Token::Num)]
2408 #[case(r#"1.0e+"#, Token::Num)]
2409 #[case(r#"1.0e-"#, Token::Num)]
2410 #[case(r#"""#, Token::Str)]
2411 #[case(r#""a"#, Token::Str)]
2412 #[case(r#""\"#, Token::Str)]
2413 #[case(r#""\u"#, Token::Str)]
2414 #[case(r#""\u1"#, Token::Str)]
2415 #[case(r#""\u12"#, Token::Str)]
2416 #[case(r#""\u123"#, Token::Str)]
2417 #[case(r#""\u1234"#, Token::Str)]
2418 #[case(r#""\u1234 foo bar"#, Token::Str)]
2419 #[case(r#"t"#, Token::LitTrue)]
2420 #[case(r#"tr"#, Token::LitTrue)]
2421 #[case(r#"tru"#, Token::LitTrue)]
2422 fn test_analyzer_single_error_unexpected_eof(#[case] input: &str, #[case] expect: Token) {
2423 {
2425 let mut an = FixedAnalyzer::new(input.as_bytes());
2426 assert_eq!(Pos::default(), *an.pos());
2427
2428 assert_eq!(Token::Err, an.next());
2429 assert_eq!(Pos::default(), *an.pos());
2430
2431 let err = an.err();
2432 assert_eq!(
2433 ErrorKind::UnexpectedEof(expect),
2434 err.kind(),
2435 "input = {input:?}, expect = {expect:?}"
2436 );
2437 assert_eq!(
2438 Pos {
2439 offset: input.len(),
2440 line: 1,
2441 col: 1 + input.len(),
2442 },
2443 *err.pos(),
2444 "input = {input:?}, expect = {expect:?}"
2445 );
2446
2447 assert_eq!(Token::Err, an.next());
2448 assert_eq!(Pos::default(), *an.pos());
2449 }
2450
2451 {
2453 let mut an = FixedAnalyzer::new(input.as_bytes());
2454 assert_eq!(Pos::default(), *an.pos());
2455
2456 assert_eq!(Token::Err, an.next());
2457 assert_eq!(Pos::default(), *an.pos());
2458
2459 assert_eq!(Token::Err, an.next());
2460 assert_eq!(Pos::default(), *an.pos());
2461 }
2462 }
2463
2464 #[rstest]
2465 #[case(0x00)]
2466 #[case(0x01)]
2467 #[case(0x02)]
2468 #[case(0x03)]
2469 #[case(0x04)]
2470 #[case(0x05)]
2471 #[case(0x06)]
2472 #[case(0x07)]
2473 #[case(0x08)]
2474 #[case(0x0b)]
2475 #[case(0x0c)]
2476 #[case(0x0e)]
2477 #[case(0x0f)]
2478 #[case(0x10)]
2479 #[case(0x11)]
2480 #[case(0x12)]
2481 #[case(0x13)]
2482 #[case(0x14)]
2483 #[case(0x15)]
2484 #[case(0x16)]
2485 #[case(0x17)]
2486 #[case(0x18)]
2487 #[case(0x19)]
2488 #[case(0x1a)]
2489 #[case(0x1b)]
2490 #[case(0x1c)]
2491 #[case(0x1d)]
2492 #[case(0x1e)]
2493 #[case(0x1f)]
2494 #[case(b'\'')]
2495 #[case(b'+')]
2496 #[case(b'.')]
2497 #[case(b'E')]
2498 #[case(b'\\')]
2499 #[case(b'e')]
2500 #[case(0x7f)]
2501 #[case(0x80)]
2502 #[case(0xbf)]
2503 #[case(0xc0)]
2504 #[case(0xc7)]
2505 #[case(0xcf)]
2506 #[case(0xd0)]
2507 #[case(0xd7)]
2508 #[case(0xdf)]
2509 #[case(0xe0)]
2510 #[case(0xe7)]
2511 #[case(0xef)]
2512 #[case(0xf0)]
2513 #[case(0xf7)]
2514 #[case(0xff)]
2515 fn test_analyzer_error_non_token_start(#[case] bad: u8) {
2516 {
2518 let mut an = FixedAnalyzer::new(vec![bad]);
2519 assert_eq!(Pos::default(), *an.pos());
2520
2521 assert_eq!(Token::Err, an.next());
2522 assert_eq!(Pos::default(), *an.pos());
2523
2524 let err = an.err();
2525 assert_eq!(
2526 ErrorKind::UnexpectedByte {
2527 token: None,
2528 expect: Expect::TokenStartChar,
2529 actual: bad
2530 },
2531 err.kind(),
2532 "bad = {bad:02x}"
2533 );
2534 assert_eq!(Pos::default(), *err.pos(), "bad = {bad:02x}");
2535
2536 assert_eq!(Token::Err, an.next());
2537 assert_eq!(Pos::default(), *an.pos());
2538 }
2539
2540 {
2542 let valid_list = [
2543 "[",
2544 "]",
2545 "false ",
2546 "null ",
2547 "1 ",
2548 "{",
2549 "}",
2550 r#""a""#,
2551 r#""\u0000 foo \\//""#,
2552 "true\t",
2553 ];
2554
2555 for (i, valid) in valid_list.into_iter().enumerate() {
2556 let mut buf: Vec<u8> = Vec::with_capacity(valid.len() + 1);
2557 buf.extend_from_slice(valid.as_bytes());
2558 buf.push(bad);
2559
2560 let mut an = FixedAnalyzer::new(buf);
2561
2562 let token = an.next();
2563 assert!(
2564 !token.is_terminal(),
2565 "valid = {valid:?}, i = {i}, bad = {bad:02x}"
2566 );
2567 if token.is_literal() || token == Token::Num {
2568 assert_eq!(
2569 Token::White,
2570 an.next(),
2571 "valid = {valid:?}, i = {i}, bad = {bad:02x}"
2572 );
2573 }
2574
2575 assert_eq!(Token::Err, an.next());
2576 let err = an.err();
2577 assert_eq!(
2578 ErrorKind::UnexpectedByte {
2579 token: None,
2580 expect: Expect::TokenStartChar,
2581 actual: bad
2582 },
2583 err.kind(),
2584 "valid = {valid:?}, i = {i}, bad = {bad:02x}"
2585 );
2586 assert_eq!(
2587 Pos {
2588 offset: valid.len(),
2589 line: 1,
2590 col: 1 + valid.len(),
2591 },
2592 *err.pos(),
2593 "valid = {valid:?}, i = {i}, bad = {bad:02x}"
2594 );
2595 }
2596 }
2597 }
2598
2599 #[rstest]
2600 #[case(br#"123.456789:a"#)]
2601 #[case(br#"<"#)]
2602 #[case(br#""foo" "bar" "baz"#)]
2603 #[should_panic(
2604 expected = "no content: last `next()` returned `Token::Err` (use `err()` instead)"
2605 )]
2606 fn test_analyzer_panic_no_content(#[case] input: &[u8]) {
2607 let mut an = FixedAnalyzer::new(input);
2608
2609 loop {
2610 if an.next() == Token::Err {
2611 break;
2612 }
2613 }
2614
2615 let _ = an.content();
2616 }
2617
    // End-to-end smoke test: runs the analyzer over a multi-line text containing
    // every token type and checks each step against a hand-written table.
    //
    // Each `EXPECT` entry is `(token, position, literal, unescaped)`:
    // - `token` is the value `next()` must return;
    // - `position` is the value `pos()` must report after that call;
    // - `literal` is the expected `content().literal()`;
    // - `unescaped` is `Some(text)` when the content must report itself as
    //   escaped and unescape to `text`, and `None` when the content must not be
    //   escaped (its unescaped form is then compared to `literal`).
    //
    // The table ends with three `Eof` entries, so `next()` is called twice more
    // after the end of input has first been reported.
    //
    // NOTE(review): every expected offset/column depends on the exact bytes of
    // `JSON_TEXT` (runs of spaces, multi-byte characters) — confirm the literals
    // survive any reformatting or re-encoding of this file unchanged.
    #[test]
    fn test_analyzer_smoke() {
        // Input text. It is only analyzed lexically in this test.
        const JSON_TEXT: &str = r#"{
  "foo":["bar",1,5e-7, false, null ,true, {"baz":"\\\"aÒÒbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb©’çc\"\\","qux":[{},{},null]}],
  "Lorem ipsum dolor sit amet, consectetur adipiscing elit." : "Cras sed ipsum at arcu porta blandit. Nunc eu mauris lacus. Vivamus dignissim tincidunt gravida. Fusce quis neque enim. Sed ac leo neque. Praesent feugiat efficitur eros, quis venenatis urna porttitor condimentum. Mauris finibus dui non vulputate mattis. Nullam scelerisque nibh vel dui egestas luctus. Vestibulum commodo mi ex. In laoreet hendrerit fringilla.\n\nPraesent vel ex sed dolor fermentum lobortis.",
  "π": ["π","π", "π", "γγγ«γ‘γ―γδΈη"],
  "abc\u0020123": {{{"inner":[[[-1,-2.0,-3.00e+0,-4E-0,3.141592653589793238462643383279,null]]]}}}
}"#;
        // Expected (token, position, literal, unescaped) for each `next()` call,
        // in order.
        const EXPECT: &[(Token, Pos, &str, Option<&str>)] = &[
            (
                // Tokens starting on line 1.
                Token::ObjBegin,
                Pos {
                    offset: 0,
                    line: 1,
                    col: 1,
                },
                "{",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 1,
                    line: 1,
                    col: 2,
                },
                "\n ",
                None,
            ),
            (
                // Tokens starting on line 2.
                Token::Str,
                Pos {
                    offset: 4,
                    line: 2,
                    col: 3,
                },
                r#""foo""#,
                None,
            ),
            (
                Token::NameSep,
                Pos {
                    offset: 9,
                    line: 2,
                    col: 8,
                },
                ":",
                None,
            ),
            (
                Token::ArrBegin,
                Pos {
                    offset: 10,
                    line: 2,
                    col: 9,
                },
                "[",
                None,
            ),
            (
                Token::Str,
                Pos {
                    offset: 11,
                    line: 2,
                    col: 10,
                },
                r#""bar""#,
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 16,
                    line: 2,
                    col: 15,
                },
                ",",
                None,
            ),
            (
                Token::Num,
                Pos {
                    offset: 17,
                    line: 2,
                    col: 16,
                },
                "1",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 18,
                    line: 2,
                    col: 17,
                },
                ",",
                None,
            ),
            (
                Token::Num,
                Pos {
                    offset: 19,
                    line: 2,
                    col: 18,
                },
                "5e-7",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 23,
                    line: 2,
                    col: 22,
                },
                ",",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 24,
                    line: 2,
                    col: 23,
                },
                " ",
                None,
            ),
            (
                Token::LitFalse,
                Pos {
                    offset: 25,
                    line: 2,
                    col: 24,
                },
                "false",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 30,
                    line: 2,
                    col: 29,
                },
                ",",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 31,
                    line: 2,
                    col: 30,
                },
                " ",
                None,
            ),
            (
                Token::LitNull,
                Pos {
                    offset: 32,
                    line: 2,
                    col: 31,
                },
                "null",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 36,
                    line: 2,
                    col: 35,
                },
                " ",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 38,
                    line: 2,
                    col: 37,
                },
                ",",
                None,
            ),
            (
                Token::LitTrue,
                Pos {
                    offset: 39,
                    line: 2,
                    col: 38,
                },
                "true",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 43,
                    line: 2,
                    col: 42,
                },
                ",",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 44,
                    line: 2,
                    col: 43,
                },
                " ",
                None,
            ),
            (
                Token::ObjBegin,
                Pos {
                    offset: 45,
                    line: 2,
                    col: 44,
                },
                "{",
                None,
            ),
            (
                Token::Str,
                Pos {
                    offset: 46,
                    line: 2,
                    col: 45,
                },
                r#""baz""#,
                None,
            ),
            (
                Token::NameSep,
                Pos {
                    offset: 51,
                    line: 2,
                    col: 50,
                },
                ":",
                None,
            ),
            (
                // Escaped string: the unescaped form differs from the literal.
                Token::Str,
                Pos {
                    offset: 52,
                    line: 2,
                    col: 51,
                },
                r#""\\\"aÒÒbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb©’çc\"\\""#,
                Some(
                    r#""\"aÒÒbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb©’çc"\""#,
                ),
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 149,
                    line: 2,
                    col: 143,
                },
                ",",
                None,
            ),
            (
                Token::Str,
                Pos {
                    offset: 150,
                    line: 2,
                    col: 144,
                },
                r#""qux""#,
                None,
            ),
            (
                Token::NameSep,
                Pos {
                    offset: 155,
                    line: 2,
                    col: 149,
                },
                ":",
                None,
            ),
            (
                Token::ArrBegin,
                Pos {
                    offset: 156,
                    line: 2,
                    col: 150,
                },
                "[",
                None,
            ),
            (
                Token::ObjBegin,
                Pos {
                    offset: 157,
                    line: 2,
                    col: 151,
                },
                "{",
                None,
            ),
            (
                Token::ObjEnd,
                Pos {
                    offset: 158,
                    line: 2,
                    col: 152,
                },
                "}",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 159,
                    line: 2,
                    col: 153,
                },
                ",",
                None,
            ),
            (
                Token::ObjBegin,
                Pos {
                    offset: 160,
                    line: 2,
                    col: 154,
                },
                "{",
                None,
            ),
            (
                Token::ObjEnd,
                Pos {
                    offset: 161,
                    line: 2,
                    col: 155,
                },
                "}",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 162,
                    line: 2,
                    col: 156,
                },
                ",",
                None,
            ),
            (
                Token::LitNull,
                Pos {
                    offset: 163,
                    line: 2,
                    col: 157,
                },
                "null",
                None,
            ),
            (
                Token::ArrEnd,
                Pos {
                    offset: 167,
                    line: 2,
                    col: 161,
                },
                "]",
                None,
            ),
            (
                Token::ObjEnd,
                Pos {
                    offset: 168,
                    line: 2,
                    col: 162,
                },
                "}",
                None,
            ),
            (
                Token::ArrEnd,
                Pos {
                    offset: 169,
                    line: 2,
                    col: 163,
                },
                "]",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 170,
                    line: 2,
                    col: 164,
                },
                ",",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 171,
                    line: 2,
                    col: 165,
                },
                "\n ",
                None,
            ),
            (
                // Tokens starting on line 3.
                Token::Str,
                Pos {
                    offset: 174,
                    line: 3,
                    col: 3,
                },
                r#""Lorem ipsum dolor sit amet, consectetur adipiscing elit.""#,
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 232,
                    line: 3,
                    col: 61,
                },
                " ",
                None,
            ),
            (
                Token::NameSep,
                Pos {
                    offset: 233,
                    line: 3,
                    col: 62,
                },
                ":",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 234,
                    line: 3,
                    col: 63,
                },
                " ",
                None,
            ),
            (
                // Escaped string: the two `\n` escapes unescape to real newlines,
                // which is why the expected unescaped text spans several lines.
                Token::Str,
                Pos {
                    offset: 235,
                    line: 3,
                    col: 64,
                },
                r#""Cras sed ipsum at arcu porta blandit. Nunc eu mauris lacus. Vivamus dignissim tincidunt gravida. Fusce quis neque enim. Sed ac leo neque. Praesent feugiat efficitur eros, quis venenatis urna porttitor condimentum. Mauris finibus dui non vulputate mattis. Nullam scelerisque nibh vel dui egestas luctus. Vestibulum commodo mi ex. In laoreet hendrerit fringilla.\n\nPraesent vel ex sed dolor fermentum lobortis.""#,
                Some(
                    r#""Cras sed ipsum at arcu porta blandit. Nunc eu mauris lacus. Vivamus dignissim tincidunt gravida. Fusce quis neque enim. Sed ac leo neque. Praesent feugiat efficitur eros, quis venenatis urna porttitor condimentum. Mauris finibus dui non vulputate mattis. Nullam scelerisque nibh vel dui egestas luctus. Vestibulum commodo mi ex. In laoreet hendrerit fringilla.

Praesent vel ex sed dolor fermentum lobortis.""#,
                ),
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 646,
                    line: 3,
                    col: 475,
                },
                ",",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 647,
                    line: 3,
                    col: 476,
                },
                "\n ",
                None,
            ),
            (
                // Tokens starting on line 4. From here on, offsets advance
                // faster than columns across the non-ASCII strings.
                Token::Str,
                Pos {
                    offset: 650,
                    line: 4,
                    col: 3,
                },
                r#""π""#,
                None,
            ),
            (
                Token::NameSep,
                Pos {
                    offset: 656,
                    line: 4,
                    col: 6,
                },
                ":",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 657,
                    line: 4,
                    col: 7,
                },
                " ",
                None,
            ),
            (
                Token::ArrBegin,
                Pos {
                    offset: 660,
                    line: 4,
                    col: 10,
                },
                "[",
                None,
            ),
            (
                Token::Str,
                Pos {
                    offset: 661,
                    line: 4,
                    col: 11,
                },
                r#""π""#,
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 667,
                    line: 4,
                    col: 14,
                },
                ",",
                None,
            ),
            (
                Token::Str,
                Pos {
                    offset: 668,
                    line: 4,
                    col: 15,
                },
                r#""π""#,
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 674,
                    line: 4,
                    col: 18,
                },
                ",",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 675,
                    line: 4,
                    col: 19,
                },
                " ",
                None,
            ),
            (
                Token::Str,
                Pos {
                    offset: 676,
                    line: 4,
                    col: 20,
                },
                r#""π""#,
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 682,
                    line: 4,
                    col: 23,
                },
                ",",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 683,
                    line: 4,
                    col: 24,
                },
                " ",
                None,
            ),
            (
                Token::Str,
                Pos {
                    offset: 684,
                    line: 4,
                    col: 25,
                },
                r#""γγγ«γ‘γ―γδΈη""#,
                None,
            ),
            (
                Token::ArrEnd,
                Pos {
                    offset: 710,
                    line: 4,
                    col: 35,
                },
                "]",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 711,
                    line: 4,
                    col: 36,
                },
                ",",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 712,
                    line: 4,
                    col: 37,
                },
                "\n ",
                None,
            ),
            (
                // Tokens starting on line 5. The `\u0020` escape unescapes to a
                // single space.
                Token::Str,
                Pos {
                    offset: 715,
                    line: 5,
                    col: 3,
                },
                r#""abc\u0020123""#,
                Some(r#""abc 123""#),
            ),
            (
                Token::NameSep,
                Pos {
                    offset: 729,
                    line: 5,
                    col: 17,
                },
                ":",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 730,
                    line: 5,
                    col: 18,
                },
                " ",
                None,
            ),
            (
                Token::ObjBegin,
                Pos {
                    offset: 731,
                    line: 5,
                    col: 19,
                },
                "{",
                None,
            ),
            (
                Token::ObjBegin,
                Pos {
                    offset: 732,
                    line: 5,
                    col: 20,
                },
                "{",
                None,
            ),
            (
                Token::ObjBegin,
                Pos {
                    offset: 733,
                    line: 5,
                    col: 21,
                },
                "{",
                None,
            ),
            (
                Token::Str,
                Pos {
                    offset: 734,
                    line: 5,
                    col: 22,
                },
                r#""inner""#,
                None,
            ),
            (
                Token::NameSep,
                Pos {
                    offset: 741,
                    line: 5,
                    col: 29,
                },
                ":",
                None,
            ),
            (
                Token::ArrBegin,
                Pos {
                    offset: 742,
                    line: 5,
                    col: 30,
                },
                "[",
                None,
            ),
            (
                Token::ArrBegin,
                Pos {
                    offset: 743,
                    line: 5,
                    col: 31,
                },
                "[",
                None,
            ),
            (
                Token::ArrBegin,
                Pos {
                    offset: 744,
                    line: 5,
                    col: 32,
                },
                "[",
                None,
            ),
            (
                Token::Num,
                Pos {
                    offset: 745,
                    line: 5,
                    col: 33,
                },
                "-1",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 747,
                    line: 5,
                    col: 35,
                },
                ",",
                None,
            ),
            (
                Token::Num,
                Pos {
                    offset: 748,
                    line: 5,
                    col: 36,
                },
                "-2.0",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 752,
                    line: 5,
                    col: 40,
                },
                ",",
                None,
            ),
            (
                Token::Num,
                Pos {
                    offset: 753,
                    line: 5,
                    col: 41,
                },
                "-3.00e+0",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 761,
                    line: 5,
                    col: 49,
                },
                ",",
                None,
            ),
            (
                Token::Num,
                Pos {
                    offset: 762,
                    line: 5,
                    col: 50,
                },
                "-4E-0",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 767,
                    line: 5,
                    col: 55,
                },
                ",",
                None,
            ),
            (
                Token::Num,
                Pos {
                    offset: 768,
                    line: 5,
                    col: 56,
                },
                "3.141592653589793238462643383279",
                None,
            ),
            (
                Token::ValueSep,
                Pos {
                    offset: 800,
                    line: 5,
                    col: 88,
                },
                ",",
                None,
            ),
            (
                Token::LitNull,
                Pos {
                    offset: 801,
                    line: 5,
                    col: 89,
                },
                "null",
                None,
            ),
            (
                Token::ArrEnd,
                Pos {
                    offset: 805,
                    line: 5,
                    col: 93,
                },
                "]",
                None,
            ),
            (
                Token::ArrEnd,
                Pos {
                    offset: 806,
                    line: 5,
                    col: 94,
                },
                "]",
                None,
            ),
            (
                Token::ArrEnd,
                Pos {
                    offset: 807,
                    line: 5,
                    col: 95,
                },
                "]",
                None,
            ),
            (
                Token::ObjEnd,
                Pos {
                    offset: 808,
                    line: 5,
                    col: 96,
                },
                "}",
                None,
            ),
            (
                Token::ObjEnd,
                Pos {
                    offset: 809,
                    line: 5,
                    col: 97,
                },
                "}",
                None,
            ),
            (
                Token::ObjEnd,
                Pos {
                    offset: 810,
                    line: 5,
                    col: 98,
                },
                "}",
                None,
            ),
            (
                Token::White,
                Pos {
                    offset: 811,
                    line: 5,
                    col: 99,
                },
                "\n",
                None,
            ),
            (
                // Tokens starting on line 6.
                Token::ObjEnd,
                Pos {
                    offset: 812,
                    line: 6,
                    col: 1,
                },
                "}",
                None,
            ),
            // End of input: three consecutive `Eof` results are expected, all at
            // the same position and with empty content.
            (
                Token::Eof,
                Pos {
                    offset: 813,
                    line: 6,
                    col: 2,
                },
                "",
                None,
            ),
            (
                Token::Eof,
                Pos {
                    offset: 813,
                    line: 6,
                    col: 2,
                },
                "",
                None,
            ),
            (
                Token::Eof,
                Pos {
                    offset: 813,
                    line: 6,
                    col: 2,
                },
                "",
                None,
            ),
        ];

        let mut an = FixedAnalyzer::new(JSON_TEXT.as_bytes());

        // Step the analyzer once per table entry and compare every observable.
        for (i, (expect_token, expect_pos, expect_literal, expect_unescaped)) in
            EXPECT.iter().enumerate()
        {
            let actual_token = an.next();
            let actual_pos = *an.pos();
            let content = an.content();

            assert_eq!(
                *expect_token, actual_token,
                "i = {i}, actual_pos = {actual_pos}, expect_pos = {expect_pos}"
            );
            assert_eq!(
                *expect_pos, actual_pos,
                "i = {i}, token = {actual_token}, content = {content}"
            );
            assert_eq!(
                *expect_literal,
                content.literal(),
                "i = {i}, token = {actual_token}"
            );
            if let Some(u) = expect_unescaped {
                // Entry carries an unescaped form: content must be flagged as
                // escaped and must unescape to exactly that text.
                assert!(
                    content.is_escaped(),
                    "i = {i}, token = {actual_token}, literal = {expect_literal:?}"
                );
                assert_eq!(*u, content.unescaped());
            } else {
                // No unescaped form given: content must not be flagged as
                // escaped, and unescaping must give back the literal.
                assert!(
                    !content.is_escaped(),
                    "i = {i}, token = {actual_token}, literal = {expect_literal:?}"
                );
                assert_eq!(*expect_literal, content.unescaped());
            }
        }
    }
3648
3649 fn inline_buf<const N: usize>(src: &[u8; N]) -> (u8, InlineBuf) {
3650 assert!(N <= INLINE_LEN);
3651 let mut dst = [0; INLINE_LEN];
3652 dst[0..N].copy_from_slice(src);
3653
3654 (u8::try_from(N).unwrap(), dst)
3655 }
3656
3657 impl From<(u8, InlineBuf)> for InnerContent<Vec<u8>> {
3658 fn from(value: (u8, InlineBuf)) -> Self {
3659 InnerContent::Inline(value.0, value.1)
3660 }
3661 }
3662}