1use crate::{
4 Buf, BufUnderflow, EqStr, IntoBuf, OrdStr, Pos,
5 lexical::{
6 self, state, {Analyzer, ErrorKind, Token, Unescaped},
7 },
8 syntax,
9};
10use std::{
11 borrow::Cow,
12 cmp::{Ordering, min},
13 collections::VecDeque,
14 convert::Infallible,
15 fmt,
16 hash::{Hash, Hasher},
17 io::{self, Read},
18 ops::Range,
19 str::FromStr,
20 sync::Arc,
21};
22
/// Maximum number of bytes a literal may hold in its inline (heap-free) representation.
///
/// Test builds use a much smaller limit than regular builds (presumably so that short test
/// inputs also exercise the non-inline representations — confirm with the test suite).
const INLINE_LEN: usize = if cfg!(test) { 4 } else { 21 };

/// Fixed-size byte storage used by the inline representation.
type InlineBuf = [u8; INLINE_LEN];
32
/// A view of a byte range inside one shared, reference-counted buffer.
#[derive(Debug, Clone)]
struct UniRef {
    /// The shared backing buffer.
    buf: Arc<Vec<u8>>,
    /// The byte range of `buf` that this view covers; `start` moves forward as bytes are
    /// consumed.
    rng: Range<u32>,
}

impl UniRef {
    /// Creates a view of `rng` within `buf`.
    ///
    /// Debug builds assert that the range is well-formed and lies within the buffer.
    fn new(buf: Arc<Vec<u8>>, rng: Range<u32>) -> Self {
        let Range { start, end } = rng;
        debug_assert!(start <= end);
        debug_assert!(end as usize <= buf.len());

        Self {
            buf,
            rng: start..end,
        }
    }

    /// Test helper: wraps anything convertible to bytes in a fresh shared buffer.
    #[cfg(test)]
    fn test_new(buf: impl Into<Vec<u8>>, rng: Range<u32>) -> Self {
        let shared = Arc::new(buf.into());

        Self::new(shared, rng)
    }
}
52
53impl Buf for UniRef {
54 fn advance(&mut self, n: usize) {
55 if self.remaining() < n {
56 panic!(
57 "{}",
58 &BufUnderflow {
59 requested: n,
60 remaining: self.remaining(),
61 }
62 );
63 } else {
64 debug_assert!(n <= u32::MAX as usize);
65 self.rng.start += n as u32;
66 }
67 }
68
69 fn chunk(&self) -> &[u8] {
70 &self.buf[self.rng.start as usize..self.rng.end as usize]
71 }
72
73 fn remaining(&self) -> usize {
74 (self.rng.end - self.rng.start) as usize
75 }
76
77 fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
78 if self.remaining() < dst.len() {
79 Err(BufUnderflow {
80 requested: dst.len(),
81 remaining: self.remaining(),
82 })
83 } else {
84 let start = self.rng.start as usize;
85 dst.copy_from_slice(&self.buf[start..start + dst.len()]);
86 self.rng.start += dst.len() as u32;
87
88 Ok(())
89 }
90 }
91}
92
93impl IntoBuf for UniRef {
94 type Buf = Self;
95
96 fn into_buf(self) -> Self::Buf {
97 self
98 }
99}
100
/// A view of a byte range that spans a sequence of shared buffers.
#[derive(Debug, Clone)]
struct MultiRef {
    /// The buffers, in order; the range is measured from the start of the first one.
    bufs: Arc<Vec<Arc<Vec<u8>>>>,
    /// Index into `bufs` of the buffer holding the next unread byte.
    buf: usize,
    /// Offset of the next unread byte within `bufs[buf]`.
    off: usize,
    /// Unread range, measured across the concatenation of all buffers.
    rng: Range<usize>,
}

impl MultiRef {
    /// Creates a view of `rng` across the concatenation of `bufs`.
    ///
    /// The cursor is normalized so that it always points at the buffer that actually
    /// contains `rng.start`. A start offset at or beyond the end of the first buffer is
    /// therefore handled correctly instead of leaving the cursor past the end of buffer 0.
    ///
    /// Debug builds assert that the range is well-formed, that it fits inside the buffers,
    /// and that only the last buffer may be empty.
    fn new(bufs: Arc<Vec<Arc<Vec<u8>>>>, rng: Range<usize>) -> Self {
        #[cfg(debug_assertions)]
        {
            debug_assert!(rng.start <= rng.end);
            let len: usize = bufs.iter().map(|v| v.len()).sum();
            debug_assert!(
                rng.end <= len,
                "rng.end ({}) must not exceed total length of buffers ({})",
                rng.end,
                len
            );
            bufs.iter()
                .take(bufs.len().saturating_sub(1))
                .enumerate()
                .for_each(|(i, buf)| {
                    debug_assert!(!buf.is_empty(), "empty buffer not allowed at index {i}")
                });
        }

        // Walk forward to the buffer containing `rng.start`. This leaves the cursor in the
        // same shape `advance` produces after consuming a whole buffer (`off == 0` in the
        // following buffer).
        let mut buf = 0;
        let mut off = rng.start;
        while buf < bufs.len() && off >= bufs[buf].len() {
            off -= bufs[buf].len();
            buf += 1;
        }

        Self {
            bufs,
            buf,
            off,
            rng,
        }
    }

    /// Test helper: builds the shared buffer list from anything convertible to bytes.
    #[cfg(test)]
    fn test_new<I, T>(bufs: I, rng: Range<usize>) -> Self
    where
        I: IntoIterator<Item = T>,
        T: Into<Vec<u8>>,
    {
        let bufs = Arc::new(
            bufs.into_iter()
                .map(Into::into)
                .map(Arc::new)
                .collect::<Vec<_>>(),
        );

        Self::new(bufs, rng)
    }

    /// Returns the number of unread bytes.
    fn remaining(&self) -> usize {
        self.rng.end - self.rng.start
    }

    /// Returns the end offset (exclusive) of the readable part of `buf`, which is expected
    /// to be the buffer under the cursor: either the buffer's end or the point where the
    /// view's range ends, whichever comes first.
    fn usable_len(&self, buf: &[u8]) -> usize {
        let n = min(buf.len(), self.off + self.remaining());

        debug_assert!(
            self.off <= n,
            "self.off ({}) > usable_len {n} for {}-byte buffer {buf:?}",
            self.off,
            buf.len()
        );

        n
    }
}
176
177impl Buf for MultiRef {
178 fn advance(&mut self, mut n: usize) {
179 if self.remaining() < n {
180 panic!(
181 "{}",
182 &BufUnderflow {
183 requested: n,
184 remaining: self.remaining(),
185 }
186 );
187 }
188
189 while n > 0 {
190 let step = self.bufs[self.buf].len() - self.off;
191 if n < step {
192 self.off += n;
193 self.rng.start += n;
194 break;
195 }
196 self.off = 0;
197 self.rng.start += step;
198 self.buf += 1;
199 n -= step;
200 }
201 }
202
203 fn chunk(&self) -> &[u8] {
204 if self.buf < self.bufs.len() {
205 let buf = &self.bufs[self.buf];
206
207 &buf[self.off..self.usable_len(buf)]
208 } else {
209 &[]
210 }
211 }
212
213 fn remaining(&self) -> usize {
214 MultiRef::remaining(self)
215 }
216
217 fn try_copy_to_slice(&mut self, mut dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
218 let mut n = dst.len();
219
220 if self.remaining() < n {
221 return Err(BufUnderflow {
222 requested: n,
223 remaining: self.remaining(),
224 });
225 }
226
227 while n > 0 {
228 let buf = &self.bufs[self.buf];
229 let step = self.usable_len(buf) - self.off;
230 if n < step {
231 dst[..n].copy_from_slice(&buf[self.off..self.off + n]);
232 self.off += n;
233 self.rng.start += n;
234 break;
235 }
236 dst[..step].copy_from_slice(&buf[self.off..self.off + step]);
237 dst = &mut dst[step..];
238 self.off = 0;
239 self.buf += 1;
240 self.rng.start += step;
241 n -= step;
242 debug_assert!(n == dst.len());
243 }
244
245 Ok(())
246 }
247}
248
249impl IntoBuf for MultiRef {
250 type Buf = Self;
251
252 fn into_buf(self) -> Self::Buf {
253 self
254 }
255}
256
/// Borrowed view of a literal's text, distinguishing contiguous from multi-buffer storage.
#[derive(Debug)]
enum Repr<'a> {
    /// The whole text is available as one string slice.
    Together(&'a str),
    /// The text is spread across several buffers and must be read through the `MultiRef`.
    Split(&'a MultiRef),
}
262
263#[derive(Clone, Debug)]
264enum InnerLiteral {
265 Static(&'static str),
266 Inline(u8, u8, InlineBuf),
267 Uni(UniRef),
268 Multi(Box<MultiRef>),
269}
270
271impl InnerLiteral {
272 fn len(&self) -> usize {
273 match self {
274 Self::Static(s) => s.len(),
275 Self::Inline(i, j, _b) => (*j - *i) as usize,
276 Self::Uni(r) => r.remaining(),
277 Self::Multi(r) => r.remaining(),
278 }
279 }
280
281 fn inline(s: &str) -> Self {
282 debug_assert!(s.len() <= INLINE_LEN);
283
284 let mut b = [0; INLINE_LEN];
285 b[0..s.len()].copy_from_slice(s.as_bytes());
286
287 Self::Inline(0, s.len() as u8, b)
288 }
289
290 fn uni(b: Vec<u8>) -> Self {
291 let n: u32 = b
292 .len()
293 .try_into()
294 .expect("buffer length cannot exceed u32::MAX");
295
296 Self::Uni(UniRef::new(Arc::new(b), 0..n))
297 }
298
299 fn repr(&self) -> Repr<'_> {
300 match self {
301 Self::Static(s) => Repr::Together(s),
302 Self::Inline(i, j, b) => {
303 Repr::Together(unsafe { str::from_utf8_unchecked(&b[*i as usize..*j as usize]) })
304 }
305 Self::Uni(r) => Repr::Together(unsafe {
306 str::from_utf8_unchecked(&r.buf[r.rng.start as usize..r.rng.end as usize])
307 }),
308 Self::Multi(r) => Repr::Split(r.as_ref()),
309 }
310 }
311}
312
313impl From<InnerContent> for InnerLiteral {
314 fn from(value: InnerContent) -> Self {
315 match value {
316 InnerContent::Static(s) => Self::Static(s),
317 InnerContent::Inline(len, b) => Self::Inline(0, len, b),
318 InnerContent::NotEscapedUni(r) | InnerContent::EscapedUni(r) => Self::Uni(r),
319 InnerContent::NotEscapedMulti(r) | InnerContent::EscapedMulti(r) => Self::Multi(r),
320 }
321 }
322}
323
324#[derive(Clone, Debug)]
351pub struct Literal(InnerLiteral);
352
353impl Literal {
354 pub const fn from_static(s: &'static str) -> Self {
398 Self(InnerLiteral::Static(s))
399 }
400
401 pub fn from_ref<T: AsRef<str> + ?Sized>(s: &T) -> Self {
412 let t = s.as_ref();
413
414 if t.len() <= INLINE_LEN {
415 Self(InnerLiteral::inline(t))
416 } else {
417 Self(InnerLiteral::uni(t.as_bytes().to_vec()))
418 }
419 }
420
421 pub fn from_string(s: String) -> Self {
443 if s.len() <= INLINE_LEN {
444 Self(InnerLiteral::inline(&s))
445 } else {
446 Self(InnerLiteral::uni(s.into_bytes()))
447 }
448 }
449
450 pub fn len(&self) -> usize {
468 self.0.len()
469 }
470
471 pub fn is_empty(&self) -> bool {
480 self.len() == 0
481 }
482
483 fn repr(&self) -> Repr<'_> {
484 self.0.repr()
485 }
486}
487
488impl IntoBuf for Literal {
489 type Buf = LiteralBuf;
490
491 fn into_buf(self) -> Self::Buf {
492 LiteralBuf(self.0)
493 }
494}
495
496impl fmt::Display for Literal {
497 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
498 match self.repr() {
499 Repr::Together(s) => f.write_str(s),
500 Repr::Split(r) => crate::buf_display(r.clone(), f),
501 }
502 }
503}
504
// Marker impl with no methods of its own here; see the `EqStr` trait for its requirements.
impl EqStr for Literal {}

// Marks `Literal`'s `PartialEq` as a full equivalence relation.
impl Eq for Literal {}
508
509impl From<Literal> for String {
510 fn from(value: Literal) -> Self {
511 match value.repr() {
512 Repr::Together(s) => s.to_string(),
513 Repr::Split(r) => crate::buf_to_string(r.clone()),
514 }
515 }
516}
517
518impl<T: ?Sized + AsRef<str>> From<&T> for Literal {
519 fn from(value: &T) -> Self {
520 Literal::from_ref(&value)
521 }
522}
523
524impl<'a> From<Cow<'a, str>> for Literal {
525 fn from(value: Cow<'a, str>) -> Self {
526 match value {
527 Cow::Borrowed(s) => Literal::from_ref(&s),
528 Cow::Owned(s) => Literal::from_string(s),
529 }
530 }
531}
532
533impl From<String> for Literal {
534 fn from(value: String) -> Self {
535 Literal::from_string(value)
536 }
537}
538
539impl FromStr for Literal {
540 type Err = Infallible;
541
542 fn from_str(s: &str) -> Result<Self, Self::Err> {
543 Ok(Literal::from_ref(&s))
544 }
545}
546
547impl Hash for Literal {
548 fn hash<H: Hasher>(&self, state: &mut H) {
549 match self.repr() {
550 Repr::Together(s) => state.write(s.as_bytes()),
551 Repr::Split(r) => {
552 let mut x = r.clone();
553 while x.remaining() > 0 {
554 let b = x.chunk();
555 state.write(b);
556 x.advance(b.len());
557 }
558 }
559 }
560 }
561}
562
563impl Ord for Literal {
564 fn cmp(&self, other: &Self) -> Ordering {
565 match (self.repr(), other.repr()) {
566 (Repr::Together(a), Repr::Together(b)) => Ord::cmp(a, b),
567 (Repr::Together(a), Repr::Split(b)) => crate::buf_cmp(a, b.clone()),
568 (Repr::Split(a), Repr::Together(b)) => crate::buf_cmp(a.clone(), b),
569 (Repr::Split(a), Repr::Split(b)) => crate::buf_cmp(a.clone(), b.clone()),
570 }
571 }
572}
573
574impl OrdStr for Literal {
575 fn cmp(&self, other: &str) -> Ordering {
576 match self.repr() {
577 Repr::Together(s) => Ord::cmp(s, other),
578 Repr::Split(r) => crate::buf_cmp(r.clone(), other),
579 }
580 }
581}
582
583impl PartialEq for Literal {
584 fn eq(&self, other: &Self) -> bool {
585 if self.len() != other.len() {
586 false
587 } else {
588 match (self.repr(), other.repr()) {
589 (Repr::Together(a), Repr::Together(b)) => a == b,
590 (Repr::Together(a), Repr::Split(b)) => {
591 crate::buf_cmp(a, b.clone()) == Ordering::Equal
592 }
593 (Repr::Split(a), Repr::Together(b)) => {
594 crate::buf_cmp(a.clone(), b) == Ordering::Equal
595 }
596 (Repr::Split(a), Repr::Split(b)) => {
597 crate::buf_cmp(a.clone(), b.clone()) == Ordering::Equal
598 }
599 }
600 }
601 }
602}
603
604impl PartialEq<str> for Literal {
605 fn eq(&self, other: &str) -> bool {
606 if self.len() != other.len() {
607 false
608 } else {
609 match self.repr() {
610 Repr::Together(s) => s == other,
611 Repr::Split(r) => crate::buf_cmp(r.clone(), other) == Ordering::Equal,
612 }
613 }
614 }
615}
616
617impl PartialEq<&str> for Literal {
618 fn eq(&self, other: &&str) -> bool {
619 self == *other
620 }
621}
622
623impl PartialEq<String> for Literal {
624 fn eq(&self, other: &String) -> bool {
625 self == other.as_str()
626 }
627}
628
629impl PartialEq<Literal> for str {
630 fn eq(&self, other: &Literal) -> bool {
631 other == self
632 }
633}
634
635impl PartialEq<Literal> for &str {
636 fn eq(&self, other: &Literal) -> bool {
637 other == self
638 }
639}
640
641impl PartialEq<Literal> for String {
642 fn eq(&self, other: &Literal) -> bool {
643 other == self
644 }
645}
646
647impl PartialOrd for Literal {
648 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
649 Some(Ord::cmp(self, other))
650 }
651}
652
653impl PartialOrd<str> for Literal {
654 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
655 Some(OrdStr::cmp(self, other))
656 }
657}
658
659impl PartialOrd<Literal> for str {
660 fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
661 Some(OrdStr::cmp(other, self).reverse())
662 }
663}
664
665impl PartialOrd<&str> for Literal {
666 fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
667 Some(OrdStr::cmp(self, other))
668 }
669}
670
671impl PartialOrd<Literal> for &str {
672 fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
673 Some(OrdStr::cmp(other, self).reverse())
674 }
675}
676
677impl PartialOrd<String> for Literal {
678 fn partial_cmp(&self, other: &String) -> Option<Ordering> {
679 self.partial_cmp(other.as_str())
680 }
681}
682
683impl PartialOrd<Literal> for String {
684 fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
685 self.as_str().partial_cmp(other)
686 }
687}
688
689pub struct LiteralBuf(InnerLiteral);
708
709impl LiteralBuf {
710 pub fn advance(&mut self, n: usize) {
723 match &mut self.0 {
724 InnerLiteral::Static(s) => {
725 if s.len() < n {
726 panic!(
727 "{}",
728 &BufUnderflow {
729 requested: n,
730 remaining: s.len(),
731 }
732 );
733 } else {
734 self.0 = InnerLiteral::Static(&s[n..]);
735 }
736 }
737
738 InnerLiteral::Inline(i, j, b) => {
739 let len = (*j - *i) as usize;
740 if len < n {
741 panic!(
742 "{}",
743 &BufUnderflow {
744 requested: n,
745 remaining: len,
746 }
747 );
748 } else {
749 self.0 = InnerLiteral::Inline(*i + n as u8, *j, *b);
750 }
751 }
752
753 InnerLiteral::Uni(r) => r.advance(n),
754 InnerLiteral::Multi(r) => r.advance(n),
755 }
756 }
757
758 pub fn chunk(&self) -> &[u8] {
772 match &self.0 {
773 InnerLiteral::Static(s) => s.as_bytes(),
774 InnerLiteral::Inline(i, j, b) => &b[*i as usize..*j as usize],
775 InnerLiteral::Uni(r) => r.chunk(),
776 InnerLiteral::Multi(r) => r.chunk(),
777 }
778 }
779
780 pub fn remaining(&self) -> usize {
789 self.0.len()
790 }
791
792 pub fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
804 match &mut self.0 {
805 InnerLiteral::Static(s) => {
806 if s.len() < dst.len() {
807 Err(BufUnderflow {
808 requested: dst.len(),
809 remaining: s.len(),
810 })
811 } else {
812 dst.copy_from_slice(&s.as_bytes()[..dst.len()]);
813 *self = Self(InnerLiteral::Static(&s[dst.len()..]));
814
815 Ok(())
816 }
817 }
818
819 InnerLiteral::Inline(i, j, b) => {
820 let len = (*j - *i) as usize;
821 if len < dst.len() {
822 Err(BufUnderflow {
823 requested: dst.len(),
824 remaining: len,
825 })
826 } else {
827 dst.copy_from_slice(&b[*i as usize..*i as usize + dst.len()]);
828 *i += dst.len() as u8;
829
830 Ok(())
831 }
832 }
833
834 InnerLiteral::Uni(r) => r.try_copy_to_slice(dst),
835 InnerLiteral::Multi(r) => r.try_copy_to_slice(dst),
836 }
837 }
838}
839
840impl Buf for LiteralBuf {
841 fn advance(&mut self, n: usize) {
842 LiteralBuf::advance(self, n);
843 }
844
845 fn chunk(&self) -> &[u8] {
846 LiteralBuf::chunk(self)
847 }
848
849 fn remaining(&self) -> usize {
850 LiteralBuf::remaining(self)
851 }
852
853 fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
854 LiteralBuf::try_copy_to_slice(self, dst)
855 }
856}
857
/// Storage strategies for token content, also recording whether the stored text is escaped.
#[derive(Debug, Clone)]
enum InnerContent {
    /// Borrowed from a string with static lifetime.
    Static(&'static str),
    /// Stored inline: `(len, bytes)`, where `bytes[..len]` is the text.
    Inline(u8, InlineBuf),
    /// Not-escaped text in a range of a single shared buffer.
    NotEscapedUni(UniRef),
    /// Not-escaped text spanning several shared buffers.
    NotEscapedMulti(Box<MultiRef>),
    /// Escaped text in a range of a single shared buffer.
    EscapedUni(UniRef),
    /// Escaped text spanning several shared buffers.
    EscapedMulti(Box<MultiRef>),
}
867
868#[derive(Debug)]
884pub struct Content(InnerContent);
885
886impl Content {
887 pub fn literal(&self) -> Literal {
893 Literal(self.0.clone().into())
894 }
895
896 pub fn is_escaped(&self) -> bool {
902 matches!(
903 self.0,
904 InnerContent::EscapedUni(_) | InnerContent::EscapedMulti(_)
905 )
906 }
907
908 pub fn unescaped(&self) -> Unescaped<Literal> {
926 match &self.0 {
927 InnerContent::EscapedUni(r) => {
928 let mut buf = Vec::new();
929 lexical::unescape(r.clone(), &mut buf);
930
931 let s = unsafe { String::from_utf8_unchecked(buf) };
934
935 Unescaped::Expanded(s)
936 }
937
938 InnerContent::EscapedMulti(r) => {
939 let mut buf = Vec::new();
940 lexical::unescape(r.as_ref().clone(), &mut buf);
941
942 let s = unsafe { String::from_utf8_unchecked(buf) };
945
946 Unescaped::Expanded(s)
947 }
948
949 _ => Unescaped::Literal(self.literal()),
950 }
951 }
952
953 fn from_static(s: &'static str) -> Self {
954 Self(InnerContent::Static(s))
955 }
956
957 fn from_bufs(bufs: &Bufs, rng: Range<usize>, escaped: bool) -> Self {
958 let len = rng.end - rng.start;
959
960 if len <= INLINE_LEN && !escaped {
961 let mut buf = [0u8; INLINE_LEN];
962 let mut off = 0;
963 let mut rem = len;
964
965 let mut used_iter = bufs.used.iter();
966 let cur_off = if let Some(used_0) = used_iter.next() {
967 let n = used_0.len() - rng.start;
968 buf[0..n].copy_from_slice(&used_0[rng.start..]);
969 off = n;
970 rem = len - n;
971 debug_assert!(off <= len && rem <= len && off + rem == len);
972
973 for used_i in used_iter {
974 let n = used_i.len();
975 buf[off..off + n].copy_from_slice(&used_i[..n]);
976 off += n;
977 rem -= n;
978 debug_assert!(off <= len && rem <= len && off + rem == len);
979 }
980
981 0
982 } else {
983 rng.start
984 };
985
986 buf[off..off + rem].copy_from_slice(&bufs.current[cur_off..cur_off + rem]);
987
988 Self(InnerContent::Inline(len as u8, buf))
989 } else if rng.end <= bufs.current.len() && rng.end < u32::MAX as usize {
990 let r = UniRef::new(Arc::clone(&bufs.current), rng.start as u32..rng.end as u32);
991
992 if escaped {
993 Self(InnerContent::EscapedUni(r))
994 } else {
995 Self(InnerContent::NotEscapedUni(r))
996 }
997 } else {
998 let mut all = Vec::with_capacity(bufs.used.len() + 1);
999 all.extend(bufs.used.iter().cloned());
1000 all.push(Arc::clone(&bufs.current));
1001
1002 let r = MultiRef::new(Arc::new(all), rng);
1003
1004 if escaped {
1005 Self(InnerContent::EscapedMulti(Box::new(r)))
1006 } else {
1007 Self(InnerContent::NotEscapedMulti(Box::new(r)))
1008 }
1009 }
1010 }
1011}
1012
1013impl fmt::Display for Content {
1014 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1015 self.literal().fmt(f)
1016 }
1017}
1018
1019impl super::Content for Content {
1020 type Literal<'a> = Literal;
1021
1022 #[inline(always)]
1023 fn literal<'a>(&'a self) -> Self::Literal<'a> {
1024 Content::literal(self)
1025 }
1026
1027 #[inline(always)]
1028 fn is_escaped(&self) -> bool {
1029 Content::is_escaped(self)
1030 }
1031
1032 #[inline(always)]
1033 fn unescaped<'a>(&'a self) -> Unescaped<Self::Literal<'a>> {
1034 Content::unescaped(self)
1035 }
1036}
1037
1038const _: [(); 24] = [(); std::mem::size_of::<Literal>()];
1040
1041const _: [(); 24] = [(); std::mem::size_of::<Content>()];
1043
1044#[derive(Clone, Debug)]
1048pub struct Error {
1049 kind: ErrorKind,
1050 pos: Pos,
1051 source: Option<Arc<io::Error>>,
1052}
1053
1054impl Error {
1055 pub fn kind(&self) -> ErrorKind {
1060 self.kind
1061 }
1062
1063 pub fn pos(&self) -> &Pos {
1068 &self.pos
1069 }
1070}
1071
1072impl fmt::Display for Error {
1073 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1074 self.kind.fmt_at(f, Some(&self.pos))
1075 }
1076}
1077
1078impl std::error::Error for Error {
1079 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
1080 self.source.as_ref().map(|e| &**e as &dyn std::error::Error)
1081 }
1082}
1083
1084impl lexical::Error for Error {
1085 fn kind(&self) -> ErrorKind {
1086 Error::kind(self)
1087 }
1088
1089 fn pos(&self) -> &Pos {
1090 Error::pos(self)
1091 }
1092}
1093
/// The chain of read buffers that the analyzer scans, plus cursors for the token in
/// progress.
#[derive(Debug)]
struct Bufs {
    /// The buffer currently being scanned.
    current: Arc<Vec<u8>>,
    /// Earlier buffers that hold the beginning of the token in progress, oldest first.
    used: Vec<Arc<Vec<u8>>>,
    /// Start of the token in progress, as an offset into its first buffer (`used[0]` when
    /// `used` is not empty, otherwise `current`).
    i: usize,
    /// Position of the next unread byte within `current`.
    j: usize,
    /// End of the token in progress, measured from the start of its first buffer.
    k: usize,
    /// Retired buffers that can be recycled once nothing else references them.
    maybe_free: VecDeque<Arc<Vec<u8>>>,
    /// Length given to every buffer handed to the reader.
    buf_size: usize,
    /// Set once the reader has reported end of input.
    eof: bool,
}

impl Bufs {
    const DEFAULT_BUF_SIZE: usize = 8 * 1024;

    #[cfg(test)]
    const MIN_BUF_SIZE: usize = 1;
    #[cfg(not(test))]
    const MIN_BUF_SIZE: usize = 512;

    /// Creates an empty buffer chain whose buffers are `buf_size` bytes long.
    ///
    /// # Panics
    ///
    /// Panics if `buf_size` is below `MIN_BUF_SIZE`.
    fn new(buf_size: usize) -> Self {
        if buf_size < Self::MIN_BUF_SIZE {
            panic!(
                "buffer size too low: minimum is {} bytes, but {} was given",
                Self::MIN_BUF_SIZE,
                buf_size
            );
        }

        Self {
            current: Arc::new(Vec::new()),
            used: Vec::new(),
            i: 0,
            j: 0,
            k: 0,
            maybe_free: VecDeque::new(),
            buf_size,
            eof: false,
        }
    }

    /// Un-reads the most recently read byte.
    #[inline]
    fn rewind(&mut self) {
        self.j -= 1;
        self.k -= 1;
    }

    /// Starts a new token at the current read position and retires the buffers that were
    /// only needed by the previous token.
    #[inline]
    fn reset(&mut self) {
        if !self.used.is_empty() {
            self.maybe_free.extend(self.used.drain(..));
        }

        self.i = self.j;
        self.k = self.j;
    }

    /// Returns the next byte of the current buffer, or `None` when it is exhausted.
    #[inline(always)]
    fn byte(&mut self) -> Option<u8> {
        let b = *self.current.get(self.j)?;
        self.j += 1;
        self.k += 1;

        Some(b)
    }

    /// Replaces the exhausted current buffer with freshly read data.
    ///
    /// Returns `Ok(true)` at end of input and `Ok(false)` when a new, non-empty buffer has
    /// been installed. Must only be called when the current buffer is fully consumed.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the reader.
    fn read<R: Read>(&mut self, r: &mut R) -> io::Result<bool> {
        debug_assert!(self.j == self.current.len());

        if self.eof {
            return Ok(true);
        }

        let mut buf = Arc::new(self.alloc_or_reuse());
        let inner =
            Arc::get_mut(&mut buf).expect("buffer must be exclusively owned to use for read");
        debug_assert!(
            inner.len() == self.buf_size,
            "allocated buffer must have len buf_size = {}, but its len is {}",
            self.buf_size,
            inner.len()
        );

        match r.read(inner.as_mut_slice()) {
            Ok(0) => {
                self.eof = true;

                Ok(true)
            }

            Ok(n) if n <= inner.len() => {
                inner.truncate(n);

                // A token is in progress if it already spilled into `used`, or if it
                // started somewhere in `current`. When `used` is not empty, `i` indexes
                // `used[0]` rather than `current`, so `j != i` alone is not a reliable
                // test: after a short read the two could coincide and the token would be
                // dropped.
                if !self.used.is_empty() || self.j != self.i {
                    debug_assert!(!self.current.is_empty());

                    self.used.push(Arc::clone(&self.current));
                } else if !self.current.is_empty() {
                    debug_assert!(self.k > 0);

                    // No token bytes live in `current`; the next token starts at the
                    // beginning of the new buffer and the old one can be recycled.
                    self.i = 0;
                    self.k = 0;
                    self.maybe_free.push_back(Arc::clone(&self.current));
                } else {
                    debug_assert!(self.i == 0 && self.j == 0 && self.k == 0);
                }

                self.current = buf;
                self.j = 0;

                Ok(false)
            }
            Ok(n) => panic!("read {n} bytes but buffer size is only {}", inner.len()),
            Err(err) => Err(err),
        }
    }

    /// Returns a buffer of exactly `buf_size` initialized bytes, recycling a retired
    /// buffer when one is no longer referenced elsewhere.
    fn alloc_or_reuse(&mut self) -> Vec<u8> {
        if let Some(buf) = self.maybe_free.pop_front() {
            let mut replace: Option<Arc<Vec<u8>>> = None;

            match Arc::try_unwrap(buf) {
                Ok(inner) => return self.restore_len(inner),
                Err(buf) => {
                    // Keep the sole candidate around for a later attempt; when there are
                    // more candidates, still-shared ones are simply dropped from the queue.
                    if self.maybe_free.is_empty() {
                        replace = Some(buf);
                    }
                }
            };

            while let Some(buf) = self.maybe_free.pop_front() {
                if let Ok(inner) = Arc::try_unwrap(buf) {
                    return self.restore_len(inner);
                }
            }

            if let Some(buf) = replace {
                self.maybe_free.push_back(buf);
            }
        }

        // Zero-initialized: handing uninitialized memory to `Read::read` is not sound.
        vec![0; self.buf_size]
    }

    /// Restores a recycled buffer to the full `buf_size` length; it may have been
    /// truncated to the size of an earlier short read.
    fn restore_len(&self, mut v: Vec<u8>) -> Vec<u8> {
        v.resize(self.buf_size, 0);
        v
    }
}
1276
1277#[derive(Debug)]
1278enum StoredContent {
1279 Literal(&'static str),
1280 Range(Range<usize>, bool),
1281 Err(Error),
1282}
1283
1284impl Default for StoredContent {
1285 fn default() -> Self {
1286 StoredContent::Literal("")
1287 }
1288}
1289
1290#[derive(Debug)]
1378pub struct ReadAnalyzer<R: Read> {
1379 bufs: Bufs,
1380 content: StoredContent,
1381 content_pos: Pos,
1382 mach: state::Machine,
1383 read: R,
1384}
1385
1386impl<R: Read> ReadAnalyzer<R> {
1387 pub fn new(read: R) -> Self {
1406 Self::with_buf_size(read, Bufs::DEFAULT_BUF_SIZE)
1407 }
1408
1409 #[allow(clippy::should_implement_trait)]
1431 pub fn next(&mut self) -> Token {
1432 if matches!(self.content, StoredContent::Err(_)) {
1433 return Token::Err;
1434 }
1435
1436 self.content_pos = *self.mach.pos();
1437 self.bufs.reset();
1438
1439 let mut b = match self.byte() {
1440 Ok(b) => b,
1441 Err(err) => {
1442 self.content = StoredContent::Err(err);
1443
1444 return Token::Err;
1445 }
1446 };
1447
1448 loop {
1449 match self.mach.next(b) {
1450 state::State::Mid => match self.byte() {
1451 Ok(v) => b = v,
1452 Err(err) => {
1453 self.content = StoredContent::Err(err);
1454
1455 return Token::Err;
1456 }
1457 },
1458
1459 state::State::End {
1460 token,
1461 escaped,
1462 repeat,
1463 } => {
1464 if repeat && b.is_some() {
1465 self.bufs.rewind();
1466 }
1467
1468 self.content = match token {
1469 Token::ObjBegin => StoredContent::Literal("{"),
1470 Token::ObjEnd => StoredContent::Literal("}"),
1471 Token::ArrBegin => StoredContent::Literal("["),
1472 Token::NameSep => StoredContent::Literal(":"),
1473 Token::ValueSep => StoredContent::Literal(","),
1474 Token::LitFalse => StoredContent::Literal("false"),
1475 Token::LitNull => StoredContent::Literal("null"),
1476 Token::LitTrue => StoredContent::Literal("true"),
1477 _ => StoredContent::Range(self.bufs.i..self.bufs.k, escaped),
1478 };
1479
1480 return token;
1481 }
1482
1483 state::State::Err(kind) => {
1484 let mut pos = *self.mach.pos();
1485
1486 match &kind {
1487 ErrorKind::BadSurrogate {
1488 first: _,
1489 second: _,
1490 offset,
1491 } => {
1492 pos.offset -= *offset as usize;
1493 pos.col -= *offset as usize;
1494 }
1495
1496 ErrorKind::BadUtf8ContByte {
1497 seq_len,
1498 offset: _,
1499 value: _,
1500 } => {
1501 let rewind = seq_len - 1;
1504 pos.offset -= rewind as usize;
1505 }
1506
1507 _ => (),
1508 }
1509
1510 self.content = StoredContent::Err(Error {
1511 kind,
1512 pos,
1513 source: None,
1514 });
1515
1516 return Token::Err;
1517 }
1518 }
1519 }
1520 }
1521
1522 #[inline]
1552 pub fn content(&self) -> Content {
1553 if let Ok(content) = self.try_content() {
1554 content
1555 } else {
1556 panic!("no content: last `next()` returned `Token::Err` (use `err()` instead)");
1557 }
1558 }
1559
1560 #[inline]
1590 pub fn err(&self) -> Error {
1591 if let Err(err) = self.try_content() {
1592 err
1593 } else {
1594 panic!("no error: last `next()` did not return `Token::Err` (use `content()` instead)");
1595 }
1596 }
1597
1598 #[inline(always)]
1647 pub fn pos(&self) -> &Pos {
1648 &self.content_pos
1649 }
1650
1651 pub fn try_content(&self) -> Result<Content, Error> {
1679 match &self.content {
1680 StoredContent::Literal(s) => Ok(Content::from_static(s)),
1681 StoredContent::Range(rng, escaped) => {
1682 Ok(Content::from_bufs(&self.bufs, rng.clone(), *escaped))
1683 }
1684 StoredContent::Err(err) => Err(err.clone()),
1685 }
1686 }
1687
1688 pub fn into_parser(self) -> syntax::Parser<ReadAnalyzer<R>> {
1717 syntax::Parser::new(self)
1718 }
1719
1720 pub fn with_buf_size(read: R, buf_size: usize) -> Self {
1747 Self {
1748 bufs: Bufs::new(buf_size),
1749 content: StoredContent::default(),
1750 content_pos: Pos::default(),
1751 mach: state::Machine::default(),
1752 read,
1753 }
1754 }
1755
1756 #[inline]
1757 fn byte(&mut self) -> Result<Option<u8>, Error> {
1758 if let Some(b) = self.bufs.byte() {
1759 Ok(Some(b))
1760 } else {
1761 match self.bufs.read(&mut self.read) {
1762 Ok(eof) if eof => Ok(None),
1763 Ok(_) => Ok(self.bufs.byte()),
1764 Err(err) => Err(Error {
1765 kind: ErrorKind::Read,
1766 pos: *self.mach.pos(),
1767 source: Some(Arc::new(err)),
1768 }),
1769 }
1770 }
1771 }
1772}
1773
1774impl<R: Read> Analyzer for ReadAnalyzer<R> {
1775 type Content = Content;
1776 type Error = Error;
1777
1778 #[inline(always)]
1779 fn next(&mut self) -> Token {
1780 ReadAnalyzer::next(self)
1781 }
1782
1783 #[inline(always)]
1784 fn try_content(&self) -> Result<Self::Content, Error> {
1785 ReadAnalyzer::try_content(self)
1786 }
1787
1788 #[inline(always)]
1789 fn pos(&self) -> &Pos {
1790 ReadAnalyzer::pos(self)
1791 }
1792}
1793
1794#[cfg(test)]
1795mod tests {
1796 use super::*;
1797 use crate::{IntoBuf, lexical::Expect};
1798 use rstest::rstest;
1799 use std::{
1800 collections::{BTreeMap, HashMap},
1801 error::Error as _,
1802 };
1803
1804 #[test]
1805 #[should_panic(expected = "not enough bytes in buffer (4 requested, but only 3 remain)")]
1806 fn test_uniref_buf_advance_panic() {
1807 let mut b = UniRef::test_new("foo", 0..3);
1808
1809 b.advance(4);
1810 }
1811
1812 #[rstest]
1813 #[case("", 0..0, 0, "")]
1814 #[case("x", 0..0, 0, "")]
1815 #[case("x", 1..1, 0, "")]
1816 #[case("x", 0..1, 0, "x")]
1817 #[case("x", 0..1, 1, "")]
1818 #[case("hello", 0..5, 0, "hello")]
1819 #[case("hello", 0..5, 5, "")]
1820 #[case("hello", 0..2, 0, "he")]
1821 #[case("hello", 0..2, 1, "e")]
1822 #[case("hello", 0..2, 2, "")]
1823 #[case("hello", 1..5, 2, "lo")]
1824 #[case("hello", 1..5, 1, "llo")]
1825 #[case("hello", 1..5, 0, "ello")]
1826 #[case("hello", 1..4, 0, "ell")]
1827 fn test_uniref_buf_advance_ok(
1828 #[case] buf: &str,
1829 #[case] rng: Range<u32>,
1830 #[case] n: usize,
1831 #[case] chunk: &str,
1832 ) {
1833 let mut b = UniRef::test_new(buf, rng);
1834
1835 b.advance(n);
1836
1837 assert_eq!(chunk, str::from_utf8(b.chunk()).unwrap());
1838 assert_eq!(chunk.len(), b.remaining());
1839 }
1840
1841 #[rstest]
1842 #[case("", 0..0, "")]
1843 #[case("a", 0..0, "")]
1844 #[case("a", 0..1, "a")]
1845 #[case("a", 1..1, "")]
1846 #[case("foo", 0..3, "foo")]
1847 fn test_uniref_buf_chunk(#[case] buf: &str, #[case] rng: Range<u32>, #[case] expect: &str) {
1848 let b = UniRef::test_new(buf, rng);
1849
1850 assert_eq!(expect, str::from_utf8(b.chunk()).unwrap());
1851 }
1852
1853 #[rstest]
1854 #[case("", 0..0, 0, false)]
1855 #[case("a", 0..1, 1, true)]
1856 #[case("foo", 0..3, 3, true)]
1857 fn test_uniref_buf_remaining(
1858 #[case] buf: &str,
1859 #[case] rng: Range<u32>,
1860 #[case] expect_remaining: usize,
1861 #[case] expect_has_remaining: bool,
1862 ) {
1863 let b = UniRef::test_new(buf, rng);
1864
1865 assert_eq!(expect_remaining, b.remaining());
1866 assert_eq!(expect_has_remaining, b.has_remaining());
1867 }
1868
1869 #[rstest]
1870 #[case("", 0..0, b"", "")]
1871 #[case("a", 0..0, b"", "")]
1872 #[case("a", 0..1, b"", "a")]
1873 #[case("a", 0..1, b"a", "")]
1874 #[case("bar", 0..3, b"", "bar")]
1875 #[case("bar", 0..3, b"b", "ar")]
1876 #[case("bar", 0..3, b"ba", "r")]
1877 #[case("bar", 0..3, b"bar", "")]
1878 #[case("bar", 0..2, b"b", "a")]
1879 #[case("bar", 1..3, b"ar", "")]
1880 fn test_uniref_buf_try_copy_to_slice_ok<const N: usize>(
1881 #[case] buf: &str,
1882 #[case] rng: Range<u32>,
1883 #[case] expect: &[u8; N],
1884 #[case] rem: &str,
1885 ) {
1886 let mut b = UniRef::test_new(buf, rng);
1887 let mut actual = [0; N];
1888
1889 let result = b.try_copy_to_slice(&mut actual);
1890
1891 assert_eq!(Ok(()), result);
1892 assert_eq!(expect, &actual);
1893 assert_eq!(rem, str::from_utf8(b.chunk()).unwrap());
1894 }
1895
1896 #[rstest]
1897 #[case("", 0..0, [0; 1])]
1898 #[case("", 0..0, [0; 2])]
1899 #[case("a", 0..1, [0; 2])]
1900 #[case("foo", 0..3, [0; 4])]
1901 #[case("foo", 1..2, [0; 99])]
1902 fn test_uniref_buf_try_copy_to_slice_err<const N: usize>(
1903 #[case] buf: &str,
1904 #[case] rng: Range<u32>,
1905 #[case] mut dst: [u8; N],
1906 ) {
1907 let expect = &buf[rng.start as usize..rng.end as usize];
1908 let mut b = UniRef::test_new(buf, rng.clone());
1909
1910 let result = b.try_copy_to_slice(&mut dst);
1911
1912 assert_eq!(
1913 Err(BufUnderflow {
1914 remaining: (rng.end - rng.start) as usize,
1915 requested: N
1916 }),
1917 result
1918 );
1919 assert_eq!(expect, str::from_utf8(b.chunk()).unwrap());
1920 }
1921
1922 #[rstest]
1923 #[case(MultiRef::test_new([""; 0], 0..0), 1)]
1924 #[case(MultiRef::test_new([""], 0..0), 1)]
1925 #[case(MultiRef::test_new(["foo", ""], 0..3), 4)]
1926 #[case(MultiRef::test_new(["f", "o", "o", ""], 0..3), 4)]
1927 #[case(MultiRef::test_new(["hell", "o worl", "d"], 6..11), 6)]
1928 #[should_panic(expected = "not enough bytes in buffer")]
1929 fn test_multiref_buf_advance_panic(#[case] mut b: MultiRef, #[case] n: usize) {
1930 b.advance(n);
1931 }
1932
1933 #[rstest]
1934 #[case(MultiRef::test_new([""; 0], 0..0), "", 0, b"")]
1935 #[case(MultiRef::test_new([""], 0..0), "", 0, b"")]
1936 #[case(MultiRef::test_new(["a"], 0..0), "", 0, b"")]
1937 #[case(MultiRef::test_new(["a"], 0..1), "a", 0, b"a")]
1938 #[case(MultiRef::test_new(["a", ""], 0..1), "a", 1, b"")]
1939 #[case(MultiRef::test_new(["f", "o", "o"], 0..3), "f", 1, b"oo")]
1940 #[case(MultiRef::test_new(["f", "o", "o"], 0..3), "f", 2, b"o")]
1941 #[case(MultiRef::test_new(["f", "o", "o"], 0..3), "f", 3, b"")]
1942 #[case(MultiRef::test_new(["fo", "o", ""], 0..3), "fo", 1, b"oo")]
1943 #[case(MultiRef::test_new(["fo", "o", ""], 0..3), "fo", 2, b"o")]
1944 #[case(MultiRef::test_new(["fo", "o", ""], 0..3), "fo", 3, b"")]
1945 #[case(MultiRef::test_new(["he", "ll", "o world"], 0..5), "he", 0, b"hello")]
1946 #[case(MultiRef::test_new(["he", "ll", "o world"], 0..5), "he", 1, b"ello")]
1947 #[case(MultiRef::test_new(["he", "ll", "o world"], 0..5), "he", 2, b"llo")]
1948 #[case(MultiRef::test_new(["he", "ll", "o world"], 0..5), "he", 3, b"lo")]
1949 #[case(MultiRef::test_new(["he", "ll", "o world"], 0..5), "he", 4, b"o")]
1950 #[case(MultiRef::test_new(["he", "ll", "o world"], 0..5), "he", 5, b"")]
1951 fn test_multiref_buf_advance_ok<const N: usize>(
1952 #[case] mut b: MultiRef,
1953 #[case] expect_chunk: &str,
1954 #[case] n: usize,
1955 #[case] expect_tail: &[u8; N],
1956 ) {
1957 let before = b.chunk();
1958 assert_eq!(expect_chunk, str::from_utf8(before).unwrap());
1959
1960 b.advance(n);
1961
1962 assert_eq!(N, b.remaining());
1963
1964 let after = b.chunk();
1965 if N > 0 {
1966 assert!(after.len() > 0);
1967 } else {
1968 assert!(after.is_empty());
1969 }
1970
1971 let mut dst = [0u8; N];
1972 b.copy_to_slice(&mut dst);
1973 assert_eq!(expect_tail, &dst);
1974
1975 assert_eq!(0, b.remaining());
1976 assert_eq!(b"", b.chunk());
1977 }
1978
1979 #[test]
1980 #[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
1981 fn test_multiref_copy_to_slice_underflow_panic() {
1982 let mut b = MultiRef::test_new([""], 0..0).into_buf();
1983 let mut dst = [0u8; 1];
1984
1985 b.copy_to_slice(&mut dst);
1986 }
1987
1988 #[test]
1989 fn test_multiref_copy_to_slice_partial_buf() {
1990 let mut b = MultiRef::test_new([" f", "oolishness"], 1..8);
1991 let mut dst = [0u8; 3];
1992
1993 b.copy_to_slice(&mut dst);
1994
1995 assert_eq!(b"foo", &dst);
1996 assert_eq!(4, b.remaining());
1997 assert_eq!(b"lish", b.chunk());
1998 }
1999
2000 #[test]
2001 fn test_multiref_copy_to_slice_full_buf() {
2002 let mut b = MultiRef::test_new([" f", "oolishness"], 1..5);
2003 let mut dst = [0u8; 4];
2004
2005 b.copy_to_slice(&mut dst);
2006
2007 assert_eq!(b"fool", &dst);
2008 }
2009
2010 #[test]
2011 fn test_multiref_copy_to_slice_blarg() {
2012 let mut b = MultiRef::test_new(["foo", "li", "shness"], 0..7);
2013 let mut dst = [0u8; 4];
2014
2015 b.copy_to_slice(&mut dst);
2016
2017 assert_eq!(b"fool", &dst);
2018 assert_eq!(3, b.remaining());
2019 assert_eq!(b"i", b.chunk());
2020 }
2021
2022 #[rstest]
2023 #[case(InnerLiteral::Static(""), 0)]
2024 #[case(InnerLiteral::Static("a"), 1)]
2025 #[case(InnerLiteral::Inline(0, 0, [0; INLINE_LEN]), 0)]
2026 #[case(InnerLiteral::Inline(0, 1, [0; INLINE_LEN]), 1)]
2027 #[case(InnerLiteral::Inline(1, 1, [0; INLINE_LEN]), 0)]
2028 #[case(InnerLiteral::Inline(1, 2, [0; INLINE_LEN]), 1)]
2029 #[case(InnerLiteral::Inline(3, 7, [0; INLINE_LEN]), 4)]
2030 #[case(InnerLiteral::Uni(UniRef::test_new("", 0..0)), 0)]
2031 #[case(InnerLiteral::Uni(UniRef::test_new("a", 0..0)), 0)]
2032 #[case(InnerLiteral::Uni(UniRef::test_new("a", 0..1)), 1)]
2033 #[case(InnerLiteral::Uni(UniRef::test_new("ab", 1..2)), 1)]
2034 #[case(InnerLiteral::Uni(UniRef::test_new("abcd", 1..3)), 2)]
2035 #[case(InnerLiteral::Multi(Box::new(MultiRef::test_new([""; 0], 0..0))), 0)]
2036 #[case(InnerLiteral::Multi(Box::new(MultiRef::test_new([""], 0..0))), 0)]
2037 #[case(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a"], 0..0))), 0)]
2038 #[case(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a", ""], 0..0))), 0)]
2039 #[case(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a"], 0..1))), 1)]
2040 #[case(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a", "b"], 0..2))), 2)]
2041 #[case(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a", "b", "cd"], 1..4))), 3)]
2042 fn test_inner_literal_len(#[case] inner: InnerLiteral, #[case] expect: usize) {
2043 assert_eq!(expect, inner.len());
2044 }
2045
2046 #[rstest]
2047 #[case(InnerLiteral::Static(""), "")]
2048 #[case(InnerLiteral::Static("a"), "a")]
2049 #[case(InnerLiteral::Inline(0, 0, [0; INLINE_LEN]), "")]
2050 #[case(InnerLiteral::Inline(0, 1, [b'a'; INLINE_LEN]), "a")]
2051 #[case(InnerLiteral::Inline(0, INLINE_LEN as u8, [b'b'; INLINE_LEN]), "b".repeat(INLINE_LEN))]
2052 #[case(InnerLiteral::Uni(UniRef::test_new("c", 0..1)), "c")]
2053 #[case(InnerLiteral::Uni(UniRef::test_new("def".repeat(u8::MAX as usize), 0..(3 * u8::MAX as u32))), "def".repeat(u8::MAX as usize))]
2054 fn test_inner_literal_repr_together(
2055 #[case] inner: InnerLiteral,
2056 #[case] expect: impl AsRef<str>,
2057 ) {
2058 assert!(matches!(inner.repr(), Repr::Together(s) if s == expect.as_ref()));
2059 }
2060
2061 #[test]
2062 fn test_inner_literal_repr_split() {
2063 let inner = InnerLiteral::Multi(Box::new(MultiRef::test_new(["xfoo", " ", "barx"], 1..8)));
2064 let repr = inner.repr();
2065
2066 if let Repr::Split(m) = repr {
2067 let mut b = m.clone();
2068 let mut dst = [0u8; 7];
2069
2070 b.copy_to_slice(&mut dst);
2071
2072 assert_eq!(b"foo bar", &dst);
2073 assert_eq!(0, b.remaining());
2074 assert_eq!(0, b.chunk().len());
2075 } else {
2076 panic!("expected {:?} to be Repr::Split", repr);
2077 }
2078 }
2079
2080 #[rstest]
2081 #[case(Literal::from_static(""), 0)]
2082 #[case(Literal::from_static("a"), 1)]
2083 #[case(Literal::from_static(concat!(
2084 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
2085 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
2086 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
2087 "aaaaaaaaaaaaaab",
2088 )), u8::MAX as usize)]
2089 #[case(Literal::from_ref(""), 0)]
2090 #[case(Literal::from_ref(&"a".repeat(INLINE_LEN)), INLINE_LEN)]
2091 #[case(Literal::from_ref(&"b".repeat(INLINE_LEN+1)), INLINE_LEN+1)]
2092 #[case(Literal::from_ref(&Cow::Borrowed("foo")), 3)]
2093 #[case(Literal::from_ref(&Cow::Owned("bar".to_string())), 3)]
2094 #[case(Literal::from_string("".to_string()), 0)]
2095 #[case(Literal::from_string("c".to_string()), 1)]
2096 #[case(Literal::from_string("d".repeat(100 * INLINE_LEN)), 100 * INLINE_LEN)]
2097 #[case("baz".into(), 3)]
2098 #[case(Cow::Borrowed("").into(), 0)]
2099 #[case(Cow::<str>::Owned("e".repeat(INLINE_LEN-1)).into(), INLINE_LEN-1)]
2100 #[case("qux".to_string().into(), 3)]
2101 #[case(Literal::from_str("hello, world").unwrap(), 12)]
2102 #[case(Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(["b", "a", "z"], 0..3)))), 3)]
2103 fn test_literal_convert(#[case] literal: Literal, #[case] expect_len: usize) {
2104 assert_eq!(expect_len, literal.len());
2105 assert_eq!(expect_len == 0, literal.is_empty());
2106
2107 let mut b = literal.clone().into_buf();
2108
2109 assert_eq!(expect_len, b.remaining());
2110 assert_eq!(expect_len == 0, !b.has_remaining());
2111
2112 let mut dst = vec![0u8; expect_len];
2113 b.copy_to_slice(&mut dst);
2114
2115 let s = String::from_utf8(dst).unwrap();
2116
2117 assert_eq!(literal.to_string(), s);
2118 assert_eq!(Into::<String>::into(literal), s);
2119 }
2120
2121 #[test]
2122 fn test_literal_compare() {
2123 let a_s = vec![
2124 Literal::from_static("a"),
2125 Literal::from_ref("a"),
2126 Literal::from_string("a".to_string()),
2127 Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(
2128 ["aaa"],
2129 1..2,
2130 )))),
2131 ];
2132 let aa_s: Vec<Literal> = vec![
2133 Literal::from_ref(&"a".repeat(INLINE_LEN)),
2134 Literal::from_string("a".repeat(INLINE_LEN)),
2135 Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(
2136 [[b'a'; INLINE_LEN]],
2137 0..INLINE_LEN,
2138 )))),
2139 Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(
2140 ["a"; INLINE_LEN],
2141 0..INLINE_LEN,
2142 )))),
2143 ];
2144 let aab_s: Vec<Literal> = vec![
2145 Literal::from_static(concat!(
2146 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
2147 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
2148 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
2149 "aaaaaaaaaaaaaab",
2150 )),
2151 Literal::from_ref(("a".repeat(u8::MAX as usize - 1) + "b").as_str()),
2152 Literal::from_string("a".repeat(u8::MAX as usize - 1) + "b"),
2153 Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(
2154 ["a".repeat(u8::MAX as usize - 1), "abc".to_string()],
2155 1..u8::MAX as usize + 1,
2156 )))),
2157 ];
2158
2159 macro_rules! assert_all_eq {
2160 ($a:expr, $b:expr) => {
2161 assert_eq!($a, $a);
2162 assert_eq!($b, $a);
2163 assert_eq!($a, $b);
2164 assert!($a <= $a);
2165 assert!(!($a < $a));
2166 assert!($a >= $a);
2167 assert!(!($a > $a));
2168 };
2169 }
2170
2171 macro_rules! assert_all_ne {
2172 ($a:expr, $b:expr) => {
2173 assert_ne!($a, $b);
2174 assert_ne!($b, $a);
2175 };
2176 }
2177
2178 macro_rules! assert_all_lt {
2179 ($a:expr, $b:expr) => {
2180 assert!($a < $b);
2181 assert!(!($b < $a));
2182 assert!(!($a > $b));
2183 assert!($b > $a);
2184 assert!($a <= $b);
2185 assert!($b >= $a);
2186 };
2187 }
2188
2189 macro_rules! assert_all_gt {
2190 ($a:expr, $b:expr) => {
2191 assert!($a > $b);
2192 assert!(!($b > $a));
2193 assert!(!($a < $b));
2194 assert!($b < $a);
2195 assert!($a >= $b);
2196 assert!($b <= $a);
2197 };
2198 }
2199
2200 for a in &a_s {
2201 assert_all_eq!(a, "a");
2202 assert_all_eq!(Unescaped::Literal(a), "a");
2203 assert_all_ne!(a, "ab");
2204 assert_all_ne!(Unescaped::Literal(a), "aa");
2205 assert_eq!(&"a", a);
2206 assert_eq!(&"a".to_string(), a);
2207 assert_eq!(a, &"a");
2208 assert_eq!(a, &"a".to_string());
2209
2210 assert!(a <= &"a");
2211 assert!(a <= &"a".to_string());
2212 assert!(!(a < &"a"));
2213 assert!(!(a < &"a".to_string()));
2214 assert!(a >= &"a");
2215 assert!(a >= &"a".to_string());
2216 assert!(!(a > &"a"));
2217 assert!(!(a > &"a".to_string()));
2218
2219 for other in aa_s.iter().chain(aab_s.iter()) {
2220 assert_all_ne!(a, other);
2221 assert_all_lt!(a, other);
2222 assert_all_gt!(other, a);
2223 }
2224 }
2225
2226 for aa in &aa_s {
2227 assert_all_eq!(aa, "a".repeat(INLINE_LEN).as_str());
2228 assert_all_eq!(Unescaped::Literal(aa), "a".repeat(INLINE_LEN).as_str());
2229 assert_all_ne!(aa, "aab");
2230 assert_all_ne!(Unescaped::Literal(aa), "aab");
2231
2232 assert_all_gt!(aa, "a");
2233 assert_all_gt!(Unescaped::Literal(aa), "a");
2234 assert_all_lt!(aa, "aab");
2235 assert_all_lt!(Unescaped::Literal(aa), "aab");
2236
2237 assert!(aa < &"aab");
2238 assert!(aa < &"aab".to_string());
2239 assert!(aa <= &"aab");
2240 assert!(aa <= &"aab".to_string());
2241 assert!(&"aab" > aa);
2242 assert!(&"aab".to_string() > aa);
2243 assert!(aa <= &"aab");
2244 assert!(aa <= &"aab".to_string());
2245 assert!(&"aab" > aa);
2246 assert!(&"aab".to_string() > aa);
2247
2248 for aab in &aab_s {
2249 assert_all_ne!(aa, aab);
2250 assert_all_lt!(aa, aab);
2251 assert_all_gt!(aab, aa);
2252 }
2253 }
2254
2255 macro_rules! check_map {
2256 ($map:ident, $patient_zero:expr, $iter:expr) => {
2257 assert!($map.insert($patient_zero, $patient_zero).is_none());
2258 for item in $iter {
2259 assert_eq!($patient_zero, *$map.get(&item).unwrap());
2260 }
2261 };
2262 }
2263
2264 let mut hash_map1 = HashMap::new();
2265
2266 check_map!(hash_map1, a_s[0].clone(), a_s.clone());
2267 check_map!(hash_map1, aa_s[0].clone(), aa_s.clone());
2268 check_map!(hash_map1, aab_s[0].clone(), aab_s.clone());
2269
2270 let mut hash_map2 = HashMap::new();
2271
2272 let unescaped_a = Unescaped::Literal(a_s[0].clone());
2273 let unescaped_aa = Unescaped::Literal(aa_s[0].clone());
2274 let unescaped_aab = Unescaped::Literal(aab_s[0].clone());
2275
2276 check_map!(
2277 hash_map2,
2278 unescaped_a.clone(),
2279 a_s.iter().cloned().map(Unescaped::Literal)
2280 );
2281 check_map!(
2282 hash_map2,
2283 unescaped_aa.clone(),
2284 aa_s.iter().cloned().map(Unescaped::Literal)
2285 );
2286 check_map!(
2287 hash_map2,
2288 unescaped_aab.clone(),
2289 aab_s.iter().cloned().map(Unescaped::Literal)
2290 );
2291
2292 let mut btree_map1 = BTreeMap::new();
2293
2294 check_map!(btree_map1, a_s[0].clone(), a_s.clone());
2295 check_map!(btree_map1, aa_s[0].clone(), aa_s.clone());
2296 check_map!(btree_map1, aab_s[0].clone(), aab_s.clone());
2297
2298 let mut btree_map2 = BTreeMap::new();
2299
2300 check_map!(
2301 btree_map2,
2302 unescaped_a.clone(),
2303 a_s.iter().cloned().map(Unescaped::Literal)
2304 );
2305 check_map!(
2306 btree_map2,
2307 unescaped_aa.clone(),
2308 aa_s.iter().cloned().map(Unescaped::Literal)
2309 );
2310 check_map!(
2311 btree_map2,
2312 unescaped_aab.clone(),
2313 aab_s.iter().cloned().map(Unescaped::Literal)
2314 );
2315 }
2316
2317 #[rstest]
2318 #[case(Literal::from_static(""))]
2319 #[case(Literal::from_ref(""))]
2320 #[case(Literal::from_string("".into()))]
2321 #[case(Literal(InnerLiteral::Uni(UniRef::test_new("", 0..0))))]
2322 #[case(Literal(InnerLiteral::Uni(UniRef::test_new("a", 1..1))))]
2323 #[case(Literal(InnerLiteral::Uni(UniRef::test_new("ab", 1..1))))]
2324 #[case(Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(["0"], 0..0)))))]
2325 #[case(Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a"], 1..1)))))]
2326 #[case(Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a", "b"], 1..1)))))]
2327 #[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
2328 fn test_literal_buf_advance_panic(#[case] literal: Literal) {
2329 let _ = literal.into_buf().advance(1);
2330 }
2331
2332 #[rstest]
2333 #[case(Literal::from_static(""))]
2334 #[case(Literal::from_ref(""))]
2335 #[case(Literal::from_string("".into()))]
2336 #[case(Literal(InnerLiteral::Uni(UniRef::test_new("", 0..0))))]
2337 #[case(Literal(InnerLiteral::Uni(UniRef::test_new("a", 1..1))))]
2338 #[case(Literal(InnerLiteral::Uni(UniRef::test_new("ab", 1..1))))]
2339 #[case(Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(["0"], 0..0)))))]
2340 #[case(Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a"], 1..1)))))]
2341 #[case(Literal(InnerLiteral::Multi(Box::new(MultiRef::test_new(["a", "b"], 1..1)))))]
2342 #[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
2343 fn test_literal_buf_copy_to_slice_panic(#[case] literal: Literal) {
2344 let mut dst = [0; 1];
2345
2346 let _ = literal.into_buf().copy_to_slice(&mut dst);
2347 }
2348
2349 #[rstest]
2350 #[case(Content::from_static(""), "", None)]
2351 #[case(Content::from_static(""), "", None)]
2352 #[case(
2353 Content::from_static(concat!(
2354 "................................................................................",
2355 ",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
2356 "________________________________________________________________________________",
2357 "+++++++++++++++",
2358 )),
2359 concat!(
2360 "................................................................................",
2361 ",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
2362 "________________________________________________________________________________",
2363 "+++++++++++++++",
2364 ),
2365 None,
2366 )]
2367 #[case(Content(InnerContent::Inline(0, [0; INLINE_LEN])), "", None)]
2368 #[case(Content(InnerContent::NotEscapedUni(UniRef::test_new("", 0..0))), "", None)]
2369 #[case(Content(InnerContent::NotEscapedUni(UniRef::test_new("foo", 0..3))), "foo", None)]
2370 #[case(Content(InnerContent::NotEscapedUni(UniRef::test_new("a barge", 2..5))), "bar", None)]
2371 #[case(Content(InnerContent::NotEscapedMulti(Box::new(MultiRef::test_new([""], 0..0)))), "", None)]
2372 #[case(Content(InnerContent::NotEscapedMulti(Box::new(MultiRef::test_new(["a b", "a", "rge"], 2..5)))), "bar", None)]
2373 #[case(Content(InnerContent::EscapedUni(UniRef::test_new("", 0..0))), "", Some(""))]
2374 #[case(Content(InnerContent::EscapedUni(UniRef::test_new("foo", 0..3))), "foo", Some("foo"))]
2375 #[case(Content(InnerContent::EscapedUni(UniRef::test_new("a b\\u0061rge", 2..10))), "b\\u0061r", Some("bar"))]
2376 #[case(Content(InnerContent::EscapedMulti(Box::new(MultiRef::test_new([""], 0..0)))), "", Some(""))]
2377 #[case(Content(InnerContent::EscapedMulti(Box::new(MultiRef::test_new(["tomf", "oo", "lery"], 3..6)))), "foo", Some("foo"))]
2378 #[case(Content(InnerContent::EscapedMulti(Box::new(MultiRef::test_new(["\\", "u", "006", "6\\u", "0", "06", "fox"], 0..13)))), "\\u0066\\u006fo", Some("foo"))]
2379 #[case(Content::from_bufs(&Bufs::new(Bufs::MIN_BUF_SIZE), 0..0, false), "", None)]
2380 #[case(Content::from_bufs(&Bufs::new(Bufs::MIN_BUF_SIZE), 0..0, true), "", Some(""))]
2381 fn test_content(
2382 #[case] content: Content,
2383 #[case] expect_literal: &str,
2384 #[case] expect_unescaped: Option<&str>,
2385 ) {
2386 assert_eq!(expect_literal, content.literal().into_string());
2387 assert_eq!(expect_unescaped.is_some(), content.is_escaped());
2388 if let Some(expect) = expect_unescaped {
2389 assert_eq!(expect, content.unescaped().into_string());
2390 }
2391 }
2392
2393 #[rstest]
2394 #[case(
2395 ErrorKind::Read,
2396 "read error at line 2, column 1 (offset: 3)",
2397 Some(io::ErrorKind::BrokenPipe)
2398 )]
2399 #[case(
2400 ErrorKind::UnexpectedEof(Token::LitNull),
2401 "unexpected EOF in null token at line 2, column 1 (offset: 3)",
2402 None
2403 )]
2404 fn test_error(
2405 #[case] kind: ErrorKind,
2406 #[case] expect_display: &str,
2407 #[case] source: Option<std::io::ErrorKind>,
2408 ) {
2409 let pos = Pos::new(3, 2, 1);
2410 let err = Error {
2411 kind,
2412 pos,
2413 source: source.map(io::Error::from).map(Arc::new),
2414 };
2415
2416 assert_eq!(kind, err.kind());
2417 assert_eq!(&pos, err.pos());
2418 assert_eq!(
2419 source,
2420 err.source()
2421 .and_then(|e| e.downcast_ref::<io::Error>())
2422 .map(|e| e.kind()),
2423 );
2424
2425 let actual_display = format!("{err}");
2426 assert_eq!(expect_display, actual_display);
2427 }
2428
2429 #[test]
2430 #[should_panic(expected = "buffer size too low: minimum is 1 bytes, but 0 was given")]
2431 fn test_bufs_new_panic() {
2432 let _ = Bufs::new(0);
2433 }
2434
2435 #[test]
2436 fn test_bufs_new_reset() {
2437 let mut bufs = Bufs::new(Bufs::MIN_BUF_SIZE);
2438
2439 bufs.reset();
2440
2441 assert!(bufs.current.is_empty());
2442 assert!(bufs.used.is_empty());
2443 assert_eq!(0, bufs.i);
2444 assert_eq!(0, bufs.j);
2445 assert_eq!(0, bufs.k);
2446 assert!(bufs.maybe_free.is_empty());
2447 assert_eq!(Bufs::MIN_BUF_SIZE, bufs.buf_size);
2448 assert!(!bufs.eof);
2449
2450 assert!(bufs.byte().is_none());
2451 }
2452
2453 #[test]
2454 fn test_bufs_new_byte() {
2455 let mut bufs = Bufs::new(Bufs::MIN_BUF_SIZE);
2456
2457 assert!(bufs.byte().is_none());
2458
2459 assert!(bufs.current.is_empty());
2460 assert!(bufs.used.is_empty());
2461 assert_eq!(0, bufs.i);
2462 assert_eq!(0, bufs.j);
2463 assert_eq!(0, bufs.k);
2464 assert!(bufs.maybe_free.is_empty());
2465 assert_eq!(Bufs::MIN_BUF_SIZE, bufs.buf_size);
2466 assert!(!bufs.eof);
2467 }
2468
2469 #[test]
2470 fn test_bufs_read_empty() {
2471 let mut bufs = Bufs::new(Bufs::MIN_BUF_SIZE);
2472 let mut empty: &[u8] = &[];
2473
2474 assert!(matches!(bufs.read(&mut empty), Ok(true)));
2475
2476 assert!(bufs.current.is_empty());
2477 assert!(bufs.used.is_empty());
2478 assert_eq!(0, bufs.i);
2479 assert_eq!(0, bufs.j);
2480 assert_eq!(0, bufs.k);
2481 assert!(bufs.maybe_free.is_empty());
2482 assert_eq!(Bufs::MIN_BUF_SIZE, bufs.buf_size);
2483 assert!(bufs.eof);
2484
2485 assert!(matches!(bufs.read(&mut empty), Ok(true)));
2486 }
2487
2488 #[rstest]
2489 #[case(Bufs::MIN_BUF_SIZE, "a", 0)]
2490 #[case(Bufs::DEFAULT_BUF_SIZE, "b", 0)]
2491 #[case(Bufs::MIN_BUF_SIZE, "foo", 2)]
2492 #[case(Bufs::DEFAULT_BUF_SIZE, "bar", 0)]
2493 fn test_bufs_read_to_end(
2494 #[case] buf_size: usize,
2495 #[case] input: &str,
2496 #[case] expect_used: usize,
2497 ) {
2498 let mut bufs = Bufs::new(buf_size);
2499 let mut reader = input.as_bytes();
2500 let mut dst = Vec::with_capacity(input.len());
2501
2502 loop {
2503 assert!(bufs.used.len() <= expect_used);
2504
2505 loop {
2506 match bufs.byte() {
2507 Some(b) => dst.push(b),
2508 None => break,
2509 }
2510 }
2511
2512 match bufs.read(&mut reader) {
2513 Ok(true) => break,
2514 Ok(false) => continue,
2515 Err(err) => panic!("unexpected error: {err},"),
2516 }
2517 }
2518
2519 assert!(bufs.eof);
2520 assert_eq!(input, str::from_utf8(&dst).unwrap());
2521 assert_eq!(expect_used, bufs.used.len());
2522 assert_eq!(buf_size, bufs.current.capacity());
2523 bufs.used.iter().enumerate().for_each(|(i, u)| {
2524 assert_eq!(
2525 buf_size,
2526 u.len(),
2527 "expected used[{i}] to have length {buf_size}, but it is {}",
2528 u.len()
2529 )
2530 });
2531 }
2532
2533 #[test]
2534 #[should_panic(expected = "read 2 bytes but buffer size is only 1")]
2535 fn test_bufs_read_too_much() {
2536 struct ReadTooMuch;
2537
2538 impl Read for ReadTooMuch {
2539 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
2540 Ok(buf.len() + 1)
2541 }
2542 }
2543
2544 let mut bufs = Bufs::new(Bufs::MIN_BUF_SIZE);
2545 let mut reader = ReadTooMuch;
2546
2547 let _ = bufs.read(&mut reader);
2548 }
2549
2550 #[test]
2551 fn test_bufs_read_error() {
2552 struct ReadError;
2553
2554 impl Read for ReadError {
2555 fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
2556 Err(io::Error::new(io::ErrorKind::Other, "snafu"))
2557 }
2558 }
2559
2560 let mut bufs = Bufs::new(Bufs::MIN_BUF_SIZE);
2561 let mut reader = ReadError;
2562
2563 let result = bufs.read(&mut reader);
2564
2565 assert!(
2566 matches!(result, Err(err) if err.kind() == io::ErrorKind::Other && err.to_string() == "snafu")
2567 );
2568 }
2569
2570 #[test]
2571 fn test_analyzer_empty() {
2572 let mut an = ReadAnalyzer::new(io::empty());
2573
2574 assert_eq!(an.next(), Token::Eof);
2575 assert_eq!("", an.content().literal().into_string());
2576 assert_eq!("", an.content().unescaped().into_string());
2577 }
2578
2579 #[test]
2580 fn test_analyzer_initial_state_content() {
2581 let an = ReadAnalyzer::new(io::empty());
2582
2583 for _ in 0..5 {
2584 let content = an.content();
2585 assert_eq!("", content.literal().into_string());
2586 assert!(!content.is_escaped());
2587 assert_eq!("", content.unescaped().into_string());
2588
2589 let content = an.try_content().unwrap();
2590 assert_eq!("", content.literal().into_string());
2591 assert!(!content.is_escaped());
2592 assert_eq!("", content.unescaped().into_string());
2593 }
2594 }
2595
2596 #[test]
2597 #[should_panic(
2598 expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
2599 )]
2600 fn test_analyzer_initial_state_err() {
2601 let _ = ReadAnalyzer::new(io::empty()).err();
2602 }
2603
2604 #[rstest]
2605 #[case("", Token::Eof, None)]
2606 #[case("{", Token::ObjBegin, None)]
2607 #[case("}", Token::ObjEnd, None)]
2608 #[case("[", Token::ArrBegin, None)]
2609 #[case("]", Token::ArrEnd, None)]
2610 #[case(":", Token::NameSep, None)]
2611 #[case(",", Token::ValueSep, None)]
2612 #[case("false", Token::LitFalse, None)]
2613 #[case("null", Token::LitNull, None)]
2614 #[case("true", Token::LitTrue, None)]
2615 #[case("0", Token::Num, None)]
2616 #[case("-0", Token::Num, None)]
2617 #[case("1", Token::Num, None)]
2618 #[case("-1", Token::Num, None)]
2619 #[case("12", Token::Num, None)]
2620 #[case("-12", Token::Num, None)]
2621 #[case("0.0", Token::Num, None)]
2622 #[case("-0.0", Token::Num, None)]
2623 #[case("0.123456789", Token::Num, None)]
2624 #[case("-123.456789", Token::Num, None)]
2625 #[case("0E0", Token::Num, None)]
2626 #[case("0e0", Token::Num, None)]
2627 #[case("0E+0", Token::Num, None)]
2628 #[case("0e+0", Token::Num, None)]
2629 #[case("0E-0", Token::Num, None)]
2630 #[case("0e-0", Token::Num, None)]
2631 #[case("0.0E0", Token::Num, None)]
2632 #[case("0.0e0", Token::Num, None)]
2633 #[case("0.0E+0", Token::Num, None)]
2634 #[case("0.0e+0", Token::Num, None)]
2635 #[case("0.0E0", Token::Num, None)]
2636 #[case("0.0e0", Token::Num, None)]
2637 #[case("0E0", Token::Num, None)]
2638 #[case("0e0", Token::Num, None)]
2639 #[case("-0E+0", Token::Num, None)]
2640 #[case("-0e+0", Token::Num, None)]
2641 #[case("-0E-0", Token::Num, None)]
2642 #[case("-0e-0", Token::Num, None)]
2643 #[case("-0.0E0", Token::Num, None)]
2644 #[case("-0.0e0", Token::Num, None)]
2645 #[case("-0.0E+0", Token::Num, None)]
2646 #[case("-0.0e+0", Token::Num, None)]
2647 #[case("-0.0E0", Token::Num, None)]
2648 #[case("-0.0e0", Token::Num, None)]
2649 #[case("123E456", Token::Num, None)]
2650 #[case("123e456", Token::Num, None)]
2651 #[case("123.456E+7", Token::Num, None)]
2652 #[case("123.456e+7", Token::Num, None)]
2653 #[case("123.456E-89", Token::Num, None)]
2654 #[case("123.456e-89", Token::Num, None)]
2655 #[case("-123E456", Token::Num, None)]
2656 #[case("-123e456", Token::Num, None)]
2657 #[case("-123.456E+7", Token::Num, None)]
2658 #[case("-123.456e+7", Token::Num, None)]
2659 #[case("-123.456E-89", Token::Num, None)]
2660 #[case("-123.456e-89", Token::Num, None)]
2661 #[case(r#""""#, Token::Str, None)]
2662 #[case(r#"" ""#, Token::Str, None)]
2663 #[case(r#""foo""#, Token::Str, None)]
2664 #[case(r#""The quick brown fox jumped over the lazy dog!""#, Token::Str, None)]
2665 #[case(r#""\\""#, Token::Str, Some(r#""\""#))]
2666 #[case(r#""\/""#, Token::Str, Some(r#""/""#))]
2667 #[case(r#""\t""#, Token::Str, Some("\"\t\""))]
2668 #[case(r#""\r""#, Token::Str, Some("\"\r\""))]
2669 #[case(r#""\n""#, Token::Str, Some("\"\n\""))]
2670 #[case(r#""\f""#, Token::Str, Some("\"\u{000c}\""))]
2671 #[case(r#""\b""#, Token::Str, Some("\"\u{0008}\""))]
2672 #[case(r#""\u0000""#, Token::Str, Some("\"\u{0000}\""))]
2673 #[case(r#""\u001f""#, Token::Str, Some("\"\u{001f}\""))]
2674 #[case(r#""\u0020""#, Token::Str, Some(r#"" ""#))]
2675 #[case(r#""\u007E""#, Token::Str, Some(r#""~""#))]
2676 #[case(r#""\u007F""#, Token::Str, Some("\"\u{007f}\""))]
2677 #[case(r#""\u0080""#, Token::Str, Some("\"\u{0080}\""))]
2678 #[case(r#""\u0100""#, Token::Str, Some("\"\u{0100}\""))]
2679 #[case(r#""\uE000""#, Token::Str, Some("\"\u{e000}\""))]
2680 #[case(r#""\ufDCf""#, Token::Str, Some("\"\u{fdcf}\""))]
2681 #[case(r#""\uFdeF""#, Token::Str, Some("\"\u{fdef}\""))]
2682 #[case(r#""\ufffd""#, Token::Str, Some("\"\u{fffd}\""))]
2683 #[case(r#""\uFFFE""#, Token::Str, Some("\"\u{fffe}\""))]
2684 #[case(r#""\uFFFF""#, Token::Str, Some("\"\u{ffff}\""))]
2685 #[case(r#""\ud800\udc00""#, Token::Str, Some("\"\u{10000}\""))] #[case(r#""\uD800\uDFFF""#, Token::Str, Some("\"\u{103ff}\""))] #[case(r#""\uDBFF\uDC00""#, Token::Str, Some("\"\u{10fc00}\""))] #[case(r#""\udbFf\udfff""#, Token::Str, Some("\"\u{10ffff}\""))] #[case(" ", Token::White, None)]
2690 #[case("\t", Token::White, None)]
2691 #[case(" ", Token::White, None)]
2692 #[case("\t\t", Token::White, None)]
2693 #[case(" \t \t \t \t\t", Token::White, None)]
2694 fn test_analyzer_single_token(
2695 #[case] input: &str,
2696 #[case] expect: Token,
2697 #[case] unescaped: Option<&str>,
2698 ) {
2699 const BUF_SIZES: [usize; 7] = [
2700 1,
2701 2,
2702 INLINE_LEN - 1,
2703 INLINE_LEN,
2704 INLINE_LEN + 1,
2705 10,
2706 Bufs::DEFAULT_BUF_SIZE,
2707 ];
2708
2709 for buf_size in BUF_SIZES {
2710 {
2712 let mut an =
2713 ReadAnalyzer::with_buf_size(io::Cursor::new(input.as_bytes()), buf_size);
2714 assert_eq!(Pos::default(), *an.pos());
2715
2716 assert_eq!(expect, an.next());
2717 assert_eq!(Pos::default(), *an.pos());
2718
2719 let content = an.content();
2720 assert_eq!(
2721 input,
2722 content.literal().into_string(),
2723 "buf_size = {buf_size}, input = {input:?}, content = {content}"
2724 );
2725 assert_eq!(unescaped.is_some(), content.is_escaped());
2726 if let Some(u) = unescaped {
2727 assert_eq!(u, content.unescaped().into_string());
2728 } else {
2729 assert_eq!(input, content.unescaped().into_string());
2730 }
2731
2732 assert_eq!(Token::Eof, an.next());
2733 assert_eq!(
2734 Pos {
2735 offset: input.len(),
2736 line: 1,
2737 col: input.len() + 1,
2738 },
2739 *an.pos()
2740 );
2741
2742 assert_eq!(Token::Eof, an.next());
2743 assert_eq!(
2744 Pos {
2745 offset: input.len(),
2746 line: 1,
2747 col: input.len() + 1,
2748 },
2749 *an.pos()
2750 );
2751 }
2752
2753 {
2755 let mut an =
2756 ReadAnalyzer::with_buf_size(io::Cursor::new(input.as_bytes()), buf_size);
2757 assert_eq!(Pos::default(), *an.pos());
2758
2759 assert_eq!(expect, an.next());
2760 assert_eq!(Pos::default(), *an.pos());
2761
2762 assert_eq!(Token::Eof, an.next());
2763 assert_eq!(
2764 Pos {
2765 offset: input.len(),
2766 line: 1,
2767 col: input.len() + 1,
2768 },
2769 *an.pos()
2770 );
2771
2772 assert_eq!(Token::Eof, an.next());
2773 assert_eq!(
2774 Pos {
2775 offset: input.len(),
2776 line: 1,
2777 col: input.len() + 1,
2778 },
2779 *an.pos()
2780 );
2781 }
2782 }
2783 }
2784
2785 #[rstest]
2786 #[case(r#"["#)]
2787 #[case(r#"]"#)]
2788 #[case(r#"false"#)]
2789 #[case(r#":"#)]
2790 #[case(r#"null"#)]
2791 #[case(r#"3.14159e+0"#)]
2792 #[case(r#"{"#)]
2793 #[case(r#"}"#)]
2794 #[case(r#""foo\/\u1234\/bar""#)]
2795 #[case(r#"true"#)]
2796 #[case(r#","#)]
2797 #[case("\n\n\n ")]
2798 #[should_panic(
2799 expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
2800 )]
2801 fn test_analyzer_single_token_panic_no_err(#[case] input: &str) {
2802 const BUF_SIZES: [usize; 7] = [
2803 1,
2804 2,
2805 INLINE_LEN - 1,
2806 INLINE_LEN,
2807 INLINE_LEN + 1,
2808 10,
2809 Bufs::DEFAULT_BUF_SIZE,
2810 ];
2811
2812 for buf_size in BUF_SIZES {
2813 let mut an = ReadAnalyzer::with_buf_size(io::Cursor::new(input.as_bytes()), buf_size);
2814
2815 let token = an.next();
2816 assert!(!token.is_terminal(), "input = {input:?}, token = {token:?}");
2817
2818 let _ = an.err();
2819 }
2820 }
2821
2822 #[test]
2823 #[should_panic(expected = "last `next()` returned `Token::Err` (use `err()` instead)")]
2824 fn test_analyzer_single_error_panic_no_content() {
2825 let mut an = ReadAnalyzer::new("a".as_bytes());
2826
2827 assert_eq!(Token::Err, an.next());
2828
2829 let _ = an.content();
2830 }
2831
2832 #[rstest]
2833 #[case(r#""\uDC00""#, ErrorKind::BadSurrogate { first: 0xdc00, second: None, offset: 5 }, 1)]
2834 #[case(&[b'"', 0xc2, 0xc0], ErrorKind::BadUtf8ContByte { seq_len: 2, offset: 1, value: 0xc0 }, 1)]
2835 #[case(&b"\"\x80", ErrorKind::UnexpectedByte { token: Some(Token::Str), expect: Expect::StrChar, actual: 0x80 }, 1)]
2836 #[case([b'"'], ErrorKind::UnexpectedEof(Token::Str), 1)]
2837 #[case("10.", ErrorKind::UnexpectedEof(Token::Num), 3)]
2838 fn test_analyzer_single_lexical_error<T>(
2839 #[case] input: T,
2840 #[case] kind: ErrorKind,
2841 #[case] pos_offset: usize,
2842 ) where
2843 T: AsRef<[u8]> + fmt::Debug,
2844 {
2845 const BUF_SIZES: [usize; 7] = [
2846 1,
2847 2,
2848 INLINE_LEN - 1,
2849 INLINE_LEN,
2850 INLINE_LEN + 1,
2851 10,
2852 Bufs::DEFAULT_BUF_SIZE,
2853 ];
2854
2855 for buf_size in BUF_SIZES {
2856 {
2858 let mut an = ReadAnalyzer::with_buf_size(input.as_ref(), buf_size);
2859 assert_eq!(Pos::default(), *an.pos());
2860
2861 assert_eq!(Token::Err, an.next());
2862 assert_eq!(Pos::default(), *an.pos());
2863
2864 let err = an.err();
2865 assert_eq!(kind, err.kind());
2866 assert_eq!(
2867 Pos {
2868 offset: pos_offset,
2869 line: 1,
2870 col: pos_offset + 1
2871 },
2872 *err.pos()
2873 );
2874 assert!(err.source().is_none());
2875
2876 assert_eq!(Token::Err, an.next());
2877 assert_eq!(Pos::default(), *an.pos());
2878 }
2879
2880 {
2882 let mut an = ReadAnalyzer::with_buf_size(input.as_ref(), buf_size);
2883 assert_eq!(Pos::default(), *an.pos());
2884
2885 assert_eq!(Token::Err, an.next());
2886 assert_eq!(Pos::default(), *an.pos());
2887
2888 assert_eq!(Token::Err, an.next());
2889 assert_eq!(Pos::default(), *an.pos());
2890 }
2891 }
2892 }
2893
2894 #[rstest]
2895 #[case(1, r#"{"#, [Token::ObjBegin], Pos::new(1, 1, 2), Pos::new(1, 1, 2))]
2896 #[case(1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2897 #[case(2, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2898 #[case(Bufs::DEFAULT_BUF_SIZE, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2899 #[case(1, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2900 #[case(2, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2901 #[case(1, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2902 #[case(2, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2903 #[case(INLINE_LEN-1, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2904 #[case(INLINE_LEN-1, r#"[314.1592653589793238462643383279e-2 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(38, 1, 39), Pos::new(38, 1, 39))]
2905 #[case(INLINE_LEN, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2906 #[case(INLINE_LEN, r#"[314.1592653589793238462643383279e-2 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(38, 1, 39), Pos::new(38, 1, 39))]
2907 #[case(INLINE_LEN+1, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2908 #[case(INLINE_LEN+1, r#"[314.1592653589793238462643383279E+999 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(40, 1, 41), Pos::new(40, 1, 41))]
2909 #[case(Bufs::DEFAULT_BUF_SIZE, r#"[3141.592653589793238462643383279e-3,{"aaaaaaaaaaaaaaaaaaaaaaaaaaaa":true}] "#, [Token::ArrBegin, Token::Num, Token::ValueSep, Token::ObjBegin, Token::Str, Token::NameSep, Token::LitTrue, Token::ObjEnd, Token::ArrEnd], Pos::new(75, 1, 76), Pos::new(79, 1, 80))]
2910 fn test_analyzer_single_read_error<T>(
2911 #[case] buf_size: usize,
2912 #[case] input: &str,
2913 #[case] expect_tokens: T,
2914 #[case] expect_token_pos: Pos,
2915 #[case] expect_err_pos: Pos,
2916 ) where
2917 T: IntoIterator<Item = Token>,
2918 {
2919 struct ErrorRead<'a>(&'a [u8]);
2920
2921 impl<'a> Read for ErrorRead<'a> {
2922 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
2923 let n = min(buf.len(), self.0.len());
2924 if n == 0 {
2925 Err(io::Error::new(io::ErrorKind::Other, "snafu"))
2926 } else {
2927 buf[..n].copy_from_slice(&self.0[..n]);
2928 self.0 = &self.0[n..];
2929
2930 Ok(n)
2931 }
2932 }
2933 }
2934
2935 let reader = ErrorRead(input.as_bytes());
2936 let mut an = ReadAnalyzer::with_buf_size(reader, buf_size);
2937
2938 for expect_token in expect_tokens.into_iter() {
2939 let actual_token = an.next();
2940
2941 assert_eq!(expect_token, actual_token);
2942 }
2943
2944 assert_eq!(Token::Err, an.next());
2945 assert_eq!(expect_token_pos, *an.pos());
2946 let err = an.err();
2947 assert_eq!(ErrorKind::Read, err.kind());
2948 assert_eq!(expect_err_pos, *err.pos());
2949
2950 assert_eq!(Token::Err, an.next());
2951 assert_eq!(expect_token_pos, *an.pos());
2952 let err = an.try_content().unwrap_err();
2953 assert_eq!(ErrorKind::Read, err.kind());
2954 assert_eq!(expect_err_pos, *err.pos());
2955 assert_eq!(
2956 io::ErrorKind::Other,
2957 err.source()
2958 .and_then(|e| e.downcast_ref::<io::Error>())
2959 .map(|e| e.kind())
2960 .unwrap(),
2961 );
2962 assert_eq!("snafu", format!("{}", err.source().unwrap()));
2963
2964 assert_eq!(Token::Err, an.next());
2965 }
2966
2967 #[rstest]
2968 #[case(1)]
2969 #[case(2)]
2970 #[case(INLINE_LEN - 1)]
2971 #[case(INLINE_LEN)]
2972 #[case(INLINE_LEN + 1)]
2973 #[case(Bufs::DEFAULT_BUF_SIZE)]
2974 fn test_analyzer_into_parser(#[case] buf_size: usize) {
2975 let input = r#"{"hello":["🌍"]}"#;
2976 let mut parser = ReadAnalyzer::with_buf_size(input.as_bytes(), buf_size).into_parser();
2977
2978 assert_eq!(Token::ObjBegin, parser.next());
2979 assert_eq!("{", parser.content().literal());
2980 assert_eq!(Pos::default(), *parser.pos());
2981 assert_eq!(1, parser.level());
2982
2983 assert_eq!(Token::Str, parser.next());
2984 assert_eq!(r#""hello""#, parser.content().literal());
2985 assert_eq!(Pos::new(1, 1, 2), *parser.pos());
2986 assert_eq!(1, parser.level());
2987
2988 assert_eq!(Token::NameSep, parser.next());
2989 assert_eq!(":", parser.content().literal());
2990 assert_eq!(Pos::new(8, 1, 9), *parser.pos());
2991 assert_eq!(1, parser.level());
2992
2993 assert_eq!(Token::ArrBegin, parser.next());
2994 assert_eq!("[", parser.content().literal());
2995 assert_eq!(Pos::new(9, 1, 10), *parser.pos());
2996 assert_eq!(2, parser.level());
2997
2998 assert_eq!(Token::Str, parser.next());
2999 assert_eq!(r#""🌍""#, parser.content().literal());
3000 assert_eq!(Pos::new(10, 1, 11), *parser.pos());
3001 assert_eq!(2, parser.level());
3002
3003 assert_eq!(Token::ArrEnd, parser.next());
3004 assert_eq!("]", parser.content().literal());
3005 assert_eq!(Pos::new(16, 1, 14), *parser.pos());
3006 assert_eq!(1, parser.level());
3007
3008 assert_eq!(Token::ObjEnd, parser.next());
3009 assert_eq!("}", parser.content().literal());
3010 assert_eq!(Pos::new(17, 1, 15), *parser.pos());
3011 assert_eq!(0, parser.level());
3012
3013 for _ in 0..5 {
3014 assert_eq!(Token::Eof, parser.next());
3015 assert_eq!(Pos::new(18, 1, 16), *parser.pos());
3016 assert_eq!(0, parser.level());
3017 }
3018 }
3019
3020 #[rstest]
3021 #[case(1)]
3022 #[case(2)]
3023 #[case(INLINE_LEN - 1)]
3024 #[case(INLINE_LEN)]
3025 #[case(INLINE_LEN + 1)]
3026 #[case(Bufs::DEFAULT_BUF_SIZE)]
3027 fn test_analyzer_smoke(#[case] buf_size: usize) {
3028 const JSON_TEXT: &str = r#"
3029
3030[
3031 [],
3032 {},
3033 [true, false, null, "foo",-9, -9.9, -99.99e-99, {"❤️😊":1}, 10000000],
3034 "\u0068\u0065\u006c\u006c\u006f\u002c\u0020\u0077\u006f\u0072\u006c\u0064",
3035 "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\nUt labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\nLaboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in."
3036]"#;
3037
3038 const EXPECT: &[(Token, Pos, &str, Option<&str>)] = &[
3039 (Token::White, Pos::new(0, 1, 1), "\n\n", None),
3041 (Token::ArrBegin, Pos::new(2, 3, 1), "[", None),
3043 (Token::White, Pos::new(3, 3, 2), "\n ", None),
3044 (Token::ArrBegin, Pos::new(6, 4, 3), "[", None),
3046 (Token::ArrEnd, Pos::new(7, 4, 4), "]", None),
3047 (Token::ValueSep, Pos::new(8, 4, 5), ",", None),
3048 (Token::White, Pos::new(9, 4, 6), "\n ", None),
3049 (Token::ObjBegin, Pos::new(12, 5, 3), "{", None),
3051 (Token::ObjEnd, Pos::new(13, 5, 4), "}", None),
3052 (Token::ValueSep, Pos::new(14, 5, 5), ",", None),
3053 (Token::White, Pos::new(15, 5, 6), "\n ", None),
3054 (Token::ArrBegin, Pos::new(18, 6, 3), "[", None),
3056 (Token::LitTrue, Pos::new(19, 6, 4), "true", None),
3057 (Token::ValueSep, Pos::new(23, 6, 8), ",", None),
3058 (Token::White, Pos::new(24, 6, 9), " ", None),
3059 (Token::LitFalse, Pos::new(25, 6, 10), "false", None),
3060 (Token::ValueSep, Pos::new(30, 6, 15), ",", None),
3061 (Token::White, Pos::new(31, 6, 16), " ", None),
3062 (Token::LitNull, Pos::new(32, 6, 17), "null", None),
3063 (Token::ValueSep, Pos::new(36, 6, 21), ",", None),
3064 (Token::White, Pos::new(37, 6, 22), " ", None),
3065 (Token::Str, Pos::new(38, 6, 23), r#""foo""#, None),
3066 (Token::ValueSep, Pos::new(43, 6, 28), ",", None),
3067 (Token::Num, Pos::new(44, 6, 29), "-9", None),
3068 (Token::ValueSep, Pos::new(46, 6, 31), ",", None),
3069 (Token::White, Pos::new(47, 6, 32), " ", None),
3070 (Token::Num, Pos::new(48, 6, 33), "-9.9", None),
3071 (Token::ValueSep, Pos::new(52, 6, 37), ",", None),
3072 (Token::White, Pos::new(53, 6, 38), " ", None),
3073 (Token::Num, Pos::new(54, 6, 39), "-99.99e-99", None),
3074 (Token::ValueSep, Pos::new(64, 6, 49), ",", None),
3075 (Token::White, Pos::new(65, 6, 50), " ", None),
3076 (Token::ObjBegin, Pos::new(66, 6, 51), "{", None),
3077 (Token::Str, Pos::new(67, 6, 52), r#""❤️😊""#, None),
3078 (Token::NameSep, Pos::new(79, 6, 57), ":", None),
3079 (Token::Num, Pos::new(80, 6, 58), "1", None),
3080 (Token::ObjEnd, Pos::new(81, 6, 59), "}", None),
3081 (Token::ValueSep, Pos::new(82, 6, 60), ",", None),
3082 (Token::White, Pos::new(83, 6, 61), " ", None),
3083 (Token::Num, Pos::new(84, 6, 62), "10000000", None),
3084 (Token::ArrEnd, Pos::new(92, 6, 70), "]", None),
3085 (Token::ValueSep, Pos::new(93, 6, 71), ",", None),
3086 (Token::White, Pos::new(94, 6, 72), "\n ", None),
3087 (
3089 Token::Str,
3090 Pos::new(97, 7, 3),
3091 r#""\u0068\u0065\u006c\u006c\u006f\u002c\u0020\u0077\u006f\u0072\u006c\u0064""#,
3092 Some(r#""hello, world""#),
3093 ),
3094 (Token::ValueSep, Pos::new(171, 7, 77), ",", None),
3095 (Token::White, Pos::new(172, 7, 78), "\n ", None),
3096 (
3098 Token::Str,
3099 Pos::new(175, 8, 3),
3100 concat!(
3101 r#""Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\n"#,
3102 r#"Ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n"#,
3103 r#"Laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in.""#,
3104 ),
3105 Some(concat!(
3106 "\"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\n",
3107 "Ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n",
3108 "Laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in.\"",
3109 )),
3110 ),
3111 (Token::White, Pos::new(455, 8, 283), "\n", None),
3113 (Token::ArrEnd, Pos::new(456, 9, 1), "]", None),
3114 (Token::Eof, Pos::new(457, 9, 2), "", None),
3115 ];
3116
3117 let mut an = ReadAnalyzer::with_buf_size(JSON_TEXT.as_bytes(), buf_size);
3118
3119 for (i, (expect_token, expect_pos, expect_literal, expect_unescaped)) in
3120 EXPECT.iter().enumerate()
3121 {
3122 let actual_token = an.next();
3123 let actual_pos = *an.pos();
3124 let content = an.content();
3125
3126 assert_eq!(
3127 *expect_token, actual_token,
3128 "i = {i}, actual_pos = {actual_pos}, expect_pos = {expect_pos}"
3129 );
3130 assert_eq!(
3131 *expect_pos, actual_pos,
3132 "i = {i}, token = {actual_token}, content = {content}"
3133 );
3134 assert_eq!(
3135 *expect_literal,
3136 content.literal(),
3137 "i = {i}, token = {actual_token}, expect_literal = {expect_literal:?}, content.literal() = {}",
3138 content.literal(),
3139 );
3140 if let Some(u) = expect_unescaped {
3141 assert!(
3142 content.is_escaped(),
3143 "i = {i}, token = {actual_token}, literal = {expect_literal:?}"
3144 );
3145 assert_eq!(*u, content.unescaped());
3146 } else {
3147 assert!(
3148 !content.is_escaped(),
3149 "i = {i}, token = {actual_token}, literal = {expect_literal:?}"
3150 );
3151 assert_eq!(*expect_literal, content.unescaped());
3152 }
3153 }
3154 }
3155
3156 #[rstest]
3157 #[case(29)]
3158 #[case(30)]
3159 #[case(31)]
3160 fn test_analyzer_replace_buf(#[case] buf_size: usize) {
3161 const INPUT: &str = r#"["_________xxxxxxxxxx_______", true ,1.000000001111111111000000000] null"#;
3182 let mut an = ReadAnalyzer::with_buf_size(INPUT.as_bytes(), buf_size);
3183
3184 assert_eq!(Token::ArrBegin, an.next());
3186 assert_eq!(Token::Str, an.next());
3187 let str_content = an.content();
3188 assert_eq!(r#""_________xxxxxxxxxx_______""#, str_content.literal());
3189 assert_eq!(Token::ValueSep, an.next());
3190
3191 assert_eq!(Token::White, an.next());
3193 assert_eq!(Token::LitTrue, an.next());
3194 assert_eq!(Token::White, an.next());
3195 assert_eq!(Token::ValueSep, an.next());
3196
3197 assert_eq!(Token::Num, an.next());
3199
3200 drop(str_content);
3203
3204 assert_eq!(Token::ArrEnd, an.next());
3206
3207 assert_eq!(Token::White, an.next());
3209 assert_eq!(Token::LitNull, an.next());
3210 assert_eq!(Token::Eof, an.next());
3211 }
3212
3213 trait IntoString {
3214 fn into_string(self) -> String;
3215 }
3216
3217 impl<T: IntoBuf> IntoString for T {
3218 fn into_string(self) -> String {
3219 let mut src = self.into_buf();
3220 let mut dst = Vec::with_capacity(src.remaining());
3221 while src.remaining() > 0 {
3222 let chunk = src.chunk();
3223 dst.extend_from_slice(chunk);
3224 src.advance(chunk.len());
3225 }
3226
3227 String::from_utf8(dst).expect("valid UTF-8")
3228 }
3229 }
3230}