1use crate::{
24 Buf, BufUnderflow, EqStr, IntoBuf, OrdStr, Pos,
25 lexical::{self, ErrorKind, Token, Unescaped, state},
26 syntax,
27};
28use bytes::{Buf as _, Bytes};
29use smallvec::{SmallVec, smallvec};
30use std::{
31 borrow::Cow,
32 cmp::Ordering,
33 convert::Infallible,
34 fmt,
35 hash::{Hash, Hasher},
36 mem::MaybeUninit,
37 str::FromStr,
38 sync::Arc,
39};
40
// Capacity, in bytes, of the inline literal representation (`InnerLiteral::Inline`): 4 bytes in
// test builds, 36 bytes otherwise.
// NOTE(review): presumably the tiny test value exists so that short test inputs also exercise the
// non-inline representations — confirm.
#[cfg(test)]
const INLINE_LEN: usize = 4;
#[cfg(not(test))]
const INLINE_LEN: usize = 36;

// Fixed-size byte array used as the storage of an inline literal.
type InlineBuf = [u8; INLINE_LEN];
50
/// A `usize` value and a `bool` flag packed together into one machine word.
///
/// The most significant bit of the word holds the flag; all lower bits hold the value, so the
/// value must not exceed [`USizeBool::VALUE_MASK`].
#[derive(Clone, Debug)]
struct USizeBool(usize);

impl USizeBool {
    /// Mask selecting the flag: the most significant bit of a `usize`.
    const FLAG_BIT: usize = 1 << (usize::BITS - 1);
    /// Mask selecting the value: every bit except the flag bit.
    const VALUE_MASK: usize = !Self::FLAG_BIT;

    /// Packs `value` and `flag` together.
    ///
    /// Debug builds assert that `value` fits in the bits below the flag bit.
    fn new(value: usize, flag: bool) -> Self {
        debug_assert!(value <= Self::VALUE_MASK);
        let flag_bits = if flag { Self::FLAG_BIT } else { 0 };

        Self(flag_bits | value)
    }

    /// Returns the stored value with the flag bit masked out.
    #[inline(always)]
    fn get_usize(&self) -> usize {
        Self::VALUE_MASK & self.0
    }

    /// Replaces the stored value while leaving the flag untouched.
    ///
    /// Debug builds assert that `value` fits in the bits below the flag bit.
    #[inline(always)]
    fn set_usize(&mut self, value: usize) {
        debug_assert!(value <= Self::VALUE_MASK);
        let flag_bits = self.0 & Self::FLAG_BIT;
        self.0 = flag_bits | value;
    }

    /// Returns the stored flag.
    #[inline(always)]
    fn get_bool(&self) -> bool {
        (self.0 & Self::FLAG_BIT) == Self::FLAG_BIT
    }
}
84
85#[derive(Clone, Debug)]
86struct MultiBytes {
87 arr: Box<[Bytes]>,
88 rem: usize,
89 pos_escaped: USizeBool,
90}
91
92impl MultiBytes {
93 fn new(mut arr: Box<[Bytes]>, start_pos: usize, len: usize, escaped: bool) -> Self {
94 #[cfg(debug_assertions)]
95 {
96 #[cfg(test)]
97 const ALLOW_FIRST_BUFFER_EMPTY: bool = true;
98 #[cfg(not(test))]
99 const ALLOW_FIRST_BUFFER_EMPTY: bool = false;
100 debug_assert!(
101 ALLOW_FIRST_BUFFER_EMPTY || start_pos < arr[0].len(),
102 "start_pos ({start_pos}) < arr[0].len ({})",
103 arr[0].len()
104 );
105 #[cfg(test)]
106 const ALLOW_SINGLE_BUFFER: bool = true;
107 #[cfg(not(test))]
108 const ALLOW_SINGLE_BUFFER: bool = false;
109 debug_assert!(
110 ALLOW_SINGLE_BUFFER || arr[0].len() < start_pos + len,
111 "arr[0].len() ({}) < start_pos ({start_pos}) + len ({len})",
112 arr[0].len()
113 );
114 }
115
116 arr[0].advance(start_pos);
118
119 let n = arr.len();
121 let contrib: usize = arr.iter().take(n - 1).map(Bytes::len).sum();
122 debug_assert!(
123 contrib <= len,
124 "contrib ({contrib}) <= len ({len}) for arr = {arr:?}"
125 );
126 arr[n - 1].truncate(len - contrib);
127
128 Self {
130 arr,
131 rem: len,
132 pos_escaped: USizeBool::new(0, escaped),
133 }
134 }
135}
136
137impl Buf for MultiBytes {
138 fn advance(&mut self, mut n: usize) {
139 if self.remaining() < n {
140 panic!(
141 "{}",
142 &BufUnderflow {
143 requested: n,
144 remaining: self.remaining(),
145 }
146 );
147 } else {
148 self.rem -= n;
149 let mut pos = self.pos_escaped.get_usize();
150 while pos < self.arr.len() && self.arr[pos].len() <= n {
151 n -= self.arr[pos].len();
152 pos += 1;
153 }
154 if n > 0 {
155 debug_assert!((pos) < self.arr.len());
156 debug_assert!(self.arr[pos].len() > n);
157 self.arr[pos] = self.arr[pos].slice(n..);
158 }
159 self.pos_escaped.set_usize(pos);
160 }
161 }
162
163 #[inline]
164 fn chunk(&self) -> &[u8] {
165 let pos = self.pos_escaped.get_usize();
166 if pos < self.arr.len() {
167 &self.arr[pos]
168 } else {
169 &[]
170 }
171 }
172
173 #[inline(always)]
174 fn remaining(&self) -> usize {
175 self.rem
176 }
177
178 fn try_copy_to_slice(&mut self, mut dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
179 if self.remaining() < dst.len() {
180 Err(BufUnderflow {
181 requested: dst.len(),
182 remaining: self.remaining(),
183 })
184 } else {
185 self.rem -= dst.len();
186 let mut pos = self.pos_escaped.get_usize();
187 while pos < self.arr.len() && self.arr[pos].len() <= dst.len() {
188 let b = &self.arr[pos];
189 let m = b.len();
190 dst[0..m].copy_from_slice(b);
191 dst = &mut dst[m..];
192 pos += 1;
193 }
194 if !dst.is_empty() {
195 debug_assert!(pos < self.arr.len());
196 debug_assert!(self.arr[pos].len() > dst.len());
197 let n = dst.len();
198 dst.copy_from_slice(&self.arr[pos][..n]);
199 self.arr[pos] = self.arr[pos].slice(n..);
200 }
201 self.pos_escaped.set_usize(pos);
202
203 Ok(())
204 }
205 }
206}
207
// `MultiBytes` already implements `Buf`, so it serves as its own buffer type.
impl IntoBuf for MultiBytes {
    type Buf = Self;

    // Identity conversion: returns `self` unchanged.
    fn into_buf(self) -> Self::Buf {
        self
    }
}
215
/// Borrowed view of a literal's text, distinguishing contiguous from chunked storage.
#[derive(Debug)]
enum Repr<'a> {
    /// The whole text is available as one contiguous string slice.
    Together(&'a str),
    /// The text is split across the chunks of a [`MultiBytes`].
    Split(&'a MultiBytes),
}
221
/// Storage behind both `Literal` and `Content`.
///
/// In the first three variants the trailing `bool` is the "escaped" flag reported by
/// `is_escaped`; `Multi` keeps that flag inside the `MultiBytes` itself.
#[derive(Clone, Debug)]
enum InnerLiteral {
    /// A `'static` string slice.
    Static(&'static str, bool),
    /// Text stored inline: start index, end index (exclusive), and the backing array.
    Inline(u8, u8, InlineBuf, bool),
    /// Text stored in a single `Bytes` value.
    Bytes(Bytes, bool),
    /// Text split across several `Bytes` chunks.
    Multi(MultiBytes),
}
229
230impl InnerLiteral {
231 fn inline(src: &[u8]) -> Self {
232 let mut dst: InlineBuf = [0; INLINE_LEN];
233 dst[0..src.len()].copy_from_slice(src);
234
235 Self::Inline(0, src.len() as u8, dst, false)
236 }
237
238 #[cfg(test)]
239 fn test_new_bytes(s: &'static str, escaped: bool) -> Self {
240 Self::Bytes(Bytes::from_static(s.as_bytes()), escaped)
241 }
242
243 #[cfg(test)]
244 fn test_new_multi<I, T>(bufs: I, start_pos: usize, len: usize, escaped: bool) -> Self
245 where
246 I: IntoIterator<Item = T>,
247 T: Into<Bytes>,
248 {
249 let arr: Box<[Bytes]> = bufs.into_iter().map(Into::into).collect();
250
251 Self::Multi(MultiBytes::new(arr, start_pos, len, escaped))
252 }
253
254 #[inline(always)]
255 fn len(&self) -> usize {
256 match self {
257 Self::Static(s, _) => s.len(),
258 Self::Inline(i, j, _, _) => (*j - *i) as usize,
259 Self::Bytes(b, _) => b.len(),
260 Self::Multi(v) => v.rem,
261 }
262 }
263
264 #[inline]
265 fn repr(&self) -> Repr<'_> {
266 match self {
267 Self::Static(s, _) => Repr::Together(s),
268 Self::Inline(i, j, b, _) => {
269 Repr::Together(unsafe { str::from_utf8_unchecked(&b[*i as usize..*j as usize]) })
270 }
271 Self::Bytes(b, _) => Repr::Together(unsafe { str::from_utf8_unchecked(b) }),
272 Self::Multi(v) => Repr::Split(v),
273 }
274 }
275
276 #[inline]
277 fn is_escaped(&self) -> bool {
278 match self {
279 Self::Static(_, escaped) | Self::Inline(_, _, _, escaped) | Self::Bytes(_, escaped) => {
280 *escaped
281 }
282 Self::Multi(m) => m.pos_escaped.get_bool(),
283 }
284 }
285
286 fn unescaped(&self) -> Unescaped<Literal> {
287 match self {
288 Self::Static(_, false) | Self::Inline(_, _, _, false) | Self::Bytes(_, false) => {
289 Unescaped::Literal(Literal(self.clone()))
290 }
291 Self::Multi(m) if !m.pos_escaped.get_bool() => {
292 Unescaped::Literal(Literal(self.clone()))
293 }
294 _ => {
295 let mut buf = Vec::new();
296 lexical::unescape(self.clone(), &mut buf);
297
298 let s = unsafe { String::from_utf8_unchecked(buf) };
301
302 Unescaped::Expanded(s)
303 }
304 }
305 }
306}
307
308impl Buf for InnerLiteral {
309 fn advance(&mut self, n: usize) {
310 match self {
311 Self::Static(s, _) => {
312 if s.len() < n {
313 panic!(
314 "{}",
315 &BufUnderflow {
316 requested: n,
317 remaining: s.len(),
318 }
319 );
320 } else {
321 *self = Self::Static(&s[n..], false)
322 }
323 }
324
325 Self::Inline(i, j, b, _) => {
326 let len = (*j - *i) as usize;
327 if len < n {
328 panic!(
329 "{}",
330 &BufUnderflow {
331 requested: n,
332 remaining: len,
333 }
334 );
335 } else {
336 *self = Self::Inline(*i + n as u8, *j, *b, false);
337 }
338 }
339
340 Self::Bytes(b, _) => {
341 if b.len() < n {
342 panic!(
343 "{}",
344 &BufUnderflow {
345 requested: n,
346 remaining: b.len(),
347 }
348 );
349 } else {
350 *self = Self::Bytes(b.slice(n..), false);
351 }
352 }
353
354 Self::Multi(m) => m.advance(n),
355 }
356 }
357
358 fn chunk(&self) -> &[u8] {
359 match &self {
360 Self::Static(s, _) => s.as_bytes(),
361 Self::Inline(i, j, b, _) => &b[*i as usize..*j as usize],
362 Self::Bytes(b, _) => b,
363 Self::Multi(r) => r.chunk(),
364 }
365 }
366
367 #[inline]
368 fn remaining(&self) -> usize {
369 self.len()
370 }
371
372 fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
373 match self {
374 Self::Static(s, _) => {
375 if s.len() < dst.len() {
376 Err(BufUnderflow {
377 requested: dst.len(),
378 remaining: s.len(),
379 })
380 } else {
381 dst.copy_from_slice(&s.as_bytes()[..dst.len()]);
382 *self = Self::Static(&s[dst.len()..], false);
383
384 Ok(())
385 }
386 }
387
388 InnerLiteral::Inline(i, j, b, _) => {
389 let len = (*j - *i) as usize;
390 if len < dst.len() {
391 Err(BufUnderflow {
392 requested: dst.len(),
393 remaining: len,
394 })
395 } else {
396 dst.copy_from_slice(&b[*i as usize..*i as usize + dst.len()]);
397 *i += dst.len() as u8;
398
399 Ok(())
400 }
401 }
402
403 InnerLiteral::Bytes(b, _) => {
404 if b.len() < dst.len() {
405 panic!(
406 "{}",
407 &BufUnderflow {
408 requested: dst.len(),
409 remaining: b.len(),
410 }
411 );
412 } else {
413 dst.copy_from_slice(&b[..dst.len()]);
414 *self = Self::Bytes(b.slice(dst.len()..), false);
415
416 Ok(())
417 }
418 }
419
420 InnerLiteral::Multi(m) => m.try_copy_to_slice(dst),
421 }
422 }
423}
424
// `InnerLiteral` already implements `Buf`, so it serves as its own buffer type.
impl IntoBuf for InnerLiteral {
    type Buf = Self;

    // Identity conversion: returns `self` unchanged.
    fn into_buf(self) -> Self::Buf {
        self
    }
}
432
433#[derive(Clone, Debug)]
459pub struct Literal(InnerLiteral);
460
461impl Literal {
462 pub const fn from_static(s: &'static str) -> Self {
510 Self(InnerLiteral::Static(s, false))
511 }
512
513 pub fn from_ref<T: AsRef<str> + ?Sized>(s: &T) -> Self {
524 let t = s.as_ref();
525 let b = t.as_bytes();
526
527 if b.len() <= INLINE_LEN {
528 Self(InnerLiteral::inline(b))
529 } else {
530 Self(InnerLiteral::Bytes(Bytes::copy_from_slice(b), false))
531 }
532 }
533
534 pub fn from_string(s: String) -> Self {
556 if s.len() <= INLINE_LEN {
557 Self(InnerLiteral::inline(s.as_bytes()))
558 } else {
559 Self(InnerLiteral::Bytes(
560 Bytes::from_owner(s.into_bytes()),
561 false,
562 ))
563 }
564 }
565
566 pub fn len(&self) -> usize {
584 self.0.len()
585 }
586
587 pub fn is_empty(&self) -> bool {
596 self.len() == 0
597 }
598
599 #[inline(always)]
600 fn repr(&self) -> Repr<'_> {
601 self.0.repr()
602 }
603}
604
// Converting a `Literal` into a buffer moves its storage into a `LiteralBuf`.
impl IntoBuf for Literal {
    type Buf = LiteralBuf;

    // Moves the inner storage into the buffer wrapper; no text is copied here.
    fn into_buf(self) -> Self::Buf {
        LiteralBuf(self.0)
    }
}
612
613impl fmt::Display for Literal {
614 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
615 match self.repr() {
616 Repr::Together(s) => f.write_str(s),
617 Repr::Split(r) => crate::buf::display(r.clone(), f),
618 }
619 }
620}
621
// Marker impl: `EqStr` declares no required items here.
impl EqStr for Literal {}

// Marker impl: equality itself is defined by the `PartialEq` impl for `Literal`.
impl Eq for Literal {}
625
626impl From<Literal> for String {
627 fn from(value: Literal) -> Self {
628 match value.repr() {
629 Repr::Together(s) => s.to_string(),
630 Repr::Split(r) => crate::buf::to_string(r.clone()),
631 }
632 }
633}
634
635impl<T: ?Sized + AsRef<str>> From<&T> for Literal {
636 fn from(value: &T) -> Self {
637 Literal::from_ref(&value)
638 }
639}
640
641impl<'a> From<Cow<'a, str>> for Literal {
642 fn from(value: Cow<'a, str>) -> Self {
643 match value {
644 Cow::Borrowed(s) => Literal::from_ref(&s),
645 Cow::Owned(s) => Literal::from_string(s),
646 }
647 }
648}
649
650impl From<String> for Literal {
651 fn from(value: String) -> Self {
652 Literal::from_string(value)
653 }
654}
655
656impl FromStr for Literal {
657 type Err = Infallible;
658
659 fn from_str(s: &str) -> Result<Self, Self::Err> {
660 Ok(Literal::from_ref(&s))
661 }
662}
663
664impl Hash for Literal {
665 fn hash<H: Hasher>(&self, state: &mut H) {
666 match self.repr() {
667 Repr::Together(s) if s.len() <= crate::buf::HASH_CHUNK => state.write(s.as_bytes()),
668 Repr::Together(s) => crate::buf::hash(s, state),
669 Repr::Split(m) => crate::buf::hash(m.clone(), state),
670 }
671 }
672}
673
674impl Ord for Literal {
675 fn cmp(&self, other: &Self) -> Ordering {
676 match (self.repr(), other.repr()) {
677 (Repr::Together(a), Repr::Together(b)) => Ord::cmp(a, b),
678 (Repr::Together(a), Repr::Split(b)) => crate::buf_cmp(a, b.clone()),
679 (Repr::Split(a), Repr::Together(b)) => crate::buf_cmp(a.clone(), b),
680 (Repr::Split(a), Repr::Split(b)) => crate::buf_cmp(a.clone(), b.clone()),
681 }
682 }
683}
684
685impl OrdStr for Literal {
686 fn cmp(&self, other: &str) -> Ordering {
687 match self.repr() {
688 Repr::Together(s) => Ord::cmp(s, other),
689 Repr::Split(m) => crate::buf_cmp(m.clone(), other),
690 }
691 }
692}
693
694impl PartialEq for Literal {
695 fn eq(&self, other: &Self) -> bool {
696 if self.len() != other.len() {
697 false
698 } else {
699 match (self.repr(), other.repr()) {
700 (Repr::Together(a), Repr::Together(b)) => a == b,
701 (Repr::Together(a), Repr::Split(b)) => {
702 crate::buf_cmp(a, b.clone()) == Ordering::Equal
703 }
704 (Repr::Split(a), Repr::Together(b)) => {
705 crate::buf_cmp(a.clone(), b) == Ordering::Equal
706 }
707 (Repr::Split(a), Repr::Split(b)) => {
708 crate::buf_cmp(a.clone(), b.clone()) == Ordering::Equal
709 }
710 }
711 }
712 }
713}
714
715impl PartialEq<str> for Literal {
716 fn eq(&self, other: &str) -> bool {
717 if self.len() != other.len() {
718 false
719 } else {
720 match self.repr() {
721 Repr::Together(s) => s == other,
722 Repr::Split(r) => crate::buf_cmp(r.clone(), other) == Ordering::Equal,
723 }
724 }
725 }
726}
727
728impl PartialEq<&str> for Literal {
729 fn eq(&self, other: &&str) -> bool {
730 self == *other
731 }
732}
733
734impl PartialEq<String> for Literal {
735 fn eq(&self, other: &String) -> bool {
736 self == other.as_str()
737 }
738}
739
740impl PartialEq<Literal> for str {
741 fn eq(&self, other: &Literal) -> bool {
742 other == self
743 }
744}
745
746impl PartialEq<Literal> for &str {
747 fn eq(&self, other: &Literal) -> bool {
748 other == self
749 }
750}
751
752impl PartialEq<Literal> for String {
753 fn eq(&self, other: &Literal) -> bool {
754 other == self
755 }
756}
757
758impl PartialOrd for Literal {
759 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
760 Some(Ord::cmp(self, other))
761 }
762}
763
764impl PartialOrd<str> for Literal {
765 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
766 Some(OrdStr::cmp(self, other))
767 }
768}
769
770impl PartialOrd<Literal> for str {
771 fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
772 Some(OrdStr::cmp(other, self).reverse())
773 }
774}
775
776impl PartialOrd<&str> for Literal {
777 fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
778 Some(OrdStr::cmp(self, other))
779 }
780}
781
782impl PartialOrd<Literal> for &str {
783 fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
784 Some(OrdStr::cmp(other, self).reverse())
785 }
786}
787
788impl PartialOrd<String> for Literal {
789 fn partial_cmp(&self, other: &String) -> Option<Ordering> {
790 self.partial_cmp(other.as_str())
791 }
792}
793
794impl PartialOrd<Literal> for String {
795 fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
796 self.as_str().partial_cmp(other)
797 }
798}
799
/// Buffer over the text of a [`Literal`], produced by `Literal::into_buf`.
///
/// The inherent methods mirror the `Buf` trait methods and delegate to the wrapped storage.
pub struct LiteralBuf(InnerLiteral);

impl LiteralBuf {
    /// Consumes `n` bytes from the front of the buffer.
    ///
    /// Panics if fewer than `n` bytes remain.
    #[inline(always)]
    pub fn advance(&mut self, n: usize) {
        self.0.advance(n)
    }

    /// Returns the next contiguous run of unconsumed bytes.
    #[inline(always)]
    pub fn chunk(&self) -> &[u8] {
        self.0.chunk()
    }

    /// Returns the number of unconsumed bytes.
    #[inline(always)]
    pub fn remaining(&self) -> usize {
        self.0.remaining()
    }

    /// Copies exactly `dst.len()` bytes into `dst` and consumes them, delegating to the wrapped
    /// storage's `try_copy_to_slice`.
    #[inline(always)]
    pub fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        self.0.try_copy_to_slice(dst)
    }
}
885
// Trait impl that forwards every method to the inherent method of the same name. The calls are
// written as `LiteralBuf::method(self, ..)` so they name the inherent method explicitly.
impl Buf for LiteralBuf {
    #[inline(always)]
    fn advance(&mut self, n: usize) {
        LiteralBuf::advance(self, n);
    }

    #[inline(always)]
    fn chunk(&self) -> &[u8] {
        LiteralBuf::chunk(self)
    }

    #[inline(always)]
    fn remaining(&self) -> usize {
        LiteralBuf::remaining(self)
    }

    #[inline(always)]
    fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        LiteralBuf::try_copy_to_slice(self, dst)
    }
}
907
/// Text content of a token, as returned by `PipeAnalyzer::content` and
/// `PipeAnalyzer::try_content`.
#[derive(Debug)]
pub struct Content(InnerLiteral);

impl Content {
    /// Returns the text as a [`Literal`], produced by cloning the underlying storage.
    #[inline(always)]
    pub fn literal(&self) -> Literal {
        Literal(self.0.clone())
    }

    /// Returns the "escaped" flag recorded for this content.
    #[inline(always)]
    pub fn is_escaped(&self) -> bool {
        self.0.is_escaped()
    }

    /// Returns the text with escape sequences expanded.
    ///
    /// Content whose "escaped" flag is not set comes back as `Unescaped::Literal`; otherwise
    /// the expanded text comes back as `Unescaped::Expanded`.
    #[inline(always)]
    pub fn unescaped(&self) -> Unescaped<Literal> {
        self.0.unescaped()
    }
}
968
969impl fmt::Display for Content {
970 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
971 self.literal().fmt(f)
972 }
973}
974
// Trait impl that forwards to the inherent methods of the same names. The literal type does not
// borrow from the content, so the lifetime parameter `'a` goes unused.
impl super::Content for Content {
    type Literal<'a> = Literal;

    #[inline(always)]
    fn literal<'a>(&'a self) -> Self::Literal<'a> {
        Content::literal(self)
    }

    #[inline(always)]
    fn is_escaped(&self) -> bool {
        Content::is_escaped(self)
    }

    #[inline(always)]
    fn unescaped<'a>(&'a self) -> Unescaped<Self::Literal<'a>> {
        Content::unescaped(self)
    }
}
993
// Compile-time check that `Literal` is exactly 40 bytes on 64-bit targets: the array lengths on
// both sides must match or the build fails.
#[cfg(target_pointer_width = "64")]
const _: [(); 40] = [(); std::mem::size_of::<Literal>()];

// Same compile-time size check for `Content`.
#[cfg(target_pointer_width = "64")]
const _: [(); 40] = [(); std::mem::size_of::<Content>()];
1001
/// Error reported by a `PipeAnalyzer`: either a lexical error or a failure to read from the pipe.
#[derive(Debug)]
pub struct Error<E> {
    // Category of the error; `ErrorKind::Read` for errors that came from the pipe.
    kind: ErrorKind,
    // Position recorded for the error.
    pos: Pos,
    // Underlying pipe error, if any. Kept in an `Arc` so the error can be cloned without
    // requiring `E: Clone`.
    source: Option<Arc<E>>,
}

impl<E> Error<E> {
    /// Returns the category of the error.
    pub fn kind(&self) -> ErrorKind {
        self.kind
    }

    /// Returns the position recorded for the error.
    pub fn pos(&self) -> &Pos {
        &self.pos
    }

    // Builds a lexical error, which has no underlying source error.
    fn new_lexical(kind: ErrorKind, pos: Pos) -> Self {
        Self {
            kind,
            pos,
            source: None,
        }
    }

    // Builds a read error of kind `ErrorKind::Read` that wraps the pipe's error as its source.
    fn new_read(source: E, pos: Pos) -> Self {
        Self {
            kind: ErrorKind::Read,
            pos,
            source: Some(Arc::new(source)),
        }
    }
}
1045
1046impl<E> Clone for Error<E> {
1047 fn clone(&self) -> Self {
1048 Self {
1049 kind: self.kind,
1050 pos: self.pos,
1051 source: self.source.clone(),
1052 }
1053 }
1054}
1055
impl<E> fmt::Display for Error<E> {
    // Formatting is delegated to `ErrorKind::fmt_at`, passing the recorded position. The source
    // error is not written here; it is exposed through `std::error::Error::source` instead.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.kind.fmt_at(f, Some(&self.pos))
    }
}
1061
1062impl<E> std::error::Error for Error<E>
1063where
1064 E: std::error::Error + 'static,
1065{
1066 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
1067 self.source.as_ref().map(|e| &**e as &dyn std::error::Error)
1068 }
1069}
1070
// Trait impl that forwards to the inherent accessors of the same names.
impl<E> lexical::Error for Error<E>
where
    E: std::error::Error + Send + Sync + 'static,
{
    fn kind(&self) -> ErrorKind {
        Error::kind(self)
    }

    fn pos(&self) -> &Pos {
        Error::pos(self)
    }
}
1083
/// Source of input chunks for a `PipeAnalyzer`.
pub trait Pipe {
    /// Error type produced when receiving a chunk fails.
    type Error: std::error::Error + Send + Sync + 'static;

    /// Receives the next chunk of input.
    ///
    /// As consumed by `PipeAnalyzer::next`: `None` is treated as the end of the input,
    /// `Some(Ok(chunk))` supplies more bytes to scan, and `Some(Err(e))` is recorded as a read
    /// error.
    fn recv(&mut self) -> Option<Result<Bytes, Self::Error>>;

    /// Optionally returns a chunk; the default implementation always returns `None`.
    ///
    /// `PipeAnalyzer::new` calls this once to obtain its first chunk and starts with an empty
    /// buffer when `None` is returned.
    // NOTE(review): presumably implementations are expected not to block here — confirm.
    fn try_recv(&mut self) -> Option<Bytes> {
        None
    }
}
1180
1181impl Pipe for std::sync::mpsc::Receiver<Bytes> {
1182 type Error = Infallible;
1183
1184 fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
1185 std::sync::mpsc::Receiver::recv(self).ok().map(Ok)
1186 }
1187
1188 fn try_recv(&mut self) -> Option<Bytes> {
1189 std::sync::mpsc::Receiver::try_recv(self).ok()
1190 }
1191}
1192
// Outcome of the most recent `PipeAnalyzer::next` call, kept so that the content or error can be
// produced on demand afterwards.
#[derive(Debug)]
enum StoredContent<E> {
    // The last token: where it starts within the first retained chunk, its length in bytes, and
    // its "escaped" flag.
    Ok {
        start_pos: usize,
        len: usize,
        escaped: bool,
    },
    // The error that ended scanning.
    Err(Error<E>),
}

// The default is an empty, non-escaped token at offset zero.
impl<E> Default for StoredContent<E> {
    fn default() -> Self {
        StoredContent::Ok {
            start_pos: 0,
            len: 0,
            escaped: false,
        }
    }
}
1212
/// Lexical analyzer that scans input arriving in chunks from a [`Pipe`].
#[derive(Debug)]
pub struct PipeAnalyzer<P: Pipe> {
    // Chunks that hold the current token. Normally a single chunk; further chunks are appended
    // while a token continues past the end of the chunks received so far.
    bufs: SmallVec<[Bytes; 4]>,
    // Outcome of the most recent `next` call.
    content: StoredContent<P::Error>,
    // Position captured from the state machine at the start of the most recent `next` call.
    content_pos: Pos,
    // State machine that does the actual scanning.
    mach: state::Machine<Bytes>,
    // Source of further chunks.
    pipe: P,
    // Offset, relative to the start of `bufs[0]`, at which the next token begins.
    start_pos: usize,
}
1308
1309impl<P: Pipe> PipeAnalyzer<P> {
1310 pub fn new(mut pipe: P) -> Self {
1325 let first = match pipe.try_recv() {
1326 Some(chunk) => chunk,
1327 None => Bytes::new(),
1328 };
1329
1330 let bufs = smallvec![first.clone()];
1331 let content = StoredContent::default();
1332 let content_pos = Pos::default();
1333 let mach = state::Machine::new(first);
1334 let start_pos = 0;
1335
1336 Self {
1337 bufs,
1338 content,
1339 content_pos,
1340 mach,
1341 pipe,
1342 start_pos,
1343 }
1344 }
1345
1346 #[allow(clippy::should_implement_trait)]
1367 pub fn next(&mut self) -> Token {
1368 if matches!(self.content, StoredContent::Err(_)) {
1369 return Token::Err;
1370 }
1371
1372 self.content_pos = *self.mach.pos();
1373 let n = self.bufs.len();
1374 if n > 1 {
1375 let contrib: usize = self.bufs.iter().take(n - 1).map(Bytes::len).sum();
1376 self.start_pos -= contrib;
1377 self.bufs.swap(0, n - 1);
1378 self.bufs.truncate(1);
1379 }
1380
1381 macro_rules! done {
1382 ($token:ident, $escaped:ident, $n: expr, $len:ident) => {{
1383 $len += $n;
1384 self.content = StoredContent::Ok {
1385 start_pos: self.start_pos,
1386 len: $len,
1387 escaped: $escaped,
1388 };
1389 self.start_pos += $len;
1390
1391 return $token;
1392 }};
1393 }
1394
1395 macro_rules! lexical_err {
1396 () => {{
1397 let kind = self.mach.err_kind().expect("there should be an error kind");
1398 let pos = *self.mach.pos();
1399 self.content = StoredContent::Err(Error::new_lexical(kind, pos));
1400
1401 return Token::Err;
1402 }};
1403 }
1404
1405 macro_rules! io_err {
1406 ($source:ident) => {{
1407 self.content = StoredContent::Err(Error::new_read($source, *self.mach.pos()));
1408
1409 return Token::Err;
1410 }};
1411 }
1412
1413 let mut next = self.mach.next();
1414 let mut len = 0;
1415 loop {
1416 match next {
1417 state::Next::Done(token, escaped, n) => done!(token, escaped, n, len),
1418 state::Next::Part(token, n) => {
1419 len += n;
1420 match self.pipe.recv() {
1421 None => match self.mach.end() {
1422 state::End::Done => done!(token, false, 0, len),
1423 state::End::Nil => unreachable!(),
1424 state::End::Err => lexical_err!(),
1425 },
1426 Some(Ok(buf)) => {
1427 self.bufs.push(buf.clone());
1428 next = self.mach.resume(buf);
1429 }
1430 Some(Err(err)) => io_err!(err),
1431 }
1432 }
1433 state::Next::Nil => match self.pipe.recv() {
1434 None => {
1435 self.content = StoredContent::default();
1436 return Token::Eof;
1437 }
1438 Some(Ok(buf)) => {
1439 debug_assert!(self.bufs.len() == 1);
1440 self.start_pos = 0;
1441 self.bufs[0] = buf.clone();
1442 next = self.mach.resume(buf);
1443 }
1444 Some(Err(err)) => io_err!(err),
1445 },
1446 state::Next::Err(_) => lexical_err!(),
1447 }
1448 }
1449 }
1450
1451 #[inline]
1480 pub fn content(&self) -> Content {
1481 if let Ok(content) = self.try_content() {
1482 content
1483 } else {
1484 panic!("no content: last `next()` returned `Token::Err` (use `err()` instead)");
1485 }
1486 }
1487
1488 #[inline]
1517 pub fn err(&self) -> Error<P::Error> {
1518 if let Err(err) = self.try_content() {
1519 err
1520 } else {
1521 panic!("no error: last `next()` did not return `Token::Err` (use `content()` instead)");
1522 }
1523 }
1524
1525 #[inline(always)]
1586 pub fn pos(&self) -> &Pos {
1587 &self.content_pos
1588 }
1589
1590 pub fn try_content(&self) -> Result<Content, Error<P::Error>> {
1629 match &self.content {
1630 StoredContent::Ok {
1631 start_pos,
1632 len,
1633 escaped,
1634 } if *start_pos + *len <= self.bufs[0].len() => {
1635 let src = &self.bufs[0];
1636 debug_assert!(*start_pos <= src.len());
1637 debug_assert!(
1638 *start_pos + *len <= src.len(),
1639 "start_pos ({start_pos}) + len ({len}) <= src.len() ({})",
1640 src.len()
1641 );
1642 if *len <= INLINE_LEN {
1643 unsafe {
1647 let mut dst: MaybeUninit<InlineBuf> = MaybeUninit::uninit();
1648 std::ptr::copy_nonoverlapping(
1649 src.as_ptr().add(*start_pos),
1650 dst.as_mut_ptr() as *mut u8,
1651 *len,
1652 );
1653
1654 Ok(Content(InnerLiteral::Inline(
1655 0,
1656 *len as u8,
1657 dst.assume_init(),
1658 *escaped,
1659 )))
1660 }
1661 } else {
1662 Ok(Content(InnerLiteral::Bytes(
1663 src.slice(*start_pos..*start_pos + *len),
1664 *escaped,
1665 )))
1666 }
1667 }
1668
1669 StoredContent::Ok {
1670 start_pos,
1671 len,
1672 escaped,
1673 } => self.multi_content(*start_pos, *len, *escaped),
1674
1675 StoredContent::Err(err) => Err(err.clone()),
1676 }
1677 }
1678
1679 pub fn into_parser(self) -> syntax::Parser<PipeAnalyzer<P>> {
1708 syntax::Parser::new(self)
1709 }
1710
1711 fn multi_content(
1712 &self,
1713 start_pos: usize,
1714 len: usize,
1715 escaped: bool,
1716 ) -> Result<Content, Error<P::Error>> {
1717 debug_assert!(self.bufs.len() > 1);
1718
1719 let arr: Box<[Bytes]> = self.bufs.iter().cloned().collect(); let multi_bytes = MultiBytes::new(arr, start_pos, len, escaped);
1721 let content = Content(InnerLiteral::Multi(multi_bytes));
1722
1723 Ok(content)
1724 }
1725}
1726
// Trait impl that forwards to the inherent methods of the same names.
impl<P: Pipe> lexical::Analyzer for PipeAnalyzer<P> {
    type Content = Content;
    type Error = Error<P::Error>;

    #[inline(always)]
    fn next(&mut self) -> Token {
        PipeAnalyzer::next(self)
    }

    #[inline(always)]
    fn try_content(&self) -> Result<Self::Content, Error<P::Error>> {
        PipeAnalyzer::try_content(self)
    }

    #[inline(always)]
    fn pos(&self) -> &Pos {
        PipeAnalyzer::pos(self)
    }
}
1746
1747#[cfg(test)]
1748mod tests {
1749 use super::*;
1750 use crate::{IntoBuf, lexical::Expect};
1751 use rstest::rstest;
1752 use std::{
1753 collections::{BTreeMap, HashMap},
1754 error::Error as _,
1755 hash::DefaultHasher,
1756 sync::mpsc::channel,
1757 };
1758
1759 #[test]
1760 fn temp_test_empty_chunk() {
1761 let (tx, rx) = channel();
1763 tx.send("tru".into()).unwrap();
1764 tx.send("".into()).unwrap();
1765 tx.send("e".into()).unwrap();
1766 drop(tx);
1767
1768 let mut an = PipeAnalyzer::new(rx);
1769
1770 assert_eq!(Token::LitTrue, an.next());
1771 assert_eq!(Token::Eof, an.next());
1772 }
1773
1774 #[rstest]
1775 #[case(Literal::from_static(""), 0)]
1776 #[case(Literal::from_static("a"), 1)]
1777 #[case(Literal::from_static(concat!(
1778 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1779 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1780 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1781 "aaaaaaaaaaaaaab",
1782 )), u8::MAX as usize)]
1783 #[case(Literal::from_ref(""), 0)]
1784 #[case(Literal::from_ref(&"a".repeat(INLINE_LEN)), INLINE_LEN)]
1785 #[case(Literal::from_ref(&"b".repeat(INLINE_LEN+1)), INLINE_LEN+1)]
1786 #[case(Literal::from_ref(&Cow::Borrowed("foo")), 3)]
1787 #[case(Literal::from_ref(&Cow::Owned("bar".to_string())), 3)]
1788 #[case(Literal::from_string("".to_string()), 0)]
1789 #[case(Literal::from_string("c".to_string()), 1)]
1790 #[case(Literal::from_string("d".repeat(100 * INLINE_LEN)), 100 * INLINE_LEN)]
1791 #[case("baz".into(), 3)]
1792 #[case(Cow::Borrowed("").into(), 0)]
1793 #[case(Cow::<str>::Owned("e".repeat(INLINE_LEN-1)).into(), INLINE_LEN-1)]
1794 #[case("qux".to_string().into(), 3)]
1795 #[case(Literal::from_str("hello, world").unwrap(), 12)]
1796 #[case(Literal(InnerLiteral::test_new_multi(["b", "a", "z"], 0, 3, false)), 3)]
1797 #[case(Literal(InnerLiteral::test_new_multi(["b", "a", "z"], 0, 3, true)), 3)]
1798 #[case(Literal(InnerLiteral::test_new_multi(["_f", "o", "o_"], 1, 3, false)), 3)]
1799 #[case(Literal(InnerLiteral::test_new_multi(["_f", "oo", ""], 1, 3, true)), 3)]
1800 fn test_literal_convert(#[case] literal: Literal, #[case] expect_len: usize) {
1801 assert_eq!(expect_len, literal.len());
1802 assert_eq!(expect_len == 0, literal.is_empty());
1803
1804 let mut b = literal.clone().into_buf();
1805
1806 assert_eq!(expect_len, b.remaining());
1807 assert_eq!(expect_len == 0, !b.has_remaining());
1808
1809 let mut dst = vec![0u8; expect_len];
1810 b.copy_to_slice(&mut dst);
1811
1812 let s = String::from_utf8(dst).unwrap();
1813
1814 assert_eq!(literal.to_string(), s);
1815 assert_eq!(Into::<String>::into(literal), s);
1816 }
1817
1818 #[test]
1819 fn test_literal_compare() {
1820 let a_s = vec![
1821 Literal::from_static("a"),
1822 Literal::from_ref("a"),
1823 Literal::from_string("a".to_string()),
1824 Literal(InnerLiteral::test_new_multi(["a"], 0, 1, false)),
1825 ];
1826 let aa_s: Vec<Literal> = vec![
1827 Literal::from_ref(&"a".repeat(INLINE_LEN)),
1828 Literal::from_string("a".repeat(INLINE_LEN)),
1829 Literal(InnerLiteral::test_new_multi(
1830 [vec![b'a'; INLINE_LEN]],
1831 0,
1832 INLINE_LEN,
1833 false,
1834 )),
1835 Literal(InnerLiteral::test_new_multi(
1836 ["a"; INLINE_LEN],
1837 0,
1838 INLINE_LEN,
1839 true,
1840 )),
1841 ];
1842 let aab_s: Vec<Literal> = vec![
1843 Literal::from_static(concat!(
1844 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1845 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1846 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1847 "aaaaaaaaaaaaaab",
1848 )),
1849 Literal::from_ref(("a".repeat(u8::MAX as usize - 1) + "b").as_str()),
1850 Literal::from_string("a".repeat(u8::MAX as usize - 1) + "b"),
1851 Literal(InnerLiteral::test_new_multi(
1852 ["a".repeat(u8::MAX as usize - 1), "abc".to_string()],
1853 1,
1854 u8::MAX as usize,
1855 true,
1856 )),
1857 ];
1858
1859 macro_rules! assert_all_eq {
1860 ($a:expr, $b:expr) => {
1861 assert_eq!($a, $a);
1862 assert_eq!($b, $a);
1863 assert_eq!($a, $b);
1864 assert!($a <= $a);
1865 assert!(!($a < $a));
1866 assert!($a >= $a);
1867 assert!(!($a > $a));
1868 };
1869 }
1870
1871 macro_rules! assert_all_ne {
1872 ($a:expr, $b:expr) => {
1873 assert_ne!($a, $b);
1874 assert_ne!($b, $a);
1875 };
1876 }
1877
1878 macro_rules! assert_all_lt {
1879 ($a:expr, $b:expr) => {
1880 assert!($a < $b);
1881 assert!(!($b < $a));
1882 assert!(!($a > $b));
1883 assert!($b > $a);
1884 assert!($a <= $b);
1885 assert!($b >= $a);
1886 };
1887 }
1888
1889 macro_rules! assert_all_gt {
1890 ($a:expr, $b:expr) => {
1891 assert!($a > $b);
1892 assert!(!($b > $a));
1893 assert!(!($a < $b));
1894 assert!($b < $a);
1895 assert!($a >= $b);
1896 assert!($b <= $a);
1897 };
1898 }
1899
1900 for a in &a_s {
1901 assert_all_eq!(a, "a");
1902 assert_all_eq!(Unescaped::Literal(a), "a");
1903 assert_all_ne!(a, "ab");
1904 assert_all_ne!(Unescaped::Literal(a), "aa");
1905 assert_eq!(&"a", a);
1906 assert_eq!(&"a".to_string(), a);
1907 assert_eq!(a, &"a");
1908 assert_eq!(a, &"a".to_string());
1909
1910 assert!(a <= &"a");
1911 assert!(a <= &"a".to_string());
1912 assert!(!(a < &"a"));
1913 assert!(!(a < &"a".to_string()));
1914 assert!(a >= &"a");
1915 assert!(a >= &"a".to_string());
1916 assert!(!(a > &"a"));
1917 assert!(!(a > &"a".to_string()));
1918
1919 for other in aa_s.iter().chain(aab_s.iter()) {
1920 assert_all_ne!(a, other);
1921 assert_all_lt!(a, other);
1922 assert_all_gt!(other, a);
1923 }
1924 }
1925
1926 for aa in &aa_s {
1927 assert_all_eq!(aa, "a".repeat(INLINE_LEN).as_str());
1928 assert_all_eq!(Unescaped::Literal(aa), "a".repeat(INLINE_LEN).as_str());
1929 assert_all_ne!(aa, "aab");
1930 assert_all_ne!(Unescaped::Literal(aa), "aab");
1931
1932 assert_all_gt!(aa, "a");
1933 assert_all_gt!(Unescaped::Literal(aa), "a");
1934 assert_all_lt!(aa, "aab");
1935 assert_all_lt!(Unescaped::Literal(aa), "aab");
1936
1937 assert!(aa < &"aab");
1938 assert!(aa < &"aab".to_string());
1939 assert!(aa <= &"aab");
1940 assert!(aa <= &"aab".to_string());
1941 assert!(&"aab" > aa);
1942 assert!(&"aab".to_string() > aa);
1943 assert!(aa <= &"aab");
1944 assert!(aa <= &"aab".to_string());
1945 assert!(&"aab" > aa);
1946 assert!(&"aab".to_string() > aa);
1947
1948 for aab in &aab_s {
1949 assert_all_ne!(aa, aab);
1950 assert_all_lt!(aa, aab);
1951 assert_all_gt!(aab, aa);
1952 }
1953 }
1954
1955 fn hash<T: Hash>(t: &T) -> u64 {
1956 let mut hasher = DefaultHasher::new();
1957 t.hash(&mut hasher);
1958 hasher.finish()
1959 }
1960
1961 macro_rules! check_hash {
1962 ($patient_zero:expr, $iter:expr) => {
1963 let hash_zero = hash($patient_zero);
1964 for (i, item) in $iter.enumerate() {
1965 let hash_item = hash(item);
1966 assert_eq!(hash_zero, hash_item, "hash difference between item 0 ({:?}, {hash_zero}) and item {i}, {item:?}, {hash_item})", $patient_zero);
1967 }
1968 }
1969 }
1970
1971 check_hash!(&a_s[0], a_s.iter().skip(1));
1972 check_hash!(&aa_s[0], aa_s.iter().skip(1));
1973 check_hash!(&aab_s[0], aab_s.iter().skip(1));
1974
1975 macro_rules! check_map {
1976 ($map:ident, $patient_zero:expr, $iter:expr) => {
1977 assert!($map.insert($patient_zero, $patient_zero).is_none());
1978 for item in $iter {
1979 assert_eq!($patient_zero, *$map.get(&item).unwrap());
1980 }
1981 };
1982 }
1983
1984 let mut hash_map1 = HashMap::new();
1985
1986 check_map!(hash_map1, a_s[0].clone(), a_s.clone());
1987 check_map!(hash_map1, aa_s[0].clone(), aa_s.clone());
1988 check_map!(hash_map1, aab_s[0].clone(), aab_s.clone());
1989
1990 let mut hash_map2 = HashMap::new();
1991
1992 let unescaped_a = Unescaped::Literal(a_s[0].clone());
1993 let unescaped_aa = Unescaped::Literal(aa_s[0].clone());
1994 let unescaped_aab = Unescaped::Literal(aab_s[0].clone());
1995
1996 check_map!(
1997 hash_map2,
1998 unescaped_a.clone(),
1999 a_s.iter().cloned().map(Unescaped::Literal)
2000 );
2001 check_map!(
2002 hash_map2,
2003 unescaped_aa.clone(),
2004 aa_s.iter().cloned().map(Unescaped::Literal)
2005 );
2006 check_map!(
2007 hash_map2,
2008 unescaped_aab.clone(),
2009 aab_s.iter().cloned().map(Unescaped::Literal)
2010 );
2011
2012 let mut btree_map1 = BTreeMap::new();
2013
2014 check_map!(btree_map1, a_s[0].clone(), a_s.clone());
2015 check_map!(btree_map1, aa_s[0].clone(), aa_s.clone());
2016 check_map!(btree_map1, aab_s[0].clone(), aab_s.clone());
2017
2018 let mut btree_map2 = BTreeMap::new();
2019
2020 check_map!(
2021 btree_map2,
2022 unescaped_a.clone(),
2023 a_s.iter().cloned().map(Unescaped::Literal)
2024 );
2025 check_map!(
2026 btree_map2,
2027 unescaped_aa.clone(),
2028 aa_s.iter().cloned().map(Unescaped::Literal)
2029 );
2030 check_map!(
2031 btree_map2,
2032 unescaped_aab.clone(),
2033 aab_s.iter().cloned().map(Unescaped::Literal)
2034 );
2035 }
2036
2037 #[rstest]
2038 #[case(Literal::from_static(""))]
2039 #[case(Literal::from_ref(""))]
2040 #[case(Literal::from_string("".into()))]
2041 #[case(Literal(InnerLiteral::test_new_bytes("", false)))]
2042 #[case(Literal(InnerLiteral::test_new_bytes("", true)))]
2043 #[case(Literal(InnerLiteral::test_new_multi([""], 0, 0, false)))]
2044 #[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
2045 fn test_literal_buf_advance_panic(#[case] literal: Literal) {
2046 let _ = literal.into_buf().advance(1);
2047 }
2048
2049 #[rstest]
2050 #[case(Literal::from_static(""))]
2051 #[case(Literal::from_ref(""))]
2052 #[case(Literal::from_string("".into()))]
2053 #[case(Literal(InnerLiteral::test_new_bytes("", false)))]
2054 #[case(Literal(InnerLiteral::test_new_bytes("", true)))]
2055 #[case(Literal(InnerLiteral::test_new_multi([""], 0, 0, false)))]
2056 #[case(Literal(InnerLiteral::test_new_multi(["", ""], 0, 0, true)))]
2057 #[case(Literal(InnerLiteral::test_new_multi(["a"], 1, 0, false)))]
2058 #[case(Literal(InnerLiteral::test_new_multi(["a", "a"], 1, 0, true)))]
2059 #[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
2060 fn test_literal_buf_copy_to_slice_panic(#[case] literal: Literal) {
2061 let mut dst = [0; 1];
2062
2063 let _ = literal.into_buf().copy_to_slice(&mut dst);
2064 }
2065
2066 #[rstest]
2067 #[case(Content(InnerLiteral::Static("", false)), "", None)]
2068 #[case(Content(InnerLiteral::Static("", false)), "", None)]
2069 #[case(
2070 Content(InnerLiteral::Static(concat!(
2071 "................................................................................",
2072 ",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
2073 "________________________________________________________________________________",
2074 "+++++++++++++++",
2075 ), false)),
2076 concat!(
2077 "................................................................................",
2078 ",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
2079 "________________________________________________________________________________",
2080 "+++++++++++++++",
2081 ),
2082 None,
2083 )]
2084 #[case(Content(InnerLiteral::Inline(0, 0, [0; INLINE_LEN], false)), "", None)]
2085 #[case(Content(InnerLiteral::test_new_bytes("", false)), "", None)]
2086 #[case(Content(InnerLiteral::test_new_bytes("foo", false)), "foo", None)]
2087 #[case(Content(InnerLiteral::Bytes(Bytes::from_static(b"a barge").slice(2..5), false)), "bar", None)]
2088 #[case(Content(InnerLiteral::test_new_multi([""], 0, 0, false)), "", None)]
2089 #[case(Content(InnerLiteral::test_new_multi(["a b", "a", "rge"], 2, 3, false)), "bar", None)]
2090 #[case(Content(InnerLiteral::test_new_bytes("", true)), "", Some(""))]
2091 #[case(Content(InnerLiteral::test_new_bytes("foo", true)), "foo", Some("foo"))]
2092 #[case(Content(InnerLiteral::Bytes(Bytes::from_static(b"a b\\u0061rge").slice(2..10), true)), "b\\u0061r", Some("bar"))]
2093 #[case(Content(InnerLiteral::test_new_multi([""], 0, 0, true)), "", Some(""))]
2094 #[case(Content(InnerLiteral::test_new_multi(["tomf", "oo", "lery"], 3, 3, true)), "foo", Some("foo"))]
2095 #[case(Content(InnerLiteral::test_new_multi(["\\", "u", "006", "6\\u", "0", "06", "fox"], 0, 13, true)), "\\u0066\\u006fo", Some("foo"))]
2096 fn test_content(
2100 #[case] content: Content,
2101 #[case] expect_literal: &str,
2102 #[case] expect_unescaped: Option<&str>,
2103 ) {
2104 assert_eq!(expect_literal, content.literal().into_string());
2105 assert_eq!(expect_unescaped.is_some(), content.is_escaped());
2106 if let Some(expect) = expect_unescaped {
2107 assert_eq!(expect, content.unescaped().into_string());
2108 }
2109 }
2110
2111 #[rstest]
2112 #[case(
2113 Error::new_lexical(ErrorKind::UnexpectedEof(Token::LitTrue), Pos::new(3, 2, 1)),
2114 ErrorKind::UnexpectedEof(Token::LitTrue),
2115 "unexpected EOF in true token at line 2, column 1 (offset: 3)",
2116 None
2117 )]
2118 #[case(
2119 Error::new_read(ToyError("foo"), Pos::new(3, 2, 1)),
2120 ErrorKind::Read,
2121 "read error at line 2, column 1 (offset: 3)",
2122 Some(ToyError("foo"))
2123 )]
2124 fn test_error(
2125 #[case] err: Error<ToyError>,
2126 #[case] expect_kind: ErrorKind,
2127 #[case] expect_display: &str,
2128 #[case] expect_source: Option<ToyError>,
2129 ) {
2130 let pos = Pos::new(3, 2, 1);
2131
2132 assert_eq!(expect_kind, err.kind());
2133 assert_eq!(&pos, err.pos());
2134 assert_eq!(
2135 expect_source.as_ref(),
2136 err.source().and_then(|e| e.downcast_ref::<ToyError>())
2137 );
2138
2139 let actual_display = format!("{err}");
2140 assert_eq!(expect_display, actual_display);
2141 }
2142
2143 #[test]
2144 fn test_analyzer_empty() {
2145 let (tx, rx) = channel();
2146 drop(tx);
2147 let mut an = PipeAnalyzer::new(rx);
2148
2149 assert_eq!(an.next(), Token::Eof);
2150 assert_eq!("", an.content().literal().into_string());
2151 assert_eq!("", an.content().unescaped().into_string());
2152 }
2153
2154 #[test]
2155 fn test_analyzer_initial_state_content() {
2156 let (_, rx) = channel();
2157 let an = PipeAnalyzer::new(rx);
2158
2159 for _ in 0..5 {
2160 let content = an.content();
2161 assert_eq!("", content.literal().into_string());
2162 assert!(!content.is_escaped());
2163 assert_eq!("", content.unescaped().into_string());
2164
2165 let content = an.try_content().unwrap();
2166 assert_eq!("", content.literal().into_string());
2167 assert!(!content.is_escaped());
2168 assert_eq!("", content.unescaped().into_string());
2169 }
2170 }
2171
2172 #[test]
2173 #[should_panic(
2174 expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
2175 )]
2176 fn test_analyzer_initial_state_err() {
2177 let (_, rx) = channel();
2178 let _ = PipeAnalyzer::new(rx).err();
2179 }
2180
2181 #[rstest]
2182 #[case("", Token::Eof, None)]
2183 #[case("{", Token::ObjBegin, None)]
2184 #[case("}", Token::ObjEnd, None)]
2185 #[case("[", Token::ArrBegin, None)]
2186 #[case("]", Token::ArrEnd, None)]
2187 #[case(":", Token::NameSep, None)]
2188 #[case(",", Token::ValueSep, None)]
2189 #[case("false", Token::LitFalse, None)]
2190 #[case("null", Token::LitNull, None)]
2191 #[case("true", Token::LitTrue, None)]
2192 #[case("0", Token::Num, None)]
2193 #[case("-0", Token::Num, None)]
2194 #[case("1", Token::Num, None)]
2195 #[case("-1", Token::Num, None)]
2196 #[case("12", Token::Num, None)]
2197 #[case("-12", Token::Num, None)]
2198 #[case("0.0", Token::Num, None)]
2199 #[case("-0.0", Token::Num, None)]
2200 #[case("0.123456789", Token::Num, None)]
2201 #[case("-123.456789", Token::Num, None)]
2202 #[case("0E0", Token::Num, None)]
2203 #[case("0e0", Token::Num, None)]
2204 #[case("0E+0", Token::Num, None)]
2205 #[case("0e+0", Token::Num, None)]
2206 #[case("0E-0", Token::Num, None)]
2207 #[case("0e-0", Token::Num, None)]
2208 #[case("0.0E0", Token::Num, None)]
2209 #[case("0.0e0", Token::Num, None)]
2210 #[case("0.0E+0", Token::Num, None)]
2211 #[case("0.0e+0", Token::Num, None)]
2212 #[case("0.0E0", Token::Num, None)]
2213 #[case("0.0e0", Token::Num, None)]
2214 #[case("0E0", Token::Num, None)]
2215 #[case("0e0", Token::Num, None)]
2216 #[case("-0E+0", Token::Num, None)]
2217 #[case("-0e+0", Token::Num, None)]
2218 #[case("-0E-0", Token::Num, None)]
2219 #[case("-0e-0", Token::Num, None)]
2220 #[case("-0.0E0", Token::Num, None)]
2221 #[case("-0.0e0", Token::Num, None)]
2222 #[case("-0.0E+0", Token::Num, None)]
2223 #[case("-0.0e+0", Token::Num, None)]
2224 #[case("-0.0E0", Token::Num, None)]
2225 #[case("-0.0e0", Token::Num, None)]
2226 #[case("123E456", Token::Num, None)]
2227 #[case("123e456", Token::Num, None)]
2228 #[case("123.456E+7", Token::Num, None)]
2229 #[case("123.456e+7", Token::Num, None)]
2230 #[case("123.456E-89", Token::Num, None)]
2231 #[case("123.456e-89", Token::Num, None)]
2232 #[case("-123E456", Token::Num, None)]
2233 #[case("-123e456", Token::Num, None)]
2234 #[case("-123.456E+7", Token::Num, None)]
2235 #[case("-123.456e+7", Token::Num, None)]
2236 #[case("-123.456E-89", Token::Num, None)]
2237 #[case("-123.456e-89", Token::Num, None)]
2238 #[case(r#""""#, Token::Str, None)]
2239 #[case(r#"" ""#, Token::Str, None)]
2240 #[case(r#""foo""#, Token::Str, None)]
2241 #[case(r#""The quick brown fox jumped over the lazy dog!""#, Token::Str, None)]
2242 #[case(r#""\\""#, Token::Str, Some(r#""\""#))]
2243 #[case(r#""\/""#, Token::Str, Some(r#""/""#))]
2244 #[case(r#""\t""#, Token::Str, Some("\"\t\""))]
2245 #[case(r#""\r""#, Token::Str, Some("\"\r\""))]
2246 #[case(r#""\n""#, Token::Str, Some("\"\n\""))]
2247 #[case(r#""\f""#, Token::Str, Some("\"\u{000c}\""))]
2248 #[case(r#""\b""#, Token::Str, Some("\"\u{0008}\""))]
2249 #[case(r#""\u0000""#, Token::Str, Some("\"\u{0000}\""))]
2250 #[case(r#""\u001f""#, Token::Str, Some("\"\u{001f}\""))]
2251 #[case(r#""\u0020""#, Token::Str, Some(r#"" ""#))]
2252 #[case(r#""\u007E""#, Token::Str, Some(r#""~""#))]
2253 #[case(r#""\u007F""#, Token::Str, Some("\"\u{007f}\""))]
2254 #[case(r#""\u0080""#, Token::Str, Some("\"\u{0080}\""))]
2255 #[case(r#""\u0100""#, Token::Str, Some("\"\u{0100}\""))]
2256 #[case(r#""\uE000""#, Token::Str, Some("\"\u{e000}\""))]
2257 #[case(r#""\ufDCf""#, Token::Str, Some("\"\u{fdcf}\""))]
2258 #[case(r#""\uFdeF""#, Token::Str, Some("\"\u{fdef}\""))]
2259 #[case(r#""\ufffd""#, Token::Str, Some("\"\u{fffd}\""))]
2260 #[case(r#""\uFFFE""#, Token::Str, Some("\"\u{fffe}\""))]
2261 #[case(r#""\uFFFF""#, Token::Str, Some("\"\u{ffff}\""))]
2262 #[case(r#""\ud800\udc00""#, Token::Str, Some("\"\u{10000}\""))] #[case(r#""\uD800\uDFFF""#, Token::Str, Some("\"\u{103ff}\""))] #[case(r#""\uDBFF\uDC00""#, Token::Str, Some("\"\u{10fc00}\""))] #[case(r#""\udbFf\udfff""#, Token::Str, Some("\"\u{10ffff}\""))] #[case(" ", Token::White, None)]
2267 #[case("\t", Token::White, None)]
2268 #[case(" ", Token::White, None)]
2269 #[case("\t\t", Token::White, None)]
2270 #[case(" \t \t \t \t\t", Token::White, None)]
2271 fn test_analyzer_single_token(
2272 #[case] input: &str,
2273 #[case] expect: Token,
2274 #[case] unescaped: Option<&str>,
2275 ) {
2276 const CHUNK_SIZES: [usize; 6] = [1, 2, INLINE_LEN - 1, INLINE_LEN, INLINE_LEN + 1, 10];
2277
2278 for chunk_size in CHUNK_SIZES {
2279 {
2281 let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
2282 assert_eq!(Pos::default(), *an.pos());
2283
2284 assert_eq!(expect, an.next());
2285 assert_eq!(Pos::default(), *an.pos());
2286
2287 let content = an.content();
2288 assert_eq!(
2289 input,
2290 content.literal().into_string(),
2291 "chunk_size = {chunk_size}, input = {input:?}, content = {content}"
2292 );
2293 assert_eq!(unescaped.is_some(), content.is_escaped());
2294 if let Some(u) = unescaped {
2295 assert_eq!(u, content.unescaped().into_string());
2296 } else {
2297 assert_eq!(input, content.unescaped().into_string());
2298 }
2299
2300 assert_eq!(Token::Eof, an.next());
2301 assert_eq!(
2302 Pos {
2303 offset: input.len(),
2304 line: 1,
2305 col: input.len() + 1,
2306 },
2307 *an.pos()
2308 );
2309
2310 assert_eq!(Token::Eof, an.next());
2311 assert_eq!(
2312 Pos {
2313 offset: input.len(),
2314 line: 1,
2315 col: input.len() + 1,
2316 },
2317 *an.pos()
2318 );
2319 }
2320
2321 {
2323 let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
2324 assert_eq!(Pos::default(), *an.pos());
2325
2326 assert_eq!(expect, an.next());
2327 assert_eq!(Pos::default(), *an.pos());
2328
2329 assert_eq!(Token::Eof, an.next());
2330 assert_eq!(
2331 Pos {
2332 offset: input.len(),
2333 line: 1,
2334 col: input.len() + 1,
2335 },
2336 *an.pos()
2337 );
2338
2339 assert_eq!(Token::Eof, an.next());
2340 assert_eq!(
2341 Pos {
2342 offset: input.len(),
2343 line: 1,
2344 col: input.len() + 1,
2345 },
2346 *an.pos()
2347 );
2348 }
2349 }
2350 }
2351
2352 #[rstest]
2353 #[case(r#"["#)]
2354 #[case(r#"]"#)]
2355 #[case(r#"false"#)]
2356 #[case(r#":"#)]
2357 #[case(r#"null"#)]
2358 #[case(r#"3.14159e+0"#)]
2359 #[case(r#"{"#)]
2360 #[case(r#"}"#)]
2361 #[case(r#""foo\/\u1234\/bar""#)]
2362 #[case(r#"true"#)]
2363 #[case(r#","#)]
2364 #[case("\n\n\n ")]
2365 #[should_panic(
2366 expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
2367 )]
2368 fn test_analyzer_single_token_panic_no_err(#[case] input: &str) {
2369 const CHUNK_SIZES: [usize; 6] = [1, 2, INLINE_LEN - 1, INLINE_LEN, INLINE_LEN + 1, 10];
2370
2371 for chunk_size in CHUNK_SIZES {
2372 let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
2373
2374 let token = an.next();
2375 assert!(
2376 !token.is_terminal(),
2377 "input = {input:?}, token = {token:?}, chunk_size = {chunk_size}"
2378 );
2379
2380 let _ = an.err();
2381 }
2382 }
2383
2384 #[test]
2385 #[should_panic(expected = "last `next()` returned `Token::Err` (use `err()` instead)")]
2386 fn test_analyzer_single_error_panic_no_content() {
2387 let mut an = PipeAnalyzer::new(SlicePipe::new(1, &b"a"[..]));
2388
2389 assert_eq!(Token::Err, an.next());
2390
2391 let _ = an.content();
2392 }
2393
2394 #[rstest]
2395 #[case(r#""\uDC00""#, ErrorKind::BadSurrogate { first: 0xdc00, second: None, }, 3)]
2396 #[case(&[b'"', 0xc2, 0xc0], ErrorKind::BadUtf8ContByte { seq_len: 2, offset: 1, value: 0xc0 }, 1)]
2397 #[case(&b"\"\x80", ErrorKind::UnexpectedByte { token: Some(Token::Str), expect: Expect::StrChar, actual: 0x80 }, 1)]
2398 #[case([b'"'], ErrorKind::UnexpectedEof(Token::Str), 1)]
2399 #[case("10.", ErrorKind::UnexpectedEof(Token::Num), 3)]
2400 fn test_analyzer_single_lexical_error<T>(
2401 #[case] input: T,
2402 #[case] kind: ErrorKind,
2403 #[case] pos_offset: usize,
2404 ) where
2405 T: AsRef<[u8]> + fmt::Debug,
2406 {
2407 const CHUNK_SIZES: [usize; 6] = [1, 2, INLINE_LEN - 1, INLINE_LEN, INLINE_LEN + 1, 10];
2408
2409 for chunk_size in CHUNK_SIZES {
2410 {
2412 let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_ref()));
2413 assert_eq!(Pos::default(), *an.pos());
2414
2415 assert_eq!(Token::Err, an.next());
2416 assert_eq!(Pos::default(), *an.pos());
2417
2418 let err = an.err();
2419 assert_eq!(kind, err.kind());
2420 assert_eq!(
2421 Pos {
2422 offset: pos_offset,
2423 line: 1,
2424 col: pos_offset + 1
2425 },
2426 *err.pos()
2427 );
2428 assert!(err.source().is_none());
2429
2430 assert_eq!(Token::Err, an.next());
2431 assert_eq!(Pos::default(), *an.pos());
2432 }
2433
2434 {
2436 let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_ref()));
2437 assert_eq!(Pos::default(), *an.pos());
2438
2439 assert_eq!(Token::Err, an.next());
2440 assert_eq!(Pos::default(), *an.pos());
2441
2442 assert_eq!(Token::Err, an.next());
2443 assert_eq!(Pos::default(), *an.pos());
2444 }
2445 }
2446 }
2447
2448 #[rstest]
2449 #[case(1, r#"{"#, [Token::ObjBegin], Pos::new(1, 1, 2), Pos::new(1, 1, 2))]
2450 #[case(1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2451 #[case(2, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2452 #[case(INLINE_LEN-1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2453 #[case(INLINE_LEN-1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2454 #[case(INLINE_LEN+1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2455 #[case(512, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2456 #[case(1, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2457 #[case(2, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2458 #[case(INLINE_LEN-1, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2459 #[case(INLINE_LEN, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2460 #[case(INLINE_LEN+1, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2461 #[case(1, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2462 #[case(2, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2463 #[case(INLINE_LEN-1, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2464 #[case(INLINE_LEN, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2465 #[case(INLINE_LEN+1, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2466 #[case(INLINE_LEN-1, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2467 #[case(INLINE_LEN-1, r#"[314.1592653589793238462643383279e-2 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(38, 1, 39), Pos::new(38, 1, 39))]
2468 #[case(INLINE_LEN, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2469 #[case(INLINE_LEN, r#"[314.1592653589793238462643383279e-2 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(38, 1, 39), Pos::new(38, 1, 39))]
2470 #[case(INLINE_LEN+1, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2471 #[case(INLINE_LEN+1, r#"[314.1592653589793238462643383279E+999 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(40, 1, 41), Pos::new(40, 1, 41))]
2472 #[case(512, r#"[3141.592653589793238462643383279e-3,{"aaaaaaaaaaaaaaaaaaaaaaaaaaaa":true}] "#, [Token::ArrBegin, Token::Num, Token::ValueSep, Token::ObjBegin, Token::Str, Token::NameSep, Token::LitTrue, Token::ObjEnd, Token::ArrEnd], Pos::new(75, 1, 76), Pos::new(79, 1, 80))]
2473 fn test_analyzer_single_read_error<T>(
2474 #[case] chunk_size: usize,
2475 #[case] input: &str,
2476 #[case] expect_tokens: T,
2477 #[case] expect_token_pos: Pos,
2478 #[case] expect_err_pos: Pos,
2479 ) where
2480 T: IntoIterator<Item = Token>,
2481 {
2482 #[derive(Debug)]
2483 struct PipeError;
2484
2485 impl fmt::Display for PipeError {
2486 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2487 f.write_str("there's an error in the pipe!")
2488 }
2489 }
2490
2491 impl std::error::Error for PipeError {}
2492
2493 struct ErrorPipe<'a> {
2494 chunk_size: usize,
2495 input: &'a [u8],
2496 }
2497
2498 impl<'a> ErrorPipe<'a> {
2499 fn new(chunk_size: usize, input: &'a [u8]) -> Self {
2500 assert!(chunk_size > 0);
2501
2502 Self { chunk_size, input }
2503 }
2504 }
2505
2506 impl<'a> Pipe for ErrorPipe<'a> {
2507 type Error = PipeError;
2508
2509 fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
2510 if self.input.len() > 0 {
2511 let n = self.input.len().min(self.chunk_size);
2512 let b = self.input[..n].to_vec().into();
2513 self.input = &self.input[n..];
2514
2515 Some(Ok(b))
2516 } else {
2517 Some(Err(PipeError))
2518 }
2519 }
2520 }
2521
2522 let mut an = PipeAnalyzer::new(ErrorPipe::new(chunk_size, input.as_bytes()));
2523
2524 for expect_token in expect_tokens.into_iter() {
2525 let actual_token = an.next();
2526
2527 assert_eq!(expect_token, actual_token);
2528 }
2529
2530 assert_eq!(Token::Err, an.next());
2531 assert_eq!(expect_token_pos, *an.pos());
2532 let err = an.err();
2533 assert_eq!(ErrorKind::Read, err.kind());
2534 assert_eq!(expect_err_pos, *err.pos());
2535
2536 assert_eq!(Token::Err, an.next());
2537 assert_eq!(expect_token_pos, *an.pos());
2538 let err = an.try_content().unwrap_err();
2539 assert_eq!(ErrorKind::Read, err.kind());
2540 assert_eq!(expect_err_pos, *err.pos());
2541 assert!(
2542 err.source()
2543 .and_then(|e| e.downcast_ref::<PipeError>())
2544 .is_some()
2545 );
2546
2547 assert_eq!(Token::Err, an.next());
2548 }
2549
2550 #[rstest]
2551 #[case(1)]
2552 #[case(2)]
2553 #[case(INLINE_LEN - 1)]
2554 #[case(INLINE_LEN)]
2555 #[case(INLINE_LEN + 1)]
2556 fn test_analyzer_into_parser(#[case] chunk_size: usize) {
2557 const INPUT: &str = r#"{"hello":["🌍"]}"#;
2558
2559 let mut parser =
2560 PipeAnalyzer::new(SlicePipe::new(chunk_size, INPUT.as_bytes())).into_parser();
2561
2562 assert_eq!(Token::ObjBegin, parser.next());
2563 assert_eq!("{", parser.content().literal());
2564 assert_eq!(Pos::default(), *parser.pos());
2565 assert_eq!(1, parser.level());
2566
2567 assert_eq!(Token::Str, parser.next());
2568 assert_eq!(r#""hello""#, parser.content().literal());
2569 assert_eq!(Pos::new(1, 1, 2), *parser.pos());
2570 assert_eq!(1, parser.level());
2571
2572 assert_eq!(Token::NameSep, parser.next());
2573 assert_eq!(":", parser.content().literal());
2574 assert_eq!(Pos::new(8, 1, 9), *parser.pos());
2575 assert_eq!(1, parser.level());
2576
2577 assert_eq!(Token::ArrBegin, parser.next());
2578 assert_eq!("[", parser.content().literal());
2579 assert_eq!(Pos::new(9, 1, 10), *parser.pos());
2580 assert_eq!(2, parser.level());
2581
2582 assert_eq!(Token::Str, parser.next());
2583 assert_eq!(r#""🌍""#, parser.content().literal());
2584 assert_eq!(Pos::new(10, 1, 11), *parser.pos());
2585 assert_eq!(2, parser.level());
2586
2587 assert_eq!(Token::ArrEnd, parser.next());
2588 assert_eq!("]", parser.content().literal());
2589 assert_eq!(Pos::new(16, 1, 14), *parser.pos());
2590 assert_eq!(1, parser.level());
2591
2592 assert_eq!(Token::ObjEnd, parser.next());
2593 assert_eq!("}", parser.content().literal());
2594 assert_eq!(Pos::new(17, 1, 15), *parser.pos());
2595 assert_eq!(0, parser.level());
2596
2597 for _ in 0..5 {
2598 assert_eq!(Token::Eof, parser.next());
2599 assert_eq!(Pos::new(18, 1, 16), *parser.pos());
2600 assert_eq!(0, parser.level());
2601 }
2602 }
2603
2604 #[rstest]
2605 #[case(1)]
2606 #[case(2)]
2607 #[case(INLINE_LEN - 1)]
2608 #[case(INLINE_LEN)]
2609 #[case(INLINE_LEN + 1)]
2610 fn test_analyzer_smoke(#[case] chunk_size: usize) {
2611 const JSON_TEXT: &str = r#"
2612
2613[
2614 [],
2615 {},
2616 [true, false, null, "foo",-9, -9.9, -99.99e-99, {"❤️😊":1}, 10000000],
2617 "\u0068\u0065\u006c\u006c\u006f\u002c\u0020\u0077\u006f\u0072\u006c\u0064",
2618 "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\nUt labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\nLaboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in."
2619]"#;
2620
2621 const EXPECT: &[(Token, Pos, &str, Option<&str>)] = &[
2622 (Token::White, Pos::new(0, 1, 1), "\n\n", None),
2624 (Token::ArrBegin, Pos::new(2, 3, 1), "[", None),
2626 (Token::White, Pos::new(3, 3, 2), "\n ", None),
2627 (Token::ArrBegin, Pos::new(6, 4, 3), "[", None),
2629 (Token::ArrEnd, Pos::new(7, 4, 4), "]", None),
2630 (Token::ValueSep, Pos::new(8, 4, 5), ",", None),
2631 (Token::White, Pos::new(9, 4, 6), "\n ", None),
2632 (Token::ObjBegin, Pos::new(12, 5, 3), "{", None),
2634 (Token::ObjEnd, Pos::new(13, 5, 4), "}", None),
2635 (Token::ValueSep, Pos::new(14, 5, 5), ",", None),
2636 (Token::White, Pos::new(15, 5, 6), "\n ", None),
2637 (Token::ArrBegin, Pos::new(18, 6, 3), "[", None),
2639 (Token::LitTrue, Pos::new(19, 6, 4), "true", None),
2640 (Token::ValueSep, Pos::new(23, 6, 8), ",", None),
2641 (Token::White, Pos::new(24, 6, 9), " ", None),
2642 (Token::LitFalse, Pos::new(25, 6, 10), "false", None),
2643 (Token::ValueSep, Pos::new(30, 6, 15), ",", None),
2644 (Token::White, Pos::new(31, 6, 16), " ", None),
2645 (Token::LitNull, Pos::new(32, 6, 17), "null", None),
2646 (Token::ValueSep, Pos::new(36, 6, 21), ",", None),
2647 (Token::White, Pos::new(37, 6, 22), " ", None),
2648 (Token::Str, Pos::new(38, 6, 23), r#""foo""#, None),
2649 (Token::ValueSep, Pos::new(43, 6, 28), ",", None),
2650 (Token::Num, Pos::new(44, 6, 29), "-9", None),
2651 (Token::ValueSep, Pos::new(46, 6, 31), ",", None),
2652 (Token::White, Pos::new(47, 6, 32), " ", None),
2653 (Token::Num, Pos::new(48, 6, 33), "-9.9", None),
2654 (Token::ValueSep, Pos::new(52, 6, 37), ",", None),
2655 (Token::White, Pos::new(53, 6, 38), " ", None),
2656 (Token::Num, Pos::new(54, 6, 39), "-99.99e-99", None),
2657 (Token::ValueSep, Pos::new(64, 6, 49), ",", None),
2658 (Token::White, Pos::new(65, 6, 50), " ", None),
2659 (Token::ObjBegin, Pos::new(66, 6, 51), "{", None),
2660 (Token::Str, Pos::new(67, 6, 52), r#""❤️😊""#, None),
2661 (Token::NameSep, Pos::new(79, 6, 57), ":", None),
2662 (Token::Num, Pos::new(80, 6, 58), "1", None),
2663 (Token::ObjEnd, Pos::new(81, 6, 59), "}", None),
2664 (Token::ValueSep, Pos::new(82, 6, 60), ",", None),
2665 (Token::White, Pos::new(83, 6, 61), " ", None),
2666 (Token::Num, Pos::new(84, 6, 62), "10000000", None),
2667 (Token::ArrEnd, Pos::new(92, 6, 70), "]", None),
2668 (Token::ValueSep, Pos::new(93, 6, 71), ",", None),
2669 (Token::White, Pos::new(94, 6, 72), "\n ", None),
2670 (
2672 Token::Str,
2673 Pos::new(97, 7, 3),
2674 r#""\u0068\u0065\u006c\u006c\u006f\u002c\u0020\u0077\u006f\u0072\u006c\u0064""#,
2675 Some(r#""hello, world""#),
2676 ),
2677 (Token::ValueSep, Pos::new(171, 7, 77), ",", None),
2678 (Token::White, Pos::new(172, 7, 78), "\n ", None),
2679 (
2681 Token::Str,
2682 Pos::new(175, 8, 3),
2683 concat!(
2684 r#""Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\n"#,
2685 r#"Ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n"#,
2686 r#"Laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in.""#,
2687 ),
2688 Some(concat!(
2689 "\"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\n",
2690 "Ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n",
2691 "Laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in.\"",
2692 )),
2693 ),
2694 (Token::White, Pos::new(455, 8, 283), "\n", None),
2696 (Token::ArrEnd, Pos::new(456, 9, 1), "]", None),
2697 (Token::Eof, Pos::new(457, 9, 2), "", None),
2698 ];
2699
2700 let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, JSON_TEXT.as_bytes()));
2701
2702 for (i, (expect_token, expect_pos, expect_literal, expect_unescaped)) in
2703 EXPECT.iter().enumerate()
2704 {
2705 let actual_token = an.next();
2706 let actual_pos = *an.pos();
2707 let content = an.content();
2708
2709 assert_eq!(
2710 *expect_token, actual_token,
2711 "i = {i}, actual_pos = {actual_pos}, expect_pos = {expect_pos}"
2712 );
2713 assert_eq!(
2714 *expect_pos, actual_pos,
2715 "i = {i}, token = {actual_token}, content = {content}"
2716 );
2717 assert_eq!(
2718 *expect_literal,
2719 content.literal(),
2720 "i = {i}, token = {actual_token}, expect_literal = {expect_literal:?}, content.literal() = {}",
2721 content.literal(),
2722 );
2723 if let Some(u) = expect_unescaped {
2724 assert!(
2725 content.is_escaped(),
2726 "i = {i}, token = {actual_token}, literal = {expect_literal:?}"
2727 );
2728 assert_eq!(*u, content.unescaped());
2729 } else {
2730 assert!(
2731 !content.is_escaped(),
2732 "i = {i}, token = {actual_token}, literal = {expect_literal:?}"
2733 );
2734 assert_eq!(*expect_literal, content.unescaped());
2735 }
2736 }
2737 }
2738
// Minimal error type used as the source error in these tests; it carries a
// static message and displays exactly that message.
#[derive(Debug, PartialEq, Eq)]
struct ToyError(&'static str);

impl fmt::Display for ToyError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(message) = self;

        // Written verbatim: formatter width/fill options are not applied.
        f.write_str(message)
    }
}

impl std::error::Error for ToyError {}
2749
// Test pipe that serves a byte slice in pieces of at most `chunk_size` bytes.
struct SlicePipe<'a> {
    // Upper bound on the number of bytes handed out per chunk.
    chunk_size: usize,
    // Bytes not yet handed out.
    input: &'a [u8],
}

impl<'a> SlicePipe<'a> {
    // Creates a pipe over `input`.
    //
    // Panics if `chunk_size` is zero: a zero chunk size would hand out empty
    // chunks without ever shrinking `input`, so the input could never be
    // exhausted.
    fn new(chunk_size: usize, input: &'a [u8]) -> Self {
        assert!(chunk_size > 0, "chunk_size must be positive");

        Self { chunk_size, input }
    }
}
2760
2761 impl<'a> Pipe for SlicePipe<'a> {
2762 type Error = Infallible;
2763
2764 fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
2765 if self.input.len() > 0 {
2766 let n = self.input.len().min(self.chunk_size);
2767 let b = self.input[..n].to_vec().into();
2768 self.input = &self.input[n..];
2769
2770 Some(Ok(b))
2771 } else {
2772 None
2773 }
2774 }
2775 }
2776
2777 trait IntoString {
2778 fn into_string(self) -> String;
2779 }
2780
2781 impl<T: IntoBuf> IntoString for T {
2782 fn into_string(self) -> String {
2783 let mut src = self.into_buf();
2784 let mut dst = Vec::with_capacity(src.remaining());
2785 while src.remaining() > 0 {
2786 let chunk = src.chunk();
2787 dst.extend_from_slice(chunk);
2788 src.advance(chunk.len());
2789 }
2790
2791 String::from_utf8(dst).expect("valid UTF-8")
2792 }
2793 }
2794}