// bufjson/lexical/pipe.rs
//! Convert a stream (usually async) of [`bytes::Bytes`] chunks into JSON lexical tokens.
//!
//! The `Bytes` chunks can be produced either using the asynchronous programming model or using a
//! multi-threaded programming model.
//!
//! # Difference between `pipe` and `read`
//!
//! Both this module and the `read` module provide lexical analyzers that scan JSON read from an
//! external source.
//!
//! For the `read` module, that external source is a [`std::io::Read`]. A consequence of its design
//! is that `read::ReadAnalyzer` has to read *from* the `Read` *into* its internal buffers, so every
//! byte of input has to be copied or moved in order to be scanned by the lexical analyzer.
//!
//! In contrast, the external source for this module is a [`Pipe`] that provides input chunks to the
//! [`PipeAnalyzer`] as [`Bytes`] buffers. `Bytes` buffers are reference-counted, immutable values
//! that support shared ownership. Because of these features, input bytes already resident in memory
//! can be sent to a [`PipeAnalyzer`] without any copying or allocation. These properties make
//! [`PipeAnalyzer`] an excellent fit for some use cases, like web programming, where chunks of the
//! JSON text are already in memory because they were read by some other subsystem, such as the
//! network stack.

23use crate::{
24    Buf, BufUnderflow, EqStr, IntoBuf, OrdStr, Pos,
25    lexical::{self, ErrorKind, Token, Unescaped, state},
26    syntax,
27};
28use bytes::{Buf as _, Bytes};
29use smallvec::{SmallVec, smallvec};
30use std::{
31    borrow::Cow,
32    cmp::Ordering,
33    convert::Infallible,
34    fmt,
35    hash::{Hash, Hasher},
36    mem::MaybeUninit,
37    str::FromStr,
38    sync::Arc,
39};
40
// Use a smaller inline buffer size in tests to push more test cases out of the inline
// representation and into the more complex representations that contain references into the actual
// read buffers.
#[cfg(test)]
const INLINE_LEN: usize = 4;
#[cfg(not(test))]
const INLINE_LEN: usize = 36;

// Fixed-size stack buffer used by the inline literal representation to store short token text
// without allocating.
type InlineBuf = [u8; INLINE_LEN];
50
// Packs a `usize` value (top bit excluded) together with a single `bool` flag into one machine
// word.
//
// `InnerLiteral` uses this to fold the buffer-index cursor and the escaped flag into a single
// field, which helps keep `InnerLiteral` at no more than 40 bytes on a 64-bit machine.
#[derive(Clone, Debug)]
struct USizeBool(usize);

impl USizeBool {
    // The most significant bit stores the flag; all lower bits store the value.
    const FLAG_BIT: usize = 1 << (usize::BITS - 1);
    const VALUE_MASK: usize = !Self::FLAG_BIT;

    // Packs `value` and `flag` together. `value` must fit below the flag bit.
    fn new(value: usize, flag: bool) -> Self {
        debug_assert!(value <= Self::VALUE_MASK);
        let flag_bits = if flag { Self::FLAG_BIT } else { 0 };
        Self(flag_bits | value)
    }

    // Reads back the `usize` part.
    #[inline(always)]
    fn get_usize(&self) -> usize {
        self.0 & !Self::FLAG_BIT
    }

    // Overwrites the `usize` part while leaving the flag bit untouched.
    #[inline(always)]
    fn set_usize(&mut self, value: usize) {
        debug_assert!(value <= Self::VALUE_MASK);
        self.0 = value | (self.0 & Self::FLAG_BIT);
    }

    // Reads back the `bool` part.
    #[inline(always)]
    fn get_bool(&self) -> bool {
        (self.0 & Self::FLAG_BIT) != 0
    }
}
84
// A literal that spans two or more input `Bytes` buffers, stored as the ordered sequence of
// buffers plus a read cursor.
#[derive(Clone, Debug)]
struct MultiBytes {
    // The input buffers, in order. `new` trims the first buffer to begin at the literal's first
    // byte and truncates the last buffer at the literal's final byte.
    arr: Box<[Bytes]>,
    // Number of unread bytes remaining from the cursor position to the end of the last buffer.
    rem: usize,
    // `usize` part: index into `arr` of the buffer currently under the cursor.
    // `bool` part: whether the literal contains at least one escape sequence.
    pos_escaped: USizeBool,
}

impl MultiBytes {
    // Builds a `MultiBytes` covering `len` bytes starting at offset `start_pos` within `arr[0]`.
    //
    // In non-test builds, the debug assertions require that the literal truly starts inside the
    // first buffer and spans beyond it; test builds relax both constraints so edge cases are
    // easier to construct.
    fn new(mut arr: Box<[Bytes]>, start_pos: usize, len: usize, escaped: bool) -> Self {
        #[cfg(debug_assertions)]
        {
            #[cfg(test)]
            const ALLOW_FIRST_BUFFER_EMPTY: bool = true;
            #[cfg(not(test))]
            const ALLOW_FIRST_BUFFER_EMPTY: bool = false;
            debug_assert!(
                ALLOW_FIRST_BUFFER_EMPTY || start_pos < arr[0].len(),
                "start_pos ({start_pos}) < arr[0].len ({})",
                arr[0].len()
            );
            #[cfg(test)]
            const ALLOW_SINGLE_BUFFER: bool = true;
            #[cfg(not(test))]
            const ALLOW_SINGLE_BUFFER: bool = false;
            debug_assert!(
                ALLOW_SINGLE_BUFFER || arr[0].len() < start_pos + len,
                "arr[0].len() ({}) < start_pos ({start_pos}) + len ({len})",
                arr[0].len()
            );
        }

        // Slice away the unneeded prefix bytes from the first buffer.
        arr[0].advance(start_pos);

        // Slice away the unneeded suffix bytes from the last buffer. `contrib` is the byte count
        // contributed by every buffer except the last; it is deliberately computed *after* the
        // first buffer was trimmed above, so the ordering here matters.
        let n = arr.len();
        let contrib: usize = arr.iter().take(n - 1).map(Bytes::len).sum();
        debug_assert!(
            contrib <= len,
            "contrib ({contrib}) <= len ({len}) for arr = {arr:?}"
        );
        arr[n - 1].truncate(len - contrib);

        // Return the new multi-bytes.
        Self {
            arr,
            rem: len,
            pos_escaped: USizeBool::new(0, escaped),
        }
    }
}
136
impl Buf for MultiBytes {
    // Advances the cursor by `n` bytes, panicking with a `BufUnderflow` message if fewer than `n`
    // bytes remain.
    fn advance(&mut self, mut n: usize) {
        if self.remaining() < n {
            panic!(
                "{}",
                &BufUnderflow {
                    requested: n,
                    remaining: self.remaining(),
                }
            );
        } else {
            self.rem -= n;
            // Step past every buffer that `n` fully consumes...
            let mut pos = self.pos_escaped.get_usize();
            while pos < self.arr.len() && self.arr[pos].len() <= n {
                n -= self.arr[pos].len();
                pos += 1;
            }
            // ...then trim the leading `n` bytes off the buffer the cursor lands in, if any.
            if n > 0 {
                debug_assert!((pos) < self.arr.len());
                debug_assert!(self.arr[pos].len() > n);
                self.arr[pos] = self.arr[pos].slice(n..);
            }
            self.pos_escaped.set_usize(pos);
        }
    }

    // Returns the unread remainder of the buffer under the cursor, or an empty slice once the
    // cursor has moved past the last buffer.
    #[inline]
    fn chunk(&self) -> &[u8] {
        let pos = self.pos_escaped.get_usize();
        if pos < self.arr.len() {
            &self.arr[pos]
        } else {
            &[]
        }
    }

    // Total unread bytes across all buffers.
    #[inline(always)]
    fn remaining(&self) -> usize {
        self.rem
    }

    // Copies exactly `dst.len()` bytes into `dst`, advancing the cursor on success. On underflow,
    // returns an error without consuming any bytes.
    fn try_copy_to_slice(&mut self, mut dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        if self.remaining() < dst.len() {
            Err(BufUnderflow {
                requested: dst.len(),
                remaining: self.remaining(),
            })
        } else {
            self.rem -= dst.len();
            let mut pos = self.pos_escaped.get_usize();
            // Copy whole buffers while each fits within what's left of `dst`, shrinking `dst` as
            // we go.
            while pos < self.arr.len() && self.arr[pos].len() <= dst.len() {
                let b = &self.arr[pos];
                let m = b.len();
                dst[0..m].copy_from_slice(b);
                dst = &mut dst[m..];
                pos += 1;
            }
            // Copy the final partial buffer, then slice off the bytes just consumed.
            if !dst.is_empty() {
                debug_assert!(pos < self.arr.len());
                debug_assert!(self.arr[pos].len() > dst.len());
                let n = dst.len();
                dst.copy_from_slice(&self.arr[pos][..n]);
                self.arr[pos] = self.arr[pos].slice(n..);
            }
            self.pos_escaped.set_usize(pos);

            Ok(())
        }
    }
}
207
impl IntoBuf for MultiBytes {
    type Buf = Self;

    // `MultiBytes` is already a cursor over its buffers, so it acts as its own `Buf`.
    fn into_buf(self) -> Self::Buf {
        self
    }
}
215
// Borrowed view of a literal's storage: either a single contiguous UTF-8 run, or text split
// across multiple input buffers.
#[derive(Debug)]
enum Repr<'a> {
    // The full literal text is contiguous in memory.
    Together(&'a str),
    // The literal text spans two or more `Bytes` buffers.
    Split(&'a MultiBytes),
}
221
// Internal storage for a literal. Except for `Multi`, each variant carries a `bool` recording
// whether the text contains at least one escape sequence; `Multi` packs the same flag inside
// `MultiBytes::pos_escaped`.
#[derive(Clone, Debug)]
enum InnerLiteral {
    // Borrowed static string, e.g. from `Literal::from_static`.
    Static(&'static str, bool),
    // Short text copied into a fixed inline buffer; the two `u8`s are the start and end offsets
    // of the live bytes within that buffer.
    Inline(u8, u8, InlineBuf, bool),
    // Text held in a single reference-counted buffer.
    Bytes(Bytes, bool),
    // Text spanning multiple reference-counted buffers.
    Multi(MultiBytes),
}
229
impl InnerLiteral {
    // Copies `src` into a fresh inline buffer, marked as unescaped.
    // `src.len()` must not exceed `INLINE_LEN`; the `copy_from_slice` below panics otherwise.
    fn inline(src: &[u8]) -> Self {
        let mut dst: InlineBuf = [0; INLINE_LEN];
        dst[0..src.len()].copy_from_slice(src);

        Self::Inline(0, src.len() as u8, dst, false)
    }

    // Test helper: wraps a static string in the single-buffer `Bytes` representation.
    #[cfg(test)]
    fn test_new_bytes(s: &'static str, escaped: bool) -> Self {
        Self::Bytes(Bytes::from_static(s.as_bytes()), escaped)
    }

    // Test helper: builds a `Multi` literal from any sequence of buffer-convertible values.
    #[cfg(test)]
    fn test_new_multi<I, T>(bufs: I, start_pos: usize, len: usize, escaped: bool) -> Self
    where
        I: IntoIterator<Item = T>,
        T: Into<Bytes>,
    {
        let arr: Box<[Bytes]> = bufs.into_iter().map(Into::into).collect();

        Self::Multi(MultiBytes::new(arr, start_pos, len, escaped))
    }

    // Length in bytes of the (remaining) literal text.
    #[inline(always)]
    fn len(&self) -> usize {
        match self {
            Self::Static(s, _) => s.len(),
            Self::Inline(i, j, _, _) => (*j - *i) as usize,
            Self::Bytes(b, _) => b.len(),
            Self::Multi(v) => v.rem,
        }
    }

    // Classifies the storage as contiguous (`Together`) or split across buffers (`Split`).
    #[inline]
    fn repr(&self) -> Repr<'_> {
        match self {
            Self::Static(s, _) => Repr::Together(s),
            // SAFETY: assumes every constructor only ever stores valid UTF-8 in the inline and
            // `Bytes` representations (token text scanned from JSON, or bytes copied from a
            // `str`) — TODO(review): confirm for any constructors outside this view.
            Self::Inline(i, j, b, _) => {
                Repr::Together(unsafe { str::from_utf8_unchecked(&b[*i as usize..*j as usize]) })
            }
            Self::Bytes(b, _) => Repr::Together(unsafe { str::from_utf8_unchecked(b) }),
            Self::Multi(v) => Repr::Split(v),
        }
    }

    // Whether the literal text contains at least one escape sequence.
    #[inline]
    fn is_escaped(&self) -> bool {
        match self {
            Self::Static(_, escaped) | Self::Inline(_, _, _, escaped) | Self::Bytes(_, escaped) => {
                *escaped
            }
            Self::Multi(m) => m.pos_escaped.get_bool(),
        }
    }

    // Returns the unescaped form of the literal. If the text contains no escapes, this clones the
    // (cheaply clonable) representation without allocating; otherwise it expands the escapes into
    // a newly allocated `String`.
    fn unescaped(&self) -> Unescaped<Literal> {
        match self {
            Self::Static(_, false) | Self::Inline(_, _, _, false) | Self::Bytes(_, false) => {
                Unescaped::Literal(Literal(self.clone()))
            }
            Self::Multi(m) if !m.pos_escaped.get_bool() => {
                Unescaped::Literal(Literal(self.clone()))
            }
            _ => {
                let mut buf = Vec::new();
                lexical::unescape(self.clone(), &mut buf);

                // SAFETY: `self` was valid UTF-8 before it was de-escaped, and the de-escaping
                //         process maintains UTF-8 safety.
                let s = unsafe { String::from_utf8_unchecked(buf) };

                Unescaped::Expanded(s)
            }
        }
    }
}
307
308impl Buf for InnerLiteral {
309    fn advance(&mut self, n: usize) {
310        match self {
311            Self::Static(s, _) => {
312                if s.len() < n {
313                    panic!(
314                        "{}",
315                        &BufUnderflow {
316                            requested: n,
317                            remaining: s.len(),
318                        }
319                    );
320                } else {
321                    *self = Self::Static(&s[n..], false)
322                }
323            }
324
325            Self::Inline(i, j, b, _) => {
326                let len = (*j - *i) as usize;
327                if len < n {
328                    panic!(
329                        "{}",
330                        &BufUnderflow {
331                            requested: n,
332                            remaining: len,
333                        }
334                    );
335                } else {
336                    *self = Self::Inline(*i + n as u8, *j, *b, false);
337                }
338            }
339
340            Self::Bytes(b, _) => {
341                if b.len() < n {
342                    panic!(
343                        "{}",
344                        &BufUnderflow {
345                            requested: n,
346                            remaining: b.len(),
347                        }
348                    );
349                } else {
350                    *self = Self::Bytes(b.slice(n..), false);
351                }
352            }
353
354            Self::Multi(m) => m.advance(n),
355        }
356    }
357
358    fn chunk(&self) -> &[u8] {
359        match &self {
360            Self::Static(s, _) => s.as_bytes(),
361            Self::Inline(i, j, b, _) => &b[*i as usize..*j as usize],
362            Self::Bytes(b, _) => b,
363            Self::Multi(r) => r.chunk(),
364        }
365    }
366
367    #[inline]
368    fn remaining(&self) -> usize {
369        self.len()
370    }
371
372    fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
373        match self {
374            Self::Static(s, _) => {
375                if s.len() < dst.len() {
376                    Err(BufUnderflow {
377                        requested: dst.len(),
378                        remaining: s.len(),
379                    })
380                } else {
381                    dst.copy_from_slice(&s.as_bytes()[..dst.len()]);
382                    *self = Self::Static(&s[dst.len()..], false);
383
384                    Ok(())
385                }
386            }
387
388            InnerLiteral::Inline(i, j, b, _) => {
389                let len = (*j - *i) as usize;
390                if len < dst.len() {
391                    Err(BufUnderflow {
392                        requested: dst.len(),
393                        remaining: len,
394                    })
395                } else {
396                    dst.copy_from_slice(&b[*i as usize..*i as usize + dst.len()]);
397                    *i += dst.len() as u8;
398
399                    Ok(())
400                }
401            }
402
403            InnerLiteral::Bytes(b, _) => {
404                if b.len() < dst.len() {
405                    panic!(
406                        "{}",
407                        &BufUnderflow {
408                            requested: dst.len(),
409                            remaining: b.len(),
410                        }
411                    );
412                } else {
413                    dst.copy_from_slice(&b[..dst.len()]);
414                    *self = Self::Bytes(b.slice(dst.len()..), false);
415
416                    Ok(())
417                }
418            }
419
420            InnerLiteral::Multi(m) => m.try_copy_to_slice(dst),
421        }
422    }
423}
424
impl IntoBuf for InnerLiteral {
    type Buf = Self;

    // `InnerLiteral` already tracks its own read cursor, so it acts as its own `Buf`.
    fn into_buf(self) -> Self::Buf {
        self
    }
}
432
/// Zero allocation view of the literal text content of a JSON token.
///
/// To prevent allocation and minimize copying, a `Literal` may contain one or more [`Bytes`]
/// buffers that share memory with the `Bytes` values that were piped into the [`PipeAnalyzer`].
/// Since a token's text content can span the boundary between two or more of these buffers, the
/// full text of the token may be non-contiguous in memory. To make this data structure usable in
/// the widest range of use cases, `Literal` implements the [`Buf`] trait, which provides a uniform
/// interface for reading data from potentially non-contiguous sources.
///
/// # Performance considerations
///
/// Clones are cheap and do not allocate. However, for the memory considerations described below, it
/// is preferable to use short-lifetime clones for discrete tasks and not to proliferate long-lived
/// clones.
///
/// # Memory considerations
///
/// Because a `Literal` may share memory with the `Bytes` buffers that were piped into a
/// `PipeAnalyzer`, holding on to a `Literal` instance may prevent the `PipeAnalyzer` from reusing
/// buffers. This can lead to increased memory usage. If all `Literal` instances produced by a
/// `PipeAnalyzer` are retained, they will tend to prevent any of the allocations backing the input
/// `Bytes` buffers from being dropped. This may undermine the value proposition of a streaming
/// analyzer and, for large enough JSON texts, may lead to out-of-memory conditions. Therefore, it
/// is advised that you retain `Literal` instances only as long as necessary to process them,
/// extracting owned copies of their data if you need long-lived access to the token text.
#[derive(Clone, Debug)]
pub struct Literal(InnerLiteral);
460
impl Literal {
    /// Converts a static lifetime string slice to a literal value.
    ///
    /// This function is the most efficient way to wrap a static string as a `Literal`. It does not
    /// allocate and produces the lightest-weight `Literal` value.
    ///
    /// If you have a non-static string slice, use [`from_ref`], one of the [`From`] trait
    /// implementations, or the [`FromStr`] implementation. If creating a literal value from an
    /// owned `String`, use [`from_string`].
    ///
    /// # Examples
    ///
    /// Populate and use a hash set of allowed JSON object keys.
    ///
    /// ```
    /// use bufjson::lexical::{Token, pipe::{Literal, PipeAnalyzer}};
    /// use bytes::Bytes;
    /// use std::{collections::HashSet, sync::mpsc::channel, thread};
    ///
    /// // Populate the set of allowed JSON object keys.
    /// let mut allowed = HashSet::with_capacity(3);
    /// allowed.insert(Literal::from_static(r#""foo""#)); // Note: store `"foo"`, not `foo`
    /// allowed.insert(Literal::from_static(r#""baz""#)); // Note: store `"baz"`, not `baz`
    ///
    /// // Parse some JSON.
    /// let (tx, rx) = channel();
    /// tx.send(r#"{"foo":"bar","baz":"qux"}"#.into()).unwrap();
    /// drop(tx);
    /// let mut parser = PipeAnalyzer::new(rx).into_parser();
    ///
    /// // Verify that the literal value of every object key is allowed.
    /// assert_eq!(Token::ObjBegin, parser.next());
    /// loop {
    ///     match parser.next_meaningful() {
    ///         Token::Str => {
    ///             let key = parser.content().literal();
    ///             assert!(allowed.contains(&key));
    ///             assert_eq!(Token::Str, parser.next_meaningful()); // Skip corresponding value.
    ///         },
    ///         Token::ObjEnd => (),
    ///         Token::Eof => break,
    ///         _ => unreachable!(),
    ///     }
    /// }
    /// ```
    ///
    /// [`from_ref`]: method@Self::from_ref
    /// [`from_string`]: method@Self::from_string
    pub const fn from_static(s: &'static str) -> Self {
        Self(InnerLiteral::Static(s, false))
    }

    /// Creates a literal value from anything that cheaply converts to a string slice reference.
    ///
    /// If you have a static string slice, prefer [`from_static`], which has a lower construction
    /// cost and a more efficient implementation. If you have an owned `String` you can consume,
    /// prefer [`from_string`], which will avoid allocation. If you have a `Cow` you can consume,
    /// prefer `From<Cow<'_, str>>`, which will avoid allocation if the `Cow` contains an owned
    /// value.
    ///
    /// [`from_static`]: method@Self::from_static
    /// [`from_string`]: method@Self::from_string
    pub fn from_ref<T: AsRef<str> + ?Sized>(s: &T) -> Self {
        let t = s.as_ref();
        let b = t.as_bytes();

        // Short strings are copied into the inline representation to avoid allocating; longer
        // strings are copied into a fresh reference-counted buffer.
        if b.len() <= INLINE_LEN {
            Self(InnerLiteral::inline(b))
        } else {
            Self(InnerLiteral::Bytes(Bytes::copy_from_slice(b), false))
        }
    }

    /// Creates a literal value by consuming an owned string value.
    ///
    /// # Examples
    ///
    /// Create a literal from an owned string.
    ///
    /// ```
    /// # use bufjson::lexical::pipe::Literal;
    /// let s = "foo".to_string();
    /// let lit = Literal::from_string(s);
    /// assert_eq!("foo", lit);
    /// ```
    ///
    /// There is a `From<String>` implementation that is functionally equivalent.
    ///
    /// ```
    /// # use bufjson::lexical::pipe::Literal;
    /// let s = "bar".to_string();
    /// let lit: Literal = s.into();
    /// assert_eq!("bar", lit);
    /// ```
    pub fn from_string(s: String) -> Self {
        // Short strings are inlined; longer strings reuse the `String`'s allocation via
        // `Bytes::from_owner`, avoiding a copy.
        if s.len() <= INLINE_LEN {
            Self(InnerLiteral::inline(s.as_bytes()))
        } else {
            Self(InnerLiteral::Bytes(
                Bytes::from_owner(s.into_bytes()),
                false,
            ))
        }
    }

    /// Returns the length of `self`.
    ///
    /// This length is in bytes, not `char` values or graphemes. In other words, it might not be
    /// what a human considers the length of the string.
    ///
    /// # Examples
    ///
    /// Get the length of a literal.
    ///
    /// ```
    /// # use bufjson::lexical::pipe::Literal;
    /// let boring = Literal::from_static("foo");
    /// assert_eq!(3, boring.len());
    ///
    /// let fancy = Literal::from_static("ƒoo"); // fancy f!
    /// assert_eq!(fancy.len(), 4);
    /// ```
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` if `self` has a length of zero bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// # use bufjson::lexical::pipe::Literal;
    /// assert_eq!(true, Literal::from_static("").is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    // Classifies the underlying storage as contiguous or split across buffers.
    #[inline(always)]
    fn repr(&self) -> Repr<'_> {
        self.0.repr()
    }
}
604
impl IntoBuf for Literal {
    type Buf = LiteralBuf;

    // Consumes the literal, wrapping its internal representation in a cursor type that
    // implements [`Buf`]. No bytes are copied.
    fn into_buf(self) -> Self::Buf {
        LiteralBuf(self.0)
    }
}
612
613impl fmt::Display for Literal {
614    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
615        match self.repr() {
616            Repr::Together(s) => f.write_str(s),
617            Repr::Split(r) => crate::buf::display(r.clone(), f),
618        }
619    }
620}
621
// Marker impls: `EqStr` presumably records that the `PartialEq<str>` impl below is consistent
// with `PartialEq<Self>` (see `crate::EqStr`), and `Eq` asserts full equivalence for `Literal`.
impl EqStr for Literal {}

impl Eq for Literal {}
625
626impl From<Literal> for String {
627    fn from(value: Literal) -> Self {
628        match value.repr() {
629            Repr::Together(s) => s.to_string(),
630            Repr::Split(r) => crate::buf::to_string(r.clone()),
631        }
632    }
633}
634
635impl<T: ?Sized + AsRef<str>> From<&T> for Literal {
636    fn from(value: &T) -> Self {
637        Literal::from_ref(&value)
638    }
639}
640
641impl<'a> From<Cow<'a, str>> for Literal {
642    fn from(value: Cow<'a, str>) -> Self {
643        match value {
644            Cow::Borrowed(s) => Literal::from_ref(&s),
645            Cow::Owned(s) => Literal::from_string(s),
646        }
647    }
648}
649
impl From<String> for Literal {
    // Functionally equivalent to [`Literal::from_string`]; see that method for examples.
    fn from(value: String) -> Self {
        Literal::from_string(value)
    }
}
655
656impl FromStr for Literal {
657    type Err = Infallible;
658
659    fn from_str(s: &str) -> Result<Self, Self::Err> {
660        Ok(Literal::from_ref(&s))
661    }
662}
663
impl Hash for Literal {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self.repr() {
            // Fast path: a short contiguous literal hashes its raw bytes directly.
            Repr::Together(s) if s.len() <= crate::buf::HASH_CHUNK => state.write(s.as_bytes()),
            // Longer or non-contiguous content goes through the shared chunked hasher.
            // NOTE(review): this presumably feeds the hasher the same byte stream as the fast
            // path, so that equal literals hash equally regardless of representation — confirm
            // in `crate::buf::hash`.
            Repr::Together(s) => crate::buf::hash(s, state),
            Repr::Split(m) => crate::buf::hash(m.clone(), state),
        }
    }
}
673
impl Ord for Literal {
    // Compares as string slices when both sides are contiguous; otherwise delegates to
    // `crate::buf_cmp`, which handles non-contiguous operands.
    fn cmp(&self, other: &Self) -> Ordering {
        match (self.repr(), other.repr()) {
            (Repr::Together(a), Repr::Together(b)) => Ord::cmp(a, b),
            (Repr::Together(a), Repr::Split(b)) => crate::buf_cmp(a, b.clone()),
            (Repr::Split(a), Repr::Together(b)) => crate::buf_cmp(a.clone(), b),
            (Repr::Split(a), Repr::Split(b)) => crate::buf_cmp(a.clone(), b.clone()),
        }
    }
}
684
impl OrdStr for Literal {
    // Orders a literal against a plain string slice without materializing an owned copy.
    fn cmp(&self, other: &str) -> Ordering {
        match self.repr() {
            Repr::Together(s) => Ord::cmp(s, other),
            Repr::Split(m) => crate::buf_cmp(m.clone(), other),
        }
    }
}
693
694impl PartialEq for Literal {
695    fn eq(&self, other: &Self) -> bool {
696        if self.len() != other.len() {
697            false
698        } else {
699            match (self.repr(), other.repr()) {
700                (Repr::Together(a), Repr::Together(b)) => a == b,
701                (Repr::Together(a), Repr::Split(b)) => {
702                    crate::buf_cmp(a, b.clone()) == Ordering::Equal
703                }
704                (Repr::Split(a), Repr::Together(b)) => {
705                    crate::buf_cmp(a.clone(), b) == Ordering::Equal
706                }
707                (Repr::Split(a), Repr::Split(b)) => {
708                    crate::buf_cmp(a.clone(), b.clone()) == Ordering::Equal
709                }
710            }
711        }
712    }
713}
714
715impl PartialEq<str> for Literal {
716    fn eq(&self, other: &str) -> bool {
717        if self.len() != other.len() {
718            false
719        } else {
720            match self.repr() {
721                Repr::Together(s) => s == other,
722                Repr::Split(r) => crate::buf_cmp(r.clone(), other) == Ordering::Equal,
723            }
724        }
725    }
726}
727
// The remaining equality impls delegate to the primary `PartialEq<str>` comparison so that
// `Literal` compares symmetrically with `str`, `&str`, and `String`.
impl PartialEq<&str> for Literal {
    fn eq(&self, other: &&str) -> bool {
        self == *other
    }
}

impl PartialEq<String> for Literal {
    fn eq(&self, other: &String) -> bool {
        self == other.as_str()
    }
}

impl PartialEq<Literal> for str {
    fn eq(&self, other: &Literal) -> bool {
        other == self
    }
}

impl PartialEq<Literal> for &str {
    fn eq(&self, other: &Literal) -> bool {
        other == self
    }
}

impl PartialEq<Literal> for String {
    fn eq(&self, other: &Literal) -> bool {
        other == self
    }
}
757
// `Literal` has a total order, so `partial_cmp` always returns `Some`.
impl PartialOrd for Literal {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl PartialOrd<str> for Literal {
    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
        Some(OrdStr::cmp(self, other))
    }
}

// The string-first orderings reverse the literal-vs-string comparison so the result is from the
// string's perspective.
impl PartialOrd<Literal> for str {
    fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
        Some(OrdStr::cmp(other, self).reverse())
    }
}

impl PartialOrd<&str> for Literal {
    fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
        Some(OrdStr::cmp(self, other))
    }
}

impl PartialOrd<Literal> for &str {
    fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
        Some(OrdStr::cmp(other, self).reverse())
    }
}

impl PartialOrd<String> for Literal {
    fn partial_cmp(&self, other: &String) -> Option<Ordering> {
        self.partial_cmp(other.as_str())
    }
}

impl PartialOrd<Literal> for String {
    fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
        self.as_str().partial_cmp(other)
    }
}
799
800/// A [`Buf`] implementation for [`Literal`].
801///
802/// # Example
803///
804/// ```
805/// use bufjson::{Buf, IntoBuf, lexical::pipe::Literal};
806///
807/// let lit = Literal::from_static("hello, world!");
808/// let mut buf = lit.into_buf();
809///
810/// assert_eq!(13, buf.remaining());
811///
812/// let mut dst = [0; 5];
813/// buf.copy_to_slice(&mut dst);
814///
815/// assert_eq!(b"hello", &dst);
816/// assert_eq!(8, buf.remaining());
817/// ```
818pub struct LiteralBuf(InnerLiteral);
819
// Inherent mirrors of the `Buf` methods, all forwarding to the inner representation's cursor.
impl LiteralBuf {
    /// Advances the internal cursor.
    ///
    /// The next call to [`chunk`] will return a slice starting `n` bytes further into the literal.
    ///
    /// This is an inherent implementation of [`Buf::advance`] for convenience, so it is available
    /// even when you don't have the trait imported.
    ///
    /// # Panics
    ///
    /// Panics if `n > self.remaining()`.
    ///
    /// [`chunk`]: method@Self::chunk
    #[inline(always)]
    pub fn advance(&mut self, n: usize) {
        self.0.advance(n)
    }

    /// Returns a slice of bytes starting at the current position, with length between 0 and
    /// [`remaining`].
    ///
    /// The returned slice may be shorter than [`remaining`] if the internal representation is not
    /// contiguous. An empty slice is returned only when [`remaining`] returns 0, and is always
    /// returned in this case since this method never panics.
    ///
    /// Calling `chunk` does not advance the internal cursor.
    ///
    /// This is an inherent implementation of [`Buf::chunk`] for convenience, so it is available
    /// even when you don't have the trait imported.
    ///
    /// [`remaining`]: method@Self::remaining
    #[inline(always)]
    pub fn chunk(&self) -> &[u8] {
        self.0.chunk()
    }

    /// Returns the number of bytes between the current position and the end of the `Literal`.
    ///
    /// This value is always greater than or equal to the length of the slice returned by [`chunk`].
    ///
    /// This is an inherent implementation of [`Buf::remaining`] for convenience, so it is available
    /// even when you don't have the trait imported.
    ///
    /// [`chunk`]: method@Self::chunk
    #[inline(always)]
    pub fn remaining(&self) -> usize {
        self.0.remaining()
    }

    /// Copies bytes from `self` into `dst`.
    ///
    /// Advances the internal cursor by the number of bytes copied.
    ///
    /// Returns a buffer underflow error without advancing the cursor if `self` does not have enough
    /// bytes [`remaining`] to fill `dst`.
    ///
    /// This is an inherent implementation of [`Buf::try_copy_to_slice`] for convenience, so it is
    /// available even when you don't have the trait imported.
    ///
    /// [`remaining`]: method@Self::remaining
    #[inline(always)]
    pub fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        self.0.try_copy_to_slice(dst)
    }
}
885
886impl Buf for LiteralBuf {
887    #[inline(always)]
888    fn advance(&mut self, n: usize) {
889        LiteralBuf::advance(self, n);
890    }
891
892    #[inline(always)]
893    fn chunk(&self) -> &[u8] {
894        LiteralBuf::chunk(self)
895    }
896
897    #[inline(always)]
898    fn remaining(&self) -> usize {
899        LiteralBuf::remaining(self)
900    }
901
902    #[inline(always)]
903    fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
904        LiteralBuf::try_copy_to_slice(self, dst)
905    }
906}
907
/// Text content of a JSON token identified by a [`PipeAnalyzer`].
///
/// See the [`lexical::Content`] trait, implemented by this struct, for detailed conceptual
/// documentation.
///
/// # Memory considerations
///
/// A `Content` value may hold references to one or more [`Bytes`] values that were piped into the
/// `PipeAnalyzer`. Consequently, holding on to a `Content` value may prevent the `PipeAnalyzer`
/// from dropping `Bytes` buffers it has finished scanning. This can lead to increased memory usage.
/// If all `Content` values produced by a `PipeAnalyzer` are retained, it will potentially keep all
/// inputted `Bytes` buffers alive. This undermines a key value proposition of a streaming analyzer
/// and, for large enough JSON texts, may lead to out-of-memory conditions. Therefore, it is advised
/// that you drop `Content` values once you have finished examining them.
// Internally shares the same representation as `Literal`.
#[derive(Debug)]
pub struct Content(InnerLiteral);
924
// Inherent mirrors of the `lexical::Content` trait methods, forwarding to the shared inner
// representation.
impl Content {
    /// Returns the literal content of the token exactly as it appears in the JSON text.
    ///
    /// This is an inherent implementation of [`lexical::Content::literal`] for convenience, so it
    /// is available even when you don't have the trait imported. Refer to the trait documentation
    /// for conceptual details.
    #[inline(always)]
    pub fn literal(&self) -> Literal {
        // Cloning the inner representation is cheap: buffers are reference-counted.
        Literal(self.0.clone())
    }

    /// Indicates whether the token content contains escape sequences.
    ///
    /// This is an inherent implementation of [`lexical::Content::is_escaped`] for convenience, so
    /// it is available even when you don't have the trait imported. Refer to the trait
    /// documentation for conceptual details.
    #[inline(always)]
    pub fn is_escaped(&self) -> bool {
        self.0.is_escaped()
    }

    /// Returns a normalized version of [`literal`] with all escape sequences in the JSON text
    /// fully expanded.
    ///
    /// This is an inherent implementation of [`lexical::Content::unescaped`] for convenience, so
    /// it is available even when you don't have the trait imported. Refer to the trait
    /// documentation for conceptual details.
    ///
    /// # Performance considerations
    ///
    /// - If this content belongs to a non-string token, or a string token that contains no escape
    ///   sequences, does not allocate, and simply returns an [`Unescaped::Literal`] wrapping the
    ///   `Literal` returned by [`literal`], which is a reference to the internals of this content.
    /// - If this content belongs to a string token containing at least one escape sequence,
    ///   allocates a new owned string value containing the unescaped string content and returns it
    ///   wrapped in [`Unescaped::Expanded`].
    ///
    /// [`literal`]: method@Self::literal
    #[inline(always)]
    pub fn unescaped(&self) -> Unescaped<Literal> {
        self.0.unescaped()
    }
}
968
969impl fmt::Display for Content {
970    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
971        self.literal().fmt(f)
972    }
973}
974
975impl super::Content for Content {
976    type Literal<'a> = Literal;
977
978    #[inline(always)]
979    fn literal<'a>(&'a self) -> Self::Literal<'a> {
980        Content::literal(self)
981    }
982
983    #[inline(always)]
984    fn is_escaped(&self) -> bool {
985        Content::is_escaped(self)
986    }
987
988    #[inline(always)]
989    fn unescaped<'a>(&'a self) -> Unescaped<Self::Literal<'a>> {
990        Content::unescaped(self)
991    }
992}
993
// Compile-time pin: `Literal` must be exactly 40 bytes (five 64-bit words) on 64-bit targets.
// The two array types only unify when `size_of` matches, so any size change fails the build.
#[cfg(target_pointer_width = "64")]
const _: [(); 40] = [(); std::mem::size_of::<Literal>()];

// Compile-time pin: `Content` must be exactly 40 bytes (five 64-bit words) on 64-bit targets.
#[cfg(target_pointer_width = "64")]
const _: [(); 40] = [(); std::mem::size_of::<Content>()];
1001
/// Lexical analysis error detected by a [`PipeAnalyzer`].
///
/// See the [`lexical::Error`] trait, implemented by this struct, for further documentation.
#[derive(Debug)]
pub struct Error<E> {
    kind: ErrorKind,        // Category of the error; `ErrorKind::Read` for pipe failures.
    pos: Pos,               // Exact position in the JSON text where the error occurred.
    source: Option<Arc<E>>, // Underlying pipe error for read failures; `None` for lexical errors.
}
1011
1012impl<E> Error<E> {
1013    /// Returns the category of error.
1014    ///
1015    /// This is an inherent implementation of [`lexical::Error::kind`] for convenience, so it is
1016    /// available even when you don't have the trait imported.
1017    pub fn kind(&self) -> ErrorKind {
1018        self.kind
1019    }
1020
1021    /// Returns the position in the JSON text where the error was encountered.
1022    ///
1023    /// This is an inherent implementation of [`lexical::Error::pos`] for convenience, so it is
1024    /// available even when you don't have the trait imported.
1025    pub fn pos(&self) -> &Pos {
1026        &self.pos
1027    }
1028
1029    fn new_lexical(kind: ErrorKind, pos: Pos) -> Self {
1030        Self {
1031            kind,
1032            pos,
1033            source: None,
1034        }
1035    }
1036
1037    fn new_read(source: E, pos: Pos) -> Self {
1038        Self {
1039            kind: ErrorKind::Read,
1040            pos,
1041            source: Some(Arc::new(source)),
1042        }
1043    }
1044}
1045
1046impl<E> Clone for Error<E> {
1047    fn clone(&self) -> Self {
1048        Self {
1049            kind: self.kind,
1050            pos: self.pos,
1051            source: self.source.clone(),
1052        }
1053    }
1054}
1055
1056impl<E> fmt::Display for Error<E> {
1057    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1058        self.kind.fmt_at(f, Some(&self.pos))
1059    }
1060}
1061
1062impl<E> std::error::Error for Error<E>
1063where
1064    E: std::error::Error + 'static,
1065{
1066    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
1067        self.source.as_ref().map(|e| &**e as &dyn std::error::Error)
1068    }
1069}
1070
1071impl<E> lexical::Error for Error<E>
1072where
1073    E: std::error::Error + Send + Sync + 'static,
1074{
1075    fn kind(&self) -> ErrorKind {
1076        Error::kind(self)
1077    }
1078
1079    fn pos(&self) -> &Pos {
1080        Error::pos(self)
1081    }
1082}
1083
1084/// Provides JSON text to a [`PipeAnalyzer`] as a stream of [`bytes::Bytes`] buffers.
1085///
1086/// A pipe connects a provider of `Bytes` into a `PipeAnalyzer`. It allows a concurrent provider of
1087/// JSON text, such as an `async` task or a worker thread, to push the text into the lexical
1088/// analyzer as a stream of `Bytes` buffers.
1089///
1090/// `Pipe` is a synchronous trait, *i.e.*, the [`recv`][method@Self::recv] function is an ordinary
1091/// synchronous `fn`. Therefore, implementations of `Pipe` for `async` tasks need to bridge between
1092/// sync and async contexts. Examples are provided below.
1093///
1094/// # Examples
1095///
1096/// An implementation of `Pipe` for standard library channels is provided out of the box.
1097///
1098/// ```
1099/// # use bufjson::lexical::{Token, pipe::PipeAnalyzer};
1100/// use std::{sync::mpsc::channel, thread};
1101///
1102/// let (tx, rx) = channel();
1103/// thread::spawn(move || {
1104///     tx.send("[123]".into());
1105/// });
1106/// let mut lexer = PipeAnalyzer::new(rx);
1107///
1108/// assert_eq!(Token::ArrBegin, lexer.next());
1109/// assert_eq!(Token::Num, lexer.next());
1110/// assert_eq!(Token::ArrEnd, lexer.next());
1111/// assert_eq!(Token::Eof, lexer.next());
1112/// ```
1113///
1114/// Implementing `Pipe` for synchronization constructs that have built-in sync/async bridging, such
1115/// as `tokio` channels, is straightforward.
1116///
1117/// ```
1118/// # use bufjson::lexical::{Token, pipe::{Pipe, PipeAnalyzer}};
1119/// # #[tokio::main(flavor = "current_thread")]
1120/// # async fn main() {
1121/// use bytes::Bytes;
1122/// use std::convert::Infallible;
1123/// use tokio::sync::mpsc::{Receiver, channel};
1124///
1125/// struct PipeReceiver(Receiver<Bytes>); // Newtype for Receiver<Bytes>
1126///
1127/// impl Pipe for PipeReceiver {
1128///     type Error = Infallible;
1129///
1130///     fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
1131///         self.0.blocking_recv().map(Ok)
1132///     }
1133/// }
1134///
1135/// let (tx, rx) = channel(1);
1136///
1137/// tokio::spawn(async move {
1138///     tx.send(Bytes::from("null")).await.unwrap();
1139/// });
1140///
1141/// let result = tokio::task::spawn_blocking(move || {
1142///     let mut lexer = PipeAnalyzer::new(PipeReceiver(rx));
1143///     let first = lexer.next();
1144///     let second = lexer.next();
1145///
1146///     (first, second)
1147/// }).await.unwrap();
1148///
1149/// assert_eq!(Token::LitNull, result.0);
1150/// assert_eq!(Token::Eof, result.1);
1151/// # }
1152/// ```
pub trait Pipe {
    /// Error type returned when [`recv`][method@Self::recv] fails.
    type Error: std::error::Error + Send + Sync + 'static;

    /// Attempts to wait for the next chunk from this pipe, returning an error if the pipe's data
    /// source is in a failure state.
    ///
    /// This function blocks the caller if the next chunk isn't yet available, provided it is
    /// possible that a next chunk will become available. Once a chunk, or the end of the chunk
    /// stream, becomes available, this pipe will wake up and return it.
    ///
    /// The return value is `Some` if a chunk is available, or if the pipe's data source is in a
    /// failure state; and `None` if the end of the stream of JSON text chunks has been reached.
    fn recv(&mut self) -> Option<Result<Bytes, Self::Error>>;

    /// Attempts to return the next chunk pending in this pipe without blocking.
    ///
    /// This function never blocks the caller to wait for a chunk to become available.
    ///
    /// The return value cannot represent an error state. If the pipe is in an error state, it
    /// should return `None` here and let a subsequent call to [`recv`][method@Self::recv] report
    /// the error.
    ///
    /// The provided implementation simply returns `None`.
    fn try_recv(&mut self) -> Option<Bytes> {
        None
    }
}
1180
1181impl Pipe for std::sync::mpsc::Receiver<Bytes> {
1182    type Error = Infallible;
1183
1184    fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
1185        std::sync::mpsc::Receiver::recv(self).ok().map(Ok)
1186    }
1187
1188    fn try_recv(&mut self) -> Option<Bytes> {
1189        std::sync::mpsc::Receiver::try_recv(self).ok()
1190    }
1191}
1192
// Content (or error) of the most recently scanned token, stored compactly until the caller asks
// for it via `try_content`.
#[derive(Debug)]
enum StoredContent<E> {
    // A successfully scanned token's span within the analyzer's retained input chunks.
    Ok {
        start_pos: usize, // Byte offset of the token's start within the retained chunks.
        len: usize,       // Token length in bytes.
        escaped: bool,    // Whether the token text contains escape sequences.
    },
    // The analyzer hit a lexical or read error; reported on every subsequent call.
    Err(Error<E>),
}
1202
1203impl<E> Default for StoredContent<E> {
1204    fn default() -> Self {
1205        StoredContent::Ok {
1206            start_pos: 0,
1207            len: 0,
1208            escaped: false,
1209        }
1210    }
1211}
1212
1213/// A [`lexical::Analyzer`] to tokenize JSON text from a stream of [`Bytes`] buffers.
1214///
1215/// Use `PipeAnalyzer` for zero allocation, low-copy, stream-oriented lexical analysis of JSON text
1216/// from any input source that can provide the input JSON in one or more `Bytes` chunks.
1217///
1218/// As with any [`lexical::Analyzer`] implementation, you can construct a [`syntax::Parser`] from a
1219/// `PipeAnalyzer` to unlock richer stream-oriented syntactic analysis while retaining low overhead
1220/// guarantees of the underlying lexical analyzer.
1221///
1222/// # Performance considerations
1223///
1224/// ## Method performance
1225///
1226/// The [`next`] method never allocates or copies and has very low overhead, above and beyond just
1227/// examining the bytes of the next token in the buffer, for doing state transitions and remembering
1228/// state.
1229///
1230/// The [`content`] method never allocates. For punctuation and literal tokens, it never copies. For
1231/// number and string tokens, it may copy if the token is very short; otherwise, it just returns a
1232/// reference-counted slice of the input chunk or chunks from which the token was scanned.
1233///
/// It should be noted that the `Content` structure returned by [`content`] is somewhat "fat", at 40
/// bytes (see the compile-time size assertion on `Content`); it is preferable not to fetch it for
/// tokens where the content is either statically knowable (literals and punctuation) or not
/// required (*e.g.*, whitespace in some applications).
1237///
1238/// [Unescaping][`lexical::Content::unescaped`] a [`Content`] value that contains an escaped string
1239/// token always allocates; but calling `unescaped` on a `Content` value that does not contain any
1240/// escape sequences is a no-op that neither allocates nor does any other work.
1241///
1242/// # Memory considerations
1243///
1244/// Because [`Content`] can refer directly to slices within the input `Bytes` buffers, a live
1245/// `Content` value may keep the reference count of an input chunk above zero. In the most extreme
1246/// case, if every content value in the JSON text is fetched and kept alive, this can keep input
1247/// chunks that would otherwise have been freed alive in memory. If this behavior isn't desirable,
1248/// it is recommended that you drop `Content` values soon after inspecting them; and, when a longer
1249/// lifetime is required, convert them into some other convenient owned value.
1250///
1251/// # Examples
1252///
1253/// Scan a JSON text contained in a sequence of chunks.
1254///
1255/// ```
1256/// use bufjson::lexical::{Token, pipe::{Pipe, PipeAnalyzer}};
1257/// use std::{sync::mpsc::channel, thread};
1258///
1259/// // Create a channel, because there's a provided implementation of the `Pipe` for a channel
1260/// // receiver. You can also create your own arbitrary implementations of `Pipe`.
1261/// let (tx, rx) = channel();
1262///
1263/// // Use a separate thread to send chunks of JSON to the channel.
1264/// thread::spawn(move || {
1265///     [
1266///         r#"{"user":"alice","#,
1267///         r#""score":95,"#,
1268///         r#""tags":["admin"]}"#,
1269///     ]
1270///         .into_iter()
1271///         .map(Into::into)                                    // Convert static string to `Bytes`.
1272///         .for_each(|chunk| { tx.send(chunk).unwrap(); });    // Send `Bytes` chunk to the lexer.
1273/// });
1274///
1275/// // Create a `PipeAnalyzer` reading chunks from the channel.
1276/// let mut lexer = PipeAnalyzer::new(rx);
1277///
1278/// // Scan the tokens.
1279/// assert_eq!(Token::ObjBegin, lexer.next());
1280/// assert_eq!(Token::Str, lexer.next());
1281/// assert_eq!(Token::NameSep, lexer.next());
1282/// assert_eq!(Token::Str, lexer.next());
1283/// assert_eq!(Token::ValueSep, lexer.next());
1284/// assert_eq!(Token::Str, lexer.next());
1285/// assert_eq!(Token::NameSep, lexer.next());
1286/// assert_eq!(Token::Num, lexer.next());
1287/// assert_eq!(Token::ValueSep, lexer.next());
1288/// assert_eq!(Token::Str, lexer.next());
1289/// assert_eq!(Token::NameSep, lexer.next());
1290/// assert_eq!(Token::ArrBegin, lexer.next());
1291/// assert_eq!(Token::Str, lexer.next());
1292/// assert_eq!(Token::ArrEnd, lexer.next());
1293/// assert_eq!(Token::ObjEnd, lexer.next());
1294/// assert_eq!(Token::Eof, lexer.next());
1295/// ```
1296///
1297/// [`content`]: method@Self::content
1298/// [`next`]: method@Self::next
#[derive(Debug)]
pub struct PipeAnalyzer<P: Pipe> {
    bufs: SmallVec<[Bytes; 4]>,       // Input chunks still referenced by the current token (usually one).
    content: StoredContent<P::Error>, // Content or error of the most recently scanned token.
    content_pos: Pos,                 // Position of the start of the most recently scanned token.
    mach: state::Machine<Bytes>,      // Underlying lexical state machine consuming the chunks.
    pipe: P,                          // Source of the input `Bytes` chunks.
    start_pos: usize,                 // Offset where the next token starts, within the retained chunks.
}
1308
impl<P: Pipe> PipeAnalyzer<P> {
    /// Constructs a new lexer to tokenize JSON text in a stream of `Bytes` buffers.
    ///
    /// # Example
    ///
    /// ```
    /// # use bufjson::lexical::{Token, pipe::PipeAnalyzer};
    /// use std::{sync::mpsc::channel, thread};
    ///
    /// let (tx, rx) = channel();
    /// thread::spawn(move || {
    ///     tx.send("[123]".into());
    /// });
    /// let mut lexer = PipeAnalyzer::new(rx);
    /// ```
    pub fn new(mut pipe: P) -> Self {
        // Opportunistically grab a first chunk without blocking; otherwise start the machine on
        // an empty buffer, which makes the first `next()` call block on `pipe.recv()`.
        let first = match pipe.try_recv() {
            Some(chunk) => chunk,
            None => Bytes::new(),
        };

        // `Bytes::clone` is a cheap reference-count bump: `bufs` and the machine share `first`.
        let bufs = smallvec![first.clone()];
        let content = StoredContent::default();
        let content_pos = Pos::default();
        let mach = state::Machine::new(first);
        let start_pos = 0;

        Self {
            bufs,
            content,
            content_pos,
            mach,
            pipe,
            start_pos,
        }
    }

    /// Recognizes the next lexical token in the buffer without allocating or copying.
    ///
    /// This is an inherent implementation of [`lexical::Analyzer::next`] for convenience, so it is
    /// available even when you don't have the trait imported.
    ///
    /// # Example
    ///
    /// ```
    /// # use bufjson::lexical::{Token, pipe::PipeAnalyzer};
    /// use std::sync::mpsc::channel;
    ///
    /// let (tx, rx) = channel();
    /// tx.send("99.9e-1".into());
    /// drop(tx);
    /// let mut lexer = PipeAnalyzer::new(rx);
    ///
    /// assert_eq!(Token::Num, lexer.next());
    /// assert_eq!(Token::Eof, lexer.next());
    /// assert_eq!(Token::Eof, lexer.next());
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Token {
        // Errors are sticky: once recorded, keep reporting `Token::Err`.
        if matches!(self.content, StoredContent::Err(_)) {
            return Token::Err;
        }

        self.content_pos = *self.mach.pos();
        // Release chunks fully consumed by the previous token: keep only the newest chunk (moved
        // to slot 0) and rebase `start_pos` so it indexes into that remaining chunk.
        let n = self.bufs.len();
        if n > 1 {
            let contrib: usize = self.bufs.iter().take(n - 1).map(Bytes::len).sum();
            self.start_pos -= contrib;
            self.bufs.swap(0, n - 1);
            self.bufs.truncate(1);
        }

        // Records the completed token's span and advances `start_pos` past it.
        macro_rules! done {
            ($token:ident, $escaped:ident, $n: expr, $len:ident) => {{
                $len += $n;
                self.content = StoredContent::Ok {
                    start_pos: self.start_pos,
                    len: $len,
                    escaped: $escaped,
                };
                self.start_pos += $len;

                return $token;
            }};
        }

        // Records a lexical error at the machine's current position.
        macro_rules! lexical_err {
            () => {{
                let kind = self.mach.err_kind().expect("there should be an error kind");
                let pos = *self.mach.pos();
                self.content = StoredContent::Err(Error::new_lexical(kind, pos));

                return Token::Err;
            }};
        }

        // Records a failed pipe read as the stored error.
        macro_rules! io_err {
            ($source:ident) => {{
                self.content = StoredContent::Err(Error::new_read($source, *self.mach.pos()));

                return Token::Err;
            }};
        }

        let mut next = self.mach.next();
        let mut len = 0;
        loop {
            match next {
                state::Next::Done(token, escaped, n) => done!(token, escaped, n, len),
                // A token is split across a chunk boundary: pull the next chunk and resume.
                state::Next::Part(token, n) => {
                    len += n;
                    match self.pipe.recv() {
                        None => match self.mach.end() {
                            state::End::Done => done!(token, false, 0, len),
                            state::End::Nil => unreachable!(),
                            state::End::Err => lexical_err!(),
                        },
                        Some(Ok(buf)) => {
                            self.bufs.push(buf.clone());
                            next = self.mach.resume(buf);
                        }
                        Some(Err(err)) => io_err!(err),
                    }
                }
                // The chunk was exhausted between tokens: end of input means EOF, otherwise the
                // replacement chunk starts a fresh token at offset zero.
                state::Next::Nil => match self.pipe.recv() {
                    None => {
                        self.content = StoredContent::default();
                        return Token::Eof;
                    }
                    Some(Ok(buf)) => {
                        debug_assert!(self.bufs.len() == 1);
                        self.start_pos = 0;
                        self.bufs[0] = buf.clone();
                        next = self.mach.resume(buf);
                    }
                    Some(Err(err)) => io_err!(err),
                },
                state::Next::Err(_) => lexical_err!(),
            }
        }
    }

    /// Fetches the text content of the most recent non-error token.
    ///
    /// This is an inherent implementation of [`lexical::Analyzer::content`] for convenience, so it
    /// is available even when you don't have the trait imported.
    ///
    /// # Panics
    ///
    /// Panics if the most recent token returned by [`next`] was [`Token::Err`].
    ///
    /// # Example
    ///
    /// ```
    /// # use bufjson::lexical::{Token, pipe::PipeAnalyzer};
    /// use std::sync::mpsc::channel;
    ///
    /// let (tx, rx) = channel();
    /// tx.send("  null".into());
    /// drop(tx);
    /// let mut lexer = PipeAnalyzer::new(rx);
    ///
    /// assert_eq!(Token::White, lexer.next());
    /// assert_eq!("  ", lexer.content().literal());
    ///
    /// assert_eq!(Token::LitNull, lexer.next());
    /// assert_eq!("null", lexer.content().literal());
    /// ```
    ///
    /// [`next`]: method@Self::next
    #[inline]
    pub fn content(&self) -> Content {
        if let Ok(content) = self.try_content() {
            content
        } else {
            panic!("no content: last `next()` returned `Token::Err` (use `err()` instead)");
        }
    }

    /// Fetches the error value associated with the most recent error token.
    ///
    /// This is an inherent implementation of [`lexical::Analyzer::err`] for convenience, so it is
    /// available even when you don't have the trait imported.
    ///
    /// # Panics
    ///
    /// Panics if the most recent token returned by [`next`] was not [`Token::Err`].
    ///
    /// # Example
    ///
    /// ```
    /// use bufjson::lexical::{ErrorKind, Expect, Token, pipe::PipeAnalyzer};
    /// use std::sync::mpsc::channel;
    ///
    /// let (tx, rx) = channel();
    /// tx.send("garbage!".into());
    /// drop(tx);
    /// let mut lexer = PipeAnalyzer::new(rx);
    ///
    /// assert_eq!(Token::Err, lexer.next());
    /// assert!(matches!(
    ///     lexer.err().kind(),
    ///     ErrorKind::UnexpectedByte { token: None, expect: Expect::TokenStartChar, actual: b'g'}
    /// ));
    /// ```
    ///
    /// [`next`]: method@Self::next
    #[inline]
    pub fn err(&self) -> Error<P::Error> {
        if let Err(err) = self.try_content() {
            err
        } else {
            panic!("no error: last `next()` did not return `Token::Err` (use `content()` instead)");
        }
    }

    /// Returns the position of the start of the token most recently scanned by [`next`].
    ///
    /// This is an inherent implementation of [`lexical::Analyzer::pos`] for convenience, so it is
    /// available even when you don't have the trait imported.
    ///
    /// # Examples
    ///
    /// Before any token is scanned, the position is the default position.
    ///
    /// ```
    /// # use bufjson::{Pos, lexical::pipe::PipeAnalyzer};
    /// use std::sync::mpsc::channel;
    ///
    /// let (_, rx) = channel();
    ///
    /// assert_eq!(Pos::default(), *PipeAnalyzer::new(rx).pos());
    /// ```
    ///
    /// The position of the first token returned is always the start of the buffer.
    ///
    /// ```
    /// use bufjson::{Pos, lexical::{Token, pipe::PipeAnalyzer}};
    /// use std::sync::mpsc::channel;
    ///
    /// let (tx, rx) = channel();
    /// tx.send(" \n".into());
    /// drop(tx);
    /// let mut lexer = PipeAnalyzer::new(rx);
    ///
    /// // Read the two-byte whitespace token that starts at offset 0.
    /// assert_eq!(Token::White, lexer.next());
    /// assert_eq!(Pos::default(), *lexer.pos());
    ///
    /// // The EOF token starts at the end of the whitespace token.
    /// assert_eq!(Token::Eof, lexer.next());
    /// assert_eq!(Pos { offset: 2, line: 2, col: 1}, *lexer.pos());
    /// ```
    ///
    /// On errors, the position reported by `pos` may be different from the position reported by the
    /// error returned from [`err`]. This is because the `pos` indicates the start of the token
    /// where the error occurred, and the error position is the exact position of the error.
    ///
    /// ```
    /// use bufjson::{Pos, lexical::{Token, pipe::PipeAnalyzer}};
    /// use std::sync::mpsc::channel;
    ///
    /// let (tx, rx) = channel();
    /// tx.send("123_".into());
    /// drop(tx);
    /// let mut lexer = PipeAnalyzer::new(rx);
    ///
    /// assert_eq!(Token::Err, lexer.next());
    /// // `pos` is at the start of the number token that has the problem...
    /// assert_eq!(Pos::default(), *lexer.pos());
    /// // ...but the error contains the exact problem position: offset 3, column 4.
    /// assert_eq!(Pos { offset: 3, line: 1, col: 4 }, *lexer.err().pos())
    /// ```
    ///
    /// [`next`]: method@Self::next
    /// [`err`]: method@Self::err
    #[inline(always)]
    pub fn pos(&self) -> &Pos {
        &self.content_pos
    }

    /// Fetches the content or error associated with the most recent token.
    ///
    /// This is an inherent implementation of [`lexical::Analyzer::try_content`] for convenience, so
    /// it is available even when you don't have the trait imported.
    ///
    /// # Examples
    ///
    /// An `Ok` value is returned as long as the lexical analyzer isn't in an error state.
    ///
    /// ```
    /// # use bufjson::lexical::{Token, pipe::PipeAnalyzer};
    /// use std::sync::mpsc::channel;
    ///
    /// let (tx, rx) = channel();
    /// tx.send("99.9e-1".into());
    /// drop(tx);
    /// let mut lexer = PipeAnalyzer::new(rx);
    ///
    /// assert_eq!(Token::Num, lexer.next());
    /// assert!(matches!(lexer.try_content(), Ok(c) if c.literal() == "99.9e-1"));
    /// ```
    ///
    /// Once the lexical analyzer encounters a lexical error, it will return an `Err` value
    /// describing that error.
    ///
    /// ```
    /// use bufjson::{Pos, lexical::{Token, pipe::PipeAnalyzer}};
    /// use std::sync::mpsc::channel;
    ///
    /// let (tx, rx) = channel();
    /// tx.send("[unquoted]".into());
    /// drop(tx);
    /// let mut lexer = PipeAnalyzer::new(rx);
    ///
    /// assert_eq!(Token::ArrBegin, lexer.next());
    /// assert_eq!(Token::Err, lexer.next());
    /// assert_eq!(Pos { offset: 1, line: 1, col: 2}, *lexer.try_content().unwrap_err().pos());
    /// ```
    pub fn try_content(&self) -> Result<Content, Error<P::Error>> {
        match &self.content {
            // Fast path: the whole token lies within the first retained chunk.
            StoredContent::Ok {
                start_pos,
                len,
                escaped,
            } if *start_pos + *len <= self.bufs[0].len() => {
                let src = &self.bufs[0];
                debug_assert!(*start_pos <= src.len());
                debug_assert!(
                    *start_pos + *len <= src.len(),
                    "start_pos ({start_pos}) + len ({len}) <= src.len() ({})",
                    src.len()
                );
                if *len <= INLINE_LEN {
                    // Very short tokens are copied into an inline, stack-allocated buffer so they
                    // don't pin the input chunk's reference count.
                    // SAFETY: We have length checked ☝️, the heap-based `src` can't overlap our new
                    //         stack-based `InlineBuf`, and the range [start_pos..start_pos + len]
                    //         is within `src`.
                    unsafe {
                        let mut dst: MaybeUninit<InlineBuf> = MaybeUninit::uninit();
                        std::ptr::copy_nonoverlapping(
                            src.as_ptr().add(*start_pos),
                            dst.as_mut_ptr() as *mut u8,
                            *len,
                        );

                        Ok(Content(InnerLiteral::Inline(
                            0,
                            *len as u8,
                            dst.assume_init(),
                            *escaped,
                        )))
                    }
                } else {
                    // Longer tokens share the chunk via a reference-counted slice — no copy.
                    Ok(Content(InnerLiteral::Bytes(
                        src.slice(*start_pos..*start_pos + *len),
                        *escaped,
                    )))
                }
            }

            // The token spans more than one retained chunk.
            StoredContent::Ok {
                start_pos,
                len,
                escaped,
            } => self.multi_content(*start_pos, *len, *escaped),

            StoredContent::Err(err) => Err(err.clone()),
        }
    }

    /// Converts a lexical analyzer into a syntax parser, consuming the lexical analyzer in the
    /// process.
    ///
    /// You can convert the parser back into the underlying lexical analyzer using
    /// [`Parser::into_inner`].
    ///
    /// # Examples
    ///
    /// ```
    /// use bufjson::lexical::{Token, pipe::PipeAnalyzer};
    /// use std::sync::mpsc::channel;
    ///
    /// // Create a lexical analyzer to analyze the JSON text `true false`.
    /// let (tx, rx) = channel();
    /// tx.send("true false".into());
    /// drop(tx);
    /// let mut lexer = PipeAnalyzer::new(rx);
    ///
    /// // Consume the first lexical token, `true`.
    /// assert_eq!(Token::LitTrue, lexer.next());
    ///
    /// // Convert the lexer into a parser. Since `true` is consumed, the next meaningful token is
    /// // `false`.
    /// let mut parser = lexer.into_parser();
    /// assert_eq!(Token::LitFalse, parser.next_meaningful());
    /// ```
    ///
    /// [`Parser::into_inner`]: syntax::Parser::into_inner
    pub fn into_parser(self) -> syntax::Parser<PipeAnalyzer<P>> {
        syntax::Parser::new(self)
    }

    // Builds content for a token that spans more than one retained input chunk.
    fn multi_content(
        &self,
        start_pos: usize,
        len: usize,
        escaped: bool,
    ) -> Result<Content, Error<P::Error>> {
        debug_assert!(self.bufs.len() > 1);

        let arr: Box<[Bytes]> = self.bufs.iter().cloned().collect(); // Only one allocation.
        let multi_bytes = MultiBytes::new(arr, start_pos, len, escaped);
        let content = Content(InnerLiteral::Multi(multi_bytes));

        Ok(content)
    }
}
1726
1727impl<P: Pipe> lexical::Analyzer for PipeAnalyzer<P> {
1728    type Content = Content;
1729    type Error = Error<P::Error>;
1730
1731    #[inline(always)]
1732    fn next(&mut self) -> Token {
1733        PipeAnalyzer::next(self)
1734    }
1735
1736    #[inline(always)]
1737    fn try_content(&self) -> Result<Self::Content, Error<P::Error>> {
1738        PipeAnalyzer::try_content(self)
1739    }
1740
1741    #[inline(always)]
1742    fn pos(&self) -> &Pos {
1743        PipeAnalyzer::pos(self)
1744    }
1745}
1746
1747#[cfg(test)]
1748mod tests {
1749    use super::*;
1750    use crate::{IntoBuf, lexical::Expect};
1751    use rstest::rstest;
1752    use std::{
1753        collections::{BTreeMap, HashMap},
1754        error::Error as _,
1755        hash::DefaultHasher,
1756        sync::mpsc::channel,
1757    };
1758
1759    #[test]
1760    fn temp_test_empty_chunk() {
1761        // Temporary unit test relating to bug that comes from the temp hack using `ReadAnalyzer`.
1762        let (tx, rx) = channel();
1763        tx.send("tru".into()).unwrap();
1764        tx.send("".into()).unwrap();
1765        tx.send("e".into()).unwrap();
1766        drop(tx);
1767
1768        let mut an = PipeAnalyzer::new(rx);
1769
1770        assert_eq!(Token::LitTrue, an.next());
1771        assert_eq!(Token::Eof, an.next());
1772    }
1773
1774    #[rstest]
1775    #[case(Literal::from_static(""), 0)]
1776    #[case(Literal::from_static("a"), 1)]
1777    #[case(Literal::from_static(concat!(
1778        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1779        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1780        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
1781        "aaaaaaaaaaaaaab",
1782    )), u8::MAX as usize)]
1783    #[case(Literal::from_ref(""), 0)]
1784    #[case(Literal::from_ref(&"a".repeat(INLINE_LEN)), INLINE_LEN)]
1785    #[case(Literal::from_ref(&"b".repeat(INLINE_LEN+1)), INLINE_LEN+1)]
1786    #[case(Literal::from_ref(&Cow::Borrowed("foo")), 3)]
1787    #[case(Literal::from_ref(&Cow::Owned("bar".to_string())), 3)]
1788    #[case(Literal::from_string("".to_string()), 0)]
1789    #[case(Literal::from_string("c".to_string()), 1)]
1790    #[case(Literal::from_string("d".repeat(100 * INLINE_LEN)), 100 * INLINE_LEN)]
1791    #[case("baz".into(), 3)]
1792    #[case(Cow::Borrowed("").into(), 0)]
1793    #[case(Cow::<str>::Owned("e".repeat(INLINE_LEN-1)).into(), INLINE_LEN-1)]
1794    #[case("qux".to_string().into(), 3)]
1795    #[case(Literal::from_str("hello, world").unwrap(), 12)]
1796    #[case(Literal(InnerLiteral::test_new_multi(["b", "a", "z"], 0, 3, false)), 3)]
1797    #[case(Literal(InnerLiteral::test_new_multi(["b", "a", "z"], 0, 3, true)), 3)]
1798    #[case(Literal(InnerLiteral::test_new_multi(["_f", "o", "o_"], 1, 3, false)), 3)]
1799    #[case(Literal(InnerLiteral::test_new_multi(["_f", "oo", ""], 1, 3, true)), 3)]
1800    fn test_literal_convert(#[case] literal: Literal, #[case] expect_len: usize) {
1801        assert_eq!(expect_len, literal.len());
1802        assert_eq!(expect_len == 0, literal.is_empty());
1803
1804        let mut b = literal.clone().into_buf();
1805
1806        assert_eq!(expect_len, b.remaining());
1807        assert_eq!(expect_len == 0, !b.has_remaining());
1808
1809        let mut dst = vec![0u8; expect_len];
1810        b.copy_to_slice(&mut dst);
1811
1812        let s = String::from_utf8(dst).unwrap();
1813
1814        assert_eq!(literal.to_string(), s);
1815        assert_eq!(Into::<String>::into(literal), s);
1816    }
1817
    #[test]
    fn test_literal_compare() {
        // Three equivalence classes of `Literal` values, each constructed through
        // several different internal representations:
        //   a_s   — the one-byte string "a"
        //   aa_s  — "a" repeated INLINE_LEN times
        //   aab_s — "a" repeated (u8::MAX - 1) times followed by "b"
        // Values within a class must compare equal and hash identically no matter
        // which representation they use; values across classes must order
        // lexicographically (a < aa... < aa...b).
        let a_s = vec![
            Literal::from_static("a"),
            Literal::from_ref("a"),
            Literal::from_string("a".to_string()),
            Literal(InnerLiteral::test_new_multi(["a"], 0, 1, false)),
        ];
        let aa_s: Vec<Literal> = vec![
            Literal::from_ref(&"a".repeat(INLINE_LEN)),
            Literal::from_string("a".repeat(INLINE_LEN)),
            Literal(InnerLiteral::test_new_multi(
                [vec![b'a'; INLINE_LEN]],
                0,
                INLINE_LEN,
                false,
            )),
            Literal(InnerLiteral::test_new_multi(
                ["a"; INLINE_LEN],
                0,
                INLINE_LEN,
                true,
            )),
        ];
        let aab_s: Vec<Literal> = vec![
            Literal::from_static(concat!(
                "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                "aaaaaaaaaaaaaab",
            )),
            Literal::from_ref(("a".repeat(u8::MAX as usize - 1) + "b").as_str()),
            Literal::from_string("a".repeat(u8::MAX as usize - 1) + "b"),
            Literal(InnerLiteral::test_new_multi(
                ["a".repeat(u8::MAX as usize - 1), "abc".to_string()],
                1,
                u8::MAX as usize,
                true,
            )),
        ];

        // `$a` must equal itself and `$b` under every comparison operator.
        macro_rules! assert_all_eq {
            ($a:expr, $b:expr) => {
                assert_eq!($a, $a);
                assert_eq!($b, $a);
                assert_eq!($a, $b);
                assert!($a <= $a);
                assert!(!($a < $a));
                assert!($a >= $a);
                assert!(!($a > $a));
            };
        }

        macro_rules! assert_all_ne {
            ($a:expr, $b:expr) => {
                assert_ne!($a, $b);
                assert_ne!($b, $a);
            };
        }

        // `$a < $b` must hold strictly and consistently in both directions.
        macro_rules! assert_all_lt {
            ($a:expr, $b:expr) => {
                assert!($a < $b);
                assert!(!($b < $a));
                assert!(!($a > $b));
                assert!($b > $a);
                assert!($a <= $b);
                assert!($b >= $a);
            };
        }

        // Mirror image of `assert_all_lt`.
        macro_rules! assert_all_gt {
            ($a:expr, $b:expr) => {
                assert!($a > $b);
                assert!(!($b > $a));
                assert!(!($a < $b));
                assert!($b < $a);
                assert!($a >= $b);
                assert!($b <= $a);
            };
        }

        // "a" compares equal to every representation in its class, to plain
        // `&str`/`String` on both sides, and below both longer classes.
        for a in &a_s {
            assert_all_eq!(a, "a");
            assert_all_eq!(Unescaped::Literal(a), "a");
            assert_all_ne!(a, "ab");
            assert_all_ne!(Unescaped::Literal(a), "aa");
            assert_eq!(&"a", a);
            assert_eq!(&"a".to_string(), a);
            assert_eq!(a, &"a");
            assert_eq!(a, &"a".to_string());

            assert!(a <= &"a");
            assert!(a <= &"a".to_string());
            assert!(!(a < &"a"));
            assert!(!(a < &"a".to_string()));
            assert!(a >= &"a");
            assert!(a >= &"a".to_string());
            assert!(!(a > &"a"));
            assert!(!(a > &"a".to_string()));

            for other in aa_s.iter().chain(aab_s.iter()) {
                assert_all_ne!(a, other);
                assert_all_lt!(a, other);
                assert_all_gt!(other, a);
            }
        }

        for aa in &aa_s {
            assert_all_eq!(aa, "a".repeat(INLINE_LEN).as_str());
            assert_all_eq!(Unescaped::Literal(aa), "a".repeat(INLINE_LEN).as_str());
            assert_all_ne!(aa, "aab");
            assert_all_ne!(Unescaped::Literal(aa), "aab");

            assert_all_gt!(aa, "a");
            assert_all_gt!(Unescaped::Literal(aa), "a");
            assert_all_lt!(aa, "aab");
            assert_all_lt!(Unescaped::Literal(aa), "aab");

            assert!(aa < &"aab");
            assert!(aa < &"aab".to_string());
            assert!(aa <= &"aab");
            assert!(aa <= &"aab".to_string());
            assert!(&"aab" > aa);
            assert!(&"aab".to_string() > aa);
            // NOTE(review): the four assertions below repeat the four
            // immediately above verbatim — possibly other operator variants
            // (e.g. `>=`/`!(… > …)`) were intended here; confirm and dedupe.
            assert!(aa <= &"aab");
            assert!(aa <= &"aab".to_string());
            assert!(&"aab" > aa);
            assert!(&"aab".to_string() > aa);

            for aab in &aab_s {
                assert_all_ne!(aa, aab);
                assert_all_lt!(aa, aab);
                assert_all_gt!(aab, aa);
            }
        }

        // Hash a value with the std `DefaultHasher` so equal literals can be
        // checked for equal hashes (required for map-key use below).
        fn hash<T: Hash>(t: &T) -> u64 {
            let mut hasher = DefaultHasher::new();
            t.hash(&mut hasher);
            hasher.finish()
        }

        // Every item in `$iter` must hash identically to `$patient_zero`.
        // NOTE(review): the panic message below has an unbalanced parenthesis
        // ("… item {i}, {item:?}, {hash_item})") — cosmetic only.
        macro_rules! check_hash {
            ($patient_zero:expr, $iter:expr) => {
                let hash_zero = hash($patient_zero);
                for (i, item) in $iter.enumerate() {
                    let hash_item = hash(item);
                    assert_eq!(hash_zero, hash_item, "hash difference between item 0 ({:?}, {hash_zero}) and item {i}, {item:?}, {hash_item})", $patient_zero);
                }
            }
        }

        check_hash!(&a_s[0], a_s.iter().skip(1));
        check_hash!(&aa_s[0], aa_s.iter().skip(1));
        check_hash!(&aab_s[0], aab_s.iter().skip(1));

        // Insert the first member of a class as a map key, then look it up via
        // every other member of the same class: Eq/Hash (HashMap) and Ord
        // (BTreeMap) must treat all representations as the same key.
        macro_rules! check_map {
            ($map:ident, $patient_zero:expr, $iter:expr) => {
                assert!($map.insert($patient_zero, $patient_zero).is_none());
                for item in $iter {
                    assert_eq!($patient_zero, *$map.get(&item).unwrap());
                }
            };
        }

        let mut hash_map1 = HashMap::new();

        check_map!(hash_map1, a_s[0].clone(), a_s.clone());
        check_map!(hash_map1, aa_s[0].clone(), aa_s.clone());
        check_map!(hash_map1, aab_s[0].clone(), aab_s.clone());

        let mut hash_map2 = HashMap::new();

        let unescaped_a = Unescaped::Literal(a_s[0].clone());
        let unescaped_aa = Unescaped::Literal(aa_s[0].clone());
        let unescaped_aab = Unescaped::Literal(aab_s[0].clone());

        // Same key-equivalence checks, but with `Unescaped`-wrapped literals.
        check_map!(
            hash_map2,
            unescaped_a.clone(),
            a_s.iter().cloned().map(Unescaped::Literal)
        );
        check_map!(
            hash_map2,
            unescaped_aa.clone(),
            aa_s.iter().cloned().map(Unescaped::Literal)
        );
        check_map!(
            hash_map2,
            unescaped_aab.clone(),
            aab_s.iter().cloned().map(Unescaped::Literal)
        );

        let mut btree_map1 = BTreeMap::new();

        check_map!(btree_map1, a_s[0].clone(), a_s.clone());
        check_map!(btree_map1, aa_s[0].clone(), aa_s.clone());
        check_map!(btree_map1, aab_s[0].clone(), aab_s.clone());

        let mut btree_map2 = BTreeMap::new();

        check_map!(
            btree_map2,
            unescaped_a.clone(),
            a_s.iter().cloned().map(Unescaped::Literal)
        );
        check_map!(
            btree_map2,
            unescaped_aa.clone(),
            aa_s.iter().cloned().map(Unescaped::Literal)
        );
        check_map!(
            btree_map2,
            unescaped_aab.clone(),
            aab_s.iter().cloned().map(Unescaped::Literal)
        );
    }
2036
2037    #[rstest]
2038    #[case(Literal::from_static(""))]
2039    #[case(Literal::from_ref(""))]
2040    #[case(Literal::from_string("".into()))]
2041    #[case(Literal(InnerLiteral::test_new_bytes("", false)))]
2042    #[case(Literal(InnerLiteral::test_new_bytes("", true)))]
2043    #[case(Literal(InnerLiteral::test_new_multi([""], 0, 0, false)))]
2044    #[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
2045    fn test_literal_buf_advance_panic(#[case] literal: Literal) {
2046        let _ = literal.into_buf().advance(1);
2047    }
2048
2049    #[rstest]
2050    #[case(Literal::from_static(""))]
2051    #[case(Literal::from_ref(""))]
2052    #[case(Literal::from_string("".into()))]
2053    #[case(Literal(InnerLiteral::test_new_bytes("", false)))]
2054    #[case(Literal(InnerLiteral::test_new_bytes("", true)))]
2055    #[case(Literal(InnerLiteral::test_new_multi([""], 0, 0, false)))]
2056    #[case(Literal(InnerLiteral::test_new_multi(["", ""], 0, 0, true)))]
2057    #[case(Literal(InnerLiteral::test_new_multi(["a"], 1, 0, false)))]
2058    #[case(Literal(InnerLiteral::test_new_multi(["a", "a"], 1, 0, true)))]
2059    #[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
2060    fn test_literal_buf_copy_to_slice_panic(#[case] literal: Literal) {
2061        let mut dst = [0; 1];
2062
2063        let _ = literal.into_buf().copy_to_slice(&mut dst);
2064    }
2065
2066    #[rstest]
2067    #[case(Content(InnerLiteral::Static("", false)), "", None)]
2068    #[case(Content(InnerLiteral::Static("", false)), "", None)]
2069    #[case(
2070        Content(InnerLiteral::Static(concat!(
2071            "................................................................................",
2072            ",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
2073            "________________________________________________________________________________",
2074            "+++++++++++++++",
2075        ), false)),
2076        concat!(
2077            "................................................................................",
2078            ",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
2079            "________________________________________________________________________________",
2080            "+++++++++++++++",
2081        ),
2082        None,
2083    )]
2084    #[case(Content(InnerLiteral::Inline(0, 0, [0; INLINE_LEN], false)), "", None)]
2085    #[case(Content(InnerLiteral::test_new_bytes("", false)), "", None)]
2086    #[case(Content(InnerLiteral::test_new_bytes("foo", false)), "foo", None)]
2087    #[case(Content(InnerLiteral::Bytes(Bytes::from_static(b"a barge").slice(2..5), false)), "bar", None)]
2088    #[case(Content(InnerLiteral::test_new_multi([""], 0, 0, false)), "", None)]
2089    #[case(Content(InnerLiteral::test_new_multi(["a b", "a", "rge"], 2, 3, false)), "bar", None)]
2090    #[case(Content(InnerLiteral::test_new_bytes("", true)), "", Some(""))]
2091    #[case(Content(InnerLiteral::test_new_bytes("foo", true)), "foo", Some("foo"))]
2092    #[case(Content(InnerLiteral::Bytes(Bytes::from_static(b"a b\\u0061rge").slice(2..10), true)), "b\\u0061r", Some("bar"))]
2093    #[case(Content(InnerLiteral::test_new_multi([""], 0, 0, true)), "", Some(""))]
2094    #[case(Content(InnerLiteral::test_new_multi(["tomf", "oo", "lery"], 3, 3, true)), "foo", Some("foo"))]
2095    #[case(Content(InnerLiteral::test_new_multi(["\\", "u", "006", "6\\u", "0", "06", "fox"], 0, 13, true)), "\\u0066\\u006fo", Some("foo"))]
2096    // TODO: FIXME: Uncomment below after refactor, converting from the Read types to the relevant Pipe types.
2097    // #[case(Content::from_bufs(&Bufs::new(Bufs::MIN_BUF_SIZE), 0..0, false), "", None)]
2098    // #[case(Content::from_bufs(&Bufs::new(Bufs::MIN_BUF_SIZE), 0..0, true), "", Some(""))]
2099    fn test_content(
2100        #[case] content: Content,
2101        #[case] expect_literal: &str,
2102        #[case] expect_unescaped: Option<&str>,
2103    ) {
2104        assert_eq!(expect_literal, content.literal().into_string());
2105        assert_eq!(expect_unescaped.is_some(), content.is_escaped());
2106        if let Some(expect) = expect_unescaped {
2107            assert_eq!(expect, content.unescaped().into_string());
2108        }
2109    }
2110
2111    #[rstest]
2112    #[case(
2113        Error::new_lexical(ErrorKind::UnexpectedEof(Token::LitTrue), Pos::new(3, 2, 1)),
2114        ErrorKind::UnexpectedEof(Token::LitTrue),
2115        "unexpected EOF in true token at line 2, column 1 (offset: 3)",
2116        None
2117    )]
2118    #[case(
2119        Error::new_read(ToyError("foo"), Pos::new(3, 2, 1)),
2120        ErrorKind::Read,
2121        "read error at line 2, column 1 (offset: 3)",
2122        Some(ToyError("foo"))
2123    )]
2124    fn test_error(
2125        #[case] err: Error<ToyError>,
2126        #[case] expect_kind: ErrorKind,
2127        #[case] expect_display: &str,
2128        #[case] expect_source: Option<ToyError>,
2129    ) {
2130        let pos = Pos::new(3, 2, 1);
2131
2132        assert_eq!(expect_kind, err.kind());
2133        assert_eq!(&pos, err.pos());
2134        assert_eq!(
2135            expect_source.as_ref(),
2136            err.source().and_then(|e| e.downcast_ref::<ToyError>())
2137        );
2138
2139        let actual_display = format!("{err}");
2140        assert_eq!(expect_display, actual_display);
2141    }
2142
2143    #[test]
2144    fn test_analyzer_empty() {
2145        let (tx, rx) = channel();
2146        drop(tx);
2147        let mut an = PipeAnalyzer::new(rx);
2148
2149        assert_eq!(an.next(), Token::Eof);
2150        assert_eq!("", an.content().literal().into_string());
2151        assert_eq!("", an.content().unescaped().into_string());
2152    }
2153
2154    #[test]
2155    fn test_analyzer_initial_state_content() {
2156        let (_, rx) = channel();
2157        let an = PipeAnalyzer::new(rx);
2158
2159        for _ in 0..5 {
2160            let content = an.content();
2161            assert_eq!("", content.literal().into_string());
2162            assert!(!content.is_escaped());
2163            assert_eq!("", content.unescaped().into_string());
2164
2165            let content = an.try_content().unwrap();
2166            assert_eq!("", content.literal().into_string());
2167            assert!(!content.is_escaped());
2168            assert_eq!("", content.unescaped().into_string());
2169        }
2170    }
2171
2172    #[test]
2173    #[should_panic(
2174        expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
2175    )]
2176    fn test_analyzer_initial_state_err() {
2177        let (_, rx) = channel();
2178        let _ = PipeAnalyzer::new(rx).err();
2179    }
2180
2181    #[rstest]
2182    #[case("", Token::Eof, None)]
2183    #[case("{", Token::ObjBegin, None)]
2184    #[case("}", Token::ObjEnd, None)]
2185    #[case("[", Token::ArrBegin, None)]
2186    #[case("]", Token::ArrEnd, None)]
2187    #[case(":", Token::NameSep, None)]
2188    #[case(",", Token::ValueSep, None)]
2189    #[case("false", Token::LitFalse, None)]
2190    #[case("null", Token::LitNull, None)]
2191    #[case("true", Token::LitTrue, None)]
2192    #[case("0", Token::Num, None)]
2193    #[case("-0", Token::Num, None)]
2194    #[case("1", Token::Num, None)]
2195    #[case("-1", Token::Num, None)]
2196    #[case("12", Token::Num, None)]
2197    #[case("-12", Token::Num, None)]
2198    #[case("0.0", Token::Num, None)]
2199    #[case("-0.0", Token::Num, None)]
2200    #[case("0.123456789", Token::Num, None)]
2201    #[case("-123.456789", Token::Num, None)]
2202    #[case("0E0", Token::Num, None)]
2203    #[case("0e0", Token::Num, None)]
2204    #[case("0E+0", Token::Num, None)]
2205    #[case("0e+0", Token::Num, None)]
2206    #[case("0E-0", Token::Num, None)]
2207    #[case("0e-0", Token::Num, None)]
2208    #[case("0.0E0", Token::Num, None)]
2209    #[case("0.0e0", Token::Num, None)]
2210    #[case("0.0E+0", Token::Num, None)]
2211    #[case("0.0e+0", Token::Num, None)]
2212    #[case("0.0E0", Token::Num, None)]
2213    #[case("0.0e0", Token::Num, None)]
2214    #[case("0E0", Token::Num, None)]
2215    #[case("0e0", Token::Num, None)]
2216    #[case("-0E+0", Token::Num, None)]
2217    #[case("-0e+0", Token::Num, None)]
2218    #[case("-0E-0", Token::Num, None)]
2219    #[case("-0e-0", Token::Num, None)]
2220    #[case("-0.0E0", Token::Num, None)]
2221    #[case("-0.0e0", Token::Num, None)]
2222    #[case("-0.0E+0", Token::Num, None)]
2223    #[case("-0.0e+0", Token::Num, None)]
2224    #[case("-0.0E0", Token::Num, None)]
2225    #[case("-0.0e0", Token::Num, None)]
2226    #[case("123E456", Token::Num, None)]
2227    #[case("123e456", Token::Num, None)]
2228    #[case("123.456E+7", Token::Num, None)]
2229    #[case("123.456e+7", Token::Num, None)]
2230    #[case("123.456E-89", Token::Num, None)]
2231    #[case("123.456e-89", Token::Num, None)]
2232    #[case("-123E456", Token::Num, None)]
2233    #[case("-123e456", Token::Num, None)]
2234    #[case("-123.456E+7", Token::Num, None)]
2235    #[case("-123.456e+7", Token::Num, None)]
2236    #[case("-123.456E-89", Token::Num, None)]
2237    #[case("-123.456e-89", Token::Num, None)]
2238    #[case(r#""""#, Token::Str, None)]
2239    #[case(r#"" ""#, Token::Str, None)]
2240    #[case(r#""foo""#, Token::Str, None)]
2241    #[case(r#""The quick brown fox jumped over the lazy dog!""#, Token::Str, None)]
2242    #[case(r#""\\""#, Token::Str, Some(r#""\""#))]
2243    #[case(r#""\/""#, Token::Str, Some(r#""/""#))]
2244    #[case(r#""\t""#, Token::Str, Some("\"\t\""))]
2245    #[case(r#""\r""#, Token::Str, Some("\"\r\""))]
2246    #[case(r#""\n""#, Token::Str, Some("\"\n\""))]
2247    #[case(r#""\f""#, Token::Str, Some("\"\u{000c}\""))]
2248    #[case(r#""\b""#, Token::Str, Some("\"\u{0008}\""))]
2249    #[case(r#""\u0000""#, Token::Str, Some("\"\u{0000}\""))]
2250    #[case(r#""\u001f""#, Token::Str, Some("\"\u{001f}\""))]
2251    #[case(r#""\u0020""#, Token::Str, Some(r#"" ""#))]
2252    #[case(r#""\u007E""#, Token::Str, Some(r#""~""#))]
2253    #[case(r#""\u007F""#, Token::Str, Some("\"\u{007f}\""))]
2254    #[case(r#""\u0080""#, Token::Str, Some("\"\u{0080}\""))]
2255    #[case(r#""\u0100""#, Token::Str, Some("\"\u{0100}\""))]
2256    #[case(r#""\uE000""#, Token::Str, Some("\"\u{e000}\""))]
2257    #[case(r#""\ufDCf""#, Token::Str, Some("\"\u{fdcf}\""))]
2258    #[case(r#""\uFdeF""#, Token::Str, Some("\"\u{fdef}\""))]
2259    #[case(r#""\ufffd""#, Token::Str, Some("\"\u{fffd}\""))]
2260    #[case(r#""\uFFFE""#, Token::Str, Some("\"\u{fffe}\""))]
2261    #[case(r#""\uFFFF""#, Token::Str, Some("\"\u{ffff}\""))]
2262    #[case(r#""\ud800\udc00""#, Token::Str, Some("\"\u{10000}\""))] // Lowest valid surrogate pair → U+10000
2263    #[case(r#""\uD800\uDFFF""#, Token::Str, Some("\"\u{103ff}\""))] // High surrogate with highest low surrogate → U+103FF
2264    #[case(r#""\uDBFF\uDC00""#, Token::Str, Some("\"\u{10fc00}\""))] // Highest high surrogate with lowest low surrogate → U+10FC00
2265    #[case(r#""\udbFf\udfff""#, Token::Str, Some("\"\u{10ffff}\""))] // Highest valid surrogate pair → U+10FFFF (max Unicode scalar value)
2266    #[case(" ", Token::White, None)]
2267    #[case("\t", Token::White, None)]
2268    #[case("  ", Token::White, None)]
2269    #[case("\t\t", Token::White, None)]
2270    #[case(" \t \t    \t          \t\t", Token::White, None)]
2271    fn test_analyzer_single_token(
2272        #[case] input: &str,
2273        #[case] expect: Token,
2274        #[case] unescaped: Option<&str>,
2275    ) {
2276        const CHUNK_SIZES: [usize; 6] = [1, 2, INLINE_LEN - 1, INLINE_LEN, INLINE_LEN + 1, 10];
2277
2278        for chunk_size in CHUNK_SIZES {
2279            // With content fetch.
2280            {
2281                let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
2282                assert_eq!(Pos::default(), *an.pos());
2283
2284                assert_eq!(expect, an.next());
2285                assert_eq!(Pos::default(), *an.pos());
2286
2287                let content = an.content();
2288                assert_eq!(
2289                    input,
2290                    content.literal().into_string(),
2291                    "chunk_size = {chunk_size}, input = {input:?}, content = {content}"
2292                );
2293                assert_eq!(unescaped.is_some(), content.is_escaped());
2294                if let Some(u) = unescaped {
2295                    assert_eq!(u, content.unescaped().into_string());
2296                } else {
2297                    assert_eq!(input, content.unescaped().into_string());
2298                }
2299
2300                assert_eq!(Token::Eof, an.next());
2301                assert_eq!(
2302                    Pos {
2303                        offset: input.len(),
2304                        line: 1,
2305                        col: input.len() + 1,
2306                    },
2307                    *an.pos()
2308                );
2309
2310                assert_eq!(Token::Eof, an.next());
2311                assert_eq!(
2312                    Pos {
2313                        offset: input.len(),
2314                        line: 1,
2315                        col: input.len() + 1,
2316                    },
2317                    *an.pos()
2318                );
2319            }
2320
2321            // Without content fetch.
2322            {
2323                let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
2324                assert_eq!(Pos::default(), *an.pos());
2325
2326                assert_eq!(expect, an.next());
2327                assert_eq!(Pos::default(), *an.pos());
2328
2329                assert_eq!(Token::Eof, an.next());
2330                assert_eq!(
2331                    Pos {
2332                        offset: input.len(),
2333                        line: 1,
2334                        col: input.len() + 1,
2335                    },
2336                    *an.pos()
2337                );
2338
2339                assert_eq!(Token::Eof, an.next());
2340                assert_eq!(
2341                    Pos {
2342                        offset: input.len(),
2343                        line: 1,
2344                        col: input.len() + 1,
2345                    },
2346                    *an.pos()
2347                );
2348            }
2349        }
2350    }
2351
2352    #[rstest]
2353    #[case(r#"["#)]
2354    #[case(r#"]"#)]
2355    #[case(r#"false"#)]
2356    #[case(r#":"#)]
2357    #[case(r#"null"#)]
2358    #[case(r#"3.14159e+0"#)]
2359    #[case(r#"{"#)]
2360    #[case(r#"}"#)]
2361    #[case(r#""foo\/\u1234\/bar""#)]
2362    #[case(r#"true"#)]
2363    #[case(r#","#)]
2364    #[case("\n\n\n   ")]
2365    #[should_panic(
2366        expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
2367    )]
2368    fn test_analyzer_single_token_panic_no_err(#[case] input: &str) {
2369        const CHUNK_SIZES: [usize; 6] = [1, 2, INLINE_LEN - 1, INLINE_LEN, INLINE_LEN + 1, 10];
2370
2371        for chunk_size in CHUNK_SIZES {
2372            let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
2373
2374            let token = an.next();
2375            assert!(
2376                !token.is_terminal(),
2377                "input = {input:?}, token = {token:?}, chunk_size = {chunk_size}"
2378            );
2379
2380            let _ = an.err();
2381        }
2382    }
2383
2384    #[test]
2385    #[should_panic(expected = "last `next()` returned `Token::Err` (use `err()` instead)")]
2386    fn test_analyzer_single_error_panic_no_content() {
2387        let mut an = PipeAnalyzer::new(SlicePipe::new(1, &b"a"[..]));
2388
2389        assert_eq!(Token::Err, an.next());
2390
2391        let _ = an.content();
2392    }
2393
2394    #[rstest]
2395    #[case(r#""\uDC00""#, ErrorKind::BadSurrogate { first: 0xdc00, second: None, }, 3)]
2396    #[case(&[b'"', 0xc2, 0xc0], ErrorKind::BadUtf8ContByte { seq_len: 2, offset: 1, value: 0xc0 }, 1)]
2397    #[case(&b"\"\x80", ErrorKind::UnexpectedByte { token: Some(Token::Str), expect: Expect::StrChar, actual: 0x80 }, 1)]
2398    #[case([b'"'], ErrorKind::UnexpectedEof(Token::Str), 1)]
2399    #[case("10.", ErrorKind::UnexpectedEof(Token::Num), 3)]
2400    fn test_analyzer_single_lexical_error<T>(
2401        #[case] input: T,
2402        #[case] kind: ErrorKind,
2403        #[case] pos_offset: usize,
2404    ) where
2405        T: AsRef<[u8]> + fmt::Debug,
2406    {
2407        const CHUNK_SIZES: [usize; 6] = [1, 2, INLINE_LEN - 1, INLINE_LEN, INLINE_LEN + 1, 10];
2408
2409        for chunk_size in CHUNK_SIZES {
2410            // With error fetch.
2411            {
2412                let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_ref()));
2413                assert_eq!(Pos::default(), *an.pos());
2414
2415                assert_eq!(Token::Err, an.next());
2416                assert_eq!(Pos::default(), *an.pos());
2417
2418                let err = an.err();
2419                assert_eq!(kind, err.kind());
2420                assert_eq!(
2421                    Pos {
2422                        offset: pos_offset,
2423                        line: 1,
2424                        col: pos_offset + 1
2425                    },
2426                    *err.pos()
2427                );
2428                assert!(err.source().is_none());
2429
2430                assert_eq!(Token::Err, an.next());
2431                assert_eq!(Pos::default(), *an.pos());
2432            }
2433
2434            // Without error fetch.
2435            {
2436                let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_ref()));
2437                assert_eq!(Pos::default(), *an.pos());
2438
2439                assert_eq!(Token::Err, an.next());
2440                assert_eq!(Pos::default(), *an.pos());
2441
2442                assert_eq!(Token::Err, an.next());
2443                assert_eq!(Pos::default(), *an.pos());
2444            }
2445        }
2446    }
2447
2448    #[rstest]
2449    #[case(1, r#"{"#, [Token::ObjBegin], Pos::new(1, 1, 2), Pos::new(1, 1, 2))]
2450    #[case(1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2451    #[case(2, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2452    #[case(INLINE_LEN-1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2453    #[case(INLINE_LEN-1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2454    #[case(INLINE_LEN+1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2455    #[case(512, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
2456    #[case(1, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2457    #[case(2, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2458    #[case(INLINE_LEN-1, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2459    #[case(INLINE_LEN, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2460    #[case(INLINE_LEN+1, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
2461    #[case(1, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2462    #[case(2, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2463    #[case(INLINE_LEN-1, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2464    #[case(INLINE_LEN, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2465    #[case(INLINE_LEN+1, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
2466    #[case(INLINE_LEN-1, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2467    #[case(INLINE_LEN-1, r#"[314.1592653589793238462643383279e-2 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(38, 1, 39), Pos::new(38, 1, 39))]
2468    #[case(INLINE_LEN, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2469    #[case(INLINE_LEN, r#"[314.1592653589793238462643383279e-2 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(38, 1, 39), Pos::new(38, 1, 39))]
2470    #[case(INLINE_LEN+1, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
2471    #[case(INLINE_LEN+1, r#"[314.1592653589793238462643383279E+999 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(40, 1, 41), Pos::new(40, 1, 41))]
2472    #[case(512, r#"[3141.592653589793238462643383279e-3,{"aaaaaaaaaaaaaaaaaaaaaaaaaaaa":true}]    "#, [Token::ArrBegin, Token::Num, Token::ValueSep, Token::ObjBegin, Token::Str, Token::NameSep, Token::LitTrue,  Token::ObjEnd, Token::ArrEnd], Pos::new(75, 1, 76), Pos::new(79, 1, 80))]
2473    fn test_analyzer_single_read_error<T>(
2474        #[case] chunk_size: usize,
2475        #[case] input: &str,
2476        #[case] expect_tokens: T,
2477        #[case] expect_token_pos: Pos,
2478        #[case] expect_err_pos: Pos,
2479    ) where
2480        T: IntoIterator<Item = Token>,
2481    {
2482        #[derive(Debug)]
2483        struct PipeError;
2484
2485        impl fmt::Display for PipeError {
2486            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2487                f.write_str("there's an error in the pipe!")
2488            }
2489        }
2490
2491        impl std::error::Error for PipeError {}
2492
2493        struct ErrorPipe<'a> {
2494            chunk_size: usize,
2495            input: &'a [u8],
2496        }
2497
2498        impl<'a> ErrorPipe<'a> {
2499            fn new(chunk_size: usize, input: &'a [u8]) -> Self {
2500                assert!(chunk_size > 0);
2501
2502                Self { chunk_size, input }
2503            }
2504        }
2505
2506        impl<'a> Pipe for ErrorPipe<'a> {
2507            type Error = PipeError;
2508
2509            fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
2510                if self.input.len() > 0 {
2511                    let n = self.input.len().min(self.chunk_size);
2512                    let b = self.input[..n].to_vec().into();
2513                    self.input = &self.input[n..];
2514
2515                    Some(Ok(b))
2516                } else {
2517                    Some(Err(PipeError))
2518                }
2519            }
2520        }
2521
2522        let mut an = PipeAnalyzer::new(ErrorPipe::new(chunk_size, input.as_bytes()));
2523
2524        for expect_token in expect_tokens.into_iter() {
2525            let actual_token = an.next();
2526
2527            assert_eq!(expect_token, actual_token);
2528        }
2529
2530        assert_eq!(Token::Err, an.next());
2531        assert_eq!(expect_token_pos, *an.pos());
2532        let err = an.err();
2533        assert_eq!(ErrorKind::Read, err.kind());
2534        assert_eq!(expect_err_pos, *err.pos());
2535
2536        assert_eq!(Token::Err, an.next());
2537        assert_eq!(expect_token_pos, *an.pos());
2538        let err = an.try_content().unwrap_err();
2539        assert_eq!(ErrorKind::Read, err.kind());
2540        assert_eq!(expect_err_pos, *err.pos());
2541        assert!(
2542            err.source()
2543                .and_then(|e| e.downcast_ref::<PipeError>())
2544                .is_some()
2545        );
2546
2547        assert_eq!(Token::Err, an.next());
2548    }
2549
2550    #[rstest]
2551    #[case(1)]
2552    #[case(2)]
2553    #[case(INLINE_LEN - 1)]
2554    #[case(INLINE_LEN)]
2555    #[case(INLINE_LEN + 1)]
2556    fn test_analyzer_into_parser(#[case] chunk_size: usize) {
2557        const INPUT: &str = r#"{"hello":["🌍"]}"#;
2558
2559        let mut parser =
2560            PipeAnalyzer::new(SlicePipe::new(chunk_size, INPUT.as_bytes())).into_parser();
2561
2562        assert_eq!(Token::ObjBegin, parser.next());
2563        assert_eq!("{", parser.content().literal());
2564        assert_eq!(Pos::default(), *parser.pos());
2565        assert_eq!(1, parser.level());
2566
2567        assert_eq!(Token::Str, parser.next());
2568        assert_eq!(r#""hello""#, parser.content().literal());
2569        assert_eq!(Pos::new(1, 1, 2), *parser.pos());
2570        assert_eq!(1, parser.level());
2571
2572        assert_eq!(Token::NameSep, parser.next());
2573        assert_eq!(":", parser.content().literal());
2574        assert_eq!(Pos::new(8, 1, 9), *parser.pos());
2575        assert_eq!(1, parser.level());
2576
2577        assert_eq!(Token::ArrBegin, parser.next());
2578        assert_eq!("[", parser.content().literal());
2579        assert_eq!(Pos::new(9, 1, 10), *parser.pos());
2580        assert_eq!(2, parser.level());
2581
2582        assert_eq!(Token::Str, parser.next());
2583        assert_eq!(r#""🌍""#, parser.content().literal());
2584        assert_eq!(Pos::new(10, 1, 11), *parser.pos());
2585        assert_eq!(2, parser.level());
2586
2587        assert_eq!(Token::ArrEnd, parser.next());
2588        assert_eq!("]", parser.content().literal());
2589        assert_eq!(Pos::new(16, 1, 14), *parser.pos());
2590        assert_eq!(1, parser.level());
2591
2592        assert_eq!(Token::ObjEnd, parser.next());
2593        assert_eq!("}", parser.content().literal());
2594        assert_eq!(Pos::new(17, 1, 15), *parser.pos());
2595        assert_eq!(0, parser.level());
2596
2597        for _ in 0..5 {
2598            assert_eq!(Token::Eof, parser.next());
2599            assert_eq!(Pos::new(18, 1, 16), *parser.pos());
2600            assert_eq!(0, parser.level());
2601        }
2602    }
2603
    #[rstest]
    #[case(1)]
    #[case(2)]
    #[case(INLINE_LEN - 1)]
    #[case(INLINE_LEN)]
    #[case(INLINE_LEN + 1)]
    fn test_analyzer_smoke(#[case] chunk_size: usize) {
        // Multi-line input exercising every token type, multi-byte UTF-8
        // content, escape sequences, and literals long enough to spill out of
        // the inline buffer at the tested chunk sizes.
        const JSON_TEXT: &str = r#"

[
  [],
  {},
  [true, false, null, "foo",-9, -9.9, -99.99e-99, {"❤️😊":1}, 10000000],
  "\u0068\u0065\u006c\u006c\u006f\u002c\u0020\u0077\u006f\u0072\u006c\u0064",
  "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\nUt labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\nLaboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in."
]"#;

        // Each entry is (expected token, expected start position as byte
        // offset/line/column, expected literal content, and — for tokens
        // containing escape sequences — the expected unescaped content).
        const EXPECT: &[(Token, Pos, &str, Option<&str>)] = &[
            // Line 1.
            (Token::White, Pos::new(0, 1, 1), "\n\n", None),
            // Line 3.
            (Token::ArrBegin, Pos::new(2, 3, 1), "[", None),
            (Token::White, Pos::new(3, 3, 2), "\n  ", None),
            // Line 4.
            (Token::ArrBegin, Pos::new(6, 4, 3), "[", None),
            (Token::ArrEnd, Pos::new(7, 4, 4), "]", None),
            (Token::ValueSep, Pos::new(8, 4, 5), ",", None),
            (Token::White, Pos::new(9, 4, 6), "\n  ", None),
            // Line 5.
            (Token::ObjBegin, Pos::new(12, 5, 3), "{", None),
            (Token::ObjEnd, Pos::new(13, 5, 4), "}", None),
            (Token::ValueSep, Pos::new(14, 5, 5), ",", None),
            (Token::White, Pos::new(15, 5, 6), "\n  ", None),
            // Line 6.
            (Token::ArrBegin, Pos::new(18, 6, 3), "[", None),
            (Token::LitTrue, Pos::new(19, 6, 4), "true", None),
            (Token::ValueSep, Pos::new(23, 6, 8), ",", None),
            (Token::White, Pos::new(24, 6, 9), " ", None),
            (Token::LitFalse, Pos::new(25, 6, 10), "false", None),
            (Token::ValueSep, Pos::new(30, 6, 15), ",", None),
            (Token::White, Pos::new(31, 6, 16), " ", None),
            (Token::LitNull, Pos::new(32, 6, 17), "null", None),
            (Token::ValueSep, Pos::new(36, 6, 21), ",", None),
            (Token::White, Pos::new(37, 6, 22), " ", None),
            (Token::Str, Pos::new(38, 6, 23), r#""foo""#, None),
            (Token::ValueSep, Pos::new(43, 6, 28), ",", None),
            (Token::Num, Pos::new(44, 6, 29), "-9", None),
            (Token::ValueSep, Pos::new(46, 6, 31), ",", None),
            (Token::White, Pos::new(47, 6, 32), " ", None),
            (Token::Num, Pos::new(48, 6, 33), "-9.9", None),
            (Token::ValueSep, Pos::new(52, 6, 37), ",", None),
            (Token::White, Pos::new(53, 6, 38), " ", None),
            (Token::Num, Pos::new(54, 6, 39), "-99.99e-99", None),
            (Token::ValueSep, Pos::new(64, 6, 49), ",", None),
            (Token::White, Pos::new(65, 6, 50), " ", None),
            (Token::ObjBegin, Pos::new(66, 6, 51), "{", None),
            // Multi-byte key: byte offset advances by 12 while the column
            // advances by 5 (quote + 3 chars + quote).
            (Token::Str, Pos::new(67, 6, 52), r#""❤️😊""#, None),
            (Token::NameSep, Pos::new(79, 6, 57), ":", None),
            (Token::Num, Pos::new(80, 6, 58), "1", None),
            (Token::ObjEnd, Pos::new(81, 6, 59), "}", None),
            (Token::ValueSep, Pos::new(82, 6, 60), ",", None),
            (Token::White, Pos::new(83, 6, 61), " ", None),
            (Token::Num, Pos::new(84, 6, 62), "10000000", None),
            (Token::ArrEnd, Pos::new(92, 6, 70), "]", None),
            (Token::ValueSep, Pos::new(93, 6, 71), ",", None),
            (Token::White, Pos::new(94, 6, 72), "\n  ", None),
            // Line 7.
            (
                Token::Str,
                Pos::new(97, 7, 3),
                r#""\u0068\u0065\u006c\u006c\u006f\u002c\u0020\u0077\u006f\u0072\u006c\u0064""#,
                Some(r#""hello, world""#),
            ),
            (Token::ValueSep, Pos::new(171, 7, 77), ",", None),
            (Token::White, Pos::new(172, 7, 78), "\n  ", None),
            // Line 8.
            (
                Token::Str,
                Pos::new(175, 8, 3),
                concat!(
                    r#""Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\n"#,
                    r#"Ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n"#,
                    r#"Laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in.""#,
                ),
                Some(concat!(
                    "\"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\n",
                    "Ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n",
                    "Laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in.\"",
                )),
            ),
            // Newline ending line 8, then line 9.
            (Token::White, Pos::new(455, 8, 283), "\n", None),
            (Token::ArrEnd, Pos::new(456, 9, 1), "]", None),
            (Token::Eof, Pos::new(457, 9, 2), "", None),
        ];

        let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, JSON_TEXT.as_bytes()));

        // Walk the analyzer through the whole document, checking token kind,
        // position, literal content, and (un)escaped content at each step.
        for (i, (expect_token, expect_pos, expect_literal, expect_unescaped)) in
            EXPECT.iter().enumerate()
        {
            let actual_token = an.next();
            let actual_pos = *an.pos();
            let content = an.content();

            assert_eq!(
                *expect_token, actual_token,
                "i = {i}, actual_pos = {actual_pos}, expect_pos = {expect_pos}"
            );
            assert_eq!(
                *expect_pos, actual_pos,
                "i = {i}, token = {actual_token}, content = {content}"
            );
            assert_eq!(
                *expect_literal,
                content.literal(),
                "i = {i}, token = {actual_token}, expect_literal = {expect_literal:?}, content.literal() = {}",
                content.literal(),
            );
            if let Some(u) = expect_unescaped {
                assert!(
                    content.is_escaped(),
                    "i = {i}, token = {actual_token}, literal = {expect_literal:?}"
                );
                assert_eq!(*u, content.unescaped());
            } else {
                // Tokens with no escapes must report the literal unchanged.
                assert!(
                    !content.is_escaped(),
                    "i = {i}, token = {actual_token}, literal = {expect_literal:?}"
                );
                assert_eq!(*expect_literal, content.unescaped());
            }
        }
    }
2738
2739    #[derive(Debug, Eq, PartialEq)]
2740    struct ToyError(&'static str);
2741
2742    impl fmt::Display for ToyError {
2743        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2744            f.write_str(self.0)
2745        }
2746    }
2747
2748    impl std::error::Error for ToyError {}
2749
    /// Infallible [`Pipe`] that serves a borrowed byte slice in fixed-size
    /// chunks, ending with a clean EOF.
    struct SlicePipe<'a> {
        chunk_size: usize, // maximum number of bytes per `recv` chunk
        input: &'a [u8],   // unread remainder of the input
    }
2754
2755    impl<'a> SlicePipe<'a> {
2756        fn new(chunk_size: usize, input: &'a [u8]) -> Self {
2757            Self { chunk_size, input }
2758        }
2759    }
2760
2761    impl<'a> Pipe for SlicePipe<'a> {
2762        type Error = Infallible;
2763
2764        fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
2765            if self.input.len() > 0 {
2766                let n = self.input.len().min(self.chunk_size);
2767                let b = self.input[..n].to_vec().into();
2768                self.input = &self.input[n..];
2769
2770                Some(Ok(b))
2771            } else {
2772                None
2773            }
2774        }
2775    }
2776
    /// Test helper for draining a buffer-producing value into an owned
    /// `String`, panicking if the bytes are not valid UTF-8.
    trait IntoString {
        fn into_string(self) -> String;
    }
2780
2781    impl<T: IntoBuf> IntoString for T {
2782        fn into_string(self) -> String {
2783            let mut src = self.into_buf();
2784            let mut dst = Vec::with_capacity(src.remaining());
2785            while src.remaining() > 0 {
2786                let chunk = src.chunk();
2787                dst.extend_from_slice(chunk);
2788                src.advance(chunk.len());
2789            }
2790
2791            String::from_utf8(dst).expect("valid UTF-8")
2792        }
2793    }
2794}