nombytes/
lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2//
3#![cfg_attr(docsrs, feature(doc_cfg))]
4//
5#![doc = include_str!("../README.md")]
6//
7#![deny(anonymous_parameters)]
8#![deny(nonstandard_style)]
9#![deny(rust_2018_idioms)]
10#![deny(trivial_numeric_casts)]
11#![deny(unsafe_code)]
12#![deny(rustdoc::broken_intra_doc_links)]
13#![deny(unused)]
14#![deny(unreachable_pub)]
15//
16// Warn (try not to do this)
17#![warn(missing_copy_implementations)]
18#![warn(missing_debug_implementations)]
19#![warn(variant_size_differences)]
20#![warn(missing_docs)]
21//
22// Clippy
23#![warn(clippy::pedantic)]
24
25use bytes::Bytes;
26use core::fmt::Display;
27use core::iter::Enumerate;
28use core::ops::{Range, RangeFrom, RangeFull, RangeTo};
29use core::str::Utf8Error;
30use nom::{
31    AsBytes, Compare, InputIter, InputLength, InputTake, InputTakeAtPosition, Needed, Offset, Slice,
32};
33
34mod range_type;
35pub use range_type::RangeType;
36
37#[cfg(feature = "miette")]
38#[cfg_attr(docsrs, doc(cfg(feature = "miette")))]
39mod miette;
40
/// A wrapper around [`bytes::Bytes`] to be able to use it with [`nom`].
///
/// The wrapper stores the underlying [`Bytes`] together with an optional
/// [`RangeType`]. When the range is present, the visible contents are that
/// range of the stored bytes; when it is absent, the visible contents are
/// the stored bytes as-is.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct NomBytes(Bytes, Option<RangeType<usize>>);
45
46// Why the extra `Option<RangeType<usize>>`? Nom expects to be able to calculate
47// offsets between two of its inputs, but `Bytes` has this optimization where if
48// slicing results in an empty slice, it returns a new, empty `Bytes` rather than
49// an empty slice of the existing `Bytes`. This causes problems down the line when
50// nom asks for offsets between two inputs. Thus, in cases where slicing would
51// result in an empty slice, we instead store the original `Bytes` plus the slice
52// range itself, which we can use to hand out correct offsets.
53//
// All the code here uses `to_bytes()` or `as_bytes()` for doing operations on the
// underlying bytes rather than accessing the "raw" `.0` field, because those two
// contain code that handles this custom slicing correctly, and thus we don't have
// to be careful anywhere else.
58//
// This was reported upstream as unexpected/incorrect behavior, but the response
// was that it is intentional:
// <https://github.com/tokio-rs/bytes/issues/557>
62
63impl NomBytes {
64    /// Creates a new `NomBytes` wrapping the provided [`Bytes`].
65    ///
66    /// # Examples
67    ///
68    /// ```
69    /// use bytes::Bytes;
70    /// use nombytes::NomBytes;
71    ///
72    /// let b = Bytes::new();
73    /// let nb = NomBytes::new(b);
74    /// ```
75    #[inline]
76    pub fn new(bytes: Bytes) -> Self {
77        Self(bytes, None)
78    }
79
80    /// Returns a string slice to the contents of the inner [`Bytes`].
81    ///
82    /// # Examples
83    ///
84    /// ```
85    /// use bytes::Bytes;
86    /// use nombytes::NomBytes;
87    ///
88    /// let nb = NomBytes::new(Bytes::from("hello"));
89    /// assert_eq!(nb.to_str(), "hello");
90    /// ```
91    ///
92    /// # Panics
93    ///
94    /// Panics if the [`Bytes`] slice is not UTF-8.
95    #[inline]
96    pub fn to_str(&self) -> &str {
97        self.try_to_str().unwrap()
98    }
99
100    /// Returns a string slice to the contents of the inner [`Bytes`].
101    ///
102    /// # Examples
103    ///
104    /// ```
105    /// use bytes::Bytes;
106    /// use nombytes::NomBytes;
107    ///
108    /// let nb = NomBytes::new(Bytes::from("hello"));
109    /// assert_eq!(nb.try_to_str().unwrap(), "hello");
110    /// ```
111    ///
112    /// # Errors
113    ///
114    /// Returns `Err` if the [`Bytes`] slice is not UTF-8 with a description
115    /// as to why the provided slice is not UTF-8.
116    #[inline]
117    pub fn try_to_str(&self) -> Result<&str, Utf8Error> {
118        core::str::from_utf8(self.as_bytes())
119    }
120
121    #[doc = include_str!("to_bytes_doc.md")]
122    /// # Examples
123    ///
124    /// ```
125    /// use bytes::Bytes;
126    /// use nombytes::NomBytes;
127    ///
128    /// let nb = NomBytes::new(Bytes::from("hello"));
129    /// let b = nb.to_bytes();
130    /// assert_eq!(b.as_ref(), b"hello");
131    /// ```
132    #[inline]
133    pub fn to_bytes(&self) -> Bytes {
134        match self.1.as_ref() {
135            Some(range) => self.0.slice(range.clone()),
136            None => self.0.clone(),
137        }
138    }
139
140    #[doc = include_str!("to_bytes_doc.md")]
141    /// # Examples
142    ///
143    /// ```
144    /// use bytes::Bytes;
145    /// use nombytes::NomBytes;
146    ///
147    /// let nb = NomBytes::new(Bytes::from("hello"));
148    /// let b = nb.into_bytes();
149    /// assert_eq!(b.as_ref(), b"hello");
150    /// ```
151    #[inline]
152    pub fn into_bytes(self) -> Bytes {
153        match self.1.as_ref() {
154            Some(range) => self.0.slice(range.clone()),
155            None => self.0,
156        }
157    }
158
159    /// Returns the values from the inner representation of this type.
160    ///
161    /// See [`into_bytes`](Self::into_bytes) for an explanation of why this
162    /// inner representation exists.
163    // I dunno what anyone would use this for, but... might as well
164    // offer it.
165    pub fn into_raw(self) -> (Bytes, Option<RangeType<usize>>) {
166        let Self(bytes, range_type) = self;
167        (bytes, range_type)
168    }
169
170    /// Returns a new `NomBytes` using the raw values passed in. If these
171    /// values represent something invalid, you'll likely see incorrect
172    /// behavior or even panics. Regular usage should create values using
173    /// [`new`](Self::new) instead.
174    ///
175    /// See [`into_bytes`](Self::into_bytes) for an explanation of why this
176    /// inner representation exists.
177    // I dunno what anyone would use this for, but... might as well
178    // offer it.
179    pub fn from_raw((bytes, range_type): (Bytes, Option<RangeType<usize>>)) -> Self {
180        Self(bytes, range_type)
181    }
182}
183
184impl AsBytes for NomBytes {
185    #[inline]
186    fn as_bytes(&self) -> &[u8] {
187        match self.1.as_ref() {
188            Some(range) => range.slice(self.0.as_ref()),
189            None => self.0.as_ref(),
190        }
191    }
192}
193
194impl InputIter for NomBytes {
195    type Item = u8;
196    type Iter = Enumerate<Self::IterElem>;
197    type IterElem = bytes::buf::IntoIter<Bytes>;
198
199    #[inline]
200    fn iter_indices(&self) -> Self::Iter {
201        self.iter_elements().enumerate()
202    }
203
204    #[inline]
205    fn iter_elements(&self) -> Self::IterElem {
206        self.to_bytes().into_iter()
207    }
208
209    #[inline]
210    fn position<P>(&self, predicate: P) -> Option<usize>
211    where
212        P: Fn(Self::Item) -> bool,
213    {
214        self.as_bytes().iter().position(|b| predicate(*b))
215    }
216
217    #[inline]
218    fn slice_index(&self, count: usize) -> Result<usize, nom::Needed> {
219        if self.as_bytes().len() >= count {
220            Ok(count)
221        } else {
222            Err(Needed::new(count - self.as_bytes().len()))
223        }
224    }
225}
226
227impl InputTake for NomBytes {
228    #[inline]
229    fn take(&self, count: usize) -> Self {
230        self.slice(..count)
231    }
232
233    #[inline]
234    fn take_split(&self, count: usize) -> (Self, Self) {
235        let prefix = self.slice(..count);
236        let suffix = self.slice(count..);
237        (suffix, prefix)
238    }
239}
240
241impl InputTakeAtPosition for NomBytes {
242    type Item = <Self as InputIter>::Item;
243
244    fn split_at_position<P, E: nom::error::ParseError<Self>>(
245        &self,
246        predicate: P,
247    ) -> nom::IResult<Self, Self, E>
248    where
249        P: Fn(Self::Item) -> bool,
250    {
251        match self.as_bytes().iter().position(|c| predicate(*c)) {
252            Some(i) => Ok(self.take_split(i)),
253            None => Err(nom::Err::Incomplete(Needed::new(1))),
254        }
255    }
256
257    fn split_at_position1<P, E: nom::error::ParseError<Self>>(
258        &self,
259        predicate: P,
260        e: nom::error::ErrorKind,
261    ) -> nom::IResult<Self, Self, E>
262    where
263        P: Fn(Self::Item) -> bool,
264    {
265        match self.as_bytes().iter().position(|c| predicate(*c)) {
266            Some(0) => Err(nom::Err::Error(E::from_error_kind(self.clone(), e))),
267            Some(i) => Ok(self.take_split(i)),
268            None => Err(nom::Err::Incomplete(Needed::new(1))),
269        }
270    }
271
272    fn split_at_position_complete<P, E: nom::error::ParseError<Self>>(
273        &self,
274        predicate: P,
275    ) -> nom::IResult<Self, Self, E>
276    where
277        P: Fn(Self::Item) -> bool,
278    {
279        match self.as_bytes().iter().position(|c| predicate(*c)) {
280            Some(i) => Ok(self.take_split(i)),
281            None => Ok(self.take_split(self.input_len())),
282        }
283    }
284
285    fn split_at_position1_complete<P, E: nom::error::ParseError<Self>>(
286        &self,
287        predicate: P,
288        e: nom::error::ErrorKind,
289    ) -> nom::IResult<Self, Self, E>
290    where
291        P: Fn(Self::Item) -> bool,
292    {
293        let bytes = self.as_bytes();
294        match bytes.iter().position(|c| predicate(*c)) {
295            Some(0) => Err(nom::Err::Error(E::from_error_kind(self.clone(), e))),
296            Some(i) => Ok(self.take_split(i)),
297            None => {
298                if bytes.is_empty() {
299                    Err(nom::Err::Error(E::from_error_kind(self.clone(), e)))
300                } else {
301                    Ok(self.take_split(self.input_len()))
302                }
303            }
304        }
305    }
306}
307
308impl InputLength for NomBytes {
309    #[inline]
310    fn input_len(&self) -> usize {
311        self.as_bytes().len()
312    }
313}
314
315macro_rules! nom_bytes_slice {
316    ($range_ty:ty, $requirement:expr) => {
317        impl Slice<$range_ty> for NomBytes {
318            fn slice(&self, range: $range_ty) -> Self {
319                let bytes = self.to_bytes();
320                if bytes.is_empty() && $requirement(&range) {
321                    return self.clone();
322                }
323
324                let slice = bytes.slice(range.clone());
325                if slice.is_empty() {
326                    NomBytes(bytes, Some(RangeType::from(range)))
327                } else {
328                    assert!(!slice.is_empty());
329                    NomBytes(slice, None)
330                }
331            }
332        }
333    };
334}
335
336nom_bytes_slice!(Range<usize>, |r: &Range<usize>| r.start == 0 && r.end == 0);
337nom_bytes_slice!(RangeTo<usize>, |r: &RangeTo<usize>| r.end == 0);
338nom_bytes_slice!(RangeFrom<usize>, |r: &RangeFrom<usize>| r.start == 0);
339nom_bytes_slice!(RangeFull, |_: &RangeFull| true);
340
341impl Offset for NomBytes {
342    #[inline]
343    fn offset(&self, second: &Self) -> usize {
344        self.as_bytes().offset(second.as_bytes())
345    }
346}
347
348impl Compare<NomBytes> for NomBytes {
349    #[inline]
350    fn compare(&self, t: NomBytes) -> nom::CompareResult {
351        self.as_bytes().compare(t.as_bytes())
352    }
353
354    #[inline]
355    fn compare_no_case(&self, t: NomBytes) -> nom::CompareResult {
356        self.as_bytes().compare_no_case(t.as_bytes())
357    }
358}
359
360impl Compare<&'_ str> for NomBytes {
361    #[inline]
362    fn compare(&self, t: &str) -> nom::CompareResult {
363        self.as_bytes().compare(t.as_bytes())
364    }
365
366    #[inline]
367    fn compare_no_case(&self, t: &str) -> nom::CompareResult {
368        self.as_bytes().compare_no_case(t.as_bytes())
369    }
370}
371
372impl Display for NomBytes {
373    #[inline]
374    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
375        write!(f, "{}", self.to_str())
376    }
377}
378
379impl From<&'_ str> for NomBytes {
380    #[inline]
381    fn from(string: &'_ str) -> Self {
382        Self::from(string.as_bytes())
383    }
384}
385
386impl From<&'_ [u8]> for NomBytes {
387    #[inline]
388    fn from(byte_slice: &'_ [u8]) -> Self {
389        use bytes::{BufMut, BytesMut};
390
391        let mut buf = BytesMut::with_capacity(byte_slice.len());
392        buf.put(byte_slice);
393        Self::new(buf.into())
394    }
395}
396
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl From<String> for NomBytes {
    /// Builds a `NomBytes` by converting the `String` into [`Bytes`].
    #[inline]
    fn from(string: String) -> Self {
        let owned: Bytes = string.into();
        Self::new(owned)
    }
}
405
406// We implement the eq/ord traits in terms of &[u8] since it's both
407// cheap and easy:
408
409impl PartialEq for NomBytes {
410    fn eq(&self, other: &Self) -> bool {
411        self.as_bytes().eq(other.as_bytes())
412    }
413}
414impl Eq for NomBytes {}
415
416impl PartialOrd for NomBytes {
417    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
418        self.as_bytes().partial_cmp(other.as_bytes())
419    }
420}
421impl Ord for NomBytes {
422    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
423        self.as_bytes().cmp(other.as_bytes())
424    }
425}
nombytes/lib.rs

nombytes/
lib.rs