tl/
bytes.rs

1use core::{fmt, fmt::Debug};
2use std::{
3    borrow::Cow,
4    hash::{Hash, Hasher},
5    marker::PhantomData,
6    mem::ManuallyDrop,
7};
8
9use crate::errors::SetBytesError;
10
11/// A storage type for raw bytes, used by the parser
12#[derive(Eq, PartialOrd, Ord)]
13pub struct Bytes<'a> {
14    /// The inner data
15    data: BytesInner,
16    /// Enforce the lifetime of the referenced data
17    _lt: PhantomData<&'a [u8]>,
18}
19
/// Compact storage behind [`Bytes`].
///
/// Each variant holds a raw pointer plus a `u32` length rather than a
/// `&[u8]` / `Vec<u8>`. That keeps the enum at 16 bytes on 64-bit machines,
/// i.e. no larger than a plain `struct Bytes<'a>(&'a [u8])` would be.
///
/// Note that the derived comparison traits operate on the variant, the
/// pointer address and the length, not on the pointed-to bytes.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
enum BytesInner {
    /// Bytes that live somewhere else; only pointer and length are stored
    Borrowed(*const u8, u32),
    /// Bytes owned by this value
    ///
    /// The allocation behind this pointer is managed by this value and is
    /// released when the value is dropped
    Owned(*mut u8, u32),
}
35
36impl<'a> PartialEq<str> for Bytes<'a> {
37    #[inline]
38    fn eq(&self, other: &str) -> bool {
39        self == other.as_bytes()
40    }
41}
42
43impl<'a> PartialEq<[u8]> for Bytes<'a> {
44    #[inline]
45    fn eq(&self, other: &[u8]) -> bool {
46        self.as_bytes() == other
47    }
48}
49
50impl<'a> PartialEq for Bytes<'a> {
51    #[inline]
52    fn eq(&self, other: &Self) -> bool {
53        let this = self.as_bytes();
54        let that = other.as_bytes();
55        this == that
56    }
57}
58
59impl<'a> Hash for Bytes<'a> {
60    #[inline]
61    fn hash<H: Hasher>(&self, state: &mut H) {
62        // Hash must be implemented manually for Bytes, otherwise it would only hash a pointer
63        let this = self.as_bytes();
64        this.hash(state);
65    }
66}
67
68impl<'a> Clone for Bytes<'a> {
69    fn clone(&self) -> Self {
70        // It is important to manually implement Clone for Bytes,
71        // because if `self` was owned, then the default clone
72        // implementation would only clone the pointer
73        // which leads to aliasing boxes, and later, when `Bytes` is dropped,
74        // the box is freed twice!
75        match &self.data {
76            BytesInner::Borrowed(data, len) => {
77                Bytes::from(unsafe { compact_bytes_to_slice(*data, *len) })
78            }
79            BytesInner::Owned(data, len) => {
80                let (ptr, len) = unsafe { clone_compact_bytes_parts(*data, *len) };
81                Bytes {
82                    data: BytesInner::Owned(ptr, len),
83                    _lt: PhantomData,
84                }
85            }
86        }
87    }
88}
89
90impl<'a> From<&'a str> for Bytes<'a> {
91    #[inline]
92    fn from(s: &'a str) -> Self {
93        <Self as From<&'a [u8]>>::from(s.as_bytes())
94    }
95}
96
97impl<'a> From<&'a [u8]> for Bytes<'a> {
98    #[inline]
99    fn from(s: &'a [u8]) -> Self {
100        Bytes {
101            data: BytesInner::Borrowed(s.as_ptr(), s.len() as u32),
102            _lt: PhantomData,
103        }
104    }
105}
106
107impl TryFrom<String> for Bytes<'static> {
108    type Error = SetBytesError;
109
110    #[inline]
111    fn try_from(s: String) -> Result<Self, Self::Error> {
112        let mut bytes = Bytes::new();
113        bytes.set(s)?;
114        Ok(bytes)
115    }
116}
117
/// Builds a byte slice out of the compact (pointer, `u32` length) pair
/// stored inside `Bytes`
#[inline]
unsafe fn compact_bytes_to_slice<'a>(ptr: *const u8, l: u32) -> &'a [u8] {
    let len = l as usize;
    std::slice::from_raw_parts(ptr, len)
}
123
/// Takes a boxed byte slice apart into a raw pointer and a `u32` length
///
/// Ownership of the allocation moves to the caller, who must free the
/// returned pointer, and who must make sure that the slice length fits into
/// a `u32` (the length is converted with an `as` cast).
unsafe fn boxed_slice_into_compact_parts(slice: Box<[u8]>) -> (*mut u8, u32) {
    let len = slice.len() as u32;
    // `Box::into_raw` gives up ownership without running the destructor,
    // so the allocation stays alive behind the returned pointer.
    let ptr = Box::into_raw(slice) as *mut u8;

    (ptr, len)
}
135
136/// Clones a slice given its raw parts and returns the new, cloned parts
137#[inline]
138unsafe fn clone_compact_bytes_parts(ptr: *mut u8, len: u32) -> (*mut u8, u32) {
139    let slice = compact_bytes_to_slice(ptr, len).to_vec().into_boxed_slice();
140    boxed_slice_into_compact_parts(slice)
141}
142
143// Custom `Debug` trait is implemented which displays the data as a UTF8 string,
144// to make it easier to read for humans when logging
145impl<'a> Debug for Bytes<'a> {
146    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
147        f.debug_tuple("Bytes").field(&self.as_utf8_str()).finish()
148    }
149}
150
151impl<'a> Default for Bytes<'a> {
152    fn default() -> Self {
153        Self::new()
154    }
155}
156
157impl<'a> Bytes<'a> {
158    /// Creates an empty `Bytes`
159    #[inline]
160    pub fn new() -> Self {
161        Self {
162            data: BytesInner::Borrowed("".as_bytes().as_ptr(), 0),
163            _lt: PhantomData,
164        }
165    }
166
167    /// Convenient method for lossy-encoding the data as UTF8
168    #[inline]
169    pub fn as_utf8_str(&self) -> Cow<'_, str> {
170        String::from_utf8_lossy(self.as_bytes())
171    }
172
173    /// Tries to convert the inner data to a `&str`, without allocating in the case
174    /// that the inner data is not valid UTF8
175    #[inline]
176    pub fn try_as_utf8_str(&self) -> Option<&str> {
177        std::str::from_utf8(self.as_bytes()).ok()
178    }
179
180    /// Returns the raw data wrapped by this struct
181    #[inline]
182    pub fn as_bytes(&self) -> &[u8] {
183        match &self.data {
184            BytesInner::Borrowed(b, l) => unsafe { compact_bytes_to_slice(*b, *l) },
185            BytesInner::Owned(o, l) => unsafe { compact_bytes_to_slice(*o, *l) },
186        }
187    }
188
189    /// Returns the raw data referenced by this struct
190    ///
191    /// The lifetime of the returned data is tied to 'a, unlike `Bytes::as_bytes`
192    /// which has a lifetime of '_ (self) in case it is owned
193    #[inline]
194    pub fn as_bytes_borrowed(&self) -> Option<&'a [u8]> {
195        match &self.data {
196            BytesInner::Borrowed(b, l) => Some(unsafe { compact_bytes_to_slice(*b, *l) }),
197            _ => None,
198        }
199    }
200
201    /// Returns a read-only raw pointer to the inner data
202    #[inline]
203    pub fn as_ptr(&self) -> *const u8 {
204        match &self.data {
205            BytesInner::Borrowed(b, _) => *b,
206            BytesInner::Owned(o, _) => *o,
207        }
208    }
209
210    /// Sets the inner data to the given data and returns the old bytes
211    pub fn set<B: IntoOwnedBytes>(&mut self, data: B) -> Result<Option<Box<[u8]>>, SetBytesError> {
212        const MAX: usize = u32::MAX as usize;
213
214        let data = <B as IntoOwnedBytes>::into_bytes(data);
215
216        if data.len() > MAX {
217            return Err(SetBytesError::LengthOverflow);
218        }
219
220        // SAFETY: All invariants are checked
221        Ok(unsafe { self.set_unchecked(data) })
222    }
223
224    /// Sets the inner data to the given data without checking for validity of the data
225    ///
226    /// ## Safety
227    /// - Once `data` is converted to a `Box<[u8]>`, its length must not be greater than u32::MAX
228    #[inline]
229    pub unsafe fn set_unchecked<B: IntoOwnedBytes>(&mut self, data: B) -> Option<Box<[u8]>> {
230        let data = <B as IntoOwnedBytes>::into_bytes(data);
231
232        let (ptr, len) = boxed_slice_into_compact_parts(data);
233
234        let bytes = BytesInner::Owned(ptr, len);
235        let old = std::mem::replace(&mut self.data, bytes);
236
237        // we cannot let Drop code run because that would deallocate `old`
238        let old = ManuallyDrop::new(old);
239
240        match &*old {
241            BytesInner::Borrowed(_, _) => None,
242            BytesInner::Owned(ptr, len) => {
243                let len = *len as usize;
244                Some(Vec::from_raw_parts(*ptr, len, len).into_boxed_slice())
245            }
246        }
247    }
248}
249
/// Holds the supertrait used to seal `IntoOwnedBytes`.
mod private {
    /// Marker trait with no items; it is a supertrait of `IntoOwnedBytes`.
    pub trait Sealed {}
}
253
254/// A trait implemented on types that can be used for `Bytes::set`.
255///
256/// This trait is sealed and cannot be implemented outside of this crate.
257pub trait IntoOwnedBytes: private::Sealed {
258    fn into_bytes(self) -> Box<[u8]>;
259}
260
261macro_rules! impl_into_owned_bytes_trivial {
262    ($($t:ty),*) => {
263        $(
264            impl private::Sealed for $t {}
265            impl IntoOwnedBytes for $t {
266                #[inline]
267                fn into_bytes(self) -> Box<[u8]> {
268                    self.into()
269                }
270            }
271        )*
272    };
273}
274
275impl_into_owned_bytes_trivial!(Box<[u8]>, &[u8], Vec<u8>);
276
277impl private::Sealed for &str {}
278impl IntoOwnedBytes for &str {
279    #[inline]
280    fn into_bytes(self) -> Box<[u8]> {
281        self.as_bytes().into()
282    }
283}
284
285impl private::Sealed for String {}
286impl IntoOwnedBytes for String {
287    #[inline]
288    fn into_bytes(self) -> Box<[u8]> {
289        self.into_bytes().into()
290    }
291}
292
293impl Drop for BytesInner {
294    fn drop(&mut self) {
295        // we only need to deallocate if we own the data
296        // if we don't, just do nothing
297        if let BytesInner::Owned(ptr, len) = self {
298            let ptr = *ptr;
299            let len = *len as usize;
300
301            // carefully reconstruct a `Box<[u8]>` from the raw pointer and length
302            // and immediately drop it to free memory
303            unsafe { drop(Vec::from_raw_parts(ptr, len, len).into_boxed_slice()) };
304        }
305    }
306}