utf8_bytes/
bytes.rs

1use super::Utf8BytesMut;
2
3use core::iter::FromIterator;
4use core::ops::{Deref, RangeBounds};
5use core::{cmp, hash};
6use std::borrow::Cow;
7
8use alloc::{borrow::Borrow, boxed::Box, string::String, vec::Vec};
9
/// A cheaply cloneable and sliceable chunk of contiguous memory holding UTF-8 text.
///
/// `Utf8Bytes` is a thin wrapper around a single [`bytes::Bytes`] value. Its
/// contents are exposed as a `str` (see [`Utf8Bytes::as_str`]) without being
/// re-validated, so every constructor and every slicing/splitting method on
/// this type must keep the wrapped buffer valid UTF-8.
///
/// Storage, cloning and sharing are all provided by the wrapped
/// [`bytes::Bytes`]; the rest of this documentation describes that type.
///
/// `Bytes` is an efficient container for storing and operating on contiguous
/// slices of memory. It is intended for use primarily in networking code, but
/// could have applications elsewhere as well.
///
/// `Bytes` values facilitate zero-copy network programming by allowing multiple
/// `Bytes` objects to point to the same underlying memory.
///
/// `Bytes` does not have a single implementation. It is an interface, whose
/// exact behavior is implemented through dynamic dispatch in several underlying
/// implementations of `Bytes`.
///
/// All `Bytes` implementations must fulfill the following requirements:
/// - They are cheaply cloneable and thereby shareable between an unlimited amount
///   of components, for example by modifying a reference count.
/// - Instances can be sliced to refer to a subset of the original buffer.
///
/// The example below exercises the wrapped `bytes::Bytes` directly.
/// `Utf8Bytes` offers `slice` and `split_to` methods of the same names which
/// additionally require the offsets to fall on `char` boundaries.
///
/// ```
/// use bytes::Bytes;
///
/// let mut mem = Bytes::from("Hello world");
/// let a = mem.slice(0..5);
///
/// assert_eq!(a, "Hello");
///
/// let b = mem.split_to(6);
///
/// assert_eq!(mem, "world");
/// assert_eq!(b, "Hello ");
/// ```
///
/// # Memory layout
///
/// The `Bytes` struct itself is fairly small, limited to 4 `usize` fields used
/// to track information about which segment of the underlying memory the
/// `Bytes` handle has access to. `Utf8Bytes` adds no fields of its own.
///
/// `Bytes` keeps both a pointer to the shared state containing the full memory
/// slice and a pointer to the start of the region visible by the handle.
/// `Bytes` also tracks the length of its view into the memory.
///
/// # Sharing
///
/// `Bytes` contains a vtable, which allows implementations of `Bytes` to define
/// how sharing/cloning is implemented in detail.
/// When `Bytes::clone()` is called, `Bytes` will call the vtable function for
/// cloning the backing storage in order to share it behind multiple `Bytes`
/// instances.
///
/// For `Bytes` implementations which refer to constant memory (e.g. created
/// via `Bytes::from_static()`) the cloning implementation will be a no-op.
///
/// For `Bytes` implementations which point to a reference counted shared storage
/// (e.g. an `Arc<[u8]>`), sharing will be implemented by increasing the
/// reference count.
///
/// Due to this mechanism, multiple `Bytes` instances may point to the same
/// shared memory region.
/// Each `Bytes` instance can point to different sections within that
/// memory region, and `Bytes` instances may or may not have overlapping views
/// into the memory.
///
/// The following diagram visualizes a scenario where 2 `Bytes` instances make
/// use of an `Arc`-based backing storage, and provide access to different views:
///
/// ```text
///
///    Arc ptrs                   ┌─────────┐
///    ________________________ / │ Bytes 2 │
///   /                           └─────────┘
///  /          ┌───────────┐     |         |
/// |_________/ │  Bytes 1  │     |         |
/// |           └───────────┘     |         |
/// |           |           | ___/ data     | tail
/// |      data |      tail |/              |
/// v           v           v               v
/// ┌─────┬─────┬───────────┬───────────────┬─────┐
/// │ Arc │     │           │               │     │
/// └─────┴─────┴───────────┴───────────────┴─────┘
/// ```
pub struct Utf8Bytes {
    // Invariant: always valid UTF-8; `as_str` relies on this without checking.
    inner: bytes::Bytes,
}
94
95impl Utf8Bytes {
96    pub const unsafe fn from_bytes_unchecked(inner: bytes::Bytes) -> Self {
97        Self { inner }
98    }
99    pub fn as_str(&self) -> &str {
100        unsafe { str::from_utf8_unchecked(&self.inner) }
101    }
102}
103
104impl Utf8Bytes {
105    /// Creates a new empty `Bytes`.
106    ///
107    /// This will not allocate and the returned `Bytes` handle will be empty.
108    ///
109    /// # Examples
110    ///
111    /// ```
112    /// use bytes::Bytes;
113    ///
114    /// let b = Bytes::new();
115    /// assert_eq!(&b[..], b"");
116    /// ```
117    #[inline]
118    pub const fn new() -> Self {
119        unsafe { Self::from_bytes_unchecked(bytes::Bytes::new()) }
120    }
121
122    /// Creates a new `Bytes` from a static slice.
123    ///
124    /// The returned `Bytes` will point directly to the static slice. There is
125    /// no allocating or copying.
126    ///
127    /// # Examples
128    ///
129    /// ```
130    /// use bytes::Bytes;
131    ///
132    /// let b = Bytes::from_static(b"hello");
133    /// assert_eq!(&b[..], b"hello");
134    /// ```
135    #[inline]
136    pub const fn from_static(bytes: &'static str) -> Self {
137        unsafe { Self::from_bytes_unchecked(bytes::Bytes::from_static(bytes.as_bytes())) }
138    }
139
140    /// Create [Bytes] with a buffer whose lifetime is controlled
141    /// via an explicit owner.
142    ///
143    /// A common use case is to zero-copy construct from mapped memory.
144    ///
145    /// ```
146    /// # struct File;
147    /// #
148    /// # impl File {
149    /// #     pub fn open(_: &str) -> Result<Self, ()> {
150    /// #         Ok(Self)
151    /// #     }
152    /// # }
153    /// #
154    /// # mod memmap2 {
155    /// #     pub struct Mmap;
156    /// #
157    /// #     impl Mmap {
158    /// #         pub unsafe fn map(_file: &super::File) -> Result<Self, ()> {
159    /// #             Ok(Self)
160    /// #         }
161    /// #     }
162    /// #
163    /// #     impl AsRef<[u8]> for Mmap {
164    /// #         fn as_ref(&self) -> &[u8] {
165    /// #             b"buf"
166    /// #         }
167    /// #     }
168    /// # }
169    /// use bytes::Bytes;
170    /// use memmap2::Mmap;
171    ///
172    /// # fn main() -> Result<(), ()> {
173    /// let file = File::open("upload_bundle.tar.gz")?;
174    /// let mmap = unsafe { Mmap::map(&file) }?;
175    /// let b = Bytes::from_owner(mmap);
176    /// # Ok(())
177    /// # }
178    /// ```
179    ///
180    /// The `owner` will be transferred to the constructed [Bytes] object, which
181    /// will ensure it is dropped once all remaining clones of the constructed
182    /// object are dropped. The owner will then be responsible for dropping the
183    /// specified region of memory as part of its [Drop] implementation.
184    ///
185    /// Note that converting [Bytes] constructed from an owner into a [BytesMut]
186    /// will always create a deep copy of the buffer into newly allocated memory.
187    pub fn from_owner<T>(owner: T) -> Self
188    where
189        T: AsRef<str> + Send + 'static,
190    {
191        #[repr(transparent)]
192        struct AsBytes<T>(T);
193        impl<T: AsRef<str>> AsRef<[u8]> for AsBytes<T> {
194            fn as_ref(&self) -> &[u8] {
195                self.0.as_ref().as_bytes()
196            }
197        }
198        unsafe { Self::from_bytes_unchecked(bytes::Bytes::from_owner(AsBytes(owner))) }
199    }
200
201    /// Returns the number of bytes contained in this `Bytes`.
202    ///
203    /// # Examples
204    ///
205    /// ```
206    /// use bytes::Bytes;
207    ///
208    /// let b = Bytes::from(&b"hello"[..]);
209    /// assert_eq!(b.len(), 5);
210    /// ```
211    #[inline]
212    pub const fn len(&self) -> usize {
213        self.inner.len()
214    }
215
216    /// Returns true if the `Bytes` has a length of 0.
217    ///
218    /// # Examples
219    ///
220    /// ```
221    /// use bytes::Bytes;
222    ///
223    /// let b = Bytes::new();
224    /// assert!(b.is_empty());
225    /// ```
226    #[inline]
227    pub const fn is_empty(&self) -> bool {
228        self.inner.is_empty()
229    }
230
231    /// Returns true if this is the only reference to the data and
232    /// `Into<BytesMut>` would avoid cloning the underlying buffer.
233    ///
234    /// Always returns false if the data is backed by a [static slice](Bytes::from_static),
235    /// or an [owner](Bytes::from_owner).
236    ///
237    /// The result of this method may be invalidated immediately if another
238    /// thread clones this value while this is being called. Ensure you have
239    /// unique access to this value (`&mut Bytes`) first if you need to be
240    /// certain the result is valid (i.e. for safety reasons).
241    /// # Examples
242    ///
243    /// ```
244    /// use bytes::Bytes;
245    ///
246    /// let a = Bytes::from(vec![1, 2, 3]);
247    /// assert!(a.is_unique());
248    /// let b = a.clone();
249    /// assert!(!a.is_unique());
250    /// ```
251    pub fn is_unique(&self) -> bool {
252        self.inner.is_unique()
253    }
254
255    /// Creates `Bytes` instance from slice, by copying it.
256    pub fn copy_from_str(data: &str) -> Self {
257        unsafe { Self::from_bytes_unchecked(bytes::Bytes::copy_from_slice(data.as_bytes())) }
258    }
259
260    /// Returns a slice of self for the provided range.
261    ///
262    /// This will increment the reference count for the underlying memory and
263    /// return a new `Bytes` handle set to the slice.
264    ///
265    /// This operation is `O(1)`.
266    ///
267    /// # Examples
268    ///
269    /// ```
270    /// use bytes::Bytes;
271    ///
272    /// let a = Bytes::from(&b"hello world"[..]);
273    /// let b = a.slice(2..5);
274    ///
275    /// assert_eq!(&b[..], b"llo");
276    /// ```
277    ///
278    /// # Panics
279    ///
280    /// Requires that `begin <= end` and `end <= self.len()`, otherwise slicing
281    /// will panic.
282    pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
283        let lo = range.start_bound().cloned();
284        let hi = range.end_bound().cloned();
285        self.as_str().get((lo, hi)).unwrap();
286        unsafe { Self::from_bytes_unchecked(self.inner.slice((lo, hi))) }
287    }
288
289    /// Returns a slice of self that is equivalent to the given `subset`.
290    ///
291    /// When processing a `Bytes` buffer with other tools, one often gets a
292    /// `&[u8]` which is in fact a slice of the `Bytes`, i.e. a subset of it.
293    /// This function turns that `&[u8]` into another `Bytes`, as if one had
294    /// called `self.slice()` with the offsets that correspond to `subset`.
295    ///
296    /// This operation is `O(1)`.
297    ///
298    /// # Examples
299    ///
300    /// ```
301    /// use bytes::Bytes;
302    ///
303    /// let bytes = Bytes::from(&b"012345678"[..]);
304    /// let as_slice = bytes.as_ref();
305    /// let subset = &as_slice[2..6];
306    /// let subslice = bytes.slice_ref(&subset);
307    /// assert_eq!(&subslice[..], b"2345");
308    /// ```
309    ///
310    /// # Panics
311    ///
312    /// Requires that the given `sub` slice is in fact contained within the
313    /// `Bytes` buffer; otherwise this function will panic.
314    pub fn slice_ref(&self, subset: &str) -> Self {
315        unsafe { Self::from_bytes_unchecked(self.inner.slice_ref(subset.as_bytes())) }
316    }
317
318    /// Splits the bytes into two at the given index.
319    ///
320    /// Afterwards `self` contains elements `[0, at)`, and the returned `Bytes`
321    /// contains elements `[at, len)`. It's guaranteed that the memory does not
322    /// move, that is, the address of `self` does not change, and the address of
323    /// the returned slice is `at` bytes after that.
324    ///
325    /// This is an `O(1)` operation that just increases the reference count and
326    /// sets a few indices.
327    ///
328    /// # Examples
329    ///
330    /// ```
331    /// use bytes::Bytes;
332    ///
333    /// let mut a = Bytes::from(&b"hello world"[..]);
334    /// let b = a.split_off(5);
335    ///
336    /// assert_eq!(&a[..], b"hello");
337    /// assert_eq!(&b[..], b" world");
338    /// ```
339    ///
340    /// # Panics
341    ///
342    /// Panics if `at > len`.
343    #[must_use = "consider Bytes::truncate if you don't need the other half"]
344    pub fn split_off(&mut self, at: usize) -> Self {
345        let _char_boundary = self.as_str().split_at(at);
346        unsafe { Self::from_bytes_unchecked(self.inner.split_off(at)) }
347    }
348
349    /// Splits the bytes into two at the given index.
350    ///
351    /// Afterwards `self` contains elements `[at, len)`, and the returned
352    /// `Bytes` contains elements `[0, at)`.
353    ///
354    /// This is an `O(1)` operation that just increases the reference count and
355    /// sets a few indices.
356    ///
357    /// # Examples
358    ///
359    /// ```
360    /// use bytes::Bytes;
361    ///
362    /// let mut a = Bytes::from(&b"hello world"[..]);
363    /// let b = a.split_to(5);
364    ///
365    /// assert_eq!(&a[..], b" world");
366    /// assert_eq!(&b[..], b"hello");
367    /// ```
368    ///
369    /// # Panics
370    ///
371    /// Panics if `at > len`.
372    #[must_use = "consider Bytes::advance if you don't need the other half"]
373    pub fn split_to(&mut self, at: usize) -> Self {
374        let _char_boundary = self.as_str().split_at(at);
375        unsafe { Self::from_bytes_unchecked(self.inner.split_to(at)) }
376    }
377
378    /// Shortens the buffer, keeping the first `len` bytes and dropping the
379    /// rest.
380    ///
381    /// If `len` is greater than the buffer's current length, this has no
382    /// effect.
383    ///
384    /// The [split_off](`Self::split_off()`) method can emulate `truncate`, but this causes the
385    /// excess bytes to be returned instead of dropped.
386    ///
387    /// # Examples
388    ///
389    /// ```
390    /// use bytes::Bytes;
391    ///
392    /// let mut buf = Bytes::from(&b"hello world"[..]);
393    /// buf.truncate(5);
394    /// assert_eq!(buf, b"hello"[..]);
395    /// ```
396    #[inline]
397    pub fn truncate(&mut self, len: usize) {
398        if len < self.len() {
399            let _char_boundary = self.as_str().split_at(len);
400            self.inner.truncate(len)
401        };
402    }
403
404    /// Clears the buffer, removing all data.
405    ///
406    /// # Examples
407    ///
408    /// ```
409    /// use bytes::Bytes;
410    ///
411    /// let mut buf = Bytes::from(&b"hello world"[..]);
412    /// buf.clear();
413    /// assert!(buf.is_empty());
414    /// ```
415    #[inline]
416    pub fn clear(&mut self) {
417        self.truncate(0);
418    }
419
420    /// Try to convert self into `BytesMut`.
421    ///
422    /// If `self` is unique for the entire original buffer, this will succeed
423    /// and return a `BytesMut` with the contents of `self` without copying.
424    /// If `self` is not unique for the entire original buffer, this will fail
425    /// and return self.
426    ///
427    /// This will also always fail if the buffer was constructed via either
428    /// [from_owner](Bytes::from_owner) or [from_static](Bytes::from_static).
429    ///
430    /// # Examples
431    ///
432    /// ```
433    /// use bytes::{Bytes, BytesMut};
434    ///
435    /// let bytes = Bytes::from(b"hello".to_vec());
436    /// assert_eq!(bytes.try_into_mut(), Ok(BytesMut::from(&b"hello"[..])));
437    /// ```
438    pub fn try_into_mut(self) -> Result<Utf8BytesMut, Utf8Bytes> {
439        match self.inner.try_into_mut() {
440            Ok(it) => Ok(unsafe { Utf8BytesMut::from_bytes_mut_unchecked(it) }),
441            Err(it) => Err(unsafe { Self::from_bytes_unchecked(it) }),
442        }
443    }
444}
445
446impl Clone for Utf8Bytes {
447    #[inline]
448    fn clone(&self) -> Utf8Bytes {
449        unsafe { Self::from_bytes_unchecked(self.inner.clone()) }
450    }
451    fn clone_from(&mut self, source: &Self) {
452        self.inner.clone_from(&source.inner);
453    }
454}
455
456impl Deref for Utf8Bytes {
457    type Target = str;
458
459    #[inline]
460    fn deref(&self) -> &str {
461        self.as_str()
462    }
463}
464
465impl AsRef<str> for Utf8Bytes {
466    #[inline]
467    fn as_ref(&self) -> &str {
468        self.as_str()
469    }
470}
471
472impl AsRef<[u8]> for Utf8Bytes {
473    #[inline]
474    fn as_ref(&self) -> &[u8] {
475        self.as_str().as_bytes()
476    }
477}
478
479impl hash::Hash for Utf8Bytes {
480    fn hash<H>(&self, state: &mut H)
481    where
482        H: hash::Hasher,
483    {
484        self.as_str().hash(state);
485    }
486}
487
488impl Borrow<str> for Utf8Bytes {
489    fn borrow(&self) -> &str {
490        self.as_str()
491    }
492}
493
494impl FromIterator<char> for Utf8Bytes {
495    fn from_iter<T: IntoIterator<Item = char>>(into_iter: T) -> Self {
496        String::from_iter(into_iter).into()
497    }
498}
499
500// impl Eq
501
502impl<T: AsRef<str>> PartialEq<T> for Utf8Bytes {
503    fn eq(&self, other: &T) -> bool {
504        self.as_str() == other.as_ref()
505    }
506}
507
508impl<T: AsRef<str>> PartialOrd<T> for Utf8Bytes {
509    fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
510        self.as_str().partial_cmp(other.as_ref())
511    }
512}
513
514impl Ord for Utf8Bytes {
515    fn cmp(&self, other: &Utf8Bytes) -> cmp::Ordering {
516        self.as_str().cmp(other.as_str())
517    }
518}
519
// Marker impl: equality on `Utf8Bytes` is `str` equality of the contents
// (via the generic `PartialEq<T: AsRef<str>>` impl), which is reflexive.
impl Eq for Utf8Bytes {}
521
522impl PartialEq<Utf8Bytes> for str {
523    fn eq(&self, other: &Utf8Bytes) -> bool {
524        self.eq(other.as_str())
525    }
526}
527impl PartialEq<Utf8Bytes> for String {
528    fn eq(&self, other: &Utf8Bytes) -> bool {
529        self.eq(other.as_str())
530    }
531}
532impl<'a> PartialEq<Utf8Bytes> for Cow<'a, str> {
533    fn eq(&self, other: &Utf8Bytes) -> bool {
534        self.eq(other.as_str())
535    }
536}
537
538impl PartialOrd<Utf8Bytes> for str {
539    fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
540        self.partial_cmp(other.as_str())
541    }
542}
543impl PartialOrd<Utf8Bytes> for String {
544    fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
545        self.as_str().partial_cmp(other.as_str())
546    }
547}
548impl PartialOrd<Utf8Bytes> for Cow<'_, str> {
549    fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
550        (**self).partial_cmp(other.as_str())
551    }
552}
553
554// impl From
555
556impl Default for Utf8Bytes {
557    #[inline]
558    fn default() -> Utf8Bytes {
559        Utf8Bytes::new()
560    }
561}
562
563impl From<&'static str> for Utf8Bytes {
564    fn from(s: &'static str) -> Utf8Bytes {
565        Utf8Bytes::from_static(s)
566    }
567}
568
569impl From<Box<str>> for Utf8Bytes {
570    fn from(slice: Box<str>) -> Utf8Bytes {
571        unsafe { Self::from_bytes_unchecked(bytes::Bytes::from(slice.into_boxed_bytes())) }
572    }
573}
574
575impl From<Utf8Bytes> for bytes::Bytes {
576    /// Convert self into `BytesMut`.
577    ///
578    /// If `bytes` is unique for the entire original buffer, this will return a
579    /// `BytesMut` with the contents of `bytes` without copying.
580    /// If `bytes` is not unique for the entire original buffer, this will make
581    /// a copy of `bytes` subset of the original buffer in a new `BytesMut`.
582    ///
583    /// # Examples
584    ///
585    /// ```
586    /// use bytes::{Bytes, BytesMut};
587    ///
588    /// let bytes = Bytes::from(b"hello".to_vec());
589    /// assert_eq!(BytesMut::from(bytes), BytesMut::from(&b"hello"[..]));
590    /// ```
591    fn from(utf8: Utf8Bytes) -> Self {
592        utf8.inner
593    }
594}
595
596impl From<Utf8Bytes> for Utf8BytesMut {
597    /// Convert self into `BytesMut`.
598    ///
599    /// If `bytes` is unique for the entire original buffer, this will return a
600    /// `BytesMut` with the contents of `bytes` without copying.
601    /// If `bytes` is not unique for the entire original buffer, this will make
602    /// a copy of `bytes` subset of the original buffer in a new `BytesMut`.
603    ///
604    /// # Examples
605    ///
606    /// ```
607    /// use bytes::{Bytes, BytesMut};
608    ///
609    /// let bytes = Bytes::from(b"hello".to_vec());
610    /// assert_eq!(BytesMut::from(bytes), BytesMut::from(&b"hello"[..]));
611    /// ```
612    fn from(bytes: Utf8Bytes) -> Self {
613        unsafe { Self::from_bytes_mut_unchecked(bytes.inner.into()) }
614    }
615}
616
617impl From<String> for Utf8Bytes {
618    fn from(s: String) -> Utf8Bytes {
619        unsafe { Utf8Bytes::from_bytes_unchecked(bytes::Bytes::from(s.into_bytes())) }
620    }
621}
622
623impl From<Utf8Bytes> for Vec<u8> {
624    fn from(utf8: Utf8Bytes) -> Vec<u8> {
625        utf8.inner.into()
626    }
627}