utf8_bytes/bytes.rs
1use super::Utf8BytesMut;
2
3use core::iter::FromIterator;
4use core::ops::{Deref, RangeBounds};
5use core::{cmp, hash};
6use std::borrow::Cow;
7
8use alloc::{borrow::Borrow, boxed::Box, string::String, vec::Vec};
9
10/// A cheaply cloneable and sliceable chunk of contiguous memory.
11///
12/// `Bytes` is an efficient container for storing and operating on contiguous
13/// slices of memory. It is intended for use primarily in networking code, but
14/// could have applications elsewhere as well.
15///
16/// `Bytes` values facilitate zero-copy network programming by allowing multiple
17/// `Bytes` objects to point to the same underlying memory.
18///
19/// `Bytes` does not have a single implementation. It is an interface, whose
20/// exact behavior is implemented through dynamic dispatch in several underlying
21/// implementations of `Bytes`.
22///
23/// All `Bytes` implementations must fulfill the following requirements:
24/// - They are cheaply cloneable and thereby shareable between an unlimited amount
25/// of components, for example by modifying a reference count.
26/// - Instances can be sliced to refer to a subset of the original buffer.
27///
28/// ```
29/// use bytes::Bytes;
30///
31/// let mut mem = Bytes::from("Hello world");
32/// let a = mem.slice(0..5);
33///
34/// assert_eq!(a, "Hello");
35///
36/// let b = mem.split_to(6);
37///
38/// assert_eq!(mem, "world");
39/// assert_eq!(b, "Hello ");
40/// ```
41///
42/// # Memory layout
43///
44/// The `Bytes` struct itself is fairly small, limited to 4 `usize` fields used
45/// to track information about which segment of the underlying memory the
46/// `Bytes` handle has access to.
47///
48/// `Bytes` keeps both a pointer to the shared state containing the full memory
49/// slice and a pointer to the start of the region visible by the handle.
50/// `Bytes` also tracks the length of its view into the memory.
51///
52/// # Sharing
53///
54/// `Bytes` contains a vtable, which allows implementations of `Bytes` to define
55/// how sharing/cloning is implemented in detail.
56/// When `Bytes::clone()` is called, `Bytes` will call the vtable function for
57/// cloning the backing storage in order to share it behind multiple `Bytes`
58/// instances.
59///
60/// For `Bytes` implementations which refer to constant memory (e.g. created
61/// via `Bytes::from_static()`) the cloning implementation will be a no-op.
62///
63/// For `Bytes` implementations which point to a reference counted shared storage
64/// (e.g. an `Arc<[u8]>`), sharing will be implemented by increasing the
65/// reference count.
66///
67/// Due to this mechanism, multiple `Bytes` instances may point to the same
68/// shared memory region.
69/// Each `Bytes` instance can point to different sections within that
70/// memory region, and `Bytes` instances may or may not have overlapping views
71/// into the memory.
72///
73/// The following diagram visualizes a scenario where 2 `Bytes` instances make
74/// use of an `Arc`-based backing storage, and provide access to different views:
75///
76/// ```text
77///
78/// Arc ptrs ┌─────────┐
79/// ________________________ / │ Bytes 2 │
80/// / └─────────┘
81/// / ┌───────────┐ | |
82/// |_________/ │ Bytes 1 │ | |
83/// | └───────────┘ | |
84/// | | | ___/ data | tail
85/// | data | tail |/ |
86/// v v v v
87/// ┌─────┬─────┬───────────┬───────────────┬─────┐
88/// │ Arc │ │ │ │ │
89/// └─────┴─────┴───────────┴───────────────┴─────┘
90/// ```
91pub struct Utf8Bytes {
92 inner: bytes::Bytes,
93}
94
95impl Utf8Bytes {
96 pub const unsafe fn from_bytes_unchecked(inner: bytes::Bytes) -> Self {
97 Self { inner }
98 }
99 pub fn as_str(&self) -> &str {
100 unsafe { str::from_utf8_unchecked(&self.inner) }
101 }
102}
103
104impl Utf8Bytes {
105 /// Creates a new empty `Bytes`.
106 ///
107 /// This will not allocate and the returned `Bytes` handle will be empty.
108 ///
109 /// # Examples
110 ///
111 /// ```
112 /// use bytes::Bytes;
113 ///
114 /// let b = Bytes::new();
115 /// assert_eq!(&b[..], b"");
116 /// ```
117 #[inline]
118 pub const fn new() -> Self {
119 unsafe { Self::from_bytes_unchecked(bytes::Bytes::new()) }
120 }
121
122 /// Creates a new `Bytes` from a static slice.
123 ///
124 /// The returned `Bytes` will point directly to the static slice. There is
125 /// no allocating or copying.
126 ///
127 /// # Examples
128 ///
129 /// ```
130 /// use bytes::Bytes;
131 ///
132 /// let b = Bytes::from_static(b"hello");
133 /// assert_eq!(&b[..], b"hello");
134 /// ```
135 #[inline]
136 pub const fn from_static(bytes: &'static str) -> Self {
137 unsafe { Self::from_bytes_unchecked(bytes::Bytes::from_static(bytes.as_bytes())) }
138 }
139
140 /// Create [Bytes] with a buffer whose lifetime is controlled
141 /// via an explicit owner.
142 ///
143 /// A common use case is to zero-copy construct from mapped memory.
144 ///
145 /// ```
146 /// # struct File;
147 /// #
148 /// # impl File {
149 /// # pub fn open(_: &str) -> Result<Self, ()> {
150 /// # Ok(Self)
151 /// # }
152 /// # }
153 /// #
154 /// # mod memmap2 {
155 /// # pub struct Mmap;
156 /// #
157 /// # impl Mmap {
158 /// # pub unsafe fn map(_file: &super::File) -> Result<Self, ()> {
159 /// # Ok(Self)
160 /// # }
161 /// # }
162 /// #
163 /// # impl AsRef<[u8]> for Mmap {
164 /// # fn as_ref(&self) -> &[u8] {
165 /// # b"buf"
166 /// # }
167 /// # }
168 /// # }
169 /// use bytes::Bytes;
170 /// use memmap2::Mmap;
171 ///
172 /// # fn main() -> Result<(), ()> {
173 /// let file = File::open("upload_bundle.tar.gz")?;
174 /// let mmap = unsafe { Mmap::map(&file) }?;
175 /// let b = Bytes::from_owner(mmap);
176 /// # Ok(())
177 /// # }
178 /// ```
179 ///
180 /// The `owner` will be transferred to the constructed [Bytes] object, which
181 /// will ensure it is dropped once all remaining clones of the constructed
182 /// object are dropped. The owner will then be responsible for dropping the
183 /// specified region of memory as part of its [Drop] implementation.
184 ///
185 /// Note that converting [Bytes] constructed from an owner into a [BytesMut]
186 /// will always create a deep copy of the buffer into newly allocated memory.
187 pub fn from_owner<T>(owner: T) -> Self
188 where
189 T: AsRef<str> + Send + 'static,
190 {
191 #[repr(transparent)]
192 struct AsBytes<T>(T);
193 impl<T: AsRef<str>> AsRef<[u8]> for AsBytes<T> {
194 fn as_ref(&self) -> &[u8] {
195 self.0.as_ref().as_bytes()
196 }
197 }
198 unsafe { Self::from_bytes_unchecked(bytes::Bytes::from_owner(AsBytes(owner))) }
199 }
200
201 /// Returns the number of bytes contained in this `Bytes`.
202 ///
203 /// # Examples
204 ///
205 /// ```
206 /// use bytes::Bytes;
207 ///
208 /// let b = Bytes::from(&b"hello"[..]);
209 /// assert_eq!(b.len(), 5);
210 /// ```
211 #[inline]
212 pub const fn len(&self) -> usize {
213 self.inner.len()
214 }
215
216 /// Returns true if the `Bytes` has a length of 0.
217 ///
218 /// # Examples
219 ///
220 /// ```
221 /// use bytes::Bytes;
222 ///
223 /// let b = Bytes::new();
224 /// assert!(b.is_empty());
225 /// ```
226 #[inline]
227 pub const fn is_empty(&self) -> bool {
228 self.inner.is_empty()
229 }
230
231 /// Returns true if this is the only reference to the data and
232 /// `Into<BytesMut>` would avoid cloning the underlying buffer.
233 ///
234 /// Always returns false if the data is backed by a [static slice](Bytes::from_static),
235 /// or an [owner](Bytes::from_owner).
236 ///
237 /// The result of this method may be invalidated immediately if another
238 /// thread clones this value while this is being called. Ensure you have
239 /// unique access to this value (`&mut Bytes`) first if you need to be
240 /// certain the result is valid (i.e. for safety reasons).
241 /// # Examples
242 ///
243 /// ```
244 /// use bytes::Bytes;
245 ///
246 /// let a = Bytes::from(vec![1, 2, 3]);
247 /// assert!(a.is_unique());
248 /// let b = a.clone();
249 /// assert!(!a.is_unique());
250 /// ```
251 pub fn is_unique(&self) -> bool {
252 self.inner.is_unique()
253 }
254
255 /// Creates `Bytes` instance from slice, by copying it.
256 pub fn copy_from_str(data: &str) -> Self {
257 unsafe { Self::from_bytes_unchecked(bytes::Bytes::copy_from_slice(data.as_bytes())) }
258 }
259
260 /// Returns a slice of self for the provided range.
261 ///
262 /// This will increment the reference count for the underlying memory and
263 /// return a new `Bytes` handle set to the slice.
264 ///
265 /// This operation is `O(1)`.
266 ///
267 /// # Examples
268 ///
269 /// ```
270 /// use bytes::Bytes;
271 ///
272 /// let a = Bytes::from(&b"hello world"[..]);
273 /// let b = a.slice(2..5);
274 ///
275 /// assert_eq!(&b[..], b"llo");
276 /// ```
277 ///
278 /// # Panics
279 ///
280 /// Requires that `begin <= end` and `end <= self.len()`, otherwise slicing
281 /// will panic.
282 pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
283 let lo = range.start_bound().cloned();
284 let hi = range.end_bound().cloned();
285 self.as_str().get((lo, hi)).unwrap();
286 unsafe { Self::from_bytes_unchecked(self.inner.slice((lo, hi))) }
287 }
288
289 /// Returns a slice of self that is equivalent to the given `subset`.
290 ///
291 /// When processing a `Bytes` buffer with other tools, one often gets a
292 /// `&[u8]` which is in fact a slice of the `Bytes`, i.e. a subset of it.
293 /// This function turns that `&[u8]` into another `Bytes`, as if one had
294 /// called `self.slice()` with the offsets that correspond to `subset`.
295 ///
296 /// This operation is `O(1)`.
297 ///
298 /// # Examples
299 ///
300 /// ```
301 /// use bytes::Bytes;
302 ///
303 /// let bytes = Bytes::from(&b"012345678"[..]);
304 /// let as_slice = bytes.as_ref();
305 /// let subset = &as_slice[2..6];
306 /// let subslice = bytes.slice_ref(&subset);
307 /// assert_eq!(&subslice[..], b"2345");
308 /// ```
309 ///
310 /// # Panics
311 ///
312 /// Requires that the given `sub` slice is in fact contained within the
313 /// `Bytes` buffer; otherwise this function will panic.
314 pub fn slice_ref(&self, subset: &str) -> Self {
315 unsafe { Self::from_bytes_unchecked(self.inner.slice_ref(subset.as_bytes())) }
316 }
317
318 /// Splits the bytes into two at the given index.
319 ///
320 /// Afterwards `self` contains elements `[0, at)`, and the returned `Bytes`
321 /// contains elements `[at, len)`. It's guaranteed that the memory does not
322 /// move, that is, the address of `self` does not change, and the address of
323 /// the returned slice is `at` bytes after that.
324 ///
325 /// This is an `O(1)` operation that just increases the reference count and
326 /// sets a few indices.
327 ///
328 /// # Examples
329 ///
330 /// ```
331 /// use bytes::Bytes;
332 ///
333 /// let mut a = Bytes::from(&b"hello world"[..]);
334 /// let b = a.split_off(5);
335 ///
336 /// assert_eq!(&a[..], b"hello");
337 /// assert_eq!(&b[..], b" world");
338 /// ```
339 ///
340 /// # Panics
341 ///
342 /// Panics if `at > len`.
343 #[must_use = "consider Bytes::truncate if you don't need the other half"]
344 pub fn split_off(&mut self, at: usize) -> Self {
345 let _char_boundary = self.as_str().split_at(at);
346 unsafe { Self::from_bytes_unchecked(self.inner.split_off(at)) }
347 }
348
349 /// Splits the bytes into two at the given index.
350 ///
351 /// Afterwards `self` contains elements `[at, len)`, and the returned
352 /// `Bytes` contains elements `[0, at)`.
353 ///
354 /// This is an `O(1)` operation that just increases the reference count and
355 /// sets a few indices.
356 ///
357 /// # Examples
358 ///
359 /// ```
360 /// use bytes::Bytes;
361 ///
362 /// let mut a = Bytes::from(&b"hello world"[..]);
363 /// let b = a.split_to(5);
364 ///
365 /// assert_eq!(&a[..], b" world");
366 /// assert_eq!(&b[..], b"hello");
367 /// ```
368 ///
369 /// # Panics
370 ///
371 /// Panics if `at > len`.
372 #[must_use = "consider Bytes::advance if you don't need the other half"]
373 pub fn split_to(&mut self, at: usize) -> Self {
374 let _char_boundary = self.as_str().split_at(at);
375 unsafe { Self::from_bytes_unchecked(self.inner.split_to(at)) }
376 }
377
378 /// Shortens the buffer, keeping the first `len` bytes and dropping the
379 /// rest.
380 ///
381 /// If `len` is greater than the buffer's current length, this has no
382 /// effect.
383 ///
384 /// The [split_off](`Self::split_off()`) method can emulate `truncate`, but this causes the
385 /// excess bytes to be returned instead of dropped.
386 ///
387 /// # Examples
388 ///
389 /// ```
390 /// use bytes::Bytes;
391 ///
392 /// let mut buf = Bytes::from(&b"hello world"[..]);
393 /// buf.truncate(5);
394 /// assert_eq!(buf, b"hello"[..]);
395 /// ```
396 #[inline]
397 pub fn truncate(&mut self, len: usize) {
398 if len < self.len() {
399 let _char_boundary = self.as_str().split_at(len);
400 self.inner.truncate(len)
401 };
402 }
403
404 /// Clears the buffer, removing all data.
405 ///
406 /// # Examples
407 ///
408 /// ```
409 /// use bytes::Bytes;
410 ///
411 /// let mut buf = Bytes::from(&b"hello world"[..]);
412 /// buf.clear();
413 /// assert!(buf.is_empty());
414 /// ```
415 #[inline]
416 pub fn clear(&mut self) {
417 self.truncate(0);
418 }
419
420 /// Try to convert self into `BytesMut`.
421 ///
422 /// If `self` is unique for the entire original buffer, this will succeed
423 /// and return a `BytesMut` with the contents of `self` without copying.
424 /// If `self` is not unique for the entire original buffer, this will fail
425 /// and return self.
426 ///
427 /// This will also always fail if the buffer was constructed via either
428 /// [from_owner](Bytes::from_owner) or [from_static](Bytes::from_static).
429 ///
430 /// # Examples
431 ///
432 /// ```
433 /// use bytes::{Bytes, BytesMut};
434 ///
435 /// let bytes = Bytes::from(b"hello".to_vec());
436 /// assert_eq!(bytes.try_into_mut(), Ok(BytesMut::from(&b"hello"[..])));
437 /// ```
438 pub fn try_into_mut(self) -> Result<Utf8BytesMut, Utf8Bytes> {
439 match self.inner.try_into_mut() {
440 Ok(it) => Ok(unsafe { Utf8BytesMut::from_bytes_mut_unchecked(it) }),
441 Err(it) => Err(unsafe { Self::from_bytes_unchecked(it) }),
442 }
443 }
444}
445
446impl Clone for Utf8Bytes {
447 #[inline]
448 fn clone(&self) -> Utf8Bytes {
449 unsafe { Self::from_bytes_unchecked(self.inner.clone()) }
450 }
451 fn clone_from(&mut self, source: &Self) {
452 self.inner.clone_from(&source.inner);
453 }
454}
455
456impl Deref for Utf8Bytes {
457 type Target = str;
458
459 #[inline]
460 fn deref(&self) -> &str {
461 self.as_str()
462 }
463}
464
465impl AsRef<str> for Utf8Bytes {
466 #[inline]
467 fn as_ref(&self) -> &str {
468 self.as_str()
469 }
470}
471
472impl AsRef<[u8]> for Utf8Bytes {
473 #[inline]
474 fn as_ref(&self) -> &[u8] {
475 self.as_str().as_bytes()
476 }
477}
478
479impl hash::Hash for Utf8Bytes {
480 fn hash<H>(&self, state: &mut H)
481 where
482 H: hash::Hasher,
483 {
484 self.as_str().hash(state);
485 }
486}
487
488impl Borrow<str> for Utf8Bytes {
489 fn borrow(&self) -> &str {
490 self.as_str()
491 }
492}
493
494impl FromIterator<char> for Utf8Bytes {
495 fn from_iter<T: IntoIterator<Item = char>>(into_iter: T) -> Self {
496 String::from_iter(into_iter).into()
497 }
498}
499
500// impl Eq
501
502impl<T: AsRef<str>> PartialEq<T> for Utf8Bytes {
503 fn eq(&self, other: &T) -> bool {
504 self.as_str() == other.as_ref()
505 }
506}
507
508impl<T: AsRef<str>> PartialOrd<T> for Utf8Bytes {
509 fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
510 self.as_str().partial_cmp(other.as_ref())
511 }
512}
513
514impl Ord for Utf8Bytes {
515 fn cmp(&self, other: &Utf8Bytes) -> cmp::Ordering {
516 self.as_str().cmp(other.as_str())
517 }
518}
519
520impl Eq for Utf8Bytes {}
521
522impl PartialEq<Utf8Bytes> for str {
523 fn eq(&self, other: &Utf8Bytes) -> bool {
524 self.eq(other.as_str())
525 }
526}
527impl PartialEq<Utf8Bytes> for String {
528 fn eq(&self, other: &Utf8Bytes) -> bool {
529 self.eq(other.as_str())
530 }
531}
532impl<'a> PartialEq<Utf8Bytes> for Cow<'a, str> {
533 fn eq(&self, other: &Utf8Bytes) -> bool {
534 self.eq(other.as_str())
535 }
536}
537
538impl PartialOrd<Utf8Bytes> for str {
539 fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
540 self.partial_cmp(other.as_str())
541 }
542}
543impl PartialOrd<Utf8Bytes> for String {
544 fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
545 self.as_str().partial_cmp(other.as_str())
546 }
547}
548impl PartialOrd<Utf8Bytes> for Cow<'_, str> {
549 fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
550 (**self).partial_cmp(other.as_str())
551 }
552}
553
554// impl From
555
556impl Default for Utf8Bytes {
557 #[inline]
558 fn default() -> Utf8Bytes {
559 Utf8Bytes::new()
560 }
561}
562
563impl From<&'static str> for Utf8Bytes {
564 fn from(s: &'static str) -> Utf8Bytes {
565 Utf8Bytes::from_static(s)
566 }
567}
568
569impl From<Box<str>> for Utf8Bytes {
570 fn from(slice: Box<str>) -> Utf8Bytes {
571 unsafe { Self::from_bytes_unchecked(bytes::Bytes::from(slice.into_boxed_bytes())) }
572 }
573}
574
575impl From<Utf8Bytes> for bytes::Bytes {
576 /// Convert self into `BytesMut`.
577 ///
578 /// If `bytes` is unique for the entire original buffer, this will return a
579 /// `BytesMut` with the contents of `bytes` without copying.
580 /// If `bytes` is not unique for the entire original buffer, this will make
581 /// a copy of `bytes` subset of the original buffer in a new `BytesMut`.
582 ///
583 /// # Examples
584 ///
585 /// ```
586 /// use bytes::{Bytes, BytesMut};
587 ///
588 /// let bytes = Bytes::from(b"hello".to_vec());
589 /// assert_eq!(BytesMut::from(bytes), BytesMut::from(&b"hello"[..]));
590 /// ```
591 fn from(utf8: Utf8Bytes) -> Self {
592 utf8.inner
593 }
594}
595
596impl From<Utf8Bytes> for Utf8BytesMut {
597 /// Convert self into `BytesMut`.
598 ///
599 /// If `bytes` is unique for the entire original buffer, this will return a
600 /// `BytesMut` with the contents of `bytes` without copying.
601 /// If `bytes` is not unique for the entire original buffer, this will make
602 /// a copy of `bytes` subset of the original buffer in a new `BytesMut`.
603 ///
604 /// # Examples
605 ///
606 /// ```
607 /// use bytes::{Bytes, BytesMut};
608 ///
609 /// let bytes = Bytes::from(b"hello".to_vec());
610 /// assert_eq!(BytesMut::from(bytes), BytesMut::from(&b"hello"[..]));
611 /// ```
612 fn from(bytes: Utf8Bytes) -> Self {
613 unsafe { Self::from_bytes_mut_unchecked(bytes.inner.into()) }
614 }
615}
616
617impl From<String> for Utf8Bytes {
618 fn from(s: String) -> Utf8Bytes {
619 unsafe { Utf8Bytes::from_bytes_unchecked(bytes::Bytes::from(s.into_bytes())) }
620 }
621}
622
623impl From<Utf8Bytes> for Vec<u8> {
624 fn from(utf8: Utf8Bytes) -> Vec<u8> {
625 utf8.inner.into()
626 }
627}