utf8_bytes/bytes.rs
1use crate::FromUtf8Error;
2
3use super::Utf8BytesMut;
4
5use core::iter::FromIterator;
6use core::ops::{Deref, RangeBounds};
7use core::{cmp, fmt, hash};
8use std::borrow::Cow;
9
10use alloc::{borrow::Borrow, boxed::Box, string::String, vec::Vec};
11
12/// A cheaply cloneable and sliceable chunk of contiguous memory filled with
13/// UTF-8 bytes.
14///
15/// This is built on [`Bytes`](bytes::Bytes), see its documentation for more.
16#[repr(transparent)]
17pub struct Utf8Bytes {
18 /// # Invariant
19 /// - contains UTF-8.
20 #[deprecated = "use the accessors to preserve the invariants"]
21 inner: bytes::Bytes,
22}
23
24impl Utf8Bytes {
25 /// Wrap `bytes` if it is UTF-8.
26 ///
27 /// If it is not, you can perform a lossy conversion using [`FromUtf8Error::into_utf8_lossy`].
28 pub fn from_bytes(bytes: bytes::Bytes) -> Result<Self, FromUtf8Error<bytes::Bytes>> {
29 match str::from_utf8(&bytes) {
30 // SAFETY:
31 // - performed validation
32 Ok(_) => Ok(unsafe { Self::from_bytes_unchecked(bytes) }),
33 Err(error) => Err(FromUtf8Error { bytes, error }),
34 }
35 }
36
37 /// # Safety
38 /// `bytes` must only contain UTF-8.
39 pub const unsafe fn from_bytes_unchecked(bytes: bytes::Bytes) -> Self {
40 #[expect(deprecated)]
41 Self { inner: bytes }
42 }
43
44 /// Get the contents of the buffer.
45 pub fn as_str(&self) -> &str {
46 // SAFETY:
47 // - cannot create Self from invalid UTF-8 without using `unsafe`
48 unsafe { str::from_utf8_unchecked(self.inner()) }
49 }
50}
51
52impl Utf8Bytes {
53 /// Return a shared reference to the inner object.
54 #[inline]
55 pub const fn inner(&self) -> &bytes::Bytes {
56 #[expect(deprecated)]
57 &self.inner
58 }
59
60 /// Return an exclusive reference to the inner object.
61 ///
62 /// # Safety
63 /// - The returned bytes must be returned containing UTF-8
64 #[inline]
65 pub const unsafe fn inner_mut(&mut self) -> &mut bytes::Bytes {
66 #[expect(deprecated)]
67 &mut self.inner
68 }
69 #[inline]
70 pub fn into_inner(self) -> bytes::Bytes {
71 #[expect(deprecated)]
72 self.inner
73 }
74}
75
76impl Utf8Bytes {
77 /// Creates a new empty `Bytes`.
78 ///
79 /// This will not allocate and the returned handle will be empty.
80 ///
81 /// # Examples
82 ///
83 /// ```
84 /// use utf8_bytes::Utf8Bytes;
85 ///
86 /// let b = Utf8Bytes::new();
87 /// assert_eq!(b, "");
88 /// ```
89 #[inline]
90 pub const fn new() -> Self {
91 // SAFETY:
92 // - empty is valid UTF-8
93 unsafe { Self::from_bytes_unchecked(bytes::Bytes::new()) }
94 }
95
96 /// Creates a new [`Utf8Bytes`] from a static slice.
97 ///
98 /// The returned [`Utf8Bytes`] will point directly to the static slice.
99 /// There is no allocating or copying.
100 ///
101 /// # Examples
102 ///
103 /// ```
104 /// use utf8_bytes::Utf8Bytes;
105 ///
106 /// let b = Utf8Bytes::from_static("hello");
107 /// assert_eq!(b, "hello");
108 /// ```
109 #[inline]
110 pub const fn from_static(str: &'static str) -> Self {
111 // SAFETY:
112 // - bytes: &str
113 unsafe { Self::from_bytes_unchecked(bytes::Bytes::from_static(str.as_bytes())) }
114 }
115
116 /// Create [`Utf8Bytes`] with a buffer whose lifetime is controlled
117 /// via an explicit owner.
118 ///
119 /// See [`bytes::Bytes::from_owner`] for more.
120 pub fn from_owner<T>(owner: T) -> Self
121 where
122 T: AsRef<str> + Send + 'static,
123 {
124 #[repr(transparent)]
125 struct AsBytes<T>(T);
126 impl<T: AsRef<str>> AsRef<[u8]> for AsBytes<T> {
127 fn as_ref(&self) -> &[u8] {
128 self.0.as_ref().as_bytes()
129 }
130 }
131 // SAFETY:
132 // - owner: AsRef<str>
133 unsafe { Self::from_bytes_unchecked(bytes::Bytes::from_owner(AsBytes(owner))) }
134 }
135
136 /// Returns the number of bytes contained in this [`Utf8Bytes`].
137 ///
138 /// # Examples
139 ///
140 /// ```
141 /// use utf8_bytes::Utf8Bytes;
142 ///
143 /// let b = Utf8Bytes::from("hello");
144 /// assert_eq!(b.len(), 5);
145 /// ```
146 #[inline]
147 pub const fn len(&self) -> usize {
148 self.inner().len()
149 }
150
151 /// Returns true if the [`Utf8Bytes`] has a length of 0.
152 ///
153 /// # Examples
154 ///
155 /// ```
156 /// use utf8_bytes::Utf8Bytes;
157 ///
158 /// let b = Utf8Bytes::new();
159 /// assert!(b.is_empty());
160 /// ```
161 #[inline]
162 pub const fn is_empty(&self) -> bool {
163 self.inner().is_empty()
164 }
165
166 /// Returns true if this is the only reference to the data and
167 /// <code>[Into]<[Utf8BytesMut]></code> would avoid cloning the underlying
168 /// buffer.
169 ///
170 /// Always returns false if the data is backed by a [static slice](Self::from_static),
171 /// or an [owner](Self::from_owner).
172 ///
173 /// The result of this method may be invalidated immediately if another
174 /// thread clones this value while this is being called. Ensure you have
175 /// unique access to this value (`&mut Bytes`) first if you need to be
176 /// certain the result is valid (i.e. for safety reasons).
177 ///
178 /// # Examples
179 ///
180 /// ```
181 /// use utf8_bytes::Utf8Bytes;
182 ///
183 /// let a = Utf8Bytes::copy_from_str("123");
184 /// assert!(a.is_unique());
185 /// let b = a.clone();
186 /// assert!(!a.is_unique());
187 /// ```
188 pub fn is_unique(&self) -> bool {
189 self.inner().is_unique()
190 }
191
192 /// Creates a [`Utf8Bytes`] instance from slice, by copying it.
193 pub fn copy_from_str(data: &str) -> Self {
194 // SAFETY:
195 // - data: &str
196 unsafe { Self::from_bytes_unchecked(bytes::Bytes::copy_from_slice(data.as_bytes())) }
197 }
198
199 /// Returns a slice of self for the provided range.
200 ///
201 /// This will increment the reference count for the underlying memory and
202 /// return a new [`Utf8Bytes`] handle set to the slice.
203 ///
204 /// This operation is `O(1)`.
205 ///
206 /// # Examples
207 ///
208 /// ```
209 /// use utf8_bytes::Utf8Bytes;
210 ///
211 /// let a = Utf8Bytes::from("hello world");
212 /// let b = a.slice(2..5);
213 ///
214 /// assert_eq!(b, "llo");
215 /// ```
216 ///
217 /// # Panics
218 ///
219 /// - If `range` is out of bounds.
220 /// - `range` breaks a char boundary.
221 ///
222 #[track_caller]
223 pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
224 let lo = range.start_bound().cloned();
225 let hi = range.end_bound().cloned();
226 self.as_str()
227 .get((lo, hi))
228 .expect("range out of bounds or not on a char boundary");
229 // Safety:
230 // - checked the equivalent operation on &str
231 unsafe { Self::from_bytes_unchecked(self.inner().slice((lo, hi))) }
232 }
233
234 /// Returns a slice of self that is equivalent to the given `subset`.
235 ///
236 /// When processing a [`Utf8Bytes`] buffer with other tools, one often gets
237 /// a `&str` which is in fact a slice of the [`Utf8Bytes`],
238 /// i.e. a subset of it.
239 ///
240 /// This function turns that `&str` into another [`Utf8Bytes`],
241 /// as if one had called `self.slice()` with the offsets that correspond to
242 /// `subset`.
243 ///
244 /// This operation is `O(1)`.
245 ///
246 /// # Examples
247 ///
248 /// ```
249 /// use utf8_bytes::Utf8Bytes;
250 ///
251 /// let bytes = Utf8Bytes::from("012345678");
252 /// let subset = &bytes[2..6];
253 /// let subslice = bytes.slice_ref(&subset);
254 /// assert_eq!(subslice, "2345");
255 /// ```
256 ///
257 /// # Panics
258 ///
259 /// Requires that the given `subset` slice is in fact contained within the
260 /// [`Utf8Bytes`] buffer; otherwise this function will panic.
261 pub fn slice_ref(&self, subset: &str) -> Self {
262 // SAFETY:
263 // - subset: &str _and_ the forwarded call does the bounds checks
264 unsafe { Self::from_bytes_unchecked(self.inner().slice_ref(subset.as_bytes())) }
265 }
266
267 /// Splits the bytes into two at the given index.
268 ///
269 /// Afterwards `self` contains elements `[0, at)`, and the returned `Bytes`
270 /// contains elements `[at, len)`. It's guaranteed that the memory does not
271 /// move, that is, the address of `self` does not change, and the address of
272 /// the returned slice is `at` bytes after that.
273 ///
274 /// This is an `O(1)` operation that just increases the reference count and
275 /// sets a few indices.
276 ///
277 /// # Examples
278 ///
279 /// ```
280 /// use utf8_bytes::Utf8Bytes;
281 ///
282 /// let mut a = Utf8Bytes::from("hello world");
283 /// let b = a.split_off(5);
284 ///
285 /// assert_eq!(a, "hello");
286 /// assert_eq!(b, " world");
287 /// ```
288 ///
289 /// # Panics
290 ///
291 /// Panics if `at > len` or does not lie on a char boundary.
292 #[must_use = "consider Bytes::truncate if you don't need the other half"]
293 pub fn split_off(&mut self, at: usize) -> Self {
294 let _char_boundary = self.as_str().split_at(at);
295 // SAFETY:
296 // - checked boundary above
297 unsafe { Self::from_bytes_unchecked(self.inner_mut().split_off(at)) }
298 }
299
300 /// Splits the bytes into two at the given index.
301 ///
302 /// Afterwards `self` contains elements `[at, len)`, and the returned
303 /// `Bytes` contains elements `[0, at)`.
304 ///
305 /// This is an `O(1)` operation that just increases the reference count and
306 /// sets a few indices.
307 ///
308 /// # Examples
309 ///
310 /// ```
311 /// use utf8_bytes::Utf8Bytes;
312 ///
313 /// let mut a = Utf8Bytes::from("hello world");
314 /// let b = a.split_to(5);
315 ///
316 /// assert_eq!(a, " world");
317 /// assert_eq!(b, "hello");
318 /// ```
319 ///
320 /// # Panics
321 ///
322 /// Panics if `at > len` or does not lie on a char boundary.
323 #[must_use = "consider Bytes::advance if you don't need the other half"]
324 pub fn split_to(&mut self, at: usize) -> Self {
325 let _char_boundary = self.as_str().split_at(at);
326 // SAFETY:
327 // - checked boundary above
328 unsafe { Self::from_bytes_unchecked(self.inner_mut().split_to(at)) }
329 }
330
331 /// Shortens the buffer, keeping the first `len` bytes and dropping the
332 /// rest.
333 ///
334 /// If `len` is greater than the buffer's current length, this has no
335 /// effect.
336 ///
337 /// The [split_off](`Self::split_off()`) method can emulate `truncate`, but this causes the
338 /// excess bytes to be returned instead of dropped.
339 ///
340 /// # Examples
341 ///
342 /// ```
343 /// use utf8_bytes::Utf8Bytes;
344 ///
345 /// let mut buf = Utf8Bytes::from("hello world");
346 /// buf.truncate(5);
347 /// assert_eq!(buf, "hello");
348 /// ```
349 ///
350 /// # Panics
351 ///
352 /// If `len` does not lie on a char boundary.
353 #[inline]
354 pub fn truncate(&mut self, len: usize) {
355 if len < self.len() {
356 let _char_boundary = self.as_str().split_at(len);
357 // SAFETY:
358 // - checked char boundary above
359 unsafe { self.inner_mut().truncate(len) }
360 };
361 }
362
363 /// Clears the buffer, removing all data.
364 ///
365 /// # Examples
366 ///
367 /// ```
368 /// use utf8_bytes::Utf8Bytes;
369 ///
370 /// let mut buf = Utf8Bytes::from("hello world");
371 /// buf.clear();
372 /// assert!(buf.is_empty());
373 /// ```
374 #[inline]
375 pub fn clear(&mut self) {
376 self.truncate(0);
377 }
378
379 /// Try to convert self into `BytesMut`.
380 ///
381 /// If `self` is unique for the entire original buffer, this will succeed
382 /// and return a `BytesMut` with the contents of `self` without copying.
383 /// If `self` is not unique for the entire original buffer, this will fail
384 /// and return self.
385 ///
386 /// This will also always fail if the buffer was constructed via either
387 /// [from_owner](Bytes::from_owner) or [from_static](Bytes::from_static).
388 ///
389 /// # Examples
390 ///
391 /// ```
392 /// use utf8_bytes::{Utf8Bytes, Utf8BytesMut};
393 ///
394 /// let bytes = Utf8Bytes::from("hello".to_string());
395 /// assert_eq!(bytes.try_into_mut(), Ok(Utf8BytesMut::from("hello")));
396 /// ```
397 pub fn try_into_mut(self) -> Result<Utf8BytesMut, Utf8Bytes> {
398 match self.into_inner().try_into_mut() {
399 // SAFETY:
400 // - the bytes came from `self`
401 Ok(it) => Ok(unsafe { Utf8BytesMut::from_bytes_mut_unchecked(it) }),
402 Err(it) => Err(unsafe { Self::from_bytes_unchecked(it) }),
403 }
404 }
405}
406
407impl fmt::Debug for Utf8Bytes {
408 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
409 self.as_str().fmt(f)
410 }
411}
412
413impl fmt::Display for Utf8Bytes {
414 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
415 self.as_str().fmt(f)
416 }
417}
418
419impl Clone for Utf8Bytes {
420 #[inline]
421 fn clone(&self) -> Utf8Bytes {
422 unsafe { Self::from_bytes_unchecked(self.inner().clone()) }
423 }
424 fn clone_from(&mut self, source: &Self) {
425 self.inner().clone_from(&source.inner());
426 }
427}
428
429impl Deref for Utf8Bytes {
430 type Target = str;
431
432 #[inline]
433 fn deref(&self) -> &str {
434 self.as_str()
435 }
436}
437
438impl AsRef<str> for Utf8Bytes {
439 #[inline]
440 fn as_ref(&self) -> &str {
441 self.as_str()
442 }
443}
444
445impl AsRef<[u8]> for Utf8Bytes {
446 #[inline]
447 fn as_ref(&self) -> &[u8] {
448 self.as_str().as_bytes()
449 }
450}
451
452impl hash::Hash for Utf8Bytes {
453 fn hash<H>(&self, state: &mut H)
454 where
455 H: hash::Hasher,
456 {
457 self.as_str().hash(state);
458 }
459}
460
461impl Borrow<str> for Utf8Bytes {
462 fn borrow(&self) -> &str {
463 self.as_str()
464 }
465}
466
467impl FromIterator<char> for Utf8Bytes {
468 fn from_iter<T: IntoIterator<Item = char>>(into_iter: T) -> Self {
469 String::from_iter(into_iter).into()
470 }
471}
472
473// impl Eq
474
475impl<T: AsRef<str>> PartialEq<T> for Utf8Bytes {
476 fn eq(&self, other: &T) -> bool {
477 self.as_str() == other.as_ref()
478 }
479}
480
481impl<T: AsRef<str>> PartialOrd<T> for Utf8Bytes {
482 fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
483 self.as_str().partial_cmp(other.as_ref())
484 }
485}
486
487impl Ord for Utf8Bytes {
488 fn cmp(&self, other: &Utf8Bytes) -> cmp::Ordering {
489 self.as_str().cmp(other.as_str())
490 }
491}
492
// Marker impl: equality between two `Utf8Bytes` comes from the generic
// `PartialEq<T: AsRef<str>>` impl, which compares the `str` contents.
impl Eq for Utf8Bytes {}
494
495impl PartialEq<Utf8Bytes> for str {
496 fn eq(&self, other: &Utf8Bytes) -> bool {
497 self.eq(other.as_str())
498 }
499}
500impl PartialEq<Utf8Bytes> for String {
501 fn eq(&self, other: &Utf8Bytes) -> bool {
502 self.eq(other.as_str())
503 }
504}
505impl<'a> PartialEq<Utf8Bytes> for Cow<'a, str> {
506 fn eq(&self, other: &Utf8Bytes) -> bool {
507 self.eq(other.as_str())
508 }
509}
510
511impl PartialOrd<Utf8Bytes> for str {
512 fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
513 self.partial_cmp(other.as_str())
514 }
515}
516impl PartialOrd<Utf8Bytes> for String {
517 fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
518 self.as_str().partial_cmp(other.as_str())
519 }
520}
521impl PartialOrd<Utf8Bytes> for Cow<'_, str> {
522 fn partial_cmp(&self, other: &Utf8Bytes) -> Option<cmp::Ordering> {
523 (**self).partial_cmp(other.as_str())
524 }
525}
526
527// impl From
528
529impl Default for Utf8Bytes {
530 #[inline]
531 fn default() -> Utf8Bytes {
532 Utf8Bytes::new()
533 }
534}
535
536impl From<&'static str> for Utf8Bytes {
537 fn from(s: &'static str) -> Utf8Bytes {
538 Utf8Bytes::from_static(s)
539 }
540}
541
542impl From<Box<str>> for Utf8Bytes {
543 fn from(slice: Box<str>) -> Utf8Bytes {
544 unsafe { Self::from_bytes_unchecked(bytes::Bytes::from(slice.into_boxed_bytes())) }
545 }
546}
547
548impl From<Utf8Bytes> for bytes::Bytes {
549 fn from(utf8: Utf8Bytes) -> Self {
550 utf8.into_inner()
551 }
552}
553
554impl From<Utf8Bytes> for Utf8BytesMut {
555 /// Convert self into [`Utf8BytesMut`].
556 ///
557 /// If `bytes` is unique for the entire original buffer, this will return a
558 /// `BytesMut` with the contents of `bytes` without copying.
559 /// If `bytes` is not unique for the entire original buffer, this will make
560 /// a copy of `bytes` subset of the original buffer in a new `BytesMut`.
561 ///
562 /// # Examples
563 ///
564 /// ```
565 /// use utf8_bytes::{Utf8Bytes, Utf8BytesMut};
566 ///
567 /// let bytes = Utf8Bytes::copy_from_str("hello");
568 /// assert_eq!(Utf8BytesMut::from(bytes), "hello");
569 /// ```
570 fn from(bytes: Utf8Bytes) -> Self {
571 // SAFETY:
572 // - `bytes` is preserved.
573 unsafe { Self::from_bytes_mut_unchecked(bytes.into_inner().into()) }
574 }
575}
576
577impl From<String> for Utf8Bytes {
578 fn from(s: String) -> Utf8Bytes {
579 // SAFETY:
580 // - s contains UTF-8.
581 unsafe { Utf8Bytes::from_bytes_unchecked(bytes::Bytes::from(s.into_bytes())) }
582 }
583}
584
585impl From<Utf8Bytes> for Vec<u8> {
586 fn from(utf8: Utf8Bytes) -> Vec<u8> {
587 utf8.into_inner().into()
588 }
589}
590
591impl From<Utf8Bytes> for String {
592 fn from(utf8: Utf8Bytes) -> Self {
593 // SAFETY:
594 // - only contains UTF-8
595 unsafe { String::from_utf8_unchecked(utf8.into()) }
596 }
597}