bytestr/
lib.rs

1#![no_std]
2#![warn(
3    missing_docs,
4    missing_debug_implementations,
5    clippy::all,
6    clippy::style,
7    clippy::correctness,
8    clippy::complexity,
9    clippy::suspicious,
10    clippy::perf,
11    clippy::pedantic,
12    clippy::nursery,
13    clippy::cargo
14)]
15
16//! # `ByteStr`
17//!
18//! A zero-copy, cheaply cloneable, and sliceable immutable UTF-8 encoded string type.
19//!
20//! `ByteStr` is built on top of [`bytes::Bytes`] and provides a UTF-8 guaranteed string
21//! that can be cloned and sliced without additional allocations. This makes it perfect
22//! for high-performance network programming, parsing, and any scenario where you need
23//! efficient string manipulation.
24//!
25//! ## Examples
26//!
27//! ### Basic Usage
28//!
29//! ```rust
30//! use bytestr::ByteStr;
31//!
32//! // Create from static string (zero-cost)
33//! let static_str = ByteStr::from_static("Hello, world!");
34//!
35//! // Create from String (reuses allocation)
36//! let from_string = ByteStr::from("Hello, world!".to_string());
37//!
38//! // Create from bytes with validation
39//! let from_bytes = ByteStr::from_utf8(b"Hello, world!".to_vec()).unwrap();
40//!
41//! // All are equal
42//! assert_eq!(static_str, from_string);
43//! assert_eq!(from_string, from_bytes);
44//! ```
45//!
46//! ### Zero-Copy Operations
47//!
48//! ```rust
49//! use bytestr::ByteStr;
50//!
51//! let original = ByteStr::from_static("Hello, world!");
52//!
53//! // Cloning is O(1) - just increments reference count
54//! let cloned = original.clone();
55//!
56//! // Slicing is O(1) - creates a new view without copying
57//! let original_str = original.as_str();
58//! let slice = original.slice_ref(&original_str[7..12]); // "world"
59//!
60//! // Or use convenient indexing syntax
61//! let slice_by_index = &original[7..12]; // "world" (returns &str)
62//!
63//! assert_eq!(slice.as_str(), "world");
64//! assert_eq!(slice_by_index, "world");
65//! ```
66//!
67//! ### String Operations
68//!
69//! ```rust
70//! use bytestr::ByteStr;
71//!
72//! let s = ByteStr::from("Hello, 世界! 🦀");
73//!
74//! // All standard string operations work
75//! assert_eq!(s.len(), 19); // Byte length (not character count)
76//! assert!(s.starts_with("Hello"));
77//! assert!(s.contains("世界"));
78//! assert!(s.contains("🦀"));
79//! assert!(s.ends_with("🦀"));
80//! ```
81//!
82//! ### Zero-Copy Parsing
83//!
84//! `ByteStr` provides powerful parsing utilities that maintain zero-copy semantics:
85//!
86//! ```rust
87//! use bytestr::ByteStr;
88//!
89//! // HTTP request parsing
90//! let request = ByteStr::from("GET /api/users HTTP/1.1\r\nHost: example.com\r\n");
91//! let (request_line, headers) = request.split_once("\r\n").unwrap();
92//!
93//! let mut parts = request_line.split_whitespace();
94//! let method = parts.next().unwrap();     // "GET"
95//! let path = parts.next().unwrap();       // "/api/users"
96//! let version = parts.next().unwrap();    // "HTTP/1.1"
97//!
98//! // Configuration parsing
99//! let config = ByteStr::from("port=8080\nhost=localhost\n");
100//! for line in config.lines() {
101//!     if let Some((key, value)) = line.split_once("=") {
102//!         println!("{}={}", key.as_str(), value.as_str());
103//!     }
104//! }
105//!
106//! // Lexical analysis
107//! let code = ByteStr::from("let x = 42;");
108//! let (identifier, rest) = code.skip_while(|c| c.is_whitespace())
109//!                              .take_while(|c| c.is_alphabetic());
110//! assert_eq!(identifier.as_str(), "let");
111//! ```
112//!
113//! ## Optional Features
114//!
115//! ### Serde Support
116//!
117//! Enable the `serde` feature for serialization support:
118//!
119//! ```toml
120//! [dependencies]
121//! bytestr = { version = "0.2", features = ["serde"] }
122//! ```
123
124extern crate alloc;
125
126mod helper;
127mod impls;
128#[cfg(feature = "serde")]
129mod serde;
130use alloc::borrow::Cow;
131use alloc::string::{FromUtf16Error, String};
132use bytes::Bytes;
133use core::ops::Deref;
134use core::str::Utf8Error;
135
136/// A cheaply cloneable and sliceable immutable UTF-8 encoded string.
137#[derive(Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
138pub struct ByteStr(Bytes);
139
140impl ByteStr {
141    /// Creates an empty new `ByteStr`.
142    ///
143    /// This operation is very cheap as it doesn't allocate any memory.
144    ///
145    /// # Examples
146    ///
147    /// ```
148    /// use bytestr::ByteStr;
149    ///
150    /// let s = ByteStr::new();
151    /// assert!(s.is_empty());
152    /// assert_eq!(s.len(), 0);
153    /// ```
154    #[must_use]
155    pub const fn new() -> Self {
156        Self(Bytes::new())
157    }
158
159    /// Converts a vector of bytes to a `ByteStr`.
160    ///
161    /// This method will reuse the existing allocation, so no cloning will happen.
162    /// The bytes are validated to ensure they form valid UTF-8.
163    ///
164    /// # Examples
165    ///
166    /// ```
167    /// use bytestr::ByteStr;
168    ///
169    /// // Valid UTF-8
170    /// let valid_bytes = b"Hello, world!".to_vec();
171    /// let s = ByteStr::from_utf8(valid_bytes).unwrap();
172    /// assert_eq!(s.as_str(), "Hello, world!");
173    ///
174    /// // Invalid UTF-8
175    /// let invalid_bytes = vec![0xFF, 0xFE, 0xFD];
176    /// assert!(ByteStr::from_utf8(invalid_bytes).is_err());
177    /// ```
178    ///
179    /// # Errors
180    ///
181    /// Returns an error if the provided bytes are not valid UTF-8.
182    pub fn from_utf8(bytes: impl Into<Bytes>) -> Result<Self, Utf8Error> {
183        let bytes = bytes.into();
184
185        match core::str::from_utf8(bytes.as_ref()) {
186            Ok(_) => Ok(unsafe { Self::from_utf8_unchecked(bytes) }),
187            Err(e) => Err(e),
188        }
189    }
190
191    /// Converts a vector of bytes to a `ByteStr`, replacing invalid UTF-8 sequences with the replacement character (U+FFFD).
192    ///
193    /// This method will reuse the existing allocation if the bytes are valid UTF-8, or allocate a new string if invalid sequences are found.
194    ///
195    /// # Examples
196    ///
197    /// ```
198    /// use bytestr::ByteStr;
199    ///
200    /// let valid_bytes = b"Hello, world!".to_vec();
201    /// let s = ByteStr::from_utf8_lossy(valid_bytes);
202    /// assert_eq!(s.as_str(), "Hello, world!");
203    ///
204    /// let invalid_bytes = vec![0xFF, 0xFE, 0xFD];
205    /// let s = ByteStr::from_utf8_lossy(invalid_bytes);
206    /// assert_eq!(s.as_str(), "\u{FFFD}\u{FFFD}\u{FFFD}");
207    /// ```
208    pub fn from_utf8_lossy(bytes: impl Into<Bytes>) -> Self {
209        let bytes = bytes.into();
210
211        match String::from_utf8_lossy(bytes.as_ref()) {
212            Cow::Borrowed(_) => unsafe { Self::from_utf8_unchecked(bytes) },
213            Cow::Owned(string) => Self::from(string),
214        }
215    }
216
217    /// Converts a slice of UTF-16 encoded data to a `ByteStr`.
218    ///
219    /// This method will allocate a new string and convert the UTF-16 data to UTF-8.
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// use bytestr::ByteStr;
225    ///
226    /// let utf16: Vec<u16> = "Hello, world!".encode_utf16().collect();
227    /// let s = ByteStr::from_utf16(&utf16).unwrap();
228    /// assert_eq!(s.as_str(), "Hello, world!");
229    /// ```
230    ///
231    /// # Errors
232    ///
233    /// Returns an error if the provided UTF-16 data is not valid.
234    pub fn from_utf16(bytes: impl AsRef<[u16]>) -> Result<Self, FromUtf16Error> {
235        String::from_utf16(bytes.as_ref()).map(Self::from)
236    }
237
238    /// Converts a slice of UTF-16 encoded data to a `ByteStr`, replacing invalid sequences with the replacement character (U+FFFD).
239    ///
240    /// This method will allocate a new string and convert the UTF-16 data to UTF-8, replacing any invalid sequences.
241    ///
242    /// # Examples
243    ///
244    /// ```
245    /// use bytestr::ByteStr;
246    ///
247    /// let utf16: Vec<u16> = vec![0xD800, 0xDC00, 0x0041]; // valid surrogate pair + 'A'
248    /// let s = ByteStr::from_utf16_lossy(&utf16);
249    /// assert!(s.as_str().contains('\u{FFFD}') || s.as_str().contains('A'));
250    /// ```
251    pub fn from_utf16_lossy(bytes: impl AsRef<[u16]>) -> Self {
252        String::from_utf16_lossy(bytes.as_ref()).into()
253    }
254
255    /// Creates a `ByteStr` from a static string slice.
256    ///
257    /// This is a zero-cost operation as it directly references the static data
258    /// without any allocation or copying.
259    ///
260    /// # Examples
261    ///
262    /// ```
263    /// use bytestr::ByteStr;
264    ///
265    /// let s = ByteStr::from_static("Hello, static world!");
266    /// assert_eq!(s.as_str(), "Hello, static world!");
267    /// ```
268    #[must_use]
269    pub const fn from_static(s: &'static str) -> Self {
270        unsafe { Self::from_utf8_unchecked(Bytes::from_static(s.as_bytes())) }
271    }
272
273    /// Creates a `ByteStr` from bytes without UTF-8 validation.
274    ///
275    /// # Examples
276    ///
277    /// ```
278    /// use bytestr::ByteStr;
279    /// use bytes::Bytes;
280    ///
281    /// let bytes = Bytes::from("Hello, world!");
282    /// let s = unsafe { ByteStr::from_utf8_unchecked(bytes) };
283    /// assert_eq!(s.as_str(), "Hello, world!");
284    /// ```
285    ///
286    /// # Safety
287    ///
288    /// This function is unsafe because it does not check that the bytes passed
289    /// to it are valid UTF-8. If this constraint is violated, it may cause
290    /// memory unsafety issues with future users of the `ByteStr`.
291    #[must_use]
292    pub const unsafe fn from_utf8_unchecked(bytes: Bytes) -> Self {
293        Self(bytes)
294    }
295    /// Unwraps the `ByteStr` into the inner `Bytes` object.
296    ///
297    /// This operation consumes the `ByteStr` and returns the underlying
298    /// `Bytes` without any copying.
299    ///
300    /// # Examples
301    ///
302    /// ```
303    /// use bytestr::ByteStr;
304    ///
305    /// let s = ByteStr::from("Hello, world!");
306    /// let bytes = s.into_bytes();
307    /// assert_eq!(bytes.as_ref(), b"Hello, world!");
308    /// ```
309    pub fn into_bytes(self) -> Bytes {
310        self.0
311    }
312
313    /// Extracts a string slice containing the entire string.
314    ///
315    /// # Examples
316    ///
317    /// ```
318    /// use bytestr::ByteStr;
319    ///
320    /// let s = ByteStr::from("Hello, world!");
321    /// assert_eq!(s.as_str(), "Hello, world!");
322    /// ```
323    pub fn as_str(&self) -> &str {
324        unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
325    }
326
327    /// Shortens the string, keeping the first `len` bytes and dropping the rest.
328    ///
329    /// # Panics
330    ///
331    /// Panics if the position of dropping the rest is not on a UTF-8 code point boundary, or if it is beyond the last code point.
332    pub fn truncate(&mut self, len: usize) {
333        if len < self.len() {
334            assert!(self.deref().is_char_boundary(len));
335            unsafe {
336                self.as_bytes_mut().truncate(len);
337            }
338        }
339    }
340
341    /// Returns a slice of self that is equivalent to the given subset.
342    ///
343    /// This operation creates a new `ByteStr` that references a subset of the
344    /// original data without copying. The subset must be a slice of the original
345    /// string that lies on UTF-8 character boundaries.
346    ///
347    /// # Examples
348    ///
349    /// ```
350    /// use bytestr::ByteStr;
351    ///
352    /// let s = ByteStr::from("Hello, world!");
353    /// let original_str = s.as_str();
354    /// let world_slice = &original_str[7..12]; // "world"
355    /// let sliced = s.slice_ref(world_slice);
356    /// assert_eq!(sliced.as_str(), "world");
357    /// ```
358    ///
359    /// # Panics
360    ///
361    /// Panics if the given `subset` is not contained within the `ByteStr`.
362    #[must_use]
363    pub fn slice_ref(&self, subset: &str) -> Self {
364        unsafe { Self::from_utf8_unchecked(self.0.slice_ref(subset.as_bytes())) }
365    }
366
367    /// Removes all contents of the `ByteStr` while retaining the capacity.
368    ///
369    /// # Examples
370    ///
371    /// ```
372    /// use bytestr::ByteStr;
373    ///
374    /// let mut s = ByteStr::from("Hello, world!");
375    /// assert!(!s.is_empty());
376    /// s.clear();
377    /// assert!(s.is_empty());
378    /// ```
379    pub fn clear(&mut self) {
380        self.0.clear();
381    }
382
383    /// Provides a reference to the inner `Bytes` object.
384    ///
385    /// # Examples
386    ///
387    /// ```
388    /// use bytestr::ByteStr;
389    ///
390    /// let s = ByteStr::from("Hello, world!");
391    /// let bytes = s.as_bytes();
392    /// assert_eq!(bytes.len(), 13);
393    /// ```
394    pub const fn as_bytes(&self) -> &Bytes {
395        &self.0
396    }
397
398    /// Provides a mutable reference to the inner `Bytes` object.
399    ///
400    /// # Examples
401    ///
402    /// ```
403    /// use bytestr::ByteStr;
404    ///
405    /// let mut s = ByteStr::from("Hello, world!");
406    /// unsafe {
407    ///     let bytes_mut = s.as_bytes_mut();
408    ///     // Careful: ensure any modifications maintain UTF-8 validity
409    /// }
410    /// ```
411    ///
412    /// # Safety
413    ///
414    /// The caller must ensure that the content of the slice is valid UTF-8
415    /// before the borrow ends and the `ByteStr` is used.
416    pub const unsafe fn as_bytes_mut(&mut self) -> &mut Bytes {
417        &mut self.0
418    }
419
420    /// Returns `true` if the `ByteStr` has a length of zero bytes.
421    ///
422    /// # Examples
423    ///
424    /// ```
425    /// use bytestr::ByteStr;
426    ///
427    /// let empty = ByteStr::new();
428    /// assert!(empty.is_empty());
429    ///
430    /// let non_empty = ByteStr::from("hello");
431    /// assert!(!non_empty.is_empty());
432    /// ```
433    #[must_use]
434    pub const fn is_empty(&self) -> bool {
435        self.0.is_empty()
436    }
437
438    /// Returns the length of this `ByteStr` in bytes.
439    ///
440    /// # Examples
441    ///
442    /// ```
443    /// use bytestr::ByteStr;
444    ///
445    /// let s = ByteStr::from("Hello, 世界!");
446    /// assert_eq!(s.len(), 14);
447    /// ```
448    #[must_use]
449    pub const fn len(&self) -> usize {
450        self.0.len()
451    }
452
453    /// Returns the capacity of this `ByteStr` in bytes.
454    ///
455    /// The capacity represents the total amount of memory allocated
456    /// for this `ByteStr`, which may be larger than the length.
457    ///
458    /// # Examples
459    ///
460    /// ```
461    /// use bytestr::ByteStr;
462    ///
463    /// let s = ByteStr::from("Hello!");
464    /// assert!(s.capacity() >= s.len());
465    /// ```
466    #[must_use]
467    pub const fn capacity(&self) -> usize {
468        // Bytes doesn't expose capacity directly, but we can use len() as a reasonable approximation
469        // since Bytes manages memory efficiently
470        self.0.len()
471    }
472}
473
474#[cfg(test)]
475mod tests;
bytestr/lib.rs

bytestr/
lib.rs