bytestr/lib.rs
1#![no_std]
2#![warn(
3 missing_docs,
4 missing_debug_implementations,
5 clippy::all,
6 clippy::style,
7 clippy::correctness,
8 clippy::complexity,
9 clippy::suspicious,
10 clippy::perf,
11 clippy::pedantic,
12 clippy::nursery,
13 clippy::cargo
14)]
15
16//! # `ByteStr`
17//!
18//! A zero-copy, cheaply cloneable, and sliceable immutable UTF-8 encoded string type.
19//!
20//! `ByteStr` is built on top of [`bytes::Bytes`] and provides a UTF-8 guaranteed string
21//! that can be cloned and sliced without additional allocations. This makes it perfect
22//! for high-performance network programming, parsing, and any scenario where you need
23//! efficient string manipulation.
24//!
25//! ## Examples
26//!
27//! ### Basic Usage
28//!
29//! ```rust
30//! use bytestr::ByteStr;
31//!
32//! // Create from static string (zero-cost)
33//! let static_str = ByteStr::from_static("Hello, world!");
34//!
35//! // Create from String (reuses allocation)
36//! let from_string = ByteStr::from("Hello, world!".to_string());
37//!
38//! // Create from bytes with validation
39//! let from_bytes = ByteStr::from_utf8(b"Hello, world!".to_vec()).unwrap();
40//!
41//! // All are equal
42//! assert_eq!(static_str, from_string);
43//! assert_eq!(from_string, from_bytes);
44//! ```
45//!
46//! ### Zero-Copy Operations
47//!
48//! ```rust
49//! use bytestr::ByteStr;
50//!
51//! let original = ByteStr::from_static("Hello, world!");
52//!
53//! // Cloning is O(1) - just increments reference count
54//! let cloned = original.clone();
55//!
56//! // Slicing is O(1) - creates a new view without copying
57//! let original_str = original.as_str();
58//! let slice = original.slice_ref(&original_str[7..12]); // "world"
59//!
60//! // Or use convenient indexing syntax
61//! let slice_by_index = &original[7..12]; // "world" (returns &str)
62//!
63//! assert_eq!(slice.as_str(), "world");
64//! assert_eq!(slice_by_index, "world");
65//! ```
66//!
67//! ### String Operations
68//!
69//! ```rust
70//! use bytestr::ByteStr;
71//!
//! let s = ByteStr::from("Hello, 世界! 🦀");
//!
//! // All standard string operations work
//! assert_eq!(s.len(), 19); // Byte length (not character count)
//! assert!(s.starts_with("Hello"));
//! assert!(s.contains("世界"));
//! assert!(s.contains("🦀"));
//! assert!(s.ends_with("🦀"));
80//! ```
81//!
82//! ### Zero-Copy Parsing
83//!
84//! `ByteStr` provides powerful parsing utilities that maintain zero-copy semantics:
85//!
86//! ```rust
87//! use bytestr::ByteStr;
88//!
89//! // HTTP request parsing
90//! let request = ByteStr::from("GET /api/users HTTP/1.1\r\nHost: example.com\r\n");
91//! let (request_line, headers) = request.split_once("\r\n").unwrap();
92//!
93//! let mut parts = request_line.split_whitespace();
94//! let method = parts.next().unwrap(); // "GET"
95//! let path = parts.next().unwrap(); // "/api/users"
96//! let version = parts.next().unwrap(); // "HTTP/1.1"
97//!
98//! // Configuration parsing
99//! let config = ByteStr::from("port=8080\nhost=localhost\n");
100//! for line in config.lines() {
101//! if let Some((key, value)) = line.split_once("=") {
102//! println!("{}={}", key.as_str(), value.as_str());
103//! }
104//! }
105//!
106//! // Lexical analysis
107//! let code = ByteStr::from("let x = 42;");
108//! let (identifier, rest) = code.skip_while(|c| c.is_whitespace())
109//! .take_while(|c| c.is_alphabetic());
110//! assert_eq!(identifier.as_str(), "let");
111//! ```
112//!
113//! ## Optional Features
114//!
115//! ### Serde Support
116//!
117//! Enable the `serde` feature for serialization support:
118//!
119//! ```toml
120//! [dependencies]
121//! bytestr = { version = "0.2", features = ["serde"] }
122//! ```
123
124extern crate alloc;
125
126mod helper;
127mod impls;
128#[cfg(feature = "serde")]
129mod serde;
130use alloc::borrow::Cow;
131use alloc::string::{FromUtf16Error, String};
132use bytes::Bytes;
133use core::ops::Deref;
134use core::str::Utf8Error;
135
136/// A cheaply cloneable and sliceable immutable UTF-8 encoded string.
137#[derive(Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
138pub struct ByteStr(Bytes);
139
140impl ByteStr {
141 /// Creates an empty new `ByteStr`.
142 ///
143 /// This operation is very cheap as it doesn't allocate any memory.
144 ///
145 /// # Examples
146 ///
147 /// ```
148 /// use bytestr::ByteStr;
149 ///
150 /// let s = ByteStr::new();
151 /// assert!(s.is_empty());
152 /// assert_eq!(s.len(), 0);
153 /// ```
154 #[must_use]
155 pub const fn new() -> Self {
156 Self(Bytes::new())
157 }
158
159 /// Converts a vector of bytes to a `ByteStr`.
160 ///
161 /// This method will reuse the existing allocation, so no cloning will happen.
162 /// The bytes are validated to ensure they form valid UTF-8.
163 ///
164 /// # Examples
165 ///
166 /// ```
167 /// use bytestr::ByteStr;
168 ///
169 /// // Valid UTF-8
170 /// let valid_bytes = b"Hello, world!".to_vec();
171 /// let s = ByteStr::from_utf8(valid_bytes).unwrap();
172 /// assert_eq!(s.as_str(), "Hello, world!");
173 ///
174 /// // Invalid UTF-8
175 /// let invalid_bytes = vec![0xFF, 0xFE, 0xFD];
176 /// assert!(ByteStr::from_utf8(invalid_bytes).is_err());
177 /// ```
178 ///
179 /// # Errors
180 ///
181 /// Returns an error if the provided bytes are not valid UTF-8.
182 pub fn from_utf8(bytes: impl Into<Bytes>) -> Result<Self, Utf8Error> {
183 let bytes = bytes.into();
184
185 match core::str::from_utf8(bytes.as_ref()) {
186 Ok(_) => Ok(unsafe { Self::from_utf8_unchecked(bytes) }),
187 Err(e) => Err(e),
188 }
189 }
190
191 /// Converts a vector of bytes to a `ByteStr`, replacing invalid UTF-8 sequences with the replacement character (U+FFFD).
192 ///
193 /// This method will reuse the existing allocation if the bytes are valid UTF-8, or allocate a new string if invalid sequences are found.
194 ///
195 /// # Examples
196 ///
197 /// ```
198 /// use bytestr::ByteStr;
199 ///
200 /// let valid_bytes = b"Hello, world!".to_vec();
201 /// let s = ByteStr::from_utf8_lossy(valid_bytes);
202 /// assert_eq!(s.as_str(), "Hello, world!");
203 ///
204 /// let invalid_bytes = vec![0xFF, 0xFE, 0xFD];
205 /// let s = ByteStr::from_utf8_lossy(invalid_bytes);
206 /// assert_eq!(s.as_str(), "\u{FFFD}\u{FFFD}\u{FFFD}");
207 /// ```
208 pub fn from_utf8_lossy(bytes: impl Into<Bytes>) -> Self {
209 let bytes = bytes.into();
210
211 match String::from_utf8_lossy(bytes.as_ref()) {
212 Cow::Borrowed(_) => unsafe { Self::from_utf8_unchecked(bytes) },
213 Cow::Owned(string) => Self::from(string),
214 }
215 }
216
217 /// Converts a slice of UTF-16 encoded data to a `ByteStr`.
218 ///
219 /// This method will allocate a new string and convert the UTF-16 data to UTF-8.
220 ///
221 /// # Examples
222 ///
223 /// ```
224 /// use bytestr::ByteStr;
225 ///
226 /// let utf16: Vec<u16> = "Hello, world!".encode_utf16().collect();
227 /// let s = ByteStr::from_utf16(&utf16).unwrap();
228 /// assert_eq!(s.as_str(), "Hello, world!");
229 /// ```
230 ///
231 /// # Errors
232 ///
233 /// Returns an error if the provided UTF-16 data is not valid.
234 pub fn from_utf16(bytes: impl AsRef<[u16]>) -> Result<Self, FromUtf16Error> {
235 String::from_utf16(bytes.as_ref()).map(Self::from)
236 }
237
238 /// Converts a slice of UTF-16 encoded data to a `ByteStr`, replacing invalid sequences with the replacement character (U+FFFD).
239 ///
240 /// This method will allocate a new string and convert the UTF-16 data to UTF-8, replacing any invalid sequences.
241 ///
242 /// # Examples
243 ///
244 /// ```
245 /// use bytestr::ByteStr;
246 ///
247 /// let utf16: Vec<u16> = vec![0xD800, 0xDC00, 0x0041]; // valid surrogate pair + 'A'
248 /// let s = ByteStr::from_utf16_lossy(&utf16);
249 /// assert!(s.as_str().contains('\u{FFFD}') || s.as_str().contains('A'));
250 /// ```
251 pub fn from_utf16_lossy(bytes: impl AsRef<[u16]>) -> Self {
252 String::from_utf16_lossy(bytes.as_ref()).into()
253 }
254
255 /// Creates a `ByteStr` from a static string slice.
256 ///
257 /// This is a zero-cost operation as it directly references the static data
258 /// without any allocation or copying.
259 ///
260 /// # Examples
261 ///
262 /// ```
263 /// use bytestr::ByteStr;
264 ///
265 /// let s = ByteStr::from_static("Hello, static world!");
266 /// assert_eq!(s.as_str(), "Hello, static world!");
267 /// ```
268 #[must_use]
269 pub const fn from_static(s: &'static str) -> Self {
270 unsafe { Self::from_utf8_unchecked(Bytes::from_static(s.as_bytes())) }
271 }
272
273 /// Creates a `ByteStr` from bytes without UTF-8 validation.
274 ///
275 /// # Examples
276 ///
277 /// ```
278 /// use bytestr::ByteStr;
279 /// use bytes::Bytes;
280 ///
281 /// let bytes = Bytes::from("Hello, world!");
282 /// let s = unsafe { ByteStr::from_utf8_unchecked(bytes) };
283 /// assert_eq!(s.as_str(), "Hello, world!");
284 /// ```
285 ///
286 /// # Safety
287 ///
288 /// This function is unsafe because it does not check that the bytes passed
289 /// to it are valid UTF-8. If this constraint is violated, it may cause
290 /// memory unsafety issues with future users of the `ByteStr`.
291 #[must_use]
292 pub const unsafe fn from_utf8_unchecked(bytes: Bytes) -> Self {
293 Self(bytes)
294 }
295 /// Unwraps the `ByteStr` into the inner `Bytes` object.
296 ///
297 /// This operation consumes the `ByteStr` and returns the underlying
298 /// `Bytes` without any copying.
299 ///
300 /// # Examples
301 ///
302 /// ```
303 /// use bytestr::ByteStr;
304 ///
305 /// let s = ByteStr::from("Hello, world!");
306 /// let bytes = s.into_bytes();
307 /// assert_eq!(bytes.as_ref(), b"Hello, world!");
308 /// ```
309 pub fn into_bytes(self) -> Bytes {
310 self.0
311 }
312
313 /// Extracts a string slice containing the entire string.
314 ///
315 /// # Examples
316 ///
317 /// ```
318 /// use bytestr::ByteStr;
319 ///
320 /// let s = ByteStr::from("Hello, world!");
321 /// assert_eq!(s.as_str(), "Hello, world!");
322 /// ```
323 pub fn as_str(&self) -> &str {
324 unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
325 }
326
327 /// Shortens the string, keeping the first `len` bytes and dropping the rest.
328 ///
329 /// # Panics
330 ///
331 /// Panics if the position of dropping the rest is not on a UTF-8 code point boundary, or if it is beyond the last code point.
332 pub fn truncate(&mut self, len: usize) {
333 if len < self.len() {
334 assert!(self.deref().is_char_boundary(len));
335 unsafe {
336 self.as_bytes_mut().truncate(len);
337 }
338 }
339 }
340
341 /// Returns a slice of self that is equivalent to the given subset.
342 ///
343 /// This operation creates a new `ByteStr` that references a subset of the
344 /// original data without copying. The subset must be a slice of the original
345 /// string that lies on UTF-8 character boundaries.
346 ///
347 /// # Examples
348 ///
349 /// ```
350 /// use bytestr::ByteStr;
351 ///
352 /// let s = ByteStr::from("Hello, world!");
353 /// let original_str = s.as_str();
354 /// let world_slice = &original_str[7..12]; // "world"
355 /// let sliced = s.slice_ref(world_slice);
356 /// assert_eq!(sliced.as_str(), "world");
357 /// ```
358 ///
359 /// # Panics
360 ///
361 /// Panics if the given `subset` is not contained within the `ByteStr`.
362 #[must_use]
363 pub fn slice_ref(&self, subset: &str) -> Self {
364 unsafe { Self::from_utf8_unchecked(self.0.slice_ref(subset.as_bytes())) }
365 }
366
367 /// Removes all contents of the `ByteStr` while retaining the capacity.
368 ///
369 /// # Examples
370 ///
371 /// ```
372 /// use bytestr::ByteStr;
373 ///
374 /// let mut s = ByteStr::from("Hello, world!");
375 /// assert!(!s.is_empty());
376 /// s.clear();
377 /// assert!(s.is_empty());
378 /// ```
379 pub fn clear(&mut self) {
380 self.0.clear();
381 }
382
383 /// Provides a reference to the inner `Bytes` object.
384 ///
385 /// # Examples
386 ///
387 /// ```
388 /// use bytestr::ByteStr;
389 ///
390 /// let s = ByteStr::from("Hello, world!");
391 /// let bytes = s.as_bytes();
392 /// assert_eq!(bytes.len(), 13);
393 /// ```
394 pub const fn as_bytes(&self) -> &Bytes {
395 &self.0
396 }
397
398 /// Provides a mutable reference to the inner `Bytes` object.
399 ///
400 /// # Examples
401 ///
402 /// ```
403 /// use bytestr::ByteStr;
404 ///
405 /// let mut s = ByteStr::from("Hello, world!");
406 /// unsafe {
407 /// let bytes_mut = s.as_bytes_mut();
408 /// // Careful: ensure any modifications maintain UTF-8 validity
409 /// }
410 /// ```
411 ///
412 /// # Safety
413 ///
414 /// The caller must ensure that the content of the slice is valid UTF-8
415 /// before the borrow ends and the `ByteStr` is used.
416 pub const unsafe fn as_bytes_mut(&mut self) -> &mut Bytes {
417 &mut self.0
418 }
419
420 /// Returns `true` if the `ByteStr` has a length of zero bytes.
421 ///
422 /// # Examples
423 ///
424 /// ```
425 /// use bytestr::ByteStr;
426 ///
427 /// let empty = ByteStr::new();
428 /// assert!(empty.is_empty());
429 ///
430 /// let non_empty = ByteStr::from("hello");
431 /// assert!(!non_empty.is_empty());
432 /// ```
433 #[must_use]
434 pub const fn is_empty(&self) -> bool {
435 self.0.is_empty()
436 }
437
438 /// Returns the length of this `ByteStr` in bytes.
439 ///
440 /// # Examples
441 ///
442 /// ```
443 /// use bytestr::ByteStr;
444 ///
445 /// let s = ByteStr::from("Hello, δΈη!");
446 /// assert_eq!(s.len(), 14);
447 /// ```
448 #[must_use]
449 pub const fn len(&self) -> usize {
450 self.0.len()
451 }
452
453 /// Returns the capacity of this `ByteStr` in bytes.
454 ///
455 /// The capacity represents the total amount of memory allocated
456 /// for this `ByteStr`, which may be larger than the length.
457 ///
458 /// # Examples
459 ///
460 /// ```
461 /// use bytestr::ByteStr;
462 ///
463 /// let s = ByteStr::from("Hello!");
464 /// assert!(s.capacity() >= s.len());
465 /// ```
466 #[must_use]
467 pub const fn capacity(&self) -> usize {
468 // Bytes doesn't expose capacity directly, but we can use len() as a reasonable approximation
469 // since Bytes manages memory efficiently
470 self.0.len()
471 }
472}
473
474#[cfg(test)]
475mod tests;