token_string/
string.rs

1// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
2// SPDX-License-Identifier: MPL-2.0
3//
4// Project:  token-string
5// File:     string.rs
6// Date:     22.Nov.2024
7// =============================================================================
8//! The string type [`TokenString`].
9
10extern crate alloc;
11
12use alloc::string::ToString as _;
13use alloc::vec;
14use core::{borrow, cmp, fmt, hash, mem, ops, panic, slice, str};
15
16use crate::{StringPtr, TkStrError};
17
/// The length of the prefix of the string, that is, the number of leading
/// bytes stored in the field `prefix` for comparisons.
///
/// Computed as the size of a `u64` minus the size of the `u16` length field.
pub const PREFIX_LENGTH: usize = mem::size_of::<u64>() - mem::size_of::<u16>();

/// [`PREFIX_LENGTH`] plus one, used as the lower bound of range patterns in
/// `match` expressions.
const PREFIX_LENGTH_ADD1: usize = PREFIX_LENGTH + 1;

/// The length of the non-prefix part of a "small string", the size of a
/// `u64`. This is the capacity of the field `u.small`.
pub const SMALL_DATA_LENGTH: usize = mem::size_of::<u64>();

/// The maximum length in bytes, not Unicode scalar values, of a "small" string
/// that is saved in the struct [`TokenString`] itself and not on the heap.
pub const MAX_LENGTH_SMALL: usize = PREFIX_LENGTH + SMALL_DATA_LENGTH;

/// [`MAX_LENGTH_SMALL`] plus one, used as the lower bound of range patterns in
/// `match` expressions.
pub const MAX_LENGTH_SMALL_ADD1: usize = MAX_LENGTH_SMALL + 1;

/// The maximum length in bytes, not Unicode scalar values, of a
/// [`TokenString`]. The length is stored in a `u16`.
pub const MAX_LENGTH: usize = u16::MAX as usize;
39
/// A string which can hold at most [`MAX_LENGTH`] bytes (not Unicode scalar
/// values).
///
/// This holds valid UTF-8 encoded strings only.
/// Strings that are short enough, which need at most [`MAX_LENGTH_SMALL`]
/// bytes, are stored in the struct itself, bigger ones use the heap.
///
/// # Invariant
///
/// - [`TokenString`] must be a UTF-8 string (like &[`prim@str`] and
///   [`alloc::string::String`]).
/// - The length of a [`TokenString`] is at most [`MAX_LENGTH`] and at least 0 -
///   the empty string.
/// - `len` decides which variant of `u` is active: `u.small` if
///   `len <= MAX_LENGTH_SMALL`, `u.ptr` otherwise.
// `repr(C)` fixes the field order, so `prefix` is directly followed by `u`;
// several methods read both fields as one contiguous byte range.
#[repr(C)]
pub struct TokenString {
	/// The length of the string in bytes.
	///
	/// Maximum: [`MAX_LENGTH`].
	pub(crate) len: u16,
	/// The first [`PREFIX_LENGTH`] bytes of the string, zero padded if the
	/// string is shorter.
	pub(crate) prefix: [u8; PREFIX_LENGTH],
	/// The data (see [`Data`]).
	///
	/// If the string is at most [`MAX_LENGTH_SMALL`] bytes, this holds the
	/// other bytes of the string, else this is a pointer to the heap.
	pub(crate) u: Data,
}
67
68
// Invariants: [`TokenString`] must be aligned to 64 bits and its size must be
// 128 bits. That means that `sizeof len + prefix == 64 bit` and
// `sizeof u == 64 bit`. So there is no padding.
//
// The assertions below are evaluated at compile time; a violated layout
// assumption fails the build instead of causing undefined behavior later.

// `TokenString` has the alignment of a `u64`.
const _: () = assert!(
	mem::align_of::<TokenString>() == mem::size_of::<u64>(),
	"struct TokenString is not aligned to 64 bits!"
);
// `TokenString` is exactly two `u64`s big.
const _: () = assert!(
	mem::size_of::<TokenString>() == 2 * mem::size_of::<u64>(),
	"struct TokenString has size != 128 bits"
);
// The union `Data` has the alignment of a `u64`.
const _: () = assert!(
	mem::align_of::<Data>() == mem::size_of::<u64>(),
	"struct Data is not aligned to 64 bits!"
);
// The union `Data` is exactly one `u64` big.
const _: () = assert!(
	mem::size_of::<Data>() == mem::size_of::<u64>(),
	"union Data has size != 64 bits"
);
89
90// =============================================================================
91// Inner types of `TokenString`.
92
/// This is either a pointer to the whole string, if the string is bigger than
/// [`MAX_LENGTH_SMALL`] bytes, or the bytes of the string after the prefix,
/// stored inline as an array.
///
/// The union itself does not record which variant is active; that is decided
/// by the field `len` of the enclosing [`TokenString`].
///
/// See [`StringPtr`]
#[repr(C)]
pub union Data {
	/// If the string is small enough (at most [`MAX_LENGTH_SMALL`]), its data
	/// after the prefix is here.
	pub(crate) small: [u8; SMALL_DATA_LENGTH],
	/// For strings bigger than [`MAX_LENGTH_SMALL`], this points to the memory
	/// holding the whole string.
	///
	/// Wrapped in [`mem::ManuallyDrop`], so it is never dropped automatically.
	pub(crate) ptr: mem::ManuallyDrop<StringPtr>,
}
106
107// =============================================================================
108// `TokenString` itself
109
/// The empty string.
///
/// Has a length of zero, the prefix and the inline data are all zero bytes.
pub const EMPTY: TokenString = TokenString {
	len: 0,
	prefix: [0_u8; PREFIX_LENGTH],
	u: Data {
		small: [0_u8; SMALL_DATA_LENGTH],
	},
};
120
121// =============================================================================
122// Traits
123
124impl TryFrom<&str> for TokenString {
125	type Error = TkStrError;
126
127	/// Create a [`TokenString`] from a &[`prim@str`].
128	///
129	/// Return [`TkStrError::TooBig`] if the argument is greater than
130	/// [`MAX_LENGTH`].
131	///
132	/// Memory:
133	///
134	/// Allocates if and only if the length of `value` is bigger than
135	/// [`MAX_LENGTH_SMALL`].
136	fn try_from(value: &str) -> Result<Self, Self::Error> {
137		let bytes = value.as_bytes();
138		match value.len() {
139			| 0 => Ok(Self {
140				len: 0,
141				prefix: [0_u8; PREFIX_LENGTH],
142				u: Data {
143					small: [0_u8; SMALL_DATA_LENGTH],
144				},
145			}),
146			| 1 ..= PREFIX_LENGTH => {
147				let s = value.len();
148				let mut prefix = [0_u8; PREFIX_LENGTH];
149				prefix[.. s].copy_from_slice(&bytes[.. s]);
150				Ok(Self {
151					#[expect(
152						clippy::cast_possible_truncation,
153						reason = "Length has been checked above"
154					)]
155					len: s as u16,
156					prefix,
157					u: Data {
158						small: [0_u8; SMALL_DATA_LENGTH],
159					},
160				})
161			}
162			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
163				let s = value.len();
164				let mut prefix = [0_u8; PREFIX_LENGTH];
165				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
166				let mut small = [0_u8; SMALL_DATA_LENGTH];
167				small[.. s - PREFIX_LENGTH]
168					.copy_from_slice(&bytes[PREFIX_LENGTH .. s]);
169				Ok(Self {
170					#[expect(
171						clippy::cast_possible_truncation,
172						reason = "Length has been checked above"
173					)]
174					len: s as u16,
175					prefix,
176					u: Data { small },
177				})
178			}
179			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => {
180				let ptr = StringPtr::from(bytes);
181				let u = Data {
182					ptr: mem::ManuallyDrop::new(ptr),
183				};
184				let mut prefix = [0_u8; PREFIX_LENGTH];
185				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
186				Ok(Self {
187					#[expect(
188						clippy::cast_possible_truncation,
189						reason = "Length has been checked above"
190					)]
191					len: value.len() as u16,
192					prefix,
193					u,
194				})
195			}
196			| _ => Err(TkStrError::TooBig(value.len())),
197		}
198	}
199}
200
201impl TryFrom<&[u8]> for TokenString {
202	type Error = TkStrError;
203
204	/// Try to create a [`TokenString`] from the given slice.
205	///
206	/// Return [`TkStrError::TooBig`] if the given slice is too big, greater
207	/// than [`MAX_LENGTH`].
208	/// Return [`TkStrError::UnicodeError`]
209	///
210	/// Memory:
211	///
212	/// Allocates if and only if the length of `value` is bigger than
213	/// [`MAX_LENGTH_SMALL`].
214	#[inline]
215	fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
216		match str::from_utf8(value) {
217			| Ok(str) => Self::try_from(str),
218			| Err(utf_err) => Err(TkStrError::UnicodeError(utf_err)),
219		}
220	}
221}
222
223impl TryFrom<&[char]> for TokenString {
224	type Error = TkStrError;
225
226	/// Try to create a [`TokenString`] from the given slice.
227	///
228	/// Return [`TkStrError::TooBig`] if the given slice is too big, greater
229	/// than [`MAX_LENGTH`].
230	///
231	/// Memory
232	///
233	/// Allocates and deallocates a temporary [`alloc::string::String`]
234	/// collecting the converted bytes.
235	#[inline]
236	fn try_from(value: &[char]) -> Result<Self, Self::Error> {
237		let i = value.iter();
238		Self::try_from(i.collect::<alloc::string::String>())
239	}
240}
241
242impl TryFrom<&alloc::string::String> for TokenString {
243	type Error = TkStrError;
244
245	/// Create a `TokenString` from a &[`alloc::string::String`].
246	///
247	/// Return [`TkStrError::TooBig`] if the argument is greater than
248	/// [`MAX_LENGTH`].
249	///
250	/// Memory:
251	///
252	/// Allocates if and only if the length of `value` is bigger than
253	/// [`MAX_LENGTH_SMALL`].
254	#[inline]
255	fn try_from(value: &alloc::string::String) -> Result<Self, Self::Error> {
256		let str = value.as_str();
257		Self::try_from(str)
258	}
259}
260
261impl TryFrom<alloc::string::String> for TokenString {
262	type Error = TkStrError;
263
264	/// Create a [`TokenString`] from a [`alloc::string::String`].
265	///
266	/// Return [`TkStrError::TooBig`] if the argument is greater than
267	/// [`MAX_LENGTH`].
268	///
269	/// Memory:
270	///
271	/// Allocates if and only if the length of `value` is bigger than
272	/// [`MAX_LENGTH_SMALL`].
273	#[inline]
274	fn try_from(value: alloc::string::String) -> Result<Self, Self::Error> {
275		// Sadly we can't use the string's data directly, as a [`String`] has a
276		// capacity which is to be known when deallocating the data.
277		// See [`String::into_raw_parts`].
278		let str = value.as_str();
279		Self::try_from(str)
280	}
281}
282
283impl Drop for TokenString {
284	#[cfg_attr(test, mutants::skip)]
285	#[inline]
286	fn drop(&mut self) {
287		if usize::from(self.len) > MAX_LENGTH_SMALL {
288			// SAFETY:
289			// We know that there is a pointer saved in the union.
290			// The whole string is being dropped, so taking a mutable
291			// reference of the pointer is legal.
292			let mut m_ptr = unsafe { mem::ManuallyDrop::take(&mut self.u.ptr) };
293			m_ptr.drop_manually(self.len.into());
294		}
295	}
296}
297
298impl Clone for TokenString {
299	/// Return a clone of the [`TokenString`].
300	///
301	/// Memory:
302	///
303	/// Allocates if and only if the length of `value` is bigger than
304	/// [`MAX_LENGTH_SMALL`].
305	#[inline]
306	fn clone(&self) -> Self {
307		let u = if self.len as usize > MAX_LENGTH_SMALL {
308			Data {
309				// SAFETY:
310				// We check, that there is an allocated pointer saved in the
311				// union.
312				ptr: mem::ManuallyDrop::new(unsafe {
313					self.u.ptr.clone_manually(self.len.into())
314				}),
315			}
316		} else {
317			Data {
318				// SAFETY:
319				// We check, that there is a small string in the union.
320				small: unsafe { self.u.small },
321			}
322		};
323		Self {
324			len: self.len,
325			prefix: self.prefix,
326			u,
327		}
328	}
329}
330
impl Default for TokenString {
	/// Return the empty string, [`EMPTY`].
	///
	/// Does not allocate.
	#[inline]
	fn default() -> Self {
		EMPTY
	}
}
338
// Marker impl: the equality defined by `PartialEq for TokenString` is a full
// equivalence relation.
impl Eq for TokenString {}
340
341impl PartialEq for TokenString {
342	#[inline]
343	fn eq(&self, other: &Self) -> bool {
344		if self.len != other.len || self.prefix != other.prefix {
345			return false;
346		}
347
348		if self.len as usize <= MAX_LENGTH_SMALL {
349			// SAFETY:
350			// We know we have two small strings to compare.
351			unsafe { self.u.small == other.u.small }
352		} else {
353			// SAFETY:
354			// We know we have two string pointers to compare.
355			unsafe { self.u.ptr.eq_manually(&other.u.ptr, self.len.into()) }
356		}
357	}
358}
359
360impl PartialEq<[u8]> for TokenString {
361	fn eq(&self, other: &[u8]) -> bool {
362		if self.len as usize != other.len() {
363			return false;
364		}
365		let len = self.len as usize;
366		match len {
367			| 0 => true,
368			| 1 ..= PREFIX_LENGTH => self.prefix[.. len] == other[.. len],
369			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
370				// SAFETY:
371				// Use the whole memory region of self.`prefix` and
372				// `self.u.small` as a single array. This is not UB, as the
373				// whole memory `TokenString` has been allocated at once and
374				// is guaranteed to be continuous in memory. If Miri
375				// complains about this, use the flag `MIRIFLAGS="
376				// -Zmiri-tree-borrows"` to use "tree borrows" instead of
377				// "stacked borrows".
378				let bytes =
379					unsafe { slice::from_raw_parts(self.prefix.as_ptr(), len) };
380				bytes == other
381			}
382			// SAFETY:
383			// We know that the pointer actually points to allocated memory.
384			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => unsafe {
385				self.u.ptr.as_slice_manually(len) == other
386			},
387			| _ => panic!("The TokenString is bigger than MAX_LENGTH!"),
388		}
389	}
390}
391
392impl PartialEq<str> for TokenString {
393	#[inline]
394	fn eq(&self, other: &str) -> bool {
395		self == other.as_bytes()
396	}
397}
398
399impl PartialEq<alloc::string::String> for TokenString {
400	#[inline]
401	fn eq(&self, other: &alloc::string::String) -> bool {
402		self == other.as_bytes()
403	}
404}
405
406
407impl Ord for TokenString {
408	/// Compare two [`TokenString`]s byte-wise.
409	///
410	/// This is not a sensible alphabetical comparison for anything that isn't
411	/// ASCII.
412	#[inline]
413	fn cmp(&self, other: &Self) -> cmp::Ordering {
414		let pref_ord = self.prefix.cmp(&other.prefix);
415		if pref_ord != cmp::Ordering::Equal {
416			return pref_ord;
417		}
418
419		self.suffix().cmp(other.suffix())
420	}
421}
422
423impl PartialOrd for TokenString {
424	/// Compare two [`TokenString`]s byte-wise.
425	///
426	/// This is not a sensible alphabetical comparison for anything that isn't
427	/// ASCII.
428	#[inline]
429	fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
430		Some(self.cmp(other))
431	}
432}
433
434impl fmt::Display for TokenString {
435	#[inline]
436	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
437		write!(f, "{}", self.as_str())
438	}
439}
440
441impl fmt::Debug for TokenString {
442	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
443		if self.len as usize > MAX_LENGTH_SMALL {
444			let string =
445			// SAFETY:
446			// We know that the pointer points to a string.
447				unsafe { self.u.ptr.as_string_manually(self.len.into()) };
448			// SAFETY:
449			// We know that the pointer points to a string.
450			let ptr = unsafe { &self.u.ptr };
451			f.debug_struct("TokenString")
452				.field("len", &self.len)
453				.field("prefix", &self.prefix_str())
454				.field("ptr", ptr)
455				.field("string", &string)
456				.finish()
457		} else {
458			// SAFETY:
459			// We've checked that this is a small string.
460			unsafe {
461				f.debug_struct("TokenString")
462					.field("len", &self.len)
463					.field("prefix", &self.prefix_str())
464					.field("small", &self.small_str())
465					.field("string", &self.as_str())
466					.finish()
467			}
468		}
469	}
470}
471
472impl<Idx> ops::Index<Idx> for TokenString
473where
474	Idx: slice::SliceIndex<str>,
475{
476	type Output = Idx::Output;
477
478	#[inline]
479	fn index(&self, index: Idx) -> &Self::Output {
480		self.as_str().index(index)
481	}
482}
483
484impl borrow::Borrow<str> for TokenString {
485	#[inline]
486	fn borrow(&self) -> &str {
487		self.as_str()
488	}
489}
490
491impl AsRef<str> for TokenString {
492	#[inline]
493	fn as_ref(&self) -> &str {
494		self.as_str()
495	}
496}
497
498impl hash::Hash for TokenString {
499	#[inline]
500	fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
501		self.as_str().hash(state);
502	}
503}
504
// SAFETY:
// There can be no shared references of a `TokenString`.
// NOTE(review): this relies on `StringPtr` (defined elsewhere) uniquely owning
// its heap buffer, so that moving a `TokenString` to another thread moves the
// only handle to that memory - confirm against `StringPtr`.
unsafe impl Send for TokenString {}

// SAFETY:
// `TokenString` is immutable.
// No method in this file changes a string through a shared reference, the
// only mutating helper takes `&mut self`.
// NOTE(review): assumes `StringPtr` has no interior mutability - confirm.
unsafe impl Sync for TokenString {}
512
513// =============================================================================
514// Non trait methods
515
516impl TokenString {
517	/// Return the prefix as a `&[u8]`.
518	fn prefix_str(&self) -> &[u8] {
519		let l = cmp::min(self.len as usize, PREFIX_LENGTH);
520		&self.prefix[.. l]
521	}
522
523	/// Return the suffix of a small string as a `&[u8]`.
524	///
525	/// # Safety
526	///
527	/// Must be called with a small string only!
528	unsafe fn small_str(&self) -> &[u8] {
529		let l = if self.len as usize > PREFIX_LENGTH {
530			self.len as usize - PREFIX_LENGTH
531		} else {
532			0
533		};
534		// SAFETY:
535		// We know that the union contains a small string.
536		unsafe { &self.u.small[.. l] }
537	}
538
	/// Return the length of the string in bytes.
	///
	/// This is the length of the string in bytes, not Unicode scalar values and
	/// not grapheme clusters. It is at most [`MAX_LENGTH`].
	#[must_use]
	#[inline]
	pub const fn len(&self) -> usize {
		// Widening from `u16`, this cannot truncate.
		self.len as usize
	}
548
549	/// Return `true` if the string is a "small string", that is, it is saved in
550	/// the [`TokenString`] struct itself.
551	///
552	/// If this returns `false`, the string is allocated on the heap.
553	#[must_use]
554	#[inline]
555	pub const fn is_small(&self) -> bool {
556		self.len as usize <= MAX_LENGTH_SMALL
557	}
558
559	/// Return `true`, if this is the empty string.
560	///
561	/// Returns `false` else.
562	#[must_use]
563	#[inline]
564	pub const fn is_empty(&self) -> bool {
565		self.len == 0
566	}
567
568	/// Convert to a [`TokenString`].
569	///
570	/// `bytes` must be valid UTF-8, use [`TokenString::try_from`] if you are
571	/// not sure that it is valid. If the given byte slice is bigger than
572	/// [`MAX_LENGTH`], this panics.
573	///
574	/// Memory:
575	///
576	/// Allocates if and only if the length of `bytes` is bigger than
577	/// [`MAX_LENGTH_SMALL`].
578	///
579	/// # Panics
580	///
581	/// Panics if `bytes` is bigger than [`MAX_LENGTH`].
582	///
583	/// # Safety
584	///
585	/// `bytes` must be valid UTF-8, if not, all bets are off - UB!
586	#[must_use]
587	pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> Self {
588		match bytes.len() {
589			| 0 => Self {
590				len: 0,
591				prefix: [0_u8; PREFIX_LENGTH],
592				u: Data {
593					small: [0_u8; SMALL_DATA_LENGTH],
594				},
595			},
596			| 1 ..= PREFIX_LENGTH => {
597				let s = bytes.len();
598				let mut prefix = [0_u8; PREFIX_LENGTH];
599				prefix[.. s].copy_from_slice(&bytes[.. s]);
600				Self {
601					#[expect(
602						clippy::cast_possible_truncation,
603						reason = "Length has been checked above"
604					)]
605					len: s as u16,
606					prefix,
607					u: Data {
608						small: [0_u8; SMALL_DATA_LENGTH],
609					},
610				}
611			}
612			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
613				let s = bytes.len();
614				let mut prefix = [0_u8; PREFIX_LENGTH];
615				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
616				let mut small = [0_u8; SMALL_DATA_LENGTH];
617				small[.. s - PREFIX_LENGTH]
618					.copy_from_slice(&bytes[PREFIX_LENGTH .. s]);
619				Self {
620					#[expect(
621						clippy::cast_possible_truncation,
622						reason = "Length has been checked above"
623					)]
624					len: s as u16,
625					prefix,
626					u: Data { small },
627				}
628			}
629			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => {
630				let ptr = StringPtr::from(bytes);
631				let u = Data {
632					ptr: mem::ManuallyDrop::new(ptr),
633				};
634				let mut prefix = [0_u8; PREFIX_LENGTH];
635				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
636				Self {
637					#[expect(
638						clippy::cast_possible_truncation,
639						reason = "Length has been checked above"
640					)]
641					len: bytes.len() as u16,
642					prefix,
643					u,
644				}
645			}
646			| _ => panic!(
647				"This byte slice is too big for a TokenString, {} > \
648				 {MAX_LENGTH}",
649				bytes.len()
650			),
651		}
652	}
653
654	/// Convert to a [`TokenString`].
655	///
656	/// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use
657	/// [`TokenString::try_from`] for a function that does not panic. The string
658	/// `s` must be valid UTF-8 too, but it has already been UB if it isn't.
659	///
660	/// Memory:
661	///
662	/// Allocates if and only if the length of `s` is bigger than
663	/// [`MAX_LENGTH_SMALL`].
664	///
665	/// # Panics
666	///
667	/// Panics if `s` is bigger than [`MAX_LENGTH`].
668	#[must_use]
669	#[inline]
670	pub fn from_str_unchecked(s: &str) -> Self {
671		// SAFETY:
672		// The unsafe part of `from_bytes_unchecked` is the possibility of the
673		// byte slice not being valid UTF-8. We are processing an UTF-8 string
674		// here.
675		unsafe { Self::from_bytes_unchecked(s.as_bytes()) }
676	}
677
678	/// Convert to a [`TokenString`].
679	///
680	/// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use
681	/// [`TokenString::try_from`] for a function that does not panic. The string
682	/// `s` must be valid UTF-8 too, but it has already been UB if it isn't.
683	///
684	/// Memory:
685	///
686	/// Allocates if and only if the length of `s` is bigger than
687	/// [`MAX_LENGTH_SMALL`].
688	///
689	/// # Panics
690	///
691	/// Panics if `s` is bigger than [`MAX_LENGTH`].
692	#[must_use]
693	#[inline]
694	pub fn from_string_unchecked(s: &alloc::string::String) -> Self {
695		// SAFETY:
696		// The unsafe part of `from_bytes_unchecked` is the possibility of the
697		// byte slice not being valid UTF-8. We are processing an UTF-8 string
698		// here.
699		unsafe { Self::from_bytes_unchecked(s.as_bytes()) }
700	}
701
702	/// Return the string as a &[`prim@str`].
703	#[must_use]
704	#[inline]
705	pub fn as_str(&self) -> &str {
706		if self.len == 0 {
707			""
708		} else if self.len as usize > MAX_LENGTH_SMALL {
709			// SAFETY:
710			// We know, that in the union must be a valid pointer.
711			unsafe { self.u.ptr.as_string_manually(self.len.into()) }
712		} else {
713			// SAFETY:
714			// Use the whole memory region of self.`prefix` and `self.u.small`
715			// as a single array. This is not UB, as the whole memory
716			// `TokenString` has been allocated at once and is guaranteed to be
717			// continuous in memory. If Miri complains about this, use the
718			// flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows"
719			// instead of "stacked borrows".
720			let bytes = unsafe {
721				slice::from_raw_parts(self.prefix.as_ptr(), self.len.into())
722			};
723			// SAFETY:
724			// The precondition of `TokenString` is that the string is a valid
725			// UTF-8 byte sequence.
726			unsafe { str::from_utf8_unchecked(bytes) }
727		}
728	}
729
730	/// Return the string as a byte slice.
731	#[must_use]
732	#[inline]
733	pub fn as_bytes(&self) -> &[u8] {
734		if self.len == 0 {
735			Default::default()
736		} else if self.len as usize > MAX_LENGTH_SMALL {
737			// SAFETY:
738			// We know, that in the union must be a valid pointer.
739			unsafe { self.u.ptr.as_slice_manually(self.len.into()) }
740		} else {
741			// SAFETY:
742			// Use the whole memory region of self.`prefix` and `self.u.small`
743			// as a single array. This is not UB, as the whole memory
744			// `TokenString` has been allocated at once and is guaranteed to be
745			// continuous in memory. If Miri complains about this, use the
746			// flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows"
747			// instead of "stacked borrows".
748			unsafe {
749				slice::from_raw_parts(self.prefix.as_ptr(), self.len.into())
750			}
751		}
752	}
753
754	/// Return the string as a new [`alloc::string::String`].
755	///
756	/// Memory:
757	///
758	/// Allocates a new [`alloc::string::String`].
759	#[must_use]
760	#[inline]
761	pub fn as_string(&self) -> alloc::string::String {
762		self.to_string()
763	}
764
765	/// Return the string as a new vector of [`char`]s.
766	///
767	/// Memory:
768	///
769	/// Allocates a new [`vec::Vec`].
770	#[must_use]
771	#[inline]
772	pub fn as_chars(&self) -> vec::Vec<char> {
773		self.as_str().chars().collect()
774	}
775
776	/// Return the part of the string which is not stored in `self.prefix`.
777	///
778	/// If the string is <= [`PREFIX_LENGTH`], the empty slice is returned.
779	fn suffix(&self) -> &[u8] {
780		match self.len as usize {
781			| 0 ..= PREFIX_LENGTH => Default::default(),
782			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL =>
783			// SAFETY:
784			// We checked and know that this is a small string.
785			unsafe { &self.u.small },
786			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH =>
787			// SAFETY:
788			// We checked and know that this string is allocated on the heap.
789			unsafe {
790				&self.u.ptr.as_slice_manually(self.len.into())[PREFIX_LENGTH ..]
791			},
792			| _ => panic!(
793				"Error: this TokenString is bigger than \
794				 TokenString::MAX_LENGTH!"
795			),
796		}
797	}
798
799	/// Return the byte at index `idx`, check bounds.
800	///
801	/// Returns [`TkStrError::OutOfBounds`] if the index is bigger than the
802	/// string's length.
803	///
804	/// # Errors
805	/// [`TkStrError::OutOfBounds`] if `idx` is bigger than the string's length.
806	#[inline]
807	pub fn get(&self, idx: u16) -> Result<u8, TkStrError> {
808		if idx >= self.len {
809			return Err(TkStrError::OutOfBounds(idx as usize));
810		}
811		// SAFETY:
812		// We check above that the index is in bounds.
813		unsafe { Ok(*self.as_bytes().get_unchecked(idx as usize)) }
814	}
815
816	/// Return the byte at index `idx`, don't check bounds.
817	///
818	/// Panics if the index is bigger than the
819	/// string's length.
820	///
821	/// # Panics
822	///
823	/// if `idx` is bigger than the string's length.
824	#[must_use]
825	#[inline]
826	pub fn get_unchecked(&self, idx: u16) -> u8 {
827		assert!((idx < self.len), "index {idx} out of bounds");
828		// SAFETY:
829		// We check above that the index is in bounds.
830		unsafe { *self.as_bytes().get_unchecked(idx as usize) }
831	}
832
833	/// Return an iterator over the `[char]`s of a string.
834	///
835	/// That is, an iterator over the Unicode scalar values of the
836	/// `TokenString`.
837	#[inline]
838	pub fn chars(&self) -> str::Chars {
839		self.as_str().chars()
840	}
841
842	/// Get a reference iterator.
843	#[must_use]
844	#[inline]
845	pub fn iter(&self) -> TokenStringIter<'_> {
846		<&Self as IntoIterator>::into_iter(self)
847	}
848
849	/// Return `true`, if the first byte is an uppercase ASCII character.
850	#[must_use]
851	#[inline]
852	pub const fn starts_ascii_uppercase(&self) -> bool {
853		self.prefix[0].is_ascii_uppercase()
854	}
855
856	/// Return `true`, if the first byte is an lowercase ASCII character.
857	#[must_use]
858	#[inline]
859	pub const fn starts_ascii_lowercase(&self) -> bool {
860		self.prefix[0].is_ascii_lowercase()
861	}
862
863	/// Return `true`, if the string contains only ASCII characters.
864	#[must_use]
865	#[inline]
866	pub fn is_ascii(&self) -> bool {
867		self.as_bytes().is_ascii()
868	}
869
870	/// Return `true`, if the string starts with `needle`.
871	///
872	/// Returns `true` too if the string is `needle`.
873	#[must_use]
874	#[inline]
875	pub fn starts_with(&self, needle: &Self) -> bool {
876		self.as_bytes().starts_with(needle.as_bytes())
877	}
878
879	/// Return `true`, if the string starts with `needle`.
880	///
881	/// Returns `true` too if the string is `needle`.
882	#[must_use]
883	#[inline]
884	pub fn starts_with_bytes(&self, needle: &[u8]) -> bool {
885		self.as_bytes().starts_with(needle)
886	}
887
888	/// Return `true`, if the string starts with `needle`.
889	///
890	/// Returns `true` too if the string is `needle`.
891	#[must_use]
892	#[inline]
893	pub fn starts_with_str(&self, needle: &str) -> bool {
894		self.as_str().starts_with(needle)
895	}
896
897	/// Return `true`, if the string ends with `needle`.
898	///
899	/// Returns `true` too if the string is `needle`.
900	#[must_use]
901	#[inline]
902	pub fn ends_with(&self, needle: &Self) -> bool {
903		self.as_bytes().ends_with(needle.as_bytes())
904	}
905
906	/// Return `true`, if the string ends with `needle`.
907	///
908	/// Returns `true` too if the string is `needle`.
909	#[must_use]
910	#[inline]
911	pub fn ends_with_bytes(&self, needle: &[u8]) -> bool {
912		self.as_bytes().ends_with(needle)
913	}
914
915	/// Return `true`, if the string ends with `needle`.
916	///
917	/// Returns `true` too if the string is `needle`.
918	#[must_use]
919	#[inline]
920	pub fn ends_with_str(&self, needle: &str) -> bool {
921		self.as_str().ends_with(needle)
922	}
923
924	/// Map the given function `f` over the bytes of the string, mutating it.
925	fn map_bytes_mut(&mut self, f: fn(&mut [u8]) -> ()) {
926		if self.len as usize > MAX_LENGTH_SMALL {
927			// SAFETY:
928			// We check, that we actually have a valid pointer.
929			unsafe {
930				f((*self.u.ptr).as_slice_manually_mut(self.len as usize));
931			}
932		} else {
933			// SAFETY:
934			// The two arrays, `prefix` and `small`, are guaranteed to be
935			// continuous in memory.
936			unsafe {
937				f(slice::from_raw_parts_mut(
938					self.prefix.as_mut_ptr(),
939					self.len as usize,
940				));
941			}
942		}
943	}
944
945	/// Return a new string with all uppercase ASCII characters changed to
946	/// lowercase.
947	#[must_use]
948	#[inline]
949	pub fn to_ascii_lowercase(&self) -> Self {
950		let mut ret_val = self.clone();
951		ret_val.map_bytes_mut(<[u8]>::make_ascii_lowercase);
952		ret_val
953	}
954
955	/// Return a new string with all lowercase ASCII characters changed to
956	/// uppercase.
957	#[must_use]
958	#[inline]
959	pub fn to_ascii_uppercase(&self) -> Self {
960		let mut ret_val = self.clone();
961		ret_val.map_bytes_mut(<[u8]>::make_ascii_uppercase);
962		ret_val
963	}
964
965	/// Return a new string with all ASCII whitespace removed from the start and
966	/// end.
967	#[must_use]
968	#[inline]
969	pub fn trim_ascii(&self) -> Self {
970		// SAFETY:
971		// We copy the current string, so the invariants should hold for the
972		// copy too. The string does not get longer, so cannot be greater than
973		// `MAX_LENGTH`.
974		unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii()) }
975	}
976
977	/// Return a new string with all ASCII whitespace removed from the start.
978	#[must_use]
979	#[inline]
980	pub fn trim_ascii_start(&self) -> Self {
981		// SAFETY:
982		// We copy the current string, so the invariants should hold for the
983		// copy too:
984		// - The string does not get longer, so cannot be greater than
985		// `MAX_LENGTH`.
986		// - if the string is valid UTF-8, removing ASCII characters does not
987		//   change that.
988		unsafe {
989			Self::from_bytes_unchecked(self.as_bytes().trim_ascii_start())
990		}
991	}
992
993	/// Return a new string with all ASCII whitespace removed from the end.
994	#[must_use]
995	#[inline]
996	pub fn trim_ascii_end(&self) -> Self {
997		// SAFETY:
998		// We copy the current string, so the invariants should hold for the
999		// copy too:
1000		// - The string does not get longer, so cannot be greater than
1001		// `MAX_LENGTH`.
1002		// - if the string is valid UTF-8, removing ASCII characters does not
1003		//   change that.
1004		unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii_end()) }
1005	}
1006
1007	/// Return a new string with `prefix` removed from the start.
1008	#[cfg(feature = "pattern")]
1009	#[doc(cfg(pattern))]
1010	#[inline]
1011	pub fn strip_prefix<P: str::pattern::Pattern>(
1012		&self,
1013		prefix: P,
1014	) -> Option<Self> {
1015		self.as_str()
1016			.strip_prefix(prefix)
1017			// stripping a prefix should not make the string invalid UTF-8, and
1018			// does shorten it.
1019			.map(Self::from_str_unchecked)
1020	}
1021
1022	/// Return a new string with `suffix` removed from the end.
1023	#[cfg(feature = "pattern")]
1024	#[doc(cfg(pattern))]
1025	#[inline]
1026	pub fn strip_suffix<P>(&self, suffix: P) -> Option<Self>
1027	where
1028		P: str::pattern::Pattern,
1029		for<'a> P::Searcher<'a>: str::pattern::ReverseSearcher<'a>,
1030	{
1031		self.as_str()
1032			.strip_suffix(suffix)
1033			// stripping a suffix should not make the string invalid UTF-8, and
1034			// does shorten it.
1035			.map(Self::from_str_unchecked)
1036	}
1037
1038	/// Return `true` if the string contains the pattern `pat`.
1039	///
1040	/// Returns `false` else.
1041	///
1042	/// The feature
1043	#[cfg(feature = "pattern")]
1044	#[doc(cfg(pattern))]
1045	#[inline]
1046	pub fn contains<P: str::pattern::Pattern>(&self, pat: P) -> bool {
1047		self.as_str().contains(pat)
1048	}
1049}
1050
1051
1052//==============================================================================
1053// Iterating by reference
1054
/// Iterator struct for a `&TokenString`.
///
/// Iterator items are single bytes, `u8`. The iterator borrows the string for
/// the lifetime `'a`.
pub struct TokenStringIter<'a> {
	/// The [`TokenString`] to iterate over.
	string: &'a TokenString,
	/// The current index in the string, the index of the byte that is
	/// returned next.
	idx: usize,
}
1064
1065impl<'a> TokenStringIter<'a> {
1066	/// Generate a reference iterator for the given [`TokenString`].
1067	#[must_use]
1068	#[inline]
1069	pub const fn new(s: &'a TokenString) -> Self {
1070		TokenStringIter { string: s, idx: 0 }
1071	}
1072}
1073
1074impl Iterator for TokenStringIter<'_> {
1075	type Item = u8;
1076
1077	/// Return either the next byte, [`u8`], or [`None`] if we are at the end of
1078	/// the string.
1079	fn next(&mut self) -> Option<Self::Item> {
1080		debug_assert!(
1081			self.idx <= self.string.len.into(),
1082			"The iterator index '{0}' is greater than the string length '{1}'!",
1083			self.idx,
1084			self.string.len
1085		);
1086		if self.idx == self.string.len.into() {
1087			None
1088		} else if self.string.len as usize > MAX_LENGTH_SMALL {
1089			self.idx += 1;
1090			Some(self.string.as_bytes()[self.idx - 1])
1091		} else {
1092			self.idx += 1;
1093			Some(
1094				// SAFETY:
1095				// The two arrays, `prefix` and `u.small`, are guaranteed to be
1096				// consecutive in memory and allocated at the same time.
1097				unsafe {
1098					slice::from_raw_parts(
1099						self.string.prefix.as_ptr(),
1100						self.string.len as usize,
1101					)
1102				}[self.idx - 1],
1103			)
1104		}
1105	}
1106}
1107
1108impl<'a> IntoIterator for &'a TokenString {
1109	type IntoIter = TokenStringIter<'a>;
1110	type Item = u8;
1111
1112	#[inline]
1113	fn into_iter(self) -> Self::IntoIter {
1114		Self::IntoIter::new(self)
1115	}
1116}
1117
1118//==============================================================================
1119// Iterating an owned `TokenString`.
1120
/// Iterator over the bytes of an owned [`TokenString`].
///
/// Iterator items are single bytes, [`u8`], yielded from the start of the
/// string to its end.
pub struct TokenStringIterOwn {
	/// The [`TokenString`] to iterate over, owned by the iterator.
	string: TokenString,
	/// The index of the next byte to yield. Starts at 0 and is advanced by one
	/// for every byte returned.
	idx: usize,
}
1130
1131impl TokenStringIterOwn {
1132	/// Generate an owned iterator for the given [`TokenString`].
1133	#[must_use]
1134	#[inline]
1135	pub const fn new(s: TokenString) -> Self {
1136		Self { string: s, idx: 0 }
1137	}
1138}
1139
1140impl Iterator for TokenStringIterOwn {
1141	type Item = u8;
1142
1143	/// Return either the next byte, [`u8`], or [`None`] if we are at the end of
1144	/// the string.
1145	fn next(&mut self) -> Option<Self::Item> {
1146		debug_assert!(
1147			self.idx <= self.string.len.into(),
1148			"The iterator index '{0}' is greater than the string length '{1}'!",
1149			self.idx,
1150			self.string.len
1151		);
1152		if self.idx == self.string.len.into() {
1153			None
1154		} else if self.string.len as usize > MAX_LENGTH_SMALL {
1155			self.idx += 1;
1156			Some(self.string.as_bytes()[self.idx - 1])
1157		} else {
1158			self.idx += 1;
1159			Some(
1160				// SAFETY:
1161				// The two arrays, `prefix` and `u.small`, are guaranteed to be
1162				// consecutive in memory and allocated at the same time.
1163				unsafe {
1164					slice::from_raw_parts(
1165						self.string.prefix.as_ptr(),
1166						self.string.len as usize,
1167					)
1168				}[self.idx - 1],
1169			)
1170		}
1171	}
1172}
1173
1174impl IntoIterator for TokenString {
1175	type IntoIter = TokenStringIterOwn;
1176	type Item = u8;
1177
1178	#[inline]
1179	fn into_iter(self) -> Self::IntoIter {
1180		Self::IntoIter::new(self)
1181	}
1182}
1183
1184
1185// =============================================================================
1186//                                  Tests
1187// =============================================================================
1188
#[cfg(test)]
mod prefix {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	/// Check that `res` holds the six bytes `123456` and is a small string.
	///
	/// Six bytes fit into the `prefix` field, so only that field is inspected.
	fn check_123456(res: &TokenString) {
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		check!(res.is_small());
	}

	#[test]
	fn empty_is_empty() {
		let_assert!(Ok(res) = TokenString::try_from(""));
		check!(res.prefix[0] == 0);
		check!(res.len == 0);
		check!(res.is_small());
	}

	#[test]
	fn clone_empty() {
		let_assert!(Ok(s1) = TokenString::try_from(""));
		#[expect(
			clippy::redundant_clone,
			reason = "this clone isn't redundant?!"
		)]
		let res = s1.clone();
		check!(res.prefix[0] == s1.prefix[0]);
		check!(res.len == s1.len);
		check!(res.is_small());
	}

	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("123456"));
		check_123456(&res);
	}

	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("123456"));
		#[expect(
			clippy::redundant_clone,
			reason = "this clone isn't redundant?!"
		)]
		let res = s1.clone();
		check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]);
		check!(res.len == s1.len);
		check!(res.is_small());
	}

	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"123456";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check_123456(&res);
	}

	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::vec::Vec<char> = "123456".chars().collect();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check_123456(&res);
	}

	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "123456".into();
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check_123456(&res);
	}

	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "123456".into();
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check_123456(&res);
	}

	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("123456");
		// Also checks `is_small`, like all the other constructor tests.
		check_123456(&res);
	}

	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"123456";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check_123456(&res);
	}

	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "123456".into();
		let res = TokenString::from_string_unchecked(&s1);
		check_123456(&res);
	}
}
1318
#[cfg(test)]
mod small {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	/// Check that `res` holds the seven bytes `1234567` as a small string.
	///
	/// The first six bytes are expected in the `prefix` field, the seventh one
	/// in the first byte of the `small` member of the union.
	fn check_1234567(res: &TokenString) {
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small());
	}

	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("1234567"));
		check_1234567(&res);
	}

	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("1234567"));
		#[expect(
			clippy::redundant_clone,
			reason = "this clone isn't redundant?!"
		)]
		let res = s1.clone();
		check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]);
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] == s1.u.small[0] });
		check!(res.len == s1.len);
		check!(res.is_small());
	}

	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"1234567";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check_1234567(&res);
	}

	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::vec::Vec<char> = "1234567".chars().collect();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check_1234567(&res);
	}

	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567".into();
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check_1234567(&res);
	}

	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567".into();
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check_1234567(&res);
	}

	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("1234567");
		check_1234567(&res);
	}

	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"1234567";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check_1234567(&res);
	}

	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567".into();
		let res = TokenString::from_string_unchecked(&s1);
		check_1234567(&res);
	}
}
1455
#[cfg(test)]
mod heap {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	/// Check that `res` holds the 15 bytes `1234567890ABCDE` and is not a small
	/// string.
	///
	/// The first six bytes are expected in the `prefix` field, the whole string
	/// behind the `ptr` member of the union.
	fn check_1234567890abcde(res: &TokenString) {
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(!res.is_small());
	}

	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("1234567890ABCDE"));
		check_1234567890abcde(&res);
	}


	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("1234567890ABCDE"));
		#[expect(
			clippy::redundant_clone,
			reason = "this clone isn't redundant?!"
		)]
		let res = s1.clone();
		check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]);
		check!(
			// SAFETY:
			// We know there is a large string in the union. Each slice is
			// built with the length of its own string, so a wrong length in
			// the clone can never be used to read the original's allocation.
			unsafe {
				res.u.ptr.as_slice_manually(res.len as usize)[.. 15]
					== s1.u.ptr.as_slice_manually(s1.len as usize)[.. 15]
			}
		);
		check!(res.len == s1.len);
		check!(!res.is_small());
	}

	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"1234567890ABCDE";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check_1234567890abcde(&res);
	}

	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::vec::Vec<char> = "1234567890ABCDE".chars().collect();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check_1234567890abcde(&res);
	}

	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567890ABCDE".into();
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check_1234567890abcde(&res);
	}

	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567890ABCDE".into();
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check_1234567890abcde(&res);
	}

	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("1234567890ABCDE");
		check_1234567890abcde(&res);
	}

	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"1234567890ABCDE";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check_1234567890abcde(&res);
	}

	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567890ABCDE".into();
		let res = TokenString::from_string_unchecked(&s1);
		check_1234567890abcde(&res);
	}
}
token_string/string.rs

token_string/
string.rs