token_string/
string.rs

// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
// SPDX-License-Identifier: MPL-2.0
//
// Project:  token-string
// File:     string.rs
// Date:     22.Nov.2024
// =============================================================================
//! The string type [`TokenString`].

extern crate alloc;

use alloc::string::ToString as _;
use alloc::vec;
use core::{borrow, cmp, fmt, hash, mem, ops, panic, slice, str};

use crate::{StringPtr, TkStrError};

/// The number of leading bytes of the string that are stored in the field
/// `prefix` of [`TokenString`] and used for fast comparisons.
///
/// This is the size of a `u64` minus the size of the `u16` length field.
pub const PREFIX_LENGTH: usize = mem::size_of::<u64>() - mem::size_of::<u16>();

/// [`PREFIX_LENGTH`] plus one; used as the lower bound of range patterns that
/// start directly after the prefix-only strings.
const PREFIX_LENGTH_ADD1: usize = PREFIX_LENGTH + 1;

/// The length of the non-prefix part of a "small string", the size of a `u64`.
/// This is the capacity of the field `u.small` of [`TokenString`].
pub const SMALL_DATA_LENGTH: usize = mem::size_of::<u64>();

/// The maximum length in bytes, not Unicode scalar values, of a "small" string
/// that is saved in the struct [`TokenString`] itself and not on the heap.
///
/// This is [`PREFIX_LENGTH`] plus [`SMALL_DATA_LENGTH`].
pub const MAX_LENGTH_SMALL: usize = PREFIX_LENGTH + SMALL_DATA_LENGTH;

/// [`MAX_LENGTH_SMALL`] plus one; used as the lower bound of range patterns
/// that match heap allocated strings.
pub const MAX_LENGTH_SMALL_ADD1: usize = MAX_LENGTH_SMALL + 1;

/// The maximum length in bytes, not Unicode scalar values, of a
/// [`TokenString`]. This is the biggest value the `u16` length field can hold.
pub const MAX_LENGTH: usize = u16::MAX as usize;

/// A string which can hold at most [`MAX_LENGTH`] bytes (not Unicode scalar
/// values).
///
/// This holds valid UTF-8 encoded strings only.
/// Strings that are short enough, which need at most [`MAX_LENGTH_SMALL`]
/// bytes, are stored in the struct itself, bigger ones use the heap.
///
/// The struct is `#[repr(C)]`, so the fields are laid out in declaration
/// order: `len`, `prefix`, `u`.
///
/// # Invariant
///
/// - [`TokenString`] must be a UTF-8 string (like &[`prim@str`] and
///   [`alloc::string::String`]).
/// - The length of a [`TokenString`] is at most [`MAX_LENGTH`] and at least 0 -
///   the empty string.
/// - Which variant of `u` is active is decided by `len` alone: `u.small` if
///   `len` is at most [`MAX_LENGTH_SMALL`], `u.ptr` otherwise.
#[repr(C)]
pub struct TokenString {
	/// The length of the string in bytes.
	///
	/// Maximum: [`MAX_LENGTH`].
	pub(crate) len: u16,
	/// The first [`PREFIX_LENGTH`] bytes of the string, padded with zero bytes
	/// if the string is shorter than that.
	pub(crate) prefix: [u8; PREFIX_LENGTH],
	/// The data (see [`Data`]).
	///
	/// If the string is at most [`MAX_LENGTH_SMALL`] bytes, this holds the
	/// other bytes of the string, else this is a pointer to the heap.
	pub(crate) u: Data,
}


// Compile-time layout checks.
//
// Invariants: [`TokenString`] must be aligned to 64 bits and its size must be
// 128 bits. That means that `sizeof len + prefix == 64 bit` and
// `sizeof u == 64 bit`. So there is no padding. The code that views `prefix`
// and `u.small` as one contiguous byte slice relies on this.

// `TokenString` has the alignment of a `u64`.
const _: () = assert!(
	mem::align_of::<TokenString>() == mem::size_of::<u64>(),
	"struct TokenString is not aligned to 64 bits!"
);
// `TokenString` is exactly two `u64`s big.
const _: () = assert!(
	mem::size_of::<TokenString>() == 2 * mem::size_of::<u64>(),
	"struct TokenString has size != 128 bits"
);
// The union `Data` has the alignment of a `u64`.
const _: () = assert!(
	mem::align_of::<Data>() == mem::size_of::<u64>(),
	"struct Data is not aligned to 64 bits!"
);
// The union `Data` is exactly one `u64` big.
const _: () = assert!(
	mem::size_of::<Data>() == mem::size_of::<u64>(),
	"union Data has size != 64 bits"
);

// =============================================================================
// Inner types of `TokenString`.

/// The second half of a [`TokenString`].
///
/// This is either the bytes of the string following the prefix, stored
/// inline, if the string is at most [`MAX_LENGTH_SMALL`] bytes long, or a
/// pointer to the memory holding the whole string if it is longer.
///
/// The union carries no tag of its own; the field `len` of the owning
/// [`TokenString`] decides which variant is active.
///
/// See [`StringPtr`]
#[repr(C)]
pub union Data {
	/// If the string is small enough (at most [`MAX_LENGTH_SMALL`]), its data
	/// after the prefix is here.
	pub(crate) small: [u8; SMALL_DATA_LENGTH],
	/// For strings bigger than [`MAX_LENGTH_SMALL`], this points to the memory
	/// holding the whole string. Wrapped in [`mem::ManuallyDrop`], so it must
	/// be released explicitly (see the `Drop` implementation of
	/// [`TokenString`]).
	pub(crate) ptr: mem::ManuallyDrop<StringPtr>,
}

// =============================================================================
// `TokenString` itself

/// The empty string.
///
/// Has a length of zero; the prefix and the inline data are all zero bytes.
/// Being a small string, it does not use the heap.
pub const EMPTY: TokenString = TokenString {
	len: 0,
	prefix: [0_u8; PREFIX_LENGTH],
	u: Data {
		small: [0_u8; SMALL_DATA_LENGTH],
	},
};

// =============================================================================
// Traits

impl TryFrom<&str> for TokenString {
	type Error = TkStrError;

	/// Create a [`TokenString`] from a &[`prim@str`].
	///
	/// Return [`TkStrError::TooBig`] if the argument is greater than
	/// [`MAX_LENGTH`].
	///
	/// Memory:
	///
	/// Allocates if and only if the length of `value` is bigger than
	/// [`MAX_LENGTH_SMALL`].
	fn try_from(value: &str) -> Result<Self, Self::Error> {
		let bytes = value.as_bytes();
		match value.len() {
			| 0 => Ok(Self {
				len: 0,
				prefix: [0_u8; PREFIX_LENGTH],
				u: Data {
					small: [0_u8; SMALL_DATA_LENGTH],
				},
			}),
			| 1 ..= PREFIX_LENGTH => {
				let s = value.len();
				let mut prefix = [0_u8; PREFIX_LENGTH];
				prefix[.. s].copy_from_slice(&bytes[.. s]);
				Ok(Self {
					#[expect(
						clippy::cast_possible_truncation,
						reason = "Length has been checked above"
					)]
					len: s as u16,
					prefix,
					u: Data {
						small: [0_u8; SMALL_DATA_LENGTH],
					},
				})
			}
			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
				let s = value.len();
				let mut prefix = [0_u8; PREFIX_LENGTH];
				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
				let mut small = [0_u8; SMALL_DATA_LENGTH];
				small[.. s - PREFIX_LENGTH]
					.copy_from_slice(&bytes[PREFIX_LENGTH .. s]);
				Ok(Self {
					#[expect(
						clippy::cast_possible_truncation,
						reason = "Length has been checked above"
					)]
					len: s as u16,
					prefix,
					u: Data { small },
				})
			}
			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => {
				let ptr = StringPtr::from(bytes);
				let u = Data {
					ptr: mem::ManuallyDrop::new(ptr),
				};
				let mut prefix = [0_u8; PREFIX_LENGTH];
				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
				Ok(Self {
					#[expect(
						clippy::cast_possible_truncation,
						reason = "Length has been checked above"
					)]
					len: value.len() as u16,
					prefix,
					u,
				})
			}
			| _ => Err(TkStrError::TooBig(value.len())),
		}
	}
}

impl TryFrom<&[u8]> for TokenString {
	type Error = TkStrError;

	/// Try to create a [`TokenString`] from the given slice.
	///
	/// The slice is validated as UTF-8 first and then converted like a
	/// &[`prim@str`].
	///
	/// Memory:
	///
	/// Allocates if and only if the length of `value` is bigger than
	/// [`MAX_LENGTH_SMALL`].
	///
	/// # Errors
	///
	/// Return [`TkStrError::UnicodeError`] if the given slice is not valid
	/// UTF-8.
	/// Return [`TkStrError::TooBig`] if the given slice is too big, greater
	/// than [`MAX_LENGTH`].
	fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
		let text = str::from_utf8(value).map_err(TkStrError::UnicodeError)?;
		Self::try_from(text)
	}
}

impl TryFrom<&[char]> for TokenString {
	type Error = TkStrError;

	/// Try to create a [`TokenString`] from the given slice of [`char`]s.
	///
	/// The characters are first collected into a temporary
	/// [`alloc::string::String`], which is then converted.
	///
	/// Memory
	///
	/// Allocates and deallocates a temporary [`alloc::string::String`]
	/// collecting the converted bytes.
	///
	/// # Errors
	///
	/// Return [`TkStrError::TooBig`] if the UTF-8 encoding of the given slice
	/// is too big, greater than [`MAX_LENGTH`].
	fn try_from(value: &[char]) -> Result<Self, Self::Error> {
		let collected: alloc::string::String = value.iter().collect();
		Self::try_from(collected)
	}
}

impl TryFrom<&alloc::string::String> for TokenString {
	type Error = TkStrError;

	/// Create a `TokenString` from a &[`alloc::string::String`].
	///
	/// Converts the borrowed string slice of `value`.
	///
	/// Memory:
	///
	/// Allocates if and only if the length of `value` is bigger than
	/// [`MAX_LENGTH_SMALL`].
	///
	/// # Errors
	///
	/// Return [`TkStrError::TooBig`] if the argument is greater than
	/// [`MAX_LENGTH`].
	fn try_from(value: &alloc::string::String) -> Result<Self, Self::Error> {
		Self::try_from(value.as_str())
	}
}

impl TryFrom<alloc::string::String> for TokenString {
	type Error = TkStrError;

	/// Create a [`TokenString`] from a [`alloc::string::String`].
	///
	/// The bytes of `value` are copied; `value` itself is dropped when this
	/// function returns.
	///
	/// Memory:
	///
	/// Allocates if and only if the length of `value` is bigger than
	/// [`MAX_LENGTH_SMALL`].
	///
	/// # Errors
	///
	/// Return [`TkStrError::TooBig`] if the argument is greater than
	/// [`MAX_LENGTH`].
	fn try_from(value: alloc::string::String) -> Result<Self, Self::Error> {
		// The buffer of the `String` cannot be taken over directly: a
		// [`String`] has a capacity which must be known when deallocating the
		// data (see [`String::into_raw_parts`]). So copy from the slice.
		Self::try_from(value.as_str())
	}
}

impl Drop for TokenString {
	#[cfg_attr(test, mutants::skip)]
	fn drop(&mut self) {
		if usize::from(self.len) > MAX_LENGTH_SMALL {
			// SAFETY:
			// We know that there is a pointer saved in the union.
			// The whole string is being dropped, so taking a mutable
			// reference of the pointer is legal.
			let mut m_ptr = unsafe { mem::ManuallyDrop::take(&mut self.u.ptr) };
			m_ptr.drop_manually(self.len.into());
		}
	}
}

impl Clone for TokenString {
	/// Return a clone of the [`TokenString`].
	///
	/// Length and prefix are copied; the inline data is copied for small
	/// strings, and the heap data is cloned for big ones.
	///
	/// Memory:
	///
	/// Allocates if and only if the length of `value` is bigger than
	/// [`MAX_LENGTH_SMALL`].
	fn clone(&self) -> Self {
		let u = if self.is_small() {
			// SAFETY:
			// A small string stores its data inline in the union.
			let inline = unsafe { self.u.small };
			Data { small: inline }
		} else {
			// SAFETY:
			// The string is not small, so there is an allocated pointer
			// saved in the union.
			let heap = unsafe { self.u.ptr.clone_manually(self.len.into()) };
			Data {
				ptr: mem::ManuallyDrop::new(heap),
			}
		};
		Self {
			len: self.len,
			prefix: self.prefix,
			u,
		}
	}
}

impl Default for TokenString {
	/// Return the empty string, [`EMPTY`].
	///
	/// Memory:
	///
	/// Does not allocate, the empty string is a small string.
	fn default() -> Self {
		EMPTY
	}
}

impl Eq for TokenString {}

impl PartialEq for TokenString {
	fn eq(&self, other: &Self) -> bool {
		if self.len != other.len || self.prefix != other.prefix {
			return false;
		}

		if self.len as usize <= MAX_LENGTH_SMALL {
			// SAFETY:
			// We know we have two small strings to compare.
			unsafe { self.u.small == other.u.small }
		} else {
			// SAFETY:
			// We know we have two string pointers to compare.
			unsafe { self.u.ptr.eq_manually(&other.u.ptr, self.len.into()) }
		}
	}
}

impl PartialEq<[u8]> for TokenString {
	/// Return `true` if the bytes of the string are the same as `other`.
	fn eq(&self, other: &[u8]) -> bool {
		// `as_bytes` handles the empty, the small and the heap allocated
		// string; slice comparison checks the lengths first.
		self.as_bytes() == other
	}
}

impl PartialEq<str> for TokenString {
	fn eq(&self, other: &str) -> bool {
		self == other.as_bytes()
	}
}

impl PartialEq<alloc::string::String> for TokenString {
	fn eq(&self, other: &alloc::string::String) -> bool {
		self == other.as_bytes()
	}
}


impl Ord for TokenString {
	/// Compare two [`TokenString`]s byte-wise (lexicographically).
	///
	/// The result is consistent with `Eq` and with the ordering of the
	/// corresponding &[`prim@str`]s: `Ordering::Equal` is returned if and
	/// only if both strings have the same bytes.
	///
	/// This is not a sensible alphabetical comparison for anything that isn't
	/// ASCII.
	fn cmp(&self, other: &Self) -> cmp::Ordering {
		self.prefix
			.cmp(&other.prefix)
			.then_with(|| self.suffix().cmp(other.suffix()))
			// The prefix (and possibly the inline data) is padded with zero
			// bytes, so a string and the same string followed by NUL bytes
			// can look identical up to here. In that case the shorter one is
			// a proper prefix of the longer one and must sort first.
			.then_with(|| self.len.cmp(&other.len))
	}
}

impl PartialOrd for TokenString {
	/// Compare two [`TokenString`]s byte-wise.
	///
	/// Always returns `Some`, this simply wraps the result of the total order
	/// given by [`Ord::cmp`].
	///
	/// This is not a sensible alphabetical comparison for anything that isn't
	/// ASCII.
	fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
		Some(self.cmp(other))
	}
}

impl fmt::Display for TokenString {
	/// Write the string to the formatter.
	///
	/// Uses [`fmt::Formatter::pad`], so width, fill, alignment and precision
	/// given in the format specification (e.g. `{:>10}`) are applied, like
	/// they are for a &[`prim@str`].
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		f.pad(self.as_str())
	}
}

impl fmt::Debug for TokenString {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		if self.len as usize > MAX_LENGTH_SMALL {
			let string =
			// SAFETY:
			// We know that the pointer points to a string.
				unsafe { self.u.ptr.as_string_manually(self.len.into()) };
			// SAFETY:
			// We know that the pointer points to a string.
			let ptr = unsafe { &self.u.ptr };
			f.debug_struct("TokenString")
				.field("len", &self.len)
				.field("prefix", &self.prefix_str())
				.field("ptr", ptr)
				.field("string", &string)
				.finish()
		} else {
			// SAFETY:
			// We've checked that this is a small string.
			unsafe {
				f.debug_struct("TokenString")
					.field("len", &self.len)
					.field("prefix", &self.prefix_str())
					.field("small", &self.small_str())
					.field("string", &self.as_str())
					.finish()
			}
		}
	}
}

impl<Idx> ops::Index<Idx> for TokenString
where
	Idx: slice::SliceIndex<str>,
{
	type Output = Idx::Output;

	/// Index into the string exactly like into a &[`prim@str`].
	///
	/// # Panics
	///
	/// Panics whenever indexing the &[`prim@str`] with `index` panics.
	fn index(&self, index: Idx) -> &Self::Output {
		&self.as_str()[index]
	}
}

impl borrow::Borrow<str> for TokenString {
	/// Borrow the string as a &[`prim@str`], see [`TokenString::as_str`].
	fn borrow(&self) -> &str {
		self.as_str()
	}
}

impl AsRef<str> for TokenString {
	/// Return the string as a &[`prim@str`], see [`TokenString::as_str`].
	fn as_ref(&self) -> &str {
		self.as_str()
	}
}

impl hash::Hash for TokenString {
	/// Feed the string into the hasher exactly like the &[`prim@str`] with the
	/// same content does.
	fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
		hash::Hash::hash(self.as_str(), state);
	}
}

// SAFETY:
// A `TokenString` is either stored completely inline or refers to heap memory
// through its `StringPtr`.
// NOTE(review): this relies on the heap memory being owned exclusively by this
// value and not shared with other `TokenString`s - confirm against the
// definition of `StringPtr`.
unsafe impl Send for TokenString {}

// SAFETY:
// `TokenString` is immutable: no public method of this file changes the
// string through a shared reference.
// NOTE(review): this assumes that `StringPtr` has no interior mutability -
// confirm against the definition of `StringPtr`.
unsafe impl Sync for TokenString {}

// =============================================================================
// Non trait methods

impl TokenString {
	/// Return the used part of the prefix as a `&[u8]`.
	///
	/// This is the whole prefix for strings of at least [`PREFIX_LENGTH`]
	/// bytes, else the first `len` bytes of it, without the zero padding.
	fn prefix_str(&self) -> &[u8] {
		let used = cmp::min(usize::from(self.len), PREFIX_LENGTH);
		&self.prefix[.. used]
	}

	/// Return the part of a small string after the prefix as a `&[u8]`.
	///
	/// The slice is empty if the string fits into the prefix.
	///
	/// # Safety
	///
	/// Must be called with a small string only!
	unsafe fn small_str(&self) -> &[u8] {
		// Number of bytes stored after the prefix, zero if there are none.
		let tail = (self.len as usize).saturating_sub(PREFIX_LENGTH);
		// SAFETY:
		// The caller guarantees that the union contains a small string.
		unsafe { &self.u.small[.. tail] }
	}

	/// Return the length of the string in bytes.
	///
	/// This is the length of the string in bytes, not Unicode scalar values and
	/// not grapheme clusters. It is at most [`MAX_LENGTH`].
	#[must_use]
	pub const fn len(&self) -> usize {
		self.len as usize
	}

	/// Return `true` if the string is a "small string", that is, it is saved in
	/// the [`TokenString`] struct itself.
	///
	/// A string is small if its length is at most [`MAX_LENGTH_SMALL`] bytes;
	/// the empty string is small too.
	///
	/// If this returns `false`, the string is allocated on the heap.
	#[must_use]
	pub const fn is_small(&self) -> bool {
		self.len as usize <= MAX_LENGTH_SMALL
	}

	/// Return `true`, if this is the empty string, that is, its length is
	/// zero.
	///
	/// Returns `false` else.
	#[must_use]
	pub const fn is_empty(&self) -> bool {
		self.len == 0
	}

	/// Convert to a [`TokenString`].
	///
	/// `bytes` must be valid UTF-8, use [`TokenString::try_from`] if you are
	/// not sure that it is valid. If the given byte slice is bigger than
	/// [`MAX_LENGTH`], this panics.
	///
	/// Memory:
	///
	/// Allocates if and only if the length of `bytes` is bigger than
	/// [`MAX_LENGTH_SMALL`].
	///
	/// # Panics
	///
	/// Panics if `bytes` is bigger than [`MAX_LENGTH`].
	///
	/// # Safety
	///
	/// `bytes` must be valid UTF-8, if not, all bets are off - UB!
	#[must_use]
	pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> Self {
		match bytes.len() {
			| 0 => Self {
				len: 0,
				prefix: [0_u8; PREFIX_LENGTH],
				u: Data {
					small: [0_u8; SMALL_DATA_LENGTH],
				},
			},
			| 1 ..= PREFIX_LENGTH => {
				let s = bytes.len();
				let mut prefix = [0_u8; PREFIX_LENGTH];
				prefix[.. s].copy_from_slice(&bytes[.. s]);
				Self {
					#[expect(
						clippy::cast_possible_truncation,
						reason = "Length has been checked above"
					)]
					len: s as u16,
					prefix,
					u: Data {
						small: [0_u8; SMALL_DATA_LENGTH],
					},
				}
			}
			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL => {
				let s = bytes.len();
				let mut prefix = [0_u8; PREFIX_LENGTH];
				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
				let mut small = [0_u8; SMALL_DATA_LENGTH];
				small[.. s - PREFIX_LENGTH]
					.copy_from_slice(&bytes[PREFIX_LENGTH .. s]);
				Self {
					#[expect(
						clippy::cast_possible_truncation,
						reason = "Length has been checked above"
					)]
					len: s as u16,
					prefix,
					u: Data { small },
				}
			}
			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH => {
				let ptr = StringPtr::from(bytes);
				let u = Data {
					ptr: mem::ManuallyDrop::new(ptr),
				};
				let mut prefix = [0_u8; PREFIX_LENGTH];
				prefix.copy_from_slice(&bytes[.. PREFIX_LENGTH]);
				Self {
					#[expect(
						clippy::cast_possible_truncation,
						reason = "Length has been checked above"
					)]
					len: bytes.len() as u16,
					prefix,
					u,
				}
			}
			| _ => panic!(
				"This byte slice is too big for a TokenString, {} > \
				 {MAX_LENGTH}",
				bytes.len()
			),
		}
	}

	/// Convert a &[`prim@str`] to a [`TokenString`], panicking if it is too
	/// long.
	///
	/// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use
	/// [`TokenString::try_from`] for a function that does not panic. The string
	/// `s` must be valid UTF-8 too, but it has already been UB if it isn't.
	///
	/// Memory:
	///
	/// Allocates if and only if the length of `s` is bigger than
	/// [`MAX_LENGTH_SMALL`].
	///
	/// # Panics
	///
	/// Panics if `s` is bigger than [`MAX_LENGTH`].
	#[must_use]
	pub fn from_str_unchecked(s: &str) -> Self {
		// SAFETY:
		// The unsafe part of `from_bytes_unchecked` is the possibility of the
		// byte slice not being valid UTF-8. The bytes of a `&str` are always
		// valid UTF-8.
		unsafe { Self::from_bytes_unchecked(s.as_bytes()) }
	}

	/// Convert a &[`alloc::string::String`] to a [`TokenString`], panicking if
	/// it is too long.
	///
	/// If the given string `s` is bigger than [`MAX_LENGTH`], this panics. Use
	/// [`TokenString::try_from`] for a function that does not panic. The string
	/// `s` must be valid UTF-8 too, but it has already been UB if it isn't.
	///
	/// Memory:
	///
	/// Allocates if and only if the length of `s` is bigger than
	/// [`MAX_LENGTH_SMALL`].
	///
	/// # Panics
	///
	/// Panics if `s` is bigger than [`MAX_LENGTH`].
	#[must_use]
	pub fn from_string_unchecked(s: &alloc::string::String) -> Self {
		// Same conversion as for a string slice: the bytes of `s` are valid
		// UTF-8, only the length can be a problem.
		Self::from_str_unchecked(s.as_str())
	}

	/// Return the string as a &[`prim@str`].
	///
	/// Memory:
	///
	/// Does not allocate, the returned slice borrows from `self`.
	#[must_use]
	pub fn as_str(&self) -> &str {
		match usize::from(self.len) {
			| 0 => "",
			| byte_count @ 1 ..= MAX_LENGTH_SMALL => {
				// SAFETY:
				// Use the whole memory region of self.`prefix` and
				// `self.u.small` as a single array. This is not UB, as the
				// whole memory `TokenString` has been allocated at once and
				// is guaranteed to be continuous in memory. If Miri
				// complains about this, use the flag `MIRIFLAGS="
				// -Zmiri-tree-borrows"` to use "tree borrows" instead of
				// "stacked borrows".
				let bytes = unsafe {
					slice::from_raw_parts(self.prefix.as_ptr(), byte_count)
				};
				// SAFETY:
				// The precondition of `TokenString` is that the string is a
				// valid UTF-8 byte sequence.
				unsafe { str::from_utf8_unchecked(bytes) }
			}
			// SAFETY:
			// The string is not small, so in the union must be a valid
			// pointer.
			| _ => unsafe { self.u.ptr.as_string_manually(self.len.into()) },
		}
	}

	/// Return the string as a byte slice.
	///
	/// Memory:
	///
	/// Does not allocate, the returned slice borrows from `self`.
	#[must_use]
	pub fn as_bytes(&self) -> &[u8] {
		match usize::from(self.len) {
			| 0 => &[],
			// SAFETY:
			// Use the whole memory region of self.`prefix` and `self.u.small`
			// as a single array. This is not UB, as the whole memory
			// `TokenString` has been allocated at once and is guaranteed to be
			// continuous in memory. If Miri complains about this, use the
			// flag `MIRIFLAGS="-Zmiri-tree-borrows"` to use "tree borrows"
			// instead of "stacked borrows".
			| byte_count @ 1 ..= MAX_LENGTH_SMALL => unsafe {
				slice::from_raw_parts(self.prefix.as_ptr(), byte_count)
			},
			// SAFETY:
			// The string is not small, so in the union must be a valid
			// pointer.
			| byte_count => unsafe { self.u.ptr.as_slice_manually(byte_count) },
		}
	}

	/// Return a copy of the string as a new [`alloc::string::String`].
	///
	/// Memory:
	///
	/// Allocates a new [`alloc::string::String`].
	#[must_use]
	pub fn as_string(&self) -> alloc::string::String {
		self.as_str().to_string()
	}

	/// Return the Unicode scalar values of the string as a new vector of
	/// [`char`]s.
	///
	/// Memory:
	///
	/// Allocates a new [`vec::Vec`].
	#[must_use]
	pub fn as_chars(&self) -> vec::Vec<char> {
		let scalars = self.chars();
		scalars.collect()
	}

	/// Return the part of the string which is not stored in `self.prefix`.
	///
	/// Only the bytes belonging to the string are returned, never the zero
	/// padding of the inline data.
	///
	/// If the string is <= [`PREFIX_LENGTH`], the empty slice is returned.
	fn suffix(&self) -> &[u8] {
		let total = self.len as usize;
		match total {
			| 0 ..= PREFIX_LENGTH => Default::default(),
			| PREFIX_LENGTH_ADD1 ..= MAX_LENGTH_SMALL =>
			// SAFETY:
			// We checked and know that this is a small string. Only the first
			// `total - PREFIX_LENGTH` bytes of `small` belong to the string,
			// the rest is padding.
			unsafe { &self.u.small[.. total - PREFIX_LENGTH] },
			| MAX_LENGTH_SMALL_ADD1 ..= MAX_LENGTH =>
			// SAFETY:
			// We checked and know that this string is allocated on the heap.
			unsafe {
				&self.u.ptr.as_slice_manually(total)[PREFIX_LENGTH ..]
			},
			| _ => panic!(
				"Error: this TokenString is bigger than \
				 TokenString::MAX_LENGTH!"
			),
		}
	}

	/// Return the byte at index `idx`, check bounds.
	///
	/// # Errors
	///
	/// [`TkStrError::OutOfBounds`] if `idx` is greater than or equal to the
	/// string's length.
	pub fn get(&self, idx: u16) -> Result<u8, TkStrError> {
		let pos = usize::from(idx);
		// `as_bytes` is exactly `len` bytes long, so the slice lookup fails
		// for the same indices that are out of bounds of the string.
		self.as_bytes()
			.get(pos)
			.copied()
			.ok_or(TkStrError::OutOfBounds(pos))
	}

	/// Return the byte at index `idx`, panic if it is out of bounds.
	///
	/// Despite the name, the index is checked: instead of returning an error
	/// like [`TokenString::get`], this panics.
	///
	/// # Panics
	///
	/// if `idx` is greater than or equal to the string's length.
	#[must_use]
	pub fn get_unchecked(&self, idx: u16) -> u8 {
		assert!(idx < self.len, "index {idx} out of bounds");
		// The assertion above guarantees that the index is in bounds.
		self.as_bytes()[usize::from(idx)]
	}

	/// Return an iterator over the [`char`]s of a string.
	///
	/// That is, an iterator over the Unicode scalar values of the
	/// `TokenString`. The iterator borrows from `self`.
	pub fn chars(&self) -> str::Chars<'_> {
		self.as_str().chars()
	}

	/// Get a reference iterator.
	#[must_use]
	pub fn iter(&self) -> TokenStringIter<'_> {
		<&Self as IntoIterator>::into_iter(self)
	}

	/// Return `true`, if the first byte is an uppercase ASCII character.
	///
	/// Only looks at the first byte of the prefix.
	#[must_use]
	pub const fn starts_ascii_uppercase(&self) -> bool {
		self.prefix[0].is_ascii_uppercase()
	}

	/// Return `true`, if the first byte is a lowercase ASCII character.
	///
	/// Only looks at the first byte of the prefix.
	#[must_use]
	pub const fn starts_ascii_lowercase(&self) -> bool {
		self.prefix[0].is_ascii_lowercase()
	}

	/// Return `true`, if the string contains only ASCII characters.
	///
	/// Checks all bytes of the string.
	#[must_use]
	pub fn is_ascii(&self) -> bool {
		self.as_bytes().is_ascii()
	}

	/// Return `true`, if the string starts with the [`TokenString`] `needle`.
	///
	/// The comparison is done byte-wise.
	/// Returns `true` too if the string is `needle`.
	#[must_use]
	pub fn starts_with(&self, needle: &Self) -> bool {
		self.as_bytes().starts_with(needle.as_bytes())
	}

	/// Return `true`, if the string starts with the byte slice `needle`.
	///
	/// The comparison is done byte-wise.
	/// Returns `true` too if the string is `needle`.
	#[must_use]
	pub fn starts_with_bytes(&self, needle: &[u8]) -> bool {
		self.as_bytes().starts_with(needle)
	}

	/// Return `true`, if the string starts with the &[`prim@str`] `needle`.
	///
	/// Returns `true` too if the string is `needle`.
	#[must_use]
	pub fn starts_with_str(&self, needle: &str) -> bool {
		self.as_str().starts_with(needle)
	}

	/// Return `true`, if the string ends with the [`TokenString`] `needle`.
	///
	/// The comparison is done byte-wise.
	/// Returns `true` too if the string is `needle`.
	#[must_use]
	pub fn ends_with(&self, needle: &Self) -> bool {
		self.as_bytes().ends_with(needle.as_bytes())
	}

	/// Return `true`, if the string ends with the byte slice `needle`.
	///
	/// The comparison is done byte-wise.
	/// Returns `true` too if the string is `needle`.
	#[must_use]
	pub fn ends_with_bytes(&self, needle: &[u8]) -> bool {
		self.as_bytes().ends_with(needle)
	}

	/// Return `true`, if the string ends with the &[`prim@str`] `needle`.
	///
	/// Returns `true` too if the string is `needle`.
	#[must_use]
	pub fn ends_with_str(&self, needle: &str) -> bool {
		self.as_str().ends_with(needle)
	}

	/// Map the given function `f` over the bytes of the string, mutating it.
	fn map_bytes_mut(&mut self, f: fn(&mut [u8]) -> ()) {
		if self.len as usize > MAX_LENGTH_SMALL {
			// SAFETY:
			// We check, that we actually have a valid pointer.
			unsafe {
				f((*self.u.ptr).as_slice_manually_mut(self.len as usize));
			}
		} else {
			// SAFETY:
			// The two arrays, `prefix` and `small`, are guaranteed to be
			// continuous in memory.
			unsafe {
				f(slice::from_raw_parts_mut(
					self.prefix.as_mut_ptr(),
					self.len as usize,
				));
			}
		}
	}

	/// Return a new string with all uppercase ASCII characters changed to
	/// lowercase.
	///
	/// Non-ASCII bytes are not changed. The string itself is not modified, the
	/// conversion is done on a clone.
	#[must_use]
	pub fn to_ascii_lowercase(&self) -> Self {
		let mut ret_val = self.clone();
		ret_val.map_bytes_mut(<[u8]>::make_ascii_lowercase);
		ret_val
	}

	/// Return a new string with all lowercase ASCII characters changed to
	/// uppercase.
	///
	/// Non-ASCII bytes are not changed. The string itself is not modified, the
	/// conversion is done on a clone.
	#[must_use]
	pub fn to_ascii_uppercase(&self) -> Self {
		let mut ret_val = self.clone();
		ret_val.map_bytes_mut(<[u8]>::make_ascii_uppercase);
		ret_val
	}

	/// Return a new string with all ASCII whitespace removed from the start and
	/// end.
	///
	/// The string itself is not modified.
	#[must_use]
	pub fn trim_ascii(&self) -> Self {
		// SAFETY:
		// We copy the current string, so the invariants should hold for the
		// copy too:
		// - The string does not get longer, so cannot be greater than
		// `MAX_LENGTH`.
		// - if the string is valid UTF-8, removing ASCII characters does not
		//   change that.
		unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii()) }
	}

	/// Return a new string with all ASCII whitespace removed from the start.
	///
	/// The string itself is not modified.
	#[must_use]
	pub fn trim_ascii_start(&self) -> Self {
		// SAFETY:
		// We copy the current string, so the invariants should hold for the
		// copy too:
		// - The string does not get longer, so cannot be greater than
		// `MAX_LENGTH`.
		// - if the string is valid UTF-8, removing ASCII characters does not
		//   change that.
		unsafe {
			Self::from_bytes_unchecked(self.as_bytes().trim_ascii_start())
		}
	}

	/// Return a new string with all ASCII whitespace removed from the end.
	///
	/// The string itself is not modified.
	#[must_use]
	pub fn trim_ascii_end(&self) -> Self {
		// SAFETY:
		// We copy the current string, so the invariants should hold for the
		// copy too:
		// - The string does not get longer, so cannot be greater than
		// `MAX_LENGTH`.
		// - if the string is valid UTF-8, removing ASCII characters does not
		//   change that.
		unsafe { Self::from_bytes_unchecked(self.as_bytes().trim_ascii_end()) }
	}

	/// Return a new string with `prefix` removed from the start.
	///
	/// Returns `None` if the string does not start with `prefix`.
	///
	/// Only available with the crate feature `pattern`.
	#[cfg(feature = "pattern")]
	#[doc(cfg(pattern))]
	pub fn strip_prefix<P: str::pattern::Pattern>(
		&self,
		prefix: P,
	) -> Option<Self> {
		self.as_str()
			.strip_prefix(prefix)
			// Stripping a prefix does not make the string invalid UTF-8 and
			// does not make it longer, so the conversion cannot panic.
			.map(Self::from_str_unchecked)
	}

	/// Return a new string with `suffix` removed from the end.
	///
	/// Returns `None` if the string does not end with `suffix`.
	///
	/// Only available with the crate feature `pattern`.
	#[cfg(feature = "pattern")]
	#[doc(cfg(pattern))]
	pub fn strip_suffix<P>(&self, suffix: P) -> Option<Self>
	where
		P: str::pattern::Pattern,
		for<'a> P::Searcher<'a>: str::pattern::ReverseSearcher<'a>,
	{
		self.as_str()
			.strip_suffix(suffix)
			// Stripping a suffix does not make the string invalid UTF-8 and
			// does not make it longer, so the conversion cannot panic.
			.map(Self::from_str_unchecked)
	}

	/// Return `true` if the string contains the pattern `pat`.
	///
	/// Returns `false` else.
	///
	/// Only available with the crate feature `pattern`.
	#[cfg(feature = "pattern")]
	#[doc(cfg(pattern))]
	pub fn contains<P: str::pattern::Pattern>(&self, pat: P) -> bool {
		self.as_str().contains(pat)
	}
}


//==============================================================================
// Iterating by reference

/// Iterator struct for a `&TokenString`.
///
/// Iterator items are single bytes, `u8`, not [`char`]s.
pub struct TokenStringIter<'a> {
	/// The [`TokenString`] to iterate over.
	string: &'a TokenString,
	/// The index of the byte that is returned next.
	idx: usize,
}

impl<'a> TokenStringIter<'a> {
	/// Generate a reference iterator for the given [`TokenString`], starting
	/// at the first byte.
	#[must_use]
	pub const fn new(s: &'a TokenString) -> Self {
		TokenStringIter { string: s, idx: 0 }
	}
}

impl Iterator for TokenStringIter<'_> {
	type Item = u8;

	/// Return either the next byte, [`u8`], or [`None`] if we are at the end of
	/// the string.
	fn next(&mut self) -> Option<Self::Item> {
		debug_assert!(
			self.idx <= self.string.len.into(),
			"The iterator index '{0}' is greater than the string length '{1}'!",
			self.idx,
			self.string.len
		);
		if self.idx == self.string.len.into() {
			None
		} else if self.string.len as usize > MAX_LENGTH_SMALL {
			self.idx += 1;
			Some(self.string.as_bytes()[self.idx - 1])
		} else {
			self.idx += 1;
			Some(
				// SAFETY:
				// The two arrays, `prefix` and `u.small`, are guaranteed to be
				// consecutive in memory and allocated at the same time.
				unsafe {
					slice::from_raw_parts(
						self.string.prefix.as_ptr(),
						self.string.len as usize,
					)
				}[self.idx - 1],
			)
		}
	}
}

impl<'a> IntoIterator for &'a TokenString {
	type IntoIter = TokenStringIter<'a>;
	type Item = u8;

	fn into_iter(self) -> Self::IntoIter {
		Self::IntoIter::new(self)
	}
}

//==============================================================================
// Iterating an owned `TokenString`.

/// Iterator struct for an owned [`TokenString`].
///
/// Iterator items are single bytes, [`u8`], not [`char`]s.
pub struct TokenStringIterOwn {
	/// The [`TokenString`] to iterate over, owned by the iterator.
	string: TokenString,
	/// The index of the byte that is returned next.
	idx: usize,
}

impl TokenStringIterOwn {
	/// Generate an owned iterator for the given [`TokenString`], starting at
	/// the first byte. The iterator takes ownership of the string.
	#[must_use]
	pub const fn new(s: TokenString) -> Self {
		Self { string: s, idx: 0 }
	}
}

impl Iterator for TokenStringIterOwn {
	type Item = u8;

	/// Return either the next byte, [`u8`], or [`None`] if we are at the end of
	/// the string.
	fn next(&mut self) -> Option<Self::Item> {
		debug_assert!(
			self.idx <= self.string.len.into(),
			"The iterator index '{0}' is greater than the string length '{1}'!",
			self.idx,
			self.string.len
		);
		if self.idx == self.string.len.into() {
			None
		} else if self.string.len as usize > MAX_LENGTH_SMALL {
			self.idx += 1;
			Some(self.string.as_bytes()[self.idx - 1])
		} else {
			self.idx += 1;
			Some(
				// SAFETY:
				// The two arrays, `prefix` and `u.small`, are guaranteed to be
				// consecutive in memory and allocated at the same time.
				unsafe {
					slice::from_raw_parts(
						self.string.prefix.as_ptr(),
						self.string.len as usize,
					)
				}[self.idx - 1],
			)
		}
	}
}

impl IntoIterator for TokenString {
	type IntoIter = TokenStringIterOwn;
	type Item = u8;

	fn into_iter(self) -> Self::IntoIter {
		Self::IntoIter::new(self)
	}
}


// =============================================================================
//                                  Tests
// =============================================================================

/// Tests for strings of at most `PREFIX_LENGTH` (6) bytes, which fit
/// completely into the field `prefix`.
///
/// Every test checks the content of `prefix`, the stored length and that the
/// string is reported as "small".
#[cfg(test)]
mod prefix {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	/// The empty string has a length of 0 and a zero as first prefix byte.
	#[test]
	fn empty_is_empty() {
		let_assert!(Ok(res) = TokenString::try_from(""));
		check!(res.prefix[0] == 0);
		check!(res.len == 0);
		check!(res.is_small() == true);
	}

	/// Cloning the empty string yields the same first prefix byte and length.
	#[test]
	fn clone_empty() {
		let_assert!(Ok(s1) = TokenString::try_from(""));
		#[expect(
			clippy::redundant_clone,
			reason = "this clone isn't redundant?!"
		)]
		let res = s1.clone();
		check!(res.prefix[0] == s1.prefix[0]);
		check!(res.len == s1.len);
		check!(res.is_small() == true);
	}

	/// Conversion from a `&str`.
	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("123456"));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		check!(res.is_small() == true);
	}

	/// A clone has the same prefix and length as its source.
	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("123456"));
		#[expect(
			clippy::redundant_clone,
			reason = "this clone isn't redundant?!"
		)]
		let res = s1.clone();
		check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]);
		check!(res.len == s1.len);
		check!(res.is_small() == true);
	}

	/// Conversion from a byte slice, `&[u8]`.
	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"123456";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		check!(res.is_small() == true);
	}

	/// Conversion from a slice of `char`s.
	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::vec::Vec<char> = "123456".chars().collect();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		check!(res.is_small() == true);
	}

	/// Conversion from an owned `String`.
	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "123456".into();
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		check!(res.is_small() == true);
	}

	/// Conversion from a reference to a `String`.
	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "123456".into();
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		check!(res.is_small() == true);
	}

	/// Construction from a `&str` using `from_str_unchecked`.
	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("123456");
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		// Same assertion as in every other test of a string this short.
		check!(res.is_small() == true);
	}

	/// Construction from a byte slice using `from_bytes_unchecked`.
	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"123456";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		check!(res.is_small() == true);
	}

	/// Construction from a reference to a `String` using
	/// `from_string_unchecked`.
	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "123456".into();
		let res = TokenString::from_string_unchecked(&s1);
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(res.len == 6);
		check!(res.is_small() == true);
	}
}

/// Tests for "small" strings that are longer than the prefix: 7 bytes, where
/// `PREFIX_LENGTH` is 6 and `MAX_LENGTH_SMALL` is 14.
///
/// Every test checks that the first 6 bytes are in `prefix`, that the seventh
/// byte is the first element of `u.small`, the stored length and that the
/// string is reported as "small".
#[cfg(test)]
mod small {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	/// Conversion from a `&str`.
	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("1234567"));
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small() == true);
	}

	/// A clone has the same prefix, first `u.small` byte and length as its
	/// source.
	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("1234567"));
		#[expect(
			clippy::redundant_clone,
			reason = "this clone isn't redundant?!"
		)]
		let res = s1.clone();
		check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]);
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] == s1.u.small[0] });
		check!(res.len == s1.len);
		check!(res.is_small() == true);
	}

	/// Conversion from a byte slice, `&[u8]`.
	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"1234567";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small() == true);
	}

	/// Conversion from a slice of `char`s.
	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::vec::Vec<char> = "1234567".chars().collect();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small() == true);
	}

	/// Conversion from an owned `String`.
	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567".into();
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small() == true);
	}

	/// Conversion from a reference to a `String`.
	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567".into();
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small() == true);
	}

	/// Construction from a `&str` using `from_str_unchecked`.
	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("1234567");
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small() == true);
	}

	/// Construction from a byte slice using `from_bytes_unchecked`.
	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"1234567";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small() == true);
	}

	/// Construction from a reference to a `String` using
	/// `from_string_unchecked`.
	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567".into();
		let res = TokenString::from_string_unchecked(&s1);
		check!(&res.prefix[0 .. 6] == b"123456");
		// SAFETY:
		// We know there is a small string in the union.
		check!(unsafe { res.u.small[0] } == b'7');
		check!(res.len == 7);
		check!(res.is_small() == true);
	}
}

/// Tests for strings that are not "small": 15 bytes, one more than
/// `MAX_LENGTH_SMALL` (14).
///
/// Every test checks that the first 6 bytes are in `prefix`, that the whole
/// string can be read through the pointer `u.ptr`, the stored length and that
/// the string is not reported as "small".
#[cfg(test)]
mod heap {
	extern crate std;
	use assert2::{check, let_assert};

	use crate::TokenString;


	/// Conversion from a `&str`.
	#[test]
	fn try_from_str() {
		let_assert!(Ok(res) = TokenString::try_from("1234567890ABCDE"));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(res.is_small() == false);
	}


	/// A clone has the same prefix, content and length as its source.
	#[test]
	fn clone() {
		let_assert!(Ok(s1) = TokenString::try_from("1234567890ABCDE"));
		#[expect(
			clippy::redundant_clone,
			reason = "this clone isn't redundant?!"
		)]
		let res = s1.clone();
		check!(&res.prefix[0 .. 6] == &s1.prefix[0 .. 6]);
		check!(
			// SAFETY:
			// We know there is a large string in the union. Each slice is
			// built with the length of its own string, so the buffer of `s1`
			// is never read with a length taken from the clone.
			unsafe {
				res.u.ptr.as_slice_manually(res.len as usize)[.. 15]
					== s1.u.ptr.as_slice_manually(s1.len as usize)[.. 15]
			}
		);
		check!(res.len == s1.len);
		check!(res.is_small() == false);
	}

	/// Conversion from a byte slice, `&[u8]`.
	#[test]
	fn try_from_bytes() {
		let s1: &[u8] = b"1234567890ABCDE";
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(res.is_small() == false);
	}

	/// Conversion from a slice of `char`s.
	#[test]
	fn try_from_chars() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::vec::Vec<char> = "1234567890ABCDE".chars().collect();
		let_assert!(Ok(res) = TokenString::try_from(s1.as_slice()));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(res.is_small() == false);
	}

	/// Conversion from an owned `String`.
	#[test]
	fn try_from_string() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567890ABCDE".into();
		let_assert!(Ok(res) = TokenString::try_from(s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(res.is_small() == false);
	}

	/// Conversion from a reference to a `String`.
	#[test]
	fn try_from_stringref() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567890ABCDE".into();
		let_assert!(Ok(res) = TokenString::try_from(&s1));
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(res.is_small() == false);
	}

	/// Construction from a `&str` using `from_str_unchecked`.
	#[test]
	fn from_str_unchecked() {
		let res = TokenString::from_str_unchecked("1234567890ABCDE");
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(res.is_small() == false);
	}

	/// Construction from a byte slice using `from_bytes_unchecked`.
	#[test]
	fn from_bytes_unchecked() {
		let s1: &[u8] = b"1234567890ABCDE";
		// SAFETY:
		// We know that the string is valid UTF-8.
		let res = unsafe { TokenString::from_bytes_unchecked(s1) };
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(res.is_small() == false);
	}

	/// Construction from a reference to a `String` using
	/// `from_string_unchecked`.
	#[test]
	fn from_stringref_unchecked() {
		#[expect(
			clippy::std_instead_of_alloc,
			reason = "We are testing, this needs std"
		)]
		let s1: std::string::String = "1234567890ABCDE".into();
		let res = TokenString::from_string_unchecked(&s1);
		check!(&res.prefix[0 .. 6] == b"123456");
		check!(
			// SAFETY:
			// We know there is a large string in the union.
			unsafe { &res.u.ptr.as_slice_manually(res.len as usize)[.. 15] }
				== b"1234567890ABCDE"
		);
		check!(res.len == 15);
		check!(res.is_small() == false);
	}
}
token_string/string.rs

token_string/
string.rs