token-string 0.8.3

Short (up to 65,535 bytes) immutable strings, e.g. for parsing tokens, implemented in Rust. These are sometimes called 'German Strings', because the paper mentioning them was written by Germans.
Documentation
// SPDX-FileCopyrightText: Copyright (C) 2024 Roland Csaszar
// SPDX-License-Identifier: MPL-2.0
//
// Project:  token-string
// File:     string_ptr.rs
// Date:     22.Nov.2024
// =============================================================================
//! Pointer to a string, an internal, private struct used by the heap allocated
//! strings of [`crate::TokenString`].

extern crate alloc;

use core::{mem, ptr, slice, str};


/// The string's data pointer, used if the string is allocated on the heap.
///
/// The data this points to must be a valid UTF-8 string.
///
/// This is deliberately a thin pointer, to get the size down to 64 bits. A
/// pointer to a non statically sized array needs two fields, the address and
/// a length. The length is already known by the user of this struct and is
/// passed to every `*_manually` method, so it is not stored a second time
/// here.
///
/// Because the length is not stored, the memory can not be released
/// automatically. It has to be freed by calling `drop_manually` with the
/// correct length.
#[repr(C)]
#[derive(Debug)]
pub struct StringPtr {
	/// The pointer to the string's data, allocated on the heap.
	///
	/// For an empty string this is a dangling pointer which must never be
	/// dereferenced.
	ptr: ptr::NonNull<u8>,
}

// Compile time checks of the memory layout of `StringPtr`: both its alignment
// and its size must be equal to the size of a `u64`, that is 8 bytes. If one
// of the checks does not hold, compilation fails with the assertion's message.
const _: () = assert!(
	mem::align_of::<StringPtr>() == mem::size_of::<u64>(),
	"struct StringPtr is not aligned to 64 bits!"
);
const _: () = assert!(
	mem::size_of::<StringPtr>() == mem::size_of::<u64>(),
	"struct StringPtr has size != 64 bits"
);


/// Build the [`core::alloc::Layout`] of an array of `len` elements of type
/// `T`.
///
/// This is used to describe the byte arrays holding the string's data when
/// allocating and deallocating them.
///
/// # Panics
///
/// Panics if the layout can not be constructed - which should never happen.
fn array_layout<T>(len: usize) -> core::alloc::Layout {
	use core::alloc::Layout;

	let maybe_layout = Layout::array::<T>(len);
	maybe_layout
		.expect("Error: constructing an array layout for TokenString failed!")
}

impl From<&[u8]> for StringPtr {
	/// Create a [`StringPtr`] pointing to a newly allocated copy of the bytes
	/// in `value`.
	///
	/// Nothing is allocated for an empty slice, the pointer is dangling in
	/// that case.
	fn from(value: &[u8]) -> Self {
		let byte_count = value.len();
		if byte_count == 0 {
			// An empty string owns no memory.
			return Self {
				ptr: ptr::NonNull::dangling(),
			};
		}

		let layout = array_layout::<u8>(byte_count);
		// SAFETY:
		// `byte_count` is greater than zero here, so the layout of the array
		// to allocate has a positive size.
		let raw = unsafe { alloc::alloc::alloc(layout) };
		// A null pointer means that the allocation has failed.
		let ptr = ptr::NonNull::new(raw)
			.unwrap_or_else(|| alloc::alloc::handle_alloc_error(layout));
		// SAFETY:
		// `ptr` has just been allocated, so it can not overlap with `value`.
		// Both `value` and the new array hold `byte_count` bytes.
		unsafe {
			ptr::copy_nonoverlapping(value.as_ptr(), ptr.as_ptr(), byte_count);
		}

		Self { ptr }
	}
}

// SAFETY:
// `StringPtr` only holds a pointer to a heap allocated array of plain bytes.
// Every method in this file that writes to or frees that array takes
// `&mut self`, so moving the value to another thread does not leave a second
// writer behind.
unsafe impl Send for StringPtr {}

// SAFETY:
// The methods in this file taking `&self` only read from the array the
// pointer points to, so sharing a `&StringPtr` between threads can not cause
// a data race.
unsafe impl Sync for StringPtr {}

impl StringPtr {
	/// Drop the [`StringPtr`], deallocate its memory.
	///
	/// We cannot implement the [`Drop`] trait, as we must explicitly pass the
	/// size of the string (the array).
	///
	/// `len` must be the correct length of the string, else we get memory
	/// corruption. Nothing is deallocated if `len` is 0, as an empty string
	/// does not own any memory.
	///
	/// The pointer must not be used any more after calling this, and this
	/// must not be called a second time.
	#[cfg_attr(test, mutants::skip)]
	pub fn drop_manually(&mut self, len: usize) {
		if len > 0 {
			// SAFETY:
			// The layout is the same as has been used when allocating.
			unsafe {
				alloc::alloc::dealloc(
					self.ptr.as_ptr(),
					array_layout::<u8>(len),
				);
			}
		}
	}

	/// Clone the string by copying the array in memory.
	///
	/// We cannot implement the [`Clone`] trait, as we must explicitly pass the
	/// size of the string (the array).
	///
	/// `len` must be the correct length of the string, else we get memory
	/// corruption. Nothing is allocated if `len` is 0, the returned pointer is
	/// dangling in that case.
	pub fn clone_manually(&self, len: usize) -> Self {
		if len == 0 {
			// An empty string owns no memory, so there is nothing to copy.
			return Self {
				ptr: ptr::NonNull::dangling(),
			};
		}
		let cloned = Self::alloc_manually(len);
		// SAFETY:
		// `cloned` points to a newly allocated array of `len` bytes, so it
		// can not overlap with the array of `self`. The caller guarantees
		// that `self` holds `len` bytes too.
		unsafe {
			ptr::copy_nonoverlapping(
				self.ptr.as_ptr(),
				cloned.ptr.as_ptr(),
				len,
			);
		}
		cloned
	}

	/// Copy the slice `value` into the string, starting at byte index `idx`.
	///
	/// No bounds check is done here: `idx + value.len()` must not be greater
	/// than the length the string has been allocated with, else we get memory
	/// corruption.
	pub(crate) fn copy_manually(&mut self, idx: usize, value: &[u8]) {
		// SAFETY:
		// The caller guarantees that the `value.len()` bytes starting at
		// `idx` are inside of the allocated array. `value` can not borrow
		// from this string's array while `self` is borrowed mutably, so the
		// source and the destination do not overlap.
		unsafe {
			ptr::copy_nonoverlapping(
				value.as_ptr(),
				self.ptr.as_ptr().add(idx),
				value.len(),
			);
		}
	}

	/// Allocate memory for the string with a size of `len` bytes.
	///
	/// The allocated memory is not initialized, it must be filled, e.g. using
	/// [`Self::copy_manually`], before reading from it.
	///
	/// `len` should be greater than 0. A `len` of 0 panics in debug builds. In
	/// release builds nothing is allocated and a dangling pointer is returned
	/// instead, which is what [`Self::drop_manually`] expects for a length of
	/// 0.
	pub(crate) fn alloc_manually(len: usize) -> Self {
		debug_assert!(len > 0, "don't allocate an array of length 0!");
		if len == 0 {
			// Calling the allocator with a layout of size zero is undefined
			// behavior, so never do that, even if the assertion above is
			// compiled out.
			return Self {
				ptr: ptr::NonNull::dangling(),
			};
		}
		let l = array_layout::<u8>(len);
		// SAFETY:
		// `len` is greater than zero here, so the layout of the array to
		// allocate has a positive size.
		let raw = unsafe { alloc::alloc::alloc(l) };
		// A null pointer means that the allocation has failed.
		let Some(ptr) = ptr::NonNull::new(raw) else {
			alloc::alloc::handle_alloc_error(l)
		};
		Self { ptr }
	}

	/// Return the string as a byte slice.
	///
	/// `len` must be the correct length of the string, else we get memory
	/// corruption. An empty slice is returned if `len` is 0, without touching
	/// the pointer.
	pub(crate) fn as_slice_manually(&self, len: usize) -> &[u8] {
		if len == 0 {
			&[]
		} else {
			// SAFETY:
			// `ptr` is not null and properly aligned.
			// `len` is the correct length.
			unsafe { slice::from_raw_parts(self.ptr.as_ptr(), len) }
		}
	}

	/// Return the string as a mutable byte slice.
	///
	/// `len` must be the correct length of the string, else we get memory
	/// corruption. An empty slice is returned if `len` is 0, without touching
	/// the pointer.
	pub(crate) fn as_slice_manually_mut(&mut self, len: usize) -> &mut [u8] {
		if len == 0 {
			&mut []
		} else {
			// SAFETY:
			// `ptr` is not null and properly aligned.
			// `len` is the correct length.
			unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), len) }
		}
	}

	/// Return the string this pointer holds.
	///
	/// `len` must be the correct length of the string, else we get memory
	/// corruption.
	pub(crate) fn as_string_manually(&self, len: usize) -> &str {
		let bytes = self.as_slice_manually(len);
		// SAFETY:
		// Being valid UTF-8 is a precondition of `StringPtr`.
		unsafe { str::from_utf8_unchecked(bytes) }
	}

	/// Return `true` if the given strings are equal, `false` else.
	///
	/// Both strings to compare must have the same length, and `len` must be
	/// that length, else we get memory corruption.
	pub fn eq_manually(&self, other: &Self, len: usize) -> bool {
		self.as_slice_manually(len) == other.as_slice_manually(len)
	}
}