use crate::arena;
use std::fmt;
/// A byte string stored behind a raw pointer.
///
/// The payload is an arbitrary byte sequence; the accessors in this file
/// validate UTF-8 on demand instead of assuming it.
pub struct SeqString {
    /// Address of the first payload byte.
    ptr: *const u8,
    /// Number of payload bytes readable through `ptr`.
    len: usize,
    /// Capacity of the originating `Vec<u8>` when built by `global_bytes`;
    /// `arena_bytes` stores `0` here.
    capacity: usize,
    /// Set by `global_bytes`, cleared by `arena_bytes`. Together with
    /// `capacity > 0` it tells `Drop` that the buffer must be released.
    global: bool,
}
/// Two values are equal when their payload bytes are equal; the storage kind
/// (`global` flag, capacity) and UTF-8 validity play no part in the comparison.
impl PartialEq for SeqString {
    fn eq(&self, rhs: &Self) -> bool {
        let lhs_bytes = self.as_bytes();
        let rhs_bytes = rhs.as_bytes();
        lhs_bytes == rhs_bytes
    }
}

/// Byte-slice equality is reflexive, so `SeqString` is a full equivalence relation.
impl Eq for SeqString {}
// `SeqString` contains a raw `*const u8`, so the compiler does not derive
// `Send`/`Sync` for it; the two impls below assert thread-safety by hand.
// The `&self` methods in this file only read through the pointer.
// NOTE(review): whether an arena-backed value (`global == false`) may be moved
// to, or shared with, a thread other than the one owning its arena depends on
// `crate::arena`, which is not visible here — confirm before relying on it.
unsafe impl Send for SeqString {}
unsafe impl Sync for SeqString {}
impl SeqString {
    /// Returns the payload as a byte slice.
    ///
    /// An empty value never touches the stored pointer, so a `SeqString` built
    /// with `len == 0` is safe to read even if its pointer is null or dangling.
    pub fn as_bytes(&self) -> &[u8] {
        if self.len == 0 {
            // `slice::from_raw_parts` demands a non-null, aligned pointer even
            // for zero-length slices; avoid it for the empty case.
            return &[];
        }
        // SAFETY: `len > 0`; the constructors in this module take `ptr`/`len`
        // from a live buffer, and callers of `from_raw_parts` promise the same.
        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
    }

    /// Returns the payload as `&str`, or `None` when it is not valid UTF-8.
    pub fn as_str(&self) -> Option<&str> {
        std::str::from_utf8(self.as_bytes()).ok()
    }

    /// Returns the payload as text, replacing invalid UTF-8 sequences with
    /// U+FFFD (see `String::from_utf8_lossy`).
    pub fn as_str_lossy(&self) -> std::borrow::Cow<'_, str> {
        String::from_utf8_lossy(self.as_bytes())
    }

    /// Returns the payload as `&str`, or `""` when it is not valid UTF-8.
    pub fn as_str_or_empty(&self) -> &str {
        self.as_str().unwrap_or("")
    }

    /// Returns the `global` storage flag.
    pub fn is_global(&self) -> bool {
        self.global
    }

    /// Returns the payload length in bytes.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the payload has no bytes.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns `true` for a global value with zero capacity, i.e. one whose
    /// buffer `Drop` leaves untouched.
    ///
    /// Note that a global value built from an empty, never-allocated `Vec`
    /// also has zero capacity and therefore matches.
    pub fn is_interned(&self) -> bool {
        self.capacity == 0 && self.global
    }

    /// Returns the raw pointer to the first payload byte.
    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }

    /// Assembles a `SeqString` directly from its raw components.
    ///
    /// # Safety
    ///
    /// * When `len > 0`, `ptr` must be valid for reads of `len` bytes for as
    ///   long as the returned value, or any slice borrowed from it, is in use.
    ///   When `len == 0` the pointer is never dereferenced by the accessors.
    /// * When `global` is `true` and `capacity > 0`, the `Drop` impl rebuilds a
    ///   `Vec<u8>` from `(ptr, len, capacity)` and frees it. The triple must
    ///   therefore satisfy the requirements of `Vec::from_raw_parts`, and
    ///   ownership of that allocation passes to the returned value.
    /// * In every other case this type never frees the buffer.
    pub unsafe fn from_raw_parts(
        ptr: *const u8,
        len: usize,
        capacity: usize,
        global: bool,
    ) -> Self {
        SeqString {
            ptr,
            len,
            capacity,
            global,
        }
    }
}
/// Cloning deep-copies the payload into a fresh `Vec<u8>` and hands it to
/// `global_bytes`, so the clone is always global regardless of how the source
/// value was allocated.
impl Clone for SeqString {
    fn clone(&self) -> Self {
        let payload_copy: Vec<u8> = self.as_bytes().to_owned();
        global_bytes(payload_copy)
    }
}
/// Frees the heap buffer of values that own one; every other value is left alone.
impl Drop for SeqString {
    fn drop(&mut self) {
        // Only global values with a non-zero capacity own an allocation.
        let owns_heap_buffer = self.global && self.capacity > 0;
        if !owns_heap_buffer {
            return;
        }
        // SAFETY: relies on whoever built this value: for global values with a
        // non-zero capacity, `(ptr, len, capacity)` must describe a `Vec<u8>`
        // allocation whose ownership was transferred to `self`.
        let reclaimed =
            unsafe { Vec::<u8>::from_raw_parts(self.ptr as *mut u8, self.len, self.capacity) };
        drop(reclaimed);
    }
}
/// Debug output shows the lossily decoded payload (quoted and escaped) followed
/// by the `global` storage flag.
impl fmt::Debug for SeqString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str_lossy();
        let is_global = self.global;
        f.write_fmt(format_args!(
            "SeqString({:?}, global={})",
            text, is_global
        ))
    }
}
/// Displays the lossily decoded payload (invalid UTF-8 becomes U+FFFD).
///
/// The text is emitted through `Formatter::pad`, so width, fill, alignment and
/// precision requested by the caller (e.g. `{:>10}` or `{:.3}`) are honoured
/// the same way they are for `str`. Plain `{}` output is the bare text.
impl fmt::Display for SeqString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `write!(f, "{}", ..)` would start a fresh format spec and silently
        // discard the caller's flags; `pad` applies them.
        f.pad(&self.as_str_lossy())
    }
}
/// Copies `bytes` into the arena supplied by `arena::with_arena` and returns a
/// `SeqString` pointing at that copy.
///
/// The result has `capacity == 0` and `global == false`, so dropping it frees
/// nothing.
/// NOTE(review): how long the copied bytes stay valid is decided by
/// `crate::arena`, which is not visible here — confirm against that module.
pub fn arena_bytes(bytes: &[u8]) -> SeqString {
    arena::with_arena(|bump| {
        let copied = bump.alloc_slice_copy(bytes);
        let (ptr, len) = (copied.as_ptr(), copied.len());
        SeqString {
            ptr,
            len,
            capacity: 0,
            global: false,
        }
    })
}
/// Builds an arena-backed `SeqString` from the UTF-8 bytes of `s`
/// by delegating to `arena_bytes`.
pub fn arena_string(s: &str) -> SeqString {
    let utf8: &[u8] = s.as_bytes();
    arena_bytes(utf8)
}
/// Takes ownership of `bytes` and wraps its heap buffer in a global `SeqString`.
///
/// No bytes are copied: the returned value records the vector's pointer,
/// length and capacity and sets `global` to `true`. The `Drop` impl of
/// `SeqString` later rebuilds the `Vec<u8>` from those parts to release the
/// buffer (only when the capacity is non-zero).
pub fn global_bytes(bytes: Vec<u8>) -> SeqString {
    // `ManuallyDrop` suppresses the vector's destructor up front, which is the
    // hand-off pattern the standard library recommends over reading the parts
    // and then calling `mem::forget`.
    let mut bytes = std::mem::ManuallyDrop::new(bytes);
    SeqString {
        // `as_mut_ptr` yields a pointer suitable for the later
        // `Vec::from_raw_parts` reconstruction.
        ptr: bytes.as_mut_ptr(),
        len: bytes.len(),
        capacity: bytes.capacity(),
        global: true,
    }
}
/// Converts an owned `String` into a global `SeqString` by handing its byte
/// buffer to `global_bytes`.
pub fn global_string(s: String) -> SeqString {
    let raw: Vec<u8> = s.into_bytes();
    global_bytes(raw)
}
/// Borrowed text is converted through `arena_string`, producing a non-global value.
impl From<&str> for SeqString {
    fn from(text: &str) -> Self {
        arena_string(text)
    }
}
/// Owned text is converted through `global_string`, producing a global value.
impl From<String> for SeqString {
    fn from(owned: String) -> Self {
        global_string(owned)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Arena-backed strings expose their text and are not global.
    #[test]
    fn test_arena_string() {
        let arena_backed = arena_string("Hello, arena!");
        assert_eq!(arena_backed.as_str(), Some("Hello, arena!"));
        assert_eq!(arena_backed.len(), 13);
        assert!(!arena_backed.is_global());
    }

    /// Heap-backed strings expose their text and are global.
    #[test]
    fn test_global_string() {
        let heap_backed = global_string(String::from("Hello, global!"));
        assert_eq!(heap_backed.as_str(), Some("Hello, global!"));
        assert_eq!(heap_backed.len(), 14);
        assert!(heap_backed.is_global());
    }

    /// Cloning an arena string produces a global copy with the same bytes.
    #[test]
    fn test_clone_creates_global() {
        let original = arena_string("test");
        let copy = original.clone();
        assert_eq!(original.as_bytes(), copy.as_bytes());
        assert!(!original.is_global());
        assert!(copy.is_global());
    }

    /// Cloning a global string produces another global string.
    #[test]
    fn test_clone_global() {
        let original = global_string(String::from("test"));
        let copy = original.clone();
        assert_eq!(original.as_bytes(), copy.as_bytes());
        assert!(original.is_global());
        assert!(copy.is_global());
    }

    /// Dropping a global string must not crash.
    #[test]
    fn test_drop_global() {
        let scoped = global_string(String::from("Will be dropped"));
        assert_eq!(scoped.as_str(), Some("Will be dropped"));
        drop(scoped);
    }

    /// Dropping an arena string must not crash.
    #[test]
    fn test_drop_arena() {
        let scoped = arena_string("Will be dropped (no-op)");
        assert_eq!(scoped.as_str(), Some("Will be dropped (no-op)"));
        drop(scoped);
    }

    /// Equality ignores where the bytes are stored.
    #[test]
    fn test_equality() {
        let arena_a = arena_string("test");
        let arena_b = arena_string("test");
        let heap = global_string(String::from("test"));
        let other = arena_string("different");
        assert_eq!(arena_a, arena_b);
        assert_eq!(arena_a, heap);
        assert_ne!(arena_a, other);
    }

    /// `From<&str>` goes through the arena.
    #[test]
    fn test_from_str() {
        let converted = SeqString::from("test");
        assert_eq!(converted.as_str(), Some("test"));
        assert!(!converted.is_global());
    }

    /// `From<String>` goes through the global heap.
    #[test]
    fn test_from_string() {
        let converted = SeqString::from(String::from("test"));
        assert_eq!(converted.as_str(), Some("test"));
        assert!(converted.is_global());
    }

    /// Debug output contains both the text and the storage flag.
    #[test]
    fn test_debug_format() {
        let subject = arena_string("debug");
        let rendered = format!("{:?}", subject);
        assert!(rendered.contains("debug"));
        assert!(rendered.contains("global=false"));
    }

    /// Display output is the bare text.
    #[test]
    fn test_display_format() {
        let subject = global_string(String::from("display"));
        let rendered = format!("{}", subject);
        assert_eq!(rendered, "display");
    }

    /// The empty string is representable and reads back as `""`.
    #[test]
    fn test_empty_string() {
        let empty = arena_string("");
        assert_eq!(empty.len(), 0);
        assert!(empty.is_empty());
        assert_eq!(empty.as_str(), Some(""));
    }

    /// Multi-byte UTF-8 text round-trips; `len` counts bytes.
    #[test]
    fn test_unicode() {
        let text = arena_string("Hello, 世界! 🦀");
        assert_eq!(text.as_str(), Some("Hello, 世界! 🦀"));
        assert!(text.len() > 10);
    }

    /// `global_string` keeps the source `String`'s spare capacity.
    #[test]
    fn test_global_string_preserves_capacity() {
        let mut source = String::with_capacity(100);
        source.push_str("hi");
        assert_eq!(source.len(), 2);
        assert_eq!(source.capacity(), 100);

        let owned = global_string(source);
        assert_eq!(owned.len(), 2);
        assert_eq!(owned.capacity, 100);
        assert_eq!(owned.as_str(), Some("hi"));
        assert!(owned.is_global());
        drop(owned);
    }

    /// Arena strings record a capacity of zero.
    #[test]
    fn test_arena_string_capacity_zero() {
        let arena_backed = arena_string("test");
        assert_eq!(arena_backed.capacity, 0);
        assert!(!arena_backed.is_global());
    }

    /// Byte sequence that is deliberately not valid UTF-8.
    const SENTINEL: &[u8] = &[0x00, 0xDC, b'x', 0xFF, 0xC3, b'!'];

    /// Global storage keeps arbitrary bytes intact.
    #[test]
    fn global_bytes_carries_arbitrary_bytes() {
        let raw = global_bytes(SENTINEL.to_vec());
        assert_eq!(raw.as_bytes(), SENTINEL);
        assert_eq!(raw.len(), SENTINEL.len());
        assert!(raw.is_global());
        assert_eq!(raw.as_str(), None);
    }

    /// Arena storage keeps arbitrary bytes intact.
    #[test]
    fn arena_bytes_carries_arbitrary_bytes() {
        let raw = arena_bytes(SENTINEL);
        assert_eq!(raw.as_bytes(), SENTINEL);
        assert_eq!(raw.len(), SENTINEL.len());
        assert!(!raw.is_global());
        assert_eq!(raw.as_str(), None);
    }

    /// Equality works on raw bytes even when they are not UTF-8.
    #[test]
    fn equality_uses_bytes_not_utf8() {
        let in_arena = arena_bytes(SENTINEL);
        let on_heap = global_bytes(SENTINEL.to_vec());
        assert_eq!(in_arena, on_heap);

        let mut altered = SENTINEL.to_vec();
        altered[0] = 0x01;
        let different = global_bytes(altered);
        assert_ne!(in_arena, different);
    }

    /// Cloning preserves non-UTF-8 payloads byte for byte.
    #[test]
    fn clone_round_trips_arbitrary_bytes() {
        let source = arena_bytes(SENTINEL);
        let duplicate = source.clone();
        assert_eq!(source.as_bytes(), duplicate.as_bytes());
        assert!(duplicate.is_global());
    }

    /// Dropping non-UTF-8 global values repeatedly must not crash.
    #[test]
    fn drop_does_not_require_utf8() {
        (0..16).for_each(|_| drop(global_bytes(SENTINEL.to_vec())));
    }

    /// Lossy decoding substitutes U+FFFD and keeps the valid characters.
    #[test]
    fn as_str_lossy_replaces_invalid() {
        let raw = global_bytes(SENTINEL.to_vec());
        let decoded = raw.as_str_lossy();
        assert!(decoded.contains('\u{FFFD}'));
        assert!(decoded.contains('x'));
        assert!(decoded.contains('!'));
    }
}