use super::slice_set::SliceSet;
use super::SetError;
use super::ThinStr;
use std::ffi::c_void;
use std::hash::BuildHasher;
use std::ops::Deref;
use std::ptr::NonNull;
use super::SetHasher as Hasher;
/// Copyable handle to a string, as handed out by [`UnsyncStringSet`].
///
/// This is a `#[repr(transparent)]` newtype over [`ThinStr<'static>`], so it has the same
/// layout as the wrapped `ThinStr`. Equality and hashing are derived and therefore delegate
/// to whatever `ThinStr` implements.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StringRef(pub ThinStr<'static>);
impl StringRef {
pub fn into_raw(self) -> NonNull<c_void> {
self.0.into_raw()
}
pub unsafe fn from_raw(this: NonNull<c_void>) -> Self {
Self(ThinStr::from_raw(this))
}
}
impl From<&StringRef> for StringRef {
fn from(value: &StringRef) -> Self {
*value
}
}
impl Default for StringRef {
fn default() -> Self {
Self::EMPTY
}
}
impl StringRef {
pub const EMPTY: StringRef = StringRef(ThinStr::new());
pub const END_TIMESTAMP_NS: StringRef = StringRef(ThinStr::end_timestamp_ns());
pub const LOCAL_ROOT_SPAN_ID: StringRef = StringRef(ThinStr::local_root_span_id());
pub const TRACE_ENDPOINT: StringRef = StringRef(ThinStr::trace_endpoint());
pub const SPAN_ID: StringRef = StringRef(ThinStr::span_id());
}
/// All well-known [`StringRef`] constants, with [`StringRef::EMPTY`] first.
///
/// `UnsyncStringSet::try_with_capacity` inserts every entry of this array into each newly
/// created set, in this order.
pub const WELL_KNOWN_STRING_REFS: [StringRef; 5] = [
StringRef::EMPTY,
StringRef::END_TIMESTAMP_NS,
StringRef::LOCAL_ROOT_SPAN_ID,
StringRef::TRACE_ENDPOINT,
StringRef::SPAN_ID,
];
/// String-interning set that hands out [`StringRef`] handles.
///
/// This is a thin wrapper around a [`SliceSet<u8>`] that stores the UTF-8 bytes of each
/// string. Every mutating method takes `&mut self`; the type adds no synchronization of its
/// own.
pub struct UnsyncStringSet(SliceSet<u8>);

impl UnsyncStringSet {
    /// Capacity requested by [`UnsyncStringSet::try_new`].
    const DEFAULT_CAPACITY: usize = 28;

    /// Creates a set and seeds it with every entry of [`WELL_KNOWN_STRING_REFS`].
    ///
    /// The requested `capacity` is raised to at least the number of well-known strings.
    /// Seeding uses `insert_unique`, which cannot report an allocation failure, so the table
    /// should already have room for those entries when seeding starts.
    ///
    /// NOTE(review): this assumes `SliceSet::try_with_capacity` reserves `capacity` slots in
    /// `slices` up front; confirm against `SliceSet`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `SliceSet::try_with_capacity`.
    pub fn try_with_capacity(capacity: usize) -> Result<Self, SetError> {
        let capacity = capacity.max(WELL_KNOWN_STRING_REFS.len());
        let mut set = Self(SliceSet::try_with_capacity(capacity)?);
        let strings = &mut set.0.slices;
        for id in WELL_KNOWN_STRING_REFS {
            // Hash the bytes exactly the way `try_insert` does, so later lookups of the same
            // text find these pre-seeded entries.
            let hash = Hasher::default().hash_one(id.0.deref().as_bytes());
            strings.insert_unique(hash, id.0.into(), |slice| {
                Hasher::default().hash_one(slice.deref())
            });
        }
        Ok(set)
    }

    /// Creates a set with the default requested capacity (28).
    ///
    /// # Errors
    ///
    /// See [`UnsyncStringSet::try_with_capacity`].
    pub fn try_new() -> Result<Self, SetError> {
        Self::try_with_capacity(Self::DEFAULT_CAPACITY)
    }

    /// Looks up `str` among the entries stored under `hash`.
    ///
    /// Returns the handle of the stored entry whose text equals `str`, or `None` when there
    /// is no such entry.
    ///
    /// # Safety
    ///
    /// The body reinterprets stored slices as UTF-8 without validation, so every slice in
    /// the set must hold valid UTF-8. `hash` should be
    /// `Hasher::default().hash_one(str.as_bytes())`; with any other value the lookup may miss
    /// an entry that is present.
    unsafe fn find_with_hash(&self, hash: u64, str: &str) -> Option<StringRef> {
        let interned_str = self.0.slices.find(hash, |thin_slice| {
            // SAFETY: every insertion path in this impl stores the bytes of a `str`.
            let slice_str = unsafe { std::str::from_utf8_unchecked(thin_slice.as_slice()) };
            slice_str == str
        })?;
        Some(StringRef((*interned_str).into()))
    }

    /// Inserts `str` without first checking whether it is already in the set.
    ///
    /// The hash is computed here with `Hasher::default()` over the string's bytes.
    ///
    /// # Safety
    ///
    /// No lookup is performed before inserting. NOTE(review): presumably the caller must
    /// guarantee that `str` is not already present; confirm against
    /// `SliceSet::insert_unique_uncontended_with_hash`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by the underlying `SliceSet` insertion.
    pub unsafe fn insert_unique_uncontended(&mut self, str: &str) -> Result<StringRef, SetError> {
        let hash = Hasher::default().hash_one(str.as_bytes());
        self.insert_unique_uncontended_with_hash(hash, str)
    }

    /// Inserts `str` under a caller-supplied `hash`, without checking for an existing entry.
    ///
    /// # Safety
    ///
    /// Neither the hash nor uniqueness is verified here. Every caller in this file passes
    /// `Hasher::default().hash_one(str.as_bytes())` as `hash`. NOTE(review): presumably that
    /// hash and the absence of `str` from the set are both required; confirm against
    /// `SliceSet::insert_unique_uncontended_with_hash`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by the underlying `SliceSet` insertion.
    pub unsafe fn insert_unique_uncontended_with_hash(
        &mut self,
        hash: u64,
        str: &str,
    ) -> Result<StringRef, SetError> {
        let new_slice = self
            .0
            .insert_unique_uncontended_with_hash(hash, str.as_bytes())?;
        Ok(StringRef(new_slice.into()))
    }

    /// Returns the handle for `str`, inserting the string if it is not in the set yet.
    ///
    /// # Errors
    ///
    /// Returns the error produced by the underlying insertion when a new entry is needed.
    pub fn try_insert(&mut self, str: &str) -> Result<StringRef, SetError> {
        let hash = Hasher::default().hash_one(str.as_bytes());
        // SAFETY: `hash` was computed just above from `str` with the set's hasher.
        unsafe { self.try_insert_with_hash(hash, str) }
    }

    /// Like [`UnsyncStringSet::try_insert`], but with a caller-supplied `hash`.
    ///
    /// An existing entry is returned as-is; otherwise the string is inserted.
    ///
    /// # Safety
    ///
    /// `hash` must be `Hasher::default().hash_one(str.as_bytes())`. With a mismatched hash
    /// the lookup can miss an existing entry, and the string is then inserted through the
    /// unchecked path as if it were new.
    ///
    /// # Errors
    ///
    /// Returns the error produced by the underlying insertion when a new entry is needed.
    pub unsafe fn try_insert_with_hash(
        &mut self,
        hash: u64,
        str: &str,
    ) -> Result<StringRef, SetError> {
        if let Some(id) = self.find_with_hash(hash, str) {
            return Ok(id);
        }
        // The lookup above found nothing, so the unchecked insert does not duplicate an entry.
        self.insert_unique_uncontended_with_hash(hash, str)
    }

    /// Iterates over the handles of all entries in the set, including the pre-seeded
    /// well-known strings.
    pub fn string_ids(&self) -> impl Iterator<Item = StringRef> + '_ {
        self.0.slices.iter().map(|slice| StringRef((*slice).into()))
    }

    /// Returns the length reported by the underlying `SliceSet`.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns whether the underlying `SliceSet` reports itself as empty.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Returns the capacity reported by the underlying `SliceSet`.
    pub fn capacity(&self) -> usize {
        self.0.capacity()
    }

    /// Returns the text behind `id`, borrowed for as long as `self` is borrowed.
    ///
    /// # Safety
    ///
    /// `id` is dereferenced without checking that it belongs to this set, and the resulting
    /// reference is given the lifetime of the `&self` borrow. The caller must ensure the
    /// storage behind `id` stays valid for that long. NOTE(review): presumably this means
    /// `id` came from this set or is one of the well-known constants; confirm.
    pub unsafe fn get_string(&self, id: StringRef) -> &str {
        // SAFETY: only the lifetime changes; validity of the storage is the caller's contract.
        unsafe { core::mem::transmute::<&str, &str>(id.0.deref()) }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Inserts every input in order, then checks that each returned id resolves to the text
    /// it was created from.
    fn assert_round_trip<S: AsRef<str>>(set: &mut UnsyncStringSet, inputs: &[S]) {
        let ids: Vec<StringRef> = inputs
            .iter()
            .map(|input| set.try_insert(input.as_ref()).unwrap())
            .collect();

        for (id, input) in ids.iter().zip(inputs) {
            // SAFETY: `id` was returned by `set.try_insert` above and `set` is still alive.
            let resolved = unsafe { set.get_string(*id) };
            assert_eq!(resolved, input.as_ref());
        }
    }

    /// Inserting the same text twice yields handles to equal text; distinct text differs.
    #[test]
    fn test_string_set_basic_operations() {
        let mut set = UnsyncStringSet::try_new().unwrap();
        let hello = set.try_insert("hello").unwrap();
        let world = set.try_insert("world").unwrap();
        let hello_again = set.try_insert("hello").unwrap();

        assert_eq!(&*hello.0, &*hello_again.0);
        assert_ne!(&*hello.0, &*world.0);

        for (id, expected) in [(hello, "hello"), (world, "world"), (hello_again, "hello")] {
            // SAFETY: every id was returned by `set.try_insert` above.
            let resolved = unsafe { set.get_string(id) };
            assert_eq!(resolved, expected);
        }
    }

    /// Strings of assorted lengths, starting with the empty string, all round-trip.
    #[test]
    fn test_string_lengths_and_alignment() {
        let mut set = UnsyncStringSet::try_new().unwrap();
        let test_strings = [
            "",
            "a",
            "ab",
            "abc",
            "abcd",
            "abcdefg",
            "abcdefgh",
            "abcdefghijklmno",
            "abcdefghijklmnop",
            "abcdefghijklmnopqrstuvwxyz123456789",
        ];
        assert_round_trip(&mut set, &test_strings[..]);
    }

    /// Multi-byte and control characters round-trip unchanged.
    #[test]
    fn test_unicode_strings() {
        let mut set = UnsyncStringSet::try_new().unwrap();
        let unicode_strings = [
            "café",
            "🦀",
            "こんにちは",
            "Здравствуй",
            "🔥💯✨",
            "a\u{0000}b",
            "line1\nline2",
            "tab\there",
        ];
        assert_round_trip(&mut set, &unicode_strings[..]);
    }

    /// Starting from a requested capacity of one, fifty insertions still round-trip.
    #[test]
    fn test_capacity_and_growth() {
        let mut set = UnsyncStringSet::try_with_capacity(1).unwrap();
        let test_strings: Vec<String> = (0..50).map(|i| format!("growth_test_{}", i)).collect();
        assert_round_trip(&mut set, &test_strings[..]);
    }

    /// Large strings round-trip, and earlier ids stay readable after each later insertion.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_large_strings() {
        let mut set = UnsyncStringSet::try_new().unwrap();
        let payloads = [
            "x".repeat(1024),
            "y".repeat(65536),
            "z".repeat(2 * 1024 * 1024 + 1000),
        ];

        let mut inserted: Vec<(StringRef, &str)> = Vec::new();
        for payload in &payloads {
            let id = set.try_insert(payload).unwrap();
            // SAFETY: all ids below were returned by `set.try_insert` on this live set.
            unsafe {
                assert_eq!(set.get_string(id), payload.as_str());
                for (earlier_id, earlier_text) in &inserted {
                    assert_eq!(set.get_string(*earlier_id), *earlier_text);
                }
            }
            inserted.push((id, payload.as_str()));
        }
    }

    /// Many short, distinct strings all round-trip.
    #[test]
    fn test_many_small_strings() {
        // Use a smaller workload when running under Miri.
        const NUM_STRINGS: usize = if cfg!(miri) { 100 } else { 1000 };
        let mut set = UnsyncStringSet::try_new().unwrap();
        let decimals: Vec<String> = (0..NUM_STRINGS).map(|n| n.to_string()).collect();
        assert_round_trip(&mut set, &decimals[..]);
    }
}