use std::fmt;
use super::gc::Color;
use super::gc::arena::{Arena, GcRef};
use super::gc::trace::Trace;
pub struct LuaString {
hash: u32,
data: Box<[u8]>,
}
impl LuaString {
pub(crate) fn new(data: &[u8], hash: u32) -> Self {
Self {
hash,
data: data.into(),
}
}
#[inline]
pub fn hash(&self) -> u32 {
self.hash
}
#[inline]
pub fn data(&self) -> &[u8] {
&self.data
}
#[inline]
pub fn len(&self) -> usize {
self.data.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.data.is_empty()
}
pub fn as_str(&self) -> Option<&str> {
std::str::from_utf8(&self.data).ok()
}
}
impl PartialEq for LuaString {
fn eq(&self, other: &Self) -> bool {
self.hash == other.hash && self.data == other.data
}
}
impl Eq for LuaString {}
impl fmt::Debug for LuaString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.as_str() {
Some(s) => write!(f, "LuaString({s:?})"),
None => write!(f, "LuaString({:?})", &*self.data),
}
}
}
impl fmt::Display for LuaString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(s) = self.as_str() {
write!(f, "{s}")
} else {
let s = String::from_utf8_lossy(&self.data);
write!(f, "{s}")
}
}
}
impl Trace for LuaString {
fn trace(&self) {}
fn needs_trace(&self) -> bool {
false
}
}
pub fn lua_hash(data: &[u8]) -> u32 {
let len = data.len();
let mut h = len as u32;
let step = (len >> 5) + 1;
let mut l1 = len;
while l1 >= step {
h ^= (h << 5)
.wrapping_add(h >> 2)
.wrapping_add(u32::from(data[l1 - 1]));
l1 -= step;
}
h
}
const MIN_STR_TAB_SIZE: usize = 32;
pub struct StringTable {
buckets: Vec<Vec<GcRef<LuaString>>>,
count: usize,
}
impl StringTable {
pub fn new() -> Self {
Self {
buckets: vec![Vec::new(); MIN_STR_TAB_SIZE],
count: 0,
}
}
pub fn intern(
&mut self,
data: &[u8],
arena: &mut Arena<LuaString>,
current_white: Color,
) -> GcRef<LuaString> {
let h = lua_hash(data);
let bucket_idx = (h as usize) & (self.buckets.len() - 1);
for &r in &self.buckets[bucket_idx] {
if let Some(s) = arena.get(r)
&& s.hash == h
&& s.data.len() == data.len()
&& *s.data == *data
{
return r;
}
}
let s = LuaString::new(data, h);
let r = arena.alloc(s, current_white);
self.buckets[bucket_idx].push(r);
self.count += 1;
if self.count > self.buckets.len() {
self.grow(arena);
}
r
}
#[inline]
pub fn count(&self) -> usize {
self.count
}
#[inline]
pub fn bucket_count(&self) -> usize {
self.buckets.len()
}
fn grow(&mut self, arena: &Arena<LuaString>) {
let new_size = self.buckets.len().saturating_mul(2);
if new_size <= self.buckets.len() {
return;
}
self.rehash(new_size, arena);
}
pub fn maybe_shrink(&mut self, arena: &Arena<LuaString>) {
if self.count < self.buckets.len() / 4 && self.buckets.len() > MIN_STR_TAB_SIZE * 2 {
self.rehash(self.buckets.len() / 2, arena);
}
}
pub fn remove(&mut self, r: GcRef<LuaString>, arena: &Arena<LuaString>) {
let Some(s) = arena.get(r) else {
return;
};
let bucket_idx = (s.hash as usize) & (self.buckets.len() - 1);
let bucket = &mut self.buckets[bucket_idx];
if let Some(pos) = bucket.iter().position(|&entry| entry == r) {
bucket.swap_remove(pos);
self.count -= 1;
}
}
pub fn retain<F>(&mut self, predicate: F)
where
F: Fn(GcRef<LuaString>) -> bool,
{
let mut removed = 0usize;
for bucket in &mut self.buckets {
let before = bucket.len();
bucket.retain(|&r| predicate(r));
removed += before - bucket.len();
}
self.count = self.count.saturating_sub(removed);
}
pub fn sweep_dead(&mut self, arena: &Arena<LuaString>) {
self.retain(|r| arena.get(r).is_some());
}
fn rehash(&mut self, new_size: usize, arena: &Arena<LuaString>) {
let mut new_buckets = vec![Vec::new(); new_size];
for bucket in &self.buckets {
for &r in bucket {
if let Some(s) = arena.get(r) {
let new_idx = (s.hash as usize) & (new_size - 1);
new_buckets[new_idx].push(r);
}
}
}
self.buckets = new_buckets;
}
}
impl Default for StringTable {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn hash_empty_string() {
assert_eq!(lua_hash(b""), 0);
}
#[test]
fn hash_single_char() {
assert_eq!(lua_hash(b"a"), 128);
}
#[test]
fn hash_deterministic() {
let h1 = lua_hash(b"hello");
let h2 = lua_hash(b"hello");
assert_eq!(h1, h2);
}
#[test]
fn hash_different_strings_differ() {
assert_ne!(lua_hash(b"hello"), lua_hash(b"world"));
assert_ne!(lua_hash(b"a"), lua_hash(b"b"));
}
#[test]
fn hash_short_strings_sample_all() {
let h1 = lua_hash(b"abc");
let h2 = lua_hash(b"aXc");
assert_ne!(h1, h2);
}
#[test]
fn hash_long_string_samples() {
let mut data = vec![b'a'; 64];
let h1 = lua_hash(&data);
data[1] = b'Z';
let h2 = lua_hash(&data);
assert_eq!(h1, h2, "unsampled position should not affect hash");
data[1] = b'a'; data[63] = b'Z';
let h3 = lua_hash(&data);
assert_ne!(h1, h3, "sampled position should affect hash");
}
#[test]
fn hash_embedded_null() {
let h1 = lua_hash(b"ab\0cd");
let h2 = lua_hash(b"ab\0ce");
assert_ne!(h1, h2);
}
#[test]
fn lua_string_accessors() {
let s = LuaString::new(b"hello", lua_hash(b"hello"));
assert_eq!(s.data(), b"hello");
assert_eq!(s.len(), 5);
assert!(!s.is_empty());
assert_eq!(s.as_str(), Some("hello"));
assert_eq!(s.hash(), lua_hash(b"hello"));
}
#[test]
fn lua_string_empty() {
let s = LuaString::new(b"", lua_hash(b""));
assert!(s.is_empty());
assert_eq!(s.len(), 0);
assert_eq!(s.data(), b"");
}
#[test]
fn lua_string_non_utf8() {
let data = &[0xFF, 0xFE, 0x00, 0x80];
let s = LuaString::new(data, lua_hash(data));
assert_eq!(s.as_str(), None);
assert_eq!(s.data(), data);
}
#[test]
fn lua_string_equality() {
let s1 = LuaString::new(b"hello", lua_hash(b"hello"));
let s2 = LuaString::new(b"hello", lua_hash(b"hello"));
let s3 = LuaString::new(b"world", lua_hash(b"world"));
assert_eq!(s1, s2);
assert_ne!(s1, s3);
}
#[test]
fn lua_string_debug() {
let s = LuaString::new(b"test", lua_hash(b"test"));
let debug = format!("{s:?}");
assert!(debug.contains("test"));
}
#[test]
fn lua_string_display() {
let s = LuaString::new(b"hello world", lua_hash(b"hello world"));
assert_eq!(format!("{s}"), "hello world");
}
#[test]
fn lua_string_trace_is_noop() {
let s = LuaString::new(b"test", lua_hash(b"test"));
assert!(!s.needs_trace());
s.trace(); }
#[test]
fn intern_creates_string() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let r = table.intern(b"hello", &mut arena, Color::White0);
let Some(s) = arena.get(r) else {
unreachable!("arena lookup should succeed");
};
assert_eq!(s.data(), b"hello");
assert_eq!(s.hash(), lua_hash(b"hello"));
assert_eq!(table.count(), 1);
}
#[test]
fn intern_deduplicates() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let r1 = table.intern(b"hello", &mut arena, Color::White0);
let r2 = table.intern(b"hello", &mut arena, Color::White0);
assert_eq!(r1, r2, "same content should return same GcRef");
assert_eq!(table.count(), 1, "should not allocate twice");
assert_eq!(arena.len(), 1);
}
#[test]
fn intern_different_strings() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let r1 = table.intern(b"hello", &mut arena, Color::White0);
let r2 = table.intern(b"world", &mut arena, Color::White0);
assert_ne!(r1, r2);
assert_eq!(table.count(), 2);
assert_eq!(arena.len(), 2);
}
#[test]
fn intern_empty_string() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let r = table.intern(b"", &mut arena, Color::White0);
let Some(s) = arena.get(r) else {
unreachable!("arena lookup should succeed");
};
assert!(s.is_empty());
assert_eq!(table.count(), 1);
}
#[test]
fn intern_with_embedded_null() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let data = b"hello\0world";
let r = table.intern(data, &mut arena, Color::White0);
let Some(s) = arena.get(r) else {
unreachable!("arena lookup should succeed");
};
assert_eq!(s.data(), data);
assert_eq!(s.len(), 11);
}
#[test]
fn intern_triggers_resize() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let initial_buckets = table.bucket_count();
assert_eq!(initial_buckets, MIN_STR_TAB_SIZE);
for i in 0..=initial_buckets {
let data = format!("string_{i}");
table.intern(data.as_bytes(), &mut arena, Color::White0);
}
assert_eq!(table.count(), initial_buckets + 1);
assert_eq!(
table.bucket_count(),
initial_buckets * 2,
"should have doubled"
);
for i in 0..=initial_buckets {
let data = format!("string_{i}");
let r = table.intern(data.as_bytes(), &mut arena, Color::White0);
let Some(s) = arena.get(r) else {
unreachable!("arena lookup should succeed");
};
assert_eq!(s.data(), data.as_bytes());
}
assert_eq!(table.count(), initial_buckets + 1);
}
#[test]
fn remove_decrements_count() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let r1 = table.intern(b"hello", &mut arena, Color::White0);
let r2 = table.intern(b"world", &mut arena, Color::White0);
assert_eq!(table.count(), 2);
table.remove(r1, &arena);
assert_eq!(table.count(), 1);
let r2_again = table.intern(b"world", &mut arena, Color::White0);
assert_eq!(r2, r2_again);
assert_eq!(table.count(), 1);
}
#[test]
fn maybe_shrink_below_threshold() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let mut refs = Vec::new();
for i in 0..65 {
let data = format!("s{i}");
refs.push(table.intern(data.as_bytes(), &mut arena, Color::White0));
}
assert!(table.bucket_count() > MIN_STR_TAB_SIZE * 2);
let large_bucket_count = table.bucket_count();
for &r in &refs[..60] {
table.remove(r, &arena);
}
assert_eq!(table.count(), 5);
table.maybe_shrink(&arena);
assert!(
table.bucket_count() < large_bucket_count,
"should have shrunk"
);
for &r in &refs[60..] {
assert!(arena.get(r).is_some());
}
}
#[test]
fn no_shrink_below_minimum() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let r = table.intern(b"x", &mut arena, Color::White0);
table.remove(r, &arena);
assert_eq!(table.count(), 0);
table.maybe_shrink(&arena);
assert_eq!(table.bucket_count(), MIN_STR_TAB_SIZE);
}
#[test]
fn string_table_default() {
let table = StringTable::default();
assert_eq!(table.count(), 0);
assert_eq!(table.bucket_count(), MIN_STR_TAB_SIZE);
}
#[test]
fn interning_preserves_identity_across_types() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let from_literal = table.intern(b"test", &mut arena, Color::White0);
let from_string = table.intern(b"test", &mut arena, Color::White0);
let from_vec = table.intern(b"test".as_ref(), &mut arena, Color::White0);
assert_eq!(from_literal, from_string);
assert_eq!(from_string, from_vec);
}
#[test]
fn stress_many_strings() {
let mut arena = Arena::new();
let mut table = StringTable::new();
let mut refs = Vec::new();
for i in 0..1000 {
let data = format!("key_{i:04}");
refs.push(table.intern(data.as_bytes(), &mut arena, Color::White0));
}
assert_eq!(table.count(), 1000);
assert_eq!(arena.len(), 1000);
for (i, &expected_ref) in refs.iter().enumerate() {
let data = format!("key_{i:04}");
let r = table.intern(data.as_bytes(), &mut arena, Color::White0);
assert_eq!(r, expected_ref);
}
assert_eq!(table.count(), 1000);
}
}