use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use super::super::string::id::StringId;
/// Error indicating that a [`StringId`] had no entry in the interner.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolveError {
    /// The id that could not be resolved.
    pub id: StringId,
}

impl fmt::Display for ResolveError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { id } = self;
        write!(f, "failed to resolve StringId {id:?}")
    }
}

impl std::error::Error for ResolveError {}
/// Error cases surfaced by interning operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InternError {
    /// Every assignable id has been handed out.
    CapacityExhausted,
}

impl fmt::Display for InternError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::CapacityExhausted => {
                "string interner capacity exhausted (> 2^32 - 2 strings)"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for InternError {}
/// Arena-style string interner mapping string content to compact `u32`
/// [`StringId`]s, with per-string reference counts, slot recycling, and a
/// bulk-allocation path (`alloc_range` + `build_dedup_table`) for parallel
/// population.
///
/// Slot 0 is a reserved sentinel and is kept `None` (see `new`/`clear` and
/// the `test_dedup_preserves_sentinel` test).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StringInterner {
// Slot storage indexed by id; `None` marks the sentinel or a freed slot.
strings: Vec<Option<Arc<str>>>,
// Content -> slot-index dedup table; serialized through a sorted-map helper
// so serialization bytes are deterministic.
#[serde(with = "super::serde_helpers::sorted_arc_str_map")]
lookup: HashMap<Arc<str>, u32>,
// Per-slot reference counts, parallel to `strings`.
ref_counts: Vec<u32>,
// Indices of freed slots available for reuse by future interns.
free_list: Vec<u32>,
// Optional cap on the number of distinct ids; not serialized.
#[serde(skip, default)]
max_ids: Option<u32>,
// True after bulk slot writes until `build_dedup_table` reconciles the
// `lookup` table; not serialized.
#[serde(skip, default)]
lookup_stale: bool,
}
impl StringInterner {
    /// Creates an empty interner containing only the reserved sentinel slot 0.
    #[must_use]
    pub fn new() -> Self {
        Self {
            strings: vec![None],
            lookup: HashMap::new(),
            ref_counts: vec![0],
            free_list: Vec::new(),
            max_ids: None,
            lookup_stale: false,
        }
    }

    /// Creates an empty interner pre-sized for `capacity` strings
    /// (plus the sentinel slot).
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        let mut strings = Vec::with_capacity(capacity + 1);
        let mut ref_counts = Vec::with_capacity(capacity + 1);
        strings.push(None);
        ref_counts.push(0);
        Self {
            strings,
            lookup: HashMap::with_capacity(capacity),
            ref_counts,
            free_list: Vec::new(),
            max_ids: None,
            lookup_stale: false,
        }
    }

    /// Creates an empty interner that refuses to hold more than `max_ids`
    /// distinct strings (interning past the limit yields
    /// [`InternError::CapacityExhausted`]).
    #[must_use]
    pub fn with_max_ids(max_ids: u32) -> Self {
        Self {
            strings: vec![None],
            lookup: HashMap::new(),
            ref_counts: vec![0],
            free_list: Vec::new(),
            max_ids: Some(max_ids),
            lookup_stale: false,
        }
    }

    /// Panics with the uniform stale-lookup message when the dedup table has
    /// pending bulk writes.
    ///
    /// `method` (e.g. `"intern()"`) is interpolated so each public entry
    /// point reports its own name; the resulting text is identical to the
    /// per-method messages this type has always produced.
    #[inline]
    fn assert_lookup_fresh(&self, method: &str) {
        assert!(
            !self.lookup_stale,
            "StringInterner::{method} called while lookup is stale \
             (bulk slots written without rebuild). Call build_dedup_table() first."
        );
    }

    /// Number of distinct interned strings (excludes the sentinel).
    ///
    /// # Panics
    /// Panics while the lookup is stale; call [`Self::build_dedup_table`] first.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.assert_lookup_fresh("len()");
        self.lookup.len()
    }

    /// True when no strings are interned.
    ///
    /// # Panics
    /// Panics while the lookup is stale; call [`Self::build_dedup_table`] first.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.assert_lookup_fresh("is_empty()");
        self.lookup.is_empty()
    }

    /// Interns `s`, bumping its reference count (new entries start at 1).
    ///
    /// # Errors
    /// [`InternError::CapacityExhausted`] when the optional id budget or the
    /// `u32` index space is exhausted.
    ///
    /// # Panics
    /// Panics while the lookup is stale; call [`Self::build_dedup_table`] first.
    pub fn intern(&mut self, s: &str) -> Result<StringId, InternError> {
        self.assert_lookup_fresh("intern()");
        self.intern_with_delta(s, 1)
    }

    /// Interns `s` without taking a reference (new entries start at count 0,
    /// so a later [`Self::recycle_unreferenced`] may reclaim them).
    ///
    /// # Errors
    /// [`InternError::CapacityExhausted`] when the optional id budget or the
    /// `u32` index space is exhausted.
    ///
    /// # Panics
    /// Panics while the lookup is stale; call [`Self::build_dedup_table`] first.
    #[allow(dead_code)]
    pub fn intern_without_ref(&mut self, s: &str) -> Result<StringId, InternError> {
        self.assert_lookup_fresh("intern_without_ref()");
        self.intern_with_delta(s, 0)
    }

    /// Shared body of [`Self::intern`] and [`Self::intern_without_ref`]
    /// (previously two near-identical copies).
    ///
    /// `rc_delta` is both the increment applied to an existing entry's count
    /// and the initial count of a freshly created one (1 = counted,
    /// 0 = count-neutral; `saturating_add(0)` makes the hit path a no-op for
    /// the neutral case).
    fn intern_with_delta(&mut self, s: &str, rc_delta: u32) -> Result<StringId, InternError> {
        if let Some(&index) = self.lookup.get(s) {
            self.ref_counts[index as usize] =
                self.ref_counts[index as usize].saturating_add(rc_delta);
            return Ok(StringId::new(index));
        }
        // Enforce the optional id budget before allocating a new slot.
        if let Some(max) = self.max_ids
            && self.lookup.len() >= max as usize
        {
            return Err(InternError::CapacityExhausted);
        }
        let arc_str: Arc<str> = Arc::from(s);
        let index = if let Some(free_idx) = self.free_list.pop() {
            // Reuse a recycled slot rather than growing the table.
            self.strings[free_idx as usize] = Some(Arc::clone(&arc_str));
            self.ref_counts[free_idx as usize] = rc_delta;
            free_idx
        } else {
            let idx = self.strings.len();
            let idx_u32 = u32::try_from(idx).map_err(|_| InternError::CapacityExhausted)?;
            // u32::MAX is the invalid-id sentinel, and the local tag bit must
            // stay clear for ids handed out by this interner.
            if idx_u32 == u32::MAX || idx_u32 & StringId::LOCAL_TAG_BIT != 0 {
                return Err(InternError::CapacityExhausted);
            }
            self.strings.push(Some(Arc::clone(&arc_str)));
            self.ref_counts.push(rc_delta);
            idx_u32
        };
        self.lookup.insert(arc_str, index);
        Ok(StringId::new(index))
    }

    /// Resolves `id` to its string; `None` for invalid, out-of-range, or
    /// freed ids. Safe to call while the lookup is stale (reads slots only).
    #[must_use]
    pub fn resolve(&self, id: StringId) -> Option<Arc<str>> {
        if id.is_invalid() {
            return None;
        }
        self.strings.get(id.index() as usize).cloned().flatten()
    }

    /// Current reference count for `id`; 0 for invalid or unknown ids.
    #[must_use]
    pub fn ref_count(&self, id: StringId) -> u32 {
        if id.is_invalid() {
            return 0;
        }
        self.ref_counts.get(id.index() as usize).copied().unwrap_or(0)
    }

    /// Increments the reference count for `id`, saturating at `u32::MAX`.
    ///
    /// Returns the updated count, or `None` when `id` is invalid or does not
    /// name a live slot.
    pub fn inc_ref(&mut self, id: StringId) -> Option<u32> {
        if id.is_invalid() {
            return None;
        }
        let index = id.index() as usize;
        if index < self.ref_counts.len() && self.strings[index].is_some() {
            self.ref_counts[index] = self.ref_counts[index].saturating_add(1);
            Some(self.ref_counts[index])
        } else {
            None
        }
    }

    /// Decrements the reference count for `id`, saturating at 0 (a count is
    /// never pushed below zero).
    ///
    /// Returns the updated count, or `None` when `id` is invalid or does not
    /// name a live slot.
    pub fn dec_ref(&mut self, id: StringId) -> Option<u32> {
        if id.is_invalid() {
            return None;
        }
        let index = id.index() as usize;
        if index < self.ref_counts.len() && self.strings[index].is_some() {
            self.ref_counts[index] = self.ref_counts[index].saturating_sub(1);
            Some(self.ref_counts[index])
        } else {
            None
        }
    }

    /// Frees every slot whose reference count is 0: clears the slot, removes
    /// its lookup entry, and pushes its index onto the free list for reuse.
    /// Returns the number of slots recycled.
    ///
    /// # Panics
    /// Panics while the lookup is stale; call [`Self::build_dedup_table`] first.
    #[allow(dead_code)]
    pub fn recycle_unreferenced(&mut self) -> usize {
        self.assert_lookup_fresh("recycle_unreferenced()");
        let mut recycled = 0;
        // Start at 1: slot 0 is the reserved sentinel.
        for index in 1..self.strings.len() {
            if self.ref_counts[index] == 0
                && let Some(arc_str) = self.strings[index].take()
            {
                self.lookup.remove(&arc_str);
                if let Ok(index_u32) = u32::try_from(index) {
                    self.free_list.push(index_u32);
                }
                recycled += 1;
            }
        }
        recycled
    }

    /// True when `s` is currently interned.
    ///
    /// # Panics
    /// Panics while the lookup is stale; call [`Self::build_dedup_table`] first.
    #[must_use]
    pub fn contains(&self, s: &str) -> bool {
        self.assert_lookup_fresh("contains()");
        self.lookup.contains_key(s)
    }

    /// Returns the id of `s` if it is interned, without touching counts.
    ///
    /// # Panics
    /// Panics while the lookup is stale; call [`Self::build_dedup_table`] first.
    #[must_use]
    pub fn get(&self, s: &str) -> Option<StringId> {
        self.assert_lookup_fresh("get()");
        self.lookup.get(s).map(|&idx| StringId::new(idx))
    }

    /// Iterates live `(id, string)` pairs in slot order, skipping the
    /// sentinel and freed slots.
    pub fn iter(&self) -> impl Iterator<Item = (StringId, &Arc<str>)> {
        self.strings
            .iter()
            .enumerate()
            .skip(1)
            .filter_map(|(idx, opt)| {
                let index_u32 = u32::try_from(idx).ok()?;
                opt.as_ref().map(|s| (StringId::new(index_u32), s))
            })
    }

    /// Resets to the freshly constructed state (sentinel slot only) and
    /// clears any pending stale flag. The `max_ids` limit is retained.
    pub fn clear(&mut self) {
        self.strings.truncate(1);
        // Slot 0 may have been written through the bulk APIs; restore the
        // sentinel invariant explicitly.
        self.strings[0] = None;
        self.lookup.clear();
        self.ref_counts.truncate(1);
        self.ref_counts[0] = 0;
        self.free_list.clear();
        self.lookup_stale = false;
    }

    /// Pre-allocates room for `additional` more strings in every backing
    /// collection.
    pub fn reserve(&mut self, additional: usize) {
        self.strings.reserve(additional);
        self.ref_counts.reserve(additional);
        self.lookup.reserve(additional);
    }

    /// Reserves `count` contiguous empty slots for bulk (e.g. parallel)
    /// population and returns the first index. A non-zero allocation marks
    /// the lookup stale until [`Self::build_dedup_table`] reconciles it.
    ///
    /// # Errors
    /// [`InternError::CapacityExhausted`] if the range would overflow `u32`
    /// or collide with [`StringId::LOCAL_TAG_BIT`].
    pub fn alloc_range(&mut self, count: u32) -> Result<u32, InternError> {
        let start = self.strings.len();
        let start_u32 = u32::try_from(start).map_err(|_| InternError::CapacityExhausted)?;
        if count == 0 {
            return Ok(start_u32);
        }
        let end_u32 = start_u32
            .checked_add(count)
            .ok_or(InternError::CapacityExhausted)?;
        // NOTE(review): only the last index of the range is checked against
        // the tag bit; this is sufficient only if LOCAL_TAG_BIT is the most
        // significant bit (monotonicity then covers the whole range) —
        // confirm against StringId's bit layout.
        if (end_u32 - 1) & StringId::LOCAL_TAG_BIT != 0 {
            return Err(InternError::CapacityExhausted);
        }
        self.strings.resize(end_u32 as usize, None);
        self.ref_counts.resize(end_u32 as usize, 0);
        self.lookup_stale = true;
        Ok(start_u32)
    }

    /// Mutable views over the slot and ref-count arrays for
    /// `[start, start + count)`, for bulk population after
    /// [`Self::alloc_range`]. Any non-empty borrow marks the lookup stale.
    ///
    /// # Panics
    /// Panics if the requested range is out of bounds.
    pub fn bulk_slices_mut(
        &mut self,
        start: u32,
        count: u32,
    ) -> (&mut [Option<Arc<str>>], &mut [u32]) {
        if count > 0 {
            self.lookup_stale = true;
        }
        let s = start as usize;
        let e = s + count as usize;
        (&mut self.strings[s..e], &mut self.ref_counts[s..e])
    }

    /// Rebuilds the lookup from slot contents, merging duplicate strings,
    /// and clears the stale flag.
    ///
    /// The first (lowest-index) occurrence of each string becomes canonical;
    /// duplicate slots are freed, their counts folded into the canonical
    /// slot, and a duplicate-id -> canonical-id map is returned so callers
    /// can remap their references.
    pub fn build_dedup_table(&mut self) -> HashMap<StringId, StringId> {
        let mut remap: HashMap<StringId, StringId> = HashMap::new();
        let mut canonical: HashMap<Arc<str>, u32> = HashMap::new();
        // Start at 1: slot 0 is the reserved sentinel.
        for idx in 1..self.strings.len() {
            let Some(ref arc_str) = self.strings[idx] else {
                continue;
            };
            if let Some(&canon_idx) = canonical.get(arc_str) {
                // Duplicate: fold its count into the canonical slot, free the
                // slot, and record the remapping.
                let dup_rc = self.ref_counts[idx];
                self.ref_counts[canon_idx as usize] =
                    self.ref_counts[canon_idx as usize].saturating_add(dup_rc);
                self.strings[idx] = None;
                self.ref_counts[idx] = 0;
                let idx_u32 = u32::try_from(idx)
                    .unwrap_or_else(|_| unreachable!("string slot index exceeds u32 invariant"));
                self.free_list.push(idx_u32);
                remap.insert(StringId::new(idx_u32), StringId::new(canon_idx));
            } else {
                let idx_u32 = u32::try_from(idx)
                    .unwrap_or_else(|_| unreachable!("string slot index exceeds u32 invariant"));
                canonical.insert(Arc::clone(arc_str), idx_u32);
            }
        }
        // Replace the lookup wholesale with the canonical mapping, reusing
        // its existing allocation.
        self.lookup.clear();
        self.lookup.reserve(canonical.len());
        self.lookup.extend(canonical);
        self.lookup_stale = false;
        remap
    }

    /// Rolls the slot arrays back to `saved_len` entries — the inverse of an
    /// [`Self::alloc_range`] that was never committed — and clears the stale
    /// flag.
    ///
    /// NOTE(review): lookup and free-list entries pointing at or beyond
    /// `saved_len` are not pruned here; this appears to assume callers only
    /// roll back ranges that were never published to the lookup — confirm.
    ///
    /// # Panics
    /// Panics if `saved_len` is 0 (the sentinel slot must survive).
    pub fn truncate_to(&mut self, saved_len: usize) {
        assert!(saved_len >= 1, "cannot truncate sentinel slot at index 0");
        self.strings.truncate(saved_len);
        self.ref_counts.truncate(saved_len);
        self.lookup_stale = false;
    }

    /// Raw slot count including the sentinel and freed slots
    /// (`strings.len()`). Valid even while the lookup is stale.
    #[inline]
    #[must_use]
    pub fn string_count_raw(&self) -> usize {
        self.strings.len()
    }

    /// True when bulk slot writes have not yet been reconciled by
    /// [`Self::build_dedup_table`].
    #[inline]
    #[must_use]
    pub fn is_lookup_stale(&self) -> bool {
        self.lookup_stale
    }

    /// Point-in-time usage statistics, derived purely from the slot arrays —
    /// safe to call while the lookup is stale.
    #[must_use]
    pub fn stats(&self) -> InternerStats {
        let total_bytes: usize = self
            .strings
            .iter()
            .filter_map(|opt| opt.as_ref())
            .map(|s| s.len())
            .sum();
        let string_count = self
            .strings
            .iter()
            .skip(1)
            .filter(|s| s.is_some())
            .count();
        InternerStats {
            string_count,
            total_bytes,
            free_slots: self.free_list.len(),
            capacity: self.strings.capacity(),
        }
    }
}
impl Default for StringInterner {
fn default() -> Self {
Self::new()
}
}
impl fmt::Display for StringInterner {
    /// Renders a one-line summary: live string count, free-slot count, and a
    /// trailing ", lookup_stale" marker while the dedup table is pending.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let strings = self.stats().string_count;
        let free = self.free_list.len();
        let stale = if self.lookup_stale { ", lookup_stale" } else { "" };
        write!(f, "StringInterner(strings={strings}, free={free}{stale})")
    }
}
/// Point-in-time snapshot of interner memory usage, as returned by
/// `StringInterner::stats`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InternerStats {
// Number of live (non-sentinel, non-freed) string slots.
pub string_count: usize,
// Sum of the byte lengths of all live strings.
pub total_bytes: usize,
// Number of freed slots awaiting reuse.
pub free_slots: usize,
// Allocated capacity of the slot vector (includes the sentinel).
pub capacity: usize,
}
#[cfg(test)]
mod tests {
use super::*;
// --- Construction and basic intern/resolve/ref-count behavior ---
#[test]
fn test_new() {
let interner = StringInterner::new();
assert_eq!(interner.len(), 0);
assert!(interner.is_empty());
}
#[test]
fn test_with_capacity() {
let interner = StringInterner::with_capacity(100);
assert_eq!(interner.len(), 0);
// +1 accounts for the reserved sentinel slot.
assert!(interner.strings.capacity() >= 101); }
#[test]
fn test_intern_single() {
let mut interner = StringInterner::new();
let id = interner.intern("hello").unwrap();
assert!(!id.is_invalid());
assert_eq!(interner.len(), 1);
let resolved = interner.resolve(id).unwrap();
assert_eq!(&*resolved, "hello");
}
#[test]
fn test_intern_duplicate() {
let mut interner = StringInterner::new();
let id1 = interner.intern("foo").unwrap();
let id2 = interner.intern("foo").unwrap();
assert_eq!(id1, id2);
assert_eq!(interner.len(), 1);
assert_eq!(interner.ref_count(id1), 2);
}
#[test]
fn test_intern_different() {
let mut interner = StringInterner::new();
let id1 = interner.intern("foo").unwrap();
let id2 = interner.intern("bar").unwrap();
assert_ne!(id1, id2);
assert_eq!(interner.len(), 2);
}
#[test]
fn test_resolve_invalid() {
let interner = StringInterner::new();
assert!(interner.resolve(StringId::INVALID).is_none());
}
#[test]
fn test_resolve_out_of_bounds() {
let interner = StringInterner::new();
assert!(interner.resolve(StringId::new(999)).is_none());
}
#[test]
fn test_ref_count() {
let mut interner = StringInterner::new();
let id = interner.intern("test").unwrap();
assert_eq!(interner.ref_count(id), 1);
interner.intern("test").unwrap(); assert_eq!(interner.ref_count(id), 2);
interner.dec_ref(id);
assert_eq!(interner.ref_count(id), 1);
interner.inc_ref(id);
assert_eq!(interner.ref_count(id), 2);
}
#[test]
fn test_inc_ref_invalid() {
let mut interner = StringInterner::new();
assert!(interner.inc_ref(StringId::INVALID).is_none());
}
#[test]
fn test_dec_ref_invalid() {
let mut interner = StringInterner::new();
assert!(interner.dec_ref(StringId::INVALID).is_none());
}
#[test]
fn test_dec_ref_saturating() {
let mut interner = StringInterner::new();
let id = interner.intern_without_ref("test").unwrap();
assert_eq!(interner.ref_count(id), 0);
// dec_ref must saturate at 0, never underflow.
interner.dec_ref(id);
assert_eq!(interner.ref_count(id), 0); }
#[test]
fn test_intern_without_ref() {
let mut interner = StringInterner::new();
let id = interner.intern_without_ref("test").unwrap();
assert_eq!(interner.ref_count(id), 0);
assert_eq!(interner.resolve(id).unwrap().as_ref(), "test");
}
// --- Slot recycling and free-list reuse ---
#[test]
fn test_recycle_unreferenced() {
let mut interner = StringInterner::new();
let id1 = interner.intern_without_ref("foo").unwrap();
let id2 = interner.intern("bar").unwrap();
assert_eq!(interner.len(), 2);
let recycled = interner.recycle_unreferenced();
assert_eq!(recycled, 1); assert_eq!(interner.len(), 1);
assert!(interner.resolve(id1).is_none());
assert_eq!(interner.resolve(id2).unwrap().as_ref(), "bar");
}
#[test]
fn test_free_list_reuse() {
let mut interner = StringInterner::new();
let id1 = interner.intern_without_ref("foo").unwrap();
let _id2 = interner.intern("bar").unwrap();
interner.recycle_unreferenced();
let id3 = interner.intern("baz").unwrap();
// New intern should reoccupy the recycled slot.
assert_eq!(id3.index(), id1.index()); }
#[test]
fn test_contains() {
let mut interner = StringInterner::new();
interner.intern("hello").unwrap();
assert!(interner.contains("hello"));
assert!(!interner.contains("world"));
}
#[test]
fn test_get() {
let mut interner = StringInterner::new();
let id = interner.intern("hello").unwrap();
assert_eq!(interner.get("hello"), Some(id));
assert_eq!(interner.get("world"), None);
}
#[test]
fn test_iter() {
let mut interner = StringInterner::new();
interner.intern("foo").unwrap();
interner.intern("bar").unwrap();
interner.intern("baz").unwrap();
let strings: Vec<_> = interner.iter().map(|(_, s)| s.as_ref()).collect();
assert_eq!(strings.len(), 3);
assert!(strings.contains(&"foo"));
assert!(strings.contains(&"bar"));
assert!(strings.contains(&"baz"));
}
#[test]
fn test_clear() {
let mut interner = StringInterner::new();
interner.intern("foo").unwrap();
interner.intern("bar").unwrap();
assert_eq!(interner.len(), 2);
interner.clear();
assert_eq!(interner.len(), 0);
assert!(interner.is_empty());
}
#[test]
fn test_reserve() {
let mut interner = StringInterner::new();
interner.reserve(1000);
assert!(interner.strings.capacity() >= 1001);
}
#[test]
fn test_display() {
let mut interner = StringInterner::new();
interner.intern("test").unwrap();
let display = format!("{interner}");
assert!(display.contains("StringInterner"));
assert!(display.contains("strings=1"));
}
#[test]
fn test_stats() {
let mut interner = StringInterner::new();
interner.intern("hello").unwrap(); interner.intern("world").unwrap();
let stats = interner.stats();
assert_eq!(stats.string_count, 2);
assert_eq!(stats.total_bytes, 10);
assert_eq!(stats.free_slots, 0);
}
#[test]
fn test_empty_string() {
let mut interner = StringInterner::new();
let id = interner.intern("").unwrap();
assert!(!id.is_invalid());
assert_eq!(interner.resolve(id).unwrap().as_ref(), "");
}
#[test]
fn test_unicode() {
let mut interner = StringInterner::new();
let id = interner.intern("日本語").unwrap();
let resolved = interner.resolve(id).unwrap();
assert_eq!(&*resolved, "日本語");
}
#[test]
fn test_default() {
let interner: StringInterner = StringInterner::default();
assert_eq!(interner.len(), 0);
}
#[test]
fn test_resolve_error_display() {
let err = ResolveError {
id: StringId::new(42),
};
let display = format!("{err}");
assert!(display.contains("StringId"));
}
#[test]
fn test_intern_error_display() {
let err = InternError::CapacityExhausted;
let display = format!("{err}");
assert!(display.contains("capacity exhausted"));
}
// --- Bulk allocation: alloc_range / bulk_slices_mut / build_dedup_table ---
#[test]
fn test_alloc_range_basic() {
let mut interner = StringInterner::new();
assert_eq!(interner.string_count_raw(), 1);
let start = interner.alloc_range(5).unwrap();
assert!(start >= 1);
assert_eq!(start, 1); assert_eq!(interner.string_count_raw(), 6);
for i in start..start + 5 {
let id = StringId::new(i);
assert!(interner.resolve(id).is_none());
assert_eq!(interner.ref_count(id), 0);
}
}
#[test]
fn test_alloc_range_zero_noop() {
let mut interner = StringInterner::new();
let before = interner.string_count_raw();
let start = interner.alloc_range(0).unwrap();
assert_eq!(start as usize, before);
assert_eq!(interner.string_count_raw(), before);
}
#[test]
fn test_alloc_range_after_existing_strings() {
let mut interner = StringInterner::new();
interner.intern("existing").unwrap();
let start = interner.alloc_range(3).unwrap();
assert_eq!(start, 2);
assert_eq!(interner.string_count_raw(), 5);
assert_eq!(
interner.resolve(StringId::new(1)).unwrap().as_ref(),
"existing"
);
}
#[test]
fn test_bulk_slices_mut() {
let mut interner = StringInterner::new();
let start = interner.alloc_range(3).unwrap();
{
let (strings, ref_counts) = interner.bulk_slices_mut(start, 3);
strings[0] = Some(Arc::from("alpha"));
strings[1] = Some(Arc::from("beta"));
strings[2] = Some(Arc::from("gamma"));
ref_counts[0] = 1;
ref_counts[1] = 2;
ref_counts[2] = 1;
}
assert_eq!(
interner.resolve(StringId::new(start)).unwrap().as_ref(),
"alpha"
);
assert_eq!(
interner.resolve(StringId::new(start + 1)).unwrap().as_ref(),
"beta"
);
assert_eq!(
interner.resolve(StringId::new(start + 2)).unwrap().as_ref(),
"gamma"
);
assert_eq!(interner.ref_count(StringId::new(start)), 1);
assert_eq!(interner.ref_count(StringId::new(start + 1)), 2);
assert_eq!(interner.ref_count(StringId::new(start + 2)), 1);
}
#[test]
fn test_build_dedup_table_no_duplicates() {
let mut interner = StringInterner::new();
let start = interner.alloc_range(3).unwrap();
{
let (strings, ref_counts) = interner.bulk_slices_mut(start, 3);
strings[0] = Some(Arc::from("one"));
strings[1] = Some(Arc::from("two"));
strings[2] = Some(Arc::from("three"));
ref_counts[0] = 1;
ref_counts[1] = 1;
ref_counts[2] = 1;
}
let remap = interner.build_dedup_table();
assert!(remap.is_empty(), "no duplicates means empty remap");
assert_eq!(
interner.resolve(StringId::new(start)).unwrap().as_ref(),
"one"
);
assert_eq!(
interner.resolve(StringId::new(start + 1)).unwrap().as_ref(),
"two"
);
assert_eq!(
interner.resolve(StringId::new(start + 2)).unwrap().as_ref(),
"three"
);
assert_eq!(interner.len(), 3);
}
#[test]
fn test_build_dedup_table_with_duplicates() {
let mut interner = StringInterner::new();
let start = interner.alloc_range(4).unwrap();
{
let (strings, ref_counts) = interner.bulk_slices_mut(start, 4);
strings[0] = Some(Arc::from("hello"));
strings[1] = Some(Arc::from("world"));
strings[2] = Some(Arc::from("hello")); strings[3] = Some(Arc::from("world")); ref_counts[0] = 1;
ref_counts[1] = 3;
ref_counts[2] = 2;
ref_counts[3] = 1;
}
let remap = interner.build_dedup_table();
assert_eq!(remap.len(), 2);
// Duplicates remap to the first (canonical) occurrence.
assert_eq!(remap[&StringId::new(start + 2)], StringId::new(start)); assert_eq!(remap[&StringId::new(start + 3)], StringId::new(start + 1));
// Duplicate counts fold into the canonical slots: 1+2 and 3+1.
assert_eq!(interner.ref_count(StringId::new(start)), 3); assert_eq!(interner.ref_count(StringId::new(start + 1)), 4);
assert!(interner.resolve(StringId::new(start + 2)).is_none());
assert!(interner.resolve(StringId::new(start + 3)).is_none());
assert_eq!(interner.ref_count(StringId::new(start + 2)), 0);
assert_eq!(interner.ref_count(StringId::new(start + 3)), 0);
assert_eq!(interner.len(), 2);
assert_eq!(interner.get("hello"), Some(StringId::new(start)));
assert_eq!(interner.get("world"), Some(StringId::new(start + 1)));
}
#[test]
fn test_truncate_to() {
let mut interner = StringInterner::new();
interner.intern("keep").unwrap();
let saved = interner.string_count_raw(); let _start = interner.alloc_range(10).unwrap();
assert_eq!(interner.string_count_raw(), 12);
interner.truncate_to(saved);
assert_eq!(interner.string_count_raw(), 2);
assert_eq!(interner.resolve(StringId::new(1)).unwrap().as_ref(), "keep");
}
#[test]
#[should_panic(expected = "cannot truncate sentinel")]
fn test_truncate_to_zero_panics() {
let mut interner = StringInterner::new();
interner.truncate_to(0);
}
#[test]
fn test_dedup_preserves_sentinel() {
let mut interner = StringInterner::new();
let start = interner.alloc_range(2).unwrap();
{
let (strings, ref_counts) = interner.bulk_slices_mut(start, 2);
strings[0] = Some(Arc::from("a"));
strings[1] = Some(Arc::from("a")); ref_counts[0] = 1;
ref_counts[1] = 1;
}
let _remap = interner.build_dedup_table();
// Slot 0 must remain the empty sentinel after dedup.
assert!(interner.resolve(StringId::new(0)).is_none());
assert_eq!(interner.ref_counts[0], 0);
assert!(interner.strings[0].is_none());
}
#[test]
fn test_string_count_raw() {
let mut interner = StringInterner::new();
assert_eq!(interner.string_count_raw(), 1);
interner.intern("a").unwrap();
assert_eq!(interner.string_count_raw(), 2);
interner.alloc_range(5).unwrap();
assert_eq!(interner.string_count_raw(), 7);
}
#[test]
fn test_ref_count_accessor() {
let mut interner = StringInterner::new();
let id = interner.intern("test").unwrap();
assert_eq!(interner.ref_count(id), 1);
interner.intern("test").unwrap(); assert_eq!(interner.ref_count(id), 2);
assert_eq!(interner.ref_count(StringId::INVALID), 0);
assert_eq!(interner.ref_count(StringId::new(999)), 0);
}
#[test]
fn test_alloc_range_basic_succeeds() {
let mut interner = StringInterner::with_max_ids(u32::MAX);
let start = interner.alloc_range(10).unwrap();
assert_eq!(start, 1, "first allocation should start at index 1");
assert_eq!(
interner.string_count_raw(),
11,
"string count should advance by the allocated range size"
);
}
#[test]
fn test_dedup_frees_slots_for_reuse() {
let mut interner = StringInterner::new();
let start = interner.alloc_range(3).unwrap();
{
let (strings, ref_counts) = interner.bulk_slices_mut(start, 3);
strings[0] = Some(Arc::from("dup"));
strings[1] = Some(Arc::from("unique"));
strings[2] = Some(Arc::from("dup")); ref_counts[0] = 1;
ref_counts[1] = 1;
ref_counts[2] = 1;
}
let remap = interner.build_dedup_table();
assert_eq!(remap.len(), 1);
let dup_slot = start + 2;
assert!(interner.resolve(StringId::new(dup_slot)).is_none());
let new_id = interner.intern("fresh").unwrap();
assert_eq!(
new_id.index(),
dup_slot,
"new intern should reuse freed duplicate slot"
);
}
#[test]
fn test_build_dedup_table_with_gaps() {
let mut interner = StringInterner::new();
let start = interner.alloc_range(4).unwrap();
{
// Slot start+1 is deliberately left empty (None) to form a gap.
let (strings, ref_counts) = interner.bulk_slices_mut(start, 4);
strings[0] = Some(Arc::from("x"));
strings[2] = Some(Arc::from("y"));
strings[3] = Some(Arc::from("x")); ref_counts[0] = 1;
ref_counts[2] = 1;
ref_counts[3] = 1;
}
let remap = interner.build_dedup_table();
assert_eq!(remap.len(), 1);
assert_eq!(remap[&StringId::new(start + 3)], StringId::new(start));
assert_eq!(interner.ref_count(StringId::new(start)), 2);
assert_eq!(interner.ref_count(StringId::new(start + 2)), 1);
assert!(interner.resolve(StringId::new(start + 1)).is_none());
}
// --- Serialization determinism and roundtrips ---
#[test]
fn test_interner_deterministic_serialization() {
let mut interner1 = StringInterner::new();
interner1.intern("alpha").unwrap();
interner1.intern("beta").unwrap();
interner1.intern("gamma").unwrap();
let mut interner2 = StringInterner::new();
interner2.intern("alpha").unwrap();
interner2.intern("beta").unwrap();
interner2.intern("gamma").unwrap();
let bytes1 = postcard::to_stdvec(&interner1).expect("serialize interner1");
let bytes2 = postcard::to_stdvec(&interner2).expect("serialize interner2");
assert_eq!(
bytes1, bytes2,
"same insertion order must produce identical postcard bytes"
);
}
#[test]
fn test_interner_serialization_roundtrip() {
let mut interner = StringInterner::new();
interner.intern("foo").unwrap();
interner.intern("bar").unwrap();
interner.intern("baz").unwrap();
interner.intern("foo").unwrap();
let bytes = postcard::to_stdvec(&interner).expect("serialize");
let deserialized: StringInterner = postcard::from_bytes(&bytes).expect("deserialize");
assert_eq!(deserialized.len(), 3);
assert!(deserialized.contains("foo"));
assert!(deserialized.contains("bar"));
assert!(deserialized.contains("baz"));
let foo_id = deserialized.get("foo").unwrap();
assert_eq!(deserialized.ref_count(foo_id), 2);
let bar_id = deserialized.get("bar").unwrap();
assert_eq!(deserialized.ref_count(bar_id), 1);
}
#[test]
fn test_interner_sorted_lookup_produces_stable_bytes() {
let mut interner = StringInterner::new();
interner.intern("gamma").unwrap();
interner.intern("alpha").unwrap();
interner.intern("beta").unwrap();
let bytes = postcard::to_stdvec(&interner).expect("serialize");
let bytes_str = String::from_utf8_lossy(&bytes);
// Each string appears twice in the bytes (slot vec + lookup map); the
// second occurrences come from the sorted lookup serialization.
let find_second = |needle: &str| {
let first = bytes_str.find(needle).unwrap();
bytes_str[first + needle.len()..]
.find(needle)
.map(|pos| pos + first + needle.len())
};
let alpha_pos = find_second("alpha");
let beta_pos = find_second("beta");
let gamma_pos = find_second("gamma");
if let (Some(a), Some(b), Some(g)) = (alpha_pos, beta_pos, gamma_pos) {
assert!(a < b, "alpha should appear before beta in sorted lookup");
assert!(b < g, "beta should appear before gamma in sorted lookup");
}
}
// --- Stale-lookup invariant enforcement ---
#[test]
fn test_alloc_range_sets_lookup_stale() {
let mut interner = StringInterner::new();
assert!(!interner.is_lookup_stale());
interner.alloc_range(5).unwrap();
assert!(interner.is_lookup_stale());
}
#[test]
fn test_alloc_range_zero_does_not_set_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(0).unwrap();
assert!(!interner.is_lookup_stale());
}
#[test]
fn test_build_dedup_table_clears_lookup_stale() {
let mut interner = StringInterner::new();
let start = interner.alloc_range(2).unwrap();
assert!(interner.is_lookup_stale());
interner.strings[start as usize] = Some(Arc::from("alpha"));
interner.ref_counts[start as usize] = 1;
interner.strings[(start + 1) as usize] = Some(Arc::from("beta"));
interner.ref_counts[(start + 1) as usize] = 1;
let _remap = interner.build_dedup_table();
assert!(!interner.is_lookup_stale());
assert!(interner.contains("alpha"));
assert!(interner.contains("beta"));
assert_eq!(interner.len(), 2);
}
#[test]
fn test_truncate_to_clears_lookup_stale() {
let mut interner = StringInterner::new();
let saved_len = interner.string_count_raw();
interner.alloc_range(10).unwrap();
assert!(interner.is_lookup_stale());
interner.truncate_to(saved_len);
assert!(!interner.is_lookup_stale());
let id = interner.intern("after_rollback").unwrap();
assert!(interner.contains("after_rollback"));
assert_eq!(interner.resolve(id).unwrap().as_ref(), "after_rollback");
}
#[test]
#[should_panic(expected = "lookup is stale")]
fn test_intern_panics_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(1).unwrap();
let _ = interner.intern("should_panic");
}
#[test]
#[should_panic(expected = "lookup is stale")]
fn test_intern_without_ref_panics_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(1).unwrap();
let _ = interner.intern_without_ref("should_panic");
}
#[test]
#[should_panic(expected = "lookup is stale")]
fn test_get_panics_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(1).unwrap();
let _ = interner.get("should_panic");
}
#[test]
#[should_panic(expected = "lookup is stale")]
fn test_contains_panics_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(1).unwrap();
let _ = interner.contains("should_panic");
}
#[test]
#[should_panic(expected = "lookup is stale")]
fn test_len_panics_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(1).unwrap();
let _ = interner.len();
}
#[test]
#[should_panic(expected = "lookup is stale")]
fn test_is_empty_panics_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(1).unwrap();
let _ = interner.is_empty();
}
#[test]
#[should_panic(expected = "lookup is stale")]
fn test_recycle_unreferenced_panics_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(1).unwrap();
let _ = interner.recycle_unreferenced();
}
#[test]
fn test_resolve_works_when_lookup_stale() {
let mut interner = StringInterner::new();
let id = interner.intern("before_bulk").unwrap();
interner.alloc_range(5).unwrap();
assert!(interner.is_lookup_stale());
assert_eq!(interner.resolve(id).unwrap().as_ref(), "before_bulk");
}
#[test]
fn test_stats_works_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.intern("existing").unwrap();
let start = interner.alloc_range(2).unwrap();
interner.strings[start as usize] = Some(Arc::from("bulk1"));
interner.ref_counts[start as usize] = 1;
assert!(interner.is_lookup_stale());
let stats = interner.stats();
assert_eq!(stats.string_count, 2);
}
#[test]
fn test_display_works_when_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(3).unwrap();
assert!(interner.is_lookup_stale());
let display = format!("{interner}");
assert!(
display.contains("lookup_stale"),
"Display should indicate stale state: {display}"
);
}
#[test]
fn test_display_omits_stale_when_consistent() {
let mut interner = StringInterner::new();
interner.intern("hello").unwrap();
let display = format!("{interner}");
assert!(
!display.contains("lookup_stale"),
"Display should not mention stale when consistent: {display}"
);
}
// End-to-end exercise of the bulk-population workflow:
// alloc_range -> direct slot writes -> build_dedup_table -> queries.
#[test]
fn test_full_parallel_commit_lifecycle() {
let mut interner = StringInterner::new();
let pre_id = interner.intern("pre_existing").unwrap();
assert_eq!(interner.len(), 1);
let start = interner.alloc_range(6).unwrap();
assert!(interner.is_lookup_stale());
interner.strings[start as usize] = Some(Arc::from("alpha"));
interner.ref_counts[start as usize] = 1;
interner.strings[(start + 1) as usize] = Some(Arc::from("beta"));
interner.ref_counts[(start + 1) as usize] = 1;
interner.strings[(start + 2) as usize] = Some(Arc::from("alpha"));
interner.ref_counts[(start + 2) as usize] = 1;
interner.strings[(start + 3) as usize] = Some(Arc::from("gamma"));
interner.ref_counts[(start + 3) as usize] = 1;
interner.strings[(start + 4) as usize] = Some(Arc::from("beta"));
interner.ref_counts[(start + 4) as usize] = 1;
interner.strings[(start + 5) as usize] = Some(Arc::from("delta"));
interner.ref_counts[(start + 5) as usize] = 1;
assert_eq!(interner.resolve(pre_id).unwrap().as_ref(), "pre_existing");
let remap = interner.build_dedup_table();
assert!(!interner.is_lookup_stale());
assert_eq!(remap.len(), 2);
assert_eq!(interner.len(), 5); assert!(interner.contains("pre_existing"));
assert!(interner.contains("alpha"));
assert!(interner.contains("beta"));
assert!(interner.contains("gamma"));
assert!(interner.contains("delta"));
assert_eq!(interner.get("alpha"), Some(StringId::new(start)));
assert_eq!(interner.get("beta"), Some(StringId::new(start + 1)));
assert_eq!(interner.ref_count(StringId::new(start)), 2); assert_eq!(interner.ref_count(StringId::new(start + 1)), 2); }
#[test]
fn test_clear_resets_lookup_stale() {
let mut interner = StringInterner::new();
interner.alloc_range(5).unwrap();
assert!(interner.is_lookup_stale());
interner.clear();
assert!(!interner.is_lookup_stale());
assert_eq!(interner.len(), 0);
assert!(interner.is_empty());
assert!(!interner.contains("anything"));
assert_eq!(interner.get("anything"), None);
let id = interner.intern("after_clear").unwrap();
assert_eq!(interner.resolve(id).unwrap().as_ref(), "after_clear");
assert_eq!(interner.len(), 1);
}
#[test]
fn test_bulk_slices_mut_sets_lookup_stale() {
let mut interner = StringInterner::new();
interner.strings.resize(4, None);
interner.ref_counts.resize(4, 0);
assert!(!interner.is_lookup_stale());
let (str_slots, rc_slots) = interner.bulk_slices_mut(1, 3);
str_slots[0] = Some(Arc::from("x"));
rc_slots[0] = 1;
assert!(interner.is_lookup_stale());
let _remap = interner.build_dedup_table();
assert!(!interner.is_lookup_stale());
assert!(interner.contains("x"));
}
#[test]
fn test_bulk_slices_mut_zero_does_not_set_stale() {
let mut interner = StringInterner::new();
interner.intern("existing").unwrap();
assert!(!interner.is_lookup_stale());
let (_s, _r) = interner.bulk_slices_mut(1, 0);
assert!(!interner.is_lookup_stale());
assert!(interner.contains("existing"));
}
// --- Capacity limits and miscellaneous edge cases ---
#[test]
fn test_with_max_ids_capacity_exhaustion_via_intern() {
let mut interner = StringInterner::with_max_ids(2);
interner.intern("a").unwrap();
interner.intern("b").unwrap();
let err = interner.intern("c").unwrap_err();
assert_eq!(err, InternError::CapacityExhausted);
// Re-interning an existing string still succeeds at the cap.
let id = interner.intern("a").unwrap();
assert!(interner.resolve(id).is_some());
}
#[test]
fn test_with_max_ids_capacity_exhaustion_via_intern_without_ref() {
let mut interner = StringInterner::with_max_ids(1);
interner.intern_without_ref("only").unwrap();
let err = interner.intern_without_ref("second").unwrap_err();
assert_eq!(err, InternError::CapacityExhausted);
}
#[test]
fn test_intern_without_ref_already_interned_no_ref_count_change() {
let mut interner = StringInterner::new();
let id = interner.intern("hello").unwrap();
assert_eq!(interner.ref_count(id), 1);
let id2 = interner.intern_without_ref("hello").unwrap();
assert_eq!(id, id2);
assert_eq!(interner.ref_count(id), 1); }
#[test]
fn test_recycle_unreferenced_noop_when_all_referenced() {
let mut interner = StringInterner::new();
interner.intern("a").unwrap(); interner.intern("b").unwrap();
let recycled = interner.recycle_unreferenced();
assert_eq!(recycled, 0); assert_eq!(interner.len(), 2); }
#[test]
fn test_inc_ref_out_of_bounds_returns_none() {
let mut interner = StringInterner::new();
let result = interner.inc_ref(StringId::new(999));
assert!(result.is_none());
}
#[test]
fn test_dec_ref_out_of_bounds_returns_none() {
let mut interner = StringInterner::new();
let result = interner.dec_ref(StringId::new(999));
assert!(result.is_none());
}
#[test]
fn test_intern_without_ref_reuses_free_list_slot() {
let mut interner = StringInterner::new();
interner.intern_without_ref("recycle_me").unwrap(); interner.intern("anchor").unwrap(); let recycled = interner.recycle_unreferenced();
assert_eq!(recycled, 1);
let new_id = interner.intern_without_ref("fresh").unwrap();
assert_eq!(interner.resolve(new_id).unwrap().as_ref(), "fresh");
assert_eq!(interner.ref_count(new_id), 0);
}
}