use std::sync::OnceLock;
use dashmap::DashMap;
use parking_lot::RwLock;
use crate::{HirnError, HirnResult};
/// Entry limit used for interners that are built without an explicit limit.
///
/// The value is 65,535 (`0xFFFF`).
pub const DEFAULT_INTERNER_MAX_ENTRIES: usize = 0xFFFF;
/// Maps strings to compact `u32` IDs and back.
///
/// Each distinct string is stored once and identified by the index it
/// occupies in the reverse table.
pub struct StringInterner {
    /// String -> ID lookup.
    forward: DashMap<String, u32>,
    /// ID -> string lookup; the ID is the index into this vector.
    reverse: RwLock<Vec<&'static str>>,
    /// Maximum number of distinct strings this interner will accept.
    max_entries: usize,
}
impl StringInterner {
fn new() -> Self {
Self::with_max(DEFAULT_INTERNER_MAX_ENTRIES)
}
pub fn with_max(max_entries: usize) -> Self {
Self {
forward: DashMap::new(),
reverse: RwLock::new(Vec::new()),
max_entries,
}
}
pub fn intern(&self, s: &str) -> u32 {
self.try_intern(s).unwrap_or_else(|_| {
panic!(
"StringInterner capacity exceeded ({} entries): cannot intern {:?}",
self.max_entries, s
)
})
}
pub fn try_intern(&self, s: &str) -> HirnResult<u32> {
if let Some(id) = self.forward.get(s) {
return Ok(*id);
}
let mut reverse = self.reverse.write();
if let Some(id) = self.forward.get(s) {
return Ok(*id);
}
let current = reverse.len();
if current >= self.max_entries {
return Err(HirnError::InvalidInput(format!(
"interner capacity exhausted ({} entries): refusing to intern {:?}",
self.max_entries, s
)));
}
let id = current as u32;
let leaked: &'static str = Box::leak(s.to_string().into_boxed_str());
reverse.push(leaked);
self.forward.insert(s.to_string(), id);
Ok(id)
}
pub fn resolve(&self, id: u32) -> &'static str {
let reverse = self.reverse.read();
reverse[id as usize]
}
pub fn try_resolve(&self, id: u32) -> Option<&'static str> {
let reverse = self.reverse.read();
reverse.get(id as usize).copied()
}
#[cfg(test)]
pub fn len(&self) -> usize {
self.reverse.read().len()
}
#[cfg(test)]
pub fn is_empty(&self) -> bool {
self.reverse.read().is_empty()
}
}
/// Backing storage for [`namespace_interner`]; initialised on first access.
static NAMESPACE_INTERNER: OnceLock<StringInterner> = OnceLock::new();
/// Backing storage for [`agent_id_interner`]; initialised on first access.
static AGENT_ID_INTERNER: OnceLock<StringInterner> = OnceLock::new();
/// Returns the shared namespace interner, creating it on first use.
///
/// When it is created, `"default"` and `"shared"` are interned first, in that
/// order, so they receive IDs 0 and 1.
pub fn namespace_interner() -> &'static StringInterner {
    NAMESPACE_INTERNER.get_or_init(|| {
        let seeded = StringInterner::new();
        // Order matters: the position in this list becomes the ID.
        for well_known in ["default", "shared"] {
            seeded.intern(well_known);
        }
        seeded
    })
}
/// Returns the shared agent-ID interner, creating it on first use.
///
/// When it is created, `"system"` is interned first, so it receives ID 0.
pub fn agent_id_interner() -> &'static StringInterner {
    /// Builds the interner with its well-known entry already present.
    fn seeded() -> StringInterner {
        let table = StringInterner::new();
        table.intern("system");
        table
    }

    AGENT_ID_INTERNER.get_or_init(seeded)
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn intern_same_string_returns_same_id() {
        let interner = StringInterner::new();
        let a = interner.intern("hello");
        let b = interner.intern("hello");
        assert_eq!(a, b);
    }

    #[test]
    fn intern_different_strings_returns_different_ids() {
        let interner = StringInterner::new();
        let a = interner.intern("hello");
        let b = interner.intern("world");
        assert_ne!(a, b);
    }

    #[test]
    fn resolve_round_trips() {
        let interner = StringInterner::new();
        let id = interner.intern("test_value");
        assert_eq!(interner.resolve(id), "test_value");
    }

    #[test]
    fn try_intern_rejects_new_strings_at_capacity() {
        let interner = StringInterner::with_max(2);
        assert_eq!(interner.try_intern("a").ok(), Some(0));
        assert_eq!(interner.try_intern("b").ok(), Some(1));
        // A third distinct string does not fit.
        assert!(interner.try_intern("c").is_err());
        // Strings that are already interned are still answered when full.
        assert_eq!(interner.try_intern("a").ok(), Some(0));
        assert_eq!(interner.len(), 2);
    }

    #[test]
    #[should_panic(expected = "capacity exceeded")]
    fn intern_panics_at_capacity() {
        let interner = StringInterner::with_max(0);
        interner.intern("overflow");
    }

    #[test]
    fn try_resolve_returns_none_for_unknown_id() {
        let interner = StringInterner::new();
        assert!(interner.is_empty());
        assert_eq!(interner.try_resolve(0), None);

        let id = interner.intern("known");
        assert_eq!(interner.try_resolve(id), Some("known"));
        assert_eq!(interner.try_resolve(id + 1), None);
    }

    #[test]
    fn concurrent_interning_is_safe() {
        let interner = StringInterner::new();
        // Four threads intern disjoint key sets; every key must round-trip.
        std::thread::scope(|s| {
            for t in 0..4 {
                let interner = &interner;
                s.spawn(move || {
                    for i in 0..250 {
                        let key = format!("thread{t}_key{i}");
                        let id = interner.intern(&key);
                        assert_eq!(interner.resolve(id), key);
                    }
                });
            }
        });
        assert_eq!(interner.len(), 1000);
    }

    #[test]
    fn concurrent_interning_same_keys() {
        let interner = StringInterner::new();
        // Four threads intern the same 100 keys; no key may be stored twice.
        std::thread::scope(|s| {
            for _ in 0..4 {
                let interner = &interner;
                s.spawn(move || {
                    for i in 0..100 {
                        let key = format!("shared_key_{i}");
                        interner.intern(&key);
                    }
                });
            }
        });
        assert_eq!(interner.len(), 100);
    }

    #[test]
    fn namespace_interner_pre_interns_well_known() {
        let interner = namespace_interner();
        assert_eq!(interner.resolve(0), "default");
        assert_eq!(interner.resolve(1), "shared");
    }

    #[test]
    fn agent_id_interner_pre_interns_system() {
        let interner = agent_id_interner();
        assert_eq!(interner.resolve(0), "system");
    }
}