#![doc = include_str!("../README.md")]
mod chunkfooter;
mod syncbump;
use rustc_hash::{FxHashMap, FxBuildHasher};
use std::collections::HashMap;
use std::sync::RwLock;
use syncbump::SyncBump;
/// Compact handle identifying an interned string.
///
/// The wrapped `u32` is the position the string was given in the interner's
/// symbol table at the time it was first interned. The field is private, so
/// code outside this module obtains symbols only through the interner.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Symbol(u32);
/// The two lookup tables of an interner, kept together so they can be
/// guarded by a single lock and updated in step.
struct InternerData<'a> {
/// Forward lookup: string contents -> the symbol already assigned to them.
map: FxHashMap<&'a str, Symbol>,
/// Reverse lookup: the string for `Symbol(i)` is stored at index `i`.
vec: Vec<&'a str>,
}
/// String interner that stores each distinct string once and hands out
/// [`Symbol`] handles for it.
///
/// `'bump` is the lifetime of the string slices held in the lookup tables.
/// `MIN_ALIGN` is forwarded unchanged to the backing `SyncBump` arena
/// (presumably its minimum allocation alignment; confirm in `syncbump`).
pub struct Interner<'bump, const MIN_ALIGN: usize = 1> {
/// Arena that owns the bytes of every interned string.
arena: SyncBump<MIN_ALIGN>,
/// Lookup tables, behind a reader-writer lock so lookups can share access.
data: RwLock<InternerData<'bump>>,
}
impl<'bump, const MIN_ALIGN: usize> Interner<'bump, MIN_ALIGN> {
pub fn with_capacity(capacity: usize) -> Self {
Interner {
arena: SyncBump::<MIN_ALIGN>::with_capacity(capacity * 10), data: RwLock::new(InternerData {
map: HashMap::with_capacity_and_hasher(capacity, FxBuildHasher::default()),
vec: Vec::with_capacity(capacity),
}),
}
}
pub fn intern(&'bump self, s: &str) -> Symbol {
let read_guard = self.data.read().expect("RwLock poisoned during read");
if let Some(symbol) = read_guard.map.get(s) {
return *symbol;
}
drop(read_guard);
let mut write_guard = self.data.write().unwrap();
if let Some(symbol) = write_guard.map.get(s) {
return *symbol;
}
let id = write_guard.vec.len() as u32;
let symbol = Symbol(id);
let interned_str = self.arena.alloc_str(s);
write_guard.vec.push(interned_str);
write_guard.map.insert(interned_str, symbol);
symbol
}
pub fn resolve(&self, symbol: Symbol) -> Option<&'bump str> {
let read_guard = self.data.read().expect("RwLock poisoned during read");
read_guard.vec.get(symbol.0 as usize).copied()
}
pub fn len(&self) -> usize {
self.data
.read()
.expect("RwLock poisoned during read")
.vec
.len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn memory_usage(&self) -> usize {
self.arena.allocated_bytes()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use once_cell::sync::Lazy;

    /// A single string survives an intern/resolve round trip.
    #[test]
    fn test_intern_and_resolve_basic() {
        static INTERNER: Lazy<Interner<'static>> = Lazy::new(|| Interner::with_capacity(32));

        let text = "hello";
        let symbol = INTERNER.intern(text);

        assert_eq!(INTERNER.resolve(symbol), Some(text));
        assert_eq!(INTERNER.len(), 1);
    }

    /// Equal strings share one symbol; a different string gets a new one.
    #[test]
    fn test_intern_uniqueness() {
        static INTERNER: Lazy<Interner<'static>> = Lazy::new(|| Interner::with_capacity(32));

        let repeated = "world";
        let first = INTERNER.intern(repeated);
        let second = INTERNER.intern(repeated);
        assert_eq!(first, second);
        assert_eq!(INTERNER.len(), 1);

        let other = "rust";
        let other_symbol = INTERNER.intern(other);
        assert_ne!(first, other_symbol);
        assert_eq!(INTERNER.len(), 2);
        assert_eq!(INTERNER.resolve(other_symbol), Some(other));
    }

    /// The empty string can be interned, and an unknown symbol resolves to `None`.
    #[test]
    fn test_edge_cases() {
        static INTERNER: Lazy<Interner<'static>> = Lazy::new(|| Interner::with_capacity(16));

        let empty = INTERNER.intern("");
        assert_eq!(INTERNER.resolve(empty), Some(""));
        assert_eq!(INTERNER.len(), 1);

        // No symbol with this index was ever handed out by this interner.
        let unknown = Symbol(999);
        assert_eq!(INTERNER.resolve(unknown), None);
    }

    /// Ten threads interning an overlapping word list end up with one entry
    /// per distinct word.
    #[test]
    fn test_concurrent_interning() {
        static INTERNER: Lazy<Interner<'static>> = Lazy::new(|| Interner::with_capacity(100));

        let strings_to_intern = vec![
            "apple", "banana", "orange", "apple", "grape", "banana", "kiwi", "apple", "mango",
            "orange", "grape", "papaya",
        ];

        std::thread::scope(|scope| {
            for _ in 0..10 {
                scope.spawn(|| {
                    for word in strings_to_intern.iter().copied() {
                        INTERNER.intern(word);
                    }
                });
            }
        });

        // apple, banana, orange, grape, kiwi, mango, papaya
        let unique_strings_count = 7;
        assert_eq!(INTERNER.len(), unique_strings_count);

        let apple_a = INTERNER.intern("apple");
        let apple_b = INTERNER.intern("apple");
        assert_eq!(apple_a, apple_b);

        for word in ["apple", "papaya", "banana"] {
            assert_eq!(INTERNER.resolve(INTERNER.intern(word)), Some(word));
        }
    }
}