use std::sync::{Arc, LazyLock};
use dashmap::DashMap;
/// Process-wide interner used by the free functions `intern` and `intern_arc`.
/// Wrapped in `LazyLock`, so the underlying `Interner` is built on first access.
static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::new);
/// String interner that stores each distinct string once as an `Arc<str>`
/// and hands out clones of that stored handle.
pub struct Interner {
/// Set of interned strings: the keys are the stored `Arc<str>` handles, the
/// unit values carry no information.
strings: DashMap<Arc<str>, ()>,
}
impl Interner {
    /// Creates an empty interner.
    pub fn new() -> Self {
        Self {
            strings: DashMap::new(),
        }
    }

    /// Returns the canonical `Arc<str>` for `s`, storing a freshly allocated
    /// one if no equal string has been interned here yet.
    ///
    /// Interning equal strings through the same `Interner` yields handles for
    /// which [`Arc::ptr_eq`] is `true`.
    pub fn intern(&self, s: &str) -> Arc<str> {
        // Fast path: a plain lookup avoids allocating an `Arc` for strings
        // that are already present.
        if let Some(existing) = self.strings.get(s) {
            return Arc::clone(existing.key());
        }
        self.insert_or_get(Arc::from(s))
    }

    /// Like [`Interner::intern`], but takes an already allocated `Arc<str>`.
    ///
    /// If an equal string is already interned, the stored handle is returned
    /// and `s` is dropped; otherwise `s` itself becomes the canonical handle.
    #[inline]
    #[allow(dead_code)] // public API; may have no caller in some builds
    pub fn intern_arc(&self, s: Arc<str>) -> Arc<str> {
        if let Some(existing) = self.strings.get(&s) {
            return Arc::clone(existing.key());
        }
        // The lookup above and the insertion are separate steps, so another
        // thread may intern an equal string in between. Going through the
        // entry API (instead of a blind `insert`) guarantees we return the
        // handle that actually ended up in the map.
        self.insert_or_get(s)
    }

    /// Stores `candidate` unless an equal string is already present, and
    /// returns whichever handle the map holds afterwards.
    fn insert_or_get(&self, candidate: Arc<str>) -> Arc<str> {
        use dashmap::mapref::entry::Entry;

        // `entry` makes the presence check and the insertion one operation,
        // so concurrent callers agree on a single winning handle.
        match self.strings.entry(Arc::clone(&candidate)) {
            Entry::Occupied(slot) => Arc::clone(slot.key()),
            Entry::Vacant(slot) => {
                slot.insert(());
                candidate
            }
        }
    }

    /// Number of distinct strings currently interned.
    #[allow(dead_code)] // public API; may have no caller in some builds
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when no string has been interned yet.
    #[allow(dead_code)] // public API; may have no caller in some builds
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }
}
impl Default for Interner {
fn default() -> Self {
Self::new()
}
}
#[inline]
pub fn intern(s: &str) -> Arc<str> {
INTERNER.intern(s)
}
#[inline]
#[allow(dead_code)] pub fn intern_arc(s: Arc<str>) -> Arc<str> {
INTERNER.intern_arc(s)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same text twice yields the same allocation.
    #[test]
    fn intern_returns_same_arc_for_same_string() {
        let pool = Interner::new();
        let first = pool.intern("task_a");
        let second = pool.intern("task_a");
        assert!(Arc::ptr_eq(&first, &second));
    }

    /// Distinct texts get distinct allocations and are both counted.
    #[test]
    fn intern_different_strings_different_arcs() {
        let pool = Interner::new();
        let left = pool.intern("task_a");
        let right = pool.intern("task_b");
        assert!(!Arc::ptr_eq(&left, &right));
        assert_eq!(pool.len(), 2);
    }

    /// `intern_arc` returns the handle stored earlier by `intern`.
    #[test]
    fn intern_arc_reuses_existing() {
        let pool = Interner::new();
        let stored = pool.intern("task_a");
        let looked_up = pool.intern_arc(Arc::from("task_a"));
        assert!(Arc::ptr_eq(&stored, &looked_up));
    }

    /// The free function backed by the global interner deduplicates too.
    #[test]
    fn global_intern_works() {
        let first = intern("global_test");
        let second = intern("global_test");
        assert!(Arc::ptr_eq(&first, &second));
    }

    /// Ten threads each intern one hundred unique strings; all must be kept.
    #[test]
    fn concurrent_intern_is_safe() {
        use std::thread;

        let pool = Arc::new(Interner::new());
        // `collect` spawns every worker before any of them is joined.
        let workers: Vec<_> = (0..10)
            .map(|i| {
                let pool = Arc::clone(&pool);
                thread::spawn(move || {
                    for j in 0..100 {
                        pool.intern(&format!("task_{}_{}", i, j));
                    }
                })
            })
            .collect();
        for worker in workers {
            worker.join().unwrap();
        }
        assert_eq!(pool.len(), 1000);
    }

    /// Ten threads intern the same string; every result must share one allocation.
    #[test]
    fn concurrent_same_string_returns_same_arc() {
        use std::sync::mpsc;
        use std::thread;

        let pool = Arc::new(Interner::new());
        let (sender, receiver) = mpsc::channel();
        for _ in 0..10 {
            let pool = Arc::clone(&pool);
            let sender = sender.clone();
            thread::spawn(move || {
                sender.send(pool.intern("shared_task")).unwrap();
            });
        }
        // Drop the original sender so the receiver ends once all workers finish.
        drop(sender);

        let handles: Vec<Arc<str>> = receiver.into_iter().collect();
        assert_eq!(handles.len(), 10);
        let reference = &handles[0];
        assert!(handles
            .iter()
            .skip(1)
            .all(|handle| Arc::ptr_eq(reference, handle)));
    }
}