use rustc_hash::FxHashMap;
use std::sync::{OnceLock, RwLock};
/// Identifier of a symbol name stored in a [`StringInterner`].
///
/// The wrapped `u32` is the index of the symbol's slot in the interner's
/// string table; it is only meaningful for the interner that produced it.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct SymId(pub(crate) u32);
/// Id of the `nil` symbol: slot 0, the first name interned by
/// [`StringInterner::new`].
pub const NIL_SYM_ID: SymId = SymId(0);
/// Id of the `t` symbol: slot 1, the second name interned by
/// [`StringInterner::new`].
pub const T_SYM_ID: SymId = SymId(1);
/// Table of symbol names addressed by [`SymId`].
///
/// Every stored name is leaked to obtain a `'static` lifetime, so resolved
/// names can be handed out without borrowing from the interner.
pub struct StringInterner {
    // Slot table: `strings[id.0]` is the name of symbol `id`.
    strings: Vec<&'static str>,
    // Name -> slot index for names that can be found through `lookup`/`intern`.
    map: FxHashMap<&'static str, u32>,
    // Parallel to `strings`: whether the slot is the canonical one for its name.
    canonical: Vec<bool>,
}
impl Default for StringInterner {
fn default() -> Self {
Self::new()
}
}
impl StringInterner {
    /// Creates an interner that already contains `"nil"` and `"t"`.
    ///
    /// The two names are interned first so that they land in the slots named
    /// by [`NIL_SYM_ID`] and [`T_SYM_ID`]; debug builds assert this.
    pub fn new() -> Self {
        let mut interner = Self {
            strings: Vec::new(),
            map: FxHashMap::default(),
            canonical: Vec::new(),
        };
        let nil_id = interner.intern("nil");
        debug_assert_eq!(nil_id, NIL_SYM_ID);
        let t_id = interner.intern("t");
        debug_assert_eq!(t_id, T_SYM_ID);
        interner
    }

    /// Returns the id mapped to `s`, creating a new canonical slot if the name
    /// is not in the lookup map yet.
    ///
    /// The stored copy of `s` is leaked and never freed.
    ///
    /// # Panics
    /// Panics if a new slot is needed and the table no longer fits in `u32`.
    pub fn intern(&mut self, s: &str) -> SymId {
        if let Some(&idx) = self.map.get(s) {
            return SymId(idx);
        }
        let idx = self.next_index();
        let leaked: &'static str = Box::leak(s.to_owned().into_boxed_str());
        self.strings.push(leaked);
        self.map.insert(leaked, idx);
        self.canonical.push(true);
        SymId(idx)
    }

    /// Always allocates a fresh, non-canonical slot named `s`.
    ///
    /// The slot is not added to the lookup map, so [`lookup`](Self::lookup)
    /// and [`intern`](Self::intern) never return it.
    ///
    /// # Panics
    /// Panics if the table no longer fits in `u32`.
    pub fn intern_uninterned(&mut self, s: &str) -> SymId {
        let idx = self.next_index();
        let leaked: &'static str = Box::leak(s.to_owned().into_boxed_str());
        self.strings.push(leaked);
        self.canonical.push(false);
        SymId(idx)
    }

    /// Returns the id mapped to `s` without creating one.
    pub fn lookup(&self, s: &str) -> Option<SymId> {
        self.map.get(s).copied().map(SymId)
    }

    /// Reports whether `id` is flagged canonical; unknown ids yield `false`.
    #[inline]
    pub fn is_canonical_id(&self, id: SymId) -> bool {
        self.canonical.get(id.0 as usize).copied().unwrap_or(false)
    }

    /// Returns the name stored in slot `id`.
    ///
    /// # Panics
    /// Panics if `id` does not refer to a slot of this interner.
    #[inline]
    pub fn resolve(&self, id: SymId) -> &'static str {
        self.strings[id.0 as usize]
    }

    /// Exposes the slot table in id order.
    pub(crate) fn strings(&self) -> &[&'static str] {
        &self.strings
    }

    /// Rebuilds an interner whose slot `i` holds `strings[i]`.
    ///
    /// Only names are supplied, so canonical flags are recomputed: the first
    /// slot carrying a given name becomes the mapped, canonical one and any
    /// later slot with the same name is non-canonical. Unlike
    /// [`new`](Self::new), nothing is pre-seeded.
    pub(crate) fn from_strings(strings: Vec<String>) -> Self {
        let mut interner = Self {
            strings: Vec::with_capacity(strings.len()),
            map: FxHashMap::with_capacity_and_hasher(strings.len(), Default::default()),
            canonical: Vec::with_capacity(strings.len()),
        };
        for s in strings {
            interner.push_preserving_slot(s);
        }
        interner
    }

    /// Makes this interner agree with `strings` slot by slot.
    ///
    /// Slots that already exist must hold the expected name; missing slots are
    /// appended with the same first-occurrence rule as
    /// [`from_strings`](Self::from_strings). Slots beyond `strings.len()` are
    /// left untouched.
    ///
    /// # Panics
    /// Panics if an existing slot holds a different name than `strings`.
    pub(crate) fn ensure_from_strings(&mut self, strings: &[String]) {
        for (idx, expected) in strings.iter().enumerate() {
            if let Some(existing) = self.strings.get(idx) {
                assert_eq!(
                    *existing,
                    expected.as_str(),
                    "global interner slot {idx} diverged from dump state"
                );
                continue;
            }
            let inserted = self.push_preserving_slot(expected.clone());
            debug_assert_eq!(inserted.0 as usize, idx);
        }
    }

    /// Appends `s` as the next slot, keeping slot numbering intact even when
    /// the name is already mapped (the new slot is then non-canonical).
    fn push_preserving_slot(&mut self, s: String) -> SymId {
        let idx = self.next_index();
        let leaked: &'static str = Box::leak(s.into_boxed_str());
        self.strings.push(leaked);
        let canonical = match self.map.entry(leaked) {
            std::collections::hash_map::Entry::Vacant(entry) => {
                entry.insert(idx);
                true
            }
            std::collections::hash_map::Entry::Occupied(_) => false,
        };
        self.canonical.push(canonical);
        SymId(idx)
    }

    /// Index the next pushed slot will receive.
    ///
    /// Checked rather than cast blindly: a truncating `as u32` would hand out
    /// ids that alias existing slots once the table outgrows `u32`.
    fn next_index(&self) -> u32 {
        let len = self.strings.len();
        assert!(
            len <= u32::MAX as usize,
            "string interner exhausted the u32 symbol id space"
        );
        len as u32
    }
}
/// Returns the process-wide interner, creating it on first use.
fn global_interner() -> &'static RwLock<StringInterner> {
    static INSTANCE: OnceLock<RwLock<StringInterner>> = OnceLock::new();
    INSTANCE.get_or_init(|| {
        let seeded = StringInterner::new();
        RwLock::new(seeded)
    })
}
/// Builds a standalone snapshot of the process-wide interner.
///
/// Slot `i` of the result holds the same name as slot `i` of the global
/// table. As with [`StringInterner::from_strings`], canonical flags are
/// recomputed from the names alone: the first slot carrying a name is the
/// mapped, canonical one.
///
/// The snapshot reuses the already-leaked `'static` names instead of copying
/// and leaking each one again, so taking a dump no longer leaks a second copy
/// of the whole symbol table.
///
/// # Panics
/// Panics if the global interner lock is poisoned.
pub(crate) fn dump_runtime_interner() -> StringInterner {
    let interner = global_interner()
        .read()
        .expect("global interner poisoned during dump");
    let names = interner.strings();

    // An empty `from_strings` yields a table with no pre-seeded slots.
    let mut dump = StringInterner::from_strings(Vec::new());
    dump.strings.reserve(names.len());
    dump.map.reserve(names.len());
    dump.canonical.reserve(names.len());

    for (slot, &name) in names.iter().enumerate() {
        // `slot` indexes a table whose ids are already `u32`.
        let canonical = match dump.map.entry(name) {
            std::collections::hash_map::Entry::Vacant(entry) => {
                entry.insert(slot as u32);
                true
            }
            std::collections::hash_map::Entry::Occupied(_) => false,
        };
        dump.strings.push(name);
        dump.canonical.push(canonical);
    }
    dump
}
/// Brings the process-wide interner in line with `strings` via
/// [`StringInterner::ensure_from_strings`].
///
/// # Panics
/// Panics if the global interner lock is poisoned, or if an existing slot
/// disagrees with `strings`.
pub(crate) fn ensure_runtime_interner(strings: &[String]) {
    global_interner()
        .write()
        .expect("global interner poisoned during restore")
        .ensure_from_strings(strings);
}
/// Interns `s` in the process-wide interner and returns its id.
///
/// # Panics
/// Panics if the global interner lock is poisoned.
#[inline]
pub fn intern(s: &str) -> SymId {
    global_interner()
        .write()
        .expect("global interner poisoned during intern")
        .intern(s)
}
/// Creates a fresh non-canonical symbol named `s` in the process-wide
/// interner.
///
/// # Panics
/// Panics if the global interner lock is poisoned.
#[inline]
pub fn intern_uninterned(s: &str) -> SymId {
    global_interner()
        .write()
        .expect("global interner poisoned during uninterned symbol creation")
        .intern_uninterned(s)
}
/// Looks `s` up in the process-wide interner without creating a symbol.
///
/// # Panics
/// Panics if the global interner lock is poisoned.
#[inline]
pub fn lookup_interned(s: &str) -> Option<SymId> {
    global_interner()
        .read()
        .expect("global interner poisoned during lookup")
        .lookup(s)
}
/// Reports whether `id` is flagged canonical in the process-wide interner.
///
/// # Panics
/// Panics if the global interner lock is poisoned.
#[inline]
pub fn is_canonical_id(id: SymId) -> bool {
    global_interner()
        .read()
        .expect("global interner poisoned during canonical-id lookup")
        .is_canonical_id(id)
}
/// Returns the name of `id` together with its canonical flag, read under a
/// single acquisition of the global lock.
///
/// # Panics
/// Panics if the global interner lock is poisoned or `id` is out of range.
#[inline]
pub fn resolve_sym_metadata(id: SymId) -> (&'static str, bool) {
    let guard = global_interner()
        .read()
        .expect("global interner poisoned during metadata lookup");
    let name = guard.resolve(id);
    let canonical = guard.is_canonical_id(id);
    (name, canonical)
}
/// Returns the name stored for `id` in the process-wide interner.
///
/// # Panics
/// Panics if the global interner lock is poisoned or `id` is out of range.
#[inline]
pub fn resolve_sym(id: SymId) -> &'static str {
    global_interner()
        .read()
        .expect("global interner poisoned during resolve")
        .resolve(id)
}
/// Non-panicking variant of [`resolve_sym`]: yields `None` when `id` does not
/// refer to a slot of the process-wide interner.
///
/// # Panics
/// Panics if the global interner lock is poisoned.
#[inline]
pub fn try_resolve_sym(id: SymId) -> Option<&'static str> {
    let slot = id.0 as usize;
    global_interner()
        .read()
        .expect("global interner poisoned during resolve")
        .strings()
        .get(slot)
        .copied()
}
// Unit tests live in `intern_test.rs` and are compiled only for test builds.
#[cfg(test)]
#[path = "intern_test.rs"]
mod tests;