use std::cell::RefCell;
use std::fmt;
use std::rc::Rc;
use rustc_hash::FxHashMap;
/// Compact, copyable handle standing in for an interned string.
///
/// A `Symbol` is a thin wrapper around a `u32`. Equality, ordering and
/// hashing are all derived, so they operate on that raw number rather than
/// on the text the symbol stands for.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Symbol(u32);

impl Symbol {
    /// Returns the raw `u32` wrapped by this symbol.
    #[must_use]
    pub fn index(self) -> u32 {
        let Self(raw) = self;
        raw
    }
}
impl fmt::Debug for Symbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Symbol({})", self.0)
}
}
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "#{}", self.0)
}
}
/// Deduplicating string table that hands out dense [`Symbol`] handles.
///
/// Every distinct string is stored once as an `Rc<str>`; that allocation is
/// shared between the forward table (`strings`) and the reverse index
/// (`map`). Symbols are assigned sequentially, starting at zero, in the
/// order strings are first interned.
///
/// Cloning an interner duplicates both containers, while the string data
/// itself is shared with the original through the `Rc`s.
#[derive(Clone, Default)]
pub struct Interner {
    /// Reverse index: string contents to the symbol assigned to them.
    map: FxHashMap<Rc<str>, Symbol>,
    /// Forward table: the entry at position `sym.index()` is the text of `sym`.
    strings: Vec<Rc<str>>,
}

impl Interner {
    /// Creates an empty interner without reserving any storage.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an empty interner with room reserved for `cap` strings in
    /// both the forward table and the reverse index.
    #[must_use]
    pub fn with_capacity(cap: usize) -> Self {
        let hasher = rustc_hash::FxBuildHasher::default();
        let map = FxHashMap::with_capacity_and_hasher(cap, hasher);
        let strings = Vec::with_capacity(cap);
        Self { map, strings }
    }

    /// Returns the symbol for `s`, registering the string first if this
    /// interner has not seen it before.
    ///
    /// A hit costs a single hash lookup and no allocation. A miss copies
    /// `s` into a new `Rc<str>` and assigns it the next free index.
    ///
    /// # Panics
    ///
    /// Panics with `"interner overflow"` if the number of stored strings no
    /// longer fits in a `u32`.
    pub fn intern(&mut self, s: &str) -> Symbol {
        if let Some(existing) = self.lookup(s) {
            return existing;
        }

        let text: Rc<str> = s.into();
        let next_index = u32::try_from(self.strings.len()).expect("interner overflow");
        let fresh = Symbol(next_index);
        // One allocation, two owners: the forward table and the reverse index.
        self.strings.push(Rc::clone(&text));
        self.map.insert(text, fresh);
        fresh
    }

    /// Borrows the text behind `sym`.
    ///
    /// # Panics
    ///
    /// Panics if `sym`'s index is out of range for this interner. Use
    /// [`Interner::try_resolve`] for a non-panicking variant.
    #[must_use]
    pub fn resolve(&self, sym: Symbol) -> &str {
        let slot = sym.0 as usize;
        &self.strings[slot]
    }

    /// Returns a shared handle to the text behind `sym`. Only the reference
    /// count is bumped; the string data is not copied.
    ///
    /// # Panics
    ///
    /// Panics if `sym`'s index is out of range for this interner.
    #[must_use]
    pub fn resolve_rc(&self, sym: Symbol) -> Rc<str> {
        let entry = &self.strings[sym.0 as usize];
        Rc::clone(entry)
    }

    /// Borrows the text behind `sym`, or returns `None` when the symbol's
    /// index is out of range for this interner.
    #[must_use]
    pub fn try_resolve(&self, sym: Symbol) -> Option<&str> {
        let entry = self.strings.get(sym.0 as usize)?;
        Some(&**entry)
    }

    /// Returns the symbol already assigned to `s`, if any, without
    /// interning it.
    #[must_use]
    pub fn lookup(&self, s: &str) -> Option<Symbol> {
        let found = self.map.get(s)?;
        Some(*found)
    }

    /// Number of distinct strings stored.
    #[must_use]
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when nothing has been interned yet.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }
}
/// Summary-only diagnostic output: reports how many strings are stored
/// rather than dumping the table contents.
impl fmt::Debug for Interner {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let count = self.strings.len();
        write!(f, "Interner({count} strings)")
    }
}
// Per-thread interner backing the free functions in this module (`intern`,
// `resolve`, `resolve_rc`, `with_resolved`, `lookup`, `prewarm`).
//
// Each thread gets its own instance, so a `Symbol` obtained on one thread is
// not meaningful to the global functions on another thread. The `RefCell`
// supplies interior mutability; its borrow rules are checked at runtime.
// The interner starts with room reserved for 512 strings.
thread_local! {
static GLOBAL_INTERNER: RefCell<Interner> = RefCell::new(Interner::with_capacity(512));
}
/// Interns `s` in the calling thread's global interner and returns its
/// symbol.
///
/// # Panics
///
/// Panics if the thread-local interner is already borrowed when this is
/// called (runtime `RefCell` borrow check), or if the interner overflows
/// its `u32` index space.
pub fn intern(s: &str) -> Symbol {
    GLOBAL_INTERNER.with(|cell| {
        let mut interner = cell.borrow_mut();
        interner.intern(s)
    })
}
/// Returns an owned copy of the text behind `sym`, read from the calling
/// thread's global interner.
///
/// This allocates a new `String` on every call.
///
/// # Panics
///
/// Panics if `sym`'s index is out of range for this thread's interner, or
/// if the interner is mutably borrowed at the time of the call.
#[must_use]
pub fn resolve(sym: Symbol) -> String {
    GLOBAL_INTERNER.with(|cell| {
        let interner = cell.borrow();
        interner.resolve(sym).to_string()
    })
}
/// Returns a shared handle to the text behind `sym`, read from the calling
/// thread's global interner. Only a reference count is bumped; the string
/// data is not copied.
///
/// # Panics
///
/// Panics if `sym`'s index is out of range for this thread's interner, or
/// if the interner is mutably borrowed at the time of the call.
#[must_use]
pub fn resolve_rc(sym: Symbol) -> Rc<str> {
    GLOBAL_INTERNER.with(|cell| {
        let interner = cell.borrow();
        interner.resolve_rc(sym)
    })
}
/// Calls `f` with the text behind `sym` (taken from the calling thread's
/// global interner) and returns whatever `f` returns.
///
/// The string is handed to `f` without being copied: only the `Rc`'s
/// reference count is bumped. The interner borrow is released *before* `f`
/// runs, so the callback may itself call the global `intern`, `lookup`,
/// `resolve`, or `prewarm` without tripping the `RefCell` borrow check.
///
/// # Panics
///
/// Panics if `sym`'s index is out of range for this thread's interner, or
/// if the interner is mutably borrowed at the time of the call.
pub fn with_resolved<F, R>(sym: Symbol, f: F) -> R
where
    F: FnOnce(&str) -> R,
{
    // Take a cheap shared handle and drop the `RefCell` borrow first; holding
    // the borrow across `f` would make any re-entrant `intern` call panic.
    let text = GLOBAL_INTERNER.with(|cell| cell.borrow().resolve_rc(sym));
    f(&text)
}
/// Returns the symbol already assigned to `s` in the calling thread's
/// global interner, or `None` if `s` has not been interned on this thread.
/// Never interns anything.
///
/// # Panics
///
/// Panics if the interner is mutably borrowed at the time of the call.
#[must_use]
pub fn lookup(s: &str) -> Option<Symbol> {
    GLOBAL_INTERNER.with(|cell| {
        let interner = cell.borrow();
        interner.lookup(s)
    })
}
/// Interns every entry of `HOT_SYMBOLS`, in order, into the calling
/// thread's global interner.
///
/// Calling this again is harmless: strings that are already present keep
/// their existing symbols and nothing new is added.
///
/// # Panics
///
/// Panics if the thread-local interner is already borrowed when this is
/// called.
pub fn prewarm() {
    GLOBAL_INTERNER.with(|cell| {
        // Take the mutable borrow once for the whole batch.
        let mut interner = cell.borrow_mut();
        HOT_SYMBOLS.iter().for_each(|&name| {
            interner.intern(name);
        });
    });
}
/// Strings that `prewarm` interns up front, in exactly this order.
///
/// Order matters: an interner assigns indices sequentially, so when
/// `prewarm` is the first thing to touch a thread's interner, the entry at
/// position `i` here receives symbol index `i`. Reordering or inserting in
/// the middle therefore changes which index each string gets.
///
/// Entries must be unique (checked by the `hot_symbols_unique` test).
///
/// NOTE(review): the entries look like commonly used Nix attribute names,
/// function names, platform strings and literals — presumably chosen because
/// they occur frequently; confirm the selection criteria with the owner.
const HOT_SYMBOLS: &[&str] = &[
"",
"name",
"pname",
"version",
"src",
"system",
"builder",
"args",
"outputs",
"out",
"dev",
"bin",
"man",
"doc",
"outputHash",
"outputHashAlgo",
"outputHashMode",
"passAsFile",
"preferLocalBuild",
"allowSubstitutes",
"buildInputs",
"nativeBuildInputs",
"propagatedBuildInputs",
"propagatedNativeBuildInputs",
"checkInputs",
"nativeCheckInputs",
"buildPhase",
"installPhase",
"configurePhase",
"patchPhase",
"unpackPhase",
"config",
"options",
"imports",
"_module",
"mkOption",
"mkDefault",
"mkForce",
"mkIf",
"mkMerge",
"mkOverride",
"type",
"default",
"description",
"example",
"visible",
"internal",
"readOnly",
"inputs",
"url",
"flake",
"follows",
"packages",
"devShells",
"apps",
"overlays",
"nixosModules",
"nixosConfigurations",
"darwinModules",
"darwinConfigurations",
"homeModules",
"homeConfigurations",
"templates",
"checks",
"formatter",
"legacyPackages",
"nixpkgs",
"pkgs",
"stdenv",
"lib",
"hostPlatform",
"buildPlatform",
"targetPlatform",
"isDarwin",
"isLinux",
"x86_64-linux",
"aarch64-linux",
"x86_64-darwin",
"aarch64-darwin",
"meta",
"platforms",
"homepage",
"license",
"maintainers",
"mainProgram",
"available",
"broken",
"insecure",
"unsupported",
"recurseIntoAttrs",
"__functor",
"__toString",
"__ignoreNulls",
"outPath",
"drvPath",
"attrs",
"outputName",
"true",
"false",
"null",
];
/// Unit tests for the interner.
///
/// Tests of the thread-local API each run on a freshly spawned thread:
/// `GLOBAL_INTERNER` is per-thread, so a new thread starts from an empty
/// interner regardless of which other tests ran before on the harness's
/// worker threads.
#[cfg(test)]
mod tests {
    use super::*;

    // ----- `Interner` instance API -----

    #[test]
    fn intern_returns_same_symbol() {
        let mut interner = Interner::new();
        let s1 = interner.intern("hello");
        let s2 = interner.intern("hello");
        assert_eq!(s1, s2);
    }

    #[test]
    fn different_strings_different_symbols() {
        let mut interner = Interner::new();
        let s1 = interner.intern("hello");
        let s2 = interner.intern("world");
        assert_ne!(s1, s2);
    }

    #[test]
    fn resolve_roundtrip() {
        let mut interner = Interner::new();
        let sym = interner.intern("foo");
        assert_eq!(interner.resolve(sym), "foo");
    }

    /// Two `resolve_rc` calls must hand back the same allocation, not copies.
    #[test]
    fn resolve_rc_shares_allocation() {
        let mut interner = Interner::new();
        let sym = interner.intern("shared");
        let a = interner.resolve_rc(sym);
        let b = interner.resolve_rc(sym);
        assert_eq!(&*a, "shared");
        assert_eq!(&*b, "shared");
        assert!(Rc::ptr_eq(&a, &b));
    }

    #[test]
    fn lookup_existing() {
        let mut interner = Interner::new();
        let sym = interner.intern("bar");
        assert_eq!(interner.lookup("bar"), Some(sym));
    }

    #[test]
    fn lookup_missing() {
        let interner = Interner::new();
        assert_eq!(interner.lookup("missing"), None);
    }

    #[test]
    fn len_and_empty() {
        let mut interner = Interner::new();
        assert!(interner.is_empty());
        assert_eq!(interner.len(), 0);
        interner.intern("a");
        interner.intern("b");
        // Re-interning an existing string must not grow the table.
        interner.intern("a");
        assert_eq!(interner.len(), 2);
        assert!(!interner.is_empty());
    }

    /// Symbols are ordered by the sequence in which strings were interned.
    #[test]
    fn symbol_ordering() {
        let mut interner = Interner::new();
        let s1 = interner.intern("alpha");
        let s2 = interner.intern("beta");
        assert!(s1 < s2);
    }

    #[test]
    fn try_resolve_valid() {
        let mut interner = Interner::new();
        let sym = interner.intern("test");
        assert_eq!(interner.try_resolve(sym), Some("test"));
    }

    #[test]
    fn try_resolve_invalid() {
        let interner = Interner::new();
        assert_eq!(interner.try_resolve(Symbol(999)), None);
    }

    #[test]
    fn clone_interner() {
        let mut interner = Interner::new();
        let s1 = interner.intern("hello");
        let cloned = interner.clone();
        assert_eq!(cloned.resolve(s1), "hello");
        assert_eq!(cloned.len(), 1);
    }

    /// `with_capacity` must yield an empty interner whose backing storage
    /// already has room for the requested number of entries.
    #[test]
    fn with_capacity_preallocates() {
        let interner = Interner::with_capacity(256);
        assert!(interner.is_empty());
        assert_eq!(interner.len(), 0);
        // Both containers guarantee at least the requested capacity.
        assert!(interner.strings.capacity() >= 256);
        assert!(interner.map.capacity() >= 256);
    }

    // ----- Thread-local API (each test on its own fresh thread) -----

    #[test]
    fn thread_local_intern_resolve() {
        std::thread::spawn(|| {
            let sym = intern("thread_local_test");
            let resolved = resolve(sym);
            assert_eq!(resolved, "thread_local_test");
        })
        .join()
        .unwrap();
    }

    #[test]
    fn thread_local_intern_dedup() {
        std::thread::spawn(|| {
            let s1 = intern("dedup");
            let s2 = intern("dedup");
            assert_eq!(s1, s2);
        })
        .join()
        .unwrap();
    }

    #[test]
    fn thread_local_resolve_rc_zero_copy() {
        std::thread::spawn(|| {
            let sym = intern("tl_rc");
            let a = resolve_rc(sym);
            let b = resolve_rc(sym);
            assert!(Rc::ptr_eq(&a, &b));
        })
        .join()
        .unwrap();
    }

    #[test]
    fn thread_local_with_resolved_no_alloc() {
        std::thread::spawn(|| {
            let sym = intern("borrowed");
            let len = with_resolved(sym, str::len);
            assert_eq!(len, "borrowed".len());
        })
        .join()
        .unwrap();
    }

    #[test]
    fn thread_local_lookup() {
        std::thread::spawn(|| {
            // Relies on the fresh thread: nothing has been interned here yet.
            assert_eq!(lookup("never_interned_here"), None);
            let sym = intern("findme");
            assert_eq!(lookup("findme"), Some(sym));
        })
        .join()
        .unwrap();
    }

    #[test]
    fn prewarm_populates_hot_set() {
        std::thread::spawn(|| {
            prewarm();
            for &s in HOT_SYMBOLS {
                assert!(
                    lookup(s).is_some(),
                    "prewarm should have interned {s:?}"
                );
            }
            let before = HOT_SYMBOLS.len();
            // A second prewarm must not re-assign symbols: "name" has to keep
            // an index inside the range handed out by the first pass.
            prewarm();
            let sym_first = lookup("name").expect("name interned by prewarm");
            assert!(sym_first.index() < u32::try_from(before).unwrap());
        })
        .join()
        .unwrap();
    }

    /// Guards the hot list against accidental duplicate entries.
    #[test]
    fn hot_symbols_unique() {
        let mut seen = std::collections::HashSet::new();
        for &s in HOT_SYMBOLS {
            assert!(seen.insert(s), "HOT_SYMBOLS contains duplicate {s:?}");
        }
    }
}