use fxhash::{FxBuildHasher, FxHashMap};
/// String interner: stores one owned copy of each distinct string and hands out a small
/// integer id for it.
///
/// Field order is significant. Rust drops fields in declaration order, so `data` (whose
/// keys borrow from the strings owned by `list`) is dropped before `list` itself.
#[derive(Default)]
pub struct Intern<'a> {
/// Content-to-id index. Each key is a `&str` that `intern` creates with `unsafe` from
/// the heap data of a `Box<str>` stored in `list`; the `'a` lifetime is not tied to any
/// real borrow, so these keys must never be exposed outside this type.
data: FxHashMap<&'a str, InternId>,
/// Owned strings in insertion order; an id is the index of its string in this vector.
list: Vec<Box<str>>,
}
/// Identifier returned by `Intern::intern`. It is the index of the interned string in the
/// interner's backing list, so it is only meaningful for the interner that produced it.
pub type InternId = u32;
impl Intern<'_> {
    /// Creates an empty interner without allocating.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an empty interner with space for `capacity` strings reserved up front in
    /// both the lookup map and the backing list.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            data: FxHashMap::with_capacity_and_hasher(capacity, FxBuildHasher::default()),
            list: Vec::with_capacity(capacity),
        }
    }

    /// Returns the id of `input`, storing a copy of it first if it has not been seen before.
    ///
    /// Interning the same string contents again returns the same id. Ids are assigned
    /// sequentially starting at `0`. The input is only converted to an owned `String`
    /// when it is actually new.
    ///
    /// # Panics
    ///
    /// Panics if the number of distinct strings no longer fits in [`InternId`]. The check
    /// happens before anything is stored, so the interner is left unchanged.
    #[inline]
    pub fn intern<V: Into<String> + AsRef<str>>(&mut self, input: V) -> InternId {
        if let Some(&id) = self.data.get(input.as_ref()) {
            return id;
        }

        let index = self.list.len();
        // Checked instead of `as`: a truncating cast would silently hand out an id that
        // already belongs to another string. `TryFrom` is written out in full so this
        // also compiles on editions whose prelude does not include it.
        let id = <InternId as std::convert::TryFrom<usize>>::try_from(index)
            .expect("Intern: too many distinct strings to fit in InternId");

        self.list.push(input.into().into_boxed_str());

        // Take the pointer from the box *after* it has been moved into `list`. A pointer
        // obtained before the push would be derived from a `Box` that is then passed by
        // value, which Miri's Stacked Borrows model treats as invalidating it.
        let stored: *const str = &*self.list[index];

        // SAFETY: `stored` points at the heap allocation owned by the `Box<str>` at
        // `self.list[index]`, so it is non-null, aligned and valid UTF-8. Growing `list`
        // only moves the `Box` pointers, not the string data they own, and no method in
        // this impl removes, replaces or mutates an entry of `list`, so the data stays
        // alive and unchanged for as long as `data` holds the key. The reference created
        // here has an unbounded lifetime, which is only acceptable because the key is
        // stored in the private `data` map and never handed out to callers.
        let key = unsafe { &*stored };
        self.data.insert(key, id);
        id
    }

    /// Returns the string that was assigned `id`.
    ///
    /// # Panics
    ///
    /// Panics if `id` was not produced by this interner (index out of bounds). Use
    /// [`try_lookup`](Self::try_lookup) for a non-panicking variant.
    #[inline]
    pub fn lookup(&self, id: InternId) -> &str {
        &self.list[id as usize]
    }

    /// Returns the string that was assigned `id`, or `None` if no string has that id.
    #[inline]
    pub fn try_lookup(&self, id: InternId) -> Option<&str> {
        self.list.get(id as usize).map(|boxed| &**boxed)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly interned string can be read back through both lookup methods.
    #[test]
    fn interns_and_handles_lookup() {
        let mut interner = Intern::new();
        let id = interner.intern("hello");
        assert_eq!(interner.lookup(id), "hello");
        assert_eq!(interner.try_lookup(id), Some("hello"));
    }

    /// Strings stay readable after the backing list grows past its initial capacity.
    #[test]
    fn reallocate() {
        let mut interner = Intern::with_capacity(1);
        let id1 = interner.intern("hello");
        let id2 = interner.intern("world");
        assert_eq!(interner.lookup(id1), "hello");
        assert_eq!(interner.try_lookup(id1), Some("hello"));
        assert_eq!(interner.lookup(id2), "world");
        assert_eq!(interner.try_lookup(id2), Some("world"));
    }

    /// Equal contents map to one id regardless of whether the input is borrowed or owned.
    #[test]
    fn deduplicates_repeated_strings() {
        let mut interner = Intern::new();
        let first = interner.intern("hello");
        let again = interner.intern(String::from("hello"));
        let other = interner.intern("world");
        assert_eq!(first, again);
        assert_ne!(first, other);
    }

    /// `try_lookup` reports ids that were never handed out as `None` instead of panicking.
    #[test]
    fn try_lookup_unknown_id_is_none() {
        let mut interner = Intern::new();
        assert_eq!(interner.try_lookup(0), None);
        let id = interner.intern("only");
        assert_eq!(interner.try_lookup(id + 1), None);
    }

    /// The map keys point into the stored strings, so they must remain valid while both
    /// the list and the map grow many times over.
    #[test]
    fn keys_survive_many_reallocations() {
        let mut interner = Intern::with_capacity(1);
        let ids: Vec<InternId> = (0..500)
            .map(|i| interner.intern(format!("sym{}", i)))
            .collect();
        for (i, &id) in ids.iter().enumerate() {
            let text = format!("sym{}", i);
            assert_eq!(interner.lookup(id), text);
            assert_eq!(interner.intern(text), id);
        }
    }
}