use alloc::string::String;
use alloc::vec::Vec;
use hashbrown::HashMap;
use crate::error::{Error, Result};
/// Compact handle standing in for an interned string.
///
/// A symbol is a thin wrapper around a `u32` id. It is `Copy`, hashable and
/// totally ordered by that id, so it can be used directly as a map key or
/// compared cheaply instead of comparing string contents.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct Symbol(u32);

impl Symbol {
    /// Returns the raw numeric id wrapped by this symbol.
    #[inline]
    pub const fn id(self) -> u32 {
        let Symbol(raw) = self;
        raw
    }
}
pub struct Interner {
storage: Vec<String>,
lookup: HashMap<String, u32>,
}
impl Interner {
#[inline]
#[must_use]
pub fn new() -> Self {
Self {
storage: Vec::new(),
lookup: HashMap::new(),
}
}
#[inline]
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Self {
storage: Vec::with_capacity(capacity),
lookup: HashMap::with_capacity(capacity),
}
}
#[inline]
#[must_use]
pub fn len(&self) -> usize {
self.storage.len()
}
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.storage.is_empty()
}
pub fn intern(&mut self, s: &str) -> Symbol {
match self.try_intern(s) {
Ok(sym) => sym,
Err(_) => panic!("interner symbol counter overflow (u32::MAX symbols)"),
}
}
pub fn try_intern(&mut self, s: &str) -> Result<Symbol> {
if let Some(&id) = self.lookup.get(s) {
return Ok(Symbol(id));
}
let id_usize = self.storage.len();
if id_usize > u32::MAX as usize {
return Err(Error::CounterOverflow);
}
let id = id_usize as u32;
let owned = String::from(s);
self.storage.push(owned.clone());
let _ = self.lookup.insert(owned, id);
Ok(Symbol(id))
}
#[inline]
#[must_use]
pub fn resolve(&self, symbol: Symbol) -> Option<&str> {
self.storage.get(symbol.0 as usize).map(String::as_str)
}
#[inline]
#[must_use]
pub fn contains(&self, s: &str) -> bool {
self.lookup.contains_key(s)
}
#[inline]
#[must_use]
pub fn lookup(&self, s: &str) -> Option<Symbol> {
self.lookup.get(s).copied().map(Symbol)
}
pub fn iter(&self) -> Iter<'_> {
Iter {
inner: self.storage.iter().enumerate(),
}
}
}
impl Default for Interner {
#[inline]
fn default() -> Self {
Self::new()
}
}
impl core::fmt::Debug for Interner {
    /// Formats the interner as `Interner { len: N }`.
    ///
    /// Only the number of stored strings is printed, not the strings
    /// themselves.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let stored = self.storage.len();
        let mut out = f.debug_struct("Interner");
        let _ = out.field("len", &stored);
        out.finish()
    }
}
/// Iterator yielding `(Symbol, &str)` pairs.
///
/// It walks a slice of stored strings front to back and uses each string's
/// position in the slice as the id of the yielded [`Symbol`].
pub struct Iter<'a> {
    /// Enumerated walk over the stored strings; the index becomes the id.
    inner: core::iter::Enumerate<core::slice::Iter<'a, String>>,
}

impl<'a> Iterator for Iter<'a> {
    type Item = (Symbol, &'a str);

    /// Returns the next `(Symbol, &str)` pair, or `None` once the slice is
    /// exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let (index, text) = self.inner.next()?;
        // The cast cannot truncate for strings added through
        // `Interner::try_intern`, which refuses to hand out an id that does
        // not fit in `u32`.
        Some((Symbol(index as u32), text.as_str()))
    }

    /// Forwards the exact bounds of the underlying slice iterator so that
    /// consumers such as `collect` and `extend` can reserve space up front.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}
/// Unit tests exercising the public `Interner` API.
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning a known string returns the existing symbol and does not grow
    /// the table.
    #[test]
    fn same_string_returns_same_symbol() {
        let mut interner = Interner::new();
        let first = interner.intern("hello");
        let second = interner.intern("hello");
        assert_eq!(first, second);
        assert_eq!(interner.len(), 1);
    }

    /// Different strings receive different symbols.
    #[test]
    fn distinct_strings_return_distinct_symbols() {
        let mut interner = Interner::new();
        let alpha = interner.intern("alpha");
        let bravo = interner.intern("bravo");
        assert_ne!(alpha, bravo);
    }

    /// A symbol resolves back to the exact string it was created from.
    #[test]
    fn resolve_round_trips() {
        let mut interner = Interner::new();
        let symbol = interner.intern("round-trip");
        assert_eq!(interner.resolve(symbol), Some("round-trip"));
    }

    /// `lookup` is read-only: a miss reports `None` and leaves the length
    /// unchanged.
    #[test]
    fn lookup_does_not_insert() {
        let mut interner = Interner::new();
        let _ = interner.intern("first");
        assert_eq!(interner.lookup("second"), None);
        assert_eq!(interner.len(), 1);
    }

    /// `contains` is false before a string is interned and true afterwards.
    #[test]
    fn contains_reflects_state() {
        let mut interner = Interner::new();
        assert!(!interner.contains("x"));
        let _ = interner.intern("x");
        assert!(interner.contains("x"));
    }

    /// Iteration yields the strings in the order they were first interned.
    #[test]
    fn iter_yields_insertion_order() {
        let mut interner = Interner::new();
        for text in &["a", "b", "c"] {
            let _ = interner.intern(text);
        }
        let texts: Vec<&str> = interner.iter().map(|(_, text)| text).collect();
        assert_eq!(texts, vec!["a", "b", "c"]);
    }
}