use alloc::boxed::Box;
use alloc::collections::BTreeMap;
use alloc::string::String;
use alloc::vec::Vec;
use crate::wtf8;
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct Atom(u32);
impl Atom {
#[must_use]
pub const fn index(self) -> u32 {
self.0
}
#[must_use]
pub const fn from_index(index: u32) -> Self {
Self(index)
}
}
#[derive(Default)]
pub struct AtomTable {
strings: Vec<Box<[u8]>>,
lookup: BTreeMap<Box<[u8]>, Atom>,
}
impl AtomTable {
#[must_use]
pub fn new() -> Self {
Self::default()
}
pub fn intern(&mut self, s: &str) -> Atom {
self.intern_bytes(s.as_bytes())
}
pub fn intern_bytes(&mut self, bytes: &[u8]) -> Atom {
if let Some(&atom) = self.lookup.get(bytes) {
return atom;
}
let atom = Atom(self.strings.len() as u32);
let boxed: Box<[u8]> = Box::from(bytes);
self.strings.push(boxed.clone());
self.lookup.insert(boxed, atom);
atom
}
#[must_use]
pub fn get(&self, s: &str) -> Option<Atom> {
self.get_bytes(s.as_bytes())
}
#[must_use]
pub fn get_bytes(&self, bytes: &[u8]) -> Option<Atom> {
self.lookup.get(bytes).copied()
}
#[must_use]
pub fn resolve(&self, atom: Atom) -> Option<&str> {
self.resolve_bytes(atom).and_then(wtf8::as_str)
}
#[must_use]
pub fn resolve_bytes(&self, atom: Atom) -> Option<&[u8]> {
self.strings.get(atom.0 as usize).map(AsRef::as_ref)
}
#[must_use]
pub fn resolve_lossy(&self, atom: Atom) -> Option<String> {
self.resolve_bytes(atom).map(wtf8::to_string_lossy)
}
#[must_use]
pub fn len(&self) -> usize {
self.strings.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.strings.is_empty()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn intern_deduplicates() {
let mut t = AtomTable::new();
let a = t.intern("hello");
let b = t.intern("hello");
let c = t.intern("world");
assert_eq!(a, b); assert_ne!(a, c); assert_eq!(t.len(), 2); }
#[test]
fn atom_equality_is_string_equality() {
let mut t = AtomTable::new();
let keys = ["x", "y", "length", "x", "constructor", "y"];
let atoms: Vec<Atom> = keys.iter().map(|k| t.intern(k)).collect();
for (i, ki) in keys.iter().enumerate() {
for (j, kj) in keys.iter().enumerate() {
assert_eq!(atoms[i] == atoms[j], ki == kj);
}
}
assert_eq!(t.len(), 4); }
#[test]
fn resolve_round_trips() {
let mut t = AtomTable::new();
let a = t.intern("foo");
let b = t.intern("bar");
assert_eq!(t.resolve(a), Some("foo"));
assert_eq!(t.resolve(b), Some("bar"));
assert_eq!(t.resolve(Atom::from_index(999)), None);
}
#[test]
fn get_does_not_intern() {
let mut t = AtomTable::new();
assert_eq!(t.get("nope"), None);
let a = t.intern("yep");
assert_eq!(t.get("yep"), Some(a));
assert_eq!(t.get("nope"), None);
assert_eq!(t.len(), 1);
}
#[test]
fn index_round_trips() {
let mut t = AtomTable::new();
let a = t.intern("z");
assert_eq!(Atom::from_index(a.index()), a);
assert_eq!(t.resolve(Atom::from_index(a.index())), Some("z"));
}
#[test]
fn surrogate_keys_round_trip_via_bytes() {
let mut t = AtomTable::new();
let key = crate::wtf8::from_utf16(&[0x6B, 0xD800]); let a = t.intern_bytes(&key);
assert_eq!(t.intern_bytes(&key), a);
assert_eq!(t.get_bytes(&key), Some(a));
assert_eq!(t.resolve_bytes(a), Some(key.as_slice()));
assert_eq!(t.resolve(a), None);
assert_eq!(t.resolve_lossy(a).as_deref(), Some("k\u{FFFD}"));
let b = t.intern("plain");
assert_eq!(t.resolve(b), Some("plain"));
let c = t.intern("k");
assert_ne!(a, c);
}
#[test]
fn intern_str_and_bytes_agree() {
let mut t = AtomTable::new();
let a = t.intern("hi");
let b = t.intern_bytes("hi".as_bytes());
assert_eq!(a, b);
assert_eq!(t.get("hi"), Some(a));
assert_eq!(t.get_bytes("hi".as_bytes()), Some(a));
}
#[test]
fn atoms_are_assigned_in_intern_order() {
let mut t = AtomTable::new();
assert_eq!(t.intern("a").index(), 0);
assert_eq!(t.intern("b").index(), 1);
assert_eq!(t.intern("a").index(), 0); assert_eq!(t.intern("c").index(), 2);
}
}