use std::collections::HashMap;
use std::slice::Iter;
pub type Symbol = i32;
#[derive(Clone, Default)]
pub struct SymbolTable {
table: HashMap<String, Symbol>,
symbols: Vec<String>,
}
impl SymbolTable {
pub fn new() -> Self {
SymbolTable {
table: HashMap::new(),
symbols: Vec::new(),
}
}
pub fn resolve(&self, string: impl AsRef<str>) -> Option<Symbol> {
self.table.get(string.as_ref()).copied()
}
pub fn find_or_create(&mut self, string: impl AsRef<str>) -> anyhow::Result<Symbol> {
let value = string.as_ref();
if let Some(symbol) = self.table.get(value) {
Ok(*symbol)
} else if self.symbols.len() >= i32::MAX as usize {
Err(anyhow::anyhow!("Symbol table overflow!"))
} else {
let new_symbol = (self.symbols.len() + 1) as i32;
let _ = self.table.insert(value.to_owned(), new_symbol);
self.symbols.push(value.to_owned());
Ok(new_symbol)
}
}
pub fn lookup(&self, symbol: Symbol) -> &str {
self.symbols
.get((symbol - 1) as usize)
.map(|string| string.as_str())
.unwrap_or("")
}
pub fn len(&self) -> usize {
self.symbols.len()
}
pub fn is_empty(&self) -> bool {
self.symbols.is_empty()
}
pub fn allocated_size(&self) -> usize {
let table_size = self.table.capacity() * 11 / 10
* (std::mem::size_of::<usize>() + std::mem::size_of::<String>() + std::mem::size_of::<Symbol>());
let lookup_size = self.symbols.capacity() * std::mem::size_of::<String>();
let content_size: usize = self.symbols.iter().map(|string| string.len()).sum();
table_size + lookup_size + content_size
}
}
#[derive(Debug, Eq, PartialEq, Default)]
pub struct SymbolMap<V> {
entries: Vec<(Symbol, V)>,
}
impl<V: Default> SymbolMap<V> {
pub fn new() -> Self {
SymbolMap {
entries: Vec::with_capacity(2),
}
}
pub fn get(&self, key: Symbol) -> Option<&V> {
if let Ok(pos) = self.entries.binary_search_by_key(&key, |entry| entry.0) {
Some(&self.entries[pos].1)
} else {
None
}
}
pub fn get_or_insert(&mut self, key: Symbol, mut factory: impl FnMut() -> V) -> &mut V {
match self.entries.binary_search_by_key(&key, |entry| entry.0) {
Ok(pos) => &mut self.entries[pos].1,
Err(pos) => {
self.entries.insert(pos, (key, factory()));
&mut self.entries[pos].1
}
}
}
pub fn get_mut(&mut self, key: Symbol) -> Option<&mut V> {
if let Ok(pos) = self.entries.binary_search_by_key(&key, |entry| entry.0) {
Some(&mut self.entries[pos].1)
} else {
None
}
}
pub fn put(&mut self, key: Symbol, value: V) {
match self.entries.binary_search_by_key(&key, |entry| entry.0) {
Ok(pos) => self.entries[pos].1 = value,
Err(pos) => self.entries.insert(pos, (key, value)),
}
}
pub fn len(&self) -> usize {
self.entries.len()
}
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
pub fn capacity(&self) -> usize {
self.entries.capacity()
}
pub fn entries(&'_ self) -> Iter<'_, (Symbol, V)> {
self.entries.iter()
}
}
#[cfg(test)]
mod tests {
use crate::ig::symbols::{SymbolMap, SymbolTable};
#[test]
pub fn resolve_and_lookup_works() {
let mut table = SymbolTable::new();
let symbol = table.find_or_create("Hello").unwrap();
assert_eq!(table.find_or_create("Hello").unwrap(), symbol);
assert_eq!(table.resolve("Hello").unwrap(), symbol);
assert_eq!(table.lookup(symbol), "Hello");
assert_eq!(table.lookup(-1), "");
assert_eq!(table.resolve("World").is_none(), true);
assert_eq!(table.len(), 1);
}
#[test]
pub fn get_and_put_work_for_symbol_map() {
let mut map = SymbolMap::new();
for symbol in (1..128).rev() {
map.put(symbol, symbol);
}
for symbol in 1..128 {
assert_eq!(*map.get(symbol).unwrap(), symbol);
assert_eq!(*map.get_mut(symbol).unwrap(), symbol);
}
assert_eq!(map.get(130).is_none(), true);
assert_eq!(map.get_mut(130).is_none(), true);
map.put(1, 42);
assert_eq!(*map.get(1).unwrap(), 42);
assert_eq!(*map.get_mut(1).unwrap(), 42);
for symbol in 2..128 {
assert_eq!(*map.get(symbol).unwrap(), symbol);
assert_eq!(*map.get_mut(symbol).unwrap(), symbol);
}
assert_eq!(map.len(), 127);
assert_eq!(
map.entries().map(|(symbol, _)| symbol).sum::<i32>(),
127 * 128 / 2
);
}
#[test]
pub fn get_or_insert_works() {
let mut map = SymbolMap::new();
assert_eq!(map.get_or_insert(42, || 0), &0);
map.put(42, 1);
assert_eq!(map.get_or_insert(42, || 1), &1);
*map.get_or_insert(23, || 0) = 5;
assert_eq!(map.get_or_insert(23, || 0), &5);
}
}