use crate::data_type::AsBytes;
use hashbrown::HashTable;
/// Capacity handed to `HashTable::with_capacity` when `Interner::new` builds
/// its deduplication table.
const DEFAULT_DEDUP_CAPACITY: usize = 4096;
/// Backing store for values handed to an [`Interner`].
///
/// The interner only ever appends values through [`Storage::push`] and reads
/// them back through [`Storage::get`] using the key that `push` returned.
pub trait Storage {
    /// Handle identifying a stored value; it is copied freely by the interner.
    type Key: Copy;

    /// Type of the stored values. It may be unsized, and must expose its
    /// contents as bytes so the interner can hash and compare it.
    type Value: ?Sized + AsBytes;

    /// Returns a reference to the value associated with the key `idx`.
    fn get(&self, idx: Self::Key) -> &Self::Value;

    /// Stores `value` and returns the key under which it can be retrieved.
    fn push(&mut self, value: &Self::Value) -> Self::Key;

    /// Returns an estimate of the memory used by this storage.
    ///
    /// NOTE(review): the unit is presumably bytes, and the `dead_code`
    /// allowance presumably exists because not every build of the crate calls
    /// this method — confirm both against the implementors and callers.
    #[allow(dead_code)]
    fn estimated_memory_size(&self) -> usize;
}
/// A generic value interner that deduplicates values kept in a [`Storage`].
#[derive(Debug, Default)]
pub struct Interner<S>
where
    S: Storage,
{
    /// Hasher state used to hash the byte representation of every value.
    state: ahash::RandomState,

    /// Lookup table holding the keys of all values interned so far.
    ///
    /// Entries are hashed by the bytes of the value they refer to (looked up
    /// through `storage`), not by the key itself.
    dedup: HashTable<S::Key>,

    /// The store that owns the interned values.
    storage: S,
}
impl<S: Storage> Interner<S> {
    /// Creates a new `Interner` that keeps its values in `storage`.
    ///
    /// The deduplication table is created with room for
    /// `DEFAULT_DEDUP_CAPACITY` keys.
    pub fn new(storage: S) -> Self {
        Self {
            state: Default::default(),
            dedup: HashTable::with_capacity(DEFAULT_DEDUP_CAPACITY),
            storage,
        }
    }

    /// Interns `value` and returns the key identifying it in the storage.
    ///
    /// If a value with identical bytes has been interned before, the key of
    /// that earlier value is returned and the storage is left untouched.
    /// Otherwise `value` is pushed to the storage and its new key is recorded.
    pub fn intern(&mut self, value: &S::Value) -> S::Key {
        // Compute the byte view once; it is needed for hashing and for every
        // equality probe performed by the table lookup.
        let bytes = value.as_bytes();
        let hash = self.state.hash_one(bytes);

        *self
            .dedup
            .entry(
                hash,
                // Equality is decided on the stored value's bytes, not on the key.
                |index| bytes == self.storage.get(*index).as_bytes(),
                // Re-hash existing keys by the bytes of the value they point to,
                // so that resizing keeps the table consistent with `hash` above.
                |key| self.state.hash_one(self.storage.get(*key).as_bytes()),
            )
            .or_insert_with(|| self.storage.push(value))
            .get()
    }

    /// Returns an estimate of the memory used by this interner.
    ///
    /// The estimate is the storage's own estimate plus the size of the
    /// deduplication table, approximated as one `S::Key` per slot of capacity.
    #[allow(dead_code)]
    pub fn estimated_memory_size(&self) -> usize {
        // The table footprint is (number of slots) * (bytes per key); adding
        // the two quantities would mix an element count with a byte count.
        let dedup_size = self.dedup.capacity() * std::mem::size_of::<S::Key>();
        self.storage.estimated_memory_size() + dedup_size
    }

    /// Returns a shared reference to the underlying storage.
    pub fn storage(&self) -> &S {
        &self.storage
    }

    /// Consumes the interner and returns the underlying storage.
    ///
    /// Only compiled when the `arrow` feature is enabled.
    #[cfg(feature = "arrow")]
    pub fn into_inner(self) -> S {
        self.storage
    }
}