#![doc(html_root_url = "https://docs.rs/crate/tiny-interner/0.1.0")]
#![cfg_attr(not(feature = "std"), no_std)]
#![deny(missing_docs)]
#![warn(unsafe_op_in_unsafe_fn, clippy::redundant_closure_for_method_calls)]
use core::{
hash::{BuildHasher, Hash, Hasher},
marker::PhantomData,
str::from_utf8_unchecked,
};
extern crate alloc;
use self::alloc::{string::String, vec::Vec};
use hashbrown::{
hash_map::{DefaultHashBuilder, RawEntryMut},
HashMap,
};
pub type Symbol = usize;
#[derive(Debug)]
pub struct Interner<H = DefaultHashBuilder>
where
H: BuildHasher,
{
dedup: HashMap<usize, (), ()>,
hasher: H,
backend: Backend,
}
#[derive(Debug)]
pub struct Backend {
ends: Vec<usize>,
buffer: String,
marker: PhantomData<fn() -> usize>,
}
impl Default for Interner {
#[inline]
fn default() -> Self {
Self::new()
}
}
impl Default for Backend {
#[inline]
fn default() -> Self {
Self {
ends: Vec::default(),
buffer: String::default(),
marker: Default::default(),
}
}
}
fn hash_value<T>(hasher: &impl BuildHasher, value: &T) -> u64
where
T: ?Sized + Hash,
{
let state = &mut hasher.build_hasher();
value.hash(state);
state.finish()
}
impl Backend {
#[inline]
fn with_capacity(capacity: usize) -> Self {
Self {
ends: Vec::with_capacity(capacity),
buffer: String::default(),
marker: Default::default(),
}
}
#[inline]
fn intern(&mut self, string: &str) -> usize {
self.push(string)
}
#[inline]
fn intern_static(&mut self, string: &'static str) -> usize {
self.intern(string)
}
#[inline]
fn resolve(&self, symbol: usize) -> Option<&str> {
self.span_of(symbol).map(|span| self.str_at(span))
}
#[inline]
unsafe fn unchecked_resolve(&self, symbol: usize) -> &str {
unsafe { self.str_at(self.unchecked_span_of(symbol)) }
}
fn shrink_to_fit(&mut self) {
self.ends.shrink_to_fit();
self.buffer.shrink_to_fit();
}
fn next_symbol(&self) -> usize {
self.ends.len()
}
fn span_of(&self, symbol: usize) -> Option<Span> {
self.ends.get(symbol).copied().map(|end| Span {
start: self.ends.get(symbol.wrapping_sub(1)).copied().unwrap_or(0),
end,
})
}
unsafe fn unchecked_span_of(&self, symbol: usize) -> Span {
let end = unsafe { *self.ends.get_unchecked(symbol) };
let start = self.ends.get(symbol.wrapping_sub(1)).copied().unwrap_or(0);
Span { start, end }
}
fn str_at(&self, span: Span) -> &str {
unsafe {
from_utf8_unchecked(&self.buffer.as_bytes()[(span.start as usize)..(span.end as usize)])
}
}
fn push(&mut self, string: &str) -> usize {
self.buffer.push_str(string);
let end = self.buffer.as_bytes().len();
let symbol = self.next_symbol();
self.ends.push(end);
symbol
}
}
impl<H> Interner<H>
where
H: BuildHasher + Default,
{
#[inline]
pub fn new() -> Self {
Self {
dedup: HashMap::default(),
hasher: Default::default(),
backend: Backend::default(),
}
}
#[inline]
pub fn with_hasher(hasher: H) -> Self {
Self {
dedup: HashMap::default(),
hasher,
backend: Backend::default(),
}
}
#[inline]
pub fn with_capacity(capacity: usize) -> Self {
Self {
dedup: HashMap::with_capacity_and_hasher(capacity, ()),
hasher: Default::default(),
backend: Backend::with_capacity(capacity),
}
}
#[inline]
pub fn with_capacity_and_hasher(capacity: usize, hasher: H) -> Self {
Self {
dedup: HashMap::with_capacity_and_hasher(capacity, ()),
hasher,
backend: Backend::with_capacity(capacity),
}
}
#[inline]
pub fn len(&self) -> usize {
self.dedup.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn get<T>(&self, string: T) -> Option<usize>
where
T: AsRef<str>,
{
let string_ref = string.as_ref();
let hasher = &self.hasher;
let hash = hash_value(hasher, string_ref);
self.dedup
.raw_entry()
.from_hash(hash, |symbol| {
string_ref == unsafe { self.backend.unchecked_resolve(*symbol) }
})
.map(|(&symbol, ())| symbol)
}
#[inline]
fn get_or_intern_using<T>(
&mut self,
string: T,
intern_fn: fn(&mut Backend, T) -> usize,
) -> usize
where
T: AsRef<str> + Copy + Hash + for<'a> PartialEq<&'a str>,
{
let string_ref = string.as_ref();
let hasher = &self.hasher;
let hash = hash_value(hasher, string_ref);
let entry = self.dedup.raw_entry_mut().from_hash(hash, |symbol| {
string_ref == unsafe { self.backend.unchecked_resolve(*symbol) }
});
let (&mut symbol, &mut ()) = match entry {
RawEntryMut::Vacant(vacant) => {
let symbol = intern_fn(&mut self.backend, string);
vacant.insert_with_hasher(hash, symbol, (), |symbol| {
hash_value(hasher, unsafe { self.backend.unchecked_resolve(*symbol) })
})
}
RawEntryMut::Occupied(occupied) => occupied.into_key_value(),
};
symbol
}
#[inline]
pub fn get_or_intern<T>(&mut self, string: T) -> usize
where
T: AsRef<str>,
{
self.get_or_intern_using(string.as_ref(), Backend::intern)
}
pub fn get_or_intern_static(&mut self, string: &'static str) -> usize {
self.get_or_intern_using(string.as_ref(), Backend::intern_static)
}
pub fn shrink_to_fit(&mut self) {
self.backend.shrink_to_fit();
}
#[inline]
pub fn resolve(&self, symbol: usize) -> Option<&str> {
self.backend.resolve(symbol)
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Span {
start: usize,
end: usize,
}