#![cfg_attr(all(feature = "bench", test), feature(test))]
#![doc(html_root_url = "https://docs.rs/crate/string-interner/0.7.1")]
#![deny(missing_docs)]
#[cfg(all(feature = "bench", test))]
extern crate test;
#[cfg(test)]
mod tests;
#[cfg(all(feature = "bench", test))]
mod benches;
#[cfg(feature = "serde_support")]
mod serde_impl;
use std::iter::FromIterator;
use std::{
collections::{hash_map::RandomState, HashMap},
hash::{BuildHasher, Hash, Hasher},
iter, marker,
num::NonZeroU32,
slice, u32, vec,
};
pub trait Symbol: Copy + Ord + Eq {
fn from_usize(val: usize) -> Self;
fn to_usize(self) -> usize;
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Sym(NonZeroU32);
impl Symbol for Sym {
fn from_usize(val: usize) -> Self {
assert!(
val < u32::MAX as usize,
"Symbol value {} is too large and not supported by `string_interner::Sym` type",
val
);
Sym(NonZeroU32::new((val + 1) as u32).unwrap_or_else(|| {
unreachable!("Should never fail because `val + 1` is nonzero and `<= u32::MAX`")
}))
}
fn to_usize(self) -> usize {
(self.0.get() as usize) - 1
}
}
impl Symbol for usize {
fn from_usize(val: usize) -> Self {
val
}
fn to_usize(self) -> usize {
self
}
}
#[derive(Debug, Copy, Clone, Eq)]
struct InternalStrRef(*const str);
impl InternalStrRef {
fn from_str(val: &str) -> Self {
InternalStrRef(val as *const str)
}
fn as_str(&self) -> &str {
unsafe { &*self.0 }
}
}
impl<T> From<T> for InternalStrRef
where
T: AsRef<str>,
{
fn from(val: T) -> Self {
InternalStrRef::from_str(val.as_ref())
}
}
impl Hash for InternalStrRef {
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_str().hash(state)
}
}
impl PartialEq for InternalStrRef {
fn eq(&self, other: &InternalStrRef) -> bool {
self.as_str() == other.as_str()
}
}
pub type DefaultStringInterner = StringInterner<Sym>;
#[derive(Debug, Eq)]
pub struct StringInterner<S, H = RandomState>
where
S: Symbol,
H: BuildHasher,
{
map: HashMap<InternalStrRef, S, H>,
values: Vec<Box<str>>,
}
impl<S, H> PartialEq for StringInterner<S, H>
where
S: Symbol,
H: BuildHasher,
{
fn eq(&self, rhs: &Self) -> bool {
self.len() == rhs.len() && self.values == rhs.values
}
}
impl Default for StringInterner<Sym, RandomState> {
#[inline]
fn default() -> Self {
StringInterner::new()
}
}
impl<S, H> Clone for StringInterner<S, H>
where
S: Symbol,
H: Clone + BuildHasher,
{
fn clone(&self) -> Self {
let values = self.values.clone();
let mut map = HashMap::with_capacity_and_hasher(values.len(), self.map.hasher().clone());
map.extend(
values
.iter()
.enumerate()
.map(|(i, s)| (InternalStrRef::from_str(s), S::from_usize(i))),
);
Self { values, map }
}
}
unsafe impl<S, H> Send for StringInterner<S, H>
where
S: Symbol + Send,
H: BuildHasher,
{
}
unsafe impl<S, H> Sync for StringInterner<S, H>
where
S: Symbol + Sync,
H: BuildHasher,
{
}
impl<S> StringInterner<S>
where
S: Symbol,
{
#[inline]
pub fn new() -> StringInterner<S, RandomState> {
StringInterner {
map: HashMap::new(),
values: Vec::new(),
}
}
#[inline]
pub fn with_capacity(cap: usize) -> Self {
StringInterner {
map: HashMap::with_capacity(cap),
values: Vec::with_capacity(cap),
}
}
#[inline]
pub fn capacity(&self) -> usize {
std::cmp::min(self.map.capacity(), self.values.capacity())
}
#[inline]
pub fn reserve(&mut self, additional: usize) {
self.map.reserve(additional);
self.values.reserve(additional);
}
}
impl<S, H> StringInterner<S, H>
where
S: Symbol,
H: BuildHasher,
{
#[inline]
pub fn with_hasher(hash_builder: H) -> StringInterner<S, H> {
StringInterner {
map: HashMap::with_hasher(hash_builder),
values: Vec::new(),
}
}
#[inline]
pub fn with_capacity_and_hasher(cap: usize, hash_builder: H) -> StringInterner<S, H> {
StringInterner {
map: HashMap::with_hasher(hash_builder),
values: Vec::with_capacity(cap),
}
}
#[inline]
pub fn get_or_intern<T>(&mut self, val: T) -> S
where
T: Into<String> + AsRef<str>,
{
match self.map.get(&val.as_ref().into()) {
Some(&sym) => sym,
None => self.intern(val),
}
}
fn intern<T>(&mut self, new_val: T) -> S
where
T: Into<String> + AsRef<str>,
{
let new_id: S = self.make_symbol();
let new_boxed_val = new_val.into().into_boxed_str();
let new_ref: InternalStrRef = new_boxed_val.as_ref().into();
self.values.push(new_boxed_val);
self.map.insert(new_ref, new_id);
new_id
}
fn make_symbol(&self) -> S {
S::from_usize(self.len())
}
#[inline]
pub fn resolve(&self, symbol: S) -> Option<&str> {
self.values
.get(symbol.to_usize())
.map(|boxed_str| boxed_str.as_ref())
}
#[inline]
pub unsafe fn resolve_unchecked(&self, symbol: S) -> &str {
self.values.get_unchecked(symbol.to_usize()).as_ref()
}
#[inline]
pub fn get<T>(&self, val: T) -> Option<S>
where
T: AsRef<str>,
{
self.map.get(&val.as_ref().into()).cloned()
}
#[inline]
pub fn len(&self) -> usize {
self.values.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn iter(&self) -> Iter<S> {
Iter::new(self)
}
#[inline]
pub fn iter_values(&self) -> Values<S> {
Values::new(self)
}
pub fn shrink_to_fit(&mut self) {
self.map.shrink_to_fit();
self.values.shrink_to_fit();
}
}
impl<T, S> FromIterator<T> for StringInterner<S>
where
S: Symbol,
T: Into<String> + AsRef<str>,
{
fn from_iter<I>(iter: I) -> Self
where
I: IntoIterator<Item = T>,
{
let iter = iter.into_iter();
let mut interner = StringInterner::with_capacity(iter.size_hint().0);
interner.extend(iter);
interner
}
}
impl<T, S> std::iter::Extend<T> for StringInterner<S>
where
S: Symbol,
T: Into<String> + AsRef<str>,
{
fn extend<I>(&mut self, iter: I)
where
I: IntoIterator<Item = T>,
{
for s in iter {
self.get_or_intern(s);
}
}
}
pub struct Iter<'a, S> {
iter: iter::Enumerate<slice::Iter<'a, Box<str>>>,
mark: marker::PhantomData<S>,
}
impl<'a, S> Iter<'a, S>
where
S: Symbol + 'a,
{
#[inline]
fn new<H>(interner: &'a StringInterner<S, H>) -> Self
where
H: BuildHasher,
{
Iter {
iter: interner.values.iter().enumerate(),
mark: marker::PhantomData,
}
}
}
impl<'a, S> Iterator for Iter<'a, S>
where
S: Symbol + 'a,
{
type Item = (S, &'a str);
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter
.next()
.map(|(num, boxed_str)| (S::from_usize(num), boxed_str.as_ref()))
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
pub struct Values<'a, S>
where
S: Symbol + 'a,
{
iter: slice::Iter<'a, Box<str>>,
mark: marker::PhantomData<S>,
}
impl<'a, S> Values<'a, S>
where
S: Symbol + 'a,
{
#[inline]
fn new<H>(interner: &'a StringInterner<S, H>) -> Self
where
H: BuildHasher,
{
Values {
iter: interner.values.iter(),
mark: marker::PhantomData,
}
}
}
impl<'a, S> Iterator for Values<'a, S>
where
S: Symbol + 'a,
{
type Item = &'a str;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next().map(|boxed_str| boxed_str.as_ref())
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
impl<S, H> iter::IntoIterator for StringInterner<S, H>
where
S: Symbol,
H: BuildHasher,
{
type Item = (S, String);
type IntoIter = IntoIter<S>;
fn into_iter(self) -> Self::IntoIter {
IntoIter {
iter: self.values.into_iter().enumerate(),
mark: marker::PhantomData,
}
}
}
pub struct IntoIter<S>
where
S: Symbol,
{
iter: iter::Enumerate<vec::IntoIter<Box<str>>>,
mark: marker::PhantomData<S>,
}
impl<S> Iterator for IntoIter<S>
where
S: Symbol,
{
type Item = (S, String);
fn next(&mut self) -> Option<Self::Item> {
self.iter
.next()
.map(|(num, boxed_str)| (S::from_usize(num), boxed_str.into_string()))
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}