#![forbid(
missing_docs,
unsafe_op_in_unsafe_fn,
clippy::missing_safety_doc,
clippy::multiple_unsafe_ops_per_block,
clippy::undocumented_unsafe_blocks
)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#[cfg(feature = "delta")]
mod delta;
mod mapping;
mod slice;
mod str;
use appendvec::AppendVec;
use dashtable::DashTable;
#[cfg(feature = "delta")]
pub use delta::{Accumulator, DeltaEncoding};
#[cfg(feature = "get-size2")]
use get_size2::{GetSize, GetSizeTracker};
use hashbrown::DefaultHashBuilder;
pub use mapping::{ForwardMapping, Mapping, ReverseMapping};
#[cfg(feature = "retain")]
pub use mapping::{RetainBuilder, RetainSliceBuilder, RetainStrBuilder};
#[cfg(feature = "serde")]
use serde::de::{SeqAccess, Visitor};
#[cfg(feature = "serde")]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use slice::CopyRangeU32;
pub use slice::{ArenaSlice, InternedSlice};
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::fmt::Debug;
use std::hash::{BuildHasher, Hash, Hasher};
use std::marker::PhantomData;
#[cfg(feature = "get-size2")]
use std::mem::size_of;
#[cfg(feature = "debug")]
use std::sync::atomic::{self, AtomicUsize};
pub use str::{ArenaStr, InternedStr};
pub struct Interned<T: ?Sized, Storage = T> {
id: u32,
_phantom: PhantomData<fn() -> (*const T, *const Storage)>,
}
impl<T: ?Sized, Storage> Default for Interned<T, Storage> {
fn default() -> Self {
Self::new(u32::MAX)
}
}
impl<T: ?Sized, Storage> Debug for Interned<T, Storage> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("I").field(&self.id).finish()
}
}
impl<T: ?Sized, Storage> Clone for Interned<T, Storage> {
fn clone(&self) -> Self {
*self
}
}
impl<T: ?Sized, Storage> Copy for Interned<T, Storage> {}
impl<T: ?Sized, Storage> PartialEq for Interned<T, Storage> {
fn eq(&self, other: &Self) -> bool {
self.id.eq(&other.id)
}
}
impl<T: ?Sized, Storage> Eq for Interned<T, Storage> {}
impl<T: ?Sized, Storage> PartialOrd for Interned<T, Storage> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<T: ?Sized, Storage> Ord for Interned<T, Storage> {
fn cmp(&self, other: &Self) -> Ordering {
self.id.cmp(&other.id)
}
}
impl<T: ?Sized, Storage> Hash for Interned<T, Storage> {
fn hash<H>(&self, state: &mut H)
where
H: Hasher,
{
self.id.hash(state);
}
}
#[cfg(feature = "get-size2")]
impl<T: ?Sized, Storage> GetSize for Interned<T, Storage> {
}
#[cfg(feature = "raw")]
impl<T: ?Sized, Storage> Interned<T, Storage> {
pub fn from_id(id: u32) -> Self {
Self::new(id)
}
pub fn id(&self) -> u32 {
self.id
}
}
impl<T: ?Sized, Storage> Interned<T, Storage> {
pub(crate) fn new(id: u32) -> Self {
Self {
id,
_phantom: PhantomData,
}
}
pub(crate) fn id_(&self) -> u32 {
self.id
}
}
#[cfg(feature = "serde")]
impl<T: ?Sized, Storage> Serialize for Interned<T, Storage> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_u32(self.id)
}
}
#[cfg(feature = "serde")]
impl<'de, T: ?Sized, Storage> Deserialize<'de> for Interned<T, Storage> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let id = deserializer.deserialize_u32(U32Visitor)?;
Ok(Self {
id,
_phantom: PhantomData,
})
}
}
#[cfg(feature = "serde")]
struct U32Visitor;
#[cfg(feature = "serde")]
impl Visitor<'_> for U32Visitor {
type Value = u32;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("an integer between 0 and 2^32")
}
fn visit_u8<E>(self, value: u8) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(u32::from(value))
}
fn visit_u16<E>(self, value: u16) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(u32::from(value))
}
fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(value)
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
value
.try_into()
.map_err(|_| E::custom(format!("u32 out of range: {}", value)))
}
fn visit_i8<E>(self, value: i8) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
value
.try_into()
.map_err(|_| E::custom(format!("u32 out of range: {}", value)))
}
fn visit_i16<E>(self, value: i16) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
value
.try_into()
.map_err(|_| E::custom(format!("u32 out of range: {}", value)))
}
fn visit_i32<E>(self, value: i32) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
value
.try_into()
.map_err(|_| E::custom(format!("u32 out of range: {}", value)))
}
fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
value
.try_into()
.map_err(|_| E::custom(format!("u32 out of range: {}", value)))
}
}
pub struct Arena<T: ?Sized, Storage = T> {
vec: AppendVec<Storage>,
map: DashTable<u32>,
hasher: DefaultHashBuilder,
#[cfg(feature = "debug")]
references: AtomicUsize,
_phantom: PhantomData<fn() -> *const T>,
}
impl<T: ?Sized, Storage> Clone for Arena<T, Storage>
where
T: Eq + Hash,
Storage: Borrow<T> + Clone,
{
fn clone(&self) -> Self {
let iter = self.vec.iter();
let mut arena = Self::with_capacity(iter.len());
for value in iter {
arena.push(value.clone());
}
arena
}
}
impl<T: ?Sized, Storage> Arena<T, Storage> {
pub fn with_capacity(len: usize) -> Self {
Self {
vec: AppendVec::with_capacity(len),
map: DashTable::with_capacity(len),
hasher: DefaultHashBuilder::default(),
#[cfg(feature = "debug")]
references: AtomicUsize::new(0),
_phantom: PhantomData,
}
}
pub fn len(&self) -> usize {
self.vec.len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}
impl<T: ?Sized, Storage> Arena<T, Storage>
where
Storage: Borrow<T>,
{
#[cfg(feature = "raw")]
pub fn iter(&self) -> impl ExactSizeIterator<Item = &T> {
self.iter_()
}
fn iter_(&self) -> impl ExactSizeIterator<Item = &T> {
self.vec.iter().map(|x| x.borrow())
}
}
impl<T: ?Sized, Storage> Arena<T, Storage>
where
T: Eq + Hash,
Storage: Borrow<T>,
{
pub fn find(&self, value: &T) -> Option<Interned<T, Storage>> {
let hash = self.hasher.hash_one(value);
self.map
.find(hash, |&i| self.vec[i as usize].borrow() == value)
.map(|id| Interned {
id: *id,
_phantom: PhantomData,
})
}
}
#[cfg(feature = "raw")]
impl<T: ?Sized, Storage> Arena<T, Storage>
where
T: Eq + Hash,
Storage: Borrow<T>,
{
pub fn push_mut(&mut self, value: Storage) -> u32 {
self.push(value)
}
}
impl<T: ?Sized, Storage> Default for Arena<T, Storage> {
fn default() -> Self {
Self {
vec: AppendVec::new(),
map: DashTable::new(),
hasher: DefaultHashBuilder::default(),
#[cfg(feature = "debug")]
references: AtomicUsize::new(0),
_phantom: PhantomData,
}
}
}
impl<T: ?Sized, Storage> Debug for Arena<T, Storage>
where
T: Debug,
Storage: Borrow<T>,
{
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fmt.debug_list().entries(self.iter_()).finish()
}
}
impl<T: ?Sized, Storage> PartialEq for Arena<T, Storage>
where
T: Eq,
Storage: Borrow<T>,
{
fn eq(&self, other: &Self) -> bool {
self.iter_().eq(other.iter_())
}
}
impl<T: ?Sized, Storage> Eq for Arena<T, Storage>
where
T: Eq,
Storage: Borrow<T>,
{
}
#[cfg(feature = "get-size2")]
impl<T: ?Sized, Storage> GetSize for Arena<T, Storage>
where
Storage: GetSize,
{
fn get_heap_size_with_tracker<Tr: GetSizeTracker>(&self, tracker: Tr) -> (usize, Tr) {
let heap_size = self.vec.iter().map(|x| x.get_size()).sum::<usize>()
+ self.vec.len() * size_of::<u32>();
(heap_size, tracker)
}
}
#[cfg(feature = "debug")]
impl<T: ?Sized, Storage> Arena<T, Storage>
where
Storage: GetSize,
{
pub fn print_summary(&self, prefix: &str, title: &str, total_bytes: usize) {
let len = self.vec.len();
let references = self.references();
let estimated_bytes = self.get_size();
println!(
"{}[{:.02}%] {} interner: {} objects | {} bytes ({:.02} bytes/object) | {} references ({:.02} refs/object)",
prefix,
estimated_bytes as f64 * 100.0 / total_bytes as f64,
title,
len,
estimated_bytes,
estimated_bytes as f64 / len as f64,
references,
references as f64 / len as f64,
);
}
}
#[cfg(feature = "debug")]
impl<T: ?Sized, Storage> Arena<T, Storage> {
fn references(&self) -> usize {
self.references.load(atomic::Ordering::Relaxed)
}
}
impl<T: ?Sized, Storage> Arena<T, Storage>
where
T: Eq + Hash,
Storage: Borrow<T>,
{
pub fn intern(&self, value: impl Borrow<T> + Into<Storage>) -> Interned<T, Storage> {
#[cfg(feature = "debug")]
self.references.fetch_add(1, atomic::Ordering::Relaxed);
let hash = self.hasher.hash_one(value.borrow());
let id = *self
.map
.entry(
hash,
|&i| self.vec[i as usize].borrow() == value.borrow(),
|&i| self.hasher.hash_one(self.vec[i as usize].borrow()),
)
.or_insert_with(|| {
let x: Storage = value.into();
let id = self.vec.push(x);
assert!(id <= u32::MAX as usize);
id as u32
})
.get();
Interned::new(id)
}
pub fn intern_mut(&mut self, value: impl Borrow<T> + Into<Storage>) -> Interned<T, Storage> {
#[cfg(feature = "debug")]
self.references.fetch_add(1, atomic::Ordering::Relaxed);
let hash = self.hasher.hash_one(value.borrow());
let id = *self
.map
.entry_mut(
hash,
|&i| self.vec[i as usize].borrow() == value.borrow(),
|&i| self.hasher.hash_one(self.vec[i as usize].borrow()),
)
.or_insert_with(|| {
let x: Storage = value.into();
let id = self.vec.push_mut(x);
assert!(id <= u32::MAX as usize);
id as u32
})
.get();
Interned::new(id)
}
pub(crate) fn push(&mut self, value: Storage) -> u32 {
#[cfg(feature = "debug")]
self.references.fetch_add(1, atomic::Ordering::Relaxed);
let hash = self.hasher.hash_one(value.borrow());
let id = self.vec.push_mut(value);
assert!(id <= u32::MAX as usize);
let id = id as u32;
self.map.insert_unique_mut(hash, id, |&i| {
self.hasher.hash_one(self.vec[i as usize].borrow())
});
id
}
}
impl<T: ?Sized, Storage> Arena<T, Storage>
where
Storage: Clone,
{
pub fn lookup(&self, interned: Interned<T, Storage>) -> Storage {
self.vec[interned.id as usize].clone()
}
}
impl<T: ?Sized, Storage> Arena<T, Storage>
where
Storage: Borrow<T>,
{
pub fn lookup_ref(&self, interned: Interned<T, Storage>) -> &T {
self.vec[interned.id as usize].borrow()
}
}
#[cfg(feature = "serde")]
impl<T: ?Sized, Storage> Serialize for Arena<T, Storage>
where
T: Serialize,
Storage: Borrow<T>,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_seq(self.iter_())
}
}
#[cfg(feature = "serde")]
impl<'de, T: ?Sized, Storage> Deserialize<'de> for Arena<T, Storage>
where
T: Eq + Hash,
Storage: Borrow<T> + Deserialize<'de>,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_seq(ArenaVisitor::new())
}
}
#[cfg(feature = "serde")]
struct ArenaVisitor<T: ?Sized, Storage> {
_phantom: PhantomData<fn() -> Arena<T, Storage>>,
}
#[cfg(feature = "serde")]
impl<T: ?Sized, Storage> ArenaVisitor<T, Storage> {
fn new() -> Self {
Self {
_phantom: PhantomData,
}
}
}
#[cfg(feature = "serde")]
impl<'de, T: ?Sized, Storage> Visitor<'de> for ArenaVisitor<T, Storage>
where
T: Eq + Hash,
Storage: Borrow<T> + Deserialize<'de>,
{
type Value = Arena<T, Storage>;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a sequence of values")
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let mut arena = match seq.size_hint() {
None => Arena::default(),
Some(size_hint) => Arena {
vec: AppendVec::with_capacity(size_hint),
map: DashTable::with_capacity(size_hint),
hasher: DefaultHashBuilder::default(),
#[cfg(feature = "debug")]
references: AtomicUsize::new(0),
_phantom: PhantomData,
},
};
while let Some(t) = seq.next_element()? {
arena.push(t);
}
Ok(arena)
}
}
#[cfg(test)]
mod test {
use super::*;
use std::borrow::Cow;
use std::thread;
#[test]
fn test_intern_lookup() {
let arena: Arena<u32> = Arena::default();
for i in 0..100 {
assert_eq!(arena.intern(2 * i).id, i);
}
for i in 0..100 {
assert_eq!(*arena.lookup_ref(Interned::new(i)), 2 * i);
assert_eq!(arena.lookup(Interned::new(i)), 2 * i);
}
}
const NUM_READERS: usize = 4;
const NUM_WRITERS: usize = 4;
#[cfg(not(miri))]
const NUM_ITEMS: usize = 1_000_000;
#[cfg(miri)]
const NUM_ITEMS: usize = 100;
#[test]
fn test_intern_lookup_concurrent_reads() {
let arena: Arena<u32, Box<u32>> = Arena::default();
thread::scope(|s| {
for _ in 0..NUM_READERS {
s.spawn(|| {
loop {
let len = arena.len();
if len > 0 {
let last = len as u32 - 1;
assert_eq!(*arena.lookup_ref(Interned::new(last)), last);
if len == NUM_ITEMS {
break;
}
}
}
});
}
s.spawn(|| {
for j in 0..NUM_ITEMS as u32 {
assert_eq!(arena.intern(j).id, j);
}
});
});
}
#[test]
fn test_intern_lookup_concurrent_writes() {
let arena: Arena<u32, Box<u32>> = Arena::default();
thread::scope(|s| {
s.spawn(|| {
loop {
let len = arena.len();
if len > 0 {
let last = len as u32 - 1;
assert_eq!(*arena.lookup_ref(Interned::new(last)), last);
if len == NUM_ITEMS {
break;
}
}
}
});
for _ in 0..NUM_WRITERS {
s.spawn(|| {
for j in 0..NUM_ITEMS as u32 {
assert_eq!(arena.intern(j).id, j);
}
});
}
});
}
#[test]
fn test_intern_lookup_concurrent_readwrites() {
let arena: Arena<u32, Box<u32>> = Arena::default();
thread::scope(|s| {
for _ in 0..NUM_READERS {
s.spawn(|| {
loop {
let len = arena.len();
if len > 0 {
let last = len as u32 - 1;
assert_eq!(*arena.lookup_ref(Interned::new(last)), last);
if len == NUM_ITEMS {
break;
}
}
}
});
}
for _ in 0..NUM_WRITERS {
s.spawn(|| {
for j in 0..NUM_ITEMS as u32 {
assert_eq!(arena.intern(j).id, j);
}
});
}
});
}
#[test]
fn test_boxed_str_interner() {
let arena: Arena<str, Box<str>> = Arena::default();
let key: &str = "Hello";
assert_eq!(arena.intern(key).id, 0);
let key: String = "world".into();
assert_eq!(arena.intern(key).id, 1);
let key: Box<str> = "Hello".into();
assert_eq!(arena.intern(key).id, 0);
let key: Box<str> = "world".into();
assert_eq!(arena.intern(key).id, 1);
let key: Cow<'_, str> = "Hello world".into();
assert_eq!(arena.intern(key).id, 2);
}
#[cfg(feature = "serde")]
#[test]
fn test_serde_postcard() {
let arena: Arena<u32> = Arena::default();
let a = arena.intern(0);
let b = arena.intern(1);
let c = arena.intern(22);
let d = arena.intern(333);
let e = arena.intern(4444);
let f = arena.intern(55555);
assert_eq!(arena.len(), 6);
let serialized_arena = postcard::to_stdvec(&arena).expect("Failed to serialize arena");
assert_eq!(
serialized_arena,
vec![6, 0, 1, 22, 205, 2, 220, 34, 131, 178, 3]
);
let new_arena: Arena<u32> =
postcard::from_bytes(&serialized_arena).expect("Failed to deserialize arena");
assert_eq!(new_arena, arena);
assert_eq!(new_arena.len(), 6);
let serialized_handles =
postcard::to_stdvec(&[a, b, c, d, e, f]).expect("Failed to serialize interned handles");
assert_eq!(serialized_handles, vec![0, 1, 2, 3, 4, 5]);
let new_handles: [Interned<u32>; 6] = postcard::from_bytes(&serialized_handles)
.expect("Failed to deserialize interned handles");
assert_eq!(new_handles, [a, b, c, d, e, f]);
assert_eq!(new_arena.lookup(a), 0);
assert_eq!(new_arena.lookup(b), 1);
assert_eq!(new_arena.lookup(c), 22);
assert_eq!(new_arena.lookup(d), 333);
assert_eq!(new_arena.lookup(e), 4444);
assert_eq!(new_arena.lookup(f), 55555);
}
#[cfg(feature = "serde")]
#[test]
fn test_serde_json() {
let arena: Arena<u32> = Arena::default();
let a = arena.intern(0);
let b = arena.intern(1);
let c = arena.intern(22);
let d = arena.intern(333);
let e = arena.intern(4444);
let f = arena.intern(55555);
assert_eq!(arena.len(), 6);
let serialized_arena = serde_json::to_string(&arena).expect("Failed to serialize arena");
assert_eq!(serialized_arena, "[0,1,22,333,4444,55555]");
let new_arena: Arena<u32> =
serde_json::from_str(&serialized_arena).expect("Failed to deserialize arena");
assert_eq!(new_arena, arena);
assert_eq!(new_arena.len(), 6);
let serialized_handles = serde_json::to_string(&[a, b, c, d, e, f])
.expect("Failed to serialize interned handles");
assert_eq!(serialized_handles, "[0,1,2,3,4,5]");
let new_handles: [Interned<u32>; 6] = serde_json::from_str(&serialized_handles)
.expect("Failed to deserialize interned handles");
assert_eq!(new_handles, [a, b, c, d, e, f]);
}
}