use std::{
borrow::Cow,
cell::RefCell,
ffi::c_void,
hash::{BuildHasherDefault, Hash, Hasher},
mem::ManuallyDrop,
num::NonZeroU8,
ops::Deref,
ptr::NonNull,
};
use rustc_hash::FxHasher;
use triomphe::ThinArc;
use crate::{
tagged_value::{TaggedValue, MAX_INLINE_LEN},
wtf8::Wtf8,
Atom, Wtf8Atom, INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
};
#[derive(PartialEq, Eq)]
pub(crate) struct Metadata {
pub hash: u64,
}
#[derive(Clone, PartialEq, Eq)]
pub(crate) struct Item(pub ThinArc<Metadata, u8>);
impl Deref for Item {
type Target = <ThinArc<Metadata, u8> as Deref>::Target;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Hash for Item {
fn hash<H: Hasher>(&self, state: &mut H) {
state.write_u64(self.0.header.header.hash);
}
}
pub(crate) unsafe fn deref_from(ptr: TaggedValue) -> ManuallyDrop<Item> {
let item = restore_arc(ptr);
ManuallyDrop::new(item)
}
pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item {
let ptr = v.get_ptr();
Item(ThinArc::from_raw(ptr))
}
pub struct AtomStore {
pub(crate) data: hashbrown::HashMap<Item, (), BuildEntryHasher>,
}
impl Default for AtomStore {
fn default() -> Self {
Self {
data: hashbrown::HashMap::with_capacity_and_hasher(64, BuildEntryHasher::default()),
}
}
}
impl AtomStore {
#[inline(always)]
pub fn atom<'a>(&mut self, text: impl Into<Cow<'a, str>>) -> Atom {
atom_in(self, &text.into())
}
#[inline(always)]
pub fn wtf8_atom<'a>(&mut self, text: impl Into<Cow<'a, Wtf8>>) -> Wtf8Atom {
wtf8_atom_in(self, text.into().as_bytes())
}
fn gc(&mut self) {
self.data.retain(|item, _| {
let count = ThinArc::strong_count(&item.0);
debug_assert!(count > 0);
count > 1
});
}
}
thread_local! {
static GLOBAL_DATA: RefCell<AtomStore> = Default::default();
}
pub fn global_atom_store_gc() {
GLOBAL_DATA.with(|global| {
let mut store = global.borrow_mut();
store.gc();
});
}
pub(crate) fn global_wtf8_atom(text: &[u8]) -> Wtf8Atom {
GLOBAL_DATA.with(|global| {
let mut store = global.borrow_mut();
wtf8_atom_in(&mut *store, text)
})
}
pub(crate) fn global_atom(text: &str) -> Atom {
GLOBAL_DATA.with(|global| {
let mut store = global.borrow_mut();
atom_in(&mut *store, text)
})
}
fn wtf8_atom_in<S>(storage: S, text: &[u8]) -> Wtf8Atom
where
S: Storage,
{
let len = text.len();
if len <= MAX_INLINE_LEN {
let tag = INLINE_TAG_INIT | ((len as u8) << LEN_OFFSET);
let mut unsafe_data = TaggedValue::new_tag(tag);
unsafe {
unsafe_data.data_mut()[..len].copy_from_slice(text);
}
return Wtf8Atom { unsafe_data };
}
let hash = calc_hash(text);
let entry = storage.insert_entry(text, hash);
let entry = ThinArc::into_raw(entry.0) as *mut c_void;
let ptr: NonNull<c_void> = unsafe {
NonNull::new_unchecked(entry)
};
debug_assert!(0 == ptr.as_ptr() as u8 & TAG_MASK);
Wtf8Atom {
unsafe_data: TaggedValue::new_ptr(ptr),
}
}
fn atom_in<S>(storage: S, text: &str) -> Atom
where
S: Storage,
{
unsafe { Atom::from_wtf8_unchecked(wtf8_atom_in(storage, text.as_bytes())) }
}
pub(crate) const fn inline_atom(text: &str) -> Option<Atom> {
let len = text.len();
if len <= MAX_INLINE_LEN {
let tag = INLINE_TAG | ((len as u8) << LEN_OFFSET);
let mut unsafe_data = TaggedValue::new_tag(NonZeroU8::new(tag).unwrap());
unsafe {
let data = unsafe_data.data_mut();
let bytes = text.as_bytes();
let mut i = 0;
while i < len {
data[i] = bytes[i];
i += 1;
}
}
return Some(Atom { unsafe_data });
}
None
}
trait Storage {
fn insert_entry(self, text: &[u8], hash: u64) -> Item;
}
impl Storage for &'_ mut AtomStore {
fn insert_entry(self, text: &[u8], hash: u64) -> Item {
if text.len() > 512 {
return Item(ThinArc::from_header_and_slice(Metadata { hash }, text));
}
let (entry, _) = self
.data
.raw_entry_mut()
.from_hash(hash, |key| {
key.header.header.hash == hash && key.slice.eq(text)
})
.or_insert_with(move || {
(
Item(ThinArc::from_header_and_slice(Metadata { hash }, text)),
(),
)
});
entry.clone()
}
}
#[inline(always)]
fn calc_hash(text: &[u8]) -> u64 {
let mut hasher = FxHasher::default();
text.hash(&mut hasher);
hasher.finish()
}
type BuildEntryHasher = BuildHasherDefault<EntryHasher>;
#[derive(Default)]
pub(crate) struct EntryHasher {
hash: u64,
#[cfg(debug_assertions)]
write_called: bool,
}
impl Hasher for EntryHasher {
fn finish(&self) -> u64 {
#[cfg(debug_assertions)]
debug_assert!(
self.write_called,
"EntryHasher expects write_u64 to have been called",
);
self.hash
}
fn write(&mut self, _bytes: &[u8]) {
panic!("EntryHasher expects to be called with write_u64");
}
fn write_u64(&mut self, val: u64) {
#[cfg(debug_assertions)]
{
debug_assert!(
!self.write_called,
"EntryHasher expects write_u64 to be called only once",
);
self.write_called = true;
}
self.hash = val;
}
}
#[cfg(test)]
mod tests {
use crate::{dynamic::GLOBAL_DATA, global_atom_store_gc, Atom};
fn expect_size(expected: usize) {
GLOBAL_DATA.with(|global| {
let store = global.borrow();
assert_eq!(store.data.len(), expected);
})
}
#[test]
fn global_ref_count_dynamic_0() {
expect_size(0);
let atom1 = Atom::new("Hello, beautiful world!");
expect_size(1);
let atom2 = Atom::new("Hello, beautiful world!");
expect_size(1);
assert_eq!(atom1.ref_count(), 3);
assert_eq!(atom2.ref_count(), 3);
drop(atom1);
expect_size(1);
assert_eq!(atom2.ref_count(), 2);
drop(atom2);
expect_size(1);
global_atom_store_gc();
expect_size(0);
}
#[test]
fn global_ref_count_dynamic_1() {
expect_size(0);
{
expect_size(0);
let atom = Atom::new("Hello, beautiful world!");
assert_eq!(atom.ref_count(), 2);
expect_size(1);
}
expect_size(1);
global_atom_store_gc();
expect_size(0);
{
let atom = Atom::new("Hello, beautiful world!");
assert_eq!(atom.ref_count(), 2);
expect_size(1);
}
let atom = Atom::new("Hello, beautiful world!");
assert_eq!(atom.ref_count(), 2);
expect_size(1);
global_atom_store_gc();
expect_size(1);
drop(atom);
expect_size(1);
global_atom_store_gc();
expect_size(0);
}
}