use super::bumpalloc::LeakyBumpAlloc;
/// Open-addressed hash table that interns strings in leaky bump allocators.
///
/// `entries` holds raw pointers to `StringCacheEntry` headers; each header is
/// immediately followed in memory by the string bytes plus a NUL terminator.
/// NOTE(review): `#[repr(align(128))]` together with `_pad` looks like sizing
/// to avoid false sharing between per-bin caches — confirm with the caller.
#[repr(align(128))]
pub(crate) struct StringCache {
    // Active bump allocator that new entries are carved from.
    pub(crate) alloc: LeakyBumpAlloc,
    // Retired allocators; kept alive because existing entries point into them.
    pub(crate) old_allocs: Vec<LeakyBumpAlloc>,
    // Hash-table slots (probed quadratically); null means empty.
    entries: Vec<*mut StringCacheEntry>,
    // Number of occupied slots in `entries`.
    num_entries: usize,
    // Always `entries.len() - 1`; the length is a power of two, so this
    // masks a hash down to a valid slot index.
    mask: usize,
    // Running allocation tally (written in `new`/`insert`/`clear`; the
    // public figure is computed by `total_allocated()` instead).
    total_allocated: usize,
    _pad: [u32; 3],
}
// Initial number of hash-table slots across the whole cache (split over bins).
pub(crate) const INITIAL_CAPACITY: usize = 1 << 20;
// Initial number of bytes of string storage across the whole cache.
pub(crate) const INITIAL_ALLOC: usize = 4 << 20;
// log2 of the number of bins the cache is sharded into.
pub(crate) const BIN_SHIFT: usize = 6;
pub(crate) const NUM_BINS: usize = 1 << BIN_SHIFT;
// Shift that isolates the top BIN_SHIFT bits of a hash — presumably used by
// the caller to pick a bin; not referenced in this file.
pub(crate) const TOP_SHIFT: usize =
    8 * std::mem::size_of::<usize>() - BIN_SHIFT;
impl StringCache {
/// Creates an empty cache holding this bin's share of the initial table
/// slots (`INITIAL_CAPACITY / NUM_BINS`) and string storage
/// (`INITIAL_ALLOC / NUM_BINS`).
pub fn new() -> StringCache {
    let num_slots = INITIAL_CAPACITY / NUM_BINS;
    StringCache {
        // String storage, aligned so entry headers can be written directly.
        alloc: LeakyBumpAlloc::new(
            INITIAL_ALLOC / NUM_BINS,
            std::mem::align_of::<StringCacheEntry>(),
        ),
        old_allocs: Vec::with_capacity(16),
        // All slots start empty (null).
        entries: vec![std::ptr::null_mut(); num_slots],
        num_entries: 0,
        // Slot count is a power of two, so len - 1 is a valid bit mask.
        mask: num_slots - 1,
        total_allocated: num_slots,
        _pad: [0u32; 3],
    }
}
/// Looks up `string` (whose precomputed hash is `hash`) without inserting.
/// Returns a pointer to the interned, NUL-terminated character data, or
/// `None` if the string has not been cached.
pub(crate) fn get_existing(
    &self,
    string: &str,
    hash: u64,
) -> Option<*const u8> {
    let mut pos = self.mask & hash as usize;
    let mut dist = 0;
    loop {
        // SAFETY: pos is always masked by self.mask, so it is in bounds.
        let entry = unsafe { self.entries.get_unchecked(pos) };
        if entry.is_null() {
            // Reached an empty slot before a match: not present.
            return None;
        }
        unsafe {
            // The character data sits immediately after the entry header
            // (`entry` auto-derefs to the raw pointer, so `add(1)` skips
            // one StringCacheEntry, not one table slot).
            let entry_chars = entry.add(1) as *const u8;
            let sce = &**entry;
            // Compare hash and length first so the byte comparison only
            // runs on likely matches.
            if sce.hash == hash
                && sce.len == string.len()
                && std::str::from_utf8_unchecked(
                    std::slice::from_raw_parts(entry_chars, sce.len),
                ) == string
            {
                return Some(entry_chars);
            }
        }
        // Quadratic probing: the step grows by one on each collision.
        dist += 1;
        debug_assert!(dist <= self.mask);
        pos = (pos + dist) & self.mask;
    }
}
/// Interns `string` (whose precomputed hash is `hash`) and returns a
/// pointer to the cached, NUL-terminated character data. If the string is
/// already present, the existing pointer is returned instead.
pub(crate) fn insert(&mut self, string: &str, hash: u64) -> *const u8 {
    let mut pos = self.mask & hash as usize;
    let mut dist = 0;
    // Probe until we either find the string or an empty slot to claim.
    loop {
        // SAFETY: pos is always masked by self.mask, so it is in bounds.
        let entry = unsafe { self.entries.get_unchecked(pos) };
        if entry.is_null() {
            break;
        }
        unsafe {
            let entry_chars = entry.add(1) as *const u8;
            let sce = &**entry;
            if sce.hash == hash
                && sce.len == string.len()
                && std::str::from_utf8_unchecked(
                    std::slice::from_raw_parts(entry_chars, sce.len),
                ) == string
            {
                // Already cached — return the existing data.
                return entry_chars;
            }
        }
        // Quadratic probing, matching `get_existing`.
        dist += 1;
        debug_assert!(dist <= self.mask);
        pos = (pos + dist) & self.mask;
    }
    // `pos` is now an empty slot; claim it.
    let entry_ptr = unsafe { self.entries.get_unchecked_mut(pos) };
    // +1 for the NUL terminator.
    let byte_len = string.len() + 1;
    let alloc_size = std::mem::size_of::<StringCacheEntry>() + byte_len;
    let capacity = self.alloc.capacity();
    let allocated = self.alloc.allocated();
    if alloc_size
        .checked_add(allocated)
        .expect("overflowed alloc_size + allocated")
        > capacity
    {
        // Current allocator is full: start a fresh one at least twice as
        // large (and big enough for this entry). The old allocator is
        // retired, never freed — existing entries point into it.
        let new_capacity = capacity
            .checked_mul(2)
            .expect("capacity * 2 overflowed")
            .max(alloc_size);
        let old_alloc = std::mem::replace(
            &mut self.alloc,
            LeakyBumpAlloc::new(
                new_capacity,
                std::mem::align_of::<StringCacheEntry>(),
            ),
        );
        self.old_allocs.push(old_alloc);
        self.total_allocated += new_capacity;
    }
    unsafe {
        *entry_ptr =
            self.alloc.allocate(alloc_size) as *mut StringCacheEntry;
        // Write the header first...
        std::ptr::write(
            *entry_ptr,
            StringCacheEntry {
                hash,
                len: string.len(),
            },
        );
        // ...then the string bytes directly after it. Note `entry_ptr`
        // auto-derefs to the allocated entry pointer, so `add(1)` skips
        // one StringCacheEntry header, not one table slot.
        let char_ptr = entry_ptr.add(1) as *mut u8;
        std::ptr::copy_nonoverlapping(
            string.as_bytes().as_ptr(),
            char_ptr,
            string.len(),
        );
        // Trailing NUL — presumably so the data can be handed out as a
        // C string; confirm with callers.
        let write_ptr = char_ptr.add(string.len());
        std::ptr::write(write_ptr, 0u8);
        self.num_entries += 1;
        // Keep the load factor below ~1/2. Growing rehashes the table but
        // does not move string data, so `char_ptr` stays valid.
        if self.num_entries * 2 > self.mask {
            self.grow();
        }
        char_ptr
    }
}
/// Doubles the hash table and rehashes every occupied slot into it.
/// The string data itself is not moved, so handed-out pointers stay valid.
///
/// # Safety
/// Every non-null entry pointer must point to a live `StringCacheEntry`.
pub(crate) unsafe fn grow(&mut self) {
    // Table length stays a power of two, so the new mask is all ones.
    let new_mask = self.mask * 2 + 1;
    let mut new_entries =
        vec![std::ptr::null_mut() as *mut StringCacheEntry; new_mask + 1];
    let mut to_copy = self.num_entries;
    for e in self.entries.iter_mut() {
        if e.is_null() {
            continue;
        }
        // `hash` is the first field of the #[repr(C)] entry, so it can be
        // read back through a plain u64 pointer.
        let hash = *(*e as *const u64);
        let mut pos = (hash as usize) & new_mask;
        let mut dist = 0;
        // Quadratic probing to find a free slot in the new table.
        loop {
            if new_entries[pos].is_null() {
                break;
            }
            dist += 1;
            debug_assert!(dist <= new_mask, "Probing wrapped around");
            pos = pos.wrapping_add(dist) & new_mask;
        }
        new_entries[pos] = *e;
        // Stop scanning early once every occupied slot has been moved.
        to_copy -= 1;
        if to_copy == 0 {
            break;
        }
    }
    self.entries = new_entries;
    self.mask = new_mask;
}
/// Empties the cache: zeroes the table, resets and drops the retired
/// allocators, and replaces the active allocator with a fresh,
/// initial-sized one. The table itself keeps its current (grown) size.
///
/// # Safety
/// Invalidates every pointer previously handed out by this cache; the
/// caller must ensure none of them are used afterwards.
pub(crate) unsafe fn clear(&mut self) {
    // The table length is always mask + 1; null out every slot.
    std::ptr::write_bytes(self.entries.as_mut_ptr(), 0, self.mask + 1);
    self.num_entries = 0;
    // NOTE(review): `new()` initializes this field to the slot count, but
    // here it is reset to 0 and the fresh allocator below is not counted —
    // confirm whether this accounting difference is intentional.
    self.total_allocated = 0;
    for a in self.old_allocs.iter_mut() {
        a.clear();
    }
    self.old_allocs = Vec::new();
    self.alloc.clear();
    self.alloc = LeakyBumpAlloc::new(
        INITIAL_ALLOC / NUM_BINS,
        std::mem::align_of::<StringCacheEntry>(),
    );
}
/// Bytes of string storage currently handed out, summed over the active
/// allocator and every retired one.
pub(crate) fn total_allocated(&self) -> usize {
    let retired: usize = self
        .old_allocs
        .iter()
        .fold(0, |acc, a| acc + a.allocated());
    retired + self.alloc.allocated()
}
/// Total capacity in bytes of the active allocator plus every retired one.
pub(crate) fn total_capacity(&self) -> usize {
    let retired: usize = self
        .old_allocs
        .iter()
        .fold(0, |acc, a| acc + a.capacity());
    retired + self.alloc.capacity()
}
/// Number of strings currently interned in this cache.
pub(crate) fn num_entries(&self) -> usize {
    self.num_entries
}
}
impl Default for StringCache {
fn default() -> StringCache {
StringCache::new()
}
}
// SAFETY: NOTE(review): the raw pointers inside StringCache point into
// bump allocations that are deliberately leaked (never freed), so moving
// the cache between threads does not dangle them; mutation still needs
// external synchronization supplied by the caller — confirm at call sites.
unsafe impl Send for StringCache {}
/// Iterator over every string stored in a cache's bump allocators.
#[doc(hidden)]
pub struct StringCacheIterator {
    // (start, end) pointer pair delimiting each allocator's used region.
    pub(crate) allocs: Vec<(*const u8, *const u8)>,
    // Index of the allocator currently being walked.
    pub(crate) current_alloc: usize,
    // Next entry header to yield within the current allocator.
    pub(crate) current_ptr: *const u8,
}
/// Rounds `n` up to the next multiple of `align`, which must be a power
/// of two. Panics if `n + align` overflows `usize`.
fn round_up_to(n: usize, align: usize) -> usize {
    debug_assert!(align.is_power_of_two());
    let bumped = n.checked_add(align).expect("round_up_to overflowed");
    (bumped - 1) & !(align - 1)
}
impl Iterator for StringCacheIterator {
    type Item = &'static str;

    /// Yields the next interned string by walking the entry headers laid
    /// end to end inside each allocator's used region.
    fn next(&mut self) -> Option<Self::Item> {
        // Fix: guard against an iterator with no allocators at all — the
        // old code indexed `self.allocs[self.current_alloc]` and panicked.
        if self.current_alloc >= self.allocs.len() {
            return None;
        }
        // Fix: use `while` instead of `if` so that after advancing to the
        // next allocator we re-check `ptr >= end`; an empty region is
        // skipped instead of being misread as an entry header.
        while self.current_ptr >= self.allocs[self.current_alloc].1 {
            if self.current_alloc == self.allocs.len() - 1 {
                // Last allocator exhausted — iteration is done.
                return None;
            }
            self.current_alloc += 1;
            self.current_ptr = self.allocs[self.current_alloc].0;
        }
        unsafe {
            // SAFETY: current_ptr points at a valid StringCacheEntry
            // header within the current allocator's used region; the
            // string bytes follow the header immediately.
            let sce = &*(self.current_ptr as *const StringCacheEntry);
            self.current_ptr = sce.next_entry();
            let s = std::str::from_utf8_unchecked(std::slice::from_raw_parts(
                sce.char_ptr(),
                sce.len,
            ));
            Some(s)
        }
    }
}
/// Header stored in the bump allocator directly in front of each interned
/// string. `#[repr(C)]` pins `hash` at offset 0 — `StringCache::grow`
/// relies on that when it reads the hash back through a `*const u64`.
#[repr(C)]
#[derive(Clone)]
pub(crate) struct StringCacheEntry {
    // Hash of the string, as supplied by the caller of `insert`.
    pub(crate) hash: u64,
    // Length of the string in bytes (the NUL terminator is not counted).
    pub(crate) len: usize,
}
impl StringCacheEntry {
    /// Pointer to the string bytes, which are laid out immediately after
    /// this header (see `StringCache::insert`).
    pub(crate) fn char_ptr(&self) -> *const u8 {
        // SAFETY: every entry is allocated with the character data
        // directly following the header in the same allocation.
        unsafe { (self as *const StringCacheEntry).add(1) as *const u8 }
    }

    /// Pointer to the entry that follows this one in the allocator: skip
    /// the string bytes plus the NUL, rounded up to the header alignment
    /// (assumes the bump allocator aligns each allocation the same way —
    /// it is constructed with `align_of::<StringCacheEntry>()`).
    pub(crate) unsafe fn next_entry(&self) -> *const u8 {
        #[allow(clippy::ptr_offset_with_cast)]
        self.char_ptr().add(round_up_to(
            self.len + 1,
            std::mem::align_of::<StringCacheEntry>(),
        ))
    }
}