mod error;
#[allow(unused)]
pub mod wordpress_test_data;
pub use error::*;
use crate::collections::identifiable::StringId;
use crate::iter::{IntoLendingIterator, LendingIterator};
use libdd_alloc::{AllocError, Allocator, ChainAllocator, VirtualAllocator};
use std::alloc::Layout;
/// String-copying convenience layered on top of an [`Allocator`].
///
/// The trait consists of a single provided method, so an implementor needs
/// nothing beyond its [`Allocator`] implementation.
pub trait ArenaAllocator: Allocator {
    /// Copies `str` into memory obtained from this allocator and returns the
    /// copy, borrowed for as long as `self` is borrowed.
    ///
    /// An empty input short-circuits to the `""` literal and never reaches
    /// the allocator.
    ///
    /// # Errors
    ///
    /// Returns [`AllocError`] when the underlying [`Allocator::allocate`]
    /// call fails.
    fn allocate(&self, str: &str) -> Result<&str, AllocError> {
        let len = str.len();
        if len == 0 {
            return Ok("");
        }

        // The layout is derived from the value itself, so it asks for
        // exactly the bytes of `str`.
        let block = Allocator::allocate(self, Layout::for_value(str))?;
        let dst = block.as_ptr().cast::<u8>();

        // SAFETY: `str` is readable for `len` bytes. `dst` comes from a
        // successful allocation for a layout of `len` bytes; this relies on
        // the `Allocator` contract that such a block is writable for at least
        // that many bytes and does not overlap live data such as `str`. The
        // slice is only formed after all `len` bytes have been written.
        let copied: &[u8] = unsafe {
            core::ptr::copy_nonoverlapping(str.as_ptr(), dst, len);
            core::slice::from_raw_parts(dst as *const u8, len)
        };

        // SAFETY: `copied` is a byte-for-byte copy of `str`, which is valid
        // UTF-8 by construction.
        Ok(unsafe { core::str::from_utf8_unchecked(copied) })
    }
}
// The impl body is empty, so `ChainAllocator` uses the provided
// `ArenaAllocator::allocate` unchanged.
impl<A: Allocator + Clone> ArenaAllocator for ChainAllocator<A> {}
/// Hasher builder used by the string set: a default-constructed `FxHasher`.
type Hasher = core::hash::BuildHasherDefault<rustc_hash::FxHasher>;
/// Set type used by the table. It is an `indexmap::IndexSet`, so every entry
/// also has a numeric index (looked up with `get_index_of` in `try_intern`).
type HashSet<K> = indexmap::IndexSet<K, Hasher>;
/// Table of interned strings: each distinct string is stored once and is
/// identified by the [`StringId`] returned from `intern` / `try_intern`.
pub struct StringTable {
/// Arena that `try_intern` copies the bytes of each newly interned string
/// into.
bytes: ChainAllocator<VirtualAllocator>,
/// The interned strings, addressable by index.
///
/// The `'static` lifetime is not genuine for interned entries:
/// `try_intern` transmutes slices that point into `bytes` to `'static`, so
/// they must not be used after `bytes` is gone.
strings: HashSet<&'static str>,
}
impl Default for StringTable {
fn default() -> Self {
Self::new()
}
}
impl StringTable {
pub fn new() -> Self {
const SIZE_HINT: usize = 4 * 1024 * 1024;
let bytes = ChainAllocator::new_in(SIZE_HINT, VirtualAllocator {});
let mut strings = HashSet::with_hasher(Hasher::default());
strings.reserve(32);
strings.insert("");
Self { bytes, strings }
}
#[inline]
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
self.strings.len()
}
pub fn intern(&mut self, str: &str) -> StringId {
#[allow(clippy::expect_used)]
self.try_intern(str).expect("failed to intern string")
}
pub fn try_intern(&mut self, str: &str) -> Result<StringId, Error> {
let set = &mut self.strings;
match set.get_index_of(str) {
Some(offset) => Ok(unsafe { StringId::try_from(offset).unwrap_unchecked() }),
None => {
let string_id = StringId::try_from(set.len()).map_err(|_| Error::StorageFull)?;
let new_str = {
let s = ArenaAllocator::allocate(&self.bytes, str)?;
unsafe { core::mem::transmute::<&str, &'static str>(s) }
};
self.strings.try_reserve(1)?;
self.strings.insert(new_str);
Ok(string_id)
}
}
}
}
/// Owning iterator over the strings of a [`StringTable`], built by
/// `StringTableIter::new` from the table's two fields.
pub struct StringTableIter {
/// The table's arena. It is never read (hence the `allow`); keeping it here
/// means the iterator owns the memory that the table's interned strings
/// were copied into.
#[allow(unused)]
bytes: ChainAllocator<VirtualAllocator>,
/// Owning iterator over the table's string set.
iter: <HashSet<&'static str> as IntoIterator>::IntoIter,
}
impl StringTableIter {
fn new(string_table: StringTable) -> StringTableIter {
StringTableIter {
bytes: string_table.bytes,
iter: string_table.strings.into_iter(),
}
}
}
impl LendingIterator for StringTableIter {
    /// Each item is a string slice whose lifetime is tied to the borrow of
    /// the iterator rather than being `'static`.
    type Item<'a>
        = &'a str
    where
        Self: 'a;

    /// Yields the next string of the underlying set iterator, or `None` once
    /// it is exhausted.
    fn next(&mut self) -> Option<Self::Item<'_>> {
        let strings = &mut self.iter;
        Iterator::next(strings)
    }

    /// Consumes the iterator and returns how many strings were still left.
    fn count(self) -> usize {
        let Self { iter, .. } = self;
        Iterator::count(iter)
    }
}
impl IntoLendingIterator for StringTable {
    type Iter = StringTableIter;

    /// Consumes the table and returns an iterator over its strings.
    fn into_lending_iter(self) -> Self::Iter {
        <Self::Iter>::new(self)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::collections::identifiable::Id;

    /// Fuzzes `ArenaAllocator::allocate`: every string copied into the arena
    /// must compare equal to its source, both right after the copy and after
    /// all later allocations were made.
    #[test]
    fn fuzz_arena_allocator() {
        // Spelled as `u64` so that the constant also fits on targets whose
        // `usize` is 32 bits wide; as a `usize` literal the product
        // overflows there and the test does not compile.
        const MAX_SIZE_HINT: u64 = 4 * 1024 * 1024 * 1024;

        bolero::check!()
            .with_type::<(usize, Vec<String>)>()
            .for_each(|(size_hint, strings)| {
                // Inputs with a size hint above 4 GiB are skipped.
                if u64::try_from(*size_hint).unwrap_or(u64::MAX) > MAX_SIZE_HINT {
                    return;
                }
                let bytes = ChainAllocator::new_in(*size_hint, VirtualAllocator {});
                let mut allocated_strings = vec![];
                for string in strings {
                    let s =
                        ArenaAllocator::allocate(&bytes, string).expect("allocation to succeed");
                    assert_eq!(s, string);
                    allocated_strings.push(s);
                }
                assert_eq!(strings.len(), allocated_strings.len());
                // Compare again at the end: later allocations must not have
                // disturbed earlier copies.
                strings
                    .iter()
                    .zip(allocated_strings.iter())
                    .for_each(|(s, t)| assert_eq!(s, t));
            });
    }

    /// Fuzzes `StringTable` against a reference model made of an ordered
    /// list plus a std hash set.
    #[test]
    fn fuzz_string_table() {
        bolero::check!()
            .with_type::<Vec<String>>()
            .for_each(|strings| {
                // The model starts with the empty string, like the table.
                let mut golden_list = vec![""];
                let mut golden_set = std::collections::HashSet::from([""]);
                let mut st = StringTable::new();
                for string in strings {
                    assert_eq!(st.len(), golden_set.len());
                    if golden_set.insert(string) {
                        golden_list.push(string);
                    }
                    let str_id = st.intern(string);
                    assert_eq!(string, golden_list[usize::from(str_id)]);
                }
                assert_eq!(st.len(), golden_list.len());
                assert_eq!(st.len(), golden_set.len());
                let mut it = st.into_lending_iter();
                let mut idx = 0;
                while let Some(s) = it.next() {
                    assert_eq!(s, golden_list[idx]);
                    idx += 1;
                }
                // The iterator must also yield every entry, not merely a
                // correct prefix.
                assert_eq!(idx, golden_list.len());
            })
    }

    #[test]
    fn test_basics() {
        let mut table = StringTable::new();
        // The empty string is already there.
        assert_eq!(1, table.len());
        assert_eq!(StringId::ZERO, table.intern(""));
        let string = table.intern("datadog");
        assert_eq!(StringId::from_offset(1), string);
        assert_eq!(2, table.len());
    }

    /// Interns every string of `src` twice and checks the table's size and
    /// iteration order against `src`.
    ///
    /// For the length assertions to hold, `src` has to start with `""` and
    /// contain no duplicates.
    #[track_caller]
    fn test_from_src(src: &[&str]) {
        let mut table = StringTable::new();
        let n_strings = src.len();
        for string in src {
            table.intern(string);
        }
        assert_eq!(n_strings, table.len());

        // Interning the same strings again must not add entries.
        for string in src {
            table.intern(string);
        }
        assert_eq!(n_strings, table.len());

        // Walk `src` and demand a matching item for each element. This
        // fails if the iterator ends early, and it never pulls an item
        // beyond the expected ones, so the `count` below sees all leftovers.
        let mut actual_iter = table.into_lending_iter();
        for expected in src {
            assert_eq!(Some(*expected), actual_iter.next());
        }
        assert_eq!(0, actual_iter.count());
    }

    #[test]
    fn test_small_set_of_strings() {
        let cases: &[_] = &[
            "",
            "local root span id",
            "span id",
            "trace endpoint",
            "samples",
            "count",
            "wall-time",
            "nanoseconds",
            "cpu-time",
            "<?php",
            "/srv/demo/public/index.php",
            "pid",
            "/var/www/public/index.php",
            "main",
            "thread id",
            "A\\Very\\Long\\Php\\Namespace\\Class::method",
            "/",
        ];
        test_from_src(cases);
    }

    #[test]
    fn test_wordpress() {
        test_from_src(&wordpress_test_data::WORDPRESS_STRINGS);
    }
}