use crate::CopyRangeU32;
use appendvec::{AppendStr, AppendVec};
use dashtable::DashTable;
#[cfg(feature = "get-size2")]
use get_size2::{GetSize, GetSizeTracker};
use hashbrown::DefaultHashBuilder;
#[cfg(feature = "serde")]
use serde::de::{Error, SeqAccess, Visitor};
#[cfg(feature = "serde")]
use serde::ser::SerializeTuple;
#[cfg(feature = "serde")]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[cfg(feature = "serde")]
use serde_cow::CowStr;
#[cfg(feature = "serde")]
use std::cell::Cell;
use std::fmt::Debug;
use std::hash::{BuildHasher, Hash};
#[cfg(feature = "debug")]
use std::sync::atomic::{self, AtomicUsize};
/// Lightweight, copyable handle to a string stored in an [`ArenaStr`].
///
/// The handle wraps the `u32` id handed out by the arena; resolve it back to text with
/// [`ArenaStr::lookup`].
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "get-size2", derive(GetSize))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct InternedStr(u32);
impl Default for InternedStr {
fn default() -> Self {
Self::new(u32::MAX)
}
}
impl Debug for InternedStr {
    /// Formats the handle as the tuple struct `I(<id>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("I");
        tuple.field(&self.0);
        tuple.finish()
    }
}
#[cfg(feature = "raw")]
impl InternedStr {
    /// Wraps a raw `u32` id into a handle without any validation.
    pub fn from_id(id: u32) -> Self {
        Self(id)
    }

    /// Returns the raw `u32` id wrapped by this handle.
    pub fn id(&self) -> u32 {
        let Self(raw) = *self;
        raw
    }
}
impl InternedStr {
pub(crate) fn new(id: u32) -> Self {
Self(id)
}
pub(crate) fn id_(&self) -> u32 {
self.0
}
}
/// Append-only text storage: one shared text buffer plus the byte range of every stored string.
struct RangeVecStr {
// Buffer holding the text of all stored strings.
vec: AppendStr,
// Byte range (start..end into `vec`) of each stored string; the index is the string id.
ranges: AppendVec<CopyRangeU32>,
}
impl RangeVecStr {
    /// Returns the bytes of the string recorded under `id`.
    fn lookup_bytes(&self, id: u32) -> &[u8] {
        let span = self.ranges[id as usize];
        self.vec
            .get_bytes(span.start as usize..span.end as usize)
    }

    /// Returns the string recorded under `id`.
    fn lookup_str(&self, id: u32) -> &str {
        let span = self.ranges[id as usize];
        &self.vec[span.start as usize..span.end as usize]
    }

    /// Iterates over all stored strings in id order.
    fn iter(&self) -> impl ExactSizeIterator<Item = &str> {
        let spans = self.ranges.iter();
        spans.map(|span| &self.vec[span.start as usize..span.end as usize])
    }

    /// Iterates over all stored strings, as byte slices, in id order.
    fn iter_bytes(&self) -> impl ExactSizeIterator<Item = &[u8]> {
        let spans = self.ranges.iter();
        spans.map(|span| {
            self.vec
                .get_bytes(span.start as usize..span.end as usize)
        })
    }

    /// Appends `value` through a shared reference and returns the id of the new entry.
    ///
    /// # Panics
    ///
    /// Panics if the text offsets or the resulting id do not fit in a `u32`.
    fn push_str(&self, value: &str) -> u32 {
        let written = self.vec.push_str(value);
        // Offsets are stored as `u32`, so reject anything that would be truncated by the casts.
        assert!(written.start <= u32::MAX as usize);
        assert!(written.end <= u32::MAX as usize);
        let id = self
            .ranges
            .push((written.start as u32..written.end as u32).into());
        assert!(id <= u32::MAX as usize);
        id as u32
    }

    /// Appends `value` through an exclusive reference and returns the id of the new entry.
    ///
    /// # Panics
    ///
    /// Panics if the text offsets or the resulting id do not fit in a `u32`.
    fn push_str_mut(&mut self, value: &str) -> u32 {
        let written = self.vec.push_str_mut(value);
        // Offsets are stored as `u32`, so reject anything that would be truncated by the casts.
        assert!(written.start <= u32::MAX as usize);
        assert!(written.end <= u32::MAX as usize);
        let id = self
            .ranges
            .push_mut((written.start as u32..written.end as u32).into());
        assert!(id <= u32::MAX as usize);
        id as u32
    }
}
/// String interning arena handing out [`InternedStr`] handles for stored strings.
pub struct ArenaStr {
// Backing text plus the byte range of every stored string.
rangevec: RangeVecStr,
// Hash table of string ids, used to find an already-stored string by its content.
map: DashTable<u32>,
// Hash builder used to hash string contents for `map`.
hasher: DefaultHashBuilder,
// Counter bumped by every `intern`/`intern_mut`/`push` call (debug feature only).
#[cfg(feature = "debug")]
references: AtomicUsize,
}
impl Clone for ArenaStr {
    /// Builds a fresh arena and re-pushes every string in id order, so ids are preserved.
    fn clone(&self) -> Self {
        let strings = self.iter_();
        // Size the copy from the same iterator snapshot that is about to be replayed.
        let mut copy = Self::with_capacity(strings.len(), self.bytes());
        strings.for_each(|s| {
            copy.push(s);
        });
        copy
    }
}
impl ArenaStr {
    /// Creates an empty arena with room reserved for `strings` entries and `bytes` bytes of text.
    pub fn with_capacity(strings: usize, bytes: usize) -> Self {
        let rangevec = RangeVecStr {
            vec: AppendStr::with_capacity(bytes),
            ranges: AppendVec::with_capacity(strings),
        };
        Self {
            rangevec,
            map: DashTable::with_capacity(strings),
            hasher: DefaultHashBuilder::default(),
            #[cfg(feature = "debug")]
            references: AtomicUsize::new(0),
        }
    }

    /// Number of strings stored in the arena.
    pub fn strings(&self) -> usize {
        let ranges = &self.rangevec.ranges;
        ranges.len()
    }

    /// Length of the backing text buffer, as reported by its `len()`.
    pub fn bytes(&self) -> usize {
        let text = &self.rangevec.vec;
        text.len()
    }

    /// Returns `true` when no string has been stored yet.
    pub fn is_empty(&self) -> bool {
        matches!(self.strings(), 0)
    }

    /// Iterates over all stored strings in id order.
    #[cfg(feature = "raw")]
    pub fn iter(&self) -> impl ExactSizeIterator<Item = &str> {
        let storage = &self.rangevec;
        storage.iter()
    }

    /// Crate-internal twin of `iter`, available regardless of the `raw` feature.
    fn iter_(&self) -> impl ExactSizeIterator<Item = &str> {
        let storage = &self.rangevec;
        storage.iter()
    }

    /// Iterates over all stored strings, as byte slices, in id order.
    #[cfg(feature = "raw")]
    pub fn iter_bytes(&self) -> impl ExactSizeIterator<Item = &[u8]> {
        let storage = &self.rangevec;
        storage.iter_bytes()
    }

    /// Crate-internal twin of `iter_bytes`, available regardless of the `raw` feature.
    fn iter_bytes_(&self) -> impl ExactSizeIterator<Item = &[u8]> {
        let storage = &self.rangevec;
        storage.iter_bytes()
    }

    /// Looks `value` up without inserting it; returns its handle when the table has a match.
    pub fn find(&self, value: &str) -> Option<InternedStr> {
        let hash = self.hasher.hash_one(value);
        let matches_value = |&candidate: &u32| self.lookup_str(candidate) == value;
        self.map
            .find(hash, matches_value)
            .map(|slot| InternedStr(*slot))
    }

    /// Appends `value` via `push` (no duplicate check is performed there) and returns its raw id.
    #[cfg(feature = "raw")]
    pub fn push_mut(&mut self, value: &str) -> u32 {
        self.push(value)
    }
}
impl Default for ArenaStr {
    /// Creates an empty arena from the `new()` constructors of its containers.
    fn default() -> Self {
        let rangevec = RangeVecStr {
            vec: AppendStr::new(),
            ranges: AppendVec::new(),
        };
        Self {
            rangevec,
            map: DashTable::new(),
            hasher: DefaultHashBuilder::default(),
            #[cfg(feature = "debug")]
            references: AtomicUsize::new(0),
        }
    }
}
impl Debug for ArenaStr {
    /// Formats the arena as a list of its strings, in id order.
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut list = fmt.debug_list();
        list.entries(self.iter_());
        list.finish()
    }
}
impl PartialEq for ArenaStr {
    /// Two arenas are equal when they hold the same byte sequences in the same id order.
    fn eq(&self, other: &Self) -> bool {
        let mine = self.iter_bytes_();
        let theirs = other.iter_bytes_();
        Iterator::eq(mine, theirs)
    }
}
// Marker impl: arena equality compares entries byte-wise, which is a total equivalence relation.
impl Eq for ArenaStr {}
#[cfg(feature = "get-size2")]
impl GetSize for ArenaStr {
    /// Estimates the heap footprint from the stored text length and the per-string bookkeeping.
    ///
    /// Each string is charged one `CopyRangeU32` (its byte range) plus one `u32` (its table slot).
    /// The tracker is returned untouched.
    fn get_heap_size_with_tracker<Tr: GetSizeTracker>(&self, tracker: Tr) -> (usize, Tr) {
        let text_bytes = self.rangevec.vec.len() * size_of::<u8>();
        let per_string = size_of::<CopyRangeU32>() + size_of::<u32>();
        let index_bytes = self.rangevec.ranges.len() * per_string;
        (text_bytes + index_bytes, tracker)
    }
}
#[cfg(feature = "debug")]
impl ArenaStr {
    /// Prints a one-line usage summary of this interner to stdout.
    ///
    /// `prefix` and `title` are echoed verbatim; `total_bytes` is the denominator for the
    /// percentage shown in brackets.
    pub fn print_summary(&self, prefix: &str, title: &str, total_bytes: usize) {
        let strings = self.rangevec.ranges.len();
        let references = self.references();
        let estimated_bytes = self.get_size();

        // Ratios are computed in floating point, so a zero denominator prints NaN/inf
        // instead of panicking.
        let share_percent = estimated_bytes as f64 * 100.0 / total_bytes as f64;
        let bytes_per_object = estimated_bytes as f64 / strings as f64;
        let refs_per_object = references as f64 / strings as f64;

        println!(
            "{}[{:.02}%] {} interner: {} objects | {} bytes ({:.02} bytes/object) | {} references ({:.02} refs/object)",
            prefix,
            share_percent,
            title,
            strings,
            estimated_bytes,
            bytes_per_object,
            references,
            refs_per_object,
        );
    }

    /// Current value of the reference counter (relaxed load).
    fn references(&self) -> usize {
        let counter = &self.references;
        counter.load(atomic::Ordering::Relaxed)
    }
}
impl ArenaStr {
/// Interns `value` through a shared reference and returns its handle.
///
/// The hash table entry for `value` is looked up by content; only when no entry matches is
/// the string appended to the backing storage (`or_insert_with`).
pub fn intern(&self, value: &str) -> InternedStr {
// Every call is counted, whether or not the string was already present.
#[cfg(feature = "debug")]
self.references.fetch_add(1, atomic::Ordering::Relaxed);
let hash = self.hasher.hash_one(value);
let id = *self
.map
.entry(
hash,
// Equality check: compare the candidate's stored text with `value`.
|&i| self.lookup_str(i) == value,
// Hash function for stored ids: hash the text they refer to.
|&i| self.hasher.hash_one(self.lookup_str(i)),
)
.or_insert_with(|| self.rangevec.push_str(value))
.get();
InternedStr::new(id)
}
/// Same as [`ArenaStr::intern`], but takes an exclusive reference and uses the `_mut`
/// variants of the table and storage operations.
pub fn intern_mut(&mut self, value: &str) -> InternedStr {
#[cfg(feature = "debug")]
self.references.fetch_add(1, atomic::Ordering::Relaxed);
let hash = self.hasher.hash_one(value);
let id = *self
.map
.entry_mut(
hash,
// These closures go through `self.rangevec` directly (not `self.lookup_str`) so that
// only that field is borrowed while `self.map` is borrowed mutably.
|&i| self.rangevec.lookup_str(i) == value,
|&i| self.hasher.hash_one(self.rangevec.lookup_str(i)),
)
.or_insert_with(|| self.rangevec.push_str_mut(value))
.get();
InternedStr::new(id)
}
/// Appends `value` unconditionally and registers its id in the hash table.
///
/// No lookup is done first, so the caller is responsible for not pushing a string that is
/// already stored. Returns the raw id of the new entry.
pub(crate) fn push(&mut self, value: &str) -> u32 {
#[cfg(feature = "debug")]
self.references.fetch_add(1, atomic::Ordering::Relaxed);
let hash = self.hasher.hash_one(value);
let id = self.rangevec.push_str_mut(value);
self.map.insert_unique_mut(hash, id, |&i| {
self.hasher.hash_one(self.rangevec.lookup_str(i))
});
id
}
/// Returns the string that `interned` refers to.
pub fn lookup(&self, interned: InternedStr) -> &str {
self.lookup_str(interned.0)
}
/// Returns the bytes of the string that `interned` refers to.
pub fn lookup_bytes(&self, interned: InternedStr) -> &[u8] {
self.rangevec.lookup_bytes(interned.0)
}
/// Returns the string stored under the raw id `id`.
fn lookup_str(&self, id: u32) -> &str {
self.rangevec.lookup_str(id)
}
}
#[cfg(feature = "serde")]
impl Serialize for ArenaStr {
    /// Serializes the arena as a 2-tuple: the list of string lengths, then the concatenated text.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut pair = serializer.serialize_tuple(2)?;

        // Serializing the lengths also records how many entries and bytes were emitted, so the
        // text element below covers exactly the same entries.
        let lengths = RangeWrapper {
            ranges: &self.rangevec.ranges,
            ranges_len: Cell::new(0),
            total_len: Cell::new(0),
        };
        pair.serialize_element(&lengths)?;

        let text = ArenaStrWrapper {
            ranges_len: lengths.ranges_len.into_inner(),
            total_len: lengths.total_len.into_inner(),
            rangevec: &self.rangevec,
        };
        pair.serialize_element(&text)?;
        pair.end()
    }
}
/// Serialization helper that emits the length of every stored string.
///
/// While serializing, it records how many lengths were emitted and their sum in the two cells,
/// so the caller can read them back afterwards.
#[cfg(feature = "serde")]
struct RangeWrapper<'a> {
// Byte ranges whose lengths are serialized.
ranges: &'a AppendVec<CopyRangeU32>,
// Output: number of ranges visited during serialization.
ranges_len: Cell<u32>,
// Output: sum of the lengths of the visited ranges.
total_len: Cell<u32>,
}
#[cfg(feature = "serde")]
impl<'a> Serialize for RangeWrapper<'a> {
    /// Serializes the length of each range as a sequence and stores the visited count and the
    /// summed length into `ranges_len` / `total_len`.
    ///
    /// # Panics
    ///
    /// Panics if the summed length overflows `u32` (`strict_add`).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut visited: u32 = 0;
        let mut byte_sum: u32 = 0;
        let lengths = self.ranges.iter().map(|span| {
            visited += 1;
            let len = span.end - span.start;
            byte_sum = byte_sum.strict_add(len);
            len
        });
        let outcome = serializer.collect_seq(lengths);
        // The cells are updated even when serialization failed; the error is returned after.
        self.ranges_len.set(visited);
        self.total_len.set(byte_sum);
        outcome
    }
}
/// Serialization helper that emits the text of the first `ranges_len` strings as one string.
#[cfg(feature = "serde")]
struct ArenaStrWrapper<'a> {
// Number of leading entries to include.
ranges_len: u32,
// Combined byte length of those entries; used to pre-size the output buffer.
total_len: u32,
// Storage the entries are read from.
rangevec: &'a RangeVecStr,
}
#[cfg(feature = "serde")]
impl<'a> Serialize for ArenaStrWrapper<'a> {
    /// Concatenates the first `ranges_len` strings and serializes the result as a single string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut joined = String::with_capacity(self.total_len as usize);
        // `RangeVecStr::iter` yields the stored strings in id order.
        joined.extend(self.rangevec.iter().take(self.ranges_len as usize));
        serializer.serialize_str(&joined)
    }
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for ArenaStr {
    /// Deserializes an arena from the 2-tuple layout (lengths, concatenated text).
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = ArenaStrVisitor;
        deserializer.deserialize_tuple(2, visitor)
    }
}
/// Serde visitor that rebuilds an [`ArenaStr`] from its 2-element sequence representation.
#[cfg(feature = "serde")]
struct ArenaStrVisitor;
#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for ArenaStrVisitor {
    type Value = ArenaStr;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("a pair of values")
    }

    /// Rebuilds the arena from a sequence of (string lengths, concatenated text).
    ///
    /// Strings are pushed in order, so the ids of the rebuilt arena match their position in the
    /// length list.
    ///
    /// # Errors
    ///
    /// Returns an error when either element is missing, or when the lengths do not describe
    /// valid slices of the text (they run past its end, overflow, or split a UTF-8 character).
    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
    where
        A: SeqAccess<'de>,
    {
        let sizes: Vec<u32> = seq
            .next_element()?
            .ok_or_else(|| A::Error::invalid_length(0, &self))?;
        let string: CowStr = seq
            .next_element()?
            .ok_or_else(|| A::Error::invalid_length(1, &self))?;
        let mut arena = ArenaStr {
            rangevec: RangeVecStr {
                vec: AppendStr::with_capacity(string.0.len()),
                ranges: AppendVec::with_capacity(sizes.len()),
            },
            map: DashTable::with_capacity(sizes.len()),
            hasher: DefaultHashBuilder::default(),
            #[cfg(feature = "debug")]
            references: AtomicUsize::new(0),
        };
        let mut start: usize = 0;
        for size in sizes {
            // The input is external data: validate every slice instead of indexing, which
            // would panic on inconsistent lengths.
            let end = start
                .checked_add(size as usize)
                .ok_or_else(|| A::Error::custom("string lengths overflow the address space"))?;
            let piece = string.0.get(start..end).ok_or_else(|| {
                A::Error::custom(format!(
                    "string range {start}..{end} is out of bounds or splits a UTF-8 character \
                     (text is {} bytes long)",
                    string.0.len()
                ))
            })?;
            arena.push(piece);
            start = end;
        }
        Ok(arena)
    }
}
#[cfg(all(feature = "delta", feature = "serde"))]
mod delta {
use super::*;
use crate::{Accumulator, DeltaEncoding};
use serde::ser::SerializeSeq;
use serde_cow::CowBytes;
use std::marker::PhantomData;
impl<Accum> Serialize for DeltaEncoding<&ArenaStr, Accum>
where
    Accum: Accumulator<Value = str, Storage = Box<str>, DeltaStorage = Box<[u8]>>,
{
    /// Serializes the arena as a 2-tuple: the list of string lengths, then the delta-encoded
    /// bytes of all strings.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut pair = serializer.serialize_tuple(2)?;

        // Serializing the lengths records how many entries and bytes were emitted; the payload
        // element is then limited to exactly those entries.
        let lengths = RangeWrapper {
            ranges: &self.rangevec.ranges,
            ranges_len: Cell::new(0),
            total_len: Cell::new(0),
        };
        pair.serialize_element(&lengths)?;

        let entry_count = lengths.ranges_len.into_inner();
        let byte_count = lengths.total_len.into_inner();
        pair.serialize_element(&ArenaStrWrapper {
            ranges_len: entry_count,
            total_len: byte_count,
            rangevec: &self.map_ref(|arena| &arena.rangevec),
        })?;
        pair.end()
    }
}
/// Serialization helper that emits the delta-encoded bytes of the first `ranges_len` strings.
struct ArenaStrWrapper<'a, Accum> {
// Number of leading entries to include.
ranges_len: u32,
// Combined byte length of those entries; passed as the sequence length hint.
total_len: u32,
// Storage the entries are read from, tagged with the accumulator type.
rangevec: &'a DeltaEncoding<&'a RangeVecStr, Accum>,
}
impl<'a, Accum> Serialize for ArenaStrWrapper<'a, Accum>
where
    Accum: Accumulator<Value = str, Storage = Box<str>, DeltaStorage = Box<[u8]>>,
{
    /// Folds each of the first `ranges_len` strings through a fresh accumulator and serializes
    /// the resulting delta bytes as one flat sequence.
    ///
    /// # Panics
    ///
    /// Panics if the accumulator returns a delta whose length differs from the source string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut seq = serializer.serialize_seq(Some(self.total_len as usize))?;
        let mut accumulator = Accum::default();
        let spans = self.rangevec.ranges.iter().take(self.ranges_len as usize);
        for span in spans {
            let text = &self.rangevec.vec[span.start as usize..span.end as usize];
            let delta = accumulator.fold(text);
            // The announced sequence length is the sum of string lengths, so each delta must
            // be exactly as long as its source.
            assert_eq!(
                delta.len(),
                text.len(),
                "Invalid Accumulator implementation for DeltaEncoding of ArenaStr: delta length must match source string length (in bytes)"
            );
            delta
                .iter()
                .try_for_each(|byte| seq.serialize_element(byte))?;
        }
        seq.end()
    }
}
impl<'de, Accum> Deserialize<'de> for DeltaEncoding<ArenaStr, Accum>
where
    Accum: Accumulator<Value = str, Storage = Box<str>, Delta = [u8]>,
{
    /// Deserializes a delta-encoded arena from the 2-tuple layout (lengths, delta bytes).
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = DeltaArenaStrVisitor::<Accum>::new();
        deserializer.deserialize_tuple(2, visitor)
    }
}
/// Serde visitor that rebuilds a delta-encoded [`ArenaStr`]; `Accum` selects the accumulator.
struct DeltaArenaStrVisitor<Accum> {
// Zero-sized marker carrying the accumulator type.
_accum: PhantomData<Accum>,
}
impl<Accum> DeltaArenaStrVisitor<Accum> {
fn new() -> Self {
Self {
_accum: PhantomData,
}
}
}
impl<'de, Accum> Visitor<'de> for DeltaArenaStrVisitor<Accum>
where
    Accum: Accumulator<Value = str, Storage = Box<str>, Delta = [u8]>,
{
    type Value = DeltaEncoding<ArenaStr, Accum>;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("a pair of values")
    }

    /// Rebuilds the arena from a sequence of (string lengths, delta-encoded bytes).
    ///
    /// Each delta slice is unfolded through a fresh accumulator and pushed in order, so the ids
    /// of the rebuilt arena match their position in the length list.
    ///
    /// # Errors
    ///
    /// Returns an error when either element is missing, or when the lengths run past the end
    /// of the byte payload (or overflow).
    ///
    /// # Panics
    ///
    /// Panics if the accumulator returns a string whose length differs from its delta.
    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
    where
        A: SeqAccess<'de>,
    {
        let sizes: Vec<u32> = seq
            .next_element()?
            .ok_or_else(|| A::Error::invalid_length(0, &self))?;
        let bytes: CowBytes = seq
            .next_element()?
            .ok_or_else(|| A::Error::invalid_length(1, &self))?;
        let mut arena = ArenaStr {
            rangevec: RangeVecStr {
                vec: AppendStr::with_capacity(bytes.0.len()),
                ranges: AppendVec::with_capacity(sizes.len()),
            },
            map: DashTable::with_capacity(sizes.len()),
            hasher: DefaultHashBuilder::default(),
            #[cfg(feature = "debug")]
            references: AtomicUsize::new(0),
        };
        let mut acc = Accum::default();
        let mut start: usize = 0;
        for size in sizes {
            // The input is external data: validate every slice instead of indexing, which
            // would panic on inconsistent lengths.
            let end = start
                .checked_add(size as usize)
                .ok_or_else(|| A::Error::custom("string lengths overflow the address space"))?;
            let delta = bytes.0.get(start..end).ok_or_else(|| {
                A::Error::custom(format!(
                    "delta range {start}..{end} is out of bounds (payload is {} bytes long)",
                    bytes.0.len()
                ))
            })?;
            let string = acc.unfold(delta);
            assert_eq!(
                delta.len(),
                string.len(),
                "Invalid Accumulator implementation for DeltaEncoding of ArenaStr: delta length must match destination string length (in bytes)"
            );
            arena.push(&string);
            start = end;
        }
        Ok(DeltaEncoding {
            inner: arena,
            _phantom: PhantomData,
        })
    }
}
}
#[cfg(test)]
mod test {
use super::*;
#[cfg(all(feature = "delta", feature = "serde"))]
use crate::{Accumulator, DeltaEncoding};
use std::thread;
/// Deterministically maps `i` to a short string mixing 2-byte (Cyrillic) and 1-byte (ASCII)
/// characters.
///
/// `i` is written in base 90, least-significant digit first: digits below 64 become the
/// character `U+0410 + digit`, the remaining 26 become `'a'..='z'`. Zero maps to the empty string.
fn make_utf8_string(mut i: u32) -> String {
    const CYRILLIC_DIGITS: u32 = 64;
    const LATIN_DIGITS: u32 = 26;
    const BASE: u32 = CYRILLIC_DIGITS + LATIN_DIGITS;

    let mut out = String::new();
    while i > 0 {
        let digit = i % BASE;
        i /= BASE;
        let code_point = if digit < CYRILLIC_DIGITS {
            0x410 + digit
        } else {
            u32::from(b'a') + (digit - CYRILLIC_DIGITS)
        };
        out.push(char::from_u32(code_point).expect("Invalid Unicode value"));
    }
    out
}
/// Pins the output of `make_utf8_string` (text and byte length) for a range of inputs.
#[test]
fn test_utf8_string() {
    // (input, expected text, expected length in bytes)
    let cases: [(u32, &str, usize); 11] = [
        (0, "", 0),
        (5, "Е", 2),
        (25, "Щ", 2),
        (125, "гБ", 4),
        (625, "vЖ", 3),
        (3125, "bв", 3),
        (15625, "чtБ", 5),
        (78125, "ЕъЙ", 6),
        (390625, "ЩФр", 6),
        (1953125, "гЛэВ", 8),
        (9765625, "vшгН", 7),
    ];
    for (input, text, byte_len) in cases.iter().copied() {
        assert_eq!(make_utf8_string(input), text);
        assert_eq!(make_utf8_string(input).len(), byte_len);
    }
}
/// Interns a handful of strings, then checks that every handle resolves to its string.
#[test]
fn test_lookup() {
    let arena = ArenaStr::default();
    let inputs = ["", "a", "bb", "ccc", "dddd", "eeeee"];
    // Intern everything first, look up afterwards.
    let handles: Vec<InternedStr> = inputs.iter().map(|s| arena.intern(s)).collect();
    for (handle, expected) in handles.iter().zip(inputs.iter()) {
        assert_eq!(arena.lookup(*handle), *expected);
    }
}
/// Interning distinct strings yields sequential ids, and each id resolves back to its string.
#[test]
fn test_intern_lookup() {
    let arena = ArenaStr::default();
    (0..100).for_each(|i| {
        let handle = arena.intern(&make_utf8_string(i));
        assert_eq!(handle.0, i);
    });
    (0..100).for_each(|i| {
        let handle = InternedStr::new(i);
        assert_eq!(arena.lookup(handle), &make_utf8_string(i));
    });
}
// Number of reader threads spawned by the concurrent tests.
const NUM_READERS: usize = 4;
// Number of writer threads spawned by the concurrent tests.
const NUM_WRITERS: usize = 4;
// Number of strings interned by each writer; a much smaller count is used under Miri.
#[cfg(not(miri))]
const NUM_ITEMS: usize = 1_000_000;
#[cfg(miri)]
const NUM_ITEMS: usize = 100;
/// One writer interns `NUM_ITEMS` strings while `NUM_READERS` threads keep looking up the most
/// recently published entry.
#[test]
fn test_intern_lookup_concurrent_reads() {
    let arena = ArenaStr::default();
    thread::scope(|s| {
        for _ in 0..NUM_READERS {
            s.spawn(|| {
                loop {
                    let len = arena.strings();
                    if len == 0 {
                        // Nothing published yet; poll again.
                        continue;
                    }
                    let last = len as u32 - 1;
                    assert_eq!(
                        arena.lookup(InternedStr::new(last)),
                        &make_utf8_string(last)
                    );
                    if len == NUM_ITEMS {
                        break;
                    }
                }
            });
        }
        s.spawn(|| {
            (0..NUM_ITEMS as u32).for_each(|j| {
                assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
            });
        });
    });
}
/// `NUM_WRITERS` threads intern the same `NUM_ITEMS` strings in the same order while a single
/// reader keeps looking up the most recently published entry.
#[test]
fn test_intern_lookup_concurrent_writes() {
    let arena = ArenaStr::default();
    thread::scope(|s| {
        s.spawn(|| {
            loop {
                let len = arena.strings();
                if len == 0 {
                    // Nothing published yet; poll again.
                    continue;
                }
                let last = len as u32 - 1;
                assert_eq!(
                    arena.lookup(InternedStr::new(last)),
                    &make_utf8_string(last)
                );
                if len == NUM_ITEMS {
                    break;
                }
            }
        });
        for _ in 0..NUM_WRITERS {
            s.spawn(|| {
                // Every writer must observe the same id for the same string.
                (0..NUM_ITEMS as u32).for_each(|j| {
                    assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
                });
            });
        }
    });
}
/// `NUM_WRITERS` threads intern the same `NUM_ITEMS` strings while `NUM_READERS` threads keep
/// looking up the most recently published entry.
#[test]
fn test_intern_lookup_concurrent_readwrites() {
    let arena = ArenaStr::default();
    thread::scope(|s| {
        for _ in 0..NUM_READERS {
            s.spawn(|| {
                loop {
                    let len = arena.strings();
                    if len == 0 {
                        // Nothing published yet; poll again.
                        continue;
                    }
                    let last = len as u32 - 1;
                    assert_eq!(
                        arena.lookup(InternedStr::new(last)),
                        &make_utf8_string(last)
                    );
                    if len == NUM_ITEMS {
                        break;
                    }
                }
            });
        }
        for _ in 0..NUM_WRITERS {
            s.spawn(|| {
                // Every writer must observe the same id for the same string.
                (0..NUM_ITEMS as u32).for_each(|j| {
                    assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
                });
            });
        }
    });
}
/// Round-trips an arena and its handles through postcard and pins the exact wire bytes.
#[cfg(feature = "serde")]
#[test]
fn test_serde_postcard() {
    let inputs = ["", "a", "bb", "ccc", "dddd", "eeeee"];
    let arena = ArenaStr::default();
    let handles: [InternedStr; 6] = [
        arena.intern(inputs[0]),
        arena.intern(inputs[1]),
        arena.intern(inputs[2]),
        arena.intern(inputs[3]),
        arena.intern(inputs[4]),
        arena.intern(inputs[5]),
    ];
    assert_eq!(arena.strings(), 6);
    assert!(arena.bytes() >= 15);

    let serialized_arena = postcard::to_stdvec(&arena).expect("Failed to serialize arena");
    // Layout: entry count, the six lengths, text length, then the concatenated text.
    let mut expected_bytes = vec![6, 0, 1, 2, 3, 4, 5, 15];
    expected_bytes.extend_from_slice(b"abbcccddddeeeee");
    assert_eq!(serialized_arena, expected_bytes);

    let new_arena: ArenaStr =
        postcard::from_bytes(&serialized_arena).expect("Failed to deserialize arena");
    assert_eq!(new_arena, arena);
    assert_eq!(new_arena.strings(), 6);
    assert_eq!(new_arena.bytes(), 15);

    let serialized_handles =
        postcard::to_stdvec(&handles).expect("Failed to serialize interned handles");
    assert_eq!(serialized_handles, vec![0, 1, 2, 3, 4, 5]);
    let new_handles: [InternedStr; 6] = postcard::from_bytes(&serialized_handles)
        .expect("Failed to deserialize interned handles");
    assert_eq!(new_handles, handles);

    // The original handles must resolve to the same strings in the deserialized arena.
    for (handle, expected) in handles.iter().zip(inputs.iter()) {
        assert_eq!(new_arena.lookup(*handle), *expected);
    }
}
/// Round-trips an arena and its handles through JSON and pins the exact text representation.
#[cfg(feature = "serde")]
#[test]
fn test_serde_json() {
    let inputs = ["", "a", "bb", "ccc", "dddd", "eeeee"];
    let arena = ArenaStr::default();
    let handles: [InternedStr; 6] = [
        arena.intern(inputs[0]),
        arena.intern(inputs[1]),
        arena.intern(inputs[2]),
        arena.intern(inputs[3]),
        arena.intern(inputs[4]),
        arena.intern(inputs[5]),
    ];
    assert_eq!(arena.strings(), 6);
    assert!(arena.bytes() >= 15);

    let serialized_arena = serde_json::to_string(&arena).expect("Failed to serialize arena");
    // Layout: the list of lengths, then the concatenated text.
    assert_eq!(serialized_arena, r#"[[0,1,2,3,4,5],"abbcccddddeeeee"]"#);

    let new_arena: ArenaStr =
        serde_json::from_str(&serialized_arena).expect("Failed to deserialize arena");
    assert_eq!(new_arena, arena);
    assert_eq!(new_arena.strings(), 6);
    assert_eq!(new_arena.bytes(), 15);

    let serialized_handles =
        serde_json::to_string(&handles).expect("Failed to serialize interned handles");
    assert_eq!(serialized_handles, "[0,1,2,3,4,5]");
    let new_handles: [InternedStr; 6] = serde_json::from_str(&serialized_handles)
        .expect("Failed to deserialize interned handles");
    assert_eq!(new_handles, handles);
}
/// Test accumulator that XORs each string, byte by byte, with the previously seen string.
#[cfg(all(feature = "delta", feature = "serde"))]
#[derive(Default)]
struct StringAccumulator {
// Bytes of the last string folded or unfolded; empty before the first call.
previous: Vec<u8>,
}
#[cfg(all(feature = "delta", feature = "serde"))]
impl Accumulator for StringAccumulator {
    type Value = str;
    type Storage = Box<str>;
    type Delta = [u8];
    type DeltaStorage = Box<[u8]>;

    /// Encodes `v` as the byte-wise XOR with the previous string (missing bytes count as 0),
    /// then remembers `v` as the new previous string.
    fn fold(&mut self, v: &Self::Value) -> Self::DeltaStorage {
        let delta: Vec<u8> = v
            .bytes()
            .enumerate()
            .map(|(i, byte)| byte ^ self.previous.get(i).copied().unwrap_or(0))
            .collect();
        self.previous = v.as_bytes().to_vec();
        delta.into_boxed_slice()
    }

    /// Decodes `d` by XOR-ing it with the previous string (missing bytes count as 0), then
    /// remembers the decoded bytes as the new previous string.
    ///
    /// # Panics
    ///
    /// Panics if the decoded bytes are not valid UTF-8.
    fn unfold(&mut self, d: &Self::Delta) -> Self::Storage {
        let value: Vec<u8> = d
            .iter()
            .enumerate()
            .map(|(i, byte)| byte ^ self.previous.get(i).copied().unwrap_or(0))
            .collect();
        // Stored before the UTF-8 check, as the decoded bytes are moved into the string below.
        self.previous = value.clone();
        String::from_utf8(value)
            .expect("Invalid UTF-8 encoding")
            .into_boxed_str()
    }
}
/// Round-trips an arena through the delta encoding (postcard) and pins the exact wire bytes.
#[cfg(all(feature = "delta", feature = "serde"))]
#[test]
fn test_serde_delta() {
    let inputs = ["", "a", "bb", "ccc", "dddd", "eeeee"];
    let arena = ArenaStr::default();
    let handles: [InternedStr; 6] = [
        arena.intern(inputs[0]),
        arena.intern(inputs[1]),
        arena.intern(inputs[2]),
        arena.intern(inputs[3]),
        arena.intern(inputs[4]),
        arena.intern(inputs[5]),
    ];
    assert_eq!(arena.strings(), 6);
    assert!(arena.bytes() >= 15);

    let delta_encoded: DeltaEncoding<&ArenaStr, StringAccumulator> = DeltaEncoding::new(&arena);
    let serialized_arena =
        postcard::to_stdvec(&delta_encoded).expect("Failed to serialize arena");
    // Layout: entry count, the six lengths, payload length, then each string XOR-ed with the
    // previous one.
    let mut expected_bytes = vec![6, 0, 1, 2, 3, 4, 5, 15];
    expected_bytes.extend_from_slice(&[97]);
    expected_bytes.extend_from_slice(&[3, 98]);
    expected_bytes.extend_from_slice(&[1, 1, 99]);
    expected_bytes.extend_from_slice(&[7, 7, 7, 100]);
    expected_bytes.extend_from_slice(&[1, 1, 1, 1, 101]);
    assert_eq!(serialized_arena, expected_bytes);

    let delta_encoded: DeltaEncoding<ArenaStr, StringAccumulator> =
        postcard::from_bytes(&serialized_arena).expect("Failed to deserialize arena");
    let new_arena = delta_encoded.into_inner();
    assert_eq!(new_arena.strings(), 6);
    assert_eq!(new_arena.bytes(), 15);

    let serialized_handles =
        postcard::to_stdvec(&handles).expect("Failed to serialize interned handles");
    assert_eq!(serialized_handles, vec![0, 1, 2, 3, 4, 5]);
    let new_handles: [InternedStr; 6] = postcard::from_bytes(&serialized_handles)
        .expect("Failed to deserialize interned handles");
    assert_eq!(new_handles, handles);

    // The original handles must resolve to the same strings in the deserialized arena.
    for (handle, expected) in handles.iter().zip(inputs.iter()) {
        assert_eq!(new_arena.lookup(*handle), *expected);
    }
}
}