1use crate::CopyRangeU32;
2use appendvec::{AppendStr, AppendVec};
3use dashtable::DashTable;
4#[cfg(feature = "get-size2")]
5use get_size2::{GetSize, GetSizeTracker};
6use hashbrown::DefaultHashBuilder;
7#[cfg(feature = "serde")]
8use serde::de::{Error, SeqAccess, Visitor};
9#[cfg(feature = "serde")]
10use serde::ser::SerializeTuple;
11#[cfg(feature = "serde")]
12use serde::{Deserialize, Deserializer, Serialize, Serializer};
13#[cfg(feature = "serde")]
14use std::cell::Cell;
15use std::fmt::Debug;
16use std::hash::{BuildHasher, Hash};
17#[cfg(feature = "debug")]
18use std::sync::atomic::{self, AtomicUsize};
19
20#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
22#[cfg_attr(feature = "get-size2", derive(GetSize))]
23#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
24pub struct InternedStr(u32);
25
26impl Debug for InternedStr {
27 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28 f.debug_tuple("I").field(&self.0).finish()
29 }
30}
31
32#[cfg(feature = "raw")]
33impl InternedStr {
34 pub fn from_id(id: u32) -> Self {
40 Self::new(id)
41 }
42
43 pub fn id(&self) -> u32 {
49 self.0
50 }
51}
52
53impl InternedStr {
54 fn new(id: u32) -> Self {
55 Self(id)
56 }
57}
58
59pub struct ArenaStr {
61 vec: AppendStr,
62 ranges: AppendVec<CopyRangeU32>,
63 map: DashTable<u32>,
64 hasher: DefaultHashBuilder,
65 #[cfg(feature = "debug")]
66 references: AtomicUsize,
67}
68
69impl Clone for ArenaStr {
70 fn clone(&self) -> Self {
71 let mut arena = Self::with_capacity(self.strings(), self.bytes());
72 for s in self.iter() {
73 arena.push(s);
74 }
75 arena
76 }
77}
78
79impl ArenaStr {
80 pub fn with_capacity(strings: usize, bytes: usize) -> Self {
83 Self {
84 vec: AppendStr::with_capacity(bytes),
85 ranges: AppendVec::with_capacity(strings),
86 map: DashTable::with_capacity(strings),
87 hasher: DefaultHashBuilder::default(),
88 #[cfg(feature = "debug")]
89 references: AtomicUsize::new(0),
90 }
91 }
92
93 pub fn strings(&self) -> usize {
99 self.ranges.len()
100 }
101
102 pub fn bytes(&self) -> usize {
108 self.vec.len()
109 }
110
111 pub fn is_empty(&self) -> bool {
117 self.strings() == 0
118 }
119
120 pub fn find(&self, value: &str) -> Option<InternedStr> {
126 let hash = self.hasher.hash_one(value);
127 self.map
128 .find(hash, |&i| self.lookup_str(i) == value)
129 .map(|id| InternedStr(*id))
130 }
131
132 pub fn push_mut(&mut self, value: &str) -> u32 {
135 self.push(value)
136 }
137}
138
139impl ArenaStr {
140 fn iter(&self) -> impl Iterator<Item = &str> {
141 self.ranges
142 .iter()
143 .map(|&range| &self.vec[range.start as usize..range.end as usize])
144 }
145
146 fn iter_bytes(&self) -> impl Iterator<Item = &[u8]> {
147 self.ranges
148 .iter()
149 .map(|&range| self.vec.get_bytes(range.start as usize..range.end as usize))
150 }
151}
152
153impl Default for ArenaStr {
154 fn default() -> Self {
155 Self {
156 vec: AppendStr::new(),
157 ranges: AppendVec::new(),
158 map: DashTable::new(),
159 hasher: DefaultHashBuilder::default(),
160 #[cfg(feature = "debug")]
161 references: AtomicUsize::new(0),
162 }
163 }
164}
165
166impl Debug for ArenaStr {
167 fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
168 fmt.debug_list().entries(self.iter()).finish()
169 }
170}
171
172impl PartialEq for ArenaStr {
173 fn eq(&self, other: &Self) -> bool {
174 self.iter_bytes().eq(other.iter_bytes())
175 }
176}
177
178impl Eq for ArenaStr {}
179
180#[cfg(feature = "get-size2")]
181impl GetSize for ArenaStr {
182 fn get_heap_size_with_tracker<Tr: GetSizeTracker>(&self, tracker: Tr) -> (usize, Tr) {
183 let heap_size = self.vec.len() * size_of::<u8>()
184 + self.ranges.len() * (size_of::<CopyRangeU32>() + size_of::<u32>());
185 (heap_size, tracker)
186 }
187}
188
189#[cfg(feature = "debug")]
190impl ArenaStr {
191 pub fn print_summary(&self, prefix: &str, title: &str, total_bytes: usize) {
193 let strings = self.ranges.len();
194 let references = self.references();
195 let estimated_bytes = self.get_size();
196 println!(
197 "{}[{:.02}%] {} interner: {} objects | {} bytes ({:.02} bytes/object) | {} references ({:.02} refs/object)",
198 prefix,
199 estimated_bytes as f64 * 100.0 / total_bytes as f64,
200 title,
201 strings,
202 estimated_bytes,
203 estimated_bytes as f64 / strings as f64,
204 references,
205 references as f64 / strings as f64,
206 );
207 }
208
209 fn references(&self) -> usize {
210 self.references.load(atomic::Ordering::Relaxed)
211 }
212}
213
214impl ArenaStr {
215 pub fn intern(&self, value: &str) -> InternedStr {
220 #[cfg(feature = "debug")]
221 self.references.fetch_add(1, atomic::Ordering::Relaxed);
222
223 let hash = self.hasher.hash_one(value);
224 let id = *self
225 .map
226 .entry(
227 hash,
228 |&i| self.lookup_str(i) == value,
229 |&i| self.hasher.hash_one(self.lookup_str(i)),
230 )
231 .or_insert_with(|| {
232 let range = self.vec.push_str(value);
233 assert!(range.start <= u32::MAX as usize);
234 assert!(range.end <= u32::MAX as usize);
235 let range = range.start as u32..range.end as u32;
236
237 let id = self.ranges.push(range.into());
238 assert!(id <= u32::MAX as usize);
239 id as u32
240 })
241 .get();
242 InternedStr::new(id)
243 }
244
245 fn push(&mut self, value: &str) -> u32 {
248 #[cfg(feature = "debug")]
249 self.references.fetch_add(1, atomic::Ordering::Relaxed);
250
251 let hash = self.hasher.hash_one(value);
252
253 let range = self.vec.push_str_mut(value);
254 assert!(range.start <= u32::MAX as usize);
255 assert!(range.end <= u32::MAX as usize);
256 let range = range.start as u32..range.end as u32;
257
258 let id = self.ranges.push_mut(range.into());
259 assert!(id <= u32::MAX as usize);
260 let id = id as u32;
261
262 self.map
263 .insert_unique(hash, id, |&i| self.hasher.hash_one(self.lookup_str(i)));
264 id
265 }
266
267 pub fn lookup(&self, interned: InternedStr) -> &str {
276 self.lookup_str(interned.0)
277 }
278
279 pub fn lookup_bytes(&self, interned: InternedStr) -> &[u8] {
285 let range = self.ranges[interned.0 as usize];
286 let range = range.start as usize..range.end as usize;
287 self.vec.get_bytes(range)
288 }
289
290 fn lookup_str(&self, id: u32) -> &str {
291 let range = self.ranges[id as usize];
292 let range = range.start as usize..range.end as usize;
293 &self.vec[range]
294 }
295}
296
297#[cfg(feature = "serde")]
298impl Serialize for ArenaStr {
299 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
300 where
301 S: Serializer,
302 {
303 let mut tuple = serializer.serialize_tuple(2)?;
304
305 let ranges = RangeWrapper {
306 ranges: &self.ranges,
307 ranges_len: Cell::new(0),
308 total_len: Cell::new(0),
309 };
310 tuple.serialize_element(&ranges)?;
311
312 tuple.serialize_element(&ArenaStrWrapper {
313 ranges_len: ranges.ranges_len.into_inner(),
314 total_len: ranges.total_len.into_inner(),
315 arena: self,
316 })?;
317
318 tuple.end()
319 }
320}
321
322#[cfg(feature = "serde")]
323struct RangeWrapper<'a> {
324 ranges: &'a AppendVec<CopyRangeU32>,
325 ranges_len: Cell<u32>,
326 total_len: Cell<u32>,
327}
328
329#[cfg(feature = "serde")]
330impl<'a> Serialize for RangeWrapper<'a> {
331 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
332 where
333 S: Serializer,
334 {
335 let mut ranges_len: u32 = 0;
336 let mut total_len: u32 = 0;
337 let result = serializer.collect_seq(self.ranges.iter().map(|range| {
338 ranges_len += 1;
339 let this_len = range.end - range.start;
340 total_len = total_len.strict_add(this_len);
341 this_len
342 }));
343
344 self.ranges_len.set(ranges_len);
345 self.total_len.set(total_len);
346
347 result
348 }
349}
350
351#[cfg(feature = "serde")]
352struct ArenaStrWrapper<'a> {
353 ranges_len: u32,
354 total_len: u32,
355 arena: &'a ArenaStr,
356}
357
358#[cfg(feature = "serde")]
359impl<'a> Serialize for ArenaStrWrapper<'a> {
360 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
361 where
362 S: Serializer,
363 {
364 let mut string = String::with_capacity(self.total_len as usize);
366 for range in self.arena.ranges.iter().take(self.ranges_len as usize) {
367 let s = &self.arena.vec[range.start as usize..range.end as usize];
368 string.push_str(s);
369 }
370
371 serializer.serialize_str(&string)
372 }
373}
374
375#[cfg(feature = "serde")]
376impl<'de> Deserialize<'de> for ArenaStr {
377 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
378 where
379 D: Deserializer<'de>,
380 {
381 deserializer.deserialize_tuple(2, ArenaStrVisitor)
382 }
383}
384
385#[cfg(feature = "serde")]
386struct ArenaStrVisitor;
387
388#[cfg(feature = "serde")]
389impl<'de> Visitor<'de> for ArenaStrVisitor {
390 type Value = ArenaStr;
391
392 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
393 formatter.write_str("a pair of values")
394 }
395
396 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
397 where
398 A: SeqAccess<'de>,
399 {
400 let sizes: Vec<u32> = seq
401 .next_element()?
402 .ok_or_else(|| A::Error::invalid_length(0, &self))?;
403 let string: &str = seq
404 .next_element()?
405 .ok_or_else(|| A::Error::invalid_length(1, &self))?;
406
407 let mut arena = ArenaStr {
408 vec: AppendStr::with_capacity(string.len()),
409 ranges: AppendVec::with_capacity(sizes.len()),
410 map: DashTable::with_capacity(sizes.len()),
411 hasher: DefaultHashBuilder::default(),
412 #[cfg(feature = "debug")]
413 references: AtomicUsize::new(0),
414 };
415
416 let mut start = 0;
417 for size in sizes {
418 let size = size as usize;
419 arena.push(&string[start..start + size]);
420 start += size;
421 }
422
423 Ok(arena)
424 }
425}
426
427#[cfg(all(feature = "delta", feature = "serde"))]
428mod delta {
429 use super::*;
430 use crate::{Accumulator, DeltaEncoding};
431 use serde::ser::SerializeSeq;
432 use std::marker::PhantomData;
433
434 impl<Accum> Serialize for DeltaEncoding<&ArenaStr, Accum>
435 where
436 Accum: Accumulator<Value = str, Storage = Box<str>, DeltaStorage = Box<[u8]>>,
437 {
438 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
439 where
440 S: Serializer,
441 {
442 let mut tuple = serializer.serialize_tuple(2)?;
443
444 let ranges = RangeWrapper {
445 ranges: &self.ranges,
446 ranges_len: Cell::new(0),
447 total_len: Cell::new(0),
448 };
449 tuple.serialize_element(&ranges)?;
450
451 tuple.serialize_element(&ArenaStrWrapper {
452 ranges_len: ranges.ranges_len.into_inner(),
453 total_len: ranges.total_len.into_inner(),
454 arena: self,
455 })?;
456
457 tuple.end()
458 }
459 }
460
461 struct ArenaStrWrapper<'a, Accum> {
462 ranges_len: u32,
463 total_len: u32,
464 arena: &'a DeltaEncoding<&'a ArenaStr, Accum>,
465 }
466
467 impl<'a, Accum> Serialize for ArenaStrWrapper<'a, Accum>
468 where
469 Accum: Accumulator<Value = str, Storage = Box<str>, DeltaStorage = Box<[u8]>>,
470 {
471 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
472 where
473 S: Serializer,
474 {
475 let mut seq = serializer.serialize_seq(Some(self.total_len as usize))?;
476
477 let mut acc = Accum::default();
478 for range in self.arena.ranges.iter().take(self.ranges_len as usize) {
479 let slice = &self.arena.vec[range.start as usize..range.end as usize];
480 let delta = acc.fold(slice);
481 assert_eq!(
482 delta.len(),
483 slice.len(),
484 "Invalid Accumulator implementation for DeltaEncoding of ArenaStr: delta length must match source string length (in bytes)"
485 );
486 for d in delta {
487 seq.serialize_element(&d)?;
488 }
489 }
490
491 seq.end()
492 }
493 }
494
495 impl<'de, Accum> Deserialize<'de> for DeltaEncoding<ArenaStr, Accum>
496 where
497 Accum: Accumulator<Value = str, Storage = Box<str>, Delta = [u8]>,
498 {
499 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
500 where
501 D: Deserializer<'de>,
502 {
503 deserializer.deserialize_tuple(2, DeltaArenaStrVisitor::new())
504 }
505 }
506
507 struct DeltaArenaStrVisitor<Accum> {
508 _accum: PhantomData<Accum>,
509 }
510
511 impl<Accum> DeltaArenaStrVisitor<Accum> {
512 fn new() -> Self {
513 Self {
514 _accum: PhantomData,
515 }
516 }
517 }
518
519 impl<'de, Accum> Visitor<'de> for DeltaArenaStrVisitor<Accum>
520 where
521 Accum: Accumulator<Value = str, Storage = Box<str>, Delta = [u8]>,
522 {
523 type Value = DeltaEncoding<ArenaStr, Accum>;
524
525 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
526 formatter.write_str("a pair of values")
527 }
528
529 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
530 where
531 A: SeqAccess<'de>,
532 {
533 let sizes: Vec<u32> = seq
534 .next_element()?
535 .ok_or_else(|| A::Error::invalid_length(0, &self))?;
536 let bytes: &[u8] = seq
537 .next_element()?
538 .ok_or_else(|| A::Error::invalid_length(1, &self))?;
539
540 let mut arena = ArenaStr {
541 vec: AppendStr::with_capacity(bytes.len()),
542 ranges: AppendVec::with_capacity(sizes.len()),
543 map: DashTable::with_capacity(sizes.len()),
544 hasher: DefaultHashBuilder::default(),
545 #[cfg(feature = "debug")]
546 references: AtomicUsize::new(0),
547 };
548
549 let mut acc = Accum::default();
550 let mut start = 0;
551 for size in sizes {
552 let size = size as usize;
553 let delta = &bytes[start..start + size];
554 let string = acc.unfold(delta);
555 assert_eq!(
556 delta.len(),
557 string.len(),
558 "Invalid Accumulator implementation for DeltaEncoding of ArenaSlice: delta length must match destination string length (in bytes)"
559 );
560 arena.push(&string);
561 start += size;
562 }
563
564 Ok(DeltaEncoding {
565 inner: arena,
566 _phantom: PhantomData,
567 })
568 }
569 }
570}
571
572#[cfg(test)]
573mod test {
574 use super::*;
575 #[cfg(all(feature = "delta", feature = "serde"))]
576 use crate::{Accumulator, DeltaEncoding};
577 use std::thread;
578
579 fn make_utf8_string(mut i: u32) -> String {
580 let mut s = String::new();
581 while i != 0 {
582 let j = i % (64 + 26);
583 let c = if j < 64 {
584 char::from_u32(0x410 + j).expect("Invalid Unicode value")
586 } else {
587 char::from_u32(b'a' as u32 + j - 64).expect("Invalid Unicode value")
588 };
589 i /= 64 + 26;
590 s.push(c);
591 }
592 s
593 }
594
595 #[test]
596 fn test_utf8_string() {
597 assert_eq!(make_utf8_string(0), "");
598 assert_eq!(make_utf8_string(0).len(), 0);
599 assert_eq!(make_utf8_string(5), "Е");
600 assert_eq!(make_utf8_string(5).len(), 2);
601 assert_eq!(make_utf8_string(25), "Щ");
602 assert_eq!(make_utf8_string(25).len(), 2);
603 assert_eq!(make_utf8_string(125), "гБ");
604 assert_eq!(make_utf8_string(125).len(), 4);
605 assert_eq!(make_utf8_string(625), "vЖ");
606 assert_eq!(make_utf8_string(625).len(), 3);
607 assert_eq!(make_utf8_string(3125), "bв");
608 assert_eq!(make_utf8_string(3125).len(), 3);
609 assert_eq!(make_utf8_string(15625), "чtБ");
610 assert_eq!(make_utf8_string(15625).len(), 5);
611 assert_eq!(make_utf8_string(78125), "ЕъЙ");
612 assert_eq!(make_utf8_string(78125).len(), 6);
613 assert_eq!(make_utf8_string(390625), "ЩФр");
614 assert_eq!(make_utf8_string(390625).len(), 6);
615 assert_eq!(make_utf8_string(1953125), "гЛэВ");
616 assert_eq!(make_utf8_string(1953125).len(), 8);
617 assert_eq!(make_utf8_string(9765625), "vшгН");
618 assert_eq!(make_utf8_string(9765625).len(), 7);
619 }
620
621 #[test]
622 fn test_lookup() {
623 let arena = ArenaStr::default();
624
625 let empty = arena.intern("");
626 let a = arena.intern("a");
627 let b = arena.intern("bb");
628 let c = arena.intern("ccc");
629 let d = arena.intern("dddd");
630 let e = arena.intern("eeeee");
631
632 assert_eq!(arena.lookup(empty), "");
633 assert_eq!(arena.lookup(a), "a");
634 assert_eq!(arena.lookup(b), "bb");
635 assert_eq!(arena.lookup(c), "ccc");
636 assert_eq!(arena.lookup(d), "dddd");
637 assert_eq!(arena.lookup(e), "eeeee");
638 }
639
640 #[test]
641 fn test_intern_lookup() {
642 let arena = ArenaStr::default();
643 for i in 0..100 {
644 assert_eq!(arena.intern(&make_utf8_string(i)).0, i);
645 }
646 for i in 0..100 {
647 assert_eq!(arena.lookup(InternedStr::new(i)), &make_utf8_string(i));
648 }
649 }
650
651 const NUM_READERS: usize = 4;
652 const NUM_WRITERS: usize = 4;
653 #[cfg(not(miri))]
654 const NUM_ITEMS: usize = 1_000_000;
655 #[cfg(miri)]
656 const NUM_ITEMS: usize = 100;
657
658 #[test]
659 fn test_intern_lookup_concurrent_reads() {
660 let arena = ArenaStr::default();
661 thread::scope(|s| {
662 for _ in 0..NUM_READERS {
663 s.spawn(|| {
664 loop {
665 let len = arena.strings();
666 if len > 0 {
667 let last = len as u32 - 1;
668 assert_eq!(
669 arena.lookup(InternedStr::new(last)),
670 &make_utf8_string(last)
671 );
672 if len == NUM_ITEMS {
673 break;
674 }
675 }
676 }
677 });
678 }
679 s.spawn(|| {
680 for j in 0..NUM_ITEMS as u32 {
681 assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
682 }
683 });
684 });
685 }
686
687 #[test]
688 fn test_intern_lookup_concurrent_writes() {
689 let arena = ArenaStr::default();
690 thread::scope(|s| {
691 s.spawn(|| {
692 loop {
693 let len = arena.strings();
694 if len > 0 {
695 let last = len as u32 - 1;
696 assert_eq!(
697 arena.lookup(InternedStr::new(last)),
698 &make_utf8_string(last)
699 );
700 if len == NUM_ITEMS {
701 break;
702 }
703 }
704 }
705 });
706 for _ in 0..NUM_WRITERS {
707 s.spawn(|| {
708 for j in 0..NUM_ITEMS as u32 {
709 assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
710 }
711 });
712 }
713 });
714 }
715
716 #[test]
717 fn test_intern_lookup_concurrent_readwrites() {
718 let arena = ArenaStr::default();
719 thread::scope(|s| {
720 for _ in 0..NUM_READERS {
721 s.spawn(|| {
722 loop {
723 let len = arena.strings();
724 if len > 0 {
725 let last = len as u32 - 1;
726 assert_eq!(
727 arena.lookup(InternedStr::new(last)),
728 &make_utf8_string(last)
729 );
730 if len == NUM_ITEMS {
731 break;
732 }
733 }
734 }
735 });
736 }
737 for _ in 0..NUM_WRITERS {
738 s.spawn(|| {
739 for j in 0..NUM_ITEMS as u32 {
740 assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
741 }
742 });
743 }
744 });
745 }
746
747 #[cfg(feature = "serde")]
748 #[test]
749 fn test_serde_postcard() {
750 let arena = ArenaStr::default();
751
752 let empty = arena.intern("");
753 let a = arena.intern("a");
754 let b = arena.intern("bb");
755 let c = arena.intern("ccc");
756 let d = arena.intern("dddd");
757 let e = arena.intern("eeeee");
758
759 assert_eq!(arena.strings(), 6);
760 assert!(arena.bytes() >= 15);
761
762 let serialized_arena = postcard::to_stdvec(&arena).expect("Failed to serialize arena");
763 assert_eq!(
764 serialized_arena,
765 vec![
766 6, 0, 1, 2, 3, 4, 5, 15, b'a', b'b', b'b', b'c', b'c', b'c', b'd', b'd', b'd',
767 b'd', b'e', b'e', b'e', b'e', b'e'
768 ]
769 );
770 let new_arena: ArenaStr =
771 postcard::from_bytes(&serialized_arena).expect("Failed to deserialize arena");
772 assert_eq!(new_arena, arena);
773
774 assert_eq!(new_arena.strings(), 6);
775 assert_eq!(new_arena.bytes(), 15);
776
777 let serialized_handles = postcard::to_stdvec(&[empty, a, b, c, d, e])
778 .expect("Failed to serialize interned handles");
779 assert_eq!(serialized_handles, vec![0, 1, 2, 3, 4, 5]);
780 let new_handles: [InternedStr; 6] = postcard::from_bytes(&serialized_handles)
781 .expect("Failed to deserialize interned handles");
782 assert_eq!(new_handles, [empty, a, b, c, d, e]);
783
784 assert_eq!(new_arena.lookup(empty), "");
785 assert_eq!(new_arena.lookup(a), "a");
786 assert_eq!(new_arena.lookup(b), "bb");
787 assert_eq!(new_arena.lookup(c), "ccc");
788 assert_eq!(new_arena.lookup(d), "dddd");
789 assert_eq!(new_arena.lookup(e), "eeeee");
790 }
791
792 #[cfg(feature = "serde")]
793 #[test]
794 fn test_serde_json() {
795 let arena = ArenaStr::default();
796
797 let empty = arena.intern("");
798 let a = arena.intern("a");
799 let b = arena.intern("bb");
800 let c = arena.intern("ccc");
801 let d = arena.intern("dddd");
802 let e = arena.intern("eeeee");
803
804 assert_eq!(arena.strings(), 6);
805 assert!(arena.bytes() >= 15);
806
807 let serialized_arena = serde_json::to_string(&arena).expect("Failed to serialize arena");
808 assert_eq!(serialized_arena, r#"[[0,1,2,3,4,5],"abbcccddddeeeee"]"#);
809 let new_arena: ArenaStr =
810 serde_json::from_str(&serialized_arena).expect("Failed to deserialize arena");
811 assert_eq!(new_arena, arena);
812
813 assert_eq!(new_arena.strings(), 6);
814 assert_eq!(new_arena.bytes(), 15);
815
816 let serialized_handles = serde_json::to_string(&[empty, a, b, c, d, e])
817 .expect("Failed to serialize interned handles");
818 assert_eq!(serialized_handles, "[0,1,2,3,4,5]");
819 let new_handles: [InternedStr; 6] = serde_json::from_str(&serialized_handles)
820 .expect("Failed to deserialize interned handles");
821 assert_eq!(new_handles, [empty, a, b, c, d, e]);
822 }
823
824 #[cfg(all(feature = "delta", feature = "serde"))]
825 #[derive(Default)]
826 struct StringAccumulator {
827 previous: Vec<u8>,
828 }
829
830 #[cfg(all(feature = "delta", feature = "serde"))]
831 impl Accumulator for StringAccumulator {
832 type Value = str;
833 type Storage = Box<str>;
834 type Delta = [u8];
835 type DeltaStorage = Box<[u8]>;
836
837 fn fold(&mut self, v: &Self::Value) -> Self::DeltaStorage {
838 let mut delta = Vec::with_capacity(v.len());
839 for (i, byte) in v.bytes().enumerate() {
840 delta.push(byte ^ self.previous.get(i).copied().unwrap_or(0));
841 }
842 self.previous = v.into();
843 delta.into()
844 }
845
846 fn unfold(&mut self, d: &Self::Delta) -> Self::Storage {
847 let mut value = Vec::with_capacity(d.len());
848 for (i, byte) in d.iter().enumerate() {
849 value.push(byte ^ self.previous.get(i).copied().unwrap_or(0));
850 }
851 self.previous = value.clone();
852 String::from_utf8(value)
853 .expect("Invalid UTF-8 encoding")
854 .into()
855 }
856 }
857
858 #[cfg(all(feature = "delta", feature = "serde"))]
859 #[test]
860 fn test_serde_delta() {
861 let arena = ArenaStr::default();
862
863 let empty = arena.intern("");
864 let a = arena.intern("a");
865 let b = arena.intern("bb");
866 let c = arena.intern("ccc");
867 let d = arena.intern("dddd");
868 let e = arena.intern("eeeee");
869
870 assert_eq!(arena.strings(), 6);
871 assert!(arena.bytes() >= 15);
872
873 let delta_encoded: DeltaEncoding<&ArenaStr, StringAccumulator> = DeltaEncoding::new(&arena);
874 let serialized_arena =
875 postcard::to_stdvec(&delta_encoded).expect("Failed to serialize arena");
876 assert_eq!(
877 serialized_arena,
878 vec![
879 6, 0, 1, 2, 3, 4, 5, 15, 97, 3, 98, 1, 1, 99, 7, 7, 7, 100, 1, 1, 1, 1, 101
880 ]
881 );
882 let delta_encoded: DeltaEncoding<ArenaStr, StringAccumulator> =
883 postcard::from_bytes(&serialized_arena).expect("Failed to deserialize arena");
884 let new_arena = delta_encoded.into_inner();
885
886 assert_eq!(new_arena.strings(), 6);
887 assert_eq!(new_arena.bytes(), 15);
888
889 let serialized_handles = postcard::to_stdvec(&[empty, a, b, c, d, e])
890 .expect("Failed to serialize interned handles");
891 assert_eq!(serialized_handles, vec![0, 1, 2, 3, 4, 5]);
892 let new_handles: [InternedStr; 6] = postcard::from_bytes(&serialized_handles)
893 .expect("Failed to deserialize interned handles");
894 assert_eq!(new_handles, [empty, a, b, c, d, e]);
895
896 assert_eq!(new_arena.lookup(empty), "");
897 assert_eq!(new_arena.lookup(a), "a");
898 assert_eq!(new_arena.lookup(b), "bb");
899 assert_eq!(new_arena.lookup(c), "ccc");
900 assert_eq!(new_arena.lookup(d), "dddd");
901 assert_eq!(new_arena.lookup(e), "eeeee");
902 }
903}