Skip to main content

blazinterner/
str.rs

1use crate::CopyRangeU32;
2use appendvec::{AppendStr, AppendVec};
3use dashtable::DashTable;
4#[cfg(feature = "get-size2")]
5use get_size2::{GetSize, GetSizeTracker};
6use hashbrown::DefaultHashBuilder;
7#[cfg(feature = "serde")]
8use serde::de::{Error, SeqAccess, Visitor};
9#[cfg(feature = "serde")]
10use serde::ser::SerializeTuple;
11#[cfg(feature = "serde")]
12use serde::{Deserialize, Deserializer, Serialize, Serializer};
13#[cfg(feature = "serde")]
14use std::cell::Cell;
15use std::fmt::Debug;
16use std::hash::{BuildHasher, Hash};
17#[cfg(feature = "debug")]
18use std::sync::atomic::{self, AtomicUsize};
19
20/// A handle to an interned value in an [`ArenaStr`].
21#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
22#[cfg_attr(feature = "get-size2", derive(GetSize))]
23#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
24pub struct InternedStr(u32);
25
26impl Debug for InternedStr {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        f.debug_tuple("I").field(&self.0).finish()
29    }
30}
31
32#[cfg(feature = "raw")]
33impl InternedStr {
34    /// Creates an interned value for the given index.
35    ///
36    /// This is a low-level function. You should instead use the
37    /// [`from()`](Self::from) API to intern a value, unless you really know
38    /// what you're doing.
39    pub fn from_id(id: u32) -> Self {
40        Self::new(id)
41    }
42
43    /// Obtains the underlying interning index.
44    ///
45    /// This is a low-level function. You should instead use the
46    /// [`lookup()`](Self::lookup) API, unless you really know what you're
47    /// doing.
48    pub fn id(&self) -> u32 {
49        self.0
50    }
51}
52
53impl InternedStr {
54    fn new(id: u32) -> Self {
55        Self(id)
56    }
57}
58
59/// Interning arena for strings.
60pub struct ArenaStr {
61    vec: AppendStr,
62    ranges: AppendVec<CopyRangeU32>,
63    map: DashTable<u32>,
64    hasher: DefaultHashBuilder,
65    #[cfg(feature = "debug")]
66    references: AtomicUsize,
67}
68
69impl Clone for ArenaStr {
70    fn clone(&self) -> Self {
71        let mut arena = Self::with_capacity(self.strings(), self.bytes());
72        for s in self.iter() {
73            arena.push(s);
74        }
75        arena
76    }
77}
78
79impl ArenaStr {
80    /// Creates a new arena with pre-allocated space to store at least the given
81    /// number of strings, totalling the given number of bytes.
82    pub fn with_capacity(strings: usize, bytes: usize) -> Self {
83        Self {
84            vec: AppendStr::with_capacity(bytes),
85            ranges: AppendVec::with_capacity(strings),
86            map: DashTable::with_capacity(strings),
87            hasher: DefaultHashBuilder::default(),
88            #[cfg(feature = "debug")]
89            references: AtomicUsize::new(0),
90        }
91    }
92
93    /// Returns the number of strings in this arena.
94    ///
95    /// Note that because [`ArenaStr`] is a concurrent data structure, this is
96    /// only a snapshot as viewed by this thread, and the result may change
97    /// if other threads are inserting values.
98    pub fn strings(&self) -> usize {
99        self.ranges.len()
100    }
101
102    /// Returns the total number of bytes in this arena.
103    ///
104    /// Note that because [`ArenaStr`] is a concurrent data structure, this is
105    /// only a snapshot as viewed by this thread, and the result may change
106    /// if other threads are inserting values.
107    pub fn bytes(&self) -> usize {
108        self.vec.len()
109    }
110
111    /// Checks if this arena is empty.
112    ///
113    /// Note that because [`ArenaStr`] is a concurrent data structure, this is
114    /// only a snapshot as viewed by this thread, and the result may change
115    /// if other threads are inserting values.
116    pub fn is_empty(&self) -> bool {
117        self.strings() == 0
118    }
119
120    /// Returns the given string's [`InternedStr`] handle if it is already
121    /// interned.
122    ///
123    /// Otherwise, this simply returns [`None`] without adding the string to
124    /// this arena.
125    pub fn find(&self, value: &str) -> Option<InternedStr> {
126        let hash = self.hasher.hash_one(value);
127        self.map
128            .find(hash, |&i| self.lookup_str(i) == value)
129            .map(|id| InternedStr(*id))
130    }
131
132    /// Unconditionally push a value, without validating that it's already
133    /// interned.
134    pub fn push_mut(&mut self, value: &str) -> u32 {
135        self.push(value)
136    }
137}
138
139impl ArenaStr {
140    fn iter(&self) -> impl Iterator<Item = &str> {
141        self.ranges
142            .iter()
143            .map(|&range| &self.vec[range.start as usize..range.end as usize])
144    }
145
146    fn iter_bytes(&self) -> impl Iterator<Item = &[u8]> {
147        self.ranges
148            .iter()
149            .map(|&range| self.vec.get_bytes(range.start as usize..range.end as usize))
150    }
151}
152
153impl Default for ArenaStr {
154    fn default() -> Self {
155        Self {
156            vec: AppendStr::new(),
157            ranges: AppendVec::new(),
158            map: DashTable::new(),
159            hasher: DefaultHashBuilder::default(),
160            #[cfg(feature = "debug")]
161            references: AtomicUsize::new(0),
162        }
163    }
164}
165
166impl Debug for ArenaStr {
167    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
168        fmt.debug_list().entries(self.iter()).finish()
169    }
170}
171
172impl PartialEq for ArenaStr {
173    fn eq(&self, other: &Self) -> bool {
174        self.iter_bytes().eq(other.iter_bytes())
175    }
176}
177
178impl Eq for ArenaStr {}
179
180#[cfg(feature = "get-size2")]
181impl GetSize for ArenaStr {
182    fn get_heap_size_with_tracker<Tr: GetSizeTracker>(&self, tracker: Tr) -> (usize, Tr) {
183        let heap_size = self.vec.len() * size_of::<u8>()
184            + self.ranges.len() * (size_of::<CopyRangeU32>() + size_of::<u32>());
185        (heap_size, tracker)
186    }
187}
188
189#[cfg(feature = "debug")]
190impl ArenaStr {
191    /// Prints a summary of the storage used by this arena to stdout.
192    pub fn print_summary(&self, prefix: &str, title: &str, total_bytes: usize) {
193        let strings = self.ranges.len();
194        let references = self.references();
195        let estimated_bytes = self.get_size();
196        println!(
197            "{}[{:.02}%] {} interner: {} objects | {} bytes ({:.02} bytes/object) | {} references ({:.02} refs/object)",
198            prefix,
199            estimated_bytes as f64 * 100.0 / total_bytes as f64,
200            title,
201            strings,
202            estimated_bytes,
203            estimated_bytes as f64 / strings as f64,
204            references,
205            references as f64 / strings as f64,
206        );
207    }
208
209    fn references(&self) -> usize {
210        self.references.load(atomic::Ordering::Relaxed)
211    }
212}
213
214impl ArenaStr {
215    /// Interns the given value in this arena.
216    ///
217    /// If the value was already interned in this arena, its interning index
218    /// will simply be returned. Otherwise it will be stored into the arena.
219    pub fn intern(&self, value: &str) -> InternedStr {
220        #[cfg(feature = "debug")]
221        self.references.fetch_add(1, atomic::Ordering::Relaxed);
222
223        let hash = self.hasher.hash_one(value);
224        let id = *self
225            .map
226            .entry(
227                hash,
228                |&i| self.lookup_str(i) == value,
229                |&i| self.hasher.hash_one(self.lookup_str(i)),
230            )
231            .or_insert_with(|| {
232                let range = self.vec.push_str(value);
233                assert!(range.start <= u32::MAX as usize);
234                assert!(range.end <= u32::MAX as usize);
235                let range = range.start as u32..range.end as u32;
236
237                let id = self.ranges.push(range.into());
238                assert!(id <= u32::MAX as usize);
239                id as u32
240            })
241            .get();
242        InternedStr::new(id)
243    }
244
245    /// Unconditionally push a value, without validating that it's already
246    /// interned.
247    fn push(&mut self, value: &str) -> u32 {
248        #[cfg(feature = "debug")]
249        self.references.fetch_add(1, atomic::Ordering::Relaxed);
250
251        let hash = self.hasher.hash_one(value);
252
253        let range = self.vec.push_str_mut(value);
254        assert!(range.start <= u32::MAX as usize);
255        assert!(range.end <= u32::MAX as usize);
256        let range = range.start as u32..range.end as u32;
257
258        let id = self.ranges.push_mut(range.into());
259        assert!(id <= u32::MAX as usize);
260        let id = id as u32;
261
262        self.map
263            .insert_unique(hash, id, |&i| self.hasher.hash_one(self.lookup_str(i)));
264        id
265    }
266
267    /// Retrieves the given [`InternedStr`] value from this arena.
268    ///
269    /// The caller is responsible for ensuring that the same arena was used to
270    /// intern this value, otherwise an arbitrary value will be returned or
271    /// a panic will happen.
272    ///
273    /// If you only need to access the bytes,
274    /// [`lookup_bytes()`](Self::lookup_bytes) may be more efficient.
275    pub fn lookup(&self, interned: InternedStr) -> &str {
276        self.lookup_str(interned.0)
277    }
278
279    /// Retrieves the bytes for the given [`InternedStr`] value from this arena.
280    ///
281    /// The caller is responsible for ensuring that the same arena was used to
282    /// intern this value, otherwise an arbitrary value will be returned or
283    /// a panic will happen.
284    pub fn lookup_bytes(&self, interned: InternedStr) -> &[u8] {
285        let range = self.ranges[interned.0 as usize];
286        let range = range.start as usize..range.end as usize;
287        self.vec.get_bytes(range)
288    }
289
290    fn lookup_str(&self, id: u32) -> &str {
291        let range = self.ranges[id as usize];
292        let range = range.start as usize..range.end as usize;
293        &self.vec[range]
294    }
295}
296
297#[cfg(feature = "serde")]
298impl Serialize for ArenaStr {
299    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
300    where
301        S: Serializer,
302    {
303        let mut tuple = serializer.serialize_tuple(2)?;
304
305        let ranges = RangeWrapper {
306            ranges: &self.ranges,
307            ranges_len: Cell::new(0),
308            total_len: Cell::new(0),
309        };
310        tuple.serialize_element(&ranges)?;
311
312        tuple.serialize_element(&ArenaStrWrapper {
313            ranges_len: ranges.ranges_len.into_inner(),
314            total_len: ranges.total_len.into_inner(),
315            arena: self,
316        })?;
317
318        tuple.end()
319    }
320}
321
322#[cfg(feature = "serde")]
323struct RangeWrapper<'a> {
324    ranges: &'a AppendVec<CopyRangeU32>,
325    ranges_len: Cell<u32>,
326    total_len: Cell<u32>,
327}
328
329#[cfg(feature = "serde")]
330impl<'a> Serialize for RangeWrapper<'a> {
331    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
332    where
333        S: Serializer,
334    {
335        let mut ranges_len: u32 = 0;
336        let mut total_len: u32 = 0;
337        let result = serializer.collect_seq(self.ranges.iter().map(|range| {
338            ranges_len += 1;
339            let this_len = range.end - range.start;
340            total_len = total_len.strict_add(this_len);
341            this_len
342        }));
343
344        self.ranges_len.set(ranges_len);
345        self.total_len.set(total_len);
346
347        result
348    }
349}
350
351#[cfg(feature = "serde")]
352struct ArenaStrWrapper<'a> {
353    ranges_len: u32,
354    total_len: u32,
355    arena: &'a ArenaStr,
356}
357
358#[cfg(feature = "serde")]
359impl<'a> Serialize for ArenaStrWrapper<'a> {
360    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
361    where
362        S: Serializer,
363    {
364        // TODO: Make this zero-copy?
365        let mut string = String::with_capacity(self.total_len as usize);
366        for range in self.arena.ranges.iter().take(self.ranges_len as usize) {
367            let s = &self.arena.vec[range.start as usize..range.end as usize];
368            string.push_str(s);
369        }
370
371        serializer.serialize_str(&string)
372    }
373}
374
375#[cfg(feature = "serde")]
376impl<'de> Deserialize<'de> for ArenaStr {
377    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
378    where
379        D: Deserializer<'de>,
380    {
381        deserializer.deserialize_tuple(2, ArenaStrVisitor)
382    }
383}
384
385#[cfg(feature = "serde")]
386struct ArenaStrVisitor;
387
388#[cfg(feature = "serde")]
389impl<'de> Visitor<'de> for ArenaStrVisitor {
390    type Value = ArenaStr;
391
392    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
393        formatter.write_str("a pair of values")
394    }
395
396    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
397    where
398        A: SeqAccess<'de>,
399    {
400        let sizes: Vec<u32> = seq
401            .next_element()?
402            .ok_or_else(|| A::Error::invalid_length(0, &self))?;
403        let string: &str = seq
404            .next_element()?
405            .ok_or_else(|| A::Error::invalid_length(1, &self))?;
406
407        let mut arena = ArenaStr {
408            vec: AppendStr::with_capacity(string.len()),
409            ranges: AppendVec::with_capacity(sizes.len()),
410            map: DashTable::with_capacity(sizes.len()),
411            hasher: DefaultHashBuilder::default(),
412            #[cfg(feature = "debug")]
413            references: AtomicUsize::new(0),
414        };
415
416        let mut start = 0;
417        for size in sizes {
418            let size = size as usize;
419            arena.push(&string[start..start + size]);
420            start += size;
421        }
422
423        Ok(arena)
424    }
425}
426
427#[cfg(all(feature = "delta", feature = "serde"))]
428mod delta {
429    use super::*;
430    use crate::{Accumulator, DeltaEncoding};
431    use serde::ser::SerializeSeq;
432    use std::marker::PhantomData;
433
434    impl<Accum> Serialize for DeltaEncoding<&ArenaStr, Accum>
435    where
436        Accum: Accumulator<Value = str, Storage = Box<str>, DeltaStorage = Box<[u8]>>,
437    {
438        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
439        where
440            S: Serializer,
441        {
442            let mut tuple = serializer.serialize_tuple(2)?;
443
444            let ranges = RangeWrapper {
445                ranges: &self.ranges,
446                ranges_len: Cell::new(0),
447                total_len: Cell::new(0),
448            };
449            tuple.serialize_element(&ranges)?;
450
451            tuple.serialize_element(&ArenaStrWrapper {
452                ranges_len: ranges.ranges_len.into_inner(),
453                total_len: ranges.total_len.into_inner(),
454                arena: self,
455            })?;
456
457            tuple.end()
458        }
459    }
460
461    struct ArenaStrWrapper<'a, Accum> {
462        ranges_len: u32,
463        total_len: u32,
464        arena: &'a DeltaEncoding<&'a ArenaStr, Accum>,
465    }
466
467    impl<'a, Accum> Serialize for ArenaStrWrapper<'a, Accum>
468    where
469        Accum: Accumulator<Value = str, Storage = Box<str>, DeltaStorage = Box<[u8]>>,
470    {
471        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
472        where
473            S: Serializer,
474        {
475            let mut seq = serializer.serialize_seq(Some(self.total_len as usize))?;
476
477            let mut acc = Accum::default();
478            for range in self.arena.ranges.iter().take(self.ranges_len as usize) {
479                let slice = &self.arena.vec[range.start as usize..range.end as usize];
480                let delta = acc.fold(slice);
481                assert_eq!(
482                    delta.len(),
483                    slice.len(),
484                    "Invalid Accumulator implementation for DeltaEncoding of ArenaStr: delta length must match source string length (in bytes)"
485                );
486                for d in delta {
487                    seq.serialize_element(&d)?;
488                }
489            }
490
491            seq.end()
492        }
493    }
494
495    impl<'de, Accum> Deserialize<'de> for DeltaEncoding<ArenaStr, Accum>
496    where
497        Accum: Accumulator<Value = str, Storage = Box<str>, Delta = [u8]>,
498    {
499        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
500        where
501            D: Deserializer<'de>,
502        {
503            deserializer.deserialize_tuple(2, DeltaArenaStrVisitor::new())
504        }
505    }
506
507    struct DeltaArenaStrVisitor<Accum> {
508        _accum: PhantomData<Accum>,
509    }
510
511    impl<Accum> DeltaArenaStrVisitor<Accum> {
512        fn new() -> Self {
513            Self {
514                _accum: PhantomData,
515            }
516        }
517    }
518
519    impl<'de, Accum> Visitor<'de> for DeltaArenaStrVisitor<Accum>
520    where
521        Accum: Accumulator<Value = str, Storage = Box<str>, Delta = [u8]>,
522    {
523        type Value = DeltaEncoding<ArenaStr, Accum>;
524
525        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
526            formatter.write_str("a pair of values")
527        }
528
529        fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
530        where
531            A: SeqAccess<'de>,
532        {
533            let sizes: Vec<u32> = seq
534                .next_element()?
535                .ok_or_else(|| A::Error::invalid_length(0, &self))?;
536            let bytes: &[u8] = seq
537                .next_element()?
538                .ok_or_else(|| A::Error::invalid_length(1, &self))?;
539
540            let mut arena = ArenaStr {
541                vec: AppendStr::with_capacity(bytes.len()),
542                ranges: AppendVec::with_capacity(sizes.len()),
543                map: DashTable::with_capacity(sizes.len()),
544                hasher: DefaultHashBuilder::default(),
545                #[cfg(feature = "debug")]
546                references: AtomicUsize::new(0),
547            };
548
549            let mut acc = Accum::default();
550            let mut start = 0;
551            for size in sizes {
552                let size = size as usize;
553                let delta = &bytes[start..start + size];
554                let string = acc.unfold(delta);
555                assert_eq!(
556                    delta.len(),
557                    string.len(),
558                    "Invalid Accumulator implementation for DeltaEncoding of ArenaSlice: delta length must match destination string length (in bytes)"
559                );
560                arena.push(&string);
561                start += size;
562            }
563
564            Ok(DeltaEncoding {
565                inner: arena,
566                _phantom: PhantomData,
567            })
568        }
569    }
570}
571
572#[cfg(test)]
573mod test {
574    use super::*;
575    #[cfg(all(feature = "delta", feature = "serde"))]
576    use crate::{Accumulator, DeltaEncoding};
577    use std::thread;
578
579    fn make_utf8_string(mut i: u32) -> String {
580        let mut s = String::new();
581        while i != 0 {
582            let j = i % (64 + 26);
583            let c = if j < 64 {
584                // See https://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode.
585                char::from_u32(0x410 + j).expect("Invalid Unicode value")
586            } else {
587                char::from_u32(b'a' as u32 + j - 64).expect("Invalid Unicode value")
588            };
589            i /= 64 + 26;
590            s.push(c);
591        }
592        s
593    }
594
595    #[test]
596    fn test_utf8_string() {
597        assert_eq!(make_utf8_string(0), "");
598        assert_eq!(make_utf8_string(0).len(), 0);
599        assert_eq!(make_utf8_string(5), "Е");
600        assert_eq!(make_utf8_string(5).len(), 2);
601        assert_eq!(make_utf8_string(25), "Щ");
602        assert_eq!(make_utf8_string(25).len(), 2);
603        assert_eq!(make_utf8_string(125), "гБ");
604        assert_eq!(make_utf8_string(125).len(), 4);
605        assert_eq!(make_utf8_string(625), "vЖ");
606        assert_eq!(make_utf8_string(625).len(), 3);
607        assert_eq!(make_utf8_string(3125), "bв");
608        assert_eq!(make_utf8_string(3125).len(), 3);
609        assert_eq!(make_utf8_string(15625), "чtБ");
610        assert_eq!(make_utf8_string(15625).len(), 5);
611        assert_eq!(make_utf8_string(78125), "ЕъЙ");
612        assert_eq!(make_utf8_string(78125).len(), 6);
613        assert_eq!(make_utf8_string(390625), "ЩФр");
614        assert_eq!(make_utf8_string(390625).len(), 6);
615        assert_eq!(make_utf8_string(1953125), "гЛэВ");
616        assert_eq!(make_utf8_string(1953125).len(), 8);
617        assert_eq!(make_utf8_string(9765625), "vшгН");
618        assert_eq!(make_utf8_string(9765625).len(), 7);
619    }
620
621    #[test]
622    fn test_lookup() {
623        let arena = ArenaStr::default();
624
625        let empty = arena.intern("");
626        let a = arena.intern("a");
627        let b = arena.intern("bb");
628        let c = arena.intern("ccc");
629        let d = arena.intern("dddd");
630        let e = arena.intern("eeeee");
631
632        assert_eq!(arena.lookup(empty), "");
633        assert_eq!(arena.lookup(a), "a");
634        assert_eq!(arena.lookup(b), "bb");
635        assert_eq!(arena.lookup(c), "ccc");
636        assert_eq!(arena.lookup(d), "dddd");
637        assert_eq!(arena.lookup(e), "eeeee");
638    }
639
640    #[test]
641    fn test_intern_lookup() {
642        let arena = ArenaStr::default();
643        for i in 0..100 {
644            assert_eq!(arena.intern(&make_utf8_string(i)).0, i);
645        }
646        for i in 0..100 {
647            assert_eq!(arena.lookup(InternedStr::new(i)), &make_utf8_string(i));
648        }
649    }
650
651    const NUM_READERS: usize = 4;
652    const NUM_WRITERS: usize = 4;
653    #[cfg(not(miri))]
654    const NUM_ITEMS: usize = 1_000_000;
655    #[cfg(miri)]
656    const NUM_ITEMS: usize = 100;
657
658    #[test]
659    fn test_intern_lookup_concurrent_reads() {
660        let arena = ArenaStr::default();
661        thread::scope(|s| {
662            for _ in 0..NUM_READERS {
663                s.spawn(|| {
664                    loop {
665                        let len = arena.strings();
666                        if len > 0 {
667                            let last = len as u32 - 1;
668                            assert_eq!(
669                                arena.lookup(InternedStr::new(last)),
670                                &make_utf8_string(last)
671                            );
672                            if len == NUM_ITEMS {
673                                break;
674                            }
675                        }
676                    }
677                });
678            }
679            s.spawn(|| {
680                for j in 0..NUM_ITEMS as u32 {
681                    assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
682                }
683            });
684        });
685    }
686
687    #[test]
688    fn test_intern_lookup_concurrent_writes() {
689        let arena = ArenaStr::default();
690        thread::scope(|s| {
691            s.spawn(|| {
692                loop {
693                    let len = arena.strings();
694                    if len > 0 {
695                        let last = len as u32 - 1;
696                        assert_eq!(
697                            arena.lookup(InternedStr::new(last)),
698                            &make_utf8_string(last)
699                        );
700                        if len == NUM_ITEMS {
701                            break;
702                        }
703                    }
704                }
705            });
706            for _ in 0..NUM_WRITERS {
707                s.spawn(|| {
708                    for j in 0..NUM_ITEMS as u32 {
709                        assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
710                    }
711                });
712            }
713        });
714    }
715
716    #[test]
717    fn test_intern_lookup_concurrent_readwrites() {
718        let arena = ArenaStr::default();
719        thread::scope(|s| {
720            for _ in 0..NUM_READERS {
721                s.spawn(|| {
722                    loop {
723                        let len = arena.strings();
724                        if len > 0 {
725                            let last = len as u32 - 1;
726                            assert_eq!(
727                                arena.lookup(InternedStr::new(last)),
728                                &make_utf8_string(last)
729                            );
730                            if len == NUM_ITEMS {
731                                break;
732                            }
733                        }
734                    }
735                });
736            }
737            for _ in 0..NUM_WRITERS {
738                s.spawn(|| {
739                    for j in 0..NUM_ITEMS as u32 {
740                        assert_eq!(arena.intern(&make_utf8_string(j)).0, j);
741                    }
742                });
743            }
744        });
745    }
746
747    #[cfg(feature = "serde")]
748    #[test]
749    fn test_serde_postcard() {
750        let arena = ArenaStr::default();
751
752        let empty = arena.intern("");
753        let a = arena.intern("a");
754        let b = arena.intern("bb");
755        let c = arena.intern("ccc");
756        let d = arena.intern("dddd");
757        let e = arena.intern("eeeee");
758
759        assert_eq!(arena.strings(), 6);
760        assert!(arena.bytes() >= 15);
761
762        let serialized_arena = postcard::to_stdvec(&arena).expect("Failed to serialize arena");
763        assert_eq!(
764            serialized_arena,
765            vec![
766                6, 0, 1, 2, 3, 4, 5, 15, b'a', b'b', b'b', b'c', b'c', b'c', b'd', b'd', b'd',
767                b'd', b'e', b'e', b'e', b'e', b'e'
768            ]
769        );
770        let new_arena: ArenaStr =
771            postcard::from_bytes(&serialized_arena).expect("Failed to deserialize arena");
772        assert_eq!(new_arena, arena);
773
774        assert_eq!(new_arena.strings(), 6);
775        assert_eq!(new_arena.bytes(), 15);
776
777        let serialized_handles = postcard::to_stdvec(&[empty, a, b, c, d, e])
778            .expect("Failed to serialize interned handles");
779        assert_eq!(serialized_handles, vec![0, 1, 2, 3, 4, 5]);
780        let new_handles: [InternedStr; 6] = postcard::from_bytes(&serialized_handles)
781            .expect("Failed to deserialize interned handles");
782        assert_eq!(new_handles, [empty, a, b, c, d, e]);
783
784        assert_eq!(new_arena.lookup(empty), "");
785        assert_eq!(new_arena.lookup(a), "a");
786        assert_eq!(new_arena.lookup(b), "bb");
787        assert_eq!(new_arena.lookup(c), "ccc");
788        assert_eq!(new_arena.lookup(d), "dddd");
789        assert_eq!(new_arena.lookup(e), "eeeee");
790    }
791
792    #[cfg(feature = "serde")]
793    #[test]
794    fn test_serde_json() {
795        let arena = ArenaStr::default();
796
797        let empty = arena.intern("");
798        let a = arena.intern("a");
799        let b = arena.intern("bb");
800        let c = arena.intern("ccc");
801        let d = arena.intern("dddd");
802        let e = arena.intern("eeeee");
803
804        assert_eq!(arena.strings(), 6);
805        assert!(arena.bytes() >= 15);
806
807        let serialized_arena = serde_json::to_string(&arena).expect("Failed to serialize arena");
808        assert_eq!(serialized_arena, r#"[[0,1,2,3,4,5],"abbcccddddeeeee"]"#);
809        let new_arena: ArenaStr =
810            serde_json::from_str(&serialized_arena).expect("Failed to deserialize arena");
811        assert_eq!(new_arena, arena);
812
813        assert_eq!(new_arena.strings(), 6);
814        assert_eq!(new_arena.bytes(), 15);
815
816        let serialized_handles = serde_json::to_string(&[empty, a, b, c, d, e])
817            .expect("Failed to serialize interned handles");
818        assert_eq!(serialized_handles, "[0,1,2,3,4,5]");
819        let new_handles: [InternedStr; 6] = serde_json::from_str(&serialized_handles)
820            .expect("Failed to deserialize interned handles");
821        assert_eq!(new_handles, [empty, a, b, c, d, e]);
822    }
823
824    #[cfg(all(feature = "delta", feature = "serde"))]
825    #[derive(Default)]
826    struct StringAccumulator {
827        previous: Vec<u8>,
828    }
829
830    #[cfg(all(feature = "delta", feature = "serde"))]
831    impl Accumulator for StringAccumulator {
832        type Value = str;
833        type Storage = Box<str>;
834        type Delta = [u8];
835        type DeltaStorage = Box<[u8]>;
836
837        fn fold(&mut self, v: &Self::Value) -> Self::DeltaStorage {
838            let mut delta = Vec::with_capacity(v.len());
839            for (i, byte) in v.bytes().enumerate() {
840                delta.push(byte ^ self.previous.get(i).copied().unwrap_or(0));
841            }
842            self.previous = v.into();
843            delta.into()
844        }
845
846        fn unfold(&mut self, d: &Self::Delta) -> Self::Storage {
847            let mut value = Vec::with_capacity(d.len());
848            for (i, byte) in d.iter().enumerate() {
849                value.push(byte ^ self.previous.get(i).copied().unwrap_or(0));
850            }
851            self.previous = value.clone();
852            String::from_utf8(value)
853                .expect("Invalid UTF-8 encoding")
854                .into()
855        }
856    }
857
858    #[cfg(all(feature = "delta", feature = "serde"))]
859    #[test]
860    fn test_serde_delta() {
861        let arena = ArenaStr::default();
862
863        let empty = arena.intern("");
864        let a = arena.intern("a");
865        let b = arena.intern("bb");
866        let c = arena.intern("ccc");
867        let d = arena.intern("dddd");
868        let e = arena.intern("eeeee");
869
870        assert_eq!(arena.strings(), 6);
871        assert!(arena.bytes() >= 15);
872
873        let delta_encoded: DeltaEncoding<&ArenaStr, StringAccumulator> = DeltaEncoding::new(&arena);
874        let serialized_arena =
875            postcard::to_stdvec(&delta_encoded).expect("Failed to serialize arena");
876        assert_eq!(
877            serialized_arena,
878            vec![
879                6, 0, 1, 2, 3, 4, 5, 15, 97, 3, 98, 1, 1, 99, 7, 7, 7, 100, 1, 1, 1, 1, 101
880            ]
881        );
882        let delta_encoded: DeltaEncoding<ArenaStr, StringAccumulator> =
883            postcard::from_bytes(&serialized_arena).expect("Failed to deserialize arena");
884        let new_arena = delta_encoded.into_inner();
885
886        assert_eq!(new_arena.strings(), 6);
887        assert_eq!(new_arena.bytes(), 15);
888
889        let serialized_handles = postcard::to_stdvec(&[empty, a, b, c, d, e])
890            .expect("Failed to serialize interned handles");
891        assert_eq!(serialized_handles, vec![0, 1, 2, 3, 4, 5]);
892        let new_handles: [InternedStr; 6] = postcard::from_bytes(&serialized_handles)
893            .expect("Failed to deserialize interned handles");
894        assert_eq!(new_handles, [empty, a, b, c, d, e]);
895
896        assert_eq!(new_arena.lookup(empty), "");
897        assert_eq!(new_arena.lookup(a), "a");
898        assert_eq!(new_arena.lookup(b), "bb");
899        assert_eq!(new_arena.lookup(c), "ccc");
900        assert_eq!(new_arena.lookup(d), "dddd");
901        assert_eq!(new_arena.lookup(e), "eeeee");
902    }
903}