//! Dropless interning — `any_intern/dropless.rs`.
1use super::common::{Interned, RawInterned, UnsafeLock};
2use bumpalo::Bump;
3use core::str;
4use hashbrown::{hash_table::Entry, HashTable};
5use std::{
6    alloc::Layout,
7    fmt::{self, Debug, Display, Write},
8    hash::{BuildHasher, Hash, Hasher},
9    mem::MaybeUninit,
10    ptr::{self, NonNull},
11    slice,
12};
13
/// A trait for types that can be stored in a dropless interner.
///
/// The `Dropless` trait provides methods for working with types that can be stored in a
/// [`DroplessInterner`] or [`DroplessInternSet`]. It defines how instances of a type are converted
/// to and from raw byte representations, hashed, and compared for equality. This is useful for
/// interning values without requiring ownership.
///
/// Note that [`as_byte_ptr`](Dropless::as_byte_ptr) and [`layout`](Dropless::layout) have default
/// implementations, so most implementors only need to provide `from_bytes`, `hash`, and `eq`
/// (the example below implements all five for completeness).
///
/// # Safety
///
/// Implementing the `Dropless` trait requires careful attention to alignment and memory safety. The
/// methods in this trait are marked as `unsafe` because they rely on the caller to ensure that the
/// provided byte slices are properly aligned and valid for the type. Misuse of these methods can
/// lead to undefined behavior.
///
/// # Examples
///
/// ```
/// use any_intern::Dropless;
/// use std::alloc::Layout;
/// use std::ptr::NonNull;
/// use std::hash::{Hash, Hasher, BuildHasher};
///
/// #[derive(PartialEq, Eq, Hash, Debug)]
/// struct MyType(u32);
///
/// impl Dropless for MyType {
///     fn as_byte_ptr(&self) -> NonNull<u8> {
///         unsafe { NonNull::new_unchecked(self as *const Self as *mut Self).cast::<u8>() }
///     }
///
///     fn layout(&self) -> Layout {
///         Layout::for_value(self)
///     }
///
///     unsafe fn from_bytes(bytes: &[u8]) -> &Self {
///         let ptr = bytes.as_ptr().cast::<Self>();
///         unsafe { ptr.as_ref().unwrap_unchecked() }
///     }
///
///     unsafe fn hash<S: BuildHasher>(hash_builder: &S, bytes: &[u8]) -> u64 {
///         let this = unsafe { Self::from_bytes(bytes) };
///         let mut hasher = hash_builder.build_hasher();
///         this.hash(&mut hasher);
///         hasher.finish()
///     }
///
///     unsafe fn eq(a: &[u8], b: &[u8]) -> bool {
///         unsafe { Self::from_bytes(a) == Self::from_bytes(b) }
///     }
/// }
/// ```
pub trait Dropless {
    /// Returns pointer to the instance.
    fn as_byte_ptr(&self) -> NonNull<u8> {
        // Safety: A reference is non-null
        unsafe { NonNull::new_unchecked(self as *const Self as *mut Self).cast::<u8>() }
    }

    /// Returns layout of the type.
    fn layout(&self) -> Layout {
        Layout::for_value(self)
    }

    /// Converts a byte slice into a reference to the type.
    ///
    /// The byte slice is guaranteed to be well aligned and correct data for the type.
    ///
    /// # Safety
    ///
    /// Undefined behavior if any conditions below are not met.
    /// * Implementation should interpret the byte slice into the type correctly.
    /// * Caller should give well aligned data for the type.
    unsafe fn from_bytes(bytes: &[u8]) -> &Self;

    /// Computes a hash value for the type using the provided byte slice.
    ///
    /// The byte slice is guaranteed to be well aligned and correct data for the type.
    ///
    /// # Safety
    ///
    /// Undefined behavior if any conditions below are not met.
    /// * Implementation should interpret the byte slice into the type correctly.
    /// * Caller should give well aligned data for the type.
    unsafe fn hash<S: BuildHasher>(hash_builder: &S, bytes: &[u8]) -> u64;

    /// Compares two byte slices for equality as instances of the type.
    ///
    /// The byte slices are guaranteed to be well aligned and correct data for the type.
    ///
    /// # Safety
    ///
    /// Undefined behavior if any conditions below are not met.
    /// * Implementation should interpret the byte slice into the type correctly.
    /// * Caller should give well aligned data for the type.
    unsafe fn eq(a: &[u8], b: &[u8]) -> bool;
}
110
// Generates one `Dropless` method body for types whose interned bytes can be
// reinterpreted in place as `&Self`.
macro_rules! simple_impl_dropless_fn {
    // Casts the byte pointer straight to `&Self`; caller guarantees alignment.
    (from_bytes) => {
        unsafe fn from_bytes(bytes: &[u8]) -> &Self {
            let ptr = bytes.as_ptr().cast::<Self>();
            unsafe { ptr.as_ref().unwrap_unchecked() }
        }
    };
    // Hashes through the type's own `Hash` impl after reinterpreting the bytes.
    (hash) => {
        unsafe fn hash<S: BuildHasher>(hash_builder: &S, bytes: &[u8]) -> u64 {
            let this = unsafe { Self::from_bytes(bytes) };
            let mut hasher = hash_builder.build_hasher();
            this.hash(&mut hasher);
            hasher.finish()
        }
    };
    // Compares through the type's own `PartialEq` impl after reinterpreting both sides.
    (eq) => {
        unsafe fn eq(a: &[u8], b: &[u8]) -> bool {
            unsafe { Self::from_bytes(a) == Self::from_bytes(b) }
        }
    };
}
132
// Implements `Dropless` for each listed type using the simple
// reinterpretation-based method bodies above.
macro_rules! impl_dropless {
    ($($ty:ty)*) => {
        $(
            impl Dropless for $ty {
                simple_impl_dropless_fn!(from_bytes);
                simple_impl_dropless_fn!(hash);
                simple_impl_dropless_fn!(eq);
            }
        )*
    };
}

// Primitive scalars: fixed layout, no interior pointers, safe to intern by copy.
impl_dropless!(i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize bool char);
146
impl<T: Dropless + Hash + Eq> Dropless for [T] {
    // Reinterprets the bytes as a slice of `T`; the element count is recovered
    // from the byte length divided by `T`'s array stride.
    //
    // NOTE(review): if `T` is zero-sized the stride below is 0 and the division
    // panics; zero-sized element types appear unsupported here — confirm.
    unsafe fn from_bytes(bytes: &[u8]) -> &Self {
        let ptr = bytes.as_ptr().cast::<T>();
        let stride = Layout::new::<T>().pad_to_align().size();
        let num_elems = bytes.len() / stride;
        unsafe { slice::from_raw_parts(ptr, num_elems) }
    }
    simple_impl_dropless_fn!(hash);
    simple_impl_dropless_fn!(eq);
}
157
// Fixed-size arrays: `&[T; N]` is a thin reference, so the simple
// reinterpretation-based bodies apply directly.
impl<T: Dropless + Hash + Eq, const N: usize> Dropless for [T; N] {
    simple_impl_dropless_fn!(from_bytes);
    simple_impl_dropless_fn!(hash);
    simple_impl_dropless_fn!(eq);
}
163
impl Dropless for str {
    // The unchecked UTF-8 conversion is sound because the interner only stores
    // bytes that were copied from a valid `&str` in the first place.
    unsafe fn from_bytes(bytes: &[u8]) -> &Self {
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
    simple_impl_dropless_fn!(hash);
    simple_impl_dropless_fn!(eq);
}
171
// NOTE(review): `Option<T>`'s in-memory representation can contain padding or
// niche bytes; hashing/equality here go through the typed value (via
// `from_bytes`), but `intern` copies the raw bytes — confirm soundness for the
// `T`s actually used with this impl.
impl<T: Dropless + Hash + Eq> Dropless for Option<T> {
    simple_impl_dropless_fn!(from_bytes);
    simple_impl_dropless_fn!(hash);
    simple_impl_dropless_fn!(eq);
}
177
// Same reinterpretation strategy as `Option<T>` above; both variants must
// themselves be `Dropless`.
impl<T: Dropless + Hash + Eq, E: Dropless + Hash + Eq> Dropless for Result<T, E> {
    simple_impl_dropless_fn!(from_bytes);
    simple_impl_dropless_fn!(hash);
    simple_impl_dropless_fn!(eq);
}
183
/// An interner for storing and deduplicating values without requiring ownership.
///
/// The `DroplessInterner` is designed for interning values that implement the [`Dropless`] trait.
/// It allows efficient storage and retrieval of values by ensuring that each unique value is stored
/// only once. Interning with this type always copies the given value into an internal buffer,
/// making it suitable for use cases where ownership is not required.
///
/// # Examples
///
/// ```
/// use any_intern::DroplessInterner;
///
/// let mut interner = DroplessInterner::new();
///
/// // Interning strings
/// let hello = interner.intern("hello");
/// let world = interner.intern("world");
/// let another_hello = interner.intern("hello");
///
/// assert_eq!(hello, another_hello); // Same value, same reference
/// assert_ne!(hello, world); // Different values, different references
///
/// // Checking if a value exists
/// assert!(interner.get("hello").is_some());
/// assert!(interner.get("unknown").is_none());
///
/// // Clearing the interner
/// interner.clear();
/// assert!(interner.is_empty());
/// ```
///
/// # Safety
///
/// The `DroplessInterner` relies on the `Dropless` trait for converting values to and from raw
/// byte representations. It is the responsibility of the `Dropless` implementation to ensure
/// memory safety and alignment when interacting with the interner.
#[derive(Debug)]
pub struct DroplessInterner<S = fxhash::FxBuildHasher> {
    // The inner set guarded by `UnsafeLock`, giving `&self` methods interior
    // mutability — see `with_inner` for the locking discipline.
    inner: UnsafeLock<DroplessInternSet<S>>,
}
224
impl DroplessInterner {
    /// Creates an empty interner with the default hash builder.
    pub fn new() -> Self {
        Self::default()
    }
}
230
impl<S: BuildHasher> DroplessInterner<S> {
    /// Creates an empty interner that uses the given hash builder.
    pub fn with_hasher(hash_builder: S) -> Self {
        // Safety: Only one instance
        let inner = unsafe { UnsafeLock::new(DroplessInternSet::with_hasher(hash_builder)) };
        Self { inner }
    }

    /// Returns number of values the interner contains.
    pub fn len(&self) -> usize {
        self.with_inner(|set| set.len())
    }

    /// Returns true if the interner is empty.
    pub fn is_empty(&self) -> bool {
        self.with_inner(|set| set.is_empty())
    }

    /// Stores a value in the interner, returning a reference to the interned value.
    ///
    /// This method inserts the given value into the interner if it does not already exist.  If the
    /// value already exists, a reference to the existing value is returned. The value is copied
    /// into an internal buffer, making it suitable for use cases where ownership is not required.
    ///
    /// # Examples
    ///
    /// ```
    /// use any_intern::DroplessInterner;
    ///
    /// let interner = DroplessInterner::new();
    ///
    /// // Interning strings
    /// let hello = interner.intern("hello");
    /// let world = interner.intern("world");
    /// let another_hello = interner.intern("hello");
    ///
    /// assert_eq!(hello, another_hello); // Same value, same reference
    /// assert_ne!(hello, world); // Different values, different references
    ///
    /// // Interning arrays
    /// let array1 = interner.intern(&[1, 2, 3]);
    /// let array2 = interner.intern(&[1, 2, 3]);
    /// let array3 = interner.intern(&[4, 5, 6]);
    ///
    /// assert_eq!(array1, array2); // Same value, same reference
    /// assert_ne!(array1, array3); // Different values, different references
    /// ```
    pub fn intern<K: Dropless + ?Sized>(&self, value: &K) -> Interned<'_, K> {
        self.with_inner(|set| set.intern(value))
    }

    /// Stores a value in the interner as a formatted string through [`Display`], returning a
    /// reference to the interned value.
    ///
    /// This method provides an internal buffer for building the string. This can be beneficial in
    /// terms of performance when you frequently create `String`s via something like `to_string()`,
    /// by exploiting chunk memory.
    ///
    /// This method first formats the given value using the `Display` trait and stores the resulting
    /// string in the interner's buffer, then compares the string with existing values. If the
    /// formatted string already exists in the interner, formatted string is discarded and reference
    /// to the existing value is returned.
    ///
    /// If you give insufficient `upper_size`, then error is returned.
    ///
    /// # Examples
    ///
    /// ```
    /// use any_intern::DroplessInterner;
    ///
    /// let interner = DroplessInterner::new();
    ///
    /// let value = 42;
    /// let interned = interner.intern_formatted_str(&value, 10).unwrap();
    ///
    /// assert_eq!(&*interned, "42");
    /// ```
    pub fn intern_formatted_str<K: Display + ?Sized>(
        &self,
        value: &K,
        upper_size: usize,
    ) -> Result<Interned<'_, str>, fmt::Error> {
        self.with_inner(|set| set.intern_formatted_str(value, upper_size))
    }

    /// Retrieves a reference to a value in the interner based on the provided key.
    ///
    /// This method checks if a value corresponding to the given key exists in the interner. If it
    /// exists, a reference to the interned value is returned. Otherwise, `None` is returned.
    ///
    /// # Examples
    ///
    /// ```
    /// use any_intern::DroplessInterner;
    ///
    /// let interner = DroplessInterner::new();
    ///
    /// // Interning strings
    /// let hello = interner.intern("hello");
    ///
    /// assert_eq!(interner.get("hello").as_deref(), Some("hello"));
    /// assert!(interner.get("world").is_none());
    /// ```
    pub fn get<K: Dropless + ?Sized>(&self, value: &K) -> Option<Interned<'_, K>> {
        self.with_inner(|set| set.get(value))
    }

    /// Removes all items in the interner.
    ///
    /// Although the interner supports interior mutability, clear method requires mutable access
    /// to the interner to invalidate all [`Interned`]s referencing the interner.
    pub fn clear(&mut self) {
        self.with_inner(|set| set.clear())
    }

    // Runs `f` with exclusive access to the inner set while the lock is held.
    //
    // NOTE(review): handing a `&'this mut` out of a `&'this self` receiver relies
    // entirely on `UnsafeLock` guaranteeing exclusivity for the lock/unlock span —
    // confirm against `common::UnsafeLock`'s contract.
    fn with_inner<'this, F, R>(&'this self, f: F) -> R
    where
        F: FnOnce(&'this mut DroplessInternSet<S>) -> R,
        R: 'this,
    {
        // Safety: Mutex unlocking is paired with the locking.
        unsafe {
            let set = self.inner.lock().as_mut();
            let ret = f(set);
            self.inner.unlock();
            ret
        }
    }
}
359
360impl<S: Default> Default for DroplessInterner<S> {
361    fn default() -> Self {
362        // Safety: Only one instance
363        let inner = unsafe { UnsafeLock::new(DroplessInternSet::default()) };
364        Self { inner }
365    }
366}
367
/// A dropless interner.
///
/// Interning on this type always copies the given value into internal buffer.
#[derive(Debug, Default)]
pub struct DroplessInternSet<S = fxhash::FxBuildHasher> {
    // Arena holding the copied bytes of interned values.
    bump: Bump,
    // Chunked scratch buffer used by `intern_formatted_str` to format before lookup.
    str_buf: StringBuffer,
    // Hash table of type-erased entries pointing into `bump` or `str_buf`.
    set: HashTable<DynInternEntry<S>>,
    hash_builder: S,
}
378
impl DroplessInternSet {
    /// Creates an empty set with the default hash builder.
    pub fn new() -> Self {
        Self::default()
    }
}
384
385impl<S: BuildHasher> DroplessInternSet<S> {
386    pub fn with_hasher(hash_builder: S) -> Self {
387        Self {
388            bump: Bump::new(),
389            str_buf: StringBuffer::default(),
390            set: HashTable::new(),
391            hash_builder,
392        }
393    }
394
395    pub fn intern<K: Dropless + ?Sized>(&mut self, value: &K) -> Interned<'_, K> {
396        let src = value.as_byte_ptr();
397        let layout = value.layout();
398
399        unsafe {
400            // Allocation for zero sized data should be avoided even if Bump allocator allows it.
401            // Plus, we change its address to make it have fixed address for equality test.
402            // But this would change nothing actually.
403            if layout.size() == 0 {
404                // Safety: Making a pointer to ZST and acessing through it is safe.
405                let mut ptr = value as *const K;
406                let addr_part = &mut ptr as *mut *const K as *mut *const () as *mut usize;
407                *addr_part = layout.align(); // Like ptr::dangling()
408                return Interned::unique(&*ptr);
409            }
410
411            let bytes = slice::from_raw_parts(src.as_ptr(), layout.size());
412            let hash = <K as Dropless>::hash(&self.hash_builder, bytes);
413            let eq = Self::table_eq::<K>(bytes, layout);
414            let hasher = Self::table_hasher(&self.hash_builder);
415            let ref_ = match self.set.entry(hash, eq, hasher) {
416                Entry::Occupied(entry) => Self::ref_from_entry(entry.get()),
417                Entry::Vacant(entry) => {
418                    // New allocation & copy
419                    let dst = self.bump.alloc_layout(layout);
420                    ptr::copy_nonoverlapping(src.as_ptr(), dst.as_ptr(), layout.size());
421
422                    // New set entry
423                    let occupied = entry.insert(DynInternEntry {
424                        data: RawInterned(dst).cast(),
425                        layout,
426                        hash: <K as Dropless>::hash,
427                    });
428
429                    Self::ref_from_entry(occupied.get())
430                }
431            };
432            Interned::unique(ref_)
433        }
434    }
435
436    pub fn intern_formatted_str<K: Display + ?Sized>(
437        &mut self,
438        value: &K,
439        upper_size: usize,
440    ) -> Result<Interned<'_, str>, fmt::Error> {
441        let mut write_buf = self.str_buf.speculative_alloc(upper_size);
442        write!(write_buf, "{value}")?;
443        let bytes = write_buf.as_bytes();
444
445        unsafe {
446            let layout = Layout::from_size_align_unchecked(bytes.len(), 1);
447
448            // We change address to make it have fixed address for equality test.
449            // But this would change nothing actually.
450            if bytes.is_empty() {
451                // Safety: Making a pointer to ZST and acessing through it is safe.
452                let dangling_ptr = NonNull::<u8>::dangling().as_ptr().cast_const();
453                let empty_slice = slice::from_raw_parts(dangling_ptr, 0);
454                let empty_str = std::str::from_utf8_unchecked(empty_slice);
455                return Ok(Interned::unique(empty_str));
456            }
457
458            let hash = <str as Dropless>::hash(&self.hash_builder, bytes);
459            let eq = Self::table_eq::<str>(bytes, layout);
460            let hasher = Self::table_hasher(&self.hash_builder);
461            let ref_ = match self.set.entry(hash, eq, hasher) {
462                Entry::Occupied(entry) => {
463                    // Discards the change to the string buffer by just dropping the `write_buf`
464                    // because we have the same value in the bump alloator.
465                    // drop(write_buf);
466
467                    Self::ref_from_entry(entry.get())
468                }
469                Entry::Vacant(entry) => {
470                    // We keep the change to the string buffer because we're going to return the
471                    // reference to the string buffer.
472                    let bytes = write_buf.commit();
473
474                    // Safety: Zero size was filtered out above.
475                    let ptr = NonNull::new_unchecked(bytes.as_ptr().cast_mut());
476
477                    // New set entry
478                    let occupied = entry.insert(DynInternEntry {
479                        data: RawInterned(ptr).cast(),
480                        layout,
481                        hash: <str as Dropless>::hash,
482                    });
483
484                    Self::ref_from_entry(occupied.get())
485                }
486            };
487            Ok(Interned::unique(ref_))
488        }
489    }
490
491    /// # Eaxmples
492    ///
493    /// ```
494    /// use any_intern::DroplessInternSet;
495    /// use std::hash::RandomState;
496    ///
497    /// let mut set = DroplessInternSet::with_hasher(RandomState::new());
498    /// set.intern("hello");
499    ///
500    /// assert_eq!(set.get("hello").as_deref(), Some("hello"));
501    /// assert!(set.get("hi").is_none());
502    /// ```
503    pub fn get<K: Dropless + ?Sized>(&self, value: &K) -> Option<Interned<'_, K>> {
504        let ptr = value.as_byte_ptr();
505        let layout = value.layout();
506        unsafe {
507            let bytes = slice::from_raw_parts(ptr.as_ptr(), layout.size());
508            let hash = <K as Dropless>::hash(&self.hash_builder, bytes);
509            let eq = Self::table_eq::<K>(bytes, layout);
510            let entry = self.set.find(hash, eq)?;
511            let ref_ = Self::ref_from_entry(entry);
512            Some(Interned::unique(ref_))
513        }
514    }
515
516    pub fn len(&self) -> usize {
517        self.set.len()
518    }
519
520    pub fn is_empty(&self) -> bool {
521        self.len() == 0
522    }
523
524    pub fn clear(&mut self) {
525        self.bump.reset();
526        self.set.clear();
527    }
528
529    /// Returns `eq` closure that is used for some methods on the [`HashTable`].
530    #[inline]
531    fn table_eq<'a, K: Dropless + ?Sized>(
532        key: &'a [u8],
533        layout: Layout,
534    ) -> impl FnMut(&DynInternEntry<S>) -> bool + 'a {
535        move |entry: &DynInternEntry<S>| unsafe {
536            if layout == entry.layout {
537                let entry_bytes =
538                    slice::from_raw_parts(entry.data.cast::<u8>().as_ptr(), entry.layout.size());
539                <K as Dropless>::eq(key, entry_bytes)
540            } else {
541                false
542            }
543        }
544    }
545
546    /// Returns `hasher` closure that is used for some methods on the [`HashTable`].
547    #[inline]
548    fn table_hasher<'a>(hash_builder: &'a S) -> impl Fn(&DynInternEntry<S>) -> u64 + 'a {
549        |entry: &DynInternEntry<S>| unsafe {
550            let entry_bytes =
551                slice::from_raw_parts(entry.data.cast::<u8>().as_ptr(), entry.layout.size());
552            // Safety: `entry_bytes` is aligned for the entry hash function.
553            (entry.hash)(hash_builder, entry_bytes)
554        }
555    }
556
557    #[inline]
558    unsafe fn ref_from_entry<'a, K: Dropless + ?Sized>(entry: &DynInternEntry<S>) -> &'a K {
559        unsafe {
560            let bytes =
561                slice::from_raw_parts(entry.data.cast::<u8>().as_ptr(), entry.layout.size());
562            K::from_bytes(bytes)
563        }
564    }
565}
566
/// A buffer for strings.
///
/// Storage grows as a list of power-of-two-sized chunks; `last_chunk_start`
/// is the committed write offset into the most recent chunk.
#[derive(Debug, Default)]
struct StringBuffer {
    chunks: Vec<Vec<MaybeUninit<u8>>>,
    last_chunk_start: usize,
}

// Capacity of the very first chunk (32 bytes).
const INIT_CHUNK_SIZE: usize = 1 << 5;
// Upper bound on the doubling growth of chunk capacity (4 KiB); a larger
// `upper_size` request can still exceed this.
const GROW_MAX_CHUNK_SIZE: usize = 1 << 12;
576
impl StringBuffer {
    /// Reserves a write region of at least `upper_size` bytes in the last chunk,
    /// appending a new chunk first if the remaining space is insufficient.
    ///
    /// The returned [`StringWriteBuffer`] is speculative: nothing is committed to
    /// this buffer unless its `commit` method is called.
    fn speculative_alloc(&mut self, upper_size: usize) -> StringWriteBuffer<'_> {
        match self.chunks.last() {
            None => {
                // First allocation: size the chunk to fit the request.
                let chunk_size = INIT_CHUNK_SIZE.max(upper_size.next_power_of_two());
                self.append_new_chunk(chunk_size)
            }
            Some(last_chunk) if last_chunk.len() - self.last_chunk_start < upper_size => {
                // Not enough room left: grow (doubling, capped), but never below the request.
                let chunk_size = (last_chunk.len() * 2)
                    .min(GROW_MAX_CHUNK_SIZE)
                    .max(upper_size.next_power_of_two());
                self.append_new_chunk(chunk_size);
            }
            _ => {}
        }

        // Safety: We added the last chunk above.
        let buf = unsafe {
            let last_chunk = self.chunks.last_mut().unwrap_unchecked();
            last_chunk.as_mut_ptr().add(self.last_chunk_start)
        };
        StringWriteBuffer {
            buf,
            buf_cap: upper_size,
            last_chuck_start: &mut self.last_chunk_start,
            written: 0,
        }
    }

    /// Pushes a fresh chunk of `chunk_size` bytes and resets the write offset.
    #[inline]
    fn append_new_chunk(&mut self, chunk_size: usize) {
        let mut chunk: Vec<MaybeUninit<u8>> = Vec::with_capacity(chunk_size);

        // Safety: We reserved enough amount of memory, also it can contain uninitialized data.
        unsafe { chunk.set_len(chunk_size) };

        self.chunks.push(chunk);
        self.last_chunk_start = 0;
    }
}
617
/// A temporary, speculative write region handed out by [`StringBuffer`].
struct StringWriteBuffer<'a> {
    // Start of the region inside the current chunk.
    buf: *mut MaybeUninit<u8>,
    // Maximum number of bytes that may be written (the caller's `upper_size`).
    buf_cap: usize,
    // Owning buffer's write offset; advanced only on `commit`.
    // NOTE(review): field name typo — presumably "chunk", not "chuck".
    last_chuck_start: &'a mut usize,
    // Bytes written (and therefore initialized) so far.
    written: usize,
}
624
impl<'a> StringWriteBuffer<'a> {
    /// Returns the bytes written so far.
    #[inline]
    fn as_bytes(&self) -> &[u8] {
        // Safety: bytes 0..written were initialized by write_str.
        unsafe { slice::from_raw_parts(self.buf.cast::<u8>(), self.written) }
    }

    /// Makes the written bytes permanent by advancing the owning buffer's offset,
    /// and returns them with the buffer's lifetime.
    #[inline]
    fn commit(self) -> &'a [u8] {
        *self.last_chuck_start += self.written;

        // Safety: bytes 0..written were initialized by write_str.
        // The lifetime 'a is valid because buf points into a Vec owned by the StringBuffer that is
        // mutably borrowed for 'a (evidenced by last_chuck_start: &'a mut usize).
        unsafe { slice::from_raw_parts(self.buf.cast::<u8>(), self.written) }
    }
}
642
643impl Write for StringWriteBuffer<'_> {
644    fn write_str(&mut self, s: &str) -> std::fmt::Result {
645        let size = s.len();
646
647        if self.buf_cap - self.written >= size {
648            let src = s.as_ptr();
649
650            // Safety
651            // * `src` is valid for reading of `size` bytes
652            // * `dst` is valid for writing of `size` bytes
653            //   (`written + size <= buf_cap <= chunk len`)
654            // * Both alignments are 1
655            // * `src` and `dst` are not overlapping (src is a string literal / caller data)
656            let dst = unsafe { self.buf.add(self.written).cast::<u8>() };
657            unsafe { ptr::copy_nonoverlapping(src, dst, size) };
658
659            self.written += size;
660            Ok(())
661        } else {
662            Err(std::fmt::Error)
663        }
664    }
665}
666
/// A type-erased record of one interned value in the hash table.
struct DynInternEntry<S> {
    // Pointer to the interned bytes (in the bump arena or string buffer).
    data: RawInterned,
    // Layout of the original value; used to read `data` back and to keep
    // same-bytes, different-layout types distinct.
    layout: Layout,
    /// Input bytes must be well aligned.
    hash: unsafe fn(&S, &[u8]) -> u64,
}
673
674impl<S> Debug for DynInternEntry<S> {
675    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
676        f.debug_struct("DynInternEntry")
677            .field("data", &self.data)
678            .field("layout", &self.layout)
679            .finish_non_exhaustive()
680    }
681}
682
683#[cfg(test)]
684mod tests {
685    use super::*;
686    use crate::common;
687    use std::num::NonZeroU32;
688
    /// Runs every dropless-interner scenario from a single test entry point so
    /// the scenarios execute deterministically and independently.
    #[test]
    fn test_dropless_interner() {
        test_dropless_interner_int();
        test_dropless_interner_str();
        test_dropless_interner_bytes();
        test_dropless_interner_mixed();
        test_dropless_interner_many();
        test_dropless_interner_alignment_handling();
        test_dropless_interner_complex_display_type();
        test_dropless_interner_consecutive_formatted_strs();
    }
700
    // Integers with equal bytes+layout must share an address; distinct values must not.
    fn test_dropless_interner_int() {
        let interner = DroplessInterner::new();

        let a = interner.intern(&0_u32).erased_raw();
        let b = interner.intern(&0_u32).erased_raw();
        let c = interner.intern(&1_u32).erased_raw();
        // d has the same bytes and layout as c's, so memory will be shared.
        let d = interner.intern(&1_i32).erased_raw();

        let groups: [&[RawInterned]; _] = [&[a, b], &[c, d]];
        common::assert_group_addr_eq(&groups);
    }
713
    // Equal strings must deduplicate to one address regardless of how they were
    // produced: literals, owned `String`s, or `Display` formatting.
    fn test_dropless_interner_str() {
        let interner = DroplessInterner::new();

        // "apple"
        let a = interner.intern("apple").erased_raw();
        let b = interner.intern(*Box::new("apple")).erased_raw();
        let c = interner.intern(&*String::from("apple")).erased_raw();

        // "banana"
        let d = interner.intern("banana").erased_raw();

        // ""
        let e = interner.intern("").erased_raw();
        let f = interner.intern(&*String::from("")).erased_raw();

        // "42"
        let g = interner.intern("42").erased_raw();
        let h = interner.intern_formatted_str(&42, 2).unwrap().erased_raw();

        // "43"
        let i = interner
            .intern_formatted_str(&NonZeroU32::new(43).unwrap(), 2)
            .unwrap()
            .erased_raw();
        let j = interner.intern(&*43.to_string()).erased_raw();

        let groups: [&[RawInterned]; _] = [&[a, b, c], &[d], &[e, f], &[g, h], &[i, j]];
        common::assert_group_addr_eq(&groups);
    }
743
    // Equal slices must share an address whether they come from arrays, boxed
    // slices, or `Vec`s.
    fn test_dropless_interner_bytes() {
        let interner = DroplessInterner::new();

        let a = interner.intern(&[0, 1]).erased_raw();
        let boxed: Box<[i32]> = Box::new([0, 1]);
        let b = interner.intern(&*boxed).erased_raw();
        let c = interner.intern(&*vec![0, 1]).erased_raw();
        let d = interner.intern(&[2, 3]).erased_raw();

        let groups: [&[RawInterned]; _] = [&[a, b, c], &[d]];
        common::assert_group_addr_eq(&groups);
    }
756
    // `get` must find exactly the values interned, across mixed key types in one interner.
    fn test_dropless_interner_mixed() {
        let interner = DroplessInterner::new();

        interner.intern("apple");
        interner.intern(&1_u32);
        interner.intern(&[2, 3]);
        interner.intern_formatted_str(&42, 10).unwrap();

        assert_eq!(interner.get("apple").as_deref(), Some("apple"));
        assert!(interner.get("banana").is_none());

        assert_eq!(interner.get(&1_u32).as_deref(), Some(&1_u32));
        assert!(interner.get(&2_u32).is_none());

        assert_eq!(interner.get(&[2, 3]).as_deref(), Some(&[2, 3]));
        assert!(interner.get(&[2]).is_none());

        assert_eq!(interner.get("42").as_deref(), Some("42"));
    }
776
    // Interns many distinct values of three shapes and checks that all returned
    // pointers are unique and that `get` still finds every one of them.
    fn test_dropless_interner_many() {
        let interner = DroplessInterner::new();

        let mut interned_int = Vec::new();
        let mut interned_str = Vec::new();
        let mut interned_bytes = Vec::new();

        #[cfg(not(miri))]
        const N: usize = 1000;
        #[cfg(miri)]
        const N: usize = 50;

        let strs = (0..N).map(|i| (i * 10_000).to_string()).collect::<Vec<_>>();

        // Interns lots of items.
        for (i, str_) in strs.iter().enumerate().take(N) {
            let int = &(i as u32); // 4 bytes
            let str_ = str_.as_str(); // greater than 4 bytes
            let bytes = &[i as u16]; // 2 bytes
            interned_int.push(interner.intern(int).erased_raw());
            interned_str.push(interner.intern(str_).erased_raw());
            interned_bytes.push(interner.intern(bytes).erased_raw());
        }

        // Verifies every pointer is unique.
        interned_int.sort_unstable();
        interned_int.dedup();
        interned_str.sort_unstable();
        interned_str.dedup();
        interned_bytes.sort_unstable();
        interned_bytes.dedup();
        assert_eq!(interned_int.len(), N);
        assert_eq!(interned_str.len(), N);
        assert_eq!(interned_bytes.len(), N);
        let whole = interned_int
            .iter()
            .chain(&interned_str)
            .chain(&interned_bytes)
            .cloned()
            .collect::<fxhash::FxHashSet<_>>();
        assert_eq!(whole.len(), N * 3);

        // Verifies `get` method for every item.
        for (i, str_) in strs.iter().enumerate().take(N) {
            let int = &(i as u32); // 4 bytes
            let str_ = str_.as_str(); // greater than 4 bytes
            let bytes = &[i as u16]; // 2 bytes
            assert_eq!(interner.get(int).as_deref(), Some(int));
            assert_eq!(interner.get(str_).as_deref(), Some(str_));
            assert_eq!(interner.get(bytes).as_deref(), Some(bytes));
        }
    }
829
    // Types whose hash/eq look only at the first 2 bytes must still be kept
    // apart by the interner's layout check (see `table_eq`).
    #[rustfmt::skip]
    fn test_dropless_interner_alignment_handling() {
        #[derive(PartialEq, Eq, Hash, Debug)] #[repr(C, align(4))]  struct T4  (u16, [u8; 2]);
        #[derive(PartialEq, Eq, Hash, Debug)] #[repr(C, align(8))]  struct T8  (u16, [u8; 6]);
        #[derive(PartialEq, Eq, Hash, Debug)] #[repr(C, align(16))] struct T16 (u16, [u8; 14]);

        macro_rules! impl_for_first_2bytes {
            () => {
                unsafe fn hash<S: BuildHasher>(hash_builder: &S, bytes: &[u8]) -> u64 {
                    hash_builder.hash_one(&bytes[0..2])
                }
                unsafe fn eq(a: &[u8], b: &[u8]) -> bool {
                    a[0..2] == b[0..2]
                }
            };
        }

        impl Dropless for T4 { simple_impl_dropless_fn!(from_bytes); impl_for_first_2bytes!(); }
        impl Dropless for T8 { simple_impl_dropless_fn!(from_bytes); impl_for_first_2bytes!(); }
        impl Dropless for T16 { simple_impl_dropless_fn!(from_bytes); impl_for_first_2bytes!(); }

        let interner = DroplessInterner::new();

        let mut interned_4 = Vec::new();
        let mut interned_8 = Vec::new();
        let mut interned_16 = Vec::new();

        #[cfg(not(miri))]
        const N: usize = 1000;
        #[cfg(miri)]
        const N: usize = 50;

        // Items being interned have the same first 2 bytes, so they will look the same from
        // perspective of interner. But, they must be distinguished from each other due to their
        // different layouts. Following bytes after the 2 bytes will be used to detect data
        // validity.
        for i in 0..N {
            let t4 = T4(i as u16, [4; _]);
            let t8 = T8(i as u16, [8; _]);
            let t16 = T16(i as u16, [16; _]);
            interned_4.push(interner.intern(&t4).erased_raw());
            interned_8.push(interner.intern(&t8).erased_raw());
            interned_16.push(interner.intern(&t16).erased_raw());
        }

        for i in 0..N {
            let t4 = T4(i as u16, [4; _]);
            let t8 = T8(i as u16, [8; _]);
            let t16 = T16(i as u16, [16; _]);
            unsafe {
                assert_eq!(*<Interned<'_, T4>>::from_erased_raw(interned_4[i]), t4);
                assert_eq!(*<Interned<'_, T8>>::from_erased_raw(interned_8[i]), t8);
                assert_eq!(*<Interned<'_, T16>>::from_erased_raw(interned_16[i]), t16);
            }
        }
    }
886
887    fn test_dropless_interner_complex_display_type() {
888        let interner = DroplessInterner::new();
889
890        #[allow(unused)]
891        #[derive(Debug)]
892        struct A<'a> {
893            int: i32,
894            float: f32,
895            text: &'a str,
896            bytes: &'a [u8],
897        }
898
899        impl Display for A<'_> {
900            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
901                fmt::Debug::fmt(self, f)
902            }
903        }
904
905        let a = A {
906            int: 123,
907            float: 456.789,
908            text: "this is a text",
909            bytes: &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0],
910        };
911
912        let interned = interner.intern_formatted_str(&a, 1_000).unwrap();
913
914        let mut s = String::new();
915        write!(&mut s, "{a}").unwrap();
916        assert_eq!(&*interned, s.as_str());
917    }
918
919    // UB if the interner gets a mutable reference to a whole chunk while a reference(i0) is alive.
920    fn test_dropless_interner_consecutive_formatted_strs() {
921        let dropless = DroplessInterner::new();
922
923        let value = 0;
924        let i1 = dropless.intern_formatted_str(&value, 1).unwrap();
925
926        let value = 1;
927        let i2 = dropless.intern_formatted_str(&value, 1).unwrap();
928
929        // Prevents i1 and i2 from being optimized away.
930        let _c = format!("{i1}{i2}");
931    }
932
933    #[test]
934    fn test_string_buffer() {
935        test_string_buffer_chunk();
936        test_string_buffer_discard();
937        test_string_buffer_insufficient_upper_size();
938        test_string_buffer_long_string();
939    }
940
941    fn test_string_buffer_chunk() {
942        let mut buf = StringBuffer::default();
943        assert_eq!(buf.chunks.len(), 0);
944
945        // Fills the first chunk.
946        let s = "a".repeat(INIT_CHUNK_SIZE);
947        let mut write_buf = buf.speculative_alloc(s.len());
948        write_buf.write_str(&s).unwrap();
949        assert_eq!(write_buf.as_bytes(), s.as_bytes());
950        write_buf.commit();
951
952        assert_eq!(buf.chunks.len(), 1);
953        assert_eq!(buf.last_chunk_start, s.len());
954
955        // Makes another chunk.
956        let mut write_buf = buf.speculative_alloc(1);
957        write_buf.write_str("a").unwrap();
958        assert_eq!(write_buf.as_bytes(), b"a");
959        write_buf.commit();
960
961        assert_eq!(buf.chunks.len(), 2);
962        assert_eq!(buf.last_chunk_start, 1);
963
964        // Forces to make another chunk
965        let mut write_buf = buf.speculative_alloc(GROW_MAX_CHUNK_SIZE);
966        write_buf.write_str("aa").unwrap();
967        assert_eq!(write_buf.as_bytes(), b"aa");
968        write_buf.commit();
969
970        assert_eq!(buf.chunks.len(), 3);
971        assert_eq!(buf.last_chunk_start, 2);
972    }
973
974    fn test_string_buffer_discard() {
975        let mut buf = StringBuffer::default();
976
977        // Write & discard makes no change.
978        for _ in 0..10 {
979            let s = "a".repeat(INIT_CHUNK_SIZE);
980            let mut write_buf = buf.speculative_alloc(s.len());
981            write_buf.write_str(&s).unwrap();
982            assert_eq!(write_buf.as_bytes(), s.as_bytes());
983            assert_eq!(buf.chunks.len(), 1);
984            assert_eq!(buf.last_chunk_start, 0);
985        }
986    }
987
988    fn test_string_buffer_insufficient_upper_size() {
989        let mut buf = StringBuffer::default();
990
991        for _ in 0..10 {
992            let mut write_buf = buf.speculative_alloc(5);
993            let res = write_buf.write_str("this is longer than 5");
994            assert!(res.is_err());
995            write_buf.commit();
996
997            assert_eq!(buf.chunks.len(), 1);
998            assert_eq!(buf.last_chunk_start, 0);
999        }
1000    }
1001
1002    fn test_string_buffer_long_string() {
1003        let mut buf = StringBuffer::default();
1004
1005        let s = "a".repeat(GROW_MAX_CHUNK_SIZE * 10);
1006        let mut write_buf = buf.speculative_alloc(s.len());
1007        write_buf.write_str(&s).unwrap();
1008        assert_eq!(write_buf.as_bytes(), s.as_bytes());
1009        write_buf.commit();
1010
1011        assert_eq!(buf.chunks.len(), 1);
1012        assert_eq!(buf.last_chunk_start, s.len());
1013    }
1014}