string_hash_interner/
backend.rs

1use crate::{intern::Intern, symbol::expect_valid_symbol, Symbol};
2use alloc::vec::Vec;
3use core::{fmt::Debug, iter::Enumerate, marker::PhantomData, slice};
4
5/// An interner backend that accumulates all interned string contents into one string.
6///
7/// # Note
8///
9/// Implementation inspired by [CAD97's](https://github.com/CAD97) research
10/// project [`strena`](https://github.com/CAD97/strena).
11///
12pub(crate) struct StringBackend<I: Intern + ?Sized, S> {
13    /// Stores end of the string and it's hash
14    ends: Vec<(usize, u64)>,
15    buffer: Vec<I::Primitive>,
16    marker: PhantomData<fn() -> S>,
17}
18
19impl<I: Intern + ?Sized, S> Debug for StringBackend<I, S> {
20    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
21        f.debug_struct("StringBackend")
22            .field("ends", &self.ends)
23            .field("buffer", &self.buffer)
24            .finish()
25    }
26}
27
28impl<I: Intern + ?Sized, S> Clone for StringBackend<I, S> {
29    fn clone(&self) -> Self {
30        Self {
31            ends: self.ends.clone(),
32            buffer: self.buffer.clone(),
33            marker: PhantomData,
34        }
35    }
36}
37
38impl<I: Intern + ?Sized, S> Default for StringBackend<I, S> {
39    #[cfg_attr(feature = "inline-more", inline)]
40    fn default() -> Self {
41        Self {
42            ends: Vec::default(),
43            buffer: Vec::default(),
44            marker: PhantomData,
45        }
46    }
47}
48
49impl<I: Intern + ?Sized, S: Symbol> StringBackend<I, S> {
50    /// Returns the string associated to the span.
51    ///
52    /// # Safety
53    ///
54    /// Span must be valid within the [Self::buffer]
55    unsafe fn span_to_str(&self, from: usize, to: usize) -> &I {
56        unsafe { I::from_bytes(&self.buffer[from..to]) }
57    }
58
59    #[cfg_attr(feature = "inline-more", inline)]
60    pub(crate) fn with_capacity(cap: usize) -> Self {
61        // According to google the approx. word length is 5. So we will use 10.
62        const DEFAULT_WORD_LEN: usize = 10;
63        Self {
64            ends: Vec::with_capacity(cap),
65            buffer: Vec::with_capacity(cap * DEFAULT_WORD_LEN),
66            marker: PhantomData,
67        }
68    }
69
70    #[inline]
71    pub(crate) fn intern(&mut self, string: &I, hash: u64) -> S {
72        self.buffer.extend_from_slice(string.as_bytes());
73        let to = self.buffer.len();
74        let symbol = {
75            let this = &self;
76            expect_valid_symbol(this.ends.len())
77        };
78        self.ends.push((to, hash));
79        symbol
80    }
81
82    #[inline]
83    pub(crate) fn resolve(&self, symbol: S) -> Option<&I> {
84        let index = symbol.to_usize();
85        let to = self.ends.get(index)?.0;
86
87        let from = self
88            .ends
89            .get(index.wrapping_sub(1))
90            .map(|&(end, _)| end)
91            .unwrap_or(0);
92
93        // SAFETY: This span is guaranteed to be valid
94        unsafe { Some(self.span_to_str(from, to)) }
95    }
96
97    pub(crate) fn shrink_to_fit(&mut self) {
98        self.ends.shrink_to_fit();
99        self.buffer.shrink_to_fit();
100    }
101
102    #[inline]
103    pub(crate) unsafe fn resolve_unchecked(&self, symbol: S) -> &I {
104        let index = symbol.to_usize();
105        // SAFETY: The function is marked unsafe so that the caller guarantees
106        //         that required invariants are checked.
107        let to = unsafe { self.ends.get_unchecked(index).0 };
108        let from = self
109            .ends
110            .get(index.wrapping_sub(1))
111            .map(|&(end, _)| end)
112            .unwrap_or(0);
113
114        // SAFETY: This span is guaranteed to be valid
115        unsafe { self.span_to_str(from, to) }
116    }
117
118    pub fn get_hash(&self, symbol: S) -> Option<u64> {
119        self.ends.get(symbol.to_usize()).map(|&(_, hash)| hash)
120    }
121
122    pub unsafe fn get_hash_unchecked(&self, symbol: S) -> u64 {
123        // SAFETY: The function is marked unsafe so that the caller guarantees
124        //         that required invariants are checked.
125        unsafe { self.ends.get_unchecked(symbol.to_usize()).1 }
126    }
127
128    #[inline]
129    pub(crate) fn iter(&self) -> Iter<'_, I, S> {
130        Iter::new(self)
131    }
132
133    #[inline]
134    pub(crate) fn iter_with_hashes(&self) -> IterWithHashes<'_, I, S> {
135        IterWithHashes::new(self)
136    }
137}
138
139impl<'a, I: Intern + ?Sized, S: Symbol> IntoIterator for &'a StringBackend<I, S> {
140    type Item = (S, &'a I);
141    type IntoIter = Iter<'a, I, S>;
142
143    #[cfg_attr(feature = "inline-more", inline)]
144    fn into_iter(self) -> Self::IntoIter {
145        self.iter()
146    }
147}
148
149/// An iterator over the interned symbols, their strings, and their hashes.
150pub struct IterWithHashes<'a, I: Intern + ?Sized, S> {
151    backend: &'a StringBackend<I, S>,
152    start: usize,
153    ends: Enumerate<slice::Iter<'a, (usize, u64)>>,
154}
155
156impl<'a, I: Intern + ?Sized, S> IterWithHashes<'a, I, S> {
157    #[cfg_attr(feature = "inline-more", inline)]
158    fn new(backend: &'a StringBackend<I, S>) -> Self {
159        Self {
160            backend,
161            start: 0,
162            ends: backend.ends.iter().enumerate(),
163        }
164    }
165}
166
167impl<'a, I: Intern + ?Sized, S: Symbol> Iterator for IterWithHashes<'a, I, S> {
168    type Item = (S, &'a I, u64);
169
170    #[inline]
171    fn size_hint(&self) -> (usize, Option<usize>) {
172        self.ends.size_hint()
173    }
174
175    #[inline]
176    fn next(&mut self) -> Option<Self::Item> {
177        let (id, &(to, hash)) = self.ends.next()?;
178        let from = core::mem::replace(&mut self.start, to);
179
180        // SAFETY: This span is guaranteed to be valid
181        let string = unsafe { self.backend.span_to_str(from, to) };
182
183        Some((expect_valid_symbol(id), string, hash))
184    }
185}
186
187/// An iterator over the interned symbols and their strings
188pub struct Iter<'a, I: Intern + ?Sized, S> {
189    inner: IterWithHashes<'a, I, S>,
190}
191
192impl<'a, I: Intern + ?Sized, S> Iter<'a, I, S> {
193    #[cfg_attr(feature = "inline-more", inline)]
194    fn new(backend: &'a StringBackend<I, S>) -> Self {
195        Self {
196            inner: IterWithHashes::new(backend),
197        }
198    }
199}
200
201impl<'a, I: Intern + ?Sized, S: Symbol> Iterator for Iter<'a, I, S>
202where
203    S: Symbol,
204{
205    type Item = (S, &'a I);
206
207    #[inline]
208    fn size_hint(&self) -> (usize, Option<usize>) {
209        self.inner.size_hint()
210    }
211
212    #[inline]
213    fn next(&mut self) -> Option<Self::Item> {
214        let (sym, s, _hash) = self.inner.next()?;
215        Some((sym, s))
216    }
217}