Skip to main content

wasmtime_environ/
string_pool.rs

1//! Simple string interning.
2
3use crate::{
4    collections::{HashMap, Vec},
5    error::OutOfMemory,
6    prelude::*,
7};
8use core::{fmt, mem, num::NonZeroU32};
9
10/// An interned string associated with a particular string in a `StringPool`.
11///
12/// Allows for $O(1)$ equality tests, $O(1)$ hashing, and $O(1)$
13/// arbitrary-but-stable ordering.
14#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
15pub struct Atom {
16    index: NonZeroU32,
17}
18
19/// A pool of interned strings.
20///
21/// Insert new strings with [`StringPool::insert`] to get an `Atom` that is
22/// unique per string within the context of the associated pool.
23///
24/// Once you have interned a string into the pool and have its `Atom`, you can
25/// get the interned string slice via `&pool[atom]` or `pool.get(atom)`.
26///
27/// In general, there are no correctness protections against indexing into a
28/// different `StringPool` from the one that the `Atom` was not allocated
29/// inside. Doing so is memory safe but may panic or otherwise return incorrect
30/// results.
31#[derive(Default)]
32pub struct StringPool {
33    /// A map from each string in this pool (as an unsafe borrow from
34    /// `self.strings`) to its `Atom`.
35    map: mem::ManuallyDrop<HashMap<&'static str, Atom>>,
36
37    /// Strings in this pool. These must never be mutated or reallocated once
38    /// inserted.
39    strings: mem::ManuallyDrop<Vec<Box<str>>>,
40}
41
42impl Drop for StringPool {
43    fn drop(&mut self) {
44        // Ensure that `self.map` is dropped before `self.strings`, since
45        // `self.map` borrows from `self.strings`.
46        //
47        // Safety: Neither field will be used again.
48        unsafe {
49            mem::ManuallyDrop::drop(&mut self.map);
50            mem::ManuallyDrop::drop(&mut self.strings);
51        }
52    }
53}
54
55impl fmt::Debug for StringPool {
56    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57        struct Strings<'a>(&'a StringPool);
58        impl fmt::Debug for Strings<'_> {
59            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60                f.debug_map()
61                    .entries(
62                        self.0
63                            .strings
64                            .iter()
65                            .enumerate()
66                            .map(|(i, s)| (Atom::new(i), s)),
67                    )
68                    .finish()
69            }
70        }
71
72        f.debug_struct("StringPool")
73            .field("strings", &Strings(self))
74            .finish()
75    }
76}
77
78impl TryClone for StringPool {
79    fn try_clone(&self) -> Result<Self, OutOfMemory> {
80        let mut new_pool = StringPool::new();
81        // Re-intern strings in index order so that each Atom value is
82        // identical in the clone — callers that hold Atoms from the original
83        // can use them interchangeably with the clone.
84        //
85        // Directly cloning `self.map` would copy &'static str keys that point
86        // into the *original* pool's `strings` allocation. Those pointers
87        // become dangling once the original is dropped, leading to UB on any
88        // subsequent lookup. Re-interning ensures the cloned map's keys point
89        // into the clone's own `strings`.
90        for s in self.strings.iter() {
91            new_pool.insert(s)?;
92        }
93        Ok(new_pool)
94    }
95}
96
97impl TryClone for Atom {
98    fn try_clone(&self) -> Result<Self, OutOfMemory> {
99        Ok(*self)
100    }
101}
102
103impl core::ops::Index<Atom> for StringPool {
104    type Output = str;
105
106    #[inline]
107    #[track_caller]
108    fn index(&self, atom: Atom) -> &Self::Output {
109        self.get(atom).unwrap()
110    }
111}
112
113// For convenience, to avoid `*atom` noise at call sites.
114impl core::ops::Index<&'_ Atom> for StringPool {
115    type Output = str;
116
117    #[inline]
118    #[track_caller]
119    fn index(&self, atom: &Atom) -> &Self::Output {
120        self.get(*atom).unwrap()
121    }
122}
123
124impl serde::ser::Serialize for StringPool {
125    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
126    where
127        S: serde::Serializer,
128    {
129        serde::ser::Serialize::serialize(&*self.strings, serializer)
130    }
131}
132
133impl<'de> serde::de::Deserialize<'de> for StringPool {
134    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
135    where
136        D: serde::Deserializer<'de>,
137    {
138        struct Visitor;
139        impl<'de> serde::de::Visitor<'de> for Visitor {
140            type Value = StringPool;
141
142            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
143                f.write_str("a `StringPool` sequence of strings")
144            }
145
146            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
147            where
148                A: serde::de::SeqAccess<'de>,
149            {
150                use serde::de::Error as _;
151
152                let mut pool = StringPool::new();
153
154                if let Some(len) = seq.size_hint() {
155                    pool.map.reserve(len).map_err(|oom| A::Error::custom(oom))?;
156                    pool.strings
157                        .reserve(len)
158                        .map_err(|oom| A::Error::custom(oom))?;
159                }
160
161                while let Some(s) = seq.next_element::<TryString>()? {
162                    debug_assert_eq!(s.len(), s.capacity());
163                    let s = s.into_boxed_str().map_err(|oom| A::Error::custom(oom))?;
164                    if !pool.map.contains_key(&*s) {
165                        pool.insert_new_boxed_str(s)
166                            .map_err(|oom| A::Error::custom(oom))?;
167                    }
168                }
169
170                Ok(pool)
171            }
172        }
173        deserializer.deserialize_seq(Visitor)
174    }
175}
176
177impl StringPool {
178    /// Create a new, empty pool.
179    pub fn new() -> Self {
180        Self::default()
181    }
182
183    /// Insert a new string into this pool.
184    pub fn insert(&mut self, s: &str) -> Result<Atom, OutOfMemory> {
185        if let Some(atom) = self.map.get(s) {
186            return Ok(*atom);
187        }
188
189        self.map.reserve(1)?;
190        self.strings.reserve(1)?;
191
192        let mut owned = TryString::new();
193        owned.reserve_exact(s.len())?;
194        owned.push_str(s).expect("reserved capacity");
195        let owned = owned
196            .into_boxed_str()
197            .expect("reserved exact capacity, so shouldn't need to realloc");
198
199        self.insert_new_boxed_str(owned)
200    }
201
202    fn insert_new_boxed_str(&mut self, owned: Box<str>) -> Result<Atom, OutOfMemory> {
203        debug_assert!(!self.map.contains_key(&*owned));
204
205        let index = self.strings.len();
206        let atom = Atom::new(index);
207        self.strings.push(owned)?;
208
209        // SAFETY: We never expose this borrow and never mutate or reallocate
210        // strings once inserted into the pool.
211        let s = unsafe { mem::transmute::<&str, &'static str>(&self.strings[index]) };
212
213        let old = self.map.insert(s, atom)?;
214        debug_assert!(old.is_none());
215
216        Ok(atom)
217    }
218
219    /// Get the `Atom` for the given string, if it has already been inserted
220    /// into this pool.
221    pub fn get_atom(&self, s: &str) -> Option<Atom> {
222        self.map.get(s).copied()
223    }
224
225    /// Does this pool contain the given `atom`?
226    #[inline]
227    pub fn contains(&self, atom: Atom) -> bool {
228        atom.index() < self.strings.len()
229    }
230
231    /// Get the string associated with the given `atom`, if the pool contains
232    /// the atom.
233    #[inline]
234    pub fn get(&self, atom: Atom) -> Option<&str> {
235        if self.contains(atom) {
236            Some(&self.strings[atom.index()])
237        } else {
238            None
239        }
240    }
241
242    /// Get the number of strings in this pool.
243    pub fn len(&self) -> usize {
244        self.strings.len()
245    }
246}
247
248impl Default for Atom {
249    #[inline]
250    fn default() -> Self {
251        Self {
252            index: NonZeroU32::MAX,
253        }
254    }
255}
256
257impl fmt::Debug for Atom {
258    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
259        f.debug_struct("Atom")
260            .field("index", &self.index())
261            .finish()
262    }
263}
264
265// Allow using `Atom` in `SecondaryMap`s.
266impl crate::EntityRef for Atom {
267    fn new(index: usize) -> Self {
268        Atom::new(index)
269    }
270
271    fn index(self) -> usize {
272        Atom::index(&self)
273    }
274}
275
276impl serde::ser::Serialize for Atom {
277    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
278    where
279        S: serde::Serializer,
280    {
281        serde::ser::Serialize::serialize(&self.index, serializer)
282    }
283}
284
285impl<'de> serde::de::Deserialize<'de> for Atom {
286    fn deserialize<D>(deserializer: D) -> core::result::Result<Self, D::Error>
287    where
288        D: serde::Deserializer<'de>,
289    {
290        let index = serde::de::Deserialize::deserialize(deserializer)?;
291        Ok(Self { index })
292    }
293}
294
295impl Atom {
296    fn new(index: usize) -> Self {
297        assert!(index < usize::try_from(u32::MAX).unwrap());
298        let index = u32::try_from(index).unwrap();
299        let index = NonZeroU32::new(index + 1).unwrap();
300        Self { index }
301    }
302
303    /// Get this atom's index in its pool.
304    pub fn index(&self) -> usize {
305        let index = self.index.get() - 1;
306        usize::try_from(index).unwrap()
307    }
308}
309
310#[cfg(test)]
311mod tests {
312    use super::*;
313
314    #[test]
315    fn basic() -> Result<()> {
316        let mut pool = StringPool::new();
317
318        let a = pool.insert("a")?;
319        assert_eq!(&pool[a], "a");
320        assert_eq!(pool.get_atom("a"), Some(a));
321
322        let a2 = pool.insert("a")?;
323        assert_eq!(a, a2);
324        assert_eq!(&pool[a2], "a");
325
326        let b = pool.insert("b")?;
327        assert_eq!(&pool[b], "b");
328        assert_ne!(a, b);
329        assert_eq!(pool.get_atom("b"), Some(b));
330
331        assert!(pool.get_atom("zzz").is_none());
332
333        let mut pool2 = StringPool::new();
334        let c = pool2.insert("c")?;
335        assert_eq!(&pool2[c], "c");
336        assert_eq!(a, c);
337        assert_eq!(&pool2[a], "c");
338        assert!(!pool2.contains(b));
339        assert!(pool2.get(b).is_none());
340
341        Ok(())
342    }
343
344    #[test]
345    fn stress() -> Result<()> {
346        let mut pool = StringPool::new();
347
348        let n = if cfg!(miri) { 100 } else { 10_000 };
349
350        for _ in 0..2 {
351            let atoms: Vec<_> = (0..n).map(|i| pool.insert(&i.to_string())).try_collect()?;
352
353            for atom in atoms {
354                assert!(pool.contains(atom));
355                assert_eq!(&pool[atom], atom.index().to_string());
356            }
357        }
358
359        Ok(())
360    }
361
362    #[test]
363    fn roundtrip_serialize_deserialize() -> Result<()> {
364        let mut pool = StringPool::new();
365        let a = pool.insert("a")?;
366        let b = pool.insert("b")?;
367        let c = pool.insert("c")?;
368
369        let bytes = postcard::to_allocvec(&(pool, a, b, c))?;
370        let (pool, a2, b2, c2) = postcard::from_bytes::<(StringPool, Atom, Atom, Atom)>(&bytes)?;
371
372        assert_eq!(&pool[a], "a");
373        assert_eq!(&pool[b], "b");
374        assert_eq!(&pool[c], "c");
375
376        assert_eq!(&pool[a2], "a");
377        assert_eq!(&pool[b2], "b");
378        assert_eq!(&pool[c2], "c");
379
380        Ok(())
381    }
382}