Skip to main content

spacetimedb_data_structures/
small_map.rs

1use crate::map::{HashCollectionExt as _, HashMap};
2use core::hash::Hash;
3use core::mem;
4use either::Either;
5use smallvec::SmallVec;
6
7/// A hash map optimized for small sizes,
8/// with a small size optimization for up to `N` entries
9/// and then a vector for up to `M`
10/// and then finally falling back to a real hash map after `M`.
11///
12/// The inline and heap based vectors use linear scans,
13/// which is faster than hashing as long as the map is small.
14#[derive(Clone, Debug, PartialEq, Eq)]
15pub enum SmallHashMap<K: Eq + Hash, V, const N: usize, const M: usize> {
16    Small(SmallVec<[(K, V); N]>),
17    Large(HashMap<K, V>),
18}
19
20#[cfg(feature = "memory-usage")]
21impl<
22        K: Eq + Hash + spacetimedb_memory_usage::MemoryUsage,
23        V: spacetimedb_memory_usage::MemoryUsage,
24        const N: usize,
25        const M: usize,
26    > spacetimedb_memory_usage::MemoryUsage for SmallHashMap<K, V, N, M>
27{
28    fn heap_usage(&self) -> usize {
29        match self {
30            SmallHashMap::Small(vec) => vec.heap_usage(),
31            SmallHashMap::Large(map) => map.heap_usage(),
32        }
33    }
34}
35
36impl<K: Eq + Hash, V, const N: usize, const M: usize> Default for SmallHashMap<K, V, N, M> {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
42impl<K: Eq + Hash, V, const N: usize, const M: usize> SmallHashMap<K, V, N, M> {
43    pub fn new() -> Self {
44        Self::Small(SmallVec::new())
45    }
46
47    /// Inserts the association `key -> value` into the map,
48    /// returning the previous value for `key`, if any.
49    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
50        // Possibly convert to large map first.
51        self.maybe_convert_to_large();
52
53        match self {
54            Self::Small(list) => {
55                if let Some(idx) = Self::key_pos(list, &key) {
56                    // SAFETY: `idx` was given by `key_pos`, so it must be in-bounds.
57                    let (_, val) = unsafe { list.get_unchecked_mut(idx) };
58                    return Some(mem::replace(val, value));
59                }
60
61                list.push((key, value));
62                None
63            }
64            Self::Large(map) => map.insert(key, value),
65        }
66    }
67
68    /// Returns either the existing value for `key`
69    /// or inserts into `key` using `or_insert`.
70    pub fn get_or_insert(&mut self, key: K, or_insert: impl FnOnce() -> V) -> &mut V {
71        // Possibly convert to large map first.
72        self.maybe_convert_to_large();
73
74        match self {
75            Self::Small(list) => {
76                if let Some(idx) = Self::key_pos(list, &key) {
77                    // SAFETY: `idx` was given by `key_pos`, so it must be in-bounds.
78                    let (_, val) = unsafe { list.get_unchecked_mut(idx) };
79                    return val;
80                }
81
82                list.push((key, or_insert()));
83                let last = list.last_mut();
84                // SAFETY: just inserted one element so `list` cannot be empty.
85                let (_, val) = unsafe { last.unwrap_unchecked() };
86                val
87            }
88            Self::Large(map) => map.entry(key).or_insert_with(or_insert),
89        }
90    }
91
92    #[inline]
93    fn maybe_convert_to_large(&mut self) {
94        if let Self::Small(list) = self {
95            if list.len() > M {
96                let list = mem::take(list);
97                self.convert_to_large(list);
98            }
99        }
100    }
101
102    #[cold]
103    #[inline(never)]
104    fn convert_to_large(&mut self, list: SmallVec<[(K, V); N]>) {
105        let mut map = HashMap::with_capacity(list.len());
106        map.extend(list);
107        *self = Self::Large(map);
108    }
109
110    pub fn remove(&mut self, key: &K) -> Option<V> {
111        match self {
112            Self::Small(list) => Self::key_pos(list, key).map(|idx| list.swap_remove(idx).1),
113            Self::Large(map) => map.remove(key),
114        }
115    }
116
117    /// Returns the position of `key` in `list`, if any.
118    fn key_pos(list: &[(K, V)], key: &K) -> Option<usize> {
119        list.iter().position(|(k, _)| k == key)
120    }
121
122    /// Clears all entries from the map.
123    pub fn clear(&mut self) {
124        match self {
125            Self::Small(list) => list.clear(),
126            Self::Large(map) => map.clear(),
127        }
128    }
129
130    /// Returns the number of entries in the map.
131    pub fn len(&self) -> usize {
132        match self {
133            Self::Small(list) => list.len(),
134            Self::Large(map) => map.len(),
135        }
136    }
137
138    /// Returns whether the map is empty.
139    pub fn is_empty(&self) -> bool {
140        self.len() == 0
141    }
142
143    /// Returns an iterator over all the key-value pairs in the map.
144    pub fn iter(&self) -> impl ExactSizeIterator<Item = (&K, &V)> {
145        match self {
146            Self::Small(list) => Either::Left(list.iter().map(|(k, v)| (k, v))),
147            Self::Large(map) => Either::Right(map.iter()),
148        }
149    }
150
151    /// Returns an iterator over all the keys in the map.
152    pub fn keys(&self) -> impl ExactSizeIterator<Item = &K> {
153        match self {
154            Self::Small(list) => Either::Left(list.iter().map(|(k, _)| k)),
155            Self::Large(map) => Either::Right(map.keys()),
156        }
157    }
158
159    /// Returns an iterator over all the values in the map.
160    pub fn values(&self) -> impl ExactSizeIterator<Item = &V> {
161        match self {
162            Self::Small(list) => Either::Left(list.iter().map(|(_, v)| v)),
163            Self::Large(map) => Either::Right(map.values()),
164        }
165    }
166
167    /// Returns whether `key` is in the map.
168    pub fn contains_key(&self, key: &K) -> bool {
169        match self {
170            Self::Small(list) => list.iter().any(|(k, _)| k == key),
171            Self::Large(map) => map.contains_key(key),
172        }
173    }
174
175    /// Returns the value for `key`, if any.
176    pub fn get(&self, key: &K) -> Option<&V> {
177        match self {
178            Self::Small(list) => list.iter().find_map(|(k, v)| (k == key).then_some(v)),
179            Self::Large(map) => map.get(key),
180        }
181    }
182
183    /// Returns the value for `key`, mutably, if any.
184    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
185        match self {
186            Self::Small(list) => list.iter_mut().find_map(|(k, v)| (k == key).then_some(v)),
187            Self::Large(map) => map.get_mut(key),
188        }
189    }
190}
191
192impl<K: Eq + Hash, V, const N: usize, const M: usize> Extend<(K, V)> for SmallHashMap<K, V, N, M> {
193    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
194        for (k, v) in iter {
195            self.insert(k, v);
196        }
197    }
198}
199
200#[cfg(test)]
201mod tests {
202    use std::collections::HashSet;
203
204    use super::SmallHashMap;
205    use proptest::collection::hash_set;
206    use proptest::prelude::*;
207
208    type K = u32;
209    type V = u32;
210    type Map<const N: usize, const M: usize> = SmallHashMap<K, V, N, M>;
211
212    /// Asserts that the map behaves consistently as an empty map.
213    fn assert_empty<const N: usize, const M: usize>(map: &mut Map<N, { M }>, key: &K, val: V) {
214        assert!(map.is_empty());
215        assert_eq!(map.len(), 0);
216
217        assert_eq!(map.iter().count(), 0);
218        assert_eq!(map.keys().count(), 0);
219        assert_eq!(map.values().count(), 0);
220
221        assert_eq!(Map::<N, M>::key_pos(&[], key), None);
222        assert!(!map.contains_key(key));
223        assert_eq!(map.get(key), None);
224        assert_eq!(map.get_mut(key), None);
225
226        assert_eq!(map.remove(key), None);
227        assert_eq!(map.insert(*key, val), None);
228    }
229
230    /// Asserts that the map behaves consistently as a non-empty map.
231    fn assert_not_empty<const N: usize, const M: usize>(map: &mut Map<N, M>, len: usize) {
232        assert!(!map.is_empty());
233        assert_eq!(map.len(), len);
234
235        assert_eq!(map.iter().count(), len);
236        assert_eq!(map.keys().count(), len);
237        assert_eq!(map.values().count(), len);
238    }
239
240    /// Extends the map with entries and then clears it,
241    /// asserting correct behavior before and after.
242    fn extend_clear<const N: usize, const M: usize>(map: &mut Map<N, M>, entries: &HashSet<(K, V)>) {
243        map.extend(entries.iter().cloned());
244        assert_not_empty(map, entries.len());
245
246        map.clear();
247        assert_empty(map, &0, 0);
248    }
249
250    /// Asserts that the map contains `key` with value `val`.
251    fn assert_key_eq<const N: usize, const M: usize>(map: &mut Map<N, M>, key: K, mut val: V) {
252        assert!(map.contains_key(&key));
253        assert_eq!(map.get(&key), Some(&val));
254        assert_eq!(map.get_mut(&key), Some(&mut val));
255    }
256
257    /// Asserts that the map does not contain `key`.
258    fn assert_key_none<const N: usize, const M: usize>(map: &mut Map<N, M>, key: K) {
259        assert!(!map.contains_key(&key));
260        assert_eq!(map.get(&key), None);
261        assert_eq!(map.get_mut(&key), None);
262    }
263
264    /// Inserting `key` twice returns the old value.
265    fn insert_returns_old_inner<const N: usize, const M: usize>(key: K, val1: V, val2: V, val3: V) {
266        let mut map = Map::<N, M>::new();
267
268        assert_key_none(&mut map, key);
269
270        assert_eq!(map.insert(key, val1), None);
271        assert_key_eq(&mut map, key, val1);
272
273        assert_eq!(map.insert(key, val2), Some(val1));
274        assert_key_eq(&mut map, key, val2);
275
276        assert_eq!(map.get_or_insert(key, || val3), &val2);
277    }
278
279    /// Mutating the value via `get_mut` has effect.
280    fn mutation_via_get_mut_inner<const N: usize, const M: usize>(key: K, val1: V, val2: V) {
281        let mut map = Map::<N, M>::new();
282
283        assert_eq!(map.insert(key, val1), None);
284        if let Some(slot) = map.get_mut(&key) {
285            *slot = val2;
286        }
287
288        assert_key_eq(&mut map, key, val2);
289    }
290
291    /// Mutating the value via `get_or_insert` has effect.
292    fn mutation_via_get_or_insert_inner<const N: usize, const M: usize>(key: K, val1: V, val2: V) {
293        let mut map = Map::<N, M>::new();
294
295        assert_eq!(map.insert(key, val1), None);
296        let slot = map.get_or_insert(key, || val2);
297        *slot = val2;
298
299        assert_eq!(map.get(&key), Some(&val2));
300    }
301
302    /// Collects `iter` into a sorted `Vec<T>`.
303    fn sorted<T: Ord>(iter: impl Iterator<Item = T>) -> Vec<T> {
304        let mut vec: Vec<T> = iter.collect();
305        vec.sort();
306        vec
307    }
308
309    /// Tests insertion, retrieval, and deletion together.
310    fn insert_get_remove_inner<const N: usize, const M: usize>(entries: &[(K, V)]) {
311        let mut map = Map::<N, M>::new();
312
313        // Initially all keys are absent.
314        for (k, _) in entries.iter() {
315            assert_key_none(&mut map, *k);
316        }
317
318        // Insert all entries.
319        for (k, v) in entries.iter() {
320            map.insert(*k, *v);
321        }
322
323        // Now all keys are present.
324        assert_not_empty(&mut map, entries.len());
325        for (k, v) in entries.iter().cloned() {
326            assert_key_eq(&mut map, k, v);
327        }
328
329        // Iterators return all entries.
330        assert_eq!(
331            sorted(map.iter().map(|(k, v)| (*k, *v))),
332            sorted(entries.iter().cloned())
333        );
334        assert_eq!(sorted(map.keys().cloned()), sorted(entries.iter().map(|(k, _)| *k)));
335        assert_eq!(sorted(map.values().cloned()), sorted(entries.iter().map(|(_, v)| *v)));
336
337        // Removal results in absence.
338        for (k, _) in entries.iter() {
339            assert!(map.remove(k).is_some());
340            assert_eq!(map.get(k), None);
341        }
342
343        // Finally the map is empty again.
344        assert!(map.is_empty());
345    }
346
347    #[test]
348    fn new_is_same_as_default() {
349        assert_eq!(Map::<8, 16>::new(), <_>::default());
350        assert_eq!(Map::<0, 16>::new(), <_>::default());
351        assert_eq!(Map::<0, 0>::new(), <_>::default());
352    }
353
354    proptest! {
355        #[test]
356        fn new_is_empty(key: K, val: V) {
357            assert_empty(&mut Map::<4, 8>::new(), &key, val);
358            assert_empty(&mut Map::<0, 8>::new(), &key, val);
359            assert_empty(&mut Map::<0, 0>::new(), &key, val);
360        }
361
362        #[test]
363        fn cleared_is_empty(entries in hash_set(any::<(K, V)>(), 1..50)) {
364            extend_clear(&mut Map::<4, 8>::new(), &entries);
365            extend_clear(&mut Map::<0, 8>::new(), &entries);
366            extend_clear(&mut Map::<0, 0>::new(), &entries);
367        }
368
369        #[test]
370        fn insert_returns_old(key: K, val1: V, val2: V) {
371            insert_returns_old_inner::<4, 8>(key, val1, val2, val2);
372            insert_returns_old_inner::<0, 8>(key, val1, val2, val2);
373            insert_returns_old_inner::<0, 0>(key, val1, val2, val2);
374        }
375
376        #[test]
377        fn mutation_via_get_mut(key: K, val1: V, val2: V) {
378            mutation_via_get_mut_inner::<4, 8>(key, val1, val2);
379            mutation_via_get_mut_inner::<0, 8>(key, val1, val2);
380            mutation_via_get_mut_inner::<0, 0>(key, val1, val2);
381        }
382
383        #[test]
384        fn mutation_via_get_or_insert(key: K, val1: V, val2: V) {
385            mutation_via_get_or_insert_inner::<4, 8>(key, val1, val2);
386            mutation_via_get_or_insert_inner::<0, 8>(key, val1, val2);
387            mutation_via_get_or_insert_inner::<0, 0>(key, val1, val2);
388        }
389
390        #[test]
391        fn insert_get_remove(entries in hash_set(any::<(K, V)>(), 1..50)) {
392            let entries: Vec<(K, V)> = entries.into_iter().collect();
393            insert_get_remove_inner::<4, 8>(&entries);
394            insert_get_remove_inner::<0, 8>(&entries);
395            insert_get_remove_inner::<0, 0>(&entries);
396        }
397    }
398}