Skip to main content

luaur_common/records/
dense_hash_table.rs

1//! Faithful port of `Luau::detail::DenseHashTable` — the open-addressing core
2//! shared by `DenseHashMap`/`DenseHashSet`. Reference:
3//! `luau/Common/include/Luau/DenseHash.h` (open addressing, quadratic probing,
4//! empty-key sentinel). Oracle: the two upstream cases in
5//! `luau/tests/DenseHash.test.cpp` + a `std::collections::HashMap` differential
6//! fuzz (2000 trials × 200 ops). The validated standalone prototype this file
7//! transcribes lives at `/tmp/densehash_proto.rs`.
8//!
9//! Design (option A — generic functor params): the C++ `Hash`/`Eq` template
10//! parameters become the `DenseHasher`/`DenseEq` traits, and the `Set`/`Map`
11//! item layout becomes the `ItemInterface` trait. The table stores items in a
12//! `Vec<I>` and returns slot indices (`usize`) rather than raw pointers, which
13//! keeps the port sound without `unsafe`. `core`/`alloc` only, so the crate
14//! stays `wasm32-unknown-unknown` compatible.
15
16use alloc::vec::Vec;
17use core::marker::PhantomData;
18use core::mem;
19
20// ---- functor traits (C++ `Hash` / `Eq` template params) ----
21
22/// Hash functor, mirroring the C++ `Hash` template parameter.
23pub trait DenseHasher<K> {
24    fn hash(&self, key: &K) -> usize;
25}
26
27/// Equality functor, mirroring the C++ `Eq` template parameter.
28pub trait DenseEq<K> {
29    fn eq(&self, a: &K, b: &K) -> bool;
30}
31
32/// Default equality functor used when none is supplied (`std::equal_to<T>`).
33#[derive(Clone, Copy)]
34pub struct DenseEqDefault<K>(PhantomData<K>);
35
36// Manual impl: the derive would demand `K: Default` (PhantomData needs nothing).
37impl<K> Default for DenseEqDefault<K> {
38    fn default() -> Self {
39        DenseEqDefault(PhantomData)
40    }
41}
42
43impl<K: PartialEq> DenseEq<K> for DenseEqDefault<K> {
44    fn eq(&self, a: &K, b: &K) -> bool {
45        a == b
46    }
47}
48
49// ---- empty-slot value initialization ----
50
51/// Empty-slot value for a `DenseHashMap`, mirroring C++ value-initialization: a
52/// freshly grown bucket holds a value-initialized `Value()` — null for pointers,
53/// zero for scalars, `Default::default()` for ordinary types. Rust's `Default`
54/// is *not* implemented for raw pointers, and the orphan rule forbids adding it,
55/// so a blanket `impl<T: Default>` cannot coexist with the pointer impls below
56/// (coherence). Hence an explicit trait: pointer and scalar value types are
57/// covered here, and any struct value type a map stores supplies its own impl
58/// (e.g. `Location` in `luau-ast`).
59pub trait DenseDefault {
60    fn dense_default() -> Self;
61}
62
63impl<T> DenseDefault for *mut T {
64    fn dense_default() -> Self {
65        core::ptr::null_mut()
66    }
67}
68
69impl<T> DenseDefault for *const T {
70    fn dense_default() -> Self {
71        core::ptr::null()
72    }
73}
74
75macro_rules! dense_default_via_default {
76    ($($t:ty),* $(,)?) => {
77        $(
78            impl DenseDefault for $t {
79                fn dense_default() -> Self {
80                    <$t as Default>::default()
81                }
82            }
83        )*
84    };
85}
86
87dense_default_via_default!(
88    bool, i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, f32, f64, char,
89);
90
91impl DenseDefault for alloc::string::String {
92    fn dense_default() -> Self {
93        alloc::string::String::new()
94    }
95}
96
97impl<T> DenseDefault for alloc::vec::Vec<T> {
98    fn dense_default() -> Self {
99        alloc::vec::Vec::new()
100    }
101}
102
103// A DenseHashMap value type can itself be a map/set (C++ stores
104// `std::unordered_map`/`std::map`/`std::set` values, e.g.
105// `DfgScope::Props = DenseHashMap<const Def*, std::unordered_map<...>>`), which
106// default-construct empty.
107impl<K: Ord, V> DenseDefault for alloc::collections::BTreeMap<K, V> {
108    fn dense_default() -> Self {
109        alloc::collections::BTreeMap::new()
110    }
111}
112
113impl<T: Ord> DenseDefault for alloc::collections::BTreeSet<T> {
114    fn dense_default() -> Self {
115        alloc::collections::BTreeSet::new()
116    }
117}
118
119// A nullable shared pointer (C++ `std::shared_ptr<T>`) defaults to null; the
120// Rust mirror `Option<Arc<T>>` defaults to `None`. Used for DenseHashMap values
121// whose C++ type is a smart pointer (e.g. `DenseHashMap<K, ScopePtr>`).
122impl<T> DenseDefault for Option<T> {
123    fn dense_default() -> Self {
124        None
125    }
126}
127
128impl<A: DenseDefault, B: DenseDefault> DenseDefault for (A, B) {
129    fn dense_default() -> Self {
130        (A::dense_default(), B::dense_default())
131    }
132}
133
134// ---- item interface (Set: `I = K` ; Map: `I = (K, V)`) ----
135
136/// Bridges the `Set`/`Map` item layout so the table is agnostic to whether a
137/// slot stores a bare key or a `(key, value)` pair.
138pub trait ItemInterface<K, I> {
139    fn get_key(item: &I) -> &K;
140    fn set_key(item: &mut I, key: K);
141    fn make_empty(empty_key: &K) -> I;
142}
143
144/// Set layout: the item *is* the key.
145pub struct ItemInterfaceSet<K>(PhantomData<K>);
146
147impl<K: Clone> ItemInterface<K, K> for ItemInterfaceSet<K> {
148    fn get_key(item: &K) -> &K {
149        item
150    }
151    fn set_key(item: &mut K, key: K) {
152        *item = key;
153    }
154    fn make_empty(empty_key: &K) -> K {
155        empty_key.clone()
156    }
157}
158
159/// Map layout: the item is a `(key, value)` pair; the value defaults on insert.
160pub struct ItemInterfaceMap<K, V>(PhantomData<(K, V)>);
161
162impl<K: Clone, V: DenseDefault> ItemInterface<K, (K, V)> for ItemInterfaceMap<K, V> {
163    fn get_key(item: &(K, V)) -> &K {
164        &item.0
165    }
166    fn set_key(item: &mut (K, V), key: K) {
167        item.0 = key;
168    }
169    fn make_empty(empty_key: &K) -> (K, V) {
170        (empty_key.clone(), V::dense_default())
171    }
172}
173
174// ---- the table ----
175
176/// `capacity == data.len()`, always a power of two or 0. A slot is empty iff its
177/// key compares equal to `empty_key`.
178pub struct DenseHashTable<K, I, Iface, H, E> {
179    pub(crate) data: Vec<I>,
180    pub(crate) capacity: usize,
181    pub(crate) count: usize,
182    pub(crate) empty_key: K,
183    pub(crate) hasher: H,
184    pub(crate) eq: E,
185    pub(crate) _iface: PhantomData<Iface>,
186}
187
188// The C++ container is copyable; we provide `Clone`/`Debug` by hand (rather than
189// `derive`) so the bounds stay precise — `derive` would spuriously require
190// `Iface: Clone`/`Debug` on the zero-sized `PhantomData<Iface>` marker, and
191// `Debug` would force `H`/`E` (the hash/eq functors) to be `Debug`.
192impl<K: Clone, I: Clone, Iface, H: Clone, E: Clone> Clone for DenseHashTable<K, I, Iface, H, E> {
193    fn clone(&self) -> Self {
194        Self {
195            data: self.data.clone(),
196            capacity: self.capacity,
197            count: self.count,
198            empty_key: self.empty_key.clone(),
199            hasher: self.hasher.clone(),
200            eq: self.eq.clone(),
201            _iface: PhantomData,
202        }
203    }
204}
205
206impl<K: core::fmt::Debug, I: core::fmt::Debug, Iface, H, E> core::fmt::Debug
207    for DenseHashTable<K, I, Iface, H, E>
208{
209    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
210        f.debug_struct("DenseHashTable")
211            .field("data", &self.data)
212            .field("capacity", &self.capacity)
213            .field("count", &self.count)
214            .field("empty_key", &self.empty_key)
215            .finish_non_exhaustive()
216    }
217}
218
219impl<K, I, Iface, H, E> DenseHashTable<K, I, Iface, H, E>
220where
221    K: Clone,
222    Iface: ItemInterface<K, I>,
223    H: DenseHasher<K> + Default,
224    E: DenseEq<K> + Default,
225{
226    /// `DenseHashTable(const Key& empty_key, size_t buckets)`. `buckets` must be
227    /// a power of two or 0. Reference: `DenseHash.h` ctor.
228    pub fn new(empty_key: K, buckets: usize) -> Self {
229        let eq = E::default();
230        // equality must at least recognise the sentinel as equal to itself
231        debug_assert!(eq.eq(&empty_key, &empty_key));
232        debug_assert!(buckets & buckets.wrapping_sub(1) == 0);
233
234        let data = if buckets > 0 {
235            (0..buckets)
236                .map(|_| Iface::make_empty(&empty_key))
237                .collect()
238        } else {
239            Vec::new()
240        };
241
242        DenseHashTable {
243            data,
244            capacity: buckets,
245            count: 0,
246            empty_key,
247            hasher: H::default(),
248            eq,
249            _iface: PhantomData,
250        }
251    }
252
253    /// Inserts `key` (or finds it if present) without checking load factor, and
254    /// returns the slot index. The caller must `rehash_if_full` first.
255    pub(crate) fn insert_unsafe(&mut self, key: K) -> usize {
256        debug_assert!(!self.eq.eq(&key, &self.empty_key));
257        let hashmod = self.capacity - 1;
258        let mut bucket = self.hasher.hash(&key) & hashmod;
259        for probe in 0..=hashmod {
260            if self
261                .eq
262                .eq(Iface::get_key(&self.data[bucket]), &self.empty_key)
263            {
264                Iface::set_key(&mut self.data[bucket], key);
265                self.count += 1;
266                return bucket;
267            }
268            if self.eq.eq(Iface::get_key(&self.data[bucket]), &key) {
269                return bucket;
270            }
271            bucket = (bucket + probe + 1) & hashmod;
272        }
273        let occupied = self
274            .data
275            .iter()
276            .filter(|item| !self.eq.eq(Iface::get_key(item), &self.empty_key))
277            .count();
278        unreachable!(
279            "dense hash table is full: capacity={}, count={}, occupied={}",
280            self.capacity, self.count, occupied
281        );
282    }
283
284    /// Returns the slot index of `key` if present.
285    pub(crate) fn find(&self, key: &K) -> Option<usize> {
286        if self.count == 0 {
287            return None;
288        }
289        if self.eq.eq(key, &self.empty_key) {
290            return None;
291        }
292        let hashmod = self.capacity - 1;
293        let mut bucket = self.hasher.hash(key) & hashmod;
294        for probe in 0..=hashmod {
295            let k = Iface::get_key(&self.data[bucket]);
296            if self.eq.eq(k, key) {
297                return Some(bucket);
298            }
299            if self.eq.eq(k, &self.empty_key) {
300                return None;
301            }
302            bucket = (bucket + probe + 1) & hashmod;
303        }
304        let occupied = self
305            .data
306            .iter()
307            .filter(|item| !self.eq.eq(Iface::get_key(item), &self.empty_key))
308            .count();
309        unreachable!(
310            "dense hash table is full: capacity={}, count={}, occupied={}",
311            self.capacity, self.count, occupied
312        );
313    }
314
315    /// Grows to `16` (from empty) or `2×` capacity, re-inserting live items.
316    pub(crate) fn rehash(&mut self) {
317        let newsize = if self.capacity == 0 {
318            16
319        } else {
320            self.capacity * 2
321        };
322        let mut newtable = Self::new(self.empty_key.clone(), newsize);
323        for i in 0..self.capacity {
324            if !self.eq.eq(Iface::get_key(&self.data[i]), &self.empty_key) {
325                let key = Iface::get_key(&self.data[i]).clone();
326                let idx = newtable.insert_unsafe(key);
327                newtable.data[idx] =
328                    mem::replace(&mut self.data[i], Iface::make_empty(&self.empty_key));
329            }
330        }
331        debug_assert_eq!(self.count, newtable.count);
332        mem::swap(&mut self.data, &mut newtable.data);
333        mem::swap(&mut self.capacity, &mut newtable.capacity);
334    }
335
336    /// Rehashes before an insert that would push past the 3/4 load factor — but
337    /// only when `key` is genuinely new. The `find` guard is load-bearing:
338    /// overwriting an existing key when full must NOT rehash (upstream relies on
339    /// iterators surviving an overwrite-merge).
340    pub(crate) fn rehash_if_full(&mut self, key: &K) {
341        if self.count >= self.capacity * 3 / 4 && self.find(key).is_none() {
342            self.rehash();
343        }
344    }
345
346    /// Clears all slots back to the empty sentinel without freeing capacity.
347    pub(crate) fn clear(&mut self) {
348        if self.count == 0 {
349            return;
350        }
351        for slot in self.data.iter_mut() {
352            *slot = Iface::make_empty(&self.empty_key);
353        }
354        self.count = 0;
355    }
356
357    pub(crate) fn size(&self) -> usize {
358        self.count
359    }
360
361    /// Index of the first occupied slot (== `capacity` when empty). Iterator seed.
362    pub(crate) fn first_occupied(&self) -> usize {
363        let mut start = 0;
364        while start < self.capacity
365            && self
366                .eq
367                .eq(Iface::get_key(&self.data[start]), &self.empty_key)
368        {
369            start += 1;
370        }
371        start
372    }
373
374    /// Index of the next occupied slot strictly after `index`.
375    pub(crate) fn next_occupied(&self, mut index: usize) -> usize {
376        loop {
377            index += 1;
378            if index >= self.capacity
379                || !self
380                    .eq
381                    .eq(Iface::get_key(&self.data[index]), &self.empty_key)
382            {
383                break;
384            }
385        }
386        index
387    }
388}