Skip to main content

cjc_data/detcoll/
det_open.rs

1//! `DetOpenMap<K, V>` — deterministic open-addressing hash map.
2//!
3//! Fixed splitmix64 mixing. Bounded probe length. Deterministic
4//! fallback to `BTreeMap` if the probe budget is exceeded — which
5//! prevents adversarial collision attacks from causing unbounded
6//! probing or silent corruption.
7//!
8//! Use for: sparse mutable equality lookup where order doesn't matter
9//! (worklist membership, temporary caches, sparse `NodeId → Fact` etc.).
10//! For sealed/read-heavy workloads prefer `DHarht`. For ordered output
11//! prefer `BTreeMap`.
12//!
13//! Iteration order is undefined-but-deterministic for a given insertion
14//! sequence. **Do not surface iteration order to public output** — sort
15//! into a `BTreeMap` or `SortedVecMap` for canonical output.
16
17use super::splitmix64;
18use std::collections::BTreeMap;
19use std::hash::Hash;
20
21/// Maximum probe length before falling back to BTreeMap. Caps worst
22/// case lookup at `MAX_PROBE` linear scan steps.
23const MAX_PROBE: usize = 32;
24
25/// Default initial table size. Power of two so `hash & (n - 1)` is the
26/// shard index.
27const INITIAL_SLOTS: usize = 16;
28
29/// Resize when load factor crosses this fraction. 0.75 in u32 form.
30const LOAD_NUM: usize = 3;
31const LOAD_DEN: usize = 4;
32
33#[derive(Debug, Clone)]
34enum Slot<K, V> {
35    Empty,
36    Occupied(K, V),
37    Tombstone,
38}
39
40#[derive(Debug)]
41pub struct DetOpenMap<K: Hash + Eq + Ord + Clone, V: Clone> {
42    /// Open-addressing primary table.
43    slots: Vec<Slot<K, V>>,
44    /// Number of `Occupied` slots.
45    occupied: usize,
46    /// Number of `Tombstone` slots.
47    tombstones: usize,
48    /// Deterministic fallback for entries that exceeded the probe
49    /// budget. The contract: `BTreeMap::insert` semantics, so no
50    /// ordering surprises.
51    fallback: BTreeMap<K, V>,
52    /// Counter for instrumentation / security tests.
53    overflow_to_fallback: u64,
54}
55
56impl<K: Hash + Eq + Ord + Clone, V: Clone> Default for DetOpenMap<K, V> {
57    fn default() -> Self {
58        Self::new()
59    }
60}
61
62impl<K: Hash + Eq + Ord + Clone, V: Clone> DetOpenMap<K, V> {
63    pub fn new() -> Self {
64        Self::with_capacity(INITIAL_SLOTS)
65    }
66
67    pub fn with_capacity(cap: usize) -> Self {
68        let cap = cap.max(INITIAL_SLOTS).next_power_of_two();
69        Self {
70            slots: (0..cap).map(|_| Slot::Empty).collect(),
71            occupied: 0,
72            tombstones: 0,
73            fallback: BTreeMap::new(),
74            overflow_to_fallback: 0,
75        }
76    }
77
78    pub fn len(&self) -> usize {
79        self.occupied + self.fallback.len()
80    }
81
82    pub fn is_empty(&self) -> bool {
83        self.len() == 0
84    }
85
86    /// How many entries spilled into the BTreeMap fallback. Surfaces
87    /// adversarial-collision health for tests / monitoring. Not part
88    /// of the user-visible map contract.
89    pub fn overflow_count(&self) -> u64 {
90        self.overflow_to_fallback
91    }
92
93    /// Hash a key with the deterministic mixer.
94    fn hash_key(&self, key: &K) -> u64 {
95        // Stable per-implementation: rely on Hash + a `splitmix64` mix.
96        let mut h = std::hash::DefaultHasher::new();
97        // Note: we deliberately use a stable starting state by feeding
98        // a fixed prefix. `DefaultHasher` is `SipHash-1-3` with a
99        // *stable* seed within a single run, but Rust documents that
100        // its output may differ across versions. To pin determinism
101        // across versions we mix the result through `splitmix64` and
102        // also feed a fixed nonce.
103        use std::hash::Hasher;
104        h.write_u64(0xCBF29CE484222325);
105        key.hash(&mut h);
106        splitmix64(h.finish())
107    }
108
109    fn slot_index(&self, hash: u64, probe: usize) -> usize {
110        let n = self.slots.len();
111        // Linear probing with deterministic mix per probe step.
112        let step = splitmix64(hash.wrapping_add(probe as u64));
113        (step as usize) & (n - 1)
114    }
115
116    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
117        // If we'd exceed the load factor, resize first.
118        if (self.occupied + 1) * LOAD_DEN > self.slots.len() * LOAD_NUM {
119            self.resize();
120        }
121
122        let h = self.hash_key(&key);
123        let mut first_tombstone: Option<usize> = None;
124        for probe in 0..MAX_PROBE {
125            let idx = self.slot_index(h, probe);
126            match &self.slots[idx] {
127                Slot::Empty => {
128                    let slot_idx = first_tombstone.unwrap_or(idx);
129                    if matches!(self.slots[slot_idx], Slot::Tombstone) {
130                        self.tombstones -= 1;
131                    }
132                    self.slots[slot_idx] = Slot::Occupied(key, value);
133                    self.occupied += 1;
134                    return None;
135                }
136                Slot::Tombstone => {
137                    if first_tombstone.is_none() {
138                        first_tombstone = Some(idx);
139                    }
140                }
141                Slot::Occupied(k, _) if k == &key => {
142                    // Found an existing slot — update.
143                    if let Slot::Occupied(_, old_v) =
144                        std::mem::replace(&mut self.slots[idx], Slot::Empty)
145                    {
146                        self.slots[idx] = Slot::Occupied(key, value);
147                        return Some(old_v);
148                    }
149                    unreachable!()
150                }
151                Slot::Occupied(_, _) => continue,
152            }
153        }
154
155        // Probe budget exceeded — fall back to BTreeMap. Deterministic.
156        self.overflow_to_fallback += 1;
157        self.fallback.insert(key, value)
158    }
159
160    pub fn get(&self, key: &K) -> Option<&V> {
161        // Check fallback first — once a key has overflowed, it lives
162        // there even if the primary now has space.
163        if let Some(v) = self.fallback.get(key) {
164            return Some(v);
165        }
166        let h = self.hash_key(key);
167        for probe in 0..MAX_PROBE {
168            let idx = self.slot_index(h, probe);
169            match &self.slots[idx] {
170                Slot::Empty => return None,
171                Slot::Occupied(k, v) if k == key => return Some(v),
172                Slot::Occupied(_, _) | Slot::Tombstone => continue,
173            }
174        }
175        None
176    }
177
178    pub fn contains_key(&self, key: &K) -> bool {
179        self.get(key).is_some()
180    }
181
182    pub fn remove(&mut self, key: &K) -> Option<V> {
183        if let Some(v) = self.fallback.remove(key) {
184            return Some(v);
185        }
186        let h = self.hash_key(key);
187        for probe in 0..MAX_PROBE {
188            let idx = self.slot_index(h, probe);
189            match &self.slots[idx] {
190                Slot::Empty => return None,
191                Slot::Occupied(k, _) if k == key => {
192                    if let Slot::Occupied(_, v) =
193                        std::mem::replace(&mut self.slots[idx], Slot::Tombstone)
194                    {
195                        self.occupied -= 1;
196                        self.tombstones += 1;
197                        return Some(v);
198                    }
199                    unreachable!()
200                }
201                _ => continue,
202            }
203        }
204        None
205    }
206
207    fn resize(&mut self) {
208        let new_cap = (self.slots.len() * 2).max(INITIAL_SLOTS);
209        let old = std::mem::replace(
210            &mut self.slots,
211            (0..new_cap).map(|_| Slot::Empty).collect(),
212        );
213        self.occupied = 0;
214        self.tombstones = 0;
215        for slot in old {
216            if let Slot::Occupied(k, v) = slot {
217                // Reinsert via the normal insert path so probe sequence
218                // reuses the new table size. Because we just resized,
219                // load factor cannot trigger another resize here.
220                self.insert(k, v);
221            }
222        }
223    }
224
225    /// Iterate `(K, V)` pairs in **deterministic-but-undefined** order
226    /// (combination of slot order + fallback order). Callers MUST sort
227    /// before public output.
228    pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> + '_ {
229        let from_slots = self.slots.iter().filter_map(|s| match s {
230            Slot::Occupied(k, v) => Some((k, v)),
231            _ => None,
232        });
233        let from_fallback = self.fallback.iter();
234        from_slots.chain(from_fallback)
235    }
236
237    /// Sorted iteration. Use this for any path that surfaces iteration
238    /// order to public output.
239    pub fn iter_sorted(&self) -> Vec<(&K, &V)> {
240        let mut all: Vec<(&K, &V)> = self.iter().collect();
241        all.sort_by(|a, b| a.0.cmp(b.0));
242        all
243    }
244}
245
246#[cfg(test)]
247mod tests {
248    use super::*;
249
250    #[test]
251    fn insert_get_basic() {
252        let mut m: DetOpenMap<i32, &str> = DetOpenMap::new();
253        m.insert(1, "a");
254        m.insert(2, "b");
255        m.insert(3, "c");
256        assert_eq!(m.get(&1), Some(&"a"));
257        assert_eq!(m.get(&2), Some(&"b"));
258        assert_eq!(m.get(&3), Some(&"c"));
259        assert_eq!(m.get(&4), None);
260    }
261
262    #[test]
263    fn insert_overwrites() {
264        let mut m: DetOpenMap<i32, &str> = DetOpenMap::new();
265        m.insert(1, "a");
266        let prev = m.insert(1, "b");
267        assert_eq!(prev, Some("a"));
268        assert_eq!(m.get(&1), Some(&"b"));
269        assert_eq!(m.len(), 1);
270    }
271
272    #[test]
273    fn remove_works_via_tombstone() {
274        let mut m: DetOpenMap<i32, &str> = DetOpenMap::new();
275        m.insert(1, "a");
276        m.insert(2, "b");
277        m.insert(3, "c");
278        assert_eq!(m.remove(&2), Some("b"));
279        assert_eq!(m.get(&2), None);
280        // Other entries still findable.
281        assert_eq!(m.get(&1), Some(&"a"));
282        assert_eq!(m.get(&3), Some(&"c"));
283    }
284
285    #[test]
286    fn resize_preserves_entries() {
287        let mut m: DetOpenMap<i32, i32> = DetOpenMap::new();
288        for i in 0..1000 {
289            m.insert(i, i * 2);
290        }
291        for i in 0..1000 {
292            assert_eq!(m.get(&i), Some(&(i * 2)));
293        }
294    }
295
296    #[test]
297    fn deterministic_double_run_iter_sorted() {
298        let mut a: DetOpenMap<i32, i32> = DetOpenMap::new();
299        let mut b: DetOpenMap<i32, i32> = DetOpenMap::new();
300        for i in [3, 1, 4, 1, 5, 9, 2, 6, 5, 3] {
301            a.insert(i, i);
302            b.insert(i, i);
303        }
304        let av = a.iter_sorted();
305        let bv = b.iter_sorted();
306        assert_eq!(av, bv);
307    }
308
309    #[test]
310    fn iter_sorted_is_canonical_regardless_of_insertion_order() {
311        let mut a: DetOpenMap<i32, i32> = DetOpenMap::new();
312        let mut b: DetOpenMap<i32, i32> = DetOpenMap::new();
313        for i in [1, 2, 3, 4, 5] {
314            a.insert(i, i);
315        }
316        for i in [5, 4, 3, 2, 1] {
317            b.insert(i, i);
318        }
319        let av: Vec<i32> = a.iter_sorted().into_iter().map(|(_, v)| *v).collect();
320        let bv: Vec<i32> = b.iter_sorted().into_iter().map(|(_, v)| *v).collect();
321        assert_eq!(av, bv);
322        assert_eq!(av, vec![1, 2, 3, 4, 5]);
323    }
324}