cjc_runtime/det_map.rs
1//! Deterministic map with insertion-order iteration.
2//!
3//! Provides [`DetMap`], an open-addressing hash map that uses MurmurHash3 with
4//! a fixed seed and Fibonacci hashing for slot selection. Iteration order
5//! follows insertion order, not hash order, making output deterministic across
6//! runs and platforms.
7//!
8//! # Determinism guarantees
9//!
10//! - MurmurHash3 with a compile-time fixed seed produces identical hashes on
11//! every run.
12//! - Fibonacci hashing maps hash values to slots without random state.
13//! - Insertion-order iteration is maintained via an auxiliary `order` vector.
14//! - Growth (rehash) preserves insertion order.
15//!
16//! # When to use
17//!
18//! Use [`DetMap`] instead of `HashMap` when the CJC runtime needs a
19//! key-value store whose iteration order must be reproducible (e.g., for
20//! struct field serialization, JSON emission, or deterministic fold
21//! operations). For cases where only sorted-key order is needed, prefer
22//! `BTreeMap`.
23
24use crate::value::Value;
25
26// ---------------------------------------------------------------------------
27// 5. Deterministic Map (open addressing, MurmurHash3, insertion-order iteration)
28// ---------------------------------------------------------------------------
29
30/// Apply the MurmurHash3 64-bit finalizer to a hash value.
31///
32/// Mixes all bits of `h` so that small input changes produce large output
33/// changes. This is the standard MurmurHash3 finalization step.
34pub fn murmurhash3_finalize(mut h: u64) -> u64 {
35 h ^= h >> 33;
36 h = h.wrapping_mul(0xff51afd7ed558ccd);
37 h ^= h >> 33;
38 h = h.wrapping_mul(0xc4ceb9fe1a85ec53);
39 h ^= h >> 33;
40 h
41}
42
43/// Hash a byte slice using MurmurHash3 with a fixed compile-time seed.
44///
45/// Processes 8-byte chunks followed by a tail, then finalizes. The seed is
46/// `0x5f3759df` (the fast inverse-sqrt constant), chosen for its good
47/// avalanche properties. Produces identical results on all platforms.
48///
49/// # Arguments
50///
51/// * `data` - The byte slice to hash.
52///
53/// # Returns
54///
55/// A 64-bit hash value.
56pub fn murmurhash3(data: &[u8]) -> u64 {
57 const SEED: u64 = 0x5f3759df;
58 let mut h = SEED;
59 // Process 8-byte chunks
60 let chunks = data.len() / 8;
61 for i in 0..chunks {
62 let mut k = [0u8; 8];
63 k.copy_from_slice(&data[i * 8..(i + 1) * 8]);
64 let k = u64::from_le_bytes(k);
65 let k = k.wrapping_mul(0x87c37b91114253d5);
66 let k = k.rotate_left(31);
67 let k = k.wrapping_mul(0x4cf5ad432745937f);
68 h ^= k;
69 h = h.rotate_left(27);
70 h = h.wrapping_mul(5).wrapping_add(0x52dce729);
71 }
72 // Tail
73 let tail = &data[chunks * 8..];
74 let mut k: u64 = 0;
75 for (i, &b) in tail.iter().enumerate() {
76 k |= (b as u64) << (i * 8);
77 }
78 if !tail.is_empty() {
79 let k = k.wrapping_mul(0x87c37b91114253d5);
80 let k = k.rotate_left(31);
81 let k = k.wrapping_mul(0x4cf5ad432745937f);
82 h ^= k;
83 }
84 h ^= data.len() as u64;
85 murmurhash3_finalize(h)
86}
87
88/// Compute a deterministic hash for a CJC [`Value`].
89///
90/// Dispatches on the value variant, serializing to bytes and hashing via
91/// [`murmurhash3`]. Enum variants recursively hash their fields. Unsupported
92/// variants (e.g., closures, tensors) map to a fixed sentinel byte.
93///
94/// # Determinism
95///
96/// The same [`Value`] always produces the same hash, regardless of platform
97/// or run. Float hashing uses `to_bits()` so that `NaN` and `-0.0` hash
98/// consistently.
99pub fn value_hash(val: &Value) -> u64 {
100 match val {
101 Value::Int(n) => murmurhash3(&n.to_le_bytes()),
102 Value::Float(f) => murmurhash3(&f.to_bits().to_le_bytes()),
103 Value::Bool(b) => murmurhash3(&[*b as u8]),
104 Value::String(s) => murmurhash3(s.as_bytes()),
105 Value::Bytes(b) => murmurhash3(&b.borrow()),
106 Value::ByteSlice(b) => murmurhash3(b),
107 Value::StrView(b) => murmurhash3(b),
108 Value::U8(v) => murmurhash3(&[*v]),
109 Value::Bf16(v) => murmurhash3(&v.0.to_le_bytes()),
110 Value::Enum {
111 enum_name,
112 variant,
113 fields,
114 } => {
115 // Hash enum_name + variant + each field
116 let mut h = murmurhash3(enum_name.as_bytes());
117 h ^= murmurhash3(variant.as_bytes());
118 for f in fields {
119 h ^= value_hash(f);
120 }
121 h
122 }
123 Value::Void => murmurhash3(&[0xff]),
124 _ => murmurhash3(&[0xfe]),
125 }
126}
127
128/// A deterministic hash map with insertion-order iteration.
129/// Uses open addressing with Fibonacci hashing.
130#[derive(Debug, Clone)]
131pub struct DetMap {
132 /// Entries: (hash, key, value). None for empty slots.
133 entries: Vec<Option<(u64, Value, Value)>>,
134 /// Insertion-order indices into `entries`.
135 order: Vec<usize>,
136 len: usize,
137 capacity: usize,
138}
139
140const FIBONACCI_CONSTANT: u64 = 11400714819323198485; // 2^64 / phi
141
142impl DetMap {
143 /// Create a new empty [`DetMap`] with an initial capacity of 8 slots.
144 pub fn new() -> Self {
145 let capacity = 8;
146 DetMap {
147 entries: vec![None; capacity],
148 order: Vec::new(),
149 len: 0,
150 capacity,
151 }
152 }
153
154 /// Map a hash to a slot index using Fibonacci hashing.
155 fn slot_index(&self, hash: u64) -> usize {
156 let shift = 64 - (self.capacity.trailing_zeros() as u64);
157 (hash.wrapping_mul(FIBONACCI_CONSTANT) >> shift) as usize
158 }
159
160 /// Insert a key-value pair into the map.
161 ///
162 /// If the key already exists, its value is overwritten and its original
163 /// insertion position is preserved. Triggers a rehash (doubling capacity)
164 /// when the load factor exceeds 75%.
165 pub fn insert(&mut self, key: Value, value: Value) {
166 if self.len * 4 >= self.capacity * 3 {
167 self.grow();
168 }
169 let hash = value_hash(&key);
170 let mut slot = self.slot_index(hash);
171 loop {
172 match &self.entries[slot] {
173 None => {
174 self.entries[slot] = Some((hash, key, value));
175 self.order.push(slot);
176 self.len += 1;
177 return;
178 }
179 Some((h, k, _)) => {
180 if *h == hash && values_equal_static(k, &key) {
181 // Overwrite value, preserve insertion order
182 self.entries[slot] = Some((hash, key, value));
183 return;
184 }
185 slot = (slot + 1) % self.capacity;
186 }
187 }
188 }
189 }
190
191 /// Look up a value by key. Returns `None` if the key is not present.
192 pub fn get(&self, key: &Value) -> Option<&Value> {
193 let hash = value_hash(key);
194 let mut slot = self.slot_index(hash);
195 let start = slot;
196 loop {
197 match &self.entries[slot] {
198 None => return None,
199 Some((h, k, v)) => {
200 if *h == hash && values_equal_static(k, key) {
201 return Some(v);
202 }
203 slot = (slot + 1) % self.capacity;
204 if slot == start {
205 return None;
206 }
207 }
208 }
209 }
210 }
211
212 /// Return `true` if the map contains the given key.
213 pub fn contains_key(&self, key: &Value) -> bool {
214 self.get(key).is_some()
215 }
216
217 /// Remove a key from the map, returning its value if it was present.
218 ///
219 /// Uses Robin Hood-style backward-shift deletion to maintain probe
220 /// chain correctness. The insertion-order vector is updated in place
221 /// so that iteration order of remaining entries is preserved.
222 pub fn remove(&mut self, key: &Value) -> Option<Value> {
223 let hash = value_hash(key);
224 let mut slot = self.slot_index(hash);
225 let start = slot;
226 loop {
227 match &self.entries[slot] {
228 None => return None,
229 Some((h, k, _)) => {
230 if *h == hash && values_equal_static(k, key) {
231 let (_, _, v) = self.entries[slot].take().unwrap();
232 self.len -= 1;
233 self.order.retain(|&s| s != slot);
234 // Re-insert displaced entries (Robin Hood-style cleanup)
235 // Preserve insertion-order by updating slot refs in-place
236 let mut next = (slot + 1) % self.capacity;
237 while self.entries[next].is_some() {
238 let entry = self.entries[next].take().unwrap();
239 let old_slot = next;
240 let rehash = entry.0;
241 let rkey = entry.1;
242 let rval = entry.2;
243 // Find new slot
244 let mut new_slot = self.slot_index(rehash);
245 while self.entries[new_slot].is_some() {
246 new_slot = (new_slot + 1) % self.capacity;
247 }
248 self.entries[new_slot] = Some((rehash, rkey, rval));
249 // Update order in-place: replace old_slot with new_slot
250 // This preserves the original insertion position
251 for s in &mut self.order {
252 if *s == old_slot {
253 *s = new_slot;
254 break;
255 }
256 }
257 next = (next + 1) % self.capacity;
258 }
259 return Some(v);
260 }
261 slot = (slot + 1) % self.capacity;
262 if slot == start {
263 return None;
264 }
265 }
266 }
267 }
268 }
269
270 /// Iterate in insertion order.
271 pub fn iter(&self) -> impl Iterator<Item = (&Value, &Value)> {
272 self.order.iter().filter_map(move |&slot| {
273 self.entries[slot]
274 .as_ref()
275 .map(|(_, k, v)| (k, v))
276 })
277 }
278
279 /// Collect all keys in insertion order.
280 pub fn keys(&self) -> Vec<Value> {
281 self.iter().map(|(k, _)| k.clone()).collect()
282 }
283
284 /// Collect all values in insertion order.
285 pub fn values_vec(&self) -> Vec<Value> {
286 self.iter().map(|(_, v)| v.clone()).collect()
287 }
288
289 /// Return the number of key-value pairs in the map.
290 pub fn len(&self) -> usize {
291 self.len
292 }
293
294 /// Return `true` if the map contains no entries.
295 pub fn is_empty(&self) -> bool {
296 self.len == 0
297 }
298
299 /// Double the capacity and rehash all entries, preserving insertion order.
300 fn grow(&mut self) {
301 let new_capacity = self.capacity * 2;
302 let old_order = self.order.clone();
303 let old_entries: Vec<_> = old_order
304 .iter()
305 .filter_map(|&slot| self.entries[slot].clone())
306 .collect();
307
308 self.entries = vec![None; new_capacity];
309 self.order = Vec::new();
310 self.len = 0;
311 self.capacity = new_capacity;
312
313 for (hash, key, value) in old_entries {
314 let _ = hash; // Re-hash with new capacity
315 self.insert(key, value);
316 }
317 }
318}
319
320impl Default for DetMap {
321 fn default() -> Self {
322 Self::new()
323 }
324}
325
326/// Test two [`Value`]s for structural equality without interpreter context.
327///
328/// Float comparison uses `to_bits()` so that `NaN == NaN` is `false` and
329/// `+0.0 != -0.0`. Enum variants compare name, variant tag, and fields
330/// recursively. Unsupported variant pairs always return `false`.
331pub fn values_equal_static(a: &Value, b: &Value) -> bool {
332 match (a, b) {
333 (Value::Int(a), Value::Int(b)) => a == b,
334 (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
335 (Value::Bool(a), Value::Bool(b)) => a == b,
336 (Value::String(a), Value::String(b)) => a == b,
337 (Value::Bytes(a), Value::Bytes(b)) => *a.borrow() == *b.borrow(),
338 (Value::ByteSlice(a), Value::ByteSlice(b)) => **a == **b,
339 (Value::StrView(a), Value::StrView(b)) => **a == **b,
340 (Value::U8(a), Value::U8(b)) => a == b,
341 (Value::Bf16(a), Value::Bf16(b)) => a.0 == b.0,
342 (
343 Value::Enum {
344 enum_name: en1,
345 variant: v1,
346 fields: f1,
347 },
348 Value::Enum {
349 enum_name: en2,
350 variant: v2,
351 fields: f2,
352 },
353 ) => {
354 en1 == en2
355 && v1 == v2
356 && f1.len() == f2.len()
357 && f1
358 .iter()
359 .zip(f2.iter())
360 .all(|(a, b)| values_equal_static(a, b))
361 }
362 (Value::Void, Value::Void) => true,
363 _ => false,
364 }
365}
366