Skip to main content

cjc_runtime/
det_map.rs

1//! Deterministic map with insertion-order iteration.
2//!
3//! Provides [`DetMap`], an open-addressing hash map that uses MurmurHash3 with
4//! a fixed seed and Fibonacci hashing for slot selection. Iteration order
5//! follows insertion order, not hash order, making output deterministic across
6//! runs and platforms.
7//!
8//! # Determinism guarantees
9//!
10//! - MurmurHash3 with a compile-time fixed seed produces identical hashes on
11//!   every run.
12//! - Fibonacci hashing maps hash values to slots without random state.
13//! - Insertion-order iteration is maintained via an auxiliary `order` vector.
14//! - Growth (rehash) preserves insertion order.
15//!
16//! # When to use
17//!
18//! Use [`DetMap`] instead of `HashMap` when the CJC runtime needs a
19//! key-value store whose iteration order must be reproducible (e.g., for
20//! struct field serialization, JSON emission, or deterministic fold
21//! operations). For cases where only sorted-key order is needed, prefer
22//! `BTreeMap`.
23
24use crate::value::Value;
25
26// ---------------------------------------------------------------------------
27// 5. Deterministic Map (open addressing, MurmurHash3, insertion-order iteration)
28// ---------------------------------------------------------------------------
29
30/// Apply the MurmurHash3 64-bit finalizer to a hash value.
31///
32/// Mixes all bits of `h` so that small input changes produce large output
33/// changes. This is the standard MurmurHash3 finalization step.
34pub fn murmurhash3_finalize(mut h: u64) -> u64 {
35    h ^= h >> 33;
36    h = h.wrapping_mul(0xff51afd7ed558ccd);
37    h ^= h >> 33;
38    h = h.wrapping_mul(0xc4ceb9fe1a85ec53);
39    h ^= h >> 33;
40    h
41}
42
43/// Hash a byte slice using MurmurHash3 with a fixed compile-time seed.
44///
45/// Processes 8-byte chunks followed by a tail, then finalizes. The seed is
46/// `0x5f3759df` (the fast inverse-sqrt constant), chosen for its good
47/// avalanche properties. Produces identical results on all platforms.
48///
49/// # Arguments
50///
51/// * `data` - The byte slice to hash.
52///
53/// # Returns
54///
55/// A 64-bit hash value.
56pub fn murmurhash3(data: &[u8]) -> u64 {
57    const SEED: u64 = 0x5f3759df;
58    let mut h = SEED;
59    // Process 8-byte chunks
60    let chunks = data.len() / 8;
61    for i in 0..chunks {
62        let mut k = [0u8; 8];
63        k.copy_from_slice(&data[i * 8..(i + 1) * 8]);
64        let k = u64::from_le_bytes(k);
65        let k = k.wrapping_mul(0x87c37b91114253d5);
66        let k = k.rotate_left(31);
67        let k = k.wrapping_mul(0x4cf5ad432745937f);
68        h ^= k;
69        h = h.rotate_left(27);
70        h = h.wrapping_mul(5).wrapping_add(0x52dce729);
71    }
72    // Tail
73    let tail = &data[chunks * 8..];
74    let mut k: u64 = 0;
75    for (i, &b) in tail.iter().enumerate() {
76        k |= (b as u64) << (i * 8);
77    }
78    if !tail.is_empty() {
79        let k = k.wrapping_mul(0x87c37b91114253d5);
80        let k = k.rotate_left(31);
81        let k = k.wrapping_mul(0x4cf5ad432745937f);
82        h ^= k;
83    }
84    h ^= data.len() as u64;
85    murmurhash3_finalize(h)
86}
87
88/// Compute a deterministic hash for a CJC [`Value`].
89///
90/// Dispatches on the value variant, serializing to bytes and hashing via
91/// [`murmurhash3`]. Enum variants recursively hash their fields. Unsupported
92/// variants (e.g., closures, tensors) map to a fixed sentinel byte.
93///
94/// # Determinism
95///
96/// The same [`Value`] always produces the same hash, regardless of platform
97/// or run. Float hashing uses `to_bits()` so that `NaN` and `-0.0` hash
98/// consistently.
99pub fn value_hash(val: &Value) -> u64 {
100    match val {
101        Value::Int(n) => murmurhash3(&n.to_le_bytes()),
102        Value::Float(f) => murmurhash3(&f.to_bits().to_le_bytes()),
103        Value::Bool(b) => murmurhash3(&[*b as u8]),
104        Value::String(s) => murmurhash3(s.as_bytes()),
105        Value::Bytes(b) => murmurhash3(&b.borrow()),
106        Value::ByteSlice(b) => murmurhash3(b),
107        Value::StrView(b) => murmurhash3(b),
108        Value::U8(v) => murmurhash3(&[*v]),
109        Value::Bf16(v) => murmurhash3(&v.0.to_le_bytes()),
110        Value::Enum {
111            enum_name,
112            variant,
113            fields,
114        } => {
115            // Hash enum_name + variant + each field
116            let mut h = murmurhash3(enum_name.as_bytes());
117            h ^= murmurhash3(variant.as_bytes());
118            for f in fields {
119                h ^= value_hash(f);
120            }
121            h
122        }
123        Value::Void => murmurhash3(&[0xff]),
124        _ => murmurhash3(&[0xfe]),
125    }
126}
127
128/// A deterministic hash map with insertion-order iteration.
129/// Uses open addressing with Fibonacci hashing.
130#[derive(Debug, Clone)]
131pub struct DetMap {
132    /// Entries: (hash, key, value). None for empty slots.
133    entries: Vec<Option<(u64, Value, Value)>>,
134    /// Insertion-order indices into `entries`.
135    order: Vec<usize>,
136    len: usize,
137    capacity: usize,
138}
139
140const FIBONACCI_CONSTANT: u64 = 11400714819323198485; // 2^64 / phi
141
142impl DetMap {
143    /// Create a new empty [`DetMap`] with an initial capacity of 8 slots.
144    pub fn new() -> Self {
145        let capacity = 8;
146        DetMap {
147            entries: vec![None; capacity],
148            order: Vec::new(),
149            len: 0,
150            capacity,
151        }
152    }
153
154    /// Map a hash to a slot index using Fibonacci hashing.
155    fn slot_index(&self, hash: u64) -> usize {
156        let shift = 64 - (self.capacity.trailing_zeros() as u64);
157        (hash.wrapping_mul(FIBONACCI_CONSTANT) >> shift) as usize
158    }
159
160    /// Insert a key-value pair into the map.
161    ///
162    /// If the key already exists, its value is overwritten and its original
163    /// insertion position is preserved. Triggers a rehash (doubling capacity)
164    /// when the load factor exceeds 75%.
165    pub fn insert(&mut self, key: Value, value: Value) {
166        if self.len * 4 >= self.capacity * 3 {
167            self.grow();
168        }
169        let hash = value_hash(&key);
170        let mut slot = self.slot_index(hash);
171        loop {
172            match &self.entries[slot] {
173                None => {
174                    self.entries[slot] = Some((hash, key, value));
175                    self.order.push(slot);
176                    self.len += 1;
177                    return;
178                }
179                Some((h, k, _)) => {
180                    if *h == hash && values_equal_static(k, &key) {
181                        // Overwrite value, preserve insertion order
182                        self.entries[slot] = Some((hash, key, value));
183                        return;
184                    }
185                    slot = (slot + 1) % self.capacity;
186                }
187            }
188        }
189    }
190
191    /// Look up a value by key. Returns `None` if the key is not present.
192    pub fn get(&self, key: &Value) -> Option<&Value> {
193        let hash = value_hash(key);
194        let mut slot = self.slot_index(hash);
195        let start = slot;
196        loop {
197            match &self.entries[slot] {
198                None => return None,
199                Some((h, k, v)) => {
200                    if *h == hash && values_equal_static(k, key) {
201                        return Some(v);
202                    }
203                    slot = (slot + 1) % self.capacity;
204                    if slot == start {
205                        return None;
206                    }
207                }
208            }
209        }
210    }
211
212    /// Return `true` if the map contains the given key.
213    pub fn contains_key(&self, key: &Value) -> bool {
214        self.get(key).is_some()
215    }
216
217    /// Remove a key from the map, returning its value if it was present.
218    ///
219    /// Uses Robin Hood-style backward-shift deletion to maintain probe
220    /// chain correctness. The insertion-order vector is updated in place
221    /// so that iteration order of remaining entries is preserved.
222    pub fn remove(&mut self, key: &Value) -> Option<Value> {
223        let hash = value_hash(key);
224        let mut slot = self.slot_index(hash);
225        let start = slot;
226        loop {
227            match &self.entries[slot] {
228                None => return None,
229                Some((h, k, _)) => {
230                    if *h == hash && values_equal_static(k, key) {
231                        let (_, _, v) = self.entries[slot].take().unwrap();
232                        self.len -= 1;
233                        self.order.retain(|&s| s != slot);
234                        // Re-insert displaced entries (Robin Hood-style cleanup)
235                        // Preserve insertion-order by updating slot refs in-place
236                        let mut next = (slot + 1) % self.capacity;
237                        while self.entries[next].is_some() {
238                            let entry = self.entries[next].take().unwrap();
239                            let old_slot = next;
240                            let rehash = entry.0;
241                            let rkey = entry.1;
242                            let rval = entry.2;
243                            // Find new slot
244                            let mut new_slot = self.slot_index(rehash);
245                            while self.entries[new_slot].is_some() {
246                                new_slot = (new_slot + 1) % self.capacity;
247                            }
248                            self.entries[new_slot] = Some((rehash, rkey, rval));
249                            // Update order in-place: replace old_slot with new_slot
250                            // This preserves the original insertion position
251                            for s in &mut self.order {
252                                if *s == old_slot {
253                                    *s = new_slot;
254                                    break;
255                                }
256                            }
257                            next = (next + 1) % self.capacity;
258                        }
259                        return Some(v);
260                    }
261                    slot = (slot + 1) % self.capacity;
262                    if slot == start {
263                        return None;
264                    }
265                }
266            }
267        }
268    }
269
270    /// Iterate in insertion order.
271    pub fn iter(&self) -> impl Iterator<Item = (&Value, &Value)> {
272        self.order.iter().filter_map(move |&slot| {
273            self.entries[slot]
274                .as_ref()
275                .map(|(_, k, v)| (k, v))
276        })
277    }
278
279    /// Collect all keys in insertion order.
280    pub fn keys(&self) -> Vec<Value> {
281        self.iter().map(|(k, _)| k.clone()).collect()
282    }
283
284    /// Collect all values in insertion order.
285    pub fn values_vec(&self) -> Vec<Value> {
286        self.iter().map(|(_, v)| v.clone()).collect()
287    }
288
289    /// Return the number of key-value pairs in the map.
290    pub fn len(&self) -> usize {
291        self.len
292    }
293
294    /// Return `true` if the map contains no entries.
295    pub fn is_empty(&self) -> bool {
296        self.len == 0
297    }
298
299    /// Double the capacity and rehash all entries, preserving insertion order.
300    fn grow(&mut self) {
301        let new_capacity = self.capacity * 2;
302        let old_order = self.order.clone();
303        let old_entries: Vec<_> = old_order
304            .iter()
305            .filter_map(|&slot| self.entries[slot].clone())
306            .collect();
307
308        self.entries = vec![None; new_capacity];
309        self.order = Vec::new();
310        self.len = 0;
311        self.capacity = new_capacity;
312
313        for (hash, key, value) in old_entries {
314            let _ = hash; // Re-hash with new capacity
315            self.insert(key, value);
316        }
317    }
318}
319
320impl Default for DetMap {
321    fn default() -> Self {
322        Self::new()
323    }
324}
325
326/// Test two [`Value`]s for structural equality without interpreter context.
327///
328/// Float comparison uses `to_bits()` so that `NaN == NaN` is `false` and
329/// `+0.0 != -0.0`. Enum variants compare name, variant tag, and fields
330/// recursively. Unsupported variant pairs always return `false`.
331pub fn values_equal_static(a: &Value, b: &Value) -> bool {
332    match (a, b) {
333        (Value::Int(a), Value::Int(b)) => a == b,
334        (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
335        (Value::Bool(a), Value::Bool(b)) => a == b,
336        (Value::String(a), Value::String(b)) => a == b,
337        (Value::Bytes(a), Value::Bytes(b)) => *a.borrow() == *b.borrow(),
338        (Value::ByteSlice(a), Value::ByteSlice(b)) => **a == **b,
339        (Value::StrView(a), Value::StrView(b)) => **a == **b,
340        (Value::U8(a), Value::U8(b)) => a == b,
341        (Value::Bf16(a), Value::Bf16(b)) => a.0 == b.0,
342        (
343            Value::Enum {
344                enum_name: en1,
345                variant: v1,
346                fields: f1,
347            },
348            Value::Enum {
349                enum_name: en2,
350                variant: v2,
351                fields: f2,
352            },
353        ) => {
354            en1 == en2
355                && v1 == v2
356                && f1.len() == f2.len()
357                && f1
358                    .iter()
359                    .zip(f2.iter())
360                    .all(|(a, b)| values_equal_static(a, b))
361        }
362        (Value::Void, Value::Void) => true,
363        _ => false,
364    }
365}
366