Skip to main content

cljrs_value/
hash.rs

1/// Clojure-compatible hashing.
2///
3/// Clojure uses a specific hash algorithm (based on Murmur3) so that:
4/// - `(hash 1)` == `(hash 1N)` (Long and BigInt with the same value)
5/// - `(hash 1.0)` == `(hash 1)` when the double is a whole number
6/// - String hashing matches the JVM `String.hashCode()` algorithm
7///
8/// We implement a simplified but compatible version here.  Phase 5 can
9/// refine this to byte-exact JVM compatibility if needed.
10pub trait ClojureHash {
11    fn clojure_hash(&self) -> u32;
12}
13
14/// Murmur3 finalizer mix (matches Clojure's `Util.hashCombine`).
15pub fn murmur3_mix(mut h: u32) -> u32 {
16    h ^= h >> 16;
17    h = h.wrapping_mul(0x85eb_ca6b);
18    h ^= h >> 13;
19    h = h.wrapping_mul(0xc2b2_ae35);
20    h ^= h >> 16;
21    h
22}
23
24/// Hash a single i64 the way Clojure does (mix the two 32-bit halves).
25pub fn hash_i64(n: i64) -> u32 {
26    let lo = n as u32;
27    let hi = (n >> 32) as u32;
28    murmur3_mix(lo ^ hi)
29}
30
31pub fn hash_u128(n: u128) -> u32 {
32    let a = n as u32;
33    let b = ((n >> 32) & 0xFFFFFFFF) as u32;
34    let c = ((n >> 64) & 0xFFFFFFFF) as u32;
35    let d = ((n >> 96) & 0xFFFFFFFF) as u32;
36    murmur3_mix(a ^ b ^ c ^ d)
37}
38
39/// Hash a string using the JVM `String.hashCode` algorithm (UTF-16 codepoints).
40/// This matches `(hash "foo")` in Clojure.
41pub fn hash_string(s: &str) -> u32 {
42    let mut h: i32 = 0;
43    for ch in s.chars() {
44        // Encode as UTF-16 (each char is one or two u16 code units).
45        let mut buf = [0u16; 2];
46        for unit in ch.encode_utf16(&mut buf) {
47            h = h.wrapping_mul(31).wrapping_add(*unit as i32);
48        }
49    }
50    murmur3_mix(h as u32)
51}
52
53/// Combine two hashes (order-independent — used for map/set).
54pub fn hash_combine_unordered(a: u32, b: u32) -> u32 {
55    a ^ b
56}
57
58/// Combine two hashes preserving order (used for list/vector).
59pub fn hash_combine_ordered(acc: u32, h: u32) -> u32 {
60    murmur3_mix(acc.wrapping_mul(31).wrapping_add(h))
61}
62
63#[cfg(test)]
64mod tests {
65    use super::*;
66
67    #[test]
68    fn test_hash_i64_zero() {
69        // Just ensure it doesn't panic and produces a deterministic value.
70        let h1 = hash_i64(0);
71        let h2 = hash_i64(0);
72        assert_eq!(h1, h2);
73    }
74
75    #[test]
76    fn test_hash_string_deterministic() {
77        let h1 = hash_string("hello");
78        let h2 = hash_string("hello");
79        assert_eq!(h1, h2);
80        // Different strings have different hashes (with very high probability).
81        assert_ne!(hash_string("hello"), hash_string("world"));
82    }
83
84    #[test]
85    fn test_murmur3_avalanche() {
86        // Changing one bit should avalanche.
87        let h1 = murmur3_mix(0);
88        let h2 = murmur3_mix(1);
89        assert_ne!(h1, h2);
90    }
91}