ragc_common/
hash.rs

1// Hash functions for AGC
2// Rust equivalent of MurMurHash implementations in src/common/utils.h
3
4/// MurMurHash3 for 64-bit values
5/// Direct equivalent of MurMur64Hash in C++
6#[derive(Debug, Clone, Copy)]
7pub struct MurMur64Hash;
8
9impl MurMur64Hash {
10    #[inline]
11    pub fn hash(mut h: u64) -> u64 {
12        h ^= h >> 33;
13        h = h.wrapping_mul(0xff51afd7ed558ccd_u64);
14        h ^= h >> 33;
15        h = h.wrapping_mul(0xc4ceb9fe1a85ec53_u64);
16        h ^= h >> 33;
17        h
18    }
19}
20
21impl std::hash::Hasher for MurMur64Hash {
22    fn finish(&self) -> u64 {
23        0 // Not used in this context
24    }
25
26    fn write(&mut self, _bytes: &[u8]) {
27        // Not implemented for this use case
28    }
29}
30
31/// MurMurHash3 for pairs of 64-bit values
32/// Direct equivalent of MurMurPair64Hash in C++
33#[derive(Debug, Clone, Copy)]
34pub struct MurMurPair64Hash;
35
36impl MurMurPair64Hash {
37    #[inline]
38    pub fn hash(first: u64, second: u64) -> u64 {
39        let mut h = first;
40
41        h ^= h >> 33;
42        h = h.wrapping_mul(0xff51afd7ed558ccd_u64);
43        h ^= h >> 33;
44        h = h.wrapping_mul(0xc4ceb9fe1a85ec53_u64);
45        h ^= h >> 33;
46
47        h ^= second;
48
49        h ^= h >> 33;
50        h = h.wrapping_mul(0xff51afd7ed558ccd_u64);
51        h ^= h >> 33;
52        h = h.wrapping_mul(0xc4ceb9fe1a85ec53_u64);
53        h ^= h >> 33;
54
55        h
56    }
57}
58
59/// MurMurHash3 for strings
60/// Direct equivalent of MurMurStringsHash in C++
61#[derive(Debug, Clone, Copy)]
62pub struct MurMurStringsHash;
63
64impl MurMurStringsHash {
65    const C1: u64 = 0x87c37b91114253d5_u64;
66    const C2: u64 = 0x4cf5ad432745937f_u64;
67
68    #[inline]
69    fn rotl64(x: u64, r: i8) -> u64 {
70        x.rotate_left(r as u32)
71    }
72
73    #[inline]
74    fn fmix64(mut k: u64) -> u64 {
75        k ^= k >> 33;
76        k = k.wrapping_mul(0xff51afd7ed558ccd_u64);
77        k ^= k >> 33;
78        k = k.wrapping_mul(0xc4ceb9fe1a85ec53_u64);
79        k ^= k >> 33;
80        k
81    }
82
83    #[inline]
84    fn load64(data: &[u8], offset: usize) -> u64 {
85        let mut x = 0u64;
86        for i in 0..8 {
87            if offset + i < data.len() {
88                x = (x << 8) | (data[offset + i] as u64);
89            }
90        }
91        x
92    }
93
94    pub fn hash(s: &str) -> u64 {
95        let data = s.as_bytes();
96        let mut h1 = 0u64;
97        let mut h2 = 0u64;
98
99        let n_blocks = data.len() / 16;
100
101        // Process 16-byte blocks
102        for i in 0..n_blocks {
103            let offset = i * 16;
104            let mut k1 = Self::load64(data, offset);
105            let mut k2 = Self::load64(data, offset + 8);
106
107            k1 = k1.wrapping_mul(Self::C1);
108            k1 = Self::rotl64(k1, 31);
109            k1 = k1.wrapping_mul(Self::C2);
110            h1 ^= k1;
111
112            h1 = Self::rotl64(h1, 27);
113            h1 = h1.wrapping_add(h2);
114            h1 = h1.wrapping_mul(5).wrapping_add(0x52dce729);
115
116            k2 = k2.wrapping_mul(Self::C2);
117            k2 = Self::rotl64(k2, 33);
118            k2 = k2.wrapping_mul(Self::C1);
119            h2 ^= k2;
120
121            h2 = Self::rotl64(h2, 31);
122            h2 = h2.wrapping_add(h1);
123            h2 = h2.wrapping_mul(5).wrapping_add(0x38495ab5);
124        }
125
126        // Process remaining bytes
127        let tail = data.len() % 16;
128        let tail_offset = n_blocks * 16;
129
130        let mut k1 = 0u64;
131        let mut k2 = 0u64;
132
133        if tail >= 15 {
134            k2 ^= (data[tail_offset + 14] as u64) << 48;
135        }
136        if tail >= 14 {
137            k2 ^= (data[tail_offset + 13] as u64) << 40;
138        }
139        if tail >= 13 {
140            k2 ^= (data[tail_offset + 12] as u64) << 32;
141        }
142        if tail >= 12 {
143            k2 ^= (data[tail_offset + 11] as u64) << 24;
144        }
145        if tail >= 11 {
146            k2 ^= (data[tail_offset + 10] as u64) << 16;
147        }
148        if tail >= 10 {
149            k2 ^= (data[tail_offset + 9] as u64) << 8;
150        }
151        if tail >= 9 {
152            k2 ^= data[tail_offset + 8] as u64;
153            k2 = k2.wrapping_mul(Self::C2);
154            k2 = Self::rotl64(k2, 33);
155            k2 = k2.wrapping_mul(Self::C1);
156            h2 ^= k2;
157        }
158        if tail >= 8 {
159            k1 ^= (data[tail_offset + 7] as u64) << 56;
160        }
161        if tail >= 7 {
162            k1 ^= (data[tail_offset + 6] as u64) << 48;
163        }
164        if tail >= 6 {
165            k1 ^= (data[tail_offset + 5] as u64) << 40;
166        }
167        if tail >= 5 {
168            k1 ^= (data[tail_offset + 4] as u64) << 32;
169        }
170        if tail >= 4 {
171            k1 ^= (data[tail_offset + 3] as u64) << 24;
172        }
173        if tail >= 3 {
174            k1 ^= (data[tail_offset + 2] as u64) << 16;
175        }
176        if tail >= 2 {
177            k1 ^= (data[tail_offset + 1] as u64) << 8;
178        }
179        if tail >= 1 {
180            k1 ^= data[tail_offset] as u64;
181            k1 = k1.wrapping_mul(Self::C1);
182            k1 = Self::rotl64(k1, 31);
183            k1 = k1.wrapping_mul(Self::C2);
184            h1 ^= k1;
185        }
186
187        h1 ^= data.len() as u64;
188        h2 ^= data.len() as u64;
189
190        h1 = h1.wrapping_add(h2);
191        h2 = h2.wrapping_add(h1);
192
193        h1 = Self::fmix64(h1);
194        h2 = Self::fmix64(h2);
195
196        h1 = h1.wrapping_add(h2);
197        h2 = h2.wrapping_add(h1);
198
199        h1 ^ h2
200    }
201}
202
203/// MurMurHash32 for 32-bit values
204#[derive(Debug, Clone, Copy)]
205pub struct MurMur32Hash;
206
207impl MurMur32Hash {
208    #[inline]
209    pub fn hash(mut h: u32) -> u32 {
210        h ^= h >> 16;
211        h = h.wrapping_mul(0x85ebca6b);
212        h ^= h >> 13;
213        h = h.wrapping_mul(0xc2b2ae35);
214        h ^= h >> 16;
215        h
216    }
217}
218
219#[cfg(test)]
220mod tests {
221    use super::*;
222
223    #[test]
224    fn test_murmurhash64() {
225        // Test basic properties (actual values will be validated against C++)
226        assert_eq!(MurMur64Hash::hash(0), 0);
227        assert_ne!(MurMur64Hash::hash(1), 0);
228        assert_ne!(MurMur64Hash::hash(42), 0);
229    }
230
231    #[test]
232    fn test_murmurhash_pair64() {
233        // Test basic properties
234        let h1 = MurMurPair64Hash::hash(0, 0);
235        assert_eq!(h1, 0);
236
237        let h2 = MurMurPair64Hash::hash(1, 2);
238        assert_ne!(h2, 0);
239    }
240
241    #[test]
242    fn test_murmurhash_strings() {
243        // Test basic properties
244        let h1 = MurMurStringsHash::hash("");
245        assert_eq!(h1, 0);
246
247        let h2 = MurMurStringsHash::hash("hello");
248        assert_ne!(h2, 0);
249
250        let h3 = MurMurStringsHash::hash("world");
251        assert_ne!(h3, 0);
252        assert_ne!(h2, h3);
253    }
254
255    #[test]
256    fn test_murmurhash32() {
257        // Test basic properties
258        assert_eq!(MurMur32Hash::hash(0), 0);
259        assert_ne!(MurMur32Hash::hash(1), 0);
260        assert_ne!(MurMur32Hash::hash(42), 0);
261    }
262}