scylla_rs/cql/murmur3/
mod.rs

1// Copyright 2021 IOTA Stiftung
2// SPDX-License-Identifier: Apache-2.0
3
4//! This crates implements the murmur3, which is used to calculate the partition key in scyllaDB.
5use std::convert::TryInto;
6
7fn copy_into_array<A, T>(slice: &[T]) -> A
8where
9    A: Default + AsMut<[T]>,
10    T: Copy,
11{
12    let mut a = A::default();
13    <A as AsMut<[T]>>::as_mut(&mut a).copy_from_slice(slice);
14    a
15}
16
17/// Modified from https://github.com/stusmall/murmur3
18///
19/// Use the x64 variant of the 128 bit murmur3 to hash some [Read] implementation.
20///
21/// # Example
22/// ```
23/// use scylla_rs::cql::murmur3_cassandra_x64_128;
24/// let hash_pair = murmur3_cassandra_x64_128(
25///     "EHUHSJRCMDJSZUQMNLDBSRFC9O9XCI9SMHFWWHNDYOOOWMSOJQHCC9GFUEGECEVVXCSXYTHSRJ9TZ9999".as_bytes(),
26///     0,
27/// );
28/// ```
29#[allow(unused)]
30pub fn murmur3_cassandra_x64_128(source: &[u8], seed: u32) -> (i64, i64) {
31    const C1: i64 = -8_663_945_395_140_668_459_i64; // 0x87c3_7b91_1142_53d5;
32    const C2: i64 = 0x4cf5_ad43_2745_937f;
33    const C3: i64 = 0x52dc_e729;
34    const C4: i64 = 0x3849_5ab5;
35    const R1: u32 = 27;
36    const R2: u32 = 31;
37    const R3: u32 = 33;
38    const M: i64 = 5;
39    let mut h1: i64 = seed as i64;
40    let mut h2: i64 = seed as i64;
41    let mut chunks_iter = source.chunks_exact(16);
42    let rem = chunks_iter.remainder();
43    for chunk in chunks_iter {
44        let k1 = i64::from_le_bytes((&chunk[..8]).try_into().unwrap());
45        let k2 = i64::from_le_bytes((&chunk[8..]).try_into().unwrap());
46        h1 ^= k1.wrapping_mul(C1).rotate_left(R2).wrapping_mul(C2);
47        h1 = h1.rotate_left(R1).wrapping_add(h2).wrapping_mul(M).wrapping_add(C3);
48        h2 ^= k2.wrapping_mul(C2).rotate_left(R3).wrapping_mul(C1);
49        h2 = h2.rotate_left(R2).wrapping_add(h1).wrapping_mul(M).wrapping_add(C4);
50    }
51    let read = rem.len();
52    if read > 0 {
53        let mut k1 = 0;
54        let mut k2 = 0;
55        if read >= 15 {
56            k2 ^= (rem[14] as i64) << 48;
57        }
58        if read >= 14 {
59            k2 ^= (rem[13] as i64) << 40;
60        }
61        if read >= 13 {
62            k2 ^= (rem[12] as i64) << 32;
63        }
64        if read >= 12 {
65            k2 ^= (rem[11] as i64) << 24;
66        }
67        if read >= 11 {
68            k2 ^= (rem[10] as i64) << 16;
69        }
70        if read >= 10 {
71            k2 ^= (rem[9] as i64) << 8;
72        }
73        if read >= 9 {
74            k2 ^= rem[8] as i64;
75            k2 = k2.wrapping_mul(C2).rotate_left(33).wrapping_mul(C1);
76            h2 ^= k2;
77        }
78        if read >= 8 {
79            k1 ^= (rem[7] as i64) << 56;
80        }
81        if read >= 7 {
82            k1 ^= (rem[6] as i64) << 48;
83        }
84        if read >= 6 {
85            k1 ^= (rem[5] as i64) << 40;
86        }
87        if read >= 5 {
88            k1 ^= (rem[4] as i64) << 32;
89        }
90        if read >= 4 {
91            k1 ^= (rem[3] as i64) << 24;
92        }
93        if read >= 3 {
94            k1 ^= (rem[2] as i64) << 16;
95        }
96        if read >= 2 {
97            k1 ^= (rem[1] as i64) << 8;
98        }
99        if read >= 1 {
100            k1 ^= rem[0] as i64;
101        }
102        k1 = k1.wrapping_mul(C1).rotate_left(31).wrapping_mul(C2);
103        h1 ^= k1;
104    }
105
106    h1 ^= source.len() as i64;
107    h2 ^= source.len() as i64;
108    h1 = h1.wrapping_add(h2);
109    h2 = h2.wrapping_add(h1);
110    h1 = fmix64_i64(h1);
111    h2 = fmix64_i64(h2);
112    h1 = h1.wrapping_add(h2);
113    h2 = h2.wrapping_add(h1);
114    (h1, h2)
115}
116
117#[allow(unused)]
118pub fn old_modified_murmur3_cassandra_x64_128(source: &[u8], seed: u32) -> anyhow::Result<(i64, i64)> {
119    const C1: i64 = -8_663_945_395_140_668_459_i64; // 0x87c3_7b91_1142_53d5;
120    const C2: i64 = 0x4cf5_ad43_2745_937f;
121    const C3: i64 = 0x52dc_e729;
122    const C4: i64 = 0x3849_5ab5;
123    const R1: u32 = 27;
124    const R2: u32 = 31;
125    const R3: u32 = 33;
126    const M: i64 = 5;
127    let mut h1: i64 = seed as i64;
128    let mut h2: i64 = seed as i64;
129    let mut chunks_iter = source.chunks_exact(16);
130    let rem = chunks_iter.remainder();
131    for chunk in chunks_iter {
132        let k1 = i64::from_le_bytes(copy_into_array(&chunk[..8]));
133        let k2 = i64::from_le_bytes(copy_into_array(&chunk[8..]));
134        h1 ^= k1.wrapping_mul(C1).rotate_left(R2).wrapping_mul(C2);
135        h1 = h1.rotate_left(R1).wrapping_add(h2).wrapping_mul(M).wrapping_add(C3);
136        h2 ^= k2.wrapping_mul(C2).rotate_left(R3).wrapping_mul(C1);
137        h2 = h2.rotate_left(R2).wrapping_add(h1).wrapping_mul(M).wrapping_add(C4);
138    }
139    let read = rem.len();
140    if read > 0 {
141        let mut k1 = 0;
142        let mut k2 = 0;
143        if read >= 15 {
144            k2 ^= (rem[14] as i64) << 48;
145        }
146        if read >= 14 {
147            k2 ^= (rem[13] as i64) << 40;
148        }
149        if read >= 13 {
150            k2 ^= (rem[12] as i64) << 32;
151        }
152        if read >= 12 {
153            k2 ^= (rem[11] as i64) << 24;
154        }
155        if read >= 11 {
156            k2 ^= (rem[10] as i64) << 16;
157        }
158        if read >= 10 {
159            k2 ^= (rem[9] as i64) << 8;
160        }
161        if read >= 9 {
162            k2 ^= rem[8] as i64;
163            k2 = k2.wrapping_mul(C2).rotate_left(33).wrapping_mul(C1);
164            h2 ^= k2;
165        }
166        if read >= 8 {
167            k1 ^= (rem[7] as i64) << 56;
168        }
169        if read >= 7 {
170            k1 ^= (rem[6] as i64) << 48;
171        }
172        if read >= 6 {
173            k1 ^= (rem[5] as i64) << 40;
174        }
175        if read >= 5 {
176            k1 ^= (rem[4] as i64) << 32;
177        }
178        if read >= 4 {
179            k1 ^= (rem[3] as i64) << 24;
180        }
181        if read >= 3 {
182            k1 ^= (rem[2] as i64) << 16;
183        }
184        if read >= 2 {
185            k1 ^= (rem[1] as i64) << 8;
186        }
187        if read >= 1 {
188            k1 ^= rem[0] as i64;
189        }
190        k1 = k1.wrapping_mul(C1).rotate_left(31).wrapping_mul(C2);
191        h1 ^= k1;
192    }
193
194    h1 ^= source.len() as i64;
195    h2 ^= source.len() as i64;
196    h1 = h1.wrapping_add(h2);
197    h2 = h2.wrapping_add(h1);
198    h1 = fmix64_i64(h1);
199    h2 = fmix64_i64(h2);
200    h1 = h1.wrapping_add(h2);
201    h2 = h2.wrapping_add(h1);
202    return Ok((h1, h2));
203}
204
205use std::ops::Shl;
206#[allow(unused)]
207pub fn old_murmur3_cassandra_x64_128<T: std::io::Read>(source: &mut T, seed: u32) -> std::io::Result<i64> {
208    const C1: i64 = -8_663_945_395_140_668_459_i64; // 0x87c3_7b91_1142_53d5;
209    const C2: i64 = 0x4cf5_ad43_2745_937f;
210    const C3: i64 = 0x52dc_e729;
211    const C4: i64 = 0x3849_5ab5;
212    const R1: u32 = 27;
213    const R2: u32 = 31;
214    const R3: u32 = 33;
215    const M: i64 = 5;
216    let mut h1: i64 = seed as i64;
217    let mut h2: i64 = seed as i64;
218    let mut buf = [0; 16];
219    let mut processed: usize = 0;
220    loop {
221        let read = source.read(&mut buf[..])?;
222        processed += read;
223        if read == 16 {
224            let k1 = i64::from_le_bytes(copy_into_array(&buf[0..8]));
225            let k2 = i64::from_le_bytes(copy_into_array(&buf[8..]));
226            h1 ^= k1.wrapping_mul(C1).rotate_left(R2).wrapping_mul(C2);
227            h1 = h1.rotate_left(R1).wrapping_add(h2).wrapping_mul(M).wrapping_add(C3);
228            h2 ^= k2.wrapping_mul(C2).rotate_left(R3).wrapping_mul(C1);
229            h2 = h2.rotate_left(R2).wrapping_add(h1).wrapping_mul(M).wrapping_add(C4);
230        } else if read == 0 {
231            h1 ^= processed as i64;
232            h2 ^= processed as i64;
233            h1 = h1.wrapping_add(h2);
234            h2 = h2.wrapping_add(h1);
235            h1 = fmix64_i64(h1);
236            h2 = fmix64_i64(h2);
237            h1 = h1.wrapping_add(h2);
238            // This is the original output
239            // h2 = h2.wrapping_add(h1);
240            // let x = ((h2 as i128) << 64) | (h1 as u64 as i128);
241            let x = h1 as i64;
242            return Ok(x);
243        } else {
244            let mut k1 = 0;
245            let mut k2 = 0;
246            if read >= 15 {
247                k2 ^= (buf[14] as i8 as i64).shl(48);
248            }
249            if read >= 14 {
250                k2 ^= (buf[13] as i8 as i64).shl(40);
251            }
252            if read >= 13 {
253                k2 ^= (buf[12] as i8 as i64).shl(32);
254            }
255            if read >= 12 {
256                k2 ^= (buf[11] as i8 as i64).shl(24);
257            }
258            if read >= 11 {
259                k2 ^= (buf[10] as i8 as i64).shl(16);
260            }
261            if read >= 10 {
262                k2 ^= (buf[9] as i8 as i64).shl(8);
263            }
264            if read >= 9 {
265                k2 ^= buf[8] as i8 as i64;
266                k2 = k2.wrapping_mul(C2).rotate_left(33).wrapping_mul(C1);
267                h2 ^= k2;
268            }
269            if read >= 8 {
270                k1 ^= (buf[7] as i8 as i64).shl(56);
271            }
272            if read >= 7 {
273                k1 ^= (buf[6] as i8 as i64).shl(48);
274            }
275            if read >= 6 {
276                k1 ^= (buf[5] as i8 as i64).shl(40);
277            }
278            if read >= 5 {
279                k1 ^= (buf[4] as i8 as i64).shl(32);
280            }
281            if read >= 4 {
282                k1 ^= (buf[3] as i8 as i64).shl(24);
283            }
284            if read >= 3 {
285                k1 ^= (buf[2] as i8 as i64).shl(16);
286            }
287            if read >= 2 {
288                k1 ^= (buf[1] as i8 as i64).shl(8);
289            }
290            if read >= 1 {
291                k1 ^= buf[0] as i8 as i64;
292            }
293            k1 = k1.wrapping_mul(C1);
294            k1 = k1.rotate_left(31);
295            k1 = k1.wrapping_mul(C2);
296            h1 ^= k1;
297        }
298    }
299}
300
301fn fmix64_i64(k: i64) -> i64 {
302    const C1: u64 = 0xff51_afd7_ed55_8ccd;
303    const C2: u64 = 0xc4ce_b9fe_1a85_ec53;
304    const R: u32 = 33;
305    let mut tmp = k as u64;
306    tmp ^= tmp >> R;
307    tmp = tmp.wrapping_mul(C1);
308    tmp ^= tmp >> R;
309    tmp = tmp.wrapping_mul(C2);
310    tmp ^= tmp >> R;
311    tmp as i64
312}
313#[cfg(test)]
314mod tests {
315    use super::*;
316    use std::io::{BufRead, BufReader, Cursor};
317    #[test]
318    fn test_tx_murmur3_token_generation() {
319        let tx = "EHUHSJRCMDJSZUQMNLDBSRFC9O9XCI9SMHFWWHNDYOOOWMSOJQHCC9GFUEGECEVVXCSXYTHSRJ9TZ9999";
320        let hash_pair = murmur3_cassandra_x64_128(tx.as_bytes(), 0);
321        assert_eq!(hash_pair.0, -7733304998189415164);
322    }
323
324    #[test]
325    fn test_address_murmur3_token_generation() {
326        let addr = "NBBM9QWTLPXDQPISXWRJSMOKJQVHCIYBZTWPPAXJSRNRDWQOJDQNX9BZ9RQVLNVTOJBHKBDPP9NPGPGYAQGFDYOHLA";
327        let hash_pair = murmur3_cassandra_x64_128(addr.as_bytes(), 0);
328        assert_eq!(hash_pair.0, -5381343058315604526);
329    }
330
331    #[test]
332    fn test_1000() {
333        let mut file = BufReader::new(Cursor::new(std::include_str!("murmur3_tests.txt")));
334        let mut buf = String::new();
335        while let Ok(n) = file.read_line(&mut buf) {
336            if n == 0 {
337                break;
338            }
339            let mut vals = buf.split_whitespace();
340            let key = vals.next().unwrap();
341            let hash1 = vals.next().unwrap().parse::<i64>().unwrap();
342            let hash2 = vals.next().unwrap().parse::<i64>().unwrap();
343            let (h1, h2) = murmur3_cassandra_x64_128(key.as_bytes(), 0);
344            assert_eq!((h1, h2), (hash1, hash2));
345            buf.clear();
346        }
347    }
348
349    #[test]
350    fn perf_test() {
351        struct Item {
352            key: String,
353            hash1: i64,
354            hash2: i64,
355        }
356
357        let file_str = std::include_str!("murmur3_tests.txt");
358        let n = 10000;
359        let mut buf = String::new();
360        let mut items: Vec<Item> = Vec::new();
361        let mut file = BufReader::new(Cursor::new(file_str));
362        while let Ok(read) = file.read_line(&mut buf) {
363            if read == 0 {
364                break;
365            }
366            let mut vals = buf.split_whitespace();
367            let key = vals.next().unwrap();
368            let hash1 = vals.next().unwrap().parse::<i64>().unwrap();
369            let hash2 = vals.next().unwrap().parse::<i64>().unwrap();
370            items.push(Item {
371                key: key.to_string(),
372                hash1,
373                hash2,
374            });
375            buf.clear();
376        }
377
378        let now = std::time::SystemTime::now();
379        for _ in 0..n {
380            for item in &items {
381                let (h1, h2) = murmur3_cassandra_x64_128(item.key.as_bytes(), 0);
382                assert_eq!((h1, h2), (item.hash1, item.hash2));
383            }
384        }
385        println!(
386            "New Method: {} runs completed in {} ms",
387            1000_i64 * n,
388            now.elapsed().unwrap().as_millis()
389        );
390
391        let now = std::time::SystemTime::now();
392        for _ in 0..n {
393            for item in &items {
394                let (h1, h2) = old_modified_murmur3_cassandra_x64_128(item.key.as_bytes(), 0).unwrap();
395                assert_eq!((h1, h2), (item.hash1, item.hash2));
396            }
397        }
398        println!(
399            "Old Modified Method: {} runs completed in {} ms",
400            1000_i64 * n,
401            now.elapsed().unwrap().as_millis()
402        );
403
404        let now = std::time::SystemTime::now();
405        for _ in 0..n {
406            for item in &items {
407                let mut key = Cursor::new(item.key.clone());
408                let h1 = old_murmur3_cassandra_x64_128(&mut key, 0).unwrap();
409                assert_eq!(h1, item.hash1);
410            }
411        }
412        let mut total_time = now.elapsed().unwrap().as_millis();
413        // We exclute the Cursor cloning time to compare the calculation speed
414        let now = std::time::SystemTime::now();
415        for _ in 0..n {
416            for item in &items {
417                let _ = Cursor::new(item.key.clone());
418            }
419        }
420        total_time -= now.elapsed().unwrap().as_millis();
421        println!("Old Method: {} runs completed in {} ms", 1000_i64 * n, total_time);
422    }
423}