const C1: u64 = 0x87c3_7b91_1142_53d5;
const C2: u64 = 0x4cf5_ad43_2745_937f;
const H1_ADD: u64 = 0x52dc_e729;
const H2_ADD: u64 = 0x3849_5ab5;
const FMIX_C1: u64 = 0xff51_afd7_ed55_8ccd;
const FMIX_C2: u64 = 0xc4ce_b9fe_1a85_ec53;
pub fn cassandra_murmur3_x64_128(data: &[u8]) -> (i64, i64) {
let mut h1: u64 = 0;
let mut h2: u64 = 0;
let mut chunks = data.chunks_exact(16);
for block in &mut chunks {
let mut k1 = u64::from_le_bytes(block[0..8].try_into().unwrap());
let mut k2 = u64::from_le_bytes(block[8..16].try_into().unwrap());
k1 = k1.wrapping_mul(C1);
k1 = k1.rotate_left(31);
k1 = k1.wrapping_mul(C2);
h1 ^= k1;
h1 = h1.rotate_left(27);
h1 = h1.wrapping_add(h2);
h1 = h1.wrapping_mul(5).wrapping_add(H1_ADD);
k2 = k2.wrapping_mul(C2);
k2 = k2.rotate_left(33);
k2 = k2.wrapping_mul(C1);
h2 ^= k2;
h2 = h2.rotate_left(31);
h2 = h2.wrapping_add(h1);
h2 = h2.wrapping_mul(5).wrapping_add(H2_ADD);
}
let tail = chunks.remainder();
let mut k1: u64 = 0;
let mut k2: u64 = 0;
let signed = |byte: u8| -> u64 { byte as i8 as i64 as u64 };
for (pos, &byte) in tail.iter().enumerate() {
let shift = ((pos % 8) * 8) as u32;
if pos < 8 {
k1 ^= signed(byte) << shift;
} else {
k2 ^= signed(byte) << shift;
}
}
if tail.len() >= 9 {
k2 = k2.wrapping_mul(C2);
k2 = k2.rotate_left(33);
k2 = k2.wrapping_mul(C1);
h2 ^= k2;
}
if !tail.is_empty() {
k1 = k1.wrapping_mul(C1);
k1 = k1.rotate_left(31);
k1 = k1.wrapping_mul(C2);
h1 ^= k1;
}
let len = data.len() as u64;
h1 ^= len;
h2 ^= len;
h1 = h1.wrapping_add(h2);
h2 = h2.wrapping_add(h1);
h1 = fmix64(h1);
h2 = fmix64(h2);
h1 = h1.wrapping_add(h2);
h2 = h2.wrapping_add(h1);
(h1 as i64, h2 as i64)
}
pub fn cassandra_murmur3_token(data: &[u8]) -> i64 {
let (h1, _) = cassandra_murmur3_x64_128(data);
normalize(h1)
}
fn normalize(v: i64) -> i64 {
if v == i64::MIN {
i64::MAX
} else {
v
}
}
fn fmix64(mut value: u64) -> u64 {
value ^= value >> 33;
value = value.wrapping_mul(FMIX_C1);
value ^= value >> 33;
value = value.wrapping_mul(FMIX_C2);
value ^= value >> 33;
value
}
#[cfg(test)]
mod tests {
use super::*;
fn assert_tokens(label: &str, vectors: &[(&[u8], i64)]) {
for (key_bytes, expected_token) in vectors {
let token = cassandra_murmur3_token(key_bytes);
assert_eq!(
token, *expected_token,
"Token mismatch for {} key {:02X?}: got {}, expected {}",
label, key_bytes, token, expected_token
);
}
}
#[test]
fn test_text_key_tokens() {
let vectors: &[(&[u8], i64)] = &[
(b"a", -8839064797231613815),
(b"ab", -7815133031266706642),
(b"abc", -5434086359492102041),
(b"abcd", -5153323217664422577),
(b"abcde", 2321271983248423864),
(b"abcdef", -1982280103179862187),
(b"abcdefg", -6427428730009885543),
(b"abcdefgh", -3708139591217214462),
(b"hello", -3758069500696749310),
(b"cassandra", 356242581507269238),
(b"murmur3", 5322121941860471994),
(b"test_key_12345", -3182120811138177122),
(b"0123456789abcde", -6472281833689111727),
(b"0123456789abcdef", 5467490433528156583),
(
b"this is a longer test key for murmur3 hashing",
-7889617755374116647,
),
];
assert_tokens("text", vectors);
}
#[test]
fn test_int_key_tokens() {
let vectors: &[(&[u8], i64)] = &[
(&[0x00, 0x00, 0x00, 0x00], -3485513579396041028), (&[0x00, 0x00, 0x00, 0x01], -4069959284402364209), (&[0xFF, 0xFF, 0xFF, 0xFF], 7297452126230313552), (&[0x00, 0x00, 0x00, 0x2A], -7160136740246525330), (&[0x00, 0x00, 0x00, 0x7F], 4443639997907684431), (&[0x00, 0x00, 0x00, 0x80], -9081975895656599623), (&[0x00, 0x00, 0x00, 0xFF], -8423851636648339959), (&[0x00, 0x00, 0x01, 0x00], 180151580513994396), (&[0x00, 0x00, 0x03, 0xE8], 7935772098093053663), (&[0xFF, 0xFF, 0xFC, 0x18], 5905661066942090405), (&[0x7F, 0xFF, 0xFF, 0xFF], -765994672030311617), (&[0x80, 0x00, 0x00, 0x00], -420533958509279465), ];
assert_tokens("int", vectors);
}
#[test]
fn test_uuid_key_tokens() {
let vectors: &[(&[u8], i64)] = &[
(
&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
5457549051747178710,
),
(
&[
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF,
],
-2824192546314762522,
),
(
&[
0x55, 0x0e, 0x84, 0x00, 0xe2, 0x9b, 0x41, 0xd4, 0xa7, 0x16, 0x44, 0x66, 0x55,
0x44, 0x00, 0x00,
],
4277286421682315655,
),
(
&[
0x12, 0x34, 0x56, 0x78, 0x12, 0x34, 0x12, 0x34, 0x12, 0x34, 0x12, 0x34, 0x56,
0x78, 0x9a, 0xbc,
],
-8497799532739775204,
),
];
assert_tokens("UUID", vectors);
}
#[test]
fn test_blob_key_tokens() {
let vectors: &[(&[u8], i64)] = &[
(&[0x00], 5048724184180415669),
(&[0x80], -5284281814142962636), (&[0xFF], -4442228696663692417), (&[0xFF, 0xFE], -2002833339314343643), (
&[0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89],
-7623170703309721106,
),
(
&[
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
0x0E, 0x0F,
],
597835946752277653,
),
(
&[
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
0x0E, 0x0F, 0x10,
],
-5563837382979743776,
),
];
assert_tokens("blob", vectors);
}
#[test]
fn test_bigint_key_tokens() {
let vectors: &[(&[u8], i64)] = &[
(&[0, 0, 0, 0, 0, 0, 0, 0], 2945182322382062539), (&[0, 0, 0, 0, 0, 0, 0, 1], 6292367497774912474), (
&[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
7071048584287372947,
), (
&[0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
-1722304415079482439,
), (
&[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
9204767954415360687,
), ];
assert_tokens("bigint", vectors);
}
#[test]
fn test_composite_key_tokens() {
let mut key = Vec::new();
key.extend_from_slice(&[0x00, 0x05]); key.extend_from_slice(b"hello");
key.push(0x00); key.extend_from_slice(&[0x00, 0x04]); key.extend_from_slice(&42i32.to_be_bytes());
key.push(0x00); assert_eq!(
cassandra_murmur3_token(&key),
7666157718303755816,
"Composite key ('hello', 42) token mismatch"
);
let mut key = Vec::new();
key.extend_from_slice(&[0x00, 0x05]); key.extend_from_slice(b"world");
key.push(0x00);
key.extend_from_slice(&[0x00, 0x04]);
key.extend_from_slice(&99i32.to_be_bytes());
key.push(0x00);
assert_eq!(
cassandra_murmur3_token(&key),
-4641306270390207264,
"Composite key ('world', 99) token mismatch"
);
}
#[test]
fn test_normalize() {
assert_eq!(normalize(i64::MIN), i64::MAX);
assert_eq!(normalize(i64::MAX), i64::MAX);
assert_eq!(normalize(0), 0);
assert_eq!(normalize(1), 1);
assert_eq!(normalize(-1), -1);
}
#[test]
fn test_hash_returns_h1_h2_order() {
let (h1, h2) = cassandra_murmur3_x64_128(b"");
assert_eq!(h1, 0);
assert_eq!(h2, 0);
let (h1, _h2) = cassandra_murmur3_x64_128(b"a");
assert_eq!(h1, -8839064797231613815_i64);
}
}