use std::hash::Hasher;
const K0: u64 = 0xc3a5c85c97cb3127;
const K1: u64 = 0xb492b66fbe98f273;
const K2: u64 = 0x9ae16a3b2f90404f;
#[allow(dead_code)]
const K3: u64 = 0xc949d7c7509e6557;
#[allow(dead_code)]
const CITYHASH_SEED: u64 = 0;
#[derive(Debug, Clone)]
pub struct CityHasher64 {
state: u64,
buffer: Vec<u8>, }
impl CityHasher64 {
#[inline]
pub fn new() -> Self {
Self {
state: 0,
buffer: Vec::new(),
}
}
#[inline]
pub fn hash_bytes(bytes: &[u8]) -> u64 {
if bytes.len() <= 16 {
hash_bytes_small(bytes)
} else if bytes.len() <= 32 {
hash_bytes_medium(bytes)
} else {
hash_bytes_large(bytes)
}
}
#[inline]
pub fn finish_raw(&self) -> u64 {
if !self.buffer.is_empty() {
Self::hash_bytes(&self.buffer)
} else {
self.state
}
}
}
impl Default for CityHasher64 {
#[inline]
fn default() -> Self {
Self::new()
}
}
impl Hasher for CityHasher64 {
#[inline]
fn finish(&self) -> u64 {
self.finish_raw()
}
#[inline]
fn write(&mut self, bytes: &[u8]) {
self.buffer.extend_from_slice(bytes);
}
}
#[inline]
fn hash_bytes_small(bytes: &[u8]) -> u64 {
let len = bytes.len();
if len >= 8 {
let mul = K2.wrapping_add(len as u64 * 2);
let a = fetch64(bytes, 0).wrapping_add(K2);
let b = fetch64(bytes, len - 8);
let c = rotate(b, 37).wrapping_mul(mul).wrapping_add(a);
let d = rotate(a, 25).wrapping_add(b).wrapping_mul(mul);
hash_len_16_mul(c, d, mul)
} else if len >= 4 {
let mul = K2.wrapping_add(len as u64 * 2);
let a = fetch32(bytes, 0) as u64;
let b = fetch32(bytes, len - 4) as u64;
hash_len_16_mul((len as u64).wrapping_add(a << 3), b, mul)
} else if len > 0 {
let a = bytes[0] as u64;
let b = bytes[len >> 1] as u64;
let c = bytes[len - 1] as u64;
let y = a.wrapping_add(b << 8);
let z = (len as u64).wrapping_add(c << 2);
shift_mix(y.wrapping_mul(K2) ^ z.wrapping_mul(K0)).wrapping_mul(K2)
} else {
K2
}
}
#[inline]
fn hash_bytes_medium(bytes: &[u8]) -> u64 {
let len = bytes.len();
let mul = K2.wrapping_add(len as u64 * 2);
let a = fetch64(bytes, 0).wrapping_mul(K1);
let b = fetch64(bytes, 8);
let c = fetch64(bytes, len - 8).wrapping_mul(mul);
let d = fetch64(bytes, len - 16).wrapping_mul(K2);
let rotated_sum1 = rotate(a.wrapping_add(b), 43)
.wrapping_add(rotate(c, 30))
.wrapping_add(d);
let rotated_sum2 = a
.wrapping_add(rotate(b.wrapping_add(K2), 18))
.wrapping_add(c);
hash_len_16_mul(rotated_sum1, rotated_sum2, mul)
}
#[inline]
fn hash_bytes_large(bytes: &[u8]) -> u64 {
let len = bytes.len();
if len < 33 {
return hash_bytes_medium(bytes);
}
let mut x = fetch64(bytes, 0).wrapping_mul(K2);
let mut y = fetch64(bytes, 8);
let mut z = fetch64(bytes, len - 8).wrapping_mul(K2);
let mut v = weak_hash_32_bytes_values(bytes, len);
let w = weak_hash_32_bytes_values(&bytes[len - 32..], len);
x = x.wrapping_mul(K2).wrapping_add(fetch64(bytes, 16));
y = y
.wrapping_add(rotate(x, 48).wrapping_mul(K2))
.wrapping_add(fetch64(bytes, 24));
z = z.wrapping_mul(K2).wrapping_add(fetch64(bytes, len - 16));
v.0 = v.0.wrapping_mul(K2).wrapping_add(w.1);
v.1 = v.1.wrapping_mul(K2).wrapping_add(w.0);
let a = (y.wrapping_add(z))
.wrapping_mul(K2)
.wrapping_add(v.0)
.wrapping_add(w.0);
let b = (v.1.wrapping_add(w.1))
.wrapping_mul(K2)
.wrapping_add(x)
.wrapping_add(y);
hash_len_16_mul(a, b, K2)
}
#[inline]
fn fetch32(bytes: &[u8], i: usize) -> u32 {
assert!(i + 4 <= bytes.len());
let mut result = 0u32;
for j in 0..4 {
result |= (bytes[i + j] as u32) << (j * 8);
}
result
}
#[inline]
fn fetch64(bytes: &[u8], i: usize) -> u64 {
assert!(i + 8 <= bytes.len());
let mut result = 0u64;
for j in 0..8 {
result |= (bytes[i + j] as u64) << (j * 8);
}
result
}
#[inline]
fn rotate(val: u64, shift: u32) -> u64 {
(val >> shift) | (val << (64 - shift))
}
#[inline]
fn hash_len_16_mul(u: u64, v: u64, mul: u64) -> u64 {
let mut a = (u ^ v).wrapping_mul(mul);
a ^= a >> 47;
let mut b = (v ^ a).wrapping_mul(mul);
b ^= b >> 47;
b = b.wrapping_mul(mul);
b
}
#[inline]
fn shift_mix(val: u64) -> u64 {
val ^ (val >> 47)
}
#[inline]
fn weak_hash_32_bytes_values(bytes: &[u8], len: usize) -> (u64, u64) {
if bytes.len() < 32 {
return (K0, K1);
}
let mut a = fetch64(bytes, 0);
let mut b = fetch64(bytes, 8);
let c = fetch64(bytes, 16);
let d = fetch64(bytes, 24);
a = a.wrapping_add(fetch64(bytes, 0));
b = rotate(b.wrapping_add(a).wrapping_add(d), 21);
let c = c.wrapping_add(a);
a = a.wrapping_add(rotate(a, 44).wrapping_add(b));
let mut vf = a.wrapping_add(d);
let mut vs = c.wrapping_add(rotate(b, 10));
if len > 48 && bytes.len() >= 40 {
vf = vf.wrapping_add(
shift_mix(fetch64(bytes, 32).wrapping_mul(K2))
.wrapping_mul(K0)
.wrapping_add(a),
);
vs = vs.wrapping_add(shift_mix(c.wrapping_add(fetch64(bytes, 8 + 32))).wrapping_mul(K2));
vf = shift_mix(vf);
vs = shift_mix(vs);
}
(vf, vs)
}
#[inline]
pub fn city_hash_64(data: &[u8]) -> u64 {
CityHasher64::hash_bytes(data)
}
#[cfg(test)]
mod tests {
use super::*;
const TEST_DATA: [(&[u8], u64); 6] = [
(&[], 0x9ae16a3b2f90404f),
(&[0xde], 0x8af595327a84082a),
(&[0x87, 0x2a], 0xf23effdc30999888),
(&[0xb5, 0x3f, 0x9c], 0x76c81f1559a343fc),
(&[0x8c, 0x45, 0x1a, 0x6b], 0xe27a8e9c4439c382),
(b"hello world", 0x588fb7478bd6b01b),
];
#[test]
fn test_city_hash_64_vectors() {
for &(data, expected) in &TEST_DATA {
let result = city_hash_64(data);
println!(
"CityHash64 for {:?}: expected 0x{:016x}, got 0x{:016x}",
data, expected, result
);
assert_eq!(result, expected);
}
}
#[test]
fn test_hasher_trait() {
let mut hasher = CityHasher64::new();
let test_str = "hello world";
hasher.write(test_str.as_bytes());
let result = hasher.finish();
let direct = city_hash_64(test_str.as_bytes());
assert_eq!(result, direct);
}
#[test]
fn test_different_inputs() {
let a = city_hash_64(b"hello world");
let b = city_hash_64(b"hello worlD");
assert_ne!(a, b, "Different inputs should produce different hashes");
}
#[test]
fn test_large_input() {
let large_input: Vec<u8> = (0..128).map(|i| (i % 251) as u8).collect();
let hash = city_hash_64(&large_input);
assert_ne!(hash, 0);
}
}