use twox_hash::XxHash3_64;
pub const CANONICAL_HASH_SEED_TABLE: [u64; 20] = [
0xcafe3553,
0xade3415118,
0x8cc70208,
0x2f024b2b,
0x451a3df5,
0x6a09e667,
0xbb67ae85,
0x3c6ef372,
0xa54ff53a,
0x510e527f,
0x9b05688c,
0x1f83d9ab,
0x5be0cd19,
0xcbbb9d5d,
0x629a292a,
0x9159015a,
0x152fecd8,
0x67332667,
0x8eb44a87,
0xdb0c2e0d,
];
pub const CANONICAL_HASH_SEED: usize = 5;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct HashSpec {
pub seed_list: Vec<u64>,
pub canonical_seed_index: usize,
pub seed_derivation: SeedDerivation,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SeedDerivation {
Packed,
Additive,
}
impl Default for HashSpec {
fn default() -> Self {
Self {
seed_list: CANONICAL_HASH_SEED_TABLE.to_vec(),
canonical_seed_index: CANONICAL_HASH_SEED,
seed_derivation: SeedDerivation::Packed,
}
}
}
impl HashSpec {
#[inline]
pub fn matrix_seed(&self) -> u64 {
self.seed_list[0]
}
#[inline]
pub fn row_seed(&self, row: usize) -> u64 {
let idx = row % self.seed_list.len();
self.seed_list[idx]
}
}
#[inline]
pub fn hash_with_spec(spec: &HashSpec, key: &[u8]) -> u64 {
XxHash3_64::oneshot_with_seed(spec.matrix_seed(), key)
}
#[inline]
fn mask_bits_for_width(width: u32) -> u32 {
if width <= 1 {
return 1;
}
let mut u = width - 1;
let mut bits = 0u32;
while u > 0 {
bits += 1;
u >>= 1;
}
bits
}
#[inline]
pub fn derive_index(_spec: &HashSpec, row: usize, hash: u64, width: u32) -> usize {
let shift = (row as u32) * mask_bits_for_width(width);
let mask = (width as u64).wrapping_sub(1);
((hash >> shift) & mask) as usize
}
#[inline]
pub fn derive_sign(_spec: &HashSpec, row: usize, hash: u64) -> i64 {
debug_assert!(
row <= 63,
"derive_sign: row {row} out of range for u64 hash"
);
let bit = (hash >> (63 - row as u32)) & 1;
if bit == 0 { -1 } else { 1 }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn seed_table_matches_sketchlib_go() {
let go_seed_list: [u64; 20] = [
0xcafe3553,
0xade3415118,
0x8cc70208,
0x2f024b2b,
0x451a3df5,
0x6a09e667,
0xbb67ae85,
0x3c6ef372,
0xa54ff53a,
0x510e527f,
0x9b05688c,
0x1f83d9ab,
0x5be0cd19,
0xcbbb9d5d,
0x629a292a,
0x9159015a,
0x152fecd8,
0x67332667,
0x8eb44a87,
0xdb0c2e0d,
];
assert_eq!(CANONICAL_HASH_SEED_TABLE, go_seed_list);
assert_eq!(CANONICAL_HASH_SEED, 5);
}
#[test]
fn hash_with_spec_matches_sketchlib_go() {
let spec = HashSpec::default();
let got = hash_with_spec(&spec, b"projectasap");
assert_eq!(got, 887548862923853302);
}
#[test]
fn derive_index_matches_go_bit_slicing() {
let spec = HashSpec::default();
let h = hash_with_spec(&spec, b"projectasap");
for row in 0..3 {
let expected = ((h >> (row as u32 * 9)) & 0x1ff) as usize;
assert_eq!(derive_index(&spec, row, h, 512), expected);
}
for row in 0..3 {
let expected = ((h >> (row as u32 * 10)) & 0x3ff) as usize;
assert_eq!(derive_index(&spec, row, h, 1024), expected);
}
}
#[test]
fn derive_sign_matches_go_high_bit() {
let spec = HashSpec::default();
let h = hash_with_spec(&spec, b"projectasap");
for row in 0..5 {
let bit = (h >> (63 - row as u32)) & 1;
let expected: i64 = if bit == 0 { -1 } else { 1 };
assert_eq!(derive_sign(&spec, row, h), expected);
}
}
#[test]
fn mask_bits_for_width_matches_go() {
assert_eq!(mask_bits_for_width(1), 1);
assert_eq!(mask_bits_for_width(2), 1);
assert_eq!(mask_bits_for_width(4), 2);
assert_eq!(mask_bits_for_width(512), 9);
assert_eq!(mask_bits_for_width(1024), 10);
assert_eq!(mask_bits_for_width(4096), 12);
}
#[test]
fn default_hashspec_has_packed_derivation() {
let spec = HashSpec::default();
assert_eq!(spec.seed_derivation, SeedDerivation::Packed);
assert_eq!(spec.canonical_seed_index, CANONICAL_HASH_SEED);
assert_eq!(spec.seed_list.len(), CANONICAL_HASH_SEED_TABLE.len());
}
}