use xxhash_rust::xxh3::xxh3_64;
pub const HASH_BASE: u64 = 0x9e3779b97f4a7c15;
pub fn token_hash(kind: u8, value: &str) -> u64 {
xxh3_64(value.as_bytes()) ^ (kind as u64)
}
#[inline]
pub fn hash_token(kind_discriminant: u8, value: &str, ignore_case: bool) -> u64 {
if ignore_case {
xxh3_64(value.to_lowercase().as_bytes()) ^ (kind_discriminant as u64)
} else {
xxh3_64(value.as_bytes()) ^ (kind_discriminant as u64)
}
}
pub fn hash_window(hashes: &[u64]) -> u64 {
hashes
.iter()
.fold(0u64, |acc, &h| acc.wrapping_mul(HASH_BASE).wrapping_add(h))
}
pub fn roll(current: u64, outgoing: u64, incoming: u64, window_power: u64) -> u64 {
current
.wrapping_sub(outgoing.wrapping_mul(window_power))
.wrapping_mul(HASH_BASE)
.wrapping_add(incoming)
}
pub fn base_pow(n: usize) -> u64 {
let mut result = 1u64;
for _ in 0..n {
result = result.wrapping_mul(HASH_BASE);
}
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn token_hash_is_deterministic() {
let h1 = token_hash(1, "function");
let h2 = token_hash(1, "function");
assert_eq!(h1, h2);
}
#[test]
fn token_hash_differs_by_kind() {
let h1 = token_hash(1, "x");
let h2 = token_hash(2, "x");
assert_ne!(h1, h2);
}
#[test]
fn hash_window_single_element() {
let h = token_hash(0, "a");
assert_eq!(hash_window(&[h]), h);
}
#[test]
fn roll_matches_naive_recompute() {
let a = token_hash(0, "a");
let b = token_hash(0, "b");
let c = token_hash(0, "c");
let d = token_hash(0, "d");
let initial = hash_window(&[a, b, c]);
let wp = base_pow(3 - 1);
let rolled = roll(initial, a, d, wp);
let naive = hash_window(&[b, c, d]);
assert_eq!(rolled, naive, "rolled hash must match naive recomputation");
}
#[test]
fn roll_window_of_one() {
let a = token_hash(0, "hello");
let b = token_hash(0, "world");
let initial = hash_window(&[a]);
let wp = base_pow(1 - 1); let rolled = roll(initial, a, b, wp);
let naive = hash_window(&[b]);
assert_eq!(rolled, naive);
}
#[test]
fn hash_window_empty_is_zero() {
assert_eq!(hash_window(&[]), 0u64);
}
#[test]
fn hash_token_case_insensitive_matches_different_case() {
let h1 = hash_token(1, "Function", true);
let h2 = hash_token(1, "function", true);
assert_eq!(h1, h2, "ignore_case=true must fold case before hashing");
}
#[test]
fn hash_token_case_sensitive_differs() {
let h1 = hash_token(1, "Function", false);
let h2 = hash_token(1, "function", false);
assert_ne!(h1, h2, "ignore_case=false must not fold case");
}
#[test]
fn hash_token_no_ignore_case_matches_token_hash() {
let h1 = hash_token(2, "hello", false);
let h2 = token_hash(2, "hello");
assert_eq!(
h1, h2,
"hash_token(ignore_case=false) must match token_hash"
);
}
}