use std::collections::{HashSet, VecDeque};
use std::hash::Hasher;
#[derive(Debug)]
pub struct PerfectHash {
pub size: usize,
hash_seed1: u64,
hash_seed2: u64,
pub g: Vec<u64>,
}
impl PerfectHash {
pub fn build(keys: &[String]) -> Self {
assert!(!keys.is_empty(), "No keys provided.");
use rand::{rngs::StdRng, Rng, SeedableRng};
let n = keys.len();
let mut attempt_rng = StdRng::seed_from_u64(0xABCD1234);
for _ in 0..1000 {
let seed1 = attempt_rng.gen::<u64>();
let seed2 = attempt_rng.gen::<u64>();
match try_build_phf(keys, seed1, seed2) {
Ok(g) => {
return PerfectHash {
size: n,
hash_seed1: seed1,
hash_seed2: seed2,
g,
};
}
Err(_) => {
}
}
}
panic!("Unable to build a perfect hash after many attempts. Increase attempts or refine approach.");
}
pub fn hash_index(&self, key: &str) -> usize {
let i1 = hash_str(key, self.hash_seed1) % self.size as u64;
let i2 = hash_str(key, self.hash_seed2) % self.size as u64;
let idx = (i1 + self.g[i1 as usize] + self.g[i2 as usize]) % (self.size as u64);
idx as usize
}
}
fn try_build_phf(keys: &[String], seed1: u64, seed2: u64) -> Result<Vec<u64>, ()> {
let n = keys.len();
if n == 1 {
return Ok(vec![0]);
}
let mut edges = Vec::with_capacity(n);
for (k_idx, k) in keys.iter().enumerate() {
let i1 = hash_str(k, seed1) % (n as u64);
let i2 = hash_str(k, seed2) % (n as u64);
if n > 2 && i1 == i2 {
return Err(());
}
edges.push((i1 as usize, i2 as usize, k_idx));
}
if n == 2 {
let (i1_0, i2_0) = (hash_str(&keys[0], seed1) % 2, hash_str(&keys[0], seed2) % 2);
let (i1_1, i2_1) = (hash_str(&keys[1], seed1) % 2, hash_str(&keys[1], seed2) % 2);
for g0 in 0..2u64 {
for g1 in 0..2u64 {
let g = vec![g0, g1];
let idx0 = (i1_0 + g[i1_0 as usize] + g[i2_0 as usize]) % 2;
let idx1 = (i1_1 + g[i1_1 as usize] + g[i2_1 as usize]) % 2;
if idx0 != idx1 {
return Ok(g);
}
}
}
return Err(());
}
let mut adjacency = vec![Vec::new(); n];
for &(u, v, e_idx) in &edges {
adjacency[u].push((v, e_idx)); }
let mut g = vec![0u64; n];
let mut visited = vec![false; n];
let mut used_edge = vec![false; n];
for start in 0..n {
if visited[start] {
continue;
}
visited[start] = true;
let mut queue = VecDeque::new();
queue.push_back(start);
let mut used_indices = HashSet::new();
while let Some(u) = queue.pop_front() {
for &(v, key_idx) in &adjacency[u] {
if used_edge[key_idx] {
continue;
}
let i1 = edges[key_idx].0;
let i2 = edges[key_idx].1;
let mut idx = (i1 as u64 + g[i1] + g[i2]) % (n as u64);
if !used_indices.insert(idx) {
let mut found = false;
for new_g in 0..n as u64 {
if !visited[v] {
g[v] = new_g;
idx = (i1 as u64 + g[i1] + g[v]) % (n as u64);
if used_indices.insert(idx) {
found = true;
break;
}
}
}
if !found {
return Err(());
}
}
if !visited[v] {
visited[v] = true;
queue.push_back(v);
}
used_edge[key_idx] = true;
}
}
}
let mut used = HashSet::new();
for k in keys.iter() {
let i1 = hash_str(k, seed1) % (n as u64);
let i2 = hash_str(k, seed2) % (n as u64);
let idx = (i1 + g[i1 as usize] + g[i2 as usize]) % (n as u64);
if !used.insert(idx) {
return Err(());
}
}
Ok(g)
}
fn hash_str(s: &str, seed: u64) -> u64 {
let mut h = Fnv64 {
state: 0xcbf29ce484222325 ^ seed,
};
for (shift, b) in s.as_bytes().iter().enumerate() {
h.write_u8(*b);
h.state = h.state.rotate_left((shift & 63) as u32);
}
h.finish()
}
struct Fnv64 {
state: u64,
}
impl Hasher for Fnv64 {
fn finish(&self) -> u64 {
self.state
}
fn write(&mut self, bytes: &[u8]) {
for &b in bytes {
self.write_u8(b);
}
}
fn write_u8(&mut self, b: u8) {
const FNV_PRIME_64: u64 = 0x100000001b3;
self.state ^= b as u64;
self.state = self.state.wrapping_mul(FNV_PRIME_64);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_small_phf() {
let keys = vec![
"apple".to_string(),
"banana".to_string(),
"cherry".to_string(),
"date".to_string(),
"eggplant".to_string(),
];
let ph = PerfectHash::build(&keys);
let mut seen = std::collections::HashSet::new();
for k in &keys {
let idx = ph.hash_index(k);
assert!(seen.insert(idx), "Index collision for key {:?}", k);
}
assert_eq!(seen.len(), keys.len());
}
#[test]
fn test_single_key() {
let keys = vec!["solo".to_string()];
let ph = PerfectHash::build(&keys);
let idx = ph.hash_index("solo");
assert_eq!(idx, 0);
}
#[test]
fn test_two_keys() {
let keys = vec!["one".to_string(), "two".to_string()];
let ph = PerfectHash::build(&keys);
let i1 = ph.hash_index("one");
let i2 = ph.hash_index("two");
assert!(i1 != i2);
assert!(i1 < 2 && i2 < 2);
}
}