use crate::{MapletError, MapletResult};
use ahash::RandomState;
use std::hash::{Hash, Hasher};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HashFunction {
#[default]
AHash,
TwoX,
Fnv,
}
#[derive(Debug, Clone)]
pub struct FingerprintHasher {
hash_fn: HashFunction,
random_state: RandomState,
fingerprint_bits: u32,
fingerprint_mask: u64,
}
impl FingerprintHasher {
#[must_use]
pub fn new(hash_fn: HashFunction, fingerprint_bits: u32) -> Self {
let fingerprint_mask = if fingerprint_bits >= 64 {
u64::MAX
} else {
(1u64 << fingerprint_bits) - 1
};
Self {
hash_fn,
random_state: RandomState::with_seed(42), fingerprint_bits,
fingerprint_mask,
}
}
pub fn fingerprint<T: Hash>(&self, key: &T) -> u64 {
let hash = self.hash_key(key);
hash & self.fingerprint_mask
}
pub fn hash_key<T: Hash>(&self, key: &T) -> u64 {
match self.hash_fn {
HashFunction::AHash => self.random_state.hash_one(&key),
HashFunction::TwoX => {
use twox_hash::XxHash64;
let mut hasher = XxHash64::default();
key.hash(&mut hasher);
hasher.finish()
}
HashFunction::Fnv => {
use std::collections::hash_map::DefaultHasher;
let mut hasher = DefaultHasher::new();
key.hash(&mut hasher);
hasher.finish()
}
}
}
#[must_use]
pub const fn fingerprint_bits(&self) -> u32 {
self.fingerprint_bits
}
#[must_use]
pub const fn fingerprint_mask(&self) -> u64 {
self.fingerprint_mask
}
#[must_use]
pub fn optimal_fingerprint_size(false_positive_rate: f64) -> u32 {
if false_positive_rate <= 0.0 || false_positive_rate >= 1.0 {
return 8; }
let bits = (-false_positive_rate.log2()).ceil() as u32 + 3;
bits.min(64).max(4) }
}
#[derive(Debug, Clone)]
pub struct PerfectHash {
num_slots: usize,
slot_hasher: FingerprintHasher,
}
impl PerfectHash {
#[must_use]
pub fn new(num_slots: usize, hash_fn: HashFunction) -> Self {
let slot_hash_fn = match hash_fn {
HashFunction::AHash => HashFunction::TwoX,
HashFunction::TwoX => HashFunction::Fnv,
HashFunction::Fnv => HashFunction::AHash,
};
Self {
num_slots,
slot_hasher: FingerprintHasher::new(slot_hash_fn, 32), }
}
#[must_use]
pub fn slot_index(&self, fingerprint: u64) -> usize {
let hash = self.slot_hasher.hash_key(&fingerprint);
(hash as usize) % self.num_slots
}
#[must_use]
pub const fn num_slots(&self) -> usize {
self.num_slots
}
}
#[derive(Debug, Clone)]
pub struct CollisionDetector {
max_collisions: usize,
collision_count: usize,
warning_threshold: usize,
}
impl CollisionDetector {
#[must_use]
pub const fn new(max_collisions: usize) -> Self {
Self {
max_collisions,
collision_count: 0,
warning_threshold: max_collisions / 2,
}
}
pub fn record_collision(&mut self) -> MapletResult<()> {
self.collision_count += 1;
if self.collision_count > self.max_collisions {
return Err(MapletError::CollisionLimitExceeded);
}
if self.collision_count > self.warning_threshold {
tracing::warn!(
"High collision count: {} (threshold: {})",
self.collision_count,
self.warning_threshold
);
}
Ok(())
}
#[must_use]
pub const fn collision_count(&self) -> usize {
self.collision_count
}
pub const fn reset(&mut self) {
self.collision_count = 0;
}
#[must_use]
pub const fn is_approaching_limit(&self) -> bool {
self.collision_count > self.warning_threshold
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_fingerprint_hasher() {
let hasher = FingerprintHasher::new(HashFunction::AHash, 8);
let key = "test_key";
let fingerprint = hasher.fingerprint(&key);
assert!(fingerprint < (1u64 << 8));
let fingerprint2 = hasher.fingerprint(&key);
assert_eq!(fingerprint, fingerprint2);
}
#[test]
fn test_perfect_hash() {
let perfect_hash = PerfectHash::new(100, HashFunction::AHash);
let fingerprint = 12345u64;
let slot = perfect_hash.slot_index(fingerprint);
assert!(slot < 100);
let slot2 = perfect_hash.slot_index(fingerprint);
assert_eq!(slot, slot2);
}
#[test]
fn test_collision_detector() {
let mut detector = CollisionDetector::new(10);
for _ in 0..5 {
assert!(detector.record_collision().is_ok());
}
assert_eq!(detector.collision_count(), 5);
assert!(!detector.is_approaching_limit());
for _ in 0..5 {
assert!(detector.record_collision().is_ok());
}
assert_eq!(detector.collision_count(), 10);
assert!(detector.is_approaching_limit());
assert!(detector.record_collision().is_err());
}
#[test]
fn test_optimal_fingerprint_size() {
assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.01), 10); assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.001), 13); assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.1), 7); }
}