1use crate::{MapletError, MapletResult};
7use ahash::RandomState;
8use std::hash::{Hash, Hasher};
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
12pub enum HashFunction {
13 #[default]
15 AHash,
16 TwoX,
18 Fnv,
20}
21
22#[derive(Debug, Clone)]
24pub struct FingerprintHasher {
25 hash_fn: HashFunction,
27 random_state: RandomState,
29 fingerprint_bits: u32,
31 fingerprint_mask: u64,
33}
34
35impl FingerprintHasher {
36 #[must_use]
38 pub fn new(hash_fn: HashFunction, fingerprint_bits: u32) -> Self {
39 let fingerprint_mask = if fingerprint_bits >= 64 {
40 u64::MAX
41 } else {
42 (1u64 << fingerprint_bits) - 1
43 };
44
45 Self {
46 hash_fn,
47 random_state: RandomState::with_seed(42), fingerprint_bits,
49 fingerprint_mask,
50 }
51 }
52
53 pub fn fingerprint<T: Hash>(&self, key: &T) -> u64 {
55 let hash = self.hash_key(key);
56 hash & self.fingerprint_mask
57 }
58
59 pub fn hash_key<T: Hash>(&self, key: &T) -> u64 {
61 match self.hash_fn {
62 HashFunction::AHash => self.random_state.hash_one(&key),
63 HashFunction::TwoX => {
64 use twox_hash::XxHash64;
65 let mut hasher = XxHash64::default();
66 key.hash(&mut hasher);
67 hasher.finish()
68 }
69 HashFunction::Fnv => {
70 use std::collections::hash_map::DefaultHasher;
71 let mut hasher = DefaultHasher::new();
72 key.hash(&mut hasher);
73 hasher.finish()
74 }
75 }
76 }
77
78 #[must_use]
80 pub const fn fingerprint_bits(&self) -> u32 {
81 self.fingerprint_bits
82 }
83
84 #[must_use]
86 pub const fn fingerprint_mask(&self) -> u64 {
87 self.fingerprint_mask
88 }
89
90 #[must_use]
92 pub fn optimal_fingerprint_size(false_positive_rate: f64) -> u32 {
93 if false_positive_rate <= 0.0 || false_positive_rate >= 1.0 {
94 return 8; }
96
97 let bits = (-false_positive_rate.log2()).ceil() as u32 + 3;
99 bits.min(64).max(4) }
101}
102
103#[derive(Debug, Clone)]
105pub struct PerfectHash {
106 num_slots: usize,
108 slot_hasher: FingerprintHasher,
110}
111
112impl PerfectHash {
113 #[must_use]
115 pub fn new(num_slots: usize, hash_fn: HashFunction) -> Self {
116 let slot_hash_fn = match hash_fn {
118 HashFunction::AHash => HashFunction::TwoX,
119 HashFunction::TwoX => HashFunction::Fnv,
120 HashFunction::Fnv => HashFunction::AHash,
121 };
122
123 Self {
124 num_slots,
125 slot_hasher: FingerprintHasher::new(slot_hash_fn, 32), }
127 }
128
129 #[must_use]
131 pub fn slot_index(&self, fingerprint: u64) -> usize {
132 let hash = self.slot_hasher.hash_key(&fingerprint);
133 (hash as usize) % self.num_slots
134 }
135
136 #[must_use]
138 pub const fn num_slots(&self) -> usize {
139 self.num_slots
140 }
141}
142
143#[derive(Debug, Clone)]
145pub struct CollisionDetector {
146 max_collisions: usize,
148 collision_count: usize,
150 warning_threshold: usize,
152}
153
154impl CollisionDetector {
155 #[must_use]
157 pub const fn new(max_collisions: usize) -> Self {
158 Self {
159 max_collisions,
160 collision_count: 0,
161 warning_threshold: max_collisions / 2,
162 }
163 }
164
165 pub fn record_collision(&mut self) -> MapletResult<()> {
167 self.collision_count += 1;
168
169 if self.collision_count > self.max_collisions {
170 return Err(MapletError::CollisionLimitExceeded);
171 }
172
173 if self.collision_count > self.warning_threshold {
174 tracing::warn!(
175 "High collision count: {} (threshold: {})",
176 self.collision_count,
177 self.warning_threshold
178 );
179 }
180
181 Ok(())
182 }
183
184 #[must_use]
186 pub const fn collision_count(&self) -> usize {
187 self.collision_count
188 }
189
190 pub const fn reset(&mut self) {
192 self.collision_count = 0;
193 }
194
195 #[must_use]
197 pub const fn is_approaching_limit(&self) -> bool {
198 self.collision_count > self.warning_threshold
199 }
200}
201
202#[cfg(test)]
203mod tests {
204 use super::*;
205
206 #[test]
207 fn test_fingerprint_hasher() {
208 let hasher = FingerprintHasher::new(HashFunction::AHash, 8);
209 let key = "test_key";
210
211 let fingerprint = hasher.fingerprint(&key);
212 assert!(fingerprint < (1u64 << 8));
213
214 let fingerprint2 = hasher.fingerprint(&key);
216 assert_eq!(fingerprint, fingerprint2);
217 }
218
219 #[test]
220 fn test_perfect_hash() {
221 let perfect_hash = PerfectHash::new(100, HashFunction::AHash);
222 let fingerprint = 12345u64;
223
224 let slot = perfect_hash.slot_index(fingerprint);
225 assert!(slot < 100);
226
227 let slot2 = perfect_hash.slot_index(fingerprint);
229 assert_eq!(slot, slot2);
230 }
231
232 #[test]
233 fn test_collision_detector() {
234 let mut detector = CollisionDetector::new(10);
235
236 for _ in 0..5 {
238 assert!(detector.record_collision().is_ok());
239 }
240
241 assert_eq!(detector.collision_count(), 5);
242 assert!(!detector.is_approaching_limit());
243
244 for _ in 0..5 {
246 assert!(detector.record_collision().is_ok());
247 }
248
249 assert_eq!(detector.collision_count(), 10);
250 assert!(detector.is_approaching_limit());
251
252 assert!(detector.record_collision().is_err());
254 }
255
256 #[test]
257 fn test_optimal_fingerprint_size() {
258 assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.01), 10); assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.001), 13); assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.1), 7); }
262}