1use std::collections::hash_map::DefaultHasher;
7use std::hash::{Hash, Hasher as StdHasher};
8
9pub trait Hasher {
11 fn hash<T: Hash + ?Sized>(&self, item: &T) -> usize;
13}
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
17pub enum HashAlgorithm {
18 Default,
20 Murmur3,
22 Fnv1a,
24 Seeded(u64),
26}
27
28#[derive(Debug, Clone)]
30pub struct HashFunction {
31 algorithm: HashAlgorithm,
32 seed: u64,
33}
34
35impl HashFunction {
36 pub fn new() -> Self {
48 HashFunction {
49 algorithm: HashAlgorithm::Default,
50 seed: 0,
51 }
52 }
53
54 pub fn with_algorithm(algorithm: HashAlgorithm) -> Self {
70 HashFunction { algorithm, seed: 0 }
71 }
72
73 pub fn with_seed(algorithm: HashAlgorithm, seed: u64) -> Self {
90 HashFunction { algorithm, seed }
91 }
92
93 pub fn generate_functions(count: usize) -> Vec<HashFunction> {
110 let algorithms = [
111 HashAlgorithm::Default,
112 HashAlgorithm::Murmur3,
113 HashAlgorithm::Fnv1a,
114 ];
115
116 let mut functions = Vec::with_capacity(count);
117
118 for i in 0..count {
119 let algorithm = algorithms[i % algorithms.len()];
120 let seed = (i as u64).wrapping_mul(0x9e3779b97f4a7c15_u64); functions.push(HashFunction::with_seed(algorithm, seed));
122 }
123
124 functions
125 }
126
127 pub fn algorithm(&self) -> HashAlgorithm {
129 self.algorithm
130 }
131
132 pub fn seed(&self) -> u64 {
134 self.seed
135 }
136}
137
138impl Default for HashFunction {
139 fn default() -> Self {
140 Self::new()
141 }
142}
143
144impl Hasher for HashFunction {
145 fn hash<T: Hash + ?Sized>(&self, item: &T) -> usize {
146 match self.algorithm {
147 HashAlgorithm::Default => {
148 let mut hasher = DefaultHasher::new();
149 self.seed.hash(&mut hasher);
150 item.hash(&mut hasher);
151 hasher.finish() as usize
152 }
153 HashAlgorithm::Murmur3 => murmur3_hash(item, self.seed as u32),
154 HashAlgorithm::Fnv1a => fnv1a_hash(item, self.seed),
155 HashAlgorithm::Seeded(base_seed) => {
156 let mut hasher = DefaultHasher::new();
157 (base_seed ^ self.seed).hash(&mut hasher);
158 item.hash(&mut hasher);
159 hasher.finish() as usize
160 }
161 }
162 }
163}
164
165fn murmur3_hash<T: Hash + ?Sized>(item: &T, seed: u32) -> usize {
169 let mut hasher = DefaultHasher::new();
171 seed.hash(&mut hasher);
172 item.hash(&mut hasher);
173 let hash64 = hasher.finish();
174
175 let mut h = hash64 as u32;
177 h ^= h >> 16;
178 h = h.wrapping_mul(0x85ebca6b);
179 h ^= h >> 13;
180 h = h.wrapping_mul(0xc2b2ae35);
181 h ^= h >> 16;
182
183 h as usize
184}
185
186fn fnv1a_hash<T: Hash + ?Sized>(item: &T, seed: u64) -> usize {
190 const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
191 const FNV_PRIME: u64 = 0x100000001b3;
192
193 let mut hash = FNV_OFFSET_BASIS ^ seed;
195
196 let mut hasher = DefaultHasher::new();
198 item.hash(&mut hasher);
199 let item_hash = hasher.finish();
200
201 let bytes = item_hash.to_le_bytes();
203 for &byte in &bytes {
204 hash ^= byte as u64;
205 hash = hash.wrapping_mul(FNV_PRIME);
206 }
207
208 hash as usize
209}
210
211pub struct DoubleHasher {
216 hasher1: HashFunction,
217 hasher2: HashFunction,
218}
219
220impl DoubleHasher {
221 pub fn new() -> Self {
233 DoubleHasher {
234 hasher1: HashFunction::with_algorithm(HashAlgorithm::Default),
235 hasher2: HashFunction::with_algorithm(HashAlgorithm::Murmur3),
236 }
237 }
238
239 pub fn with_algorithms(algo1: HashAlgorithm, algo2: HashAlgorithm) -> Self {
246 DoubleHasher {
247 hasher1: HashFunction::with_algorithm(algo1),
248 hasher2: HashFunction::with_algorithm(algo2),
249 }
250 }
251
252 pub fn hash_multiple<T: Hash + ?Sized>(
273 &self,
274 item: &T,
275 count: usize,
276 max_value: usize,
277 ) -> Vec<usize> {
278 if max_value == 0 {
279 return vec![0; count];
280 }
281
282 let h1 = self.hasher1.hash(item) % max_value;
283 let h2 = self.hasher2.hash(item) % max_value;
284
285 let h2 = if h2 % 2 == 0 {
287 (h2 + 1) % max_value
288 } else {
289 h2
290 };
291
292 let mut hashes = Vec::with_capacity(count);
293 for i in 0..count {
294 let hash = (h1 + i * h2) % max_value;
295 hashes.push(hash);
296 }
297
298 hashes
299 }
300}
301
302impl Default for DoubleHasher {
303 fn default() -> Self {
304 Self::new()
305 }
306}
307
308pub struct HashQuality {
310 pub uniformity: f64,
312 pub collision_rate: f64,
314 pub avalanche_score: f64,
316}
317
318pub fn evaluate_hash_quality<T: Hash + ?Sized + Clone>(
337 hasher: &HashFunction,
338 test_data: &[T],
339 bucket_count: usize,
340) -> HashQuality {
341 if test_data.is_empty() || bucket_count == 0 {
342 return HashQuality {
343 uniformity: 0.0,
344 collision_rate: 0.0,
345 avalanche_score: 0.0,
346 };
347 }
348
349 let mut buckets = vec![0; bucket_count];
351 let mut hash_values = Vec::new();
352
353 for item in test_data {
354 let hash = hasher.hash(item);
355 hash_values.push(hash);
356 buckets[hash % bucket_count] += 1;
357 }
358
359 let expected = test_data.len() as f64 / bucket_count as f64;
361 let chi_square: f64 = buckets
362 .iter()
363 .map(|&observed| {
364 let diff = observed as f64 - expected;
365 diff * diff / expected
366 })
367 .sum();
368
369 let uniformity = 1.0 / (1.0 + chi_square / bucket_count as f64);
371
372 let unique_hashes = {
374 let mut sorted_hashes = hash_values.clone();
375 sorted_hashes.sort_unstable();
376 sorted_hashes.dedup();
377 sorted_hashes.len()
378 };
379 let collision_rate = 1.0 - (unique_hashes as f64 / test_data.len() as f64);
380
381 let avalanche_score = calculate_avalanche_score(hasher, test_data);
383
384 HashQuality {
385 uniformity,
386 collision_rate,
387 avalanche_score,
388 }
389}
390
391fn calculate_avalanche_score<T: Hash + ?Sized + Clone>(
393 hasher: &HashFunction,
394 test_data: &[T],
395) -> f64 {
396 if test_data.is_empty() {
397 return 0.0;
398 }
399
400 let sample_size = test_data.len().min(100); let mut bit_flip_counts = 0;
402 let mut total_comparisons = 0;
403
404 for i in 0..sample_size {
405 let hash1 = hasher.hash(&test_data[i]);
406
407 if i + 1 < sample_size {
411 let hash2 = hasher.hash(&test_data[i + 1]);
412 let xor_result = hash1 ^ hash2;
413 bit_flip_counts += xor_result.count_ones();
414 total_comparisons += std::mem::size_of::<usize>() * 8; }
416 }
417
418 if total_comparisons == 0 {
419 return 0.0;
420 }
421
422 let flip_ratio = bit_flip_counts as f64 / total_comparisons as f64;
424 1.0 - (flip_ratio - 0.5).abs() * 2.0 }
426
427#[cfg(test)]
428mod tests {
429 use super::*;
430
431 #[test]
432 fn test_hash_function_creation() {
433 let hash_fn = HashFunction::new();
434 assert_eq!(hash_fn.algorithm(), HashAlgorithm::Default);
435 assert_eq!(hash_fn.seed(), 0);
436 }
437
438 #[test]
439 fn test_hash_function_with_algorithm() {
440 let hash_fn = HashFunction::with_algorithm(HashAlgorithm::Murmur3);
441 assert_eq!(hash_fn.algorithm(), HashAlgorithm::Murmur3);
442 assert_eq!(hash_fn.seed(), 0);
443 }
444
445 #[test]
446 fn test_hash_function_with_seed() {
447 let hash_fn = HashFunction::with_seed(HashAlgorithm::Fnv1a, 12345);
448 assert_eq!(hash_fn.algorithm(), HashAlgorithm::Fnv1a);
449 assert_eq!(hash_fn.seed(), 12345);
450 }
451
452 #[test]
453 fn test_generate_functions() {
454 let functions = HashFunction::generate_functions(5);
455 assert_eq!(functions.len(), 5);
456
457 for i in 0..functions.len() {
459 for j in (i + 1)..functions.len() {
460 assert_ne!(functions[i].seed(), functions[j].seed());
461 }
462 }
463 }
464
465 #[test]
466 fn test_hash_consistency() {
467 let hash_fn = HashFunction::new();
468 let item = "test_string";
469
470 let hash1 = hash_fn.hash(item);
471 let hash2 = hash_fn.hash(item);
472
473 assert_eq!(hash1, hash2); }
475
476 #[test]
477 fn test_different_algorithms_produce_different_hashes() {
478 let item = "test_string";
479
480 let default_hash = HashFunction::with_algorithm(HashAlgorithm::Default).hash(item);
481 let murmur3_hash = HashFunction::with_algorithm(HashAlgorithm::Murmur3).hash(item);
482 let fnv1a_hash = HashFunction::with_algorithm(HashAlgorithm::Fnv1a).hash(item);
483
484 assert_ne!(default_hash, murmur3_hash);
486 assert_ne!(default_hash, fnv1a_hash);
487 assert_ne!(murmur3_hash, fnv1a_hash);
488 }
489
490 #[test]
491 fn test_different_seeds_produce_different_hashes() {
492 let item = "test_string";
493 let algorithm = HashAlgorithm::Default;
494
495 let hash1 = HashFunction::with_seed(algorithm, 123).hash(item);
496 let hash2 = HashFunction::with_seed(algorithm, 456).hash(item);
497
498 assert_ne!(hash1, hash2); }
500
501 #[test]
502 fn test_hash_distribution() {
503 let hash_fn = HashFunction::new();
504 let bucket_count = 100;
505 let item_count = 1000;
506
507 let mut buckets = vec![0; bucket_count];
508
509 for i in 0..item_count {
510 let item = format!("item_{}", i);
511 let hash = hash_fn.hash(&item);
512 buckets[hash % bucket_count] += 1;
513 }
514
515 let min_count = buckets.iter().min().unwrap();
517 let max_count = buckets.iter().max().unwrap();
518
519 assert!(*min_count > 0); assert!(*max_count < item_count / 5); }
522
523 #[test]
524 fn test_double_hasher() {
525 let double_hasher = DoubleHasher::new();
526 let hashes = double_hasher.hash_multiple("test", 5, 100);
527
528 assert_eq!(hashes.len(), 5);
529
530 for &hash in &hashes {
532 assert!(hash < 100);
533 }
534
535 let unique_hashes: std::collections::HashSet<_> = hashes.iter().collect();
537 assert!(unique_hashes.len() > 1); }
539
540 #[test]
541 fn test_double_hasher_zero_max_value() {
542 let double_hasher = DoubleHasher::new();
543 let hashes = double_hasher.hash_multiple("test", 5, 0);
544
545 assert_eq!(hashes.len(), 5);
546 assert!(hashes.iter().all(|&h| h == 0));
547 }
548
549 #[test]
550 fn test_hash_quality_evaluation() {
551 let hash_fn = HashFunction::new();
552 let test_data: Vec<String> = (0..1000).map(|i| format!("item_{}", i)).collect();
553
554 let quality = evaluate_hash_quality(&hash_fn, &test_data, 100);
555
556 assert!(quality.uniformity >= 0.0 && quality.uniformity <= 1.0);
557 assert!(quality.collision_rate >= 0.0 && quality.collision_rate <= 1.0);
558 assert!(quality.avalanche_score >= 0.0 && quality.avalanche_score <= 1.0);
559 }
560
561 #[test]
562 fn test_hash_quality_empty_data() {
563 let hash_fn = HashFunction::new();
564 let test_data: Vec<String> = vec![];
565
566 let quality = evaluate_hash_quality(&hash_fn, &test_data, 100);
567
568 assert_eq!(quality.uniformity, 0.0);
569 assert_eq!(quality.collision_rate, 0.0);
570 assert_eq!(quality.avalanche_score, 0.0);
571 }
572
573 #[test]
574 fn test_murmur3_hash() {
575 let item = "test";
576 let hash1 = murmur3_hash(&item, 123);
577 let hash2 = murmur3_hash(&item, 123);
578 let hash3 = murmur3_hash(&item, 456);
579
580 assert_eq!(hash1, hash2); assert_ne!(hash1, hash3); }
583
584 #[test]
585 fn test_fnv1a_hash() {
586 let item = "test";
587 let hash1 = fnv1a_hash(&item, 123);
588 let hash2 = fnv1a_hash(&item, 123);
589 let hash3 = fnv1a_hash(&item, 456);
590
591 assert_eq!(hash1, hash2); assert_ne!(hash1, hash3); }
594
595 #[test]
596 fn test_seeded_algorithm() {
597 let item = "test";
598 let hash_fn1 = HashFunction::with_seed(HashAlgorithm::Seeded(999), 123);
599 let hash_fn2 = HashFunction::with_seed(HashAlgorithm::Seeded(999), 456);
600
601 let hash1 = hash_fn1.hash(&item);
602 let hash2 = hash_fn2.hash(&item);
603
604 assert_ne!(hash1, hash2); }
606}