mappy_core/
hash.rs

1//! Fingerprint hashing and perfect hash foundation
2//!
3//! Implements the hashing layer for maplets, providing fingerprint-based
4//! hashing with configurable hash functions and collision detection.
5
6use crate::{MapletError, MapletResult};
7use ahash::RandomState;
8use std::hash::{Hash, Hasher};
9
10/// Hash function types supported by the maplet
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
12pub enum HashFunction {
13    /// `AHash` - fast, high-quality hashing
14    #[default]
15    AHash,
16    /// `TwoX` - deterministic, good distribution
17    TwoX,
18    /// FNV - simple, fast for small keys
19    Fnv,
20}
21
22/// Fingerprint-based hasher for maplets
23#[derive(Debug, Clone)]
24pub struct FingerprintHasher {
25    /// Hash function to use
26    hash_fn: HashFunction,
27    /// Random state for `AHash`
28    random_state: RandomState,
29    /// Fingerprint size in bits
30    fingerprint_bits: u32,
31    /// Mask for fingerprint extraction
32    fingerprint_mask: u64,
33}
34
35impl FingerprintHasher {
36    /// Create a new fingerprint hasher
37    #[must_use]
38    pub fn new(hash_fn: HashFunction, fingerprint_bits: u32) -> Self {
39        let fingerprint_mask = if fingerprint_bits >= 64 {
40            u64::MAX
41        } else {
42            (1u64 << fingerprint_bits) - 1
43        };
44
45        Self {
46            hash_fn,
47            random_state: RandomState::with_seed(42), // Use consistent seed
48            fingerprint_bits,
49            fingerprint_mask,
50        }
51    }
52
53    /// Calculate fingerprint for a key
54    pub fn fingerprint<T: Hash>(&self, key: &T) -> u64 {
55        let hash = self.hash_key(key);
56        hash & self.fingerprint_mask
57    }
58
59    /// Calculate full hash for a key
60    pub fn hash_key<T: Hash>(&self, key: &T) -> u64 {
61        match self.hash_fn {
62            HashFunction::AHash => self.random_state.hash_one(&key),
63            HashFunction::TwoX => {
64                use twox_hash::XxHash64;
65                let mut hasher = XxHash64::default();
66                key.hash(&mut hasher);
67                hasher.finish()
68            }
69            HashFunction::Fnv => {
70                use std::collections::hash_map::DefaultHasher;
71                let mut hasher = DefaultHasher::new();
72                key.hash(&mut hasher);
73                hasher.finish()
74            }
75        }
76    }
77
78    /// Get the fingerprint size in bits
79    #[must_use]
80    pub const fn fingerprint_bits(&self) -> u32 {
81        self.fingerprint_bits
82    }
83
84    /// Get the fingerprint mask
85    #[must_use]
86    pub const fn fingerprint_mask(&self) -> u64 {
87        self.fingerprint_mask
88    }
89
90    /// Calculate the optimal fingerprint size for a given false-positive rate
91    #[must_use]
92    pub fn optimal_fingerprint_size(false_positive_rate: f64) -> u32 {
93        if false_positive_rate <= 0.0 || false_positive_rate >= 1.0 {
94            return 8; // Default to 8 bits
95        }
96
97        // From the paper: fingerprint size = ⌈log₂(1/ε)⌉ + 3
98        let bits = (-false_positive_rate.log2()).ceil() as u32 + 3;
99        bits.min(64).max(4) // Clamp between 4 and 64 bits
100    }
101}
102
103/// Perfect hash function for mapping fingerprints to slots
104#[derive(Debug, Clone)]
105pub struct PerfectHash {
106    /// Number of slots available
107    num_slots: usize,
108    /// Hash function for slot mapping
109    slot_hasher: FingerprintHasher,
110}
111
112impl PerfectHash {
113    /// Create a new perfect hash function
114    #[must_use]
115    pub fn new(num_slots: usize, hash_fn: HashFunction) -> Self {
116        // Use a different hash function for slot mapping to avoid correlation
117        let slot_hash_fn = match hash_fn {
118            HashFunction::AHash => HashFunction::TwoX,
119            HashFunction::TwoX => HashFunction::Fnv,
120            HashFunction::Fnv => HashFunction::AHash,
121        };
122
123        Self {
124            num_slots,
125            slot_hasher: FingerprintHasher::new(slot_hash_fn, 32), // 32 bits for slot hashing
126        }
127    }
128
129    /// Map a fingerprint to a slot index
130    #[must_use]
131    pub fn slot_index(&self, fingerprint: u64) -> usize {
132        let hash = self.slot_hasher.hash_key(&fingerprint);
133        (hash as usize) % self.num_slots
134    }
135
136    /// Get the number of slots
137    #[must_use]
138    pub const fn num_slots(&self) -> usize {
139        self.num_slots
140    }
141}
142
143/// Collision detector for tracking hash collisions
144#[derive(Debug, Clone)]
145pub struct CollisionDetector {
146    /// Maximum number of collisions to track
147    max_collisions: usize,
148    /// Current collision count
149    collision_count: usize,
150    /// Collision threshold for warnings
151    warning_threshold: usize,
152}
153
154impl CollisionDetector {
155    /// Create a new collision detector
156    #[must_use]
157    pub const fn new(max_collisions: usize) -> Self {
158        Self {
159            max_collisions,
160            collision_count: 0,
161            warning_threshold: max_collisions / 2,
162        }
163    }
164
165    /// Record a collision
166    pub fn record_collision(&mut self) -> MapletResult<()> {
167        self.collision_count += 1;
168
169        if self.collision_count > self.max_collisions {
170            return Err(MapletError::CollisionLimitExceeded);
171        }
172
173        if self.collision_count > self.warning_threshold {
174            tracing::warn!(
175                "High collision count: {} (threshold: {})",
176                self.collision_count,
177                self.warning_threshold
178            );
179        }
180
181        Ok(())
182    }
183
184    /// Get the current collision count
185    #[must_use]
186    pub const fn collision_count(&self) -> usize {
187        self.collision_count
188    }
189
190    /// Reset the collision count
191    pub const fn reset(&mut self) {
192        self.collision_count = 0;
193    }
194
195    /// Check if we're approaching the collision limit
196    #[must_use]
197    pub const fn is_approaching_limit(&self) -> bool {
198        self.collision_count > self.warning_threshold
199    }
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205
206    #[test]
207    fn test_fingerprint_hasher() {
208        let hasher = FingerprintHasher::new(HashFunction::AHash, 8);
209        let key = "test_key";
210
211        let fingerprint = hasher.fingerprint(&key);
212        assert!(fingerprint < (1u64 << 8));
213
214        // Same key should produce same fingerprint
215        let fingerprint2 = hasher.fingerprint(&key);
216        assert_eq!(fingerprint, fingerprint2);
217    }
218
219    #[test]
220    fn test_perfect_hash() {
221        let perfect_hash = PerfectHash::new(100, HashFunction::AHash);
222        let fingerprint = 12345u64;
223
224        let slot = perfect_hash.slot_index(fingerprint);
225        assert!(slot < 100);
226
227        // Same fingerprint should map to same slot
228        let slot2 = perfect_hash.slot_index(fingerprint);
229        assert_eq!(slot, slot2);
230    }
231
232    #[test]
233    fn test_collision_detector() {
234        let mut detector = CollisionDetector::new(10);
235
236        // Record some collisions
237        for _ in 0..5 {
238            assert!(detector.record_collision().is_ok());
239        }
240
241        assert_eq!(detector.collision_count(), 5);
242        assert!(!detector.is_approaching_limit());
243
244        // Record more collisions to trigger warning
245        for _ in 0..5 {
246            assert!(detector.record_collision().is_ok());
247        }
248
249        assert_eq!(detector.collision_count(), 10);
250        assert!(detector.is_approaching_limit());
251
252        // Try to record one more collision - should fail
253        assert!(detector.record_collision().is_err());
254    }
255
256    #[test]
257    fn test_optimal_fingerprint_size() {
258        assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.01), 10); // log2(100) + 3 = 10
259        assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.001), 13); // log2(1000) + 3 = 13
260        assert_eq!(FingerprintHasher::optimal_fingerprint_size(0.1), 7); // log2(10) + 3 = 7
261    }
262}