simd_lookup/
entropy_map_lookup.rs

1//! Entropy-map based lookup tables using Perfect Hash Functions (PHFs)
2//!
3//! This module provides wrappers around entropy-map's MapWithDict and MapWithDictBitpacked
4//! for u32 -> u8 lookups, allowing comparison with traditional hash-based approaches.
5
6use entropy_map::MapWithDict;
7use std::time::Instant;
8
9/// Wrapper around entropy-map's MapWithDict for u32 -> u8 lookups
10pub struct EntropyMapLookup {
11    map: MapWithDict<u32, u8>,
12}
13
14impl EntropyMapLookup {
15    /// Create a new entropy map from key-value pairs
16    /// Returns the map and construction time in nanoseconds
17    pub fn new(entries: &[(u32, u8)]) -> (Self, u64) {
18        let start = Instant::now();
19
20        // Convert to the format expected by entropy-map
21        let map_entries: Vec<(u32, u8)> = entries.iter().copied().collect();
22
23        let map = MapWithDict::from_iter_with_params(map_entries, 2.0)
24            .map_err(|e| format!("Failed to create MapWithDict: {:?}", e))
25            .unwrap();
26        let construction_time = start.elapsed().as_nanos() as u64;
27
28        (Self { map }, construction_time)
29    }
30
31    /// Lookup a single value, returns 0 if key not found
32    #[inline]
33    pub fn lookup(&self, key: u32) -> u8 {
34        self.map.get(&key).copied().unwrap_or(0)
35    }
36
37    /// Lookup multiple values at once
38    pub fn lookup_batch(&self, keys: &[u32], results: &mut [u8]) {
39        assert_eq!(keys.len(), results.len());
40        for (i, &key) in keys.iter().enumerate() {
41            results[i] = self.lookup(key);
42        }
43    }
44
45    /// Get memory usage information
46    pub fn memory_usage(&self) -> usize {
47        // This is an approximation - entropy-map doesn't expose exact memory usage
48        // We estimate based on the typical PHF overhead
49        std::mem::size_of_val(&self.map)
50    }
51}
52
53/// Wrapper around entropy-map's MapWithDictBitpacked for u32 -> u8 lookups
54/// This version uses bitpacking for even more compact storage
55/// Note: For now, we'll use a simpler approach and just use MapWithDict with a different name
56/// TODO: Implement proper MapWithDictBitpacked support when API is clearer
57pub struct EntropyMapBitpackedLookup {
58    map: MapWithDict<u32, u8>,
59}
60
61impl EntropyMapBitpackedLookup {
62    /// Create a new bitpacked entropy map from key-value pairs
63    /// Returns the map and construction time in nanoseconds
64    pub fn new(entries: &[(u32, u8)]) -> (Self, u64) {
65        let start = Instant::now();
66
67        // Convert to the format expected by entropy-map
68        let map_entries: Vec<(u32, u8)> = entries.iter().copied().collect();
69
70        let map = MapWithDict::from_iter_with_params(map_entries, 2.0)
71            .map_err(|e| format!("Failed to create MapWithDict: {:?}", e))
72            .unwrap();
73        let construction_time = start.elapsed().as_nanos() as u64;
74
75        (Self { map }, construction_time)
76    }
77
78    /// Lookup a single value, returns 0 if key not found
79    #[inline]
80    pub fn lookup(&self, key: u32) -> u8 {
81        self.map.get(&key).copied().unwrap_or(0)
82    }
83
84    /// Lookup multiple values at once
85    pub fn lookup_batch(&self, keys: &[u32], results: &mut [u8]) {
86        assert_eq!(keys.len(), results.len());
87        for (i, &key) in keys.iter().enumerate() {
88            results[i] = self.lookup(key);
89        }
90    }
91
92    /// Get memory usage information
93    pub fn memory_usage(&self) -> usize {
94        // This is an approximation - entropy-map doesn't expose exact memory usage
95        // Bitpacked version should be more compact than regular version
96        std::mem::size_of_val(&self.map)
97    }
98}
99
100/// Helper function to create sparse test data with specified density
101pub fn create_sparse_entries_for_entropy(size: usize, density_percent: f32) -> Vec<(u32, u8)> {
102    let num_entries = ((size as f32) * (density_percent / 100.0)) as usize;
103    let mut entries = Vec::with_capacity(num_entries);
104
105    // Create sparse entries distributed across the range
106    let step = size / num_entries.max(1);
107    for i in 0..num_entries {
108        let key = (i * step) as u32;
109        let value = ((key % 255) + 1) as u8; // Values 1-255, avoiding 0 which is default
110        entries.push((key, value));
111    }
112
113    entries
114}
115
116/// Benchmark helper to measure construction time for different sizes
117pub fn benchmark_construction_time(sizes: &[usize], density: f32) -> Vec<(usize, u64, u64)> {
118    let mut results = Vec::new();
119
120    for &size in sizes {
121        let entries = create_sparse_entries_for_entropy(size, density);
122
123        // Benchmark MapWithDict
124        let (_, dict_time) = EntropyMapLookup::new(&entries);
125
126        // Benchmark MapWithDictBitpacked
127        let (_, bitpacked_time) = EntropyMapBitpackedLookup::new(&entries);
128
129        results.push((size, dict_time, bitpacked_time));
130
131        println!(
132            "Size: {}, Dict: {}ns, Bitpacked: {}ns",
133            size, dict_time, bitpacked_time
134        );
135    }
136
137    results
138}
139
140#[cfg(test)]
141mod tests {
142    use super::*;
143
144    fn create_test_entries() -> Vec<(u32, u8)> {
145        vec![
146            (0, 100),
147            (5, 105),
148            (10, 110),
149            (100, 200),
150            (1000, 255),
151            (10000, 42),
152        ]
153    }
154
155    #[test]
156    fn test_entropy_map_lookup() {
157        let entries = create_test_entries();
158        let (lookup, _) = EntropyMapLookup::new(&entries);
159
160        assert_eq!(lookup.lookup(0), 100);
161        assert_eq!(lookup.lookup(5), 105);
162        assert_eq!(lookup.lookup(10), 110);
163        assert_eq!(lookup.lookup(1), 0); // Not found, returns 0
164        assert_eq!(lookup.lookup(20000), 0); // Not found, returns 0
165    }
166
167    #[test]
168    fn test_entropy_map_bitpacked_lookup() {
169        let entries = create_test_entries();
170        let (lookup, _) = EntropyMapBitpackedLookup::new(&entries);
171
172        assert_eq!(lookup.lookup(0), 100);
173        assert_eq!(lookup.lookup(5), 105);
174        assert_eq!(lookup.lookup(10), 110);
175        assert_eq!(lookup.lookup(1), 0); // Not found, returns 0
176        assert_eq!(lookup.lookup(20000), 0); // Not found, returns 0
177    }
178
179    #[test]
180    fn test_batch_lookup() {
181        let entries = create_test_entries();
182        let (dict_lookup, _) = EntropyMapLookup::new(&entries);
183        let (bitpacked_lookup, _) = EntropyMapBitpackedLookup::new(&entries);
184
185        let keys = vec![0, 1, 5, 10, 15, 100, 1000, 20000];
186        let expected = vec![100, 0, 105, 110, 0, 200, 255, 0];
187
188        let mut results = vec![0u8; keys.len()];
189
190        // Test dict batch lookup
191        dict_lookup.lookup_batch(&keys, &mut results);
192        assert_eq!(results, expected);
193
194        // Test bitpacked batch lookup
195        results.fill(0);
196        bitpacked_lookup.lookup_batch(&keys, &mut results);
197        assert_eq!(results, expected);
198    }
199
200    #[test]
201    fn test_construction_time_measurement() {
202        let entries = create_test_entries();
203
204        let (_, dict_time) = EntropyMapLookup::new(&entries);
205        let (_, bitpacked_time) = EntropyMapBitpackedLookup::new(&entries);
206
207        // Construction should take some measurable time
208        assert!(dict_time > 0);
209        assert!(bitpacked_time > 0);
210
211        println!("Dict construction: {}ns", dict_time);
212        println!("Bitpacked construction: {}ns", bitpacked_time);
213    }
214}