hexz_core/algo/dedup/hash_table/
mod.rs1#[derive(Debug, Clone, Default)]
32pub struct TableStats {
33 pub total_inserts: u64,
35
36 pub total_lookups: u64,
38
39 pub total_probes: u64,
41
42 pub max_probe_length: u32,
44
45 pub level_usage: Vec<usize>,
47}
48
49impl TableStats {
50 pub fn avg_probes_per_op(&self) -> f64 {
52 let total_ops = self.total_inserts + self.total_lookups;
53 if total_ops == 0 {
54 0.0
55 } else {
56 self.total_probes as f64 / total_ops as f64
57 }
58 }
59}
60
61struct IdentityHasher(u64);
66
67impl std::hash::Hasher for IdentityHasher {
68 #[inline]
69 fn write(&mut self, bytes: &[u8]) {
70 if bytes.len() >= 8 {
71 let Ok(arr) = <[u8; 8]>::try_from(&bytes[0..8]) else {
74 unreachable!("length checked above")
75 };
76 self.0 = u64::from_le_bytes(arr);
77 } else {
78 self.0 = bytes
79 .iter()
80 .fold(0u64, |acc, &b| acc.wrapping_mul(256).wrapping_add(b as u64));
81 }
82 }
83
84 #[inline]
85 fn finish(&self) -> u64 {
86 self.0
87 }
88}
89
90#[derive(Clone, Default)]
91struct BuildIdentityHasher;
92
93impl std::hash::BuildHasher for BuildIdentityHasher {
94 type Hasher = IdentityHasher;
95
96 #[inline]
97 fn build_hasher(&self) -> IdentityHasher {
98 IdentityHasher(0)
99 }
100}
101
102pub struct StandardHashTable {
107 map: std::collections::HashMap<[u8; 32], u64, BuildIdentityHasher>,
108 total_inserts: u64,
109 total_lookups: std::cell::Cell<u64>,
110 total_probes: std::cell::Cell<u64>,
111}
112
113impl std::fmt::Debug for StandardHashTable {
114 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115 f.debug_struct("StandardHashTable")
116 .field("len", &self.map.len())
117 .field("capacity", &self.map.capacity())
118 .field("total_inserts", &self.total_inserts)
119 .finish_non_exhaustive()
120 }
121}
122
123impl StandardHashTable {
124 pub const fn new() -> Self {
126 Self {
127 map: std::collections::HashMap::with_hasher(BuildIdentityHasher),
128 total_inserts: 0,
129 total_lookups: std::cell::Cell::new(0),
130 total_probes: std::cell::Cell::new(0),
131 }
132 }
133
134 pub fn with_capacity(capacity: usize) -> Self {
136 Self {
137 map: std::collections::HashMap::with_capacity_and_hasher(capacity, BuildIdentityHasher),
138 total_inserts: 0,
139 total_lookups: std::cell::Cell::new(0),
140 total_probes: std::cell::Cell::new(0),
141 }
142 }
143
144 pub fn len(&self) -> usize {
146 self.map.len()
147 }
148
149 pub fn is_empty(&self) -> bool {
151 self.map.is_empty()
152 }
153
154 pub fn insert(&mut self, hash: [u8; 32], offset: u64) -> Option<u64> {
156 self.total_inserts += 1;
157 self.map.insert(hash, offset)
158 }
159
160 pub fn get(&self, hash: &[u8; 32]) -> Option<u64> {
162 self.total_lookups.set(self.total_lookups.get() + 1);
163 self.total_probes.set(self.total_probes.get() + 1);
164 self.map.get(hash).copied()
165 }
166
167 pub fn load_factor(&self) -> f64 {
169 let cap = self.map.capacity();
170 if cap == 0 {
171 0.0
172 } else {
173 self.map.len() as f64 / cap as f64
174 }
175 }
176
177 pub fn memory_bytes(&self) -> usize {
179 self.map.capacity() * 56
180 }
181
182 pub fn stats(&self) -> TableStats {
184 TableStats {
185 total_inserts: self.total_inserts,
186 total_lookups: self.total_lookups.get(),
187 total_probes: self.total_probes.get(),
188 max_probe_length: 1,
189 level_usage: Vec::new(),
190 }
191 }
192}
193
194impl Default for StandardHashTable {
195 fn default() -> Self {
196 Self::new()
197 }
198}
199
200unsafe impl Sync for StandardHashTable {}