hexz_core/algo/dedup/hash_table/
mod.rs1#[derive(Debug, Clone, Default)]
32pub struct TableStats {
33 pub total_inserts: u64,
35
36 pub total_lookups: u64,
38
39 pub total_probes: u64,
41
42 pub max_probe_length: u32,
44
45 pub level_usage: Vec<usize>,
47}
48
49impl TableStats {
50 pub fn avg_probes_per_op(&self) -> f64 {
52 let total_ops = self.total_inserts + self.total_lookups;
53 if total_ops == 0 {
54 0.0
55 } else {
56 self.total_probes as f64 / total_ops as f64
57 }
58 }
59}
60
61struct IdentityHasher(u64);
66
67impl std::hash::Hasher for IdentityHasher {
68 #[inline]
69 fn write(&mut self, bytes: &[u8]) {
70 if bytes.len() >= 8 {
71 self.0 = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
72 } else {
73 self.0 = bytes
74 .iter()
75 .fold(0u64, |acc, &b| acc.wrapping_mul(256).wrapping_add(b as u64));
76 }
77 }
78
79 #[inline]
80 fn finish(&self) -> u64 {
81 self.0
82 }
83}
84
85#[derive(Clone, Default)]
86struct BuildIdentityHasher;
87
88impl std::hash::BuildHasher for BuildIdentityHasher {
89 type Hasher = IdentityHasher;
90
91 #[inline]
92 fn build_hasher(&self) -> IdentityHasher {
93 IdentityHasher(0)
94 }
95}
96
97pub struct StandardHashTable {
102 map: std::collections::HashMap<[u8; 32], u64, BuildIdentityHasher>,
103 total_inserts: u64,
104 total_lookups: std::cell::Cell<u64>,
105 total_probes: std::cell::Cell<u64>,
106}
107
108impl StandardHashTable {
109 pub fn new() -> Self {
110 Self {
111 map: std::collections::HashMap::with_hasher(BuildIdentityHasher),
112 total_inserts: 0,
113 total_lookups: std::cell::Cell::new(0),
114 total_probes: std::cell::Cell::new(0),
115 }
116 }
117
118 pub fn with_capacity(capacity: usize) -> Self {
119 Self {
120 map: std::collections::HashMap::with_capacity_and_hasher(capacity, BuildIdentityHasher),
121 total_inserts: 0,
122 total_lookups: std::cell::Cell::new(0),
123 total_probes: std::cell::Cell::new(0),
124 }
125 }
126
127 pub fn len(&self) -> usize {
128 self.map.len()
129 }
130
131 pub fn is_empty(&self) -> bool {
132 self.map.is_empty()
133 }
134
135 pub fn insert(&mut self, hash: [u8; 32], offset: u64) -> Option<u64> {
136 self.total_inserts += 1;
137 self.map.insert(hash, offset)
138 }
139
140 pub fn get(&self, hash: &[u8; 32]) -> Option<u64> {
141 self.total_lookups.set(self.total_lookups.get() + 1);
142 self.total_probes.set(self.total_probes.get() + 1);
143 self.map.get(hash).copied()
144 }
145
146 pub fn load_factor(&self) -> f64 {
147 let cap = self.map.capacity();
148 if cap == 0 {
149 0.0
150 } else {
151 self.map.len() as f64 / cap as f64
152 }
153 }
154
155 pub fn memory_bytes(&self) -> usize {
156 self.map.capacity() * 56
157 }
158
159 pub fn stats(&self) -> TableStats {
160 TableStats {
161 total_inserts: self.total_inserts,
162 total_lookups: self.total_lookups.get(),
163 total_probes: self.total_probes.get(),
164 max_probe_length: 1,
165 level_usage: Vec::new(),
166 }
167 }
168}
169
170impl Default for StandardHashTable {
171 fn default() -> Self {
172 Self::new()
173 }
174}
175
176unsafe impl Sync for StandardHashTable {}