summavy_stacker/
arena_hashmap.rs1use std::{iter, mem, slice};
2
3use byteorder::{ByteOrder, NativeEndian};
4use murmurhash32::murmurhash2;
5
6use super::{Addr, MemoryArena};
7use crate::memory_arena::store;
8use crate::UnorderedId;
9
10pub fn compute_table_size(capacity: usize) -> usize {
14 capacity * mem::size_of::<KeyValue>()
15}
16
17#[derive(Copy, Clone)]
21struct KeyValue {
22 key_value_addr: Addr,
23 hash: u32,
24 unordered_id: UnorderedId,
25}
26
27impl Default for KeyValue {
28 fn default() -> Self {
29 KeyValue {
30 key_value_addr: Addr::null_pointer(),
31 hash: 0u32,
32 unordered_id: UnorderedId::default(),
33 }
34 }
35}
36
37impl KeyValue {
38 fn is_empty(self) -> bool {
39 self.key_value_addr.is_null()
40 }
41}
42
43pub struct ArenaHashMap {
53 table: Box<[KeyValue]>,
54 memory_arena: MemoryArena,
55 mask: usize,
56 occupied: Vec<usize>,
57 len: usize,
58}
59
60struct QuadraticProbing {
61 hash: usize,
62 i: usize,
63 mask: usize,
64}
65
66impl QuadraticProbing {
67 #[inline]
68 fn compute(hash: usize, mask: usize) -> QuadraticProbing {
69 QuadraticProbing { hash, i: 0, mask }
70 }
71
72 #[inline]
73 fn next_probe(&mut self) -> usize {
74 self.i += 1;
75 (self.hash + self.i) & self.mask
76 }
77}
78
79pub struct Iter<'a> {
80 hashmap: &'a ArenaHashMap,
81 inner: slice::Iter<'a, usize>,
82}
83
84impl<'a> Iterator for Iter<'a> {
85 type Item = (&'a [u8], Addr, UnorderedId);
86
87 fn next(&mut self) -> Option<Self::Item> {
88 self.inner.next().cloned().map(move |bucket: usize| {
89 let kv = self.hashmap.table[bucket];
90 let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr);
91 (key, offset, kv.unordered_id)
92 })
93 }
94}
95
96fn compute_previous_power_of_two(n: usize) -> usize {
101 assert!(n > 0);
102 let msb = (63u32 - (n as u64).leading_zeros()) as u8;
103 1 << msb
104}
105
106impl ArenaHashMap {
107 pub fn new(table_size: usize) -> ArenaHashMap {
108 assert!(table_size > 0);
109 let table_size_power_of_2 = compute_previous_power_of_two(table_size);
110 let memory_arena = MemoryArena::default();
111 let table: Vec<KeyValue> = iter::repeat(KeyValue::default())
112 .take(table_size_power_of_2)
113 .collect();
114 ArenaHashMap {
115 table: table.into_boxed_slice(),
116 memory_arena,
117 mask: table_size_power_of_2 - 1,
118 occupied: Vec::with_capacity(table_size_power_of_2 / 2),
119 len: 0,
120 }
121 }
122
123 #[inline]
124 pub fn read<Item: Copy + 'static>(&self, addr: Addr) -> Item {
125 self.memory_arena.read(addr)
126 }
127
128 #[inline]
129 fn probe(&self, hash: u32) -> QuadraticProbing {
130 QuadraticProbing::compute(hash as usize, self.mask)
131 }
132
133 #[inline]
134 pub fn mem_usage(&self) -> usize {
135 self.table.len() * mem::size_of::<KeyValue>()
136 }
137
138 #[inline]
139 fn is_saturated(&self) -> bool {
140 self.table.len() < self.occupied.len() * 3
141 }
142
143 #[inline]
144 fn get_key_value(&self, addr: Addr) -> (&[u8], Addr) {
145 let data = self.memory_arena.slice_from(addr);
146 let key_bytes_len = NativeEndian::read_u16(data) as usize;
147 let key_bytes: &[u8] = &data[2..][..key_bytes_len];
148 (key_bytes, addr.offset(2u32 + key_bytes_len as u32))
149 }
150
151 #[inline]
152 fn get_value_addr_if_key_match(&self, target_key: &[u8], addr: Addr) -> Option<Addr> {
153 let (stored_key, value_addr) = self.get_key_value(addr);
154 if stored_key == target_key {
155 Some(value_addr)
156 } else {
157 None
158 }
159 }
160
161 #[inline]
162 fn set_bucket(&mut self, hash: u32, key_value_addr: Addr, bucket: usize) -> UnorderedId {
163 self.occupied.push(bucket);
164 let unordered_id = self.len as UnorderedId;
165 self.len += 1;
166 self.table[bucket] = KeyValue {
167 key_value_addr,
168 hash,
169 unordered_id,
170 };
171 unordered_id
172 }
173
174 #[inline]
175 pub fn is_empty(&self) -> bool {
176 self.len() == 0
177 }
178
179 #[inline]
180 pub fn len(&self) -> usize {
181 self.len
182 }
183
184 #[inline]
185 pub fn iter(&self) -> Iter<'_> {
186 Iter {
187 inner: self.occupied.iter(),
188 hashmap: self,
189 }
190 }
191
192 fn resize(&mut self) {
193 let new_len = self.table.len() * 2;
194 let mask = new_len - 1;
195 self.mask = mask;
196 let new_table = vec![KeyValue::default(); new_len].into_boxed_slice();
197 let old_table = mem::replace(&mut self.table, new_table);
198 for old_pos in self.occupied.iter_mut() {
199 let key_value: KeyValue = old_table[*old_pos];
200 let mut probe = QuadraticProbing::compute(key_value.hash as usize, mask);
201 loop {
202 let bucket = probe.next_probe();
203 if self.table[bucket].is_empty() {
204 *old_pos = bucket;
205 self.table[bucket] = key_value;
206 break;
207 }
208 }
209 }
210 }
211
212 pub fn mutate_or_create<V, TMutator>(
223 &mut self,
224 key: &[u8],
225 mut updater: TMutator,
226 ) -> UnorderedId
227 where
228 V: Copy + 'static,
229 TMutator: FnMut(Option<V>) -> V,
230 {
231 if self.is_saturated() {
232 self.resize();
233 }
234 let hash = murmurhash2(key);
235 let mut probe = self.probe(hash);
236 loop {
237 let bucket = probe.next_probe();
238 let kv: KeyValue = self.table[bucket];
239 if kv.is_empty() {
240 let val = updater(None);
242 let num_bytes = std::mem::size_of::<u16>() + key.len() + std::mem::size_of::<V>();
243 let key_addr = self.memory_arena.allocate_space(num_bytes);
244 {
245 let data = self.memory_arena.slice_mut(key_addr, num_bytes);
246 NativeEndian::write_u16(data, key.len() as u16);
247 let stop = 2 + key.len();
248 data[2..stop].copy_from_slice(key);
249 store(&mut data[stop..], val);
250 }
251 return self.set_bucket(hash, key_addr, bucket);
252 } else if kv.hash == hash {
253 if let Some(val_addr) = self.get_value_addr_if_key_match(key, kv.key_value_addr) {
254 let v = self.memory_arena.read(val_addr);
255 let new_v = updater(Some(v));
256 self.memory_arena.write_at(val_addr, new_v);
257 return kv.unordered_id;
258 }
259 }
260 }
261 }
262}
263
264#[cfg(test)]
265mod tests {
266
267 use std::collections::HashMap;
268
269 use super::{compute_previous_power_of_two, ArenaHashMap};
270
271 #[test]
272 fn test_hash_map() {
273 let mut hash_map: ArenaHashMap = ArenaHashMap::new(1 << 18);
274 hash_map.mutate_or_create(b"abc", |opt_val: Option<u32>| {
275 assert_eq!(opt_val, None);
276 3u32
277 });
278 hash_map.mutate_or_create(b"abcd", |opt_val: Option<u32>| {
279 assert_eq!(opt_val, None);
280 4u32
281 });
282 hash_map.mutate_or_create(b"abc", |opt_val: Option<u32>| {
283 assert_eq!(opt_val, Some(3u32));
284 5u32
285 });
286 let mut vanilla_hash_map = HashMap::new();
287 let iter_values = hash_map.iter();
288 for (key, addr, _) in iter_values {
289 let val: u32 = hash_map.memory_arena.read(addr);
290 vanilla_hash_map.insert(key.to_owned(), val);
291 }
292 assert_eq!(vanilla_hash_map.len(), 2);
293 }
294
295 #[test]
296 fn test_compute_previous_power_of_two() {
297 assert_eq!(compute_previous_power_of_two(8), 8);
298 assert_eq!(compute_previous_power_of_two(9), 8);
299 assert_eq!(compute_previous_power_of_two(7), 4);
300 assert_eq!(compute_previous_power_of_two(u64::MAX as usize), 1 << 63);
301 }
302}