bpht/lib.rs
1//! BPHT -- A bitpacked hopscotch hash table
2//!
3//! Computing address and remainder (fingerprint; fp) from keys:
4//!
5//! key: 32-bit
6//! | 32 - fp_len address-bits | floor(log2(|ht|)) fingerprint-bits |
7//! | power of 2 of the address space | |
8//!
9//! Bitpacking hash table entries:
10//!
11//! | 32 bit payload | ~24 bit fingerprint |~ 8 hop bits|
12//! |pppppppppppppppppppppppppppppppp|ffffffffffffffffffffffff...<|>...hhhhhhhh|
13//!
14//! Hop bits: 0 means free, 1 means filled
15//! Hop bits are read from left to right
16//! 1011 means, this position is filled (___1),
17//! as is the next (__1_) and the one with offset 3 (1___)
18//!
19//! The number of fingerprint bits depends on the size of the hash table.
20//!
21mod tests;
22
23use bincode::{deserialize_from, serialize_into};
24use serde::{Deserialize, Serialize};
25use std::fs::File;
26use std::io::{BufReader, BufWriter};
27
28#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
29pub struct BPHT {
30 h: usize,
31 u: usize,
32 table: Vec<u64>,
33 hop_bits_mask: u64,
34 fingerprint_mask: u64,
35 fingerprint_shift: usize,
36 admin_bits: u64,
37 fp_bits_in_key: usize,
38 key_fp_mask: u32,
39 in_resize: bool,
40 allow_resize: bool,
41}
42
43impl BPHT {
44 /// Create a new hopscotch hash table using
45 /// a page size of h and u + h - 1 slots in total.
46 /// the last h-1 slots are used to keep overflowing entries
47 /// from the last position without wraparound
48 ///
49 /// Note that log_2(u) > h is required, to pack all bits used for
50 /// administration into 32 bits.
51 pub fn new(h: usize, u: usize, allow_resize: bool) -> Result<Self, &'static str> {
52 // Make sure that we can work with 32-bit values here.
53 // Not sure if this is helpful as it restricts the HT size.
54 //
55 assert!(u <= 2_u64.pow(32) as usize);
56 // assure that u is a power of 2
57 if u.count_ones() != 1 {
58 return Err("The parameter u is not a power of 2");
59 }
60
61 // Get the number of fingerprint bits required for a hash table
62 // of size u
63 let required_fp_bits = BPHT::fp_length_for(u as u32);
64
65 // make sure that the sum of fingerprint bits and hop bits
66 // does not exceed the 32 bits allocated for them.
67 // To circumvent this, either use less hop bits or a larger
68 // hash table, i.e. a larger u
69 //
70 // NOTE: The number of hop bits could be automatically calculated.
71 // however, 8 is a reasonable value, since it is equal to a 64 byte
72 // cache line
73 let total_admin_bits = required_fp_bits as usize + h;
74 if total_admin_bits > 32 {
75 return Err("Total sum of admin bits is >32");
76 }
77
78 let key_fp_mask = 2_u32.pow(required_fp_bits as u32) - 1;
79 // To reach the fp bits, we need to shift out hop bits
80 let fingerprint_shift = h;
81 // The remaining 32 bits not taken up by hop bits store fingerprint info
82 // these might not all contain fingerprint bits in the current setup
83 // depending on the size of u
84 let entry_fp_bits = 32 - h;
85 // a contiguous mask of fp_bits 1-bits, shifted to the right position
86 let fingerprint_mask = (2_u64.pow(entry_fp_bits as u32) - 1) << fingerprint_shift;
87 // Create the actual hash table
88 Ok(BPHT {
89 h,
90 u,
91 table: vec![0_u64; u + h - 1], // u hash values, plus h-1 shifting positions for the last hv
92 hop_bits_mask: 2_u64.pow(h as u32) - 1,
93 fingerprint_mask,
94 fingerprint_shift,
95 admin_bits: 2_u64.pow(32) - 1,
96 fp_bits_in_key: required_fp_bits,
97 key_fp_mask,
98 in_resize: false,
99 allow_resize,
100 })
101 }
102
103 /// Load a serialized BPHT from file.
104 pub fn load(path: &str) -> BPHT {
105 let loaded_hht: BPHT;
106 {
107 let mut f = BufReader::new(
108 File::open(path)
109 .unwrap_or_else(|_| panic!("Opening the file {} did not work", path)),
110 );
111 loaded_hht = deserialize_from(&mut f).unwrap_or_else(|_| {
112 panic!("Deserializing the BPHT from file {} did not work", path)
113 });
114 }
115 loaded_hht
116 }
117
118 /// Serialize a BPHT to file
119 pub fn save(&mut self, path: &str) {
120 eprintln!("Saving BPHT to {}", path);
121 let mut f = BufWriter::new(File::create(path).unwrap_or_else(|_| {
122 panic!(
123 "Opening file to BPHT at {} did not work. Check that the path exists.",
124 path
125 )
126 }));
127 serialize_into(&mut f, self).expect("Serializing the BPHT did not work.");
128 }
129
130 /// Return the hopscotch neighborhood size H.
131 pub fn get_h(&self) -> usize {
132 self.h
133 }
134
135 /// Get address space size.
136 pub fn get_size(&self) -> usize {
137 self.u
138 }
139
140 /// Compute fill rate.
141 pub fn fill_rate(&self) -> f64 {
142 let mut nonzero = 0_usize;
143 for (_addr, value) in self.table.iter().enumerate() {
144 if (value & (!self.hop_bits_mask)) != 0 {
145 nonzero += 1;
146 }
147 }
148 nonzero as f64 / (self.table.len() as f64)
149 }
150
151 /// Compute the fingerprint length for a given size u
152 fn fp_length_for(u: u32) -> usize {
153 (2_u64.pow(32) as f64 / f64::from(u)).log2().floor() as usize
154 }
155
156 /// Split a key into HT address (high bits) and remainder (low bits).
157 #[inline]
158 pub fn split_key(&self, key: u32) -> (usize, u32) {
159 // Split into: | address | fp |
160 let fp = key & self.key_fp_mask;
161 let (address, _) = key.overflowing_shr(self.fp_bits_in_key as u32);
162
163 (address as usize, fp as u32)
164 }
165
166 /// Restore a key from address and fingerprint using the
167 /// current hash table parameters.
168 fn _restore_key(&self, address: usize, fp: u32) -> u32 {
169 (address << self.fp_bits_in_key) as u32 | (fp & self.key_fp_mask)
170 }
171
172 /// Restore a key from address and fingerprint using the
173 /// provided hash table parameters.
174 fn restore_key_with(address: usize, fp: u32, fp_bits_in_key: usize, key_fp_mask: u32) -> u32 {
175 (address << fp_bits_in_key) as u32 | (fp & key_fp_mask)
176 }
177
178 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
179 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
180 // Hop bit alteration
181 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
182 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
183
184 /// Retrieve the hop bits for the given address
185 fn get_hop_bits(&self, address: usize) -> u64 {
186 self.table[address] & self.hop_bits_mask
187 }
188
189 /// Get initial hop bits for an address, taking into account the h-1
190 /// slots before the target address
191 fn initialize_insert_hop_bits(&self, address: usize) -> u64 {
192 let start = if (self.h - 1) > address {
193 // lower edge of table
194 0
195 } else {
196 // start h-1 position before the address
197 // to pass all positions that can influence
198 // the address slot
199 address - (self.h - 1)
200 };
201 // extract bits for the first
202 let mut shifting_hop_bits = self.get_hop_bits(start);
203
204 for i in start..=address {
205 shifting_hop_bits = (shifting_hop_bits >> 1) | self.get_hop_bits(i);
206 }
207 shifting_hop_bits
208 }
209
210 /// Set a specific hop bit of the address to 1
211 ///
212 /// Example: hop_bits of address 42: 10001
213 /// set_hop_bit_in_table(42, 1)
214 /// new hop bits of address 42: 10011
215 #[inline]
216 fn set_hop_bit_in_table(&mut self, address: usize, offset: usize) {
217 self.table[address] |= 0b_1 << offset;
218 }
219
220 /// Replace the hop bits of the address with the given hop_bits vector
221 #[inline]
222 fn replace_hop_bits(&mut self, address: usize, hop_bits: u64) {
223 self.table[address] = (self.table[address] & (!self.hop_bits_mask)) | hop_bits;
224 }
225
226 /// For a given (u64-encoded) hop bit vector, set a specific position to 0
227 #[inline]
228 pub fn unset_hop_bit(&self, hop_bits: u64, pos: usize) -> u64 {
229 let inverted_mask = self.hop_bits_mask ^ (0b_1 << pos);
230 hop_bits & inverted_mask
231 }
232
233 /// For a given hop bit vector, set a specific position to 1
234 #[inline]
235 pub fn set_hop_bit(&self, hop_bits: u64, pos: usize) -> u64 {
236 hop_bits | (0b_1 << pos)
237 }
238
239 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
240 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
241 // Packing, unpacking, and value alteration
242 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
243 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
244
245 /// Pack a value, fingerprint and hop bits into one 64-bit integer.
246 /// The amount of hop bits and fingerprint bits used depends on the size of
247 /// the hash table. Some bits might remain 'empty'.
248 fn pack(&self, value: u32, fp: u32, hop_bits: u64) -> u64 {
249 (u64::from(value) << 32) | (u64::from(fp) << self.fingerprint_shift) | hop_bits
250 }
251
252 /// Change the value of an entry without changing the hop bits
253 fn repack_value(&self, value: u32, fp: u32, old_value: u64) -> u64 {
254 (u64::from(value) << 32)
255 | (u64::from(fp) << self.fingerprint_shift)
256 | (old_value & self.hop_bits_mask)
257 }
258
259 /// unpack an entry into value, fingerprint, hop_bits
260 #[inline]
261 fn unpack(&self, entry: u64) -> (u32, u32, u64) {
262 (
263 (entry >> 32) as u32,
264 ((entry & self.fingerprint_mask) >> self.fingerprint_shift) as u32,
265 entry & self.hop_bits_mask,
266 )
267 }
268
269 /// unpack an entry into value, fingerprint, hop_bits
270 fn _unpack_with(&self, entry: u64, shift: usize, mask: u64) -> (u32, u32, u64) {
271 (
272 (entry >> 32) as u32,
273 ((entry & mask) >> shift) as u32,
274 entry & self.hop_bits_mask,
275 )
276 }
277
278 /// Extract a payload value from an entry by shifting
279 /// out the 32 fingerprint and hop bits
280 #[inline]
281 fn extract_value(&self, entry: u64) -> u32 {
282 (entry >> 32) as u32
283 }
284
285 /// Store a value at a position in the table
286 /// Note this does not change the hop bits.
287 /// That is handled in the insert method.
288 #[inline]
289 fn set_value(&mut self, value: u32, fp: u32, address: usize) {
290 self.table[address] = self.repack_value(value, fp, self.table[address]);
291 }
292
293 /// For a given bit packed hash table entry, check,
294 /// if it has the same fingerprint as the fp provided.
295 #[inline]
296 fn has_fp(&self, entry: u64, fp: u64) -> bool {
297 let target_fp = (entry & self.fingerprint_mask) >> self.fingerprint_shift;
298 target_fp == fp
299 }
300
301 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
302 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
303 // Helper functions
304 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
305 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
306
307 /// Check if the given address can store into the bin with given
308 /// target_offset by evaluating the hop bits. Returns the offset
309 /// relative to address of the item that can be moved into target address.
310 ///
311 /// Example: address 42, with h=4 and hop bits 1101, is queried:
312 /// find_offset_to_replace(42, 0) -> None (offset of 0 in the hop bits is not free)
313 /// find_offset_to_replace(42, 1) -> Some(0) (offset of 1 in the hop bits is 0, offset 0 can be moved there)
314 /// find_offset_to_replace(42, 2) -> None (offset of 2 in the hop bits is not free)
315 /// find_offset_to_replace(42, 3) -> None (offset of 3 in the hop bits is not free)
316 ///
317 /// Helper function for the `free_up_positions` stage of `insert`.
318 fn find_offset_to_replace(&self, address: usize, target_offset: usize) -> Option<usize> {
319 assert!(
320 target_offset < self.h,
321 "target_offset < h\ntarget offset: {}\nself.h: {}",
322 target_offset,
323 self.h
324 );
325 let hb = self.get_hop_bits(address);
326 // make sure the target position is empty and there exists a valid shifting candidate
327 if ((hb >> target_offset) & 1 == 0) && (hb != 0) {
328 // NOTE: this does not need to take slots before into account.
329 // The target slot is guaranteed to be empty.
330
331 // start from the smallest, go to the largest
332 // to minimize the number of replacements needed by
333 // choosing the biggest possible step size
334 for i in 0..target_offset {
335 // find the 1-bit making the most way towards the insert point
336 if ((hb >> i) & 0b_1) == 1 {
337 return Some(i);
338 }
339 }
340 // no valid offset was found
341 None
342 } else {
343 // either the target position is not free or
344 // the hop bits are empty
345 None
346 }
347 }
348
349 /// Get the offsets for filled position for the given address.
350 /// Associated positions are extracted from the hop bits
351 /// and returned as a vector of offset from the address.
352 ///
353 /// Example:
354 /// hop bits: 0111 -> offsets: [0, 1, 2]
355 /// so that
356 /// for o in offsets {
357 /// table[address+offset]
358 /// }
359 /// Yields an entry associated with the address.
360 /// Note that these can be soft collisions.
361 ///
362 /// Helper function for `get` and `delete`.
363 #[inline]
364 fn occupied_positions(&self, address: usize) -> Vec<usize> {
365 let mut occupied_positions = Vec::with_capacity(self.h);
366 let positions = self.get_hop_bits(address);
367
368 let mut offset = 0;
369 loop {
370 if (positions >> offset & 1) == 1 {
371 occupied_positions.push(offset);
372 }
373
374 offset += 1;
375 if offset >= self.h {
376 return occupied_positions;
377 }
378 }
379 }
380
381 /// Shift entries towards the address within one neighbourhood
382 /// This should only be possible when deletions occurred.
383 /// It is currently not used per default since its use has not yet been evaluated.
384 pub fn compact(&mut self, address: usize) -> Option<usize> {
385 // get hop bit mask showing free (0) and filled (1) positions for the current slot
386 let mut shifting_hop_bits = self.initialize_insert_hop_bits(address);
387 let mut occupied_positions = self.occupied_positions(address);
388 // let mut target_offset = 0;
389 let mut highest_occupied = *occupied_positions.iter().max().unwrap();
390 let mut moved = 0;
391
392 let mut target_offset = 0;
393 loop {
394 if target_offset >= highest_occupied {
395 break;
396 }
397 // look for the first genuinely empty slot
398 if (shifting_hop_bits & 0b_1) == 0 {
399 // remove highest occupied, move into address + offset
400 moved += 1;
401
402 let address_from = address + highest_occupied;
403
404 let entry = self.table[address_from];
405 let (value, fp, tmp_hop_bits) = self.unpack(entry);
406
407 let hop_bits = self.get_hop_bits(address);
408
409 let mut new_hop_bits_for_mca = self.unset_hop_bit(hop_bits, highest_occupied);
410 new_hop_bits_for_mca = self.set_hop_bit(new_hop_bits_for_mca, target_offset);
411
412 self.table[address_from] = self.pack(0, 0, tmp_hop_bits);
413 self.replace_hop_bits(address, new_hop_bits_for_mca);
414 self.set_value(value, fp, address + target_offset);
415
416 occupied_positions.retain(|x| x != &highest_occupied);
417 occupied_positions.push(target_offset);
418 highest_occupied = *occupied_positions.iter().max().unwrap();
419 }
420 // the current position is full
421 // shift to look at a farther offset
422 target_offset += 1;
423 let new_addr = address + target_offset;
424 shifting_hop_bits = (shifting_hop_bits >> 1) | self.get_hop_bits(new_addr);
425 }
426 if moved > 0 {
427 Some(moved)
428 } else {
429 None
430 }
431 }
432
433 /// Create a free address for insertion by shifting items to higher
434 /// addresses in their respective pages.
435 /// In other words: Try to shift an empty slot towards the target address
436 /// so that a new key can be inserted.
437 ///
438 /// The `address` parameter is the address for the newly inserted key
439 /// `free_offset` is the distance from said address to the next free slot
440 /// in the hash table.
441 ///
442 /// Helper function for `insert`.
443 pub fn free_up_positions(
444 &mut self,
445 address: usize,
446 free_offset: usize,
447 ) -> Result<usize, &'static str> {
448 // address is the POSITION in the HT at which the new item should be inserted
449 // free_offset is the DISTANCE/ OFFSET from address to the next free slot
450 //
451 // starting from (address + free_offset) work backwards to move an empty slot
452 // into range h of address.
453 //
454 // for j = (address + free_offset), j > (address + h - 1)
455 // check positions (j - h + 1)..j
456 // if one of these contains an item that can be moved into j
457 // do it
458 //
459 // repeat until j is closer than h - 1 positions to the initial address
460 let mut j = address + free_offset;
461
462 // move backwards from the first free slot by h positions
463 // and try to find an entry that can be moved into the free slot.
464 loop {
465 // sub-loop to check H-1 slots below address + active_free_offset
466 // i.e. the addresses that proncipially can move items into the free slot.
467 let mut successfully_shifted = false;
468 for move_candidate_address in (j - (self.h - 1))..j {
469 // if current address has items that can be moved into
470 // the current free spot (which is guaranteed to be free
471 // due to previous steps of this loop or the initial free slot)
472 // move it.
473 if let Some(moveable_offset) =
474 self.find_offset_to_replace(move_candidate_address, j - move_candidate_address)
475 {
476 // move the identified offset into the current free slot (j)
477 // update the current free slot and move on
478
479 // The address from which a value can be moved to free up space
480 // Note that this is the address computed for said key
481 // plus an offset at which is was inserted relative to the initial address
482 let address_from = move_candidate_address + moveable_offset;
483
484 // Extract the entry that is moved
485 let entry = self.table[address_from];
486 let (value, fp, tmp_hop_bits) = self.unpack(entry);
487
488 // compute the new hop bits for the original address () of the moved value
489 // by removing the old offset of said entru and adding the offset to the new position
490 let hop_bits = self.get_hop_bits(move_candidate_address);
491 let address_offset_to_j = j - move_candidate_address;
492
493 // Assemble new hop bits for the move_candidate_address.
494 // These will be stored at the real address of the key that is moved
495 let mut new_hop_bits_for_mca = self.unset_hop_bit(hop_bits, moveable_offset);
496 new_hop_bits_for_mca =
497 self.set_hop_bit(new_hop_bits_for_mca, address_offset_to_j);
498
499 // Clear the slot that the item was moved out of, only adding its hop bits
500 // back in. These are not changed, unless moveable offset = 0
501 if moveable_offset == 0 {
502 // this is the case, where addres_from and movable offset are the same slot
503 assert_eq!(address_from, move_candidate_address);
504 self.table[address_from] = self.pack(0, 0, new_hop_bits_for_mca);
505 } else {
506 // add the hop bits that were present at the slot from which the key
507 // was extracted back into the table
508 self.table[address_from] = self.pack(0, 0, tmp_hop_bits);
509 self.replace_hop_bits(move_candidate_address, new_hop_bits_for_mca);
510 }
511
512 // enter the shifted value at the target position j
513 self.set_value(value, fp, j);
514
515 // make sure a new free slot is set and terminate the sub-loop
516 j = address_from;
517 successfully_shifted = true;
518 break;
519 }
520 }
521
522 // check if the sub-loop above (h-1 slots before the current free position)
523 // could shift, if not, stop here and trigger a resize in insert.
524 if !successfully_shifted || j < address {
525 return Err("No freeable slot for address. Needs a resize.");
526 }
527
528 // stop, when a freed up slot is close enough to the target address
529 if j < (address + self.h) {
530 return Ok(j - address); // the offset from the keys target address to the freed up slot
531 }
532 }
533 }
534
535 /// Double the size of the HT to accomodate more entries
536 ///
537 /// Iterate through all slots from u down to h
538 /// for each slot:
539 /// extract all entries (key-value pairs) stored at this address (up to h)
540 /// restore their keys from address and remainder
541 /// reinsert the key-value pairs using the new table parameters
542 /// for the last h slots (0 .. h-1)
543 /// extract all values into a vector
544 /// reinsert the key value pairs in the vec
545 ///
546 /// Note that the order of keys within a sequence of slots is stable.
547 /// They are extracted in a certain order and reinserted in the same order.
548 fn resize(&mut self) {
549 // NOTE: Allocate the new size. Start from the largest hash value, pull
550 // a new bit out of the FP and reenter the key. This allows to resize
551 // without allocating |old HT| + |new HT| and recomputing the hashes
552 // but do with |new HT| and only repacking.
553
554 // Proof that in place shifting works for addresses larger than h:
555 //
556 // If a given address a receives an additional (least significant) bit,
557 // the new address a' is either a' = 2a (0-bit) or a' = 2a+1 (1-bit).
558 // Unless a <= h-1, a new address can always be inserted without touching old values.
559 // Since:
560 // [new address] a' > (a-1) + h - 1 [highest non shifted entry; rightmost entry ((h-1)-th soft collision) in the bucket of (a-1)]
561 // 2a > a + h - 2
562 // a > h - 2
563 //
564 // set flag that a resize is in progress.
565 // this flag is used to prevent a call to insert made during the resize
566 // process that triggers another resize. This should not happen and is
567 // always an error.
568 self.in_resize = true;
569
570 // Update all administrative parameters, keeping a backup of the old ones
571 // needed to unpack and restore old entries.
572 let old_len = self.table.len();
573 self.table.reserve_exact((2 * self.u) + self.h - 1);
574 self.table.extend(vec![0; self.u]);
575
576 let old_fp_bits_in_key = self.fp_bits_in_key;
577 let old_key_fp_mask = self.key_fp_mask;
578
579 self.fp_bits_in_key -= 1;
580 self.key_fp_mask >>= 1;
581
582 self.u *= 2;
583
584 // NOTE this could theoretically go wrong, if during a resize, another resize is triggered.
585 // this will mess up the unpacking with old values.
586 // A solution for this would be to make the resize function recursive
587 // but there is still no way to track, in which iteration, which key was
588 // (re)inserted.
589 //
590 // However, this should not arise in the first place.
591 // Resizing cannot invalidate a table and the given table is valid
592 // before the item that triggered the resize is added.
593 // Hence, this should be a valid table that is moved to a larger space.
594 //
595 // To assert this, the variable in_resize is checked, before a resize is triggered.
596
597 // only run until h, put the rest into a vec and reinsert them piecewise
598 // this is to prevent reinserted keys touching positions still
599 // occupied by not updated entries.
600 for old_address in (self.h..old_len).rev() {
601 let hb = self.get_hop_bits(old_address);
602 if hb > 0 {
603 // unpack
604 // shift one bit from fingerprint to address
605 // insert back into table
606 for offset in self.occupied_positions(old_address) {
607 // Current problem:
608 // if a cluster of keys all has trailing ones as fingerprints,
609 // resizing does not solve the resize issue.
610 // Also there are incosistencies concerning the order of fingerprints and
611 // addressbits in the hash value.
612
613 let extracted_entry = self.table[old_address + offset];
614 self.set_value(0, 0, old_address + offset);
615
616 // unpack, with old params
617 let (value, fp, _) = self.unpack(extracted_entry);
618 // restore original key used for insertion
619 let key = BPHT::restore_key_with(
620 old_address,
621 fp,
622 old_fp_bits_in_key,
623 old_key_fp_mask,
624 );
625
626 self.insert(key, value).unwrap();
627 }
628 // after this, all entries stored for the addres were removed
629 // and its hop bits are 0
630 self.replace_hop_bits(old_address, 0);
631 }
632 }
633
634 // the last h addresses as well as their soft collisions cannot be
635 // reinserted directly, but need to
636 let mut kv_pairs_left = Vec::with_capacity(self.h.pow(2)); // 2h-1 should sufffice
637 for old_address in (0..self.h).rev() {
638 let hb = self.get_hop_bits(old_address);
639 if hb > 0 {
640 for offset in self.occupied_positions(old_address) {
641 let extracted_entry = self.table[old_address + offset];
642 self.set_value(0, 0, old_address + offset);
643 let (value, fp, _) = self.unpack(extracted_entry);
644 let old_key = BPHT::restore_key_with(
645 old_address,
646 fp,
647 old_fp_bits_in_key,
648 old_key_fp_mask,
649 );
650
651 // instead of re-inserting directly, put the kv pair into a vector that
652 // will be drained later for insertion
653 kv_pairs_left.push((old_key, value));
654 }
655 self.replace_hop_bits(old_address, 0);
656 }
657 }
658
659 // insert all leftover key value pairs back into the table.
660 for (key, value) in kv_pairs_left.drain(..) {
661 self.insert(key, value).unwrap();
662 }
663
664 // Resize has finished. Set the flag accordingly before continueing
665 self.in_resize = false;
666 }
667
668 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
669 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
670 // Basic operation
671 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
672 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
673
674 /// Insert a new (key, value) pair into the HT
675 pub fn insert(&mut self, key: u32, value: u32) -> Result<(), &'static str> {
676 let (address, fp) = self.split_key(key);
677
678 // Check if address already has all hop bits set.
679 // If that is the case, immediately resize, there is
680 // no way we can make room for the new key in the current setup.
681 if self.get_hop_bits(address) == self.hop_bits_mask {
682 if self.allow_resize {
683 self.resize();
684 self.insert(key, value).unwrap();
685 return Ok(());
686 } else {
687 return Err("Resizes not allowed; full slot");
688 }
689 }
690
691 // Use linear probing to find the first empty slot.
692 // Extract hop bits, shift them after each iteration
693 // to maintain a list of occupied positions
694 // start with the accumulated hop bits from the H-1 positions
695 // left from the address, which could have filled the slot
696 let mut shifting_hop_bits = self.initialize_insert_hop_bits(address);
697 let mut probe_offset = 0;
698 loop {
699 // look for the first genuinely empty slot
700 if (shifting_hop_bits & 0b_1) == 0 {
701 // found empty position
702 if probe_offset >= self.h {
703 // start shifting process
704 if let Ok(freed_offset) = self.free_up_positions(address, probe_offset) {
705 // if a valid free position was found, fill it with the given value and fp
706 self.set_value(value, fp, address + freed_offset);
707 self.set_hop_bit_in_table(address, freed_offset);
708 return Ok(());
709 } else {
710 // prevent triggering a resize during an active resize
711 if self.in_resize {
712 panic!("Double resize");
713 }
714 if self.allow_resize {
715 self.resize();
716 } else {
717 return Err("Resizes not allowed, couldn't move free slot");
718 }
719
720 self.insert(key, value).unwrap();
721
722 return Ok(());
723 }
724 } else {
725 // all is fine. insert at address + offset
726 // set probe_offset bit in hop_bits(address)
727 self.set_value(value, fp, address + probe_offset);
728 self.set_hop_bit_in_table(address, probe_offset);
729 return Ok(());
730 }
731 } else {
732 // the current position is full
733 // shift to look at a farther offset
734 probe_offset += 1;
735
736 let new_addr = address + probe_offset;
737 // Check if the end of the table is reached
738 if new_addr >= self.table.len() {
739 // prevent triggering a resize during an active resize
740 if self.in_resize {
741 panic!("Double resize");
742 }
743 if self.allow_resize {
744 self.resize();
745 } else {
746 return Err("Resizes not allowed, ran over last slot");
747 }
748 self.insert(key, value).unwrap();
749 return Ok(());
750 } else {
751 // shift already collected hop bits one position. This
752 // shifts out the last active position. By OR-ing in the
753 // hop bits for the new active position all filled position
754 // bits are combined.
755 shifting_hop_bits = (shifting_hop_bits >> 1) | self.get_hop_bits(new_addr);
756 }
757 }
758 }
759 }
760
761 /// Increment the count for the supplied key.
762 /// This is only a valid operation when the HT is used as a counter.
763 pub fn increment_count(&mut self, key: u32) -> Result<(), &'static str> {
764 let mut hit_addresses = Vec::with_capacity(self.h);
765
766 // Get address and fingerprint
767 let (address, query_fp) = self.split_key(key);
768
769 // identify positions with target address.
770 // these can contain soft collisions with different
771 // fingerprint
772 for offset in self.occupied_positions(address) {
773 // Check if the entry shares its fingerprint with
774 // the query to weed out soft collisions
775 if self.has_fp(self.table[address + offset], u64::from(query_fp)) {
776 hit_addresses.push(address + offset);
777 }
778 }
779
780 match hit_addresses.len() {
781 0 => {
782 // this key was not yet present
783 // insert it with a count of 1
784 self.insert(key, 1)
785 }
786 1 => {
787 // This key was already present
788 // increment its count by 1
789 let hit_address = hit_addresses[0];
790 let (value, fingerprint, hop_bits) = self.unpack(self.table[hit_address]);
791 self.table[hit_address] = self.repack_value(value + 1, fingerprint, hop_bits);
792 Ok(())
793 }
794 x => {
795 // This should not arise when using the table for q-gram counting
796 panic!(
797 "More than one hit ({}). This should not be possible with a counting table.",
798 x
799 );
800 }
801 }
802 }
803
804 /// Get the count for the supplied key.
805 /// This is only a valid operation when the HT is used as a counter.
806 pub fn get_count(&self, key: u32) -> Option<u32> {
807 let mut hit_address = None;
808
809 // Get address and fingerprint
810 let (address, query_fp) = self.split_key(key);
811 let query_fp = u64::from(query_fp);
812
813 // identify positions with target address.
814 // these can contain soft collisions with different
815 // fingerprint
816 for offset in self.occupied_positions(address) {
817 // Check if the entry shares its fingerprint with
818 // the query to weed out soft collisions
819 if self.has_fp(self.table[address + offset], query_fp) {
820 hit_address = Some(address + offset);
821 }
822 }
823
824 match hit_address {
825 None => None,
826 Some(hit_address) => {
827 let (value, _, _) = self.unpack(self.table[hit_address]);
828 Some(value)
829 }
830 }
831 }
832
833 /// Get all entries for the given key in a Option<Vector>.
834 /// If no entry is found, return None.
835 pub fn get(&self, key: u32) -> Option<Vec<u32>> {
836 // Initialize output. At most h hits can be found.
837 let mut hits = Vec::with_capacity(self.h);
838
839 // Get address and fingerprint
840 let (address, query_fp) = self.split_key(key);
841
842 // identify positions with target address.
843 // these can contain soft collisions with different
844 // fingerprint
845 for offset in self.occupied_positions(address) {
846 let candidate = self.table[address + offset];
847
848 // Check if the entry shares its fingerprint with
849 // the query to weed out soft collisions
850 if self.has_fp(candidate, u64::from(query_fp)) {
851 hits.push(self.extract_value(candidate))
852 }
853 }
854
855 if !hits.is_empty() {
856 Some(hits)
857 } else {
858 None
859 }
860 }
861
862 /// Remove the occurrences of this key from the hash table
863 ///
864 /// What should the signature be? (key) or (key, value)?
865 /// Currently: (key) removes all occurences of key
866 pub fn delete(&mut self, key: u32) -> Result<(), &'static str> {
867 let (address, query_fp) = self.split_key(key);
868
869 let mut updated_hop_bits = self.get_hop_bits(address);
870
871 for offset in self.occupied_positions(address) {
872 let candidate = self.table[address + offset];
873
874 // Check if the entry shares its fingerprint with
875 // the query to weed out soft collisions
876 if self.has_fp(candidate, u64::from(query_fp)) {
877 // println!("Deleting offset {}", offset);
878 // take a one bit, shift it by the current offset.
879 // invert an h-bit bitvector using the mask XOR the set one-bit
880 // AND it to the current hop bits to set the current offset to 0
881 updated_hop_bits &= self.hop_bits_mask ^ (1 << offset);
882 self.set_value(0, 0, address + offset);
883 }
884 }
885
886 self.replace_hop_bits(address, updated_hop_bits);
887
888 Ok(())
889 }
890}