Skip to main content

bpht/
lib.rs

1//! BPHT -- A bitpacked hopscotch hash table
2//!
3//! Computing address and remainder (fingerprint; fp) from keys:
4//!
5//! key: 32-bit
6//! | 32 - fp_len address-bits         | floor(log2(|ht|)) fingerprint-bits  |
7//! | power of 2 of the address space  |                                     |
8//!
9//! Bitpacking hash table entries:
10//!
11//! |             32 bit payload     |     ~24 bit fingerprint    |~ 8 hop bits|
12//! |pppppppppppppppppppppppppppppppp|ffffffffffffffffffffffff...<|>...hhhhhhhh|
13//!
14//! Hop bits: 0 means free, 1 means filled
15//! Hop bits are read from left to right
16//!      1011 means, this position is filled (___1),
17//!      as is the next (__1_) and the one with offset 3 (1___)
18//!
19//! The number of fingerprint bits depends on the size of the hash table.
20//!
21mod tests;
22
23use bincode::{deserialize_from, serialize_into};
24use serde::{Deserialize, Serialize};
25use std::fs::File;
26use std::io::{BufReader, BufWriter};
27
28#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
29pub struct BPHT {
30    h: usize,
31    u: usize,
32    table: Vec<u64>,
33    hop_bits_mask: u64,
34    fingerprint_mask: u64,
35    fingerprint_shift: usize,
36    admin_bits: u64,
37    fp_bits_in_key: usize,
38    key_fp_mask: u32,
39    in_resize: bool,
40    allow_resize: bool,
41}
42
43impl BPHT {
44    /// Create a new hopscotch hash table using
45    /// a page size of h and u + h - 1 slots in total.
46    /// the last h-1 slots are used to keep overflowing entries
47    /// from the last position without wraparound
48    ///
49    /// Note that log_2(u) > h is required, to pack all bits used for
50    /// administration into 32 bits.
51    pub fn new(h: usize, u: usize, allow_resize: bool) -> Result<Self, &'static str> {
52        // Make sure that we can work with 32-bit values here.
53        // Not sure if this is helpful as it restricts the HT size.
54        //
55        assert!(u <= 2_u64.pow(32) as usize);
56        // assure that u is a power of 2
57        if u.count_ones() != 1 {
58            return Err("The parameter u is not a power of 2");
59        }
60
61        // Get the number of fingerprint bits required for a hash table
62        // of size u
63        let required_fp_bits = BPHT::fp_length_for(u as u32);
64
65        // make sure that the sum of fingerprint bits and hop bits
66        // does not exceed the 32 bits allocated for them.
67        // To circumvent this, either use less hop bits or a larger
68        // hash table, i.e. a larger u
69        //
70        // NOTE: The number of hop bits could be automatically calculated.
71        // however, 8 is a reasonable value, since it is equal to a 64 byte
72        // cache line
73        let total_admin_bits = required_fp_bits as usize + h;
74        if total_admin_bits > 32 {
75            return Err("Total sum of admin bits is >32");
76        }
77
78        let key_fp_mask = 2_u32.pow(required_fp_bits as u32) - 1;
79        // To reach the fp bits, we need to shift out hop bits
80        let fingerprint_shift = h;
81        // The remaining 32 bits not taken up by hop bits store fingerprint info
82        // these might not all contain fingerprint bits in the current setup
83        // depending on the size of u
84        let entry_fp_bits = 32 - h;
85        // a contiguous mask of fp_bits 1-bits, shifted to the right position
86        let fingerprint_mask = (2_u64.pow(entry_fp_bits as u32) - 1) << fingerprint_shift;
87        // Create the actual hash table
88        Ok(BPHT {
89            h,
90            u,
91            table: vec![0_u64; u + h - 1], // u hash values, plus h-1 shifting positions for the last hv
92            hop_bits_mask: 2_u64.pow(h as u32) - 1,
93            fingerprint_mask,
94            fingerprint_shift,
95            admin_bits: 2_u64.pow(32) - 1,
96            fp_bits_in_key: required_fp_bits,
97            key_fp_mask,
98            in_resize: false,
99            allow_resize,
100        })
101    }
102
103    /// Load a serialized BPHT from file.
104    pub fn load(path: &str) -> BPHT {
105        let loaded_hht: BPHT;
106        {
107            let mut f = BufReader::new(
108                File::open(path)
109                    .unwrap_or_else(|_| panic!("Opening the file {} did not work", path)),
110            );
111            loaded_hht = deserialize_from(&mut f).unwrap_or_else(|_| {
112                panic!("Deserializing the BPHT from file {} did not work", path)
113            });
114        }
115        loaded_hht
116    }
117
118    /// Serialize a BPHT to file
119    pub fn save(&mut self, path: &str) {
120        eprintln!("Saving BPHT to {}", path);
121        let mut f = BufWriter::new(File::create(path).unwrap_or_else(|_| {
122            panic!(
123                "Opening file to BPHT at {} did not work. Check that the path exists.",
124                path
125            )
126        }));
127        serialize_into(&mut f, self).expect("Serializing the BPHT did not work.");
128    }
129
130    /// Return the hopscotch neighborhood size H.
131    pub fn get_h(&self) -> usize {
132        self.h
133    }
134
135    /// Get address space size.
136    pub fn get_size(&self) -> usize {
137        self.u
138    }
139
140    /// Compute fill rate.
141    pub fn fill_rate(&self) -> f64 {
142        let mut nonzero = 0_usize;
143        for (_addr, value) in self.table.iter().enumerate() {
144            if (value & (!self.hop_bits_mask)) != 0 {
145                nonzero += 1;
146            }
147        }
148        nonzero as f64 / (self.table.len() as f64)
149    }
150
151    /// Compute the fingerprint length for a given size u
152    fn fp_length_for(u: u32) -> usize {
153        (2_u64.pow(32) as f64 / f64::from(u)).log2().floor() as usize
154    }
155
156    /// Split a key into HT address (high bits) and remainder (low bits).
157    #[inline]
158    pub fn split_key(&self, key: u32) -> (usize, u32) {
159        // Split into: | address | fp |
160        let fp = key & self.key_fp_mask;
161        let (address, _) = key.overflowing_shr(self.fp_bits_in_key as u32);
162
163        (address as usize, fp as u32)
164    }
165
166    /// Restore a key from address and fingerprint using the
167    /// current hash table parameters.
168    fn _restore_key(&self, address: usize, fp: u32) -> u32 {
169        (address << self.fp_bits_in_key) as u32 | (fp & self.key_fp_mask)
170    }
171
172    /// Restore a key from address and fingerprint using the
173    /// provided hash table parameters.
174    fn restore_key_with(address: usize, fp: u32, fp_bits_in_key: usize, key_fp_mask: u32) -> u32 {
175        (address << fp_bits_in_key) as u32 | (fp & key_fp_mask)
176    }
177
178    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
179    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
180    // Hop bit alteration
181    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
182    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
183
184    /// Retrieve the hop bits for the given address
185    fn get_hop_bits(&self, address: usize) -> u64 {
186        self.table[address] & self.hop_bits_mask
187    }
188
189    /// Get initial hop bits for an address, taking into account the h-1
190    /// slots before the target address
191    fn initialize_insert_hop_bits(&self, address: usize) -> u64 {
192        let start = if (self.h - 1) > address {
193            // lower edge of table
194            0
195        } else {
196            // start h-1 position before the address
197            // to pass all positions that can influence
198            // the address slot
199            address - (self.h - 1)
200        };
201        // extract bits for the first
202        let mut shifting_hop_bits = self.get_hop_bits(start);
203
204        for i in start..=address {
205            shifting_hop_bits = (shifting_hop_bits >> 1) | self.get_hop_bits(i);
206        }
207        shifting_hop_bits
208    }
209
210    /// Set a specific hop bit of the address to 1
211    ///
212    /// Example: hop_bits of address 42: 10001
213    /// set_hop_bit_in_table(42, 1)
214    /// new hop bits of address 42: 10011
215    #[inline]
216    fn set_hop_bit_in_table(&mut self, address: usize, offset: usize) {
217        self.table[address] |= 0b_1 << offset;
218    }
219
220    /// Replace the hop bits of the address with the given hop_bits vector
221    #[inline]
222    fn replace_hop_bits(&mut self, address: usize, hop_bits: u64) {
223        self.table[address] = (self.table[address] & (!self.hop_bits_mask)) | hop_bits;
224    }
225
226    /// For a given (u64-encoded) hop bit vector, set a specific position to 0
227    #[inline]
228    pub fn unset_hop_bit(&self, hop_bits: u64, pos: usize) -> u64 {
229        let inverted_mask = self.hop_bits_mask ^ (0b_1 << pos);
230        hop_bits & inverted_mask
231    }
232
233    /// For a given hop bit vector, set a specific position to 1
234    #[inline]
235    pub fn set_hop_bit(&self, hop_bits: u64, pos: usize) -> u64 {
236        hop_bits | (0b_1 << pos)
237    }
238
239    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
240    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
241    // Packing, unpacking, and value alteration
242    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
243    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
244
245    /// Pack a value, fingerprint and hop bits into one 64-bit integer.
246    /// The amount of hop bits and fingerprint bits used depends on the size of
247    /// the hash table. Some bits might remain 'empty'.
248    fn pack(&self, value: u32, fp: u32, hop_bits: u64) -> u64 {
249        (u64::from(value) << 32) | (u64::from(fp) << self.fingerprint_shift) | hop_bits
250    }
251
252    /// Change the value of an entry without changing the hop bits
253    fn repack_value(&self, value: u32, fp: u32, old_value: u64) -> u64 {
254        (u64::from(value) << 32)
255            | (u64::from(fp) << self.fingerprint_shift)
256            | (old_value & self.hop_bits_mask)
257    }
258
259    /// unpack an entry into value, fingerprint, hop_bits
260    #[inline]
261    fn unpack(&self, entry: u64) -> (u32, u32, u64) {
262        (
263            (entry >> 32) as u32,
264            ((entry & self.fingerprint_mask) >> self.fingerprint_shift) as u32,
265            entry & self.hop_bits_mask,
266        )
267    }
268
269    /// unpack an entry into value, fingerprint, hop_bits
270    fn _unpack_with(&self, entry: u64, shift: usize, mask: u64) -> (u32, u32, u64) {
271        (
272            (entry >> 32) as u32,
273            ((entry & mask) >> shift) as u32,
274            entry & self.hop_bits_mask,
275        )
276    }
277
278    /// Extract a payload value from an entry by shifting
279    /// out the 32 fingerprint and hop bits
280    #[inline]
281    fn extract_value(&self, entry: u64) -> u32 {
282        (entry >> 32) as u32
283    }
284
285    /// Store a value at a position in the table
286    /// Note this does not change the hop bits.
287    /// That is handled in the insert method.
288    #[inline]
289    fn set_value(&mut self, value: u32, fp: u32, address: usize) {
290        self.table[address] = self.repack_value(value, fp, self.table[address]);
291    }
292
293    /// For a given bit packed hash table entry, check,
294    /// if it has the same fingerprint as the fp provided.
295    #[inline]
296    fn has_fp(&self, entry: u64, fp: u64) -> bool {
297        let target_fp = (entry & self.fingerprint_mask) >> self.fingerprint_shift;
298        target_fp == fp
299    }
300
301    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
302    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
303    // Helper functions
304    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
305    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
306
307    /// Check if the given address can store into the bin with given
308    /// target_offset by evaluating the hop bits. Returns the offset
309    /// relative to address of the item that can be moved into target address.
310    ///
311    /// Example: address 42, with h=4 and hop bits 1101, is queried:
312    /// find_offset_to_replace(42, 0) -> None    (offset of 0 in the hop bits is not free)
313    /// find_offset_to_replace(42, 1) -> Some(0) (offset of 1 in the hop bits is 0, offset 0 can be moved there)
314    /// find_offset_to_replace(42, 2) -> None    (offset of 2 in the hop bits is not free)
315    /// find_offset_to_replace(42, 3) -> None    (offset of 3 in the hop bits is not free)
316    ///
317    /// Helper function for the `free_up_positions` stage of `insert`.
318    fn find_offset_to_replace(&self, address: usize, target_offset: usize) -> Option<usize> {
319        assert!(
320            target_offset < self.h,
321            "target_offset < h\ntarget offset: {}\nself.h: {}",
322            target_offset,
323            self.h
324        );
325        let hb = self.get_hop_bits(address);
326        // make sure the target position is empty and there exists a valid shifting candidate
327        if ((hb >> target_offset) & 1 == 0) && (hb != 0) {
328            // NOTE: this does not need to take slots before into account.
329            // The target slot is guaranteed to be empty.
330
331            // start from the smallest, go to the largest
332            // to minimize the number of replacements needed by
333            // choosing the biggest possible step size
334            for i in 0..target_offset {
335                // find the 1-bit making the most way towards the insert point
336                if ((hb >> i) & 0b_1) == 1 {
337                    return Some(i);
338                }
339            }
340            // no valid offset was found
341            None
342        } else {
343            // either the target position is not free or
344            // the hop bits are empty
345            None
346        }
347    }
348
349    /// Get the offsets for filled position for the given address.
350    /// Associated positions are extracted from the hop bits
351    /// and returned as a vector of offset from the address.
352    ///
353    /// Example:
354    /// hop bits: 0111 -> offsets: [0, 1, 2]
355    /// so that
356    /// for o in offsets {
357    ///     table[address+offset]
358    /// }
359    /// Yields an entry associated with the address.
360    /// Note that these can be soft collisions.
361    ///
362    /// Helper function for `get` and `delete`.
363    #[inline]
364    fn occupied_positions(&self, address: usize) -> Vec<usize> {
365        let mut occupied_positions = Vec::with_capacity(self.h);
366        let positions = self.get_hop_bits(address);
367
368        let mut offset = 0;
369        loop {
370            if (positions >> offset & 1) == 1 {
371                occupied_positions.push(offset);
372            }
373
374            offset += 1;
375            if offset >= self.h {
376                return occupied_positions;
377            }
378        }
379    }
380
381    /// Shift entries towards the address within one neighbourhood
382    /// This should only be possible when deletions occurred.
383    /// It is currently not used per default since its use has not yet been evaluated.
384    pub fn compact(&mut self, address: usize) -> Option<usize> {
385        // get hop bit mask showing free (0) and filled (1) positions for the current slot
386        let mut shifting_hop_bits = self.initialize_insert_hop_bits(address);
387        let mut occupied_positions = self.occupied_positions(address);
388        // let mut target_offset = 0;
389        let mut highest_occupied = *occupied_positions.iter().max().unwrap();
390        let mut moved = 0;
391
392        let mut target_offset = 0;
393        loop {
394            if target_offset >= highest_occupied {
395                break;
396            }
397            // look for the first genuinely empty slot
398            if (shifting_hop_bits & 0b_1) == 0 {
399                // remove highest occupied, move into address + offset
400                moved += 1;
401
402                let address_from = address + highest_occupied;
403
404                let entry = self.table[address_from];
405                let (value, fp, tmp_hop_bits) = self.unpack(entry);
406
407                let hop_bits = self.get_hop_bits(address);
408
409                let mut new_hop_bits_for_mca = self.unset_hop_bit(hop_bits, highest_occupied);
410                new_hop_bits_for_mca = self.set_hop_bit(new_hop_bits_for_mca, target_offset);
411
412                self.table[address_from] = self.pack(0, 0, tmp_hop_bits);
413                self.replace_hop_bits(address, new_hop_bits_for_mca);
414                self.set_value(value, fp, address + target_offset);
415
416                occupied_positions.retain(|x| x != &highest_occupied);
417                occupied_positions.push(target_offset);
418                highest_occupied = *occupied_positions.iter().max().unwrap();
419            }
420            // the current position is full
421            // shift to look at a farther offset
422            target_offset += 1;
423            let new_addr = address + target_offset;
424            shifting_hop_bits = (shifting_hop_bits >> 1) | self.get_hop_bits(new_addr);
425        }
426        if moved > 0 {
427            Some(moved)
428        } else {
429            None
430        }
431    }
432
433    /// Create a free address for insertion by shifting items to higher
434    /// addresses in their respective pages.
435    /// In other words: Try to shift an empty slot towards the target address
436    /// so that a new key can be inserted.
437    ///
438    /// The `address` parameter is the address for the newly inserted key
439    /// `free_offset` is the distance from said address to the next free slot
440    /// in the hash table.
441    ///
442    /// Helper function for `insert`.
443    pub fn free_up_positions(
444        &mut self,
445        address: usize,
446        free_offset: usize,
447    ) -> Result<usize, &'static str> {
448        // address is the POSITION in the HT at which the new item should be inserted
449        // free_offset is the DISTANCE/ OFFSET from address to the next free slot
450        //
451        // starting from (address + free_offset) work backwards to move an empty slot
452        // into range h of address.
453        //
454        // for j = (address + free_offset), j > (address + h - 1)
455        // check positions (j - h + 1)..j
456        // if one of these contains an item that can be moved into j
457        // do it
458        //
459        // repeat until j is closer than h - 1 positions to the initial address
460        let mut j = address + free_offset;
461
462        // move backwards from the first free slot by h positions
463        // and try to find an entry that can be moved into the free slot.
464        loop {
465            // sub-loop to check H-1 slots below address + active_free_offset
466            // i.e. the addresses that proncipially can move items into the free slot.
467            let mut successfully_shifted = false;
468            for move_candidate_address in (j - (self.h - 1))..j {
469                // if current address has items that can be moved into
470                // the current free spot (which is guaranteed to be free
471                // due to previous steps of this loop or the initial free slot)
472                // move it.
473                if let Some(moveable_offset) =
474                    self.find_offset_to_replace(move_candidate_address, j - move_candidate_address)
475                {
476                    // move the identified offset into the current free slot (j)
477                    // update the current free slot and move on
478
479                    // The address from which a value can be moved to free up space
480                    // Note that this is the address computed for said key
481                    // plus an offset at which is was inserted relative to the initial address
482                    let address_from = move_candidate_address + moveable_offset;
483
484                    // Extract the entry that is moved
485                    let entry = self.table[address_from];
486                    let (value, fp, tmp_hop_bits) = self.unpack(entry);
487
488                    // compute the new hop bits for the original address () of the moved value
489                    // by removing the old offset of said entru and adding the offset to the new position
490                    let hop_bits = self.get_hop_bits(move_candidate_address);
491                    let address_offset_to_j = j - move_candidate_address;
492
493                    // Assemble new hop bits for the move_candidate_address.
494                    // These will be stored at the real address of the key that is moved
495                    let mut new_hop_bits_for_mca = self.unset_hop_bit(hop_bits, moveable_offset);
496                    new_hop_bits_for_mca =
497                        self.set_hop_bit(new_hop_bits_for_mca, address_offset_to_j);
498
499                    // Clear the slot that the item was moved out of, only adding its hop bits
500                    // back in. These are not changed, unless moveable offset = 0
501                    if moveable_offset == 0 {
502                        // this is the case, where addres_from and movable offset are the same slot
503                        assert_eq!(address_from, move_candidate_address);
504                        self.table[address_from] = self.pack(0, 0, new_hop_bits_for_mca);
505                    } else {
506                        // add the hop bits that were present at the slot from which the key
507                        // was extracted back into the table
508                        self.table[address_from] = self.pack(0, 0, tmp_hop_bits);
509                        self.replace_hop_bits(move_candidate_address, new_hop_bits_for_mca);
510                    }
511
512                    // enter the shifted value at the target position j
513                    self.set_value(value, fp, j);
514
515                    // make sure a new free slot is set and terminate the sub-loop
516                    j = address_from;
517                    successfully_shifted = true;
518                    break;
519                }
520            }
521
522            // check if the sub-loop above (h-1 slots before the current free position)
523            // could shift, if not, stop here and trigger a resize in insert.
524            if !successfully_shifted || j < address {
525                return Err("No freeable slot for address. Needs a resize.");
526            }
527
528            // stop, when a freed up slot is close enough to the target address
529            if j < (address + self.h) {
530                return Ok(j - address); // the offset from the keys target address to the freed up slot
531            }
532        }
533    }
534
535    /// Double the size of the HT to accomodate more entries
536    ///
537    /// Iterate through all slots from u down to h
538    /// for each slot:
539    ///   extract all entries (key-value pairs) stored at this address (up to h)
540    ///   restore their keys from address and remainder
541    ///   reinsert the key-value pairs using the new table parameters
542    /// for the last h slots (0 .. h-1)
543    /// extract all values into a vector
544    /// reinsert the key value pairs in the vec
545    ///
546    /// Note that the order of keys within a sequence of slots is stable.
547    /// They are extracted in a certain order and reinserted in the same order.
548    fn resize(&mut self) {
549        // NOTE: Allocate the new size. Start from the largest hash value, pull
550        // a new bit out of the FP and reenter the key. This allows to resize
551        // without allocating |old HT| + |new HT| and recomputing the hashes
552        // but do with |new HT| and only repacking.
553
554        // Proof that in place shifting works for addresses larger than h:
555        //
556        // If a given address a receives an additional (least significant) bit,
557        // the new address a' is either a' = 2a (0-bit) or a' = 2a+1 (1-bit).
558        // Unless a <= h-1, a new address can always be inserted without touching old values.
559        // Since:
560        //  [new address]        a'  >  (a-1) + h - 1   [highest non shifted entry; rightmost entry ((h-1)-th soft collision) in the bucket of (a-1)]
561        //                      2a   >  a + h - 2
562        //                       a   >  h - 2
563        //
564        // set flag that a resize is in progress.
565        // this flag is used to prevent a call to insert made during the resize
566        // process that triggers another resize. This should not happen and is
567        // always an error.
568        self.in_resize = true;
569
570        // Update all administrative parameters, keeping a backup of the old ones
571        // needed to unpack and restore old entries.
572        let old_len = self.table.len();
573        self.table.reserve_exact((2 * self.u) + self.h - 1);
574        self.table.extend(vec![0; self.u]);
575
576        let old_fp_bits_in_key = self.fp_bits_in_key;
577        let old_key_fp_mask = self.key_fp_mask;
578
579        self.fp_bits_in_key -= 1;
580        self.key_fp_mask >>= 1;
581
582        self.u *= 2;
583
584        // NOTE this could theoretically go wrong, if during a resize, another resize is triggered.
585        // this will mess up the unpacking with old values.
586        // A solution for this would be to make the resize function recursive
587        // but there is still no way to track, in which iteration, which key was
588        // (re)inserted.
589        //
590        // However, this should not arise in the first place.
591        // Resizing cannot invalidate a table and the given table is valid
592        // before the item that triggered the resize is added.
593        // Hence, this should be a valid table that is moved to a larger space.
594        //
595        // To assert this, the variable in_resize is checked, before a resize is triggered.
596
597        // only run until h, put the rest into a vec and reinsert them piecewise
598        // this is to prevent reinserted keys touching positions still
599        // occupied by not updated entries.
600        for old_address in (self.h..old_len).rev() {
601            let hb = self.get_hop_bits(old_address);
602            if hb > 0 {
603                // unpack
604                // shift one bit from fingerprint to address
605                // insert back into table
606                for offset in self.occupied_positions(old_address) {
607                    // Current problem:
608                    // if a cluster of keys all has trailing ones as fingerprints,
609                    // resizing does not solve the resize issue.
610                    // Also there are incosistencies concerning the order of fingerprints and
611                    // addressbits in the hash value.
612
613                    let extracted_entry = self.table[old_address + offset];
614                    self.set_value(0, 0, old_address + offset);
615
616                    // unpack, with old params
617                    let (value, fp, _) = self.unpack(extracted_entry);
618                    // restore original key used for insertion
619                    let key = BPHT::restore_key_with(
620                        old_address,
621                        fp,
622                        old_fp_bits_in_key,
623                        old_key_fp_mask,
624                    );
625
626                    self.insert(key, value).unwrap();
627                }
628                // after this, all entries stored for the addres were removed
629                // and its hop bits are 0
630                self.replace_hop_bits(old_address, 0);
631            }
632        }
633
634        // the last h addresses as well as their soft collisions cannot be
635        // reinserted directly, but need to
636        let mut kv_pairs_left = Vec::with_capacity(self.h.pow(2)); // 2h-1 should sufffice
637        for old_address in (0..self.h).rev() {
638            let hb = self.get_hop_bits(old_address);
639            if hb > 0 {
640                for offset in self.occupied_positions(old_address) {
641                    let extracted_entry = self.table[old_address + offset];
642                    self.set_value(0, 0, old_address + offset);
643                    let (value, fp, _) = self.unpack(extracted_entry);
644                    let old_key = BPHT::restore_key_with(
645                        old_address,
646                        fp,
647                        old_fp_bits_in_key,
648                        old_key_fp_mask,
649                    );
650
651                    // instead of re-inserting directly, put the kv pair into a vector that
652                    // will be drained later for insertion
653                    kv_pairs_left.push((old_key, value));
654                }
655                self.replace_hop_bits(old_address, 0);
656            }
657        }
658
659        // insert all leftover key value pairs back into the table.
660        for (key, value) in kv_pairs_left.drain(..) {
661            self.insert(key, value).unwrap();
662        }
663
664        // Resize has finished. Set the flag accordingly before continueing
665        self.in_resize = false;
666    }
667
668    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
669    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
670    // Basic operation
671    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
672    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
673
674    /// Insert a new (key, value) pair into the HT
675    pub fn insert(&mut self, key: u32, value: u32) -> Result<(), &'static str> {
676        let (address, fp) = self.split_key(key);
677
678        // Check if address already has all hop bits set.
679        // If that is the case, immediately resize, there is
680        // no way we can make room for the new key in the current setup.
681        if self.get_hop_bits(address) == self.hop_bits_mask {
682            if self.allow_resize {
683                self.resize();
684                self.insert(key, value).unwrap();
685                return Ok(());
686            } else {
687                return Err("Resizes not allowed; full slot");
688            }
689        }
690
691        // Use linear probing to find the first empty slot.
692        // Extract hop bits, shift them after each iteration
693        // to maintain a list of occupied positions
694        // start with the accumulated hop bits from the H-1 positions
695        // left from the address, which could have filled the slot
696        let mut shifting_hop_bits = self.initialize_insert_hop_bits(address);
697        let mut probe_offset = 0;
698        loop {
699            // look for the first genuinely empty slot
700            if (shifting_hop_bits & 0b_1) == 0 {
701                // found empty position
702                if probe_offset >= self.h {
703                    // start shifting process
704                    if let Ok(freed_offset) = self.free_up_positions(address, probe_offset) {
705                        // if a valid free position was found, fill it with the given value and fp
706                        self.set_value(value, fp, address + freed_offset);
707                        self.set_hop_bit_in_table(address, freed_offset);
708                        return Ok(());
709                    } else {
710                        // prevent triggering a resize during an active resize
711                        if self.in_resize {
712                            panic!("Double resize");
713                        }
714                        if self.allow_resize {
715                            self.resize();
716                        } else {
717                            return Err("Resizes not allowed, couldn't move free slot");
718                        }
719
720                        self.insert(key, value).unwrap();
721
722                        return Ok(());
723                    }
724                } else {
725                    // all is fine. insert at address + offset
726                    // set probe_offset bit in hop_bits(address)
727                    self.set_value(value, fp, address + probe_offset);
728                    self.set_hop_bit_in_table(address, probe_offset);
729                    return Ok(());
730                }
731            } else {
732                // the current position is full
733                // shift to look at a farther offset
734                probe_offset += 1;
735
736                let new_addr = address + probe_offset;
737                // Check if the end of the table is reached
738                if new_addr >= self.table.len() {
739                    // prevent triggering a resize during an active resize
740                    if self.in_resize {
741                        panic!("Double resize");
742                    }
743                    if self.allow_resize {
744                        self.resize();
745                    } else {
746                        return Err("Resizes not allowed, ran over last slot");
747                    }
748                    self.insert(key, value).unwrap();
749                    return Ok(());
750                } else {
751                    // shift already collected hop bits one position. This
752                    // shifts out the last active position. By OR-ing in the
753                    // hop bits for the new active position all filled position
754                    // bits are combined.
755                    shifting_hop_bits = (shifting_hop_bits >> 1) | self.get_hop_bits(new_addr);
756                }
757            }
758        }
759    }
760
761    /// Increment the count for the supplied key.
762    /// This is only a valid operation when the HT is used as a counter.
763    pub fn increment_count(&mut self, key: u32) -> Result<(), &'static str> {
764        let mut hit_addresses = Vec::with_capacity(self.h);
765
766        // Get address and fingerprint
767        let (address, query_fp) = self.split_key(key);
768
769        // identify positions with target address.
770        // these can contain soft collisions with different
771        // fingerprint
772        for offset in self.occupied_positions(address) {
773            // Check if the entry shares its fingerprint with
774            // the query to weed out soft collisions
775            if self.has_fp(self.table[address + offset], u64::from(query_fp)) {
776                hit_addresses.push(address + offset);
777            }
778        }
779
780        match hit_addresses.len() {
781            0 => {
782                // this key was not yet present
783                // insert it with a count of 1
784                self.insert(key, 1)
785            }
786            1 => {
787                // This key was already present
788                // increment its count by 1
789                let hit_address = hit_addresses[0];
790                let (value, fingerprint, hop_bits) = self.unpack(self.table[hit_address]);
791                self.table[hit_address] = self.repack_value(value + 1, fingerprint, hop_bits);
792                Ok(())
793            }
794            x => {
795                // This should not arise when using the table for q-gram counting
796                panic!(
797                    "More than one hit ({}). This should not be possible with a counting table.",
798                    x
799                );
800            }
801        }
802    }
803
804    /// Get the count for the supplied key.
805    /// This is only a valid operation when the HT is used as a counter.
806    pub fn get_count(&self, key: u32) -> Option<u32> {
807        let mut hit_address = None;
808
809        // Get address and fingerprint
810        let (address, query_fp) = self.split_key(key);
811        let query_fp = u64::from(query_fp);
812
813        // identify positions with target address.
814        // these can contain soft collisions with different
815        // fingerprint
816        for offset in self.occupied_positions(address) {
817            // Check if the entry shares its fingerprint with
818            // the query to weed out soft collisions
819            if self.has_fp(self.table[address + offset], query_fp) {
820                hit_address = Some(address + offset);
821            }
822        }
823
824        match hit_address {
825            None => None,
826            Some(hit_address) => {
827                let (value, _, _) = self.unpack(self.table[hit_address]);
828                Some(value)
829            }
830        }
831    }
832
833    /// Get all entries for the given key in a Option<Vector>.
834    /// If no entry is found, return None.
835    pub fn get(&self, key: u32) -> Option<Vec<u32>> {
836        // Initialize output. At most h hits can be found.
837        let mut hits = Vec::with_capacity(self.h);
838
839        // Get address and fingerprint
840        let (address, query_fp) = self.split_key(key);
841
842        // identify positions with target address.
843        // these can contain soft collisions with different
844        // fingerprint
845        for offset in self.occupied_positions(address) {
846            let candidate = self.table[address + offset];
847
848            // Check if the entry shares its fingerprint with
849            // the query to weed out soft collisions
850            if self.has_fp(candidate, u64::from(query_fp)) {
851                hits.push(self.extract_value(candidate))
852            }
853        }
854
855        if !hits.is_empty() {
856            Some(hits)
857        } else {
858            None
859        }
860    }
861
862    /// Remove the occurrences of this key from the hash table
863    ///
864    /// What should the signature be? (key) or (key, value)?
865    /// Currently: (key) removes all occurences of key
866    pub fn delete(&mut self, key: u32) -> Result<(), &'static str> {
867        let (address, query_fp) = self.split_key(key);
868
869        let mut updated_hop_bits = self.get_hop_bits(address);
870
871        for offset in self.occupied_positions(address) {
872            let candidate = self.table[address + offset];
873
874            // Check if the entry shares its fingerprint with
875            // the query to weed out soft collisions
876            if self.has_fp(candidate, u64::from(query_fp)) {
877                // println!("Deleting offset {}", offset);
878                // take a one bit, shift it by the current offset.
879                // invert an h-bit bitvector using the mask XOR the set one-bit
880                // AND it to the current hop bits to set the current offset to 0
881                updated_hop_bits &= self.hop_bits_mask ^ (1 << offset);
882                self.set_value(0, 0, address + offset);
883            }
884        }
885
886        self.replace_hop_bits(address, updated_hop_bits);
887
888        Ok(())
889    }
890}