odht/
raw_table.rs

//! This module implements the actual hash table logic. It works solely with
//! byte arrays and does not know anything about the unencoded key and value
//! types.
//!
//! The implementation makes sure that the encoded data contains no padding
//! bytes (which makes it deterministic) and nothing in it requires any specific
//! alignment.
//!
//! Many functions in this module are marked as `#[inline]`. This allows
//! LLVM to retain the information about byte array sizes, even though they are
//! converted to slices (of unknown size) from time to time.

use crate::swisstable_group_query::{GroupQuery, GROUP_SIZE, REFERENCE_GROUP_SIZE};
use crate::{error::Error, HashFn};
use std::convert::TryInto;
use std::{fmt, marker::PhantomData, mem::size_of};

/// Values of this type represent key-value pairs *as they are stored on-disk*.
/// `#[repr(C)]` makes sure we have deterministic field order and the fields
/// being byte arrays makes sure that there are no padding bytes, alignment is
/// not restricted, and the data is endian-independent.
///
/// It is a strict requirement that any `&[Entry<K, V>]` can be transmuted
/// into a `&[u8]` and back, regardless of whether the byte array has been
/// moved in the meantime.
#[repr(C)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub(crate) struct Entry<K: ByteArray, V: ByteArray> {
    pub key: K,
    pub value: V,
}

impl<K: ByteArray, V: ByteArray> Entry<K, V> {
    #[inline]
    fn new(key: K, value: V) -> Entry<K, V> {
        Entry { key, value }
    }
}

impl<K: ByteArray, V: ByteArray> Default for Entry<K, V> {
    #[inline]
    fn default() -> Entry<K, V> {
        Entry {
            key: K::zeroed(),
            value: V::zeroed(),
        }
    }
}

impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTable<'a, K, V, H> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "RawTable (slot_count={}, hash_fn={}) {{",
            self.data.len(),
            std::any::type_name::<H>(),
        )?;
        for (index, (&metadata, entry)) in self.metadata.iter().zip(self.data.iter()).enumerate() {
            if is_empty_or_deleted(metadata) {
                writeln!(f, "slot {}: empty", index)?;
            } else {
                writeln!(
                    f,
                    "slot {}: key={:?}, value={:?}, control_byte={}",
                    index, entry.key, entry.value, metadata
                )?;
            }
        }
        writeln!(f, "}}")
    }
}

pub(crate) type EntryMetadata = u8;

type HashValue = u32;

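// The hash value is split in the usual SwissTable fashion: `h1` determines
// the slot at which probing starts, while `h2` is the 7-bit fragment stored
// in the control byte of an occupied slot. For the 32-bit hash values used
// here, `h2` extracts the top 7 bits, e.g. h2(0xFFFF_FFFF) == 0x7F.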
#[inline]
fn h1(h: HashValue) -> usize {
    h as usize
}

#[inline]
fn h2(h: HashValue) -> u8 {
    const SHIFT: usize = size_of::<HashValue>() * 8 - 7;
    (h >> SHIFT) as u8
}

/// This type implements the sequence in which slots are probed when resolving
/// hash value conflicts. Note that probing is always done as if `GROUP_SIZE`
/// were 16, even though on targets without sse2 `GROUP_SIZE` is going to be
/// smaller. By keeping the probing pattern constant (i.e. always visiting
/// the same slots in the same order, independently of `GROUP_SIZE`) we enable
/// the hash table layout to be target-independent. In other words: a hash
/// table created and persisted on a target with `GROUP_SIZE` x can always
/// be loaded and read on a target with a different `GROUP_SIZE` y.
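///
/// With `REFERENCE_GROUP_SIZE` being 16, this yields the usual triangular
/// probing pattern: starting from base slot `b`, successive groups begin at
/// `b, b+16, b+48, b+96, ...` (all taken modulo the slot count).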
struct ProbeSeq<const GROUP_SIZE: usize> {
    index: usize,
    base: usize,
    chunk_index: usize,
    stride: usize,
}

impl<const GROUP_SIZE: usize> ProbeSeq<GROUP_SIZE> {
    #[inline]
    fn new(h1: usize, mask: usize) -> Self {
        let initial_index = h1 & mask;

        ProbeSeq {
            index: initial_index,
            base: initial_index,
            chunk_index: 0,
            stride: 0,
        }
    }

    #[inline]
    fn advance(&mut self, mask: usize) {
        debug_assert!(GROUP_SIZE <= REFERENCE_GROUP_SIZE);
        debug_assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);

        // The effect of the code in the two branches is
        // identical if GROUP_SIZE==REFERENCE_GROUP_SIZE
        // but the if statement should make it very easy
        // for the compiler to discard the more costly
        // version and only emit the simplified version.

        if GROUP_SIZE == REFERENCE_GROUP_SIZE {
            self.stride += REFERENCE_GROUP_SIZE;
            self.index += self.stride;
            self.index &= mask;
        } else {
            self.chunk_index += GROUP_SIZE;

            if self.chunk_index == REFERENCE_GROUP_SIZE {
                self.chunk_index = 0;
                self.stride += REFERENCE_GROUP_SIZE;
                self.base += self.stride;
            }

            self.index = (self.base + self.chunk_index) & mask;
        }
    }
}

#[inline]
fn group_starting_at<'a>(control_bytes: &'a [u8], index: usize) -> &'a [u8; GROUP_SIZE] {
    debug_assert!(index < control_bytes.len() - GROUP_SIZE);
    unsafe {
        std::slice::from_raw_parts(control_bytes.as_ptr().offset(index as isize), GROUP_SIZE)
            .try_into()
            .unwrap()
    }
}

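// An occupied slot stores the 7-bit `h2` value of its key, which always has
// the top bit clear. Empty (and deleted) slots are marked with the top bit
// set, so a single bit test suffices to tell the two cases apart.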
#[inline]
fn is_empty_or_deleted(control_byte: u8) -> bool {
    (control_byte & EMPTY_CONTROL_BYTE) != 0
}

const EMPTY_CONTROL_BYTE: u8 = 0b1000_0000;

/// This type provides a readonly view of the given table data.
#[derive(PartialEq, Eq)]
pub(crate) struct RawTable<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    metadata: &'a [EntryMetadata],
    data: &'a [Entry<K, V>],
    _hash_fn: PhantomData<H>,
}

impl<'a, K, V, H> RawTable<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    #[inline]
    pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> Self {
        // Make sure Entry<K, V> does not contain any padding bytes and can be
        // stored at arbitrary addresses.
        assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
        assert!(std::mem::align_of::<Entry<K, V>>() == 1);

        debug_assert!(data.len().is_power_of_two());
        debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

        Self {
            metadata,
            data,
            _hash_fn: PhantomData::default(),
        }
    }

    #[inline]
    pub(crate) fn find(&self, key: &K) -> Option<&V> {
        debug_assert!(self.data.len().is_power_of_two());
        debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

        let mask = self.data.len() - 1;
        let hash = H::hash(key.as_slice());
        let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
        let h2 = h2(hash);

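        // Probe the control bytes one group at a time: check every slot in
        // the group whose control byte matches h2, and give up as soon as a
        // group still contains an empty slot, since insertion would have
        // used that slot instead of probing any further.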
        loop {
            let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);

            for m in &mut group_query {
                let index = (ps.index + m) & mask;

                let entry = entry_at(self.data, index);

                if likely!(entry.key.equals(key)) {
                    return Some(&entry.value);
                }
            }

            if likely!(group_query.any_empty()) {
                return None;
            }

            ps.advance(mask);
        }
    }

    #[inline]
    pub(crate) fn iter(&'a self) -> RawIter<'a, K, V> {
        RawIter::new(self.metadata, self.data)
    }

    /// Check (for the first `slots_to_check` slots) whether the computed and
    /// the stored hash values match.
    ///
    /// A mismatch is an indication that the table has been deserialized with
    /// the wrong hash function.
    pub(crate) fn sanity_check_hashes(&self, slots_to_check: usize) -> Result<(), Error> {
        let slots_to_check = std::cmp::min(slots_to_check, self.data.len());

        for i in 0..slots_to_check {
            let metadata = self.metadata[i];
            let entry = &self.data[i];

            if is_empty_or_deleted(metadata) {
                if !entry.key.is_all_zeros() || !entry.value.is_all_zeros() {
                    let message = format!("Found empty entry with non-zero contents at {}", i);

                    return Err(Error(message));
                }
            } else {
                let hash = H::hash(entry.key.as_slice());
                let h2 = h2(hash);

                if metadata != h2 {
                    let message = format!(
                        "Control byte does not match hash value for entry {}. Expected {}, found {}.",
                        i, h2, metadata
                    );

                    return Err(Error(message));
                }
            }
        }

        Ok(())
    }
}

#[inline]
fn entry_at<K: ByteArray, V: ByteArray>(data: &[Entry<K, V>], index: usize) -> &Entry<K, V> {
    debug_assert!(index < data.len());
    unsafe { data.get_unchecked(index) }
}

#[inline]
fn metadata_at(metadata: &[EntryMetadata], index: usize) -> &EntryMetadata {
    debug_assert!(index < metadata.len());
    unsafe { metadata.get_unchecked(index) }
}

/// This type provides a mutable view of the given table data. It allows for
/// inserting new entries but does *not* allow for growing the table.
#[derive(PartialEq, Eq)]
pub(crate) struct RawTableMut<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    metadata: &'a mut [EntryMetadata],
    data: &'a mut [Entry<K, V>],
    _hash_fn: PhantomData<H>,
}

impl<'a, K, V, H> RawTableMut<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    #[inline]
    pub(crate) fn new(metadata: &'a mut [EntryMetadata], data: &'a mut [Entry<K, V>]) -> Self {
        // Make sure Entry<K, V> does not contain any padding bytes and can be
        // stored at arbitrary addresses.
        assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
        assert!(std::mem::align_of::<Entry<K, V>>() == 1);

        debug_assert!(data.len().is_power_of_two());
        debug_assert_eq!(metadata.len(), data.len() + REFERENCE_GROUP_SIZE);

        Self {
            metadata,
            data,
            _hash_fn: PhantomData::default(),
        }
    }

    /// Inserts the given key-value pair into the hash table.
    ///
    /// WARNING: This method assumes that there is free space in the hash table
    ///          somewhere. If there isn't, it will end up in an infinite loop.
    #[inline]
    pub(crate) fn insert(&mut self, key: K, value: V) -> Option<V> {
        debug_assert!(self.data.len().is_power_of_two());
        debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

        let mask = self.data.len() - 1;
        let hash = H::hash(key.as_slice());

        let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
        let h2 = h2(hash);

        loop {
            let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);

            for m in &mut group_query {
                let index = (ps.index + m) & mask;

                let entry = entry_at_mut(self.data, index);

                if likely!(entry.key.equals(&key)) {
                    let ret = Some(entry.value);
                    entry.value = value;
                    return ret;
                }
            }

            if let Some(first_empty) = group_query.first_empty() {
                let index = (ps.index + first_empty) & mask;
                *entry_at_mut(self.data, index) = Entry::new(key, value);
                *metadata_at_mut(self.metadata, index) = h2;

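                // The first REFERENCE_GROUP_SIZE control bytes are mirrored
                // past the end of the table (the metadata array has that many
                // extra bytes), so that a group read starting near the last
                // slot sees the wrapped-around control bytes without any
                // extra bounds handling. Keep the mirror in sync here.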
                if index < REFERENCE_GROUP_SIZE {
                    let first_mirror = self.data.len();
                    *metadata_at_mut(self.metadata, first_mirror + index) = h2;
                    debug_assert_eq!(
                        self.metadata[..REFERENCE_GROUP_SIZE],
                        self.metadata[self.data.len()..]
                    );
                }

                return None;
            }

            ps.advance(mask);
        }
    }
}

#[inline]
fn entry_at_mut<K: ByteArray, V: ByteArray>(
    data: &mut [Entry<K, V>],
    index: usize,
) -> &mut Entry<K, V> {
    debug_assert!(index < data.len());
    unsafe { data.get_unchecked_mut(index) }
}

#[inline]
fn metadata_at_mut(metadata: &mut [EntryMetadata], index: usize) -> &mut EntryMetadata {
    debug_assert!(index < metadata.len());
    unsafe { metadata.get_unchecked_mut(index) }
}

impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTableMut<'a, K, V, H> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let readonly = RawTable::<'_, K, V, H>::new(self.metadata, self.data);
        write!(f, "{:?}", readonly)
    }
}

pub(crate) struct RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    metadata: &'a [EntryMetadata],
    data: &'a [Entry<K, V>],
    current_index: usize,
}

impl<'a, K, V> RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> RawIter<'a, K, V> {
        debug_assert!(data.len().is_power_of_two());
        debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

        RawIter {
            metadata,
            data,
            current_index: 0,
        }
    }
}

impl<'a, K, V> Iterator for RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    type Item = (EntryMetadata, &'a Entry<K, V>);

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.current_index >= self.data.len() {
                return None;
            }

            let index = self.current_index;

            self.current_index += 1;

            let entry_metadata = *metadata_at(self.metadata, index);
            if !is_empty_or_deleted(entry_metadata) {
                return Some((entry_metadata, entry_at(self.data, index)));
            }
        }
    }
}

/// A trait that lets us abstract over different lengths of fixed-size byte
/// arrays. Don't implement it for anything other than fixed-size byte arrays!
pub trait ByteArray: Sized + Copy + Eq + Clone + PartialEq + fmt::Debug + 'static {
    fn zeroed() -> Self;
    fn as_slice(&self) -> &[u8];
    fn equals(&self, other: &Self) -> bool;
    fn is_all_zeros(&self) -> bool {
        self.as_slice().iter().all(|b| *b == 0)
    }
}

impl<const LEN: usize> ByteArray for [u8; LEN] {
    #[inline(always)]
    fn zeroed() -> Self {
        [0u8; LEN]
    }

    #[inline(always)]
    fn as_slice(&self) -> &[u8] {
        &self[..]
    }

    // This custom implementation of comparing the fixed-size arrays
    // seems to make a big difference for performance (at least for
    // 16+ byte keys).
    #[inline]
    fn equals(&self, other: &Self) -> bool {
        // Most branches here are optimized away at compile time
        // because they depend on values known at compile time.

        const USIZE: usize = size_of::<usize>();

        // Special-case a few sizes we care about.
        if size_of::<Self>() == USIZE {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0);
        }

        if size_of::<Self>() == USIZE * 2 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1);
        }

        if size_of::<Self>() == USIZE * 3 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1)
                && read_usize(&self[..], 2) == read_usize(&other[..], 2);
        }

        if size_of::<Self>() == USIZE * 4 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1)
                && read_usize(&self[..], 2) == read_usize(&other[..], 2)
                && read_usize(&self[..], 3) == read_usize(&other[..], 3);
        }

        // Fallback for all other sizes.
        return &self[..] == &other[..];

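        // Reading the chunks as little-endian (rather than native-endian)
        // usizes does not affect the equality test itself; `from_le_bytes`
        // works on a copied byte array, so no alignment is required either.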
        #[inline(always)]
        fn read_usize(bytes: &[u8], index: usize) -> usize {
            const STRIDE: usize = size_of::<usize>();
            usize::from_le_bytes(
                bytes[STRIDE * index..STRIDE * (index + 1)]
                    .try_into()
                    .unwrap(),
            )
        }
    }
}

#[cfg(test)]
#[rustfmt::skip]
mod tests {
    use super::*;
    use crate::FxHashFn;

    fn make_table<
        I: Iterator<Item = (K, V)> + ExactSizeIterator,
        K: ByteArray,
        V: ByteArray,
        H: HashFn,
    >(
        xs: I,
    ) -> (Vec<EntryMetadata>, Vec<Entry<K, V>>) {
        let size = xs.size_hint().0.next_power_of_two();
        let mut data = vec![Entry::default(); size];
        let mut metadata = vec![255; size + REFERENCE_GROUP_SIZE];

        assert!(metadata.iter().all(|b| is_empty_or_deleted(*b)));

        {
            let mut table: RawTableMut<K, V, H> = RawTableMut {
                metadata: &mut metadata[..],
                data: &mut data[..],
                _hash_fn: Default::default(),
            };

            for (k, v) in xs {
                table.insert(k, v);
            }
        }

        (metadata, data)
    }

    #[test]
    fn stress() {
        let xs: Vec<[u8; 2]> = (0 ..= u16::MAX).map(|x| x.to_le_bytes()).collect();

        let (mut metadata, mut data) = make_table::<_, _, _, FxHashFn>(xs.iter().map(|x| (*x, *x)));

        {
            let table: RawTable<_, _, FxHashFn> = RawTable {
                metadata: &metadata[..],
                data: &data[..],
                _hash_fn: PhantomData::default(),
            };

            for x in xs.iter() {
                assert_eq!(table.find(x), Some(x));
            }
        }

        // overwrite all values
        {
            let mut table: RawTableMut<_, _, FxHashFn> = RawTableMut {
                metadata: &mut metadata[..],
                data: &mut data[..],
                _hash_fn: PhantomData::default(),
            };

            for (i, x) in xs.iter().enumerate() {
                assert_eq!(table.insert(*x, [i as u8, (i + 1) as u8]), Some(*x));
            }
        }

        // Check if we find the new expected values
        {
            let table: RawTable<_, _, FxHashFn> = RawTable {
                metadata: &metadata[..],
                data: &data[..],
                _hash_fn: PhantomData::default(),
            };

            for (i, x) in xs.iter().enumerate() {
                assert_eq!(table.find(x), Some(&[i as u8, (i + 1) as u8]));
            }
        }
    }

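    // A small sanity check (added as an illustrative sketch): the 7-bit
    // `h2` value always has its top bit clear, so it can never be mistaken
    // for the EMPTY_CONTROL_BYTE marker that `find` and `insert` rely on.
    #[test]
    fn h2_top_bit_is_clear() {
        for h in [0u32, 1, 0x7FFF_FFFF, 0x8000_0000, u32::MAX].iter().copied() {
            assert!(!is_empty_or_deleted(h2(h)));
        }
    }
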
    // This test makes sure that ProbeSeq will always visit the same slots
    // in the same order, regardless of the actual GROUP_SIZE.
    #[test]
    fn probe_seq() {
        struct ReferenceProbeSeq {
            index: usize,
            stride: usize,
        }

        impl ReferenceProbeSeq {
            fn new(h1: usize, mask: usize) -> ReferenceProbeSeq {
                ReferenceProbeSeq {
                    index: h1 & mask,
                    stride: 0,
                }
            }

            fn advance(&mut self, mask: usize) {
                self.stride += REFERENCE_GROUP_SIZE;
                self.index += self.stride;
                self.index &= mask;
            }
        }

        fn test_with_group_size<const GROUP_SIZE: usize>() {
            assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);
            assert!(REFERENCE_GROUP_SIZE >= GROUP_SIZE);

            for i in 4 .. 17 {
                let item_count = 1 << i;
                assert!(item_count % REFERENCE_GROUP_SIZE == 0);
                assert!(item_count % GROUP_SIZE == 0);

                let mask = item_count - 1;

                let mut expected = Vec::with_capacity(item_count);

                let mut refseq = ReferenceProbeSeq::new(0, mask);
                for _ in 0 .. item_count / REFERENCE_GROUP_SIZE {
                    for index in refseq.index .. refseq.index + REFERENCE_GROUP_SIZE {
                        expected.push(index & mask);
                    }
                    refseq.advance(mask);
                }

                let mut actual = Vec::with_capacity(item_count);

                let mut seq = ProbeSeq::<GROUP_SIZE>::new(0, mask);
                for _ in 0 .. item_count / GROUP_SIZE {
                    for index in seq.index .. seq.index + GROUP_SIZE {
                        actual.push(index & mask);
                    }
                    seq.advance(mask);
                }

                assert_eq!(expected, actual);
            }
        }

        test_with_group_size::<4>();
        test_with_group_size::<8>();
        test_with_group_size::<16>();
    }
}