musli_zerocopy/swiss/raw/
mod.rs

1#![allow(clippy::manual_map)]
2
3#[macro_use]
4mod macros;
5
6cfg_if! {
7    // Use the SSE2 implementation if possible: it allows us to scan 16 buckets
8    // at once instead of 8. We don't bother with AVX since it would require
9    // runtime dispatch and wouldn't gain us much anyways: the probability of
10    // finding a match drops off drastically after the first few buckets.
11    //
12    // I attempted an implementation on ARM using NEON instructions, but it
13    // turns out that most NEON instructions have multi-cycle latency, which in
14    // the end outweighs any gains over the generic implementation.
15    if #[cfg(all(
16        target_feature = "sse2",
17        any(target_arch = "x86", target_arch = "x86_64"),
18        not(miri)
19    ))] {
20        mod sse2;
21        use sse2 as imp;
22    } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
23        mod neon;
24        use neon as imp;
25    } else {
26        mod generic;
27        use generic as imp;
28    }
29}
30
31mod bitmask;
32
33use crate::error::Error;
34use crate::error::ErrorKind;
35
36pub(crate) use self::imp::Group;
37
38use core::mem;
39
40/// Probe sequence based on triangular numbers, which is guaranteed (since our
41/// table size is a power of two) to visit every group of elements exactly once.
42///
43/// A triangular probe has us jump by 1 more group every time. So first we
44/// jump by 1 group (meaning we just continue our linear scan), then 2 groups
45/// (skipping over 1 group), then 3 groups (skipping over 2 groups), and so on.
46///
47/// Proof that the probe will visit every group in the table:
48/// <https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/>
49#[derive(Debug)]
50pub(crate) struct ProbeSeq {
51    pub(crate) pos: usize,
52    stride: usize,
53}
54
55impl ProbeSeq {
56    #[inline]
57    pub(crate) fn move_next(&mut self, bucket_mask: usize) -> Result<(), Error> {
58        if self.stride > bucket_mask {
59            return Err(Error::new(ErrorKind::StrideOutOfBounds {
60                index: self.stride,
61                len: bucket_mask,
62            }));
63        }
64
65        self.stride += Group::WIDTH;
66        self.pos += self.stride;
67        self.pos &= bucket_mask;
68        Ok(())
69    }
70}
71
72// Constant for h2 function that grabing the top 7 bits of the hash.
73const MIN_HASH_LEN: usize = if mem::size_of::<usize>() < mem::size_of::<u64>() {
74    mem::size_of::<usize>()
75} else {
76    mem::size_of::<u64>()
77};
78
79/// Returns an iterator-like object for a probe sequence on the table.
80///
81/// This iterator never terminates, but is guaranteed to visit each bucket
82/// group exactly once. The loop using `probe_seq` must terminate upon
83/// reaching a group containing an empty bucket.
84#[inline]
85pub(crate) fn probe_seq(bucket_mask: usize, hash: u64) -> ProbeSeq {
86    ProbeSeq {
87        // This is the same as `hash as usize % self.buckets()` because the number
88        // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
89        pos: h1(hash) & bucket_mask,
90        stride: 0,
91    }
92}
93
94/// Primary hash function, used to select the initial bucket to probe from.
95#[inline]
96#[allow(clippy::cast_possible_truncation)]
97fn h1(hash: u64) -> usize {
98    // On 32-bit platforms we simply ignore the higher hash bits.
99    hash as usize
100}
101
102/// Secondary hash function, saved in the low 7 bits of the control byte.
103#[inline]
104#[allow(clippy::cast_possible_truncation)]
105pub(crate) fn h2(hash: u64) -> u8 {
106    // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
107    // value, some hash functions (such as FxHash) produce a usize result
108    // instead, which means that the top 32 bits are 0 on 32-bit platforms.
109    // So we use MIN_HASH_LEN constant to handle this.
110    let top7 = hash >> (MIN_HASH_LEN * 8 - 7);
111    (top7 & 0x7f) as u8 // truncation
112}
113
114/// Control byte value for an empty bucket.
115pub(crate) const EMPTY: u8 = 0b1111_1111;
116
117/// Checks whether a control byte represents a full bucket (top bit is clear).
118#[inline]
119#[cfg(feature = "alloc")]
120pub(crate) fn is_full(ctrl: u8) -> bool {
121    ctrl & 0x80 == 0
122}
123
124/// Checks whether a control byte represents a special value (top bit is set).
125#[inline]
126#[cfg(feature = "alloc")]
127pub(crate) fn is_special(ctrl: u8) -> bool {
128    ctrl & 0x80 != 0
129}
130
131/// Checks whether a special control value is EMPTY (just check 1 bit).
132#[inline]
133#[cfg(feature = "alloc")]
134pub(crate) fn special_is_empty(ctrl: u8) -> bool {
135    debug_assert!(is_special(ctrl));
136    ctrl & 0x01 != 0
137}
138
139/// Returns the number of buckets needed to hold the given number of items,
140/// taking the maximum load factor into account.
141///
142/// Returns `None` if an overflow occurs.
143// Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258
144#[cfg_attr(target_os = "emscripten", inline(never))]
145#[cfg_attr(not(target_os = "emscripten"), inline)]
146#[cfg(feature = "alloc")]
147pub(crate) fn capacity_to_buckets(cap: usize) -> Option<usize> {
148    // For small tables we require at least 1 empty bucket so that lookups are
149    // guaranteed to terminate if an element doesn't exist in the table.
150    if cap < 8 {
151        // We don't bother with a table size of 2 buckets since that can only
152        // hold a single element. Instead we skip directly to a 4 bucket table
153        // which can hold 3 elements.
154        return Some(if cap < 4 { 4 } else { 8 });
155    }
156
157    // Otherwise require 1/8 buckets to be empty (87.5% load)
158    //
159    // Be careful when modifying this, calculate_layout relies on the
160    // overflow check here.
161    let adjusted_cap = cap.checked_mul(8)? / 7;
162
163    // Any overflows will have been caught by the checked_mul. Also, any
164    // rounding errors from the division above will be cleaned up by
165    // next_power_of_two (which can't overflow because of the previous division).
166    Some(adjusted_cap.next_power_of_two())
167}
musli_zerocopy/swiss/raw/mod.rs

musli_zerocopy/swiss/raw/
mod.rs