musli_zerocopy/swiss/raw/mod.rs
1#![allow(clippy::manual_map)]
2
3#[macro_use]
4mod macros;
5
6cfg_if! {
7 // Use the SSE2 implementation if possible: it allows us to scan 16 buckets
8 // at once instead of 8. We don't bother with AVX since it would require
9 // runtime dispatch and wouldn't gain us much anyways: the probability of
10 // finding a match drops off drastically after the first few buckets.
11 //
12 // I attempted an implementation on ARM using NEON instructions, but it
13 // turns out that most NEON instructions have multi-cycle latency, which in
14 // the end outweighs any gains over the generic implementation.
15 if #[cfg(all(
16 target_feature = "sse2",
17 any(target_arch = "x86", target_arch = "x86_64"),
18 not(miri)
19 ))] {
20 mod sse2;
21 use sse2 as imp;
22 } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
23 mod neon;
24 use neon as imp;
25 } else {
26 mod generic;
27 use generic as imp;
28 }
29}
30
31mod bitmask;
32
33use crate::error::Error;
34use crate::error::ErrorKind;
35
36pub(crate) use self::imp::Group;
37
38use core::mem;
39
40/// Probe sequence based on triangular numbers, which is guaranteed (since our
41/// table size is a power of two) to visit every group of elements exactly once.
42///
43/// A triangular probe has us jump by 1 more group every time. So first we
44/// jump by 1 group (meaning we just continue our linear scan), then 2 groups
45/// (skipping over 1 group), then 3 groups (skipping over 2 groups), and so on.
46///
47/// Proof that the probe will visit every group in the table:
48/// <https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/>
49#[derive(Debug)]
50pub(crate) struct ProbeSeq {
51 pub(crate) pos: usize,
52 stride: usize,
53}
54
55impl ProbeSeq {
56 #[inline]
57 pub(crate) fn move_next(&mut self, bucket_mask: usize) -> Result<(), Error> {
58 if self.stride > bucket_mask {
59 return Err(Error::new(ErrorKind::StrideOutOfBounds {
60 index: self.stride,
61 len: bucket_mask,
62 }));
63 }
64
65 self.stride += Group::WIDTH;
66 self.pos += self.stride;
67 self.pos &= bucket_mask;
68 Ok(())
69 }
70}
71
72// Constant for h2 function that grabing the top 7 bits of the hash.
73const MIN_HASH_LEN: usize = if mem::size_of::<usize>() < mem::size_of::<u64>() {
74 mem::size_of::<usize>()
75} else {
76 mem::size_of::<u64>()
77};
78
79/// Returns an iterator-like object for a probe sequence on the table.
80///
81/// This iterator never terminates, but is guaranteed to visit each bucket
82/// group exactly once. The loop using `probe_seq` must terminate upon
83/// reaching a group containing an empty bucket.
84#[inline]
85pub(crate) fn probe_seq(bucket_mask: usize, hash: u64) -> ProbeSeq {
86 ProbeSeq {
87 // This is the same as `hash as usize % self.buckets()` because the number
88 // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
89 pos: h1(hash) & bucket_mask,
90 stride: 0,
91 }
92}
93
94/// Primary hash function, used to select the initial bucket to probe from.
95#[inline]
96#[allow(clippy::cast_possible_truncation)]
97fn h1(hash: u64) -> usize {
98 // On 32-bit platforms we simply ignore the higher hash bits.
99 hash as usize
100}
101
102/// Secondary hash function, saved in the low 7 bits of the control byte.
103#[inline]
104#[allow(clippy::cast_possible_truncation)]
105pub(crate) fn h2(hash: u64) -> u8 {
106 // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
107 // value, some hash functions (such as FxHash) produce a usize result
108 // instead, which means that the top 32 bits are 0 on 32-bit platforms.
109 // So we use MIN_HASH_LEN constant to handle this.
110 let top7 = hash >> (MIN_HASH_LEN * 8 - 7);
111 (top7 & 0x7f) as u8 // truncation
112}
113
114/// Control byte value for an empty bucket.
115pub(crate) const EMPTY: u8 = 0b1111_1111;
116
117/// Checks whether a control byte represents a full bucket (top bit is clear).
118#[inline]
119#[cfg(feature = "alloc")]
120pub(crate) fn is_full(ctrl: u8) -> bool {
121 ctrl & 0x80 == 0
122}
123
124/// Checks whether a control byte represents a special value (top bit is set).
125#[inline]
126#[cfg(feature = "alloc")]
127pub(crate) fn is_special(ctrl: u8) -> bool {
128 ctrl & 0x80 != 0
129}
130
131/// Checks whether a special control value is EMPTY (just check 1 bit).
132#[inline]
133#[cfg(feature = "alloc")]
134pub(crate) fn special_is_empty(ctrl: u8) -> bool {
135 debug_assert!(is_special(ctrl));
136 ctrl & 0x01 != 0
137}
138
139/// Returns the number of buckets needed to hold the given number of items,
140/// taking the maximum load factor into account.
141///
142/// Returns `None` if an overflow occurs.
143// Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258
144#[cfg_attr(target_os = "emscripten", inline(never))]
145#[cfg_attr(not(target_os = "emscripten"), inline)]
146#[cfg(feature = "alloc")]
147pub(crate) fn capacity_to_buckets(cap: usize) -> Option<usize> {
148 // For small tables we require at least 1 empty bucket so that lookups are
149 // guaranteed to terminate if an element doesn't exist in the table.
150 if cap < 8 {
151 // We don't bother with a table size of 2 buckets since that can only
152 // hold a single element. Instead we skip directly to a 4 bucket table
153 // which can hold 3 elements.
154 return Some(if cap < 4 { 4 } else { 8 });
155 }
156
157 // Otherwise require 1/8 buckets to be empty (87.5% load)
158 //
159 // Be careful when modifying this, calculate_layout relies on the
160 // overflow check here.
161 let adjusted_cap = cap.checked_mul(8)? / 7;
162
163 // Any overflows will have been caught by the checked_mul. Also, any
164 // rounding errors from the division above will be cleaned up by
165 // next_power_of_two (which can't overflow because of the previous division).
166 Some(adjusted_cap.next_power_of_two())
167}