musli_zerocopy/swiss/
constructor.rs

// Note: This was ported from `hashbrown`, and still contains some of the
// comments assuming that it performs internal allocations. These are most
// likely wrong and need to be rewritten to take into account the safety
// requirements towards `OwnedBuf`.

use core::convert::{identity as likely, identity as unlikely};
use core::marker::PhantomData;
use core::mem::size_of;
use core::ptr::NonNull;

use crate::buf::{self, Buf, StoreBuf};
use crate::error::{Error, ErrorKind};
use crate::swiss::raw::{Group, ProbeSeq, h2, is_full, probe_seq, special_is_empty};
use crate::traits::ZeroCopy;

/// Construction of a raw swiss table.
pub struct Constructor<'a, T, S: ?Sized> {
    buf: &'a mut S,

    // Mask to get an index from a hash value. The value is one less than the
    // number of buckets in the table.
    bucket_mask: usize,

    // Control offset, where the control vectors are stored in `buf`.
    ctrl_ptr: usize,

    // Base offset where items are written in `buf`.
    base_ptr: usize,

    // Number of elements that can be inserted before we need to grow the table.
    // Since we can't grow the table, reaching this point results in an error.
    growth_left: usize,

    // Hold onto T to make sure the API stays coherent with the types the
    // constructor can write.
    _marker: PhantomData<T>,
}

impl<'a, T, S: ?Sized> Constructor<'a, T, S>
where
    S: StoreBuf,
{
    /// Wrap the given buffer for table construction.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the buffer contains allocated and correctly
    /// initialized memory at `ctrl_ptr` and `base_ptr`.
    ///
    /// * `ctrl_ptr` must point to a memory region that is `buckets + 1` length
    ///   sized for `Group` which has been bitwise initialized to [`EMPTY`].
    /// * `base_ptr` must point to a memory region that is `buckets` length
    ///   sized for `T`.
    /// * `buckets` must be a power of two.
    ///
    /// [`EMPTY`]: crate::swiss::raw::EMPTY
    pub(crate) fn with_buf(
        buf: &'a mut S,
        ctrl_ptr: usize,
        base_ptr: usize,
        buckets: usize,
    ) -> Self {
        debug_assert!(buckets.is_power_of_two());

        Self {
            buf,
            bucket_mask: buckets - 1,
            ctrl_ptr,
            base_ptr,
            growth_left: bucket_mask_to_capacity(buckets - 1),
            _marker: PhantomData,
        }
    }

    /// Access the underlying buffer.
    pub(crate) fn buf(&self) -> &Buf {
        self.buf.as_buf()
    }

    /// Export bucket mask.
    pub(crate) fn bucket_mask(&self) -> usize {
        self.bucket_mask
    }

    /// Get the length of the table.
    pub(crate) fn len(&self) -> usize {
        // `growth_left` starts out at the full capacity, so the difference is
        // the number of elements inserted so far.
        bucket_mask_to_capacity(self.bucket_mask) - self.growth_left
    }

    /// Returns the number of buckets in the table.
    #[inline]
    pub(crate) fn buckets(&self) -> usize {
        self.bucket_mask + 1
    }

    /// Insert the given zero copy value into the table.
    pub(crate) fn insert(&mut self, hash: u64, value: &T) -> Result<Bucket<'_, T>, Error>
    where
        T: ZeroCopy,
    {
        // SAFETY:
        // 1. The [`Constructor`] must already have properly initialized control bytes since
        //    we will never expose `Constructor::new_uninitialized` in a public API.
        let slot = self.find_insert_slot(hash)?;

        // We can avoid growing the table once we have reached our load factor if we are replacing
        // a tombstone. This works since the number of EMPTY slots does not change in this case.
        //
        // SAFETY: The function is guaranteed to return an [`InsertSlot`] that contains an index
        // in the range `0..self.buckets()`.
        let old_ctrl = *self.ctrl(slot.index);

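        // Note on capacity: `growth_left` reaches zero once
        // `bucket_mask_to_capacity(self.bucket_mask)` values have been
        // inserted, e.g. after 14 insertions into a table with 16 buckets.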
        if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) {
            return Err(Error::new(ErrorKind::CapacityError));
        }

        Ok(self.insert_in_slot(hash, slot, value))
    }

    /// Inserts a new element into the table in the given slot, and returns its
    /// raw bucket.
    #[inline]
    pub fn insert_in_slot(&mut self, hash: u64, slot: InsertSlot, value: &T) -> Bucket<'_, T>
    where
        T: ZeroCopy,
    {
        let old_ctrl = *self.ctrl(slot.index);
        self.record_item_insert_at(slot.index, old_ctrl, hash);
        let bucket = self.bucket(slot.index);
        bucket.write(value);
        bucket
    }

    /// Returns a pointer to an element in the table.
    #[inline]
    pub fn bucket(&mut self, index: usize) -> Bucket<'_, T> {
        debug_assert_ne!(self.bucket_mask, 0);
        debug_assert!(index < self.buckets());

        let Some(index) = index.checked_mul(size_of::<T>()) else {
            panic!("Index `{index}` out of bounds");
        };

        let Some(start) = self.base_ptr.checked_add(index) else {
            panic!("Start `{index}` out of bounds");
        };

        let end = start + size_of::<T>();
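        // For example, with `T = u32` and `base_ptr == 16`, bucket 3 occupies
        // the byte range `28..32` of the buffer.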

        // SAFETY: The accessed data is a slice of u8's, which cannot be
        // incorrectly initialized locally.
        let slice = unsafe { self.buf.get_mut(start..end) };

        let Some(slice) = slice else {
            panic!("Missing bucket at range {start}..{end}");
        };

        // SAFETY: We've ensured that the bucket is appropriately sized just
        // above.
        unsafe { Bucket::from_slice(slice) }
    }

    /// Finds the position to insert something in a group.
    ///
    /// **This may have false positives and must be fixed up with `fix_insert_slot`
    /// before it's used.**
    ///
    /// The function is guaranteed to return the index of an empty or deleted [`Bucket`]
    /// in the range `0..self.buckets()` (`0..=self.bucket_mask`).
    #[inline]
    fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
        let bit = likely(group.match_empty_or_deleted().lowest_set_bit()?);

        // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number
        // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
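        // E.g. with 8 buckets (`bucket_mask == 7`), `pos == 6` and `bit == 3`
        // give `(6 + 3) & 7 == 1`, the same as `(6 + 3) % 8`.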
        Some((probe_seq.pos + bit) & self.bucket_mask)
    }

    #[inline]
    fn record_item_insert_at(&mut self, index: usize, old_ctrl: u8, hash: u64) {
        self.growth_left -= usize::from(special_is_empty(old_ctrl));
        self.set_ctrl_h2(index, hash);
    }

    /// Searches for an empty or deleted bucket which is suitable for inserting
    /// a new element, returning the `index` for the new [`Bucket`].
    #[inline]
    fn find_insert_slot(&mut self, hash: u64) -> Result<InsertSlot, Error> {
        let mut probe_seq = probe_seq(self.bucket_mask, hash);

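        // Walk the table one `Group` at a time following the probe sequence
        // ported from `hashbrown`. If every group has been probed without
        // finding an empty or deleted slot, `move_next` is expected to fail
        // with an error rather than looping forever.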
        loop {
            // SAFETY: `ctrl_group` returns a slice of exactly `Group::WIDTH`
            // initialized control bytes (or panics), and `Group::load`
            // performs an unaligned read.
            let group = unsafe { Group::load(self.ctrl_group(probe_seq.pos).as_ptr()) };

            if let Some(index) = self.find_insert_slot_in_group(&group, &probe_seq) {
                return Ok(self.fix_insert_slot(index));
            };

            probe_seq.move_next(self.bucket_mask)?;
        }
    }

    /// Fixes up an insertion slot returned by the
    /// [`Constructor::find_insert_slot_in_group`] method.
    ///
    /// In tables smaller than the group width (`self.buckets() <
    /// Group::WIDTH`), trailing control bytes outside the range of the table
    /// are filled with [`EMPTY`] entries. These will unfortunately trigger a
    /// match in the [`Constructor::find_insert_slot_in_group`] function. This
    /// is because the `Some(bit)` returned by
    /// `group.match_empty_or_deleted().lowest_set_bit()`, after masking
    /// (`(probe_seq.pos + bit) & self.bucket_mask`), may point to a full
    /// bucket that is already occupied. We detect this situation here and
    /// perform a second scan starting at the beginning of the table. This
    /// second scan is guaranteed to find an empty slot (due to the load
    /// factor) before hitting the trailing control bytes (containing
    /// [`EMPTY`] bytes).
    ///
    /// If this function is called correctly, it is guaranteed to return
    /// [`InsertSlot`] with an index of an empty or deleted bucket in the range
    /// `0..self.buckets()` (see `Warning` and `Safety`).
    ///
    /// [`EMPTY`]: super::raw::EMPTY
    ///
    /// # Warning
    ///
    /// The table must have at least 1 empty or deleted `bucket`, otherwise, if
    /// the table is smaller than the group width (`self.buckets() < Group::WIDTH`),
    /// this function returns an index outside of the table indices range
    /// `0..self.buckets()` (`0..=self.bucket_mask`). Attempting to write data
    /// at that index will cause immediate [`undefined behavior`].
    ///
    /// # Safety
    ///
    /// The safety rules are directly derived from the safety rules for the
    /// [`Constructor::ctrl`] method. Thus, in order to uphold those safety
    /// contracts, as well as for the correct logic of the work of this crate,
    /// the following rules are necessary and sufficient:
    ///
    /// * The [`Constructor`] must have properly initialized control bytes
    ///   otherwise calling this function results in [`undefined behavior`].
    ///
    /// * This function must only be used on insertion slots found by
    ///   [`find_insert_slot_in_group`] (after the `find_insert_slot_in_group`
    ///   function, but before insertion into the table).
    ///
    /// * The `index` must not be greater than the `self.bucket_mask`, i.e.
    ///   `(index + 1) <= self.buckets()` (this one is provided by the
    ///   [`find_insert_slot_in_group`] function).
    ///
    /// Calling this function with an index not provided by
    /// [`find_insert_slot_in_group`] may result in [`undefined behavior`] even
    /// if the index satisfies the safety rules of the [`ctrl`] function (`index
    /// < self.bucket_mask + 1 + Group::WIDTH`).
    ///
    /// [`ctrl`]: Self::ctrl
    /// [`find_insert_slot_in_group`]: Self::find_insert_slot_in_group
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    fn fix_insert_slot(&mut self, mut index: usize) -> InsertSlot {
        // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`.
        if unlikely(self.is_bucket_full(index)) {
            debug_assert!(self.bucket_mask < Group::WIDTH);

            // SAFETY:
            //
            // * Since the caller of this function ensures that the control bytes are properly
            //   initialized and `ptr = self.ctrl(0)` points to the start of the array of control
            //   bytes, therefore: `ctrl` is valid for reads, properly aligned to `Group::WIDTH`
            //   and points to the properly initialized control bytes (see also
            //   `TableLayout::calculate_layout_for` and `ptr::read`);
            //
            // * Because the caller of this function ensures that the index was provided by the
            //   `self.find_insert_slot_in_group()` function, so for tables larger than the
            //   group width (self.buckets() >= Group::WIDTH), we will never end up in the given
            //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group`
            //   cannot return a full bucket index. For tables smaller than the group width, calling
            //   the `unwrap_unchecked` function is also safe, as the trailing control bytes outside
            //   the range of the table are filled with EMPTY bytes (and we know for sure that there
            //   is at least one FULL bucket), so this second scan either finds an empty slot (due to
            //   the load factor) or hits the trailing control bytes (containing EMPTY).
            index = unsafe {
                Group::load_aligned(self.ctrl_group(0).as_ptr())
                    .match_empty_or_deleted()
                    .lowest_set_bit()
                    .unwrap_unchecked()
            };
        }

        InsertSlot { index }
    }

    /// Sets a control byte to the hash, and possibly also the replicated control byte at
    /// the end of the array.
    ///
    /// This function does not make any changes to the `data` parts of the table,
    /// or any changes to the `items` or `growth_left` fields of the table.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the `index` is not out of bounds of the control allocation.
    #[inline]
    fn set_ctrl_h2(&mut self, index: usize, hash: u64) {
        self.set_ctrl(index, h2(hash));
    }

    /// Sets a control byte, and possibly also the replicated control byte at
    /// the end of the array.
    ///
    /// This function does not make any changes to the `data` parts of the table,
    /// or any changes to the `items` or `growth_left` fields of the table.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `index` is not out of bounds of the control
    /// allocation.
    #[inline]
    fn set_ctrl(&mut self, index: usize, ctrl: u8) {
        // Replicate the first Group::WIDTH control bytes at the end of
        // the array without using a branch. If the table is smaller than
        // the group width (self.buckets() < Group::WIDTH),
        // `index2 = Group::WIDTH + index`, otherwise `index2` is:
        //
        // - If index >= Group::WIDTH then index == index2.
        // - Otherwise index2 == self.bucket_mask + 1 + index.
        //
        // The very last replicated control byte is never actually read because
        // we mask the initial index for unaligned loads, but we write it
        // anyways because it makes the set_ctrl implementation simpler.
        //
        // If there are fewer buckets than Group::WIDTH then this code will
        // replicate the buckets at the end of the trailing group. For example
        // with 2 buckets and a group size of 4, the control bytes will look
        // like this:
        //
        //     Real    |             Replicated
        // ---------------------------------------------
        // | [A] | [B] | [EMPTY] | [EMPTY] | [A] | [B] |
        // ---------------------------------------------
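        //
        // E.g. with `Group::WIDTH == 4` and 2 buckets (`bucket_mask == 1`),
        // writing index 0 also writes index2 = ((0 - 4) & 1) + 4 = 4, and
        // index 1 also writes index2 = 5, matching the diagram above.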

        // This is the same as `(index.wrapping_sub(Group::WIDTH)) % self.buckets() + Group::WIDTH`
        // because the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
        let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH;

        // SAFETY: The caller must uphold the safety rules for
        // `Constructor::set_ctrl`.
        *self.ctrl_mut(index) = ctrl;
        *self.ctrl_mut(index2) = ctrl;
    }

    /// Checks whether the bucket at `index` is full.
    ///
    /// # Safety
    ///
    /// The caller must ensure `index` is less than the number of buckets.
    #[inline]
    fn is_bucket_full(&self, index: usize) -> bool {
        debug_assert!(index < self.buckets());
        is_full(*self.ctrl(index))
    }

    #[inline]
    fn ctrl_mut(&mut self, index: usize) -> &mut u8 {
        debug_assert!(index < self.num_ctrl_bytes());

        let offset = self.ctrl_ptr + index;

        // SAFETY: The control bytes are u8's, which cannot be incorrectly
        // initialized.
        let ctrl = unsafe { self.buf.get_mut(offset) };

        let Some(ctrl) = ctrl else {
            panic!("Missing control byte at {offset}");
        };

        ctrl
    }

    #[inline]
    fn ctrl(&self, index: usize) -> &u8 {
        debug_assert!(index < self.num_ctrl_bytes());

        let offset = self.ctrl_ptr + index;

        let Some(ctrl) = self.buf.get(offset) else {
            panic!("Missing control byte at {offset}");
        };

        ctrl
    }

    #[inline]
    fn ctrl_group(&self, index: usize) -> &[u8] {
        debug_assert!(index < self.num_ctrl_bytes());

        let start = self.ctrl_ptr + index;
        let end = start + Group::WIDTH;

        let Some(ctrl) = self.buf.get(start..end) else {
            panic!("Missing control bytes at range {start}..{end}");
        };

        ctrl
    }

    #[inline]
    fn num_ctrl_bytes(&self) -> usize {
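        // One control byte per bucket plus `Group::WIDTH` trailing bytes that
        // mirror the start of the array (see the diagram in `set_ctrl`), so
        // that group loads near the end of the table never read out of bounds.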
        self.bucket_mask + 1 + Group::WIDTH
    }
}

/// A reference to a hash table bucket containing a `T`.
///
/// This is usually just a pointer to the element itself. However if the element
/// is a ZST, then we instead track the index of the element in the table so
/// that `erase` works properly.
pub struct Bucket<'a, T> {
    data: NonNull<u8>,
    _marker: PhantomData<&'a mut T>,
}

impl<'a, T> Bucket<'a, T> {
    /// Construct a bucket from the given slice.
    ///
    /// # Safety
    ///
    /// Caller must ensure that the slice is sized for `T`.
    #[inline]
    unsafe fn from_slice(data: &'a mut [u8]) -> Self {
        debug_assert!(data.len() == size_of::<T>());

        Self {
            data: unsafe { NonNull::new_unchecked(data.as_mut_ptr()) },
            _marker: PhantomData,
        }
    }

    /// Overwrites a memory location with the given `value` without reading or
    /// dropping the old value (like the [`ptr::write`] function).
    ///
    /// [`ptr::write`]: core::ptr::write
    #[inline]
    pub(crate) fn write(&self, value: &T)
    where
        T: ZeroCopy,
    {
        // SAFETY: During bucket construction we've asserted that a buffer of
        // the appropriate size (that is not guaranteed to be aligned) is used.
        unsafe { buf::store_unaligned(self.data, value) }
    }
}

/// A reference to an empty bucket into which a new element can be inserted.
pub struct InsertSlot {
    index: usize,
}

/// Returns the maximum effective capacity for the given bucket mask, taking
/// the maximum load factor into account.
#[inline]
fn bucket_mask_to_capacity(bucket_mask: usize) -> usize {
    if bucket_mask < 8 {
        // For tables with 1/2/4/8 buckets, we always reserve one empty slot.
        // Keep in mind that the bucket mask is one less than the bucket count.
        bucket_mask
    } else {
        // For larger tables we reserve 12.5% of the slots as empty.
        ((bucket_mask + 1) / 8) * 7
    }
}
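
// A minimal illustrative sketch (not part of the ported code): it spells out
// the capacity rule above, i.e. small tables keep one slot free while larger
// tables keep 1/8 of their buckets free.
#[cfg(test)]
mod tests {
    use super::bucket_mask_to_capacity;

    #[test]
    fn bucket_mask_to_capacity_examples() {
        // 8 buckets (mask 7): one slot is kept free.
        assert_eq!(bucket_mask_to_capacity(7), 7);
        // 16 buckets (mask 15): 2 of 16 slots (12.5%) are kept free.
        assert_eq!(bucket_mask_to_capacity(15), 14);
        // 64 buckets (mask 63): 8 of 64 slots are kept free.
        assert_eq!(bucket_mask_to_capacity(63), 56);
    }
}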