// musli_zerocopy/swiss/constructor.rs
//
// Note: This was ported from `hashbrown`, and still contains some of the
// comments assuming that it performs internal allocations. These are most
// likely wrong and need to be rewritten to take into account the safety
// requirements towards `OwnedBuf`.

use core::convert::{identity as likely, identity as unlikely};
use core::marker::PhantomData;
use core::mem::size_of;
use core::ptr::NonNull;

use crate::buf::{self, Buf, StoreBuf};
use crate::error::{Error, ErrorKind};
use crate::swiss::raw::{Group, ProbeSeq, h2, is_full, probe_seq, special_is_empty};
use crate::traits::ZeroCopy;
15
16/// Construction of a raw swiss table.
17pub struct Constructor<'a, T, S: ?Sized> {
18 buf: &'a mut S,
19
20 // Mask to get an index from a hash value. The value is one less than the
21 // number of buckets in the table.
22 bucket_mask: usize,
23
24 // Control offset, where the control vectors are stored in `buf`.
25 ctrl_ptr: usize,
26
27 // Base offset where items are written in `buf`.
28 base_ptr: usize,
29
30 // Number of elements that can be inserted before we need to grow the table.
31 // Since we can't grow the table, reaching this point results in an error.
32 growth_left: usize,
33
34 // Hold onto T to make sure the API stays coherent with the types the
35 // constructor can write.
36 _marker: PhantomData<T>,
37}
38
39impl<'a, T, S: ?Sized> Constructor<'a, T, S>
40where
41 S: StoreBuf,
42{
43 /// Wrap the given buffer for table construction.
44 ///
45 /// # Safety
46 ///
47 /// The caller must ensure that buffer contains allocated and correctly
48 /// initialized memory at `ctrl_ptr` and `base_ptr`.
49 ///
50 /// * `ctrl_ptr` must point to a memory region that is `buckets + 1` length
51 /// sized for `Group` which has been bitwise initialized to [`EMPTY`].
52 /// * `base_ptr` must point to a memory region that is `buckets` length
53 /// sized for `T`.
54 /// * `buckets` must be a power of two.
55 ///
56 /// [`EMPTY`]: crate::swiss::raw::EMPTY
57 pub(crate) fn with_buf(
58 buf: &'a mut S,
59 ctrl_ptr: usize,
60 base_ptr: usize,
61 buckets: usize,
62 ) -> Self {
63 debug_assert!(buckets.is_power_of_two());
64
65 Self {
66 buf,
67 bucket_mask: buckets - 1,
68 ctrl_ptr,
69 base_ptr,
70 growth_left: bucket_mask_to_capacity(buckets - 1),
71 _marker: PhantomData,
72 }
73 }
74
75 /// Access the underlying buffer.
76 pub(crate) fn buf(&self) -> &Buf {
77 self.buf.as_buf()
78 }
79
80 /// Export bucket mask.
81 pub(crate) fn bucket_mask(&self) -> usize {
82 self.bucket_mask
83 }
84
85 /// Get the length of the table.
86 pub(crate) fn len(&self) -> usize {
87 self.bucket_mask - self.growth_left
88 }
89
90 /// Returns the number of buckets in the table.
91 #[inline]
92 pub(crate) fn buckets(&self) -> usize {
93 self.bucket_mask + 1
94 }
95
96 /// Insert the given zero copy value into the table.
97 pub(crate) fn insert(&mut self, hash: u64, value: &T) -> Result<Bucket<'_, T>, Error>
98 where
99 T: ZeroCopy,
100 {
101 // SAFETY:
102 // 1. The [`Constructor`] must already have properly initialized control bytes since
103 // we will never expose `Constructor::new_uninitialized` in a public API.
104 let slot = self.find_insert_slot(hash)?;
105
106 // We can avoid growing the table once we have reached our load factor if we are replacing
107 // a tombstone. This works since the number of EMPTY slots does not change in this case.
108 //
109 // SAFETY: The function is guaranteed to return [`InsertSlot`] that contains an index
110 // in the range `0..=self.buckets()`.
111 let old_ctrl = *self.ctrl(slot.index);
112
113 if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) {
114 return Err(Error::new(ErrorKind::CapacityError));
115 }
116
117 Ok(self.insert_in_slot(hash, slot, value))
118 }
119
120 /// Inserts a new element into the table in the given slot, and returns its
121 /// raw bucket.
122 #[inline]
123 pub fn insert_in_slot(&mut self, hash: u64, slot: InsertSlot, value: &T) -> Bucket<'_, T>
124 where
125 T: ZeroCopy,
126 {
127 let old_ctrl = *self.ctrl(slot.index);
128 self.record_item_insert_at(slot.index, old_ctrl, hash);
129 let bucket = self.bucket(slot.index);
130 bucket.write(value);
131 bucket
132 }
133
134 /// Returns a pointer to an element in the table.
135 #[inline]
136 pub fn bucket(&mut self, index: usize) -> Bucket<'_, T> {
137 debug_assert_ne!(self.bucket_mask, 0);
138 debug_assert!(index < self.buckets());
139
140 let Some(index) = index.checked_mul(size_of::<T>()) else {
141 panic!("Index `{index}` out of bounds");
142 };
143
144 let Some(start) = self.base_ptr.checked_add(index) else {
145 panic!("Start `{index}` out of bounds");
146 };
147
148 let end = start + size_of::<T>();
149
150 // SAFETY: The accessed data is a slice of u8's, which cannot be
151 // incorrectly initialized locally.
152 let slice = unsafe { self.buf.get_mut(start..end) };
153
154 let Some(slice) = slice else {
155 panic!("Missing bucket at range {start}..{end}");
156 };
157
158 // SAFETY: We've ensure that the bucket is appropriately sized just
159 // above.
160 unsafe { Bucket::from_slice(slice) }
161 }
162
163 /// Finds the position to insert something in a group.
164 ///
165 /// **This may have false positives and must be fixed up with `fix_insert_slot`
166 /// before it's used.**
167 ///
168 /// The function is guaranteed to return the index of an empty or deleted [`Bucket`]
169 /// in the range `0..self.buckets()` (`0..=self.bucket_mask`).
170 #[inline]
171 fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
172 let bit = likely(group.match_empty_or_deleted().lowest_set_bit()?);
173
174 // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number
175 // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
176 Some((probe_seq.pos + bit) & self.bucket_mask)
177 }
178
179 #[inline]
180 fn record_item_insert_at(&mut self, index: usize, old_ctrl: u8, hash: u64) {
181 self.growth_left -= usize::from(special_is_empty(old_ctrl));
182 self.set_ctrl_h2(index, hash);
183 }
184
185 /// Searches for an empty or deleted bucket which is suitable for inserting
186 /// a new element, returning the `index` for the new [`Bucket`].
187 #[inline]
188 fn find_insert_slot(&mut self, hash: u64) -> Result<InsertSlot, Error> {
189 let mut probe_seq = probe_seq(self.bucket_mask, hash);
190
191 loop {
192 let group = unsafe { Group::load(self.ctrl_group(probe_seq.pos).as_ptr()) };
193
194 if let Some(index) = self.find_insert_slot_in_group(&group, &probe_seq) {
195 return Ok(self.fix_insert_slot(index));
196 };
197
198 probe_seq.move_next(self.bucket_mask)?;
199 }
200 }
201
202 /// Fixes up an insertion slot returned by the
203 /// [`Constructor::find_insert_slot_in_group`] method.
204 ///
205 /// In tables smaller than the group width (`self.buckets() <
206 /// Group::WIDTH`), trailing control bytes outside the range of the table
207 /// are filled with [`EMPTY`] entries. These will unfortunately trigger a
208 /// match of [`Constructor::find_insert_slot_in_group`] function. This is
209 /// because the `Some(bit)` returned by
210 /// `group.match_empty_or_deleted().lowest_set_bit()` after masking
211 /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket
212 /// that is already occupied. We detect this situation here and perform a
213 /// second scan starting at the beginning of the table. This second scan is
214 /// guaranteed to find an empty slot (due to the load factor) before hitting
215 /// the trailing control bytes (containing [`EMPTY`] bytes).
216 ///
217 /// If this function is called correctly, it is guaranteed to return
218 /// [`InsertSlot`] with an index of an empty or deleted bucket in the range
219 /// `0..self.buckets()` (see `Warning` and `Safety`).
220 ///
221 /// [`EMPTY`]: super::raw::EMPTY
222 ///
223 /// # Warning
224 ///
225 /// The table must have at least 1 empty or deleted `bucket`, otherwise if
226 /// the table is less than the group width (`self.buckets() < Group::WIDTH`)
227 /// this function returns an index outside of the table indices range
228 /// `0..self.buckets()` (`0..=self.bucket_mask`). Attempt to write data at
229 /// that index will cause immediate [`undefined behavior`].
230 ///
231 /// # Safety
232 ///
233 /// The safety rules are directly derived from the safety rules for
234 /// [`Constructor::ctrl`] method. Thus, in order to uphold those safety
235 /// contracts, as well as for the correct logic of the work of this crate,
236 /// the following rules are necessary and sufficient:
237 ///
238 /// * The [`Constructor`] must have properly initialized control bytes
239 /// otherwise calling this function results in [`undefined behavior`].
240 ///
241 /// * This function must only be used on insertion slots found by
242 /// [`find_insert_slot_in_group`] (after the `find_insert_slot_in_group`
243 /// function, but before insertion into the table).
244 ///
245 /// * The `index` must not be greater than the `self.bucket_mask`, i.e.
246 /// `(index + 1) <= self.buckets()` (this one is provided by the
247 /// [`find_insert_slot_in_group`] function).
248 ///
249 /// Calling this function with an index not provided by
250 /// [`find_insert_slot_in_group`] may result in [`undefined behavior`] even
251 /// if the index satisfies the safety rules of the [`ctrl`] function (`index
252 /// < self.bucket_mask + 1 + Group::WIDTH`).
253 ///
254 /// [`ctrl`]: Self::ctrl
255 /// [`find_insert_slot_in_group`]: Self::find_insert_slot_in_group
256 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
257 #[inline]
258 fn fix_insert_slot(&mut self, mut index: usize) -> InsertSlot {
259 // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`.
260 if unlikely(self.is_bucket_full(index)) {
261 debug_assert!(self.bucket_mask < Group::WIDTH);
262
263 // SAFETY:
264 //
265 // * Since the caller of this function ensures that the control bytes are properly
266 // initialized and `ptr = self.ctrl(0)` points to the start of the array of control
267 // bytes, therefore: `ctrl` is valid for reads, properly aligned to `Group::WIDTH`
268 // and points to the properly initialized control bytes (see also
269 // `TableLayout::calculate_layout_for` and `ptr::read`);
270 //
271 // * Because the caller of this function ensures that the index was provided by the
272 // `self.find_insert_slot_in_group()` function, so for for tables larger than the
273 // group width (self.buckets() >= Group::WIDTH), we will never end up in the given
274 // branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group`
275 // cannot return a full bucket index. For tables smaller than the group width, calling
276 // the `unwrap_unchecked` function is also safe, as the trailing control bytes outside
277 // the range of the table are filled with EMPTY bytes (and we know for sure that there
278 // is at least one FULL bucket), so this second scan either finds an empty slot (due to
279 // the load factor) or hits the trailing control bytes (containing EMPTY).
280 index = unsafe {
281 Group::load_aligned(self.ctrl_group(0).as_ptr())
282 .match_empty_or_deleted()
283 .lowest_set_bit()
284 .unwrap_unchecked()
285 };
286 }
287
288 InsertSlot { index }
289 }
290
291 /// Sets a control byte to the hash, and possibly also the replicated control byte at
292 /// the end of the array.
293 ///
294 /// This function does not make any changes to the `data` parts of the table,
295 /// or any changes to the the `items` or `growth_left` field of the table.
296 ///
297 /// # Safety
298 ///
299 /// The caller must ensure that the `index` is not out of bounds of the control allocation.
300 #[inline]
301 fn set_ctrl_h2(&mut self, index: usize, hash: u64) {
302 self.set_ctrl(index, h2(hash));
303 }
304
305 /// Sets a control byte, and possibly also the replicated control byte at
306 /// the end of the array.
307 ///
308 /// This function does not make any changes to the `data` parts of the table,
309 /// or any changes to the the `items` or `growth_left` field of the table.
310 ///
311 /// # Safety
312 ///
313 /// The caller must ensure that `index` is not out of bounds of the control
314 /// allocation.
315 #[inline]
316 fn set_ctrl(&mut self, index: usize, ctrl: u8) {
317 // Replicate the first Group::WIDTH control bytes at the end of
318 // the array without using a branch. If the tables smaller than
319 // the group width (self.buckets() < Group::WIDTH),
320 // `index2 = Group::WIDTH + index`, otherwise `index2` is:
321 //
322 // - If index >= Group::WIDTH then index == index2.
323 // - Otherwise index2 == self.bucket_mask + 1 + index.
324 //
325 // The very last replicated control byte is never actually read because
326 // we mask the initial index for unaligned loads, but we write it
327 // anyways because it makes the set_ctrl implementation simpler.
328 //
329 // If there are fewer buckets than Group::WIDTH then this code will
330 // replicate the buckets at the end of the trailing group. For example
331 // with 2 buckets and a group size of 4, the control bytes will look
332 // like this:
333 //
334 // Real | Replicated
335 // ---------------------------------------------
336 // | [A] | [B] | [EMPTY] | [EMPTY] | [A] | [B] |
337 // ---------------------------------------------
338
339 // This is the same as `(index.wrapping_sub(Group::WIDTH)) % self.buckets() + Group::WIDTH`
340 // because the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
341 let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH;
342
343 // SAFETY: The caller must uphold the safety rules for the
344 // `Constructor::set_ctrl`.
345 *self.ctrl_mut(index) = ctrl;
346 *self.ctrl_mut(index2) = ctrl;
347 }
348
349 /// Checks whether the bucket at `index` is full.
350 ///
351 /// # Safety
352 ///
353 /// The caller must ensure `index` is less than the number of buckets.
354 #[inline]
355 fn is_bucket_full(&self, index: usize) -> bool {
356 debug_assert!(index < self.buckets());
357 is_full(*self.ctrl(index))
358 }
359
360 #[inline]
361 fn ctrl_mut(&mut self, index: usize) -> &mut u8 {
362 debug_assert!(index < self.num_ctrl_bytes());
363
364 let offset = self.ctrl_ptr + index;
365
366 // SAFETY: The control bytes are u8's, which cannot be incorrectly
367 // initialized.
368 let ctrl = unsafe { self.buf.get_mut(offset) };
369
370 let Some(ctrl) = ctrl else {
371 panic!("Missing control byte at {offset}");
372 };
373
374 ctrl
375 }
376
377 #[inline]
378 fn ctrl(&self, index: usize) -> &u8 {
379 debug_assert!(index < self.num_ctrl_bytes());
380
381 let offset = self.ctrl_ptr + index;
382
383 let Some(ctrl) = self.buf.get(offset) else {
384 panic!("Missing control byte at {offset}");
385 };
386
387 ctrl
388 }
389
390 #[inline]
391 fn ctrl_group(&self, index: usize) -> &[u8] {
392 debug_assert!(index < self.num_ctrl_bytes());
393
394 let start = self.ctrl_ptr + index;
395 let end = start + Group::WIDTH;
396
397 let Some(ctrl) = self.buf.get(start..end) else {
398 panic!("Missing control byte at range {start}..{end}");
399 };
400
401 ctrl
402 }
403
404 #[inline]
405 fn num_ctrl_bytes(&self) -> usize {
406 self.bucket_mask + 1 + Group::WIDTH
407 }
408}
409
410/// A reference to a hash table bucket containing a `T`.
411///
412/// This is usually just a pointer to the element itself. However if the element
413/// is a ZST, then we instead track the index of the element in the table so
414/// that `erase` works properly.
415pub struct Bucket<'a, T> {
416 data: NonNull<u8>,
417 _marker: PhantomData<&'a mut T>,
418}
419
420impl<'a, T> Bucket<'a, T> {
421 /// Construct a bucket from the given slice.
422 ///
423 /// # Safety
424 ///
425 /// Caller must ensure that the slice is sized for `T`.
426 #[inline]
427 unsafe fn from_slice(data: &'a mut [u8]) -> Self {
428 debug_assert!(data.len() == size_of::<T>());
429
430 Self {
431 data: unsafe { NonNull::new_unchecked(data.as_mut_ptr()) },
432 _marker: PhantomData,
433 }
434 }
435
436 /// Overwrites a memory location with the given `value` without reading or
437 /// dropping the old value (like [`ptr::write`] function).
438 ///
439 /// [`ptr::write`]: core::ptr::write
440 #[inline]
441 pub(crate) fn write(&self, value: &T)
442 where
443 T: ZeroCopy,
444 {
445 // SAFETY: During bucket construction we've asserted that a buffer of
446 // the appropriate size (that is not guaranteed to be aligned) is used.
447 unsafe { buf::store_unaligned(self.data, value) }
448 }
449}
450
451/// A reference to an empty bucket into which an can be inserted.
452pub struct InsertSlot {
453 index: usize,
454}
455
456/// Returns the maximum effective capacity for the given bucket mask, taking
457/// the maximum load factor into account.
458#[inline]
459fn bucket_mask_to_capacity(bucket_mask: usize) -> usize {
460 if bucket_mask < 8 {
461 // For tables with 1/2/4/8 buckets, we always reserve one empty slot.
462 // Keep in mind that the bucket mask is one less than the bucket count.
463 bucket_mask
464 } else {
465 // For larger tables we reserve 12.5% of the slots as empty.
466 ((bucket_mask + 1) / 8) * 7
467 }
468}