linear_hashtbl/
raw.rs

1//! Partially unsafe `RawTable` API
2
3use core::iter::FusedIterator;
4use core::marker::PhantomData;
5use core::mem::ManuallyDrop;
6use core::mem::MaybeUninit;
7
8#[cfg(all(feature = "nightly", not(feature = "allocator-api2")))]
9use alloc::{alloc::Allocator, alloc::Global, boxed::Box, vec::Vec};
10#[cfg(feature = "allocator-api2")]
11use allocator_api2::{alloc::Allocator, alloc::Global, boxed::Box, vec::Vec};
12
13#[cfg(not(any(feature = "allocator-api2", feature = "nightly")))]
14use {
15    crate::__alloc::{Allocator, Global},
16    alloc::{boxed::Box, vec::Vec},
17};
18
19// === Structs =================================================================
20
21/// Raw hash table with a (partially) unsafe API
22pub struct RawTable<T, S: Status = usize, A: Allocator + Clone = Global> {
23    #[cfg(any(feature = "allocator-api2", feature = "nightly"))]
24    data: Box<[Slot<T, S>], A>,
25    #[cfg(not(any(feature = "allocator-api2", feature = "nightly")))]
26    data: Box<[Slot<T, S>]>,
27
28    /// The number of items in the table
29    len: usize,
30    /// The number of free slots
31    free: usize,
32
33    phantom: PhantomData<A>,
34}
35
36struct Slot<T, S: Status = usize> {
37    status: S,
38    data: MaybeUninit<T>,
39}
40
41/// Immutable iterator over the entries of a [`RawTable`] in arbitrary order
42///
43/// This `struct` is created by [`RawTable::iter()`].
44pub struct Iter<'a, T, S: Status = usize> {
45    iter: core::slice::Iter<'a, Slot<T, S>>,
46    len: usize,
47}
48
49/// Mutable iterator over the entries of a [`RawTable`] in arbitrary order
50///
51/// This `struct` is created by [`RawTable::iter_mut()`].
52pub struct IterMut<'a, T, S: Status = usize> {
53    iter: core::slice::IterMut<'a, Slot<T, S>>,
54    len: usize,
55}
56
57/// Owning iterator over the entries of a [`RawTable`] in arbitrary order
58///
59/// This `struct` is created by the [`IntoIterator::into_iter()`] implementation
60/// for `RawTable`. The `RawTable` cannot be used anymore after calling
61/// [`IntoIterator::into_iter()`].
62pub struct IntoIter<T, S: Status = usize, A: Allocator = Global> {
63    #[cfg(feature = "allocator-api2")]
64    iter: allocator_api2::vec::IntoIter<Slot<T, S>, A>,
65    #[cfg(all(not(feature = "allocator-api2"), feature = "nightly"))]
66    iter: alloc::vec::IntoIter<Slot<T, S>, A>,
67    #[cfg(not(any(feature = "allocator-api2", feature = "nightly")))]
68    iter: alloc::vec::IntoIter<Slot<T, S>>,
69    len: usize,
70    phantom: PhantomData<A>,
71}
72
73/// A draining iterator over the entries of a [`RawTable`]
74///
75/// This `struct` is created by [`RawTable::drain()`]. While this iterator moves
76/// elements out just like [`IntoIter`], it does neither consume the
77/// [`RawTable`] nor free its underlying memory.
78///
79/// Note: Forgetting `Drain` (e.g. via [`core::mem::forget()`]) and using the
80/// table afterwards is a very bad idea. It is not `unsafe` but it causes
81/// correctness issues since there exist non-empty slots while the length is
82/// already set to `0`.
83pub struct Drain<'a, T, S: Status = usize> {
84    iter: core::slice::IterMut<'a, Slot<T, S>>,
85    len: usize,
86}
87
88// === Status Impls ============================================================
89
90/// Status of a slot in the hash table
91///
92/// A status can either be free, a tombstone, or a hash value of the associated
93/// item. This can be implemented as a `u32` or a `usize`, for instance. Since
94/// there are possibly more `u64` hash values than `Status` hash values, it is
95/// fine to apply some mapping (e.g. only take the lowest `n - 1` bits).
96///
97/// Part of the idea of storing hash values is that we can avoid recomputing the
98/// hash values when rehashing (resizing) the table. Consequently, we cannot
99/// meaningfully address a table that is larger than the number of `Status` hash
100/// values. This is why we have [`Self::check_capacity()`].
101///
102/// # Safety
103///
104/// All valid status values are either [`Status::FREE`], a [`Status::TOMBSTONE`]
105/// or a hash value. [`Status::from_hash()`] must always return a hash value,
106/// and [`Status::is_hash()`] must return `true` if and only if the given
107/// `Status` is indeed a hash value.
108pub unsafe trait Status: Copy + Eq {
109    /// Marker for the slot being free
110    const FREE: Self;
111    /// Marker that is placed for deleted entries in a collision list
112    const TOMBSTONE: Self;
113
114    /// Convert a `u64` hash value (e.g. as returned by
115    /// [`Hash`][core::hash::Hash]) into a `Status` hash value
116    fn from_hash(hash: u64) -> Self;
117    /// Panics if `capacity` exceeds the number of possible `Status` hash values
118    fn check_capacity(capacity: usize);
119    /// Check if the `Status` is a hash value
120    fn is_hash(self) -> bool;
121    /// Convert a `Status` hash value into a `usize`
122    ///
123    /// The result is unspecified if the status is free, a tombstone, or
124    /// invalid. In this case, the implementation is allowed to panic.
125    fn hash_as_usize(self) -> usize;
126}
127
128// SAFETY: `FREE`, `TOMBSTONE`, and hash values are distinct.
129unsafe impl Status for usize {
130    const FREE: Self = usize::MAX;
131    const TOMBSTONE: Self = usize::MAX - 1;
132
133    #[inline]
134    fn from_hash(hash: u64) -> Self {
135        hash as usize & (usize::MAX >> 1)
136    }
137
138    #[inline]
139    #[track_caller]
140    fn check_capacity(capacity: usize) {
141        assert!(
142            capacity <= 1 << (usize::BITS - 1),
143            "requested capacity {capacity} is too large"
144        );
145    }
146
147    #[inline]
148    fn is_hash(self) -> bool {
149        self >> (usize::BITS - 1) == 0 // most significant bit is unset
150    }
151
152    #[inline]
153    fn hash_as_usize(self) -> usize {
154        debug_assert!(self.is_hash());
155        self
156    }
157}
158
159// SAFETY: `FREE`, `TOMBSTONE`, and hash values are distinct.
160unsafe impl Status for u32 {
161    const FREE: Self = u32::MAX;
162    const TOMBSTONE: Self = u32::MAX - 1;
163
164    #[inline]
165    fn from_hash(hash: u64) -> Self {
166        hash as u32 & (u32::MAX >> 1)
167    }
168
169    #[inline]
170    #[track_caller]
171    fn check_capacity(capacity: usize) {
172        assert!(
173            capacity <= 1 << (u32::BITS - 1),
174            "requested capacity {capacity} is too large"
175        );
176    }
177
178    #[inline]
179    fn is_hash(self) -> bool {
180        self >> (u32::BITS - 1) == 0 // most significant bit is unset
181    }
182
183    #[inline]
184    fn hash_as_usize(self) -> usize {
185        debug_assert!(self.is_hash());
186        self as usize
187    }
188}
189
190// === Slot Impls ==============================================================
191
192impl<T, S: Status> Slot<T, S> {
193    const FREE: Self = Slot {
194        status: S::FREE,
195        data: MaybeUninit::uninit(),
196    };
197}
198
199impl<T: Clone, S: Status> Clone for Slot<T, S> {
200    fn clone(&self) -> Self {
201        Self {
202            status: self.status,
203            data: if self.status.is_hash() {
204                // SAFETY: hash status means that the data is initialized
205                MaybeUninit::new(unsafe { self.data.assume_init_ref() }.clone())
206            } else {
207                MaybeUninit::uninit()
208            },
209        }
210    }
211}
212
213// === RawTable Impls =========================================================
214
215impl<T, S: Status> RawTable<T, S> {
216    /// Create a new `HashTable` with zero capacity
217    #[inline]
218    pub fn new() -> Self {
219        RawTable {
220            data: Vec::new().into_boxed_slice(),
221            len: 0,
222            free: 0,
223            phantom: PhantomData,
224        }
225    }
226
227    /// Create a new `HashTable` with capacity for at least `capacity` elements
228    pub fn with_capacity(capacity: usize) -> Self {
229        let capacity = Self::next_capacity(capacity);
230        let mut data = Vec::with_capacity(capacity);
231        data.resize_with(capacity, || Slot::FREE);
232        RawTable {
233            data: data.into_boxed_slice(),
234            len: 0,
235            free: capacity,
236            phantom: PhantomData,
237        }
238    }
239}
240
241#[cfg(any(feature = "allocator-api2", feature = "nightly"))]
242impl<T, S: Status, A: Clone + Allocator> RawTable<T, S, A> {
243    /// Create a new `HashTable` with zero capacity
244    #[inline]
245    pub fn new_in(alloc: A) -> Self {
246        RawTable {
247            data: Vec::new_in(alloc).into_boxed_slice(),
248            len: 0,
249            free: 0,
250            phantom: PhantomData,
251        }
252    }
253
254    /// Create a new `HashTable` with capacity for at least `capacity` elements
255    pub fn with_capacity_in(capacity: usize, alloc: A) -> Self {
256        let capacity = Self::next_capacity(capacity);
257        let mut data = Vec::with_capacity_in(capacity, alloc);
258        data.resize_with(capacity, || Slot::FREE);
259        RawTable {
260            data: data.into_boxed_slice(),
261            len: 0,
262            free: capacity,
263            phantom: PhantomData,
264        }
265    }
266}
267
268/// Numerator for the fraction of usable slots
269const RATIO_N: usize = 3;
270/// Denominator for the fraction of usable slots
271const RATIO_D: usize = 4;
272
273/// Minimal non-zero capacity (including spare slots)
274const MIN_CAP: usize = 16;
275
276impl<T, S: Status, A: Clone + Allocator> RawTable<T, S, A> {
277    /// Get the next largest array capacity for `requested` elements
278    ///
279    /// `requested` does not include the 25 % spare slots, while the returned
280    /// value does. The result is guaranteed to be a power of two
281    /// (or 0 in case `requested == 0`).
282    ///
283    /// This uses [`Status::check_capacity()`] to check if we can address the
284    /// new array properly.
285    #[inline]
286    fn next_capacity(requested: usize) -> usize {
287        if requested == 0 {
288            return 0;
289        }
290        let capacity = core::cmp::max((requested * RATIO_D / RATIO_N).next_power_of_two(), MIN_CAP);
291        S::check_capacity(capacity);
292        capacity
293    }
294
295    /// Get the number of elements stored in the table
296    #[inline(always)]
297    pub fn len(&self) -> usize {
298        self.len
299    }
300
301    /// Returns `true` iff no elements are stored in the table
302    pub fn is_empty(&self) -> bool {
303        self.len == 0
304    }
305
306    /// Get the capacity (excluding spare slots)
307    #[inline]
308    pub fn capacity(&self) -> usize {
309        self.data.len() / RATIO_D * RATIO_N
310    }
311
312    /// Get the number of slots (i.e. [`Self::capacity()`] plus spare slots)
313    #[inline]
314    pub fn slots(&self) -> usize {
315        self.data.len()
316    }
317
318    /// Reserve space for `additional` elements
319    ///
320    /// If there are no other modifying operations in between, the next
321    /// `additional` insertions are guaranteed to not cause a rehash (or resize)
322    /// of the table.
323    #[inline]
324    pub fn reserve(&mut self, additional: usize) {
325        let spare = additional + self.data.len() / RATIO_D * (RATIO_D - RATIO_N);
326        if self.free < spare {
327            self.reserve_rehash(additional)
328        }
329    }
330    /// Rehash the table with capacity for `self.len + additional` elements
331    #[cold]
332    fn reserve_rehash(&mut self, additional: usize) {
333        let new_cap = Self::next_capacity(self.len + additional);
334
335        #[cfg(any(feature = "allocator-api2", feature = "nightly"))]
336        let mut new_data = Vec::with_capacity_in(new_cap, Box::allocator(&self.data).clone());
337        #[cfg(not(any(feature = "allocator-api2", feature = "nightly")))]
338        let mut new_data = Vec::with_capacity(new_cap);
339
340        new_data.resize_with(new_cap, || Slot::FREE);
341        let old_data = core::mem::replace(&mut self.data, new_data.into_boxed_slice());
342
343        if new_cap == 0 {
344            self.free = 0;
345            return;
346        }
347
348        let new_data = &mut self.data[..];
349        let new_mask = new_cap - 1;
350        // `.into_vec()` is needed for `allocator-api2` boxes
351        for slot in old_data.into_vec() {
352            let status = slot.status;
353            if !status.is_hash() {
354                continue;
355            }
356            // SAFETY: hash status means that the data is initialized
357            let data = unsafe { slot.data.assume_init() };
358
359            let mut index = status.hash_as_usize() & new_mask;
360            loop {
361                // SAFETY: masked index is in bounds (`new_data.len() != 0`)
362                let new_slot = unsafe { new_data.get_unchecked_mut(index) };
363                if new_slot.status == S::FREE {
364                    new_slot.data.write(data);
365                    new_slot.status = status;
366                    break;
367                }
368                index = (index + 1) & new_mask;
369            }
370        }
371
372        self.free = new_cap - self.len;
373    }
374
375    /// Clear the table
376    ///
377    /// This does not change the table's capacity.
378    pub fn clear(&mut self) {
379        if self.len == 0 {
380            return;
381        }
382        for slot in self.data.iter_mut() {
383            let status = slot.status;
384            slot.status = S::FREE;
385            if status.is_hash() {
386                // SAFETY: hash status means that the data is initialized. We
387                // marked the slot as `FREE` above, so there is no danger of
388                // double frees.
389                unsafe { slot.data.assume_init_drop() };
390                self.len -= 1;
391                if self.len == 0 {
392                    return;
393                }
394            }
395        }
396        // SAFETY: `self.len <= self.data.len()` holds by invariant
397        unsafe { core::hint::unreachable_unchecked() }
398    }
399
400    /// [`Self::clear()`] but without dropping any entry
401    pub fn clear_no_drop(&mut self) {
402        if self.len == 0 {
403            return;
404        }
405        for slot in self.data.iter_mut() {
406            let status = slot.status;
407            slot.status = S::FREE;
408            if status.is_hash() {
409                self.len -= 1;
410                if self.len == 0 {
411                    return;
412                }
413            }
414        }
415        // SAFETY: `self.len <= self.data.len()` holds by invariant
416        unsafe { core::hint::unreachable_unchecked() }
417    }
418
419    /// Like [`Self::clear_no_drop()`], but also sets the capacity to 0
420    ///
421    /// If the space is not needed anymore, this should generally be faster than
422    /// [`Self::clear_no_drop()`], since we do not need to mark every slot as
423    /// free.
424    #[inline]
425    pub fn reset_no_drop(&mut self) {
426        self.len = 0;
427
428        #[cfg(any(feature = "allocator-api2", feature = "nightly"))]
429        let empty = Vec::new_in(Box::allocator(&self.data).clone());
430        #[cfg(not(any(feature = "allocator-api2", feature = "nightly")))]
431        let empty = Vec::new();
432        self.data = empty.into_boxed_slice();
433    }
434
435    /// Returns `true` iff there is an entry at `slot`
436    ///
437    /// # Safety
438    ///
439    /// `slot` must be less than `self.slots()`
440    #[inline(always)]
441    pub unsafe fn is_slot_occupied_unchecked(&self, slot: usize) -> bool {
442        debug_assert!(slot < self.data.len());
443        // SAFETY: `slot <= self.slots.len()` is ensured by the caller
444        unsafe { self.data.get_unchecked(slot) }.status.is_hash()
445    }
446
447    /// Find the index of an element or a slot
448    #[inline]
449    pub fn find(&self, hash: u64, eq: impl Fn(&T) -> bool) -> Option<usize> {
450        if self.len == 0 {
451            return None;
452        }
453        debug_assert_ne!(self.free, 0, "find may diverge");
454
455        debug_assert!(self.data.len().is_power_of_two());
456        let mask = self.data.len() - 1;
457        let mut index = hash as usize & mask;
458        let hash_status = S::from_hash(hash);
459
460        loop {
461            // SAFETY: masked index is in bounds (`self.data.len() != 0`)
462            let slot = unsafe { self.data.get_unchecked(index) };
463            if slot.status == hash_status {
464                // SAFETY: hash status means that the data is initialized
465                if eq(unsafe { slot.data.assume_init_ref() }) {
466                    return Some(index);
467                }
468            } else if slot.status == S::FREE {
469                return None;
470            }
471            index = (index + 1) & mask;
472        }
473    }
474
475    /// Find the index of an element or a slot to insert it
476    ///
477    /// Returns `Ok(index)` if the element was found and `Err(index)` for an
478    /// insertion slot.
479    ///
480    /// `eq` is guaranteed to only be called for entries that are present in the
481    /// table.
482    #[inline]
483    pub fn find_or_find_insert_slot(
484        &mut self,
485        hash: u64,
486        eq: impl Fn(&T) -> bool,
487    ) -> Result<usize, usize> {
488        self.reserve(1);
489
490        debug_assert!(self.data.len().is_power_of_two());
491        let mask = self.data.len() - 1;
492        let mut index = hash as usize & mask;
493        let mut first_tombstone = None;
494        let hash_status = S::from_hash(hash);
495
496        loop {
497            // SAFETY: masked index is in bounds (`self.data.len() != 0`)
498            let slot = unsafe { self.data.get_unchecked(index) };
499            if slot.status == hash_status {
500                // SAFETY: hash status means that the data is initialized
501                if eq(unsafe { slot.data.assume_init_ref() }) {
502                    return Ok(index);
503                }
504            } else if slot.status == S::FREE {
505                return Err(first_tombstone.unwrap_or(index));
506            } else if slot.status == S::TOMBSTONE && first_tombstone.is_none() {
507                first_tombstone = Some(index);
508            }
509            index = (index + 1) & mask;
510        }
511    }
512
513    /// Get a reference to an entry in the table
514    ///
515    /// `hash` is the entry's hash value and `eq` returns true if the given
516    /// element is the searched one.
517    #[inline]
518    pub fn get(&self, hash: u64, eq: impl Fn(&T) -> bool) -> Option<&T> {
519        let index = self.find(hash, eq)?;
520        debug_assert!(self.data[index].status.is_hash());
521        // SAFETY: `find()` guarantees the returned index to be in bounds and
522        // the entry there to be occupied.
523        Some(unsafe { self.data.get_unchecked(index).data.assume_init_ref() })
524    }
525
526    /// Get a mutable reference to an entry in the table
527    ///
528    /// `hash` is the entry's hash value and `eq` returns true if the given
529    /// element is the searched one.
530    #[inline]
531    pub fn get_mut(&mut self, hash: u64, eq: impl Fn(&T) -> bool) -> Option<&mut T> {
532        let index = self.find(hash, eq)?;
533        debug_assert!(self.data[index].status.is_hash());
534        // SAFETY: `find()` guarantees the returned index to be in bounds and
535        // the entry there to be occupied.
536        Some(unsafe { self.data.get_unchecked_mut(index).data.assume_init_mut() })
537    }
538
539    /// Get a reference to the entry at `slot`
540    ///
541    /// # Safety
542    ///
543    /// `slot` must be the index of an occupied slot. This is the case if `slot`
544    /// has been returned by [`RawTable::find()`] or
545    /// [`RawTable::find_or_find_insert_slot()`] in the `Ok` case, and no
546    /// modifications happened in between.
547    #[inline]
548    pub unsafe fn get_at_slot_unchecked(&self, slot: usize) -> &T {
549        debug_assert!(self.data[slot].status.is_hash(), "slot is empty");
550        // SAFETY: The caller ensures that `slot` is the index of an occupied
551        // slot.
552        unsafe { self.data.get_unchecked(slot).data.assume_init_ref() }
553    }
554
555    /// Get a mutable reference to the entry entry at `slot`
556    ///
557    /// # Safety
558    ///
559    /// `slot` must be the index of an occupied slot. This is the case if `slot`
560    /// has been returned by [`RawTable::find()`] or
561    /// [`RawTable::find_or_find_insert_slot()`] in the `Ok` case, and no
562    /// modifications happened in between.
563    #[inline]
564    pub unsafe fn get_at_slot_unchecked_mut(&mut self, slot: usize) -> &mut T {
565        debug_assert!(self.data[slot].status.is_hash(), "slot is empty");
566        // SAFETY: The caller ensures that `slot` is the index of an occupied
567        // slot.
568        unsafe { self.data.get_unchecked_mut(slot).data.assume_init_mut() }
569    }
570
571    /// Insert `val` in `slot`
572    ///
573    /// `hash` is the hash value of `val`. Returns a mutable reference to the
574    /// inserted value.
575    ///
576    /// # Safety
577    ///
578    /// `slot` must be the index of an empty slot. This is the case if `slot`
579    /// has been returned by [`RawTable::find_or_find_insert_slot()`] in the
580    /// `Err` case, and no modifications happened in between.
581    #[inline]
582    pub unsafe fn insert_in_slot_unchecked(&mut self, hash: u64, slot: usize, val: T) -> &mut T {
583        debug_assert!(!self.data[slot].status.is_hash(), "slot is occupied");
584
585        // SAFETY: Validity of the slot is guaranteed by the caller.
586        let slot = unsafe { self.data.get_unchecked_mut(slot) };
587        if slot.status != S::TOMBSTONE {
588            debug_assert!(slot.status == S::FREE);
589            self.free -= 1;
590        }
591        self.len += 1;
592        let res = slot.data.write(val);
593        slot.status = S::from_hash(hash);
594        res
595    }
596
597    /// Find and remove an entry from the table, returning it
598    ///
599    /// `hash` is the entry's hash value and `eq` returns true if the given
600    /// element is the searched one.
601    #[inline]
602    pub fn remove_entry(&mut self, hash: u64, eq: impl Fn(&T) -> bool) -> Option<T> {
603        let index = self.find(hash, eq)?;
604        // SAFETY: `find()` guarantees the returned index to be in bounds and
605        // the entry there to be occupied.
606        Some(unsafe { self.remove_at_slot_unchecked(index) })
607    }
608
609    /// Remove the entry at `slot`
610    ///
611    /// Returns the entry value.
612    ///
613    /// # Safety
614    ///
615    /// `slot` must be the index of an occupied slot. This is the case if `slot`
616    /// has been returned by [`RawTable::find()`] or
617    /// [`RawTable::find_or_find_insert_slot()`] in the `Ok` case, and no
618    /// modifications happened in between.
619    #[inline]
620    pub unsafe fn remove_at_slot_unchecked(&mut self, slot: usize) -> T {
621        debug_assert_ne!(self.len, 0);
622        debug_assert!(self.data[slot].status.is_hash());
623        let next_slot_index = (slot + 1) & (self.data.len() - 1);
624        // SAFETY (next 2): The caller ensures that `slot` is in bounds, hence
625        // `self.data.len() != 0` and `next_slot_index` is in bounds, too.
626        let next_slot_status = unsafe { self.data.get_unchecked(next_slot_index) }.status;
627        let slot = unsafe { self.data.get_unchecked_mut(slot) };
628        slot.status = if next_slot_status == S::FREE {
629            self.free += 1;
630            S::FREE
631        } else {
632            S::TOMBSTONE
633        };
634        self.len -= 1;
635        // SAFETY: The slot's status was a hash meaning that the entry is
636        // occupied. We set the status to `FREE` or `TOMBSTONE`, so we don't
637        // duplicate the value.
638        unsafe { slot.data.assume_init_read() }
639    }
640
641    /// Get an immutable iterator over the entries of the table
642    #[inline]
643    pub fn iter(&self) -> Iter<'_, T, S> {
644        Iter {
645            iter: self.data.iter(),
646            len: self.len,
647        }
648    }
649
650    /// Get a mutable iterator over the entries of the table
651    #[inline]
652    pub fn iter_mut(&mut self) -> IterMut<'_, T, S> {
653        IterMut {
654            iter: self.data.iter_mut(),
655            len: self.len,
656        }
657    }
658
659    /// Get a draining iterator over the entries of the table
660    ///
661    /// A draining iterator removes all elements from the table but does not
662    /// change the table's capacity.
663    ///
664    /// Note: Forgetting the returned `Drain` (e.g., via
665    /// [`core::mem::forget()`]) and using the table afterwards is a very
666    /// bad idea. It is not `unsafe` but it causes correctness issues since
667    /// there exist non-empty slots while the length is already set to `0`.
668    #[inline]
669    pub fn drain(&mut self) -> Drain<'_, T, S> {
670        let len = self.len;
671        self.len = 0;
672        self.free = self.data.len();
673        Drain {
674            iter: self.data.iter_mut(),
675            len,
676        }
677    }
678
679    /// Retain only the elements for which `predicate` returns `true`
680    ///
681    /// `predicate` is guaranteed to only be called for entries that are present
682    /// in the table.
683    /// `drop` is called for every element that is removed from the table.
684    pub fn retain(&mut self, mut predicate: impl FnMut(&mut T) -> bool, mut drop: impl FnMut(T)) {
685        if self.len == 0 {
686            return;
687        }
688        debug_assert!(self.data.len() >= self.len);
689        let mut i = self.len;
690        let mut last_is_free = self.data[0].status == S::FREE;
691        // iterate from the back such that we can potentially remove tombstones
692        for slot in self.data.iter_mut().rev() {
693            if !slot.status.is_hash() {
694                if slot.status == S::FREE {
695                    last_is_free = true;
696                } else {
697                    debug_assert!(slot.status == S::TOMBSTONE);
698                    if last_is_free {
699                        slot.status = S::FREE;
700                        self.free += 1;
701                    } else {
702                        last_is_free = false;
703                    }
704                }
705                continue;
706            }
707            // SAFETY: The slot's status is a hash value meaning that the data
708            // is initialized.
709            if !predicate(unsafe { slot.data.assume_init_mut() }) {
710                self.len -= 1;
711                if last_is_free {
712                    slot.status = S::FREE;
713                    self.free += 1;
714                } else {
715                    slot.status = S::TOMBSTONE;
716                }
717                // SAFETY: `slot.data` is initialized (see above). We set the
718                // status to `FREE` or `TOMBSTONE` above, so we don't duplicate
719                // the value.
720                drop(unsafe { slot.data.assume_init_read() });
721            } else {
722                last_is_free = false;
723            }
724            i -= 1;
725            if i == 0 {
726                if self.len < self.data.len() / RATIO_D * (RATIO_D - RATIO_N)
727                    && self.data.len() >= MIN_CAP
728                {
729                    // shrink the table
730                    self.reserve_rehash(0);
731                }
732                return;
733            }
734        }
735        // SAFETY: `self.len <= self.data.len()` holds by invariant
736        unsafe { core::hint::unreachable_unchecked() }
737    }
738}
739
740impl<T: Clone, S: Status, A: Clone + Allocator> Clone for RawTable<T, S, A> {
741    #[inline]
742    fn clone(&self) -> Self {
743        Self {
744            data: self.data.clone(),
745            len: self.len,
746            free: self.free,
747            phantom: PhantomData,
748        }
749    }
750}
751
752impl<T, S: Status, A: Clone + Default + Allocator> Default for RawTable<T, S, A> {
753    fn default() -> Self {
754        RawTable {
755            #[cfg(any(feature = "allocator-api2", feature = "nightly"))]
756            data: Vec::new_in(A::default()).into_boxed_slice(),
757            #[cfg(not(any(feature = "allocator-api2", feature = "nightly")))]
758            data: Vec::new().into_boxed_slice(),
759            len: 0,
760            free: 0,
761            phantom: PhantomData,
762        }
763    }
764}
765
766impl<T, S: Status, A: Clone + Allocator> Drop for RawTable<T, S, A> {
767    fn drop(&mut self) {
768        if core::mem::needs_drop::<T>() {
769            self.clear();
770        }
771    }
772}
773
774impl<T, S: Status, A: Clone + Allocator> IntoIterator for RawTable<T, S, A> {
775    type Item = T;
776    type IntoIter = IntoIter<T, S, A>;
777
778    fn into_iter(self) -> Self::IntoIter {
779        let this = ManuallyDrop::new(self);
780        IntoIter {
781            // SAFETY: We move out of `this` (`this` is `ManuallyDrop` and never
782            // dropped)
783            iter: unsafe { core::ptr::read(&this.data) }
784                .into_vec()
785                .into_iter(),
786            len: this.len,
787            phantom: PhantomData,
788        }
789    }
790}
791
792// === Iterators ===============================================================
793
794// --- Iter --------------------------------------------------------------------
795
796impl<'a, T, S: Status> Iterator for Iter<'a, T, S> {
797    type Item = &'a T;
798
799    #[inline]
800    fn next(&mut self) -> Option<&'a T> {
801        if self.len == 0 {
802            return None;
803        }
804        loop {
805            let next = self.iter.next();
806            debug_assert!(next.is_some());
807            // SAFETY: The remaining part of the slice has at least `self.len`
808            // elements by invariant
809            let slot = unsafe { next.unwrap_unchecked() };
810            if slot.status.is_hash() {
811                self.len -= 1;
812                // SAFETY: `slot.status` is a hash value, so `slot.data` is
813                // initialized.
814                return Some(unsafe { slot.data.assume_init_ref() });
815            }
816        }
817    }
818
819    #[inline]
820    fn size_hint(&self) -> (usize, Option<usize>) {
821        (self.len, Some(self.len))
822    }
823}
824
825impl<T, S: Status> ExactSizeIterator for Iter<'_, T, S> {
826    #[inline]
827    fn len(&self) -> usize {
828        self.len
829    }
830}
831
832impl<T, S: Status> FusedIterator for Iter<'_, T, S> {}
833
834// --- IterMut -----------------------------------------------------------------
835
836impl<'a, T, S: Status> Iterator for IterMut<'a, T, S> {
837    type Item = &'a mut T;
838
839    #[inline]
840    fn next(&mut self) -> Option<&'a mut T> {
841        if self.len == 0 {
842            return None;
843        }
844        loop {
845            let next = self.iter.next();
846            debug_assert!(next.is_some());
847            // SAFETY: The remaining part of the slice has at least `self.len`
848            // elements by invariant
849            let slot = unsafe { next.unwrap_unchecked() };
850            if slot.status.is_hash() {
851                self.len -= 1;
852                // SAFETY: `slot.status` is a hash value, so `slot.data` is
853                // initialized.
854                return Some(unsafe { slot.data.assume_init_mut() });
855            }
856        }
857    }
858
859    #[inline]
860    fn size_hint(&self) -> (usize, Option<usize>) {
861        (self.len, Some(self.len))
862    }
863}
864
865impl<T, S: Status> ExactSizeIterator for IterMut<'_, T, S> {
866    #[inline]
867    fn len(&self) -> usize {
868        self.len
869    }
870}
871
872impl<T, S: Status> FusedIterator for IterMut<'_, T, S> {}
873
874// --- IntoIter ----------------------------------------------------------------
875
876impl<T, S: Status, A: Allocator> Iterator for IntoIter<T, S, A> {
877    type Item = T;
878
879    #[inline]
880    fn next(&mut self) -> Option<T> {
881        if self.len == 0 {
882            return None;
883        }
884        loop {
885            let next = self.iter.next();
886            debug_assert!(next.is_some());
887            // SAFETY: The remaining part of the vector has at least `self.len`
888            // elements by invariant
889            let slot = unsafe { next.unwrap_unchecked() };
890            if slot.status.is_hash() {
891                self.len -= 1;
892                // SAFETY: `slot.status` is a hash value, so `slot.data` is
893                // initialized.
894                return Some(unsafe { slot.data.assume_init() });
895            }
896        }
897    }
898
899    #[inline]
900    fn size_hint(&self) -> (usize, Option<usize>) {
901        (self.len, Some(self.len))
902    }
903}
904
905impl<T, S: Status, A: Allocator> ExactSizeIterator for IntoIter<T, S, A> {
906    #[inline]
907    fn len(&self) -> usize {
908        self.len
909    }
910}
911
912impl<T, S: Status, A: Allocator> FusedIterator for IntoIter<T, S, A> {}
913
914impl<T, S: Status, A: Allocator> Drop for IntoIter<T, S, A> {
915    fn drop(&mut self) {
916        while self.len != 0 {
917            let next = self.iter.next();
918            debug_assert!(next.is_some());
919            // SAFETY: The remaining part of the vector has at least `self.len`
920            // elements by invariant
921            let mut slot = unsafe { next.unwrap_unchecked() };
922            if slot.status.is_hash() {
923                self.len -= 1;
924                // SAFETY: `slot.status` is a hash value, so `slot.data` is
925                // initialized. We drop/forget `slot`, so we don't risk a double
926                // free.
927                unsafe { slot.data.assume_init_drop() };
928            }
929        }
930    }
931}
932
933// --- Drain -------------------------------------------------------------------
934
935impl<T, S: Status> Iterator for Drain<'_, T, S> {
936    type Item = T;
937
938    #[inline]
939    fn next(&mut self) -> Option<T> {
940        if self.len == 0 {
941            return None;
942        }
943        loop {
944            let next = self.iter.next();
945            debug_assert!(next.is_some());
946            // SAFETY: The remaining part of the slice has at least `self.len`
947            // elements by invariant
948            let slot = unsafe { next.unwrap_unchecked() };
949            let status = slot.status;
950            slot.status = S::FREE;
951            if status.is_hash() {
952                self.len -= 1;
953                // SAFETY: `slot.status` is a hash value, so `slot.data` is
954                // initialized. We set `slot.status = S::FREE` above, so we
955                // don't duplicate the value.
956                return Some(unsafe { slot.data.assume_init_read() });
957            }
958        }
959    }
960
961    #[inline]
962    fn size_hint(&self) -> (usize, Option<usize>) {
963        (self.len, Some(self.len))
964    }
965}
966
967impl<T, S: Status> ExactSizeIterator for Drain<'_, T, S> {
968    #[inline]
969    fn len(&self) -> usize {
970        self.len
971    }
972}
973
974impl<T, S: Status> FusedIterator for Drain<'_, T, S> {}
975
976impl<T, S: Status> Drop for Drain<'_, T, S> {
977    fn drop(&mut self) {
978        while self.len != 0 {
979            let next = self.iter.next();
980            debug_assert!(next.is_some());
981            // SAFETY: The remaining part of the slice has at least `self.len`
982            // elements by invariant
983            let slot = unsafe { next.unwrap_unchecked() };
984            let status = slot.status;
985            slot.status = S::FREE;
986            if status.is_hash() {
987                self.len -= 1;
988                // SAFETY: `slot.status` is a hash value, so `slot.data` is
989                // initialized. We set `slot.status = S::FREE` above, so we
990                // don't drop the value twice.
991                unsafe { slot.data.assume_init_drop() };
992            }
993        }
994    }
995}
996
997// === Tests ===================================================================
998
999#[cfg(test)]
1000mod test {
1001    use super::*;
1002
1003    #[test]
1004    fn new_cap_len_0() {
1005        let table = RawTable::<u32, u32>::new();
1006        assert_eq!(table.capacity(), 0);
1007        assert_eq!(table.len(), 0);
1008    }
1009
1010    #[test]
1011    fn insert_get_iter() {
1012        let mut table = RawTable::<u32, u32>::new();
1013        let numbers = [1, 4, 0, 5, 14, 4];
1014        let sorted = [0, 1, 4, 5, 14];
1015
1016        for number in numbers {
1017            match table.find_or_find_insert_slot(number as u64, |&x| x == number) {
1018                Ok(_) => assert_eq!(number, 4),
1019                Err(slot) => unsafe {
1020                    table.insert_in_slot_unchecked(number as u64, slot, number);
1021                },
1022            }
1023        }
1024        assert_eq!(table.len(), 5);
1025        assert!(table.capacity() >= 5);
1026
1027        for mut number in numbers {
1028            assert_eq!(table.get(number as u64, |&x| x == number), Some(&number));
1029            assert_eq!(
1030                table.get_mut(number as u64, |&x| x == number),
1031                Some(&mut number)
1032            );
1033        }
1034
1035        let iter = table.iter();
1036        assert_eq!(iter.len(), 5);
1037        let mut res: Vec<u32> = iter.copied().collect();
1038        res.sort();
1039        assert_eq!(&res[..], &sorted[..]);
1040
1041        let iter = table.iter_mut();
1042        assert_eq!(iter.len(), 5);
1043        let mut res: Vec<u32> = iter.map(|&mut x| x).collect();
1044        res.sort();
1045        assert_eq!(&res[..], &sorted[..]);
1046
1047        let iter = table.into_iter();
1048        assert_eq!(iter.len(), 5);
1049        let mut res: Vec<u32> = iter.collect();
1050        res.sort();
1051        assert_eq!(&res[..], &sorted[..]);
1052    }
1053
1054    #[test]
1055    fn retain_drain() {
1056        let mut table = RawTable::<u32, u32>::new();
1057        let numbers = [1, 2, 3, 4, 5, 6, 7];
1058
1059        for number in numbers {
1060            match table.find_or_find_insert_slot(number as u64, |&x| x == number) {
1061                Ok(_) => unreachable!(),
1062                Err(slot) => unsafe {
1063                    table.insert_in_slot_unchecked(number as u64, slot, number);
1064                },
1065            }
1066        }
1067        assert_eq!(table.len(), 7);
1068
1069        table.retain(
1070            |&mut x| x.is_multiple_of(2),
1071            |x| assert!(!x.is_multiple_of(2)),
1072        );
1073        assert_eq!(table.len(), 3);
1074
1075        let iter = table.drain();
1076        assert_eq!(iter.len(), 3);
1077        let mut res: Vec<u32> = iter.collect();
1078        res.sort();
1079        assert_eq!(&res[..], &[2, 4, 6]);
1080        assert_eq!(table.len(), 0);
1081    }
1082
1083    #[test]
1084    fn remove() {
1085        let mut table = RawTable::<u32, u32>::new();
1086        let numbers = [4, 0, 2];
1087
1088        for number in numbers {
1089            match table.find_or_find_insert_slot(number as u64, |&x| x == number) {
1090                Ok(_) => unreachable!(),
1091                Err(slot) => unsafe {
1092                    table.insert_in_slot_unchecked(number as u64, slot, number);
1093                },
1094            }
1095        }
1096        assert_eq!(table.len(), 3);
1097
1098        assert_eq!(table.remove_entry(0, |&x| x == 0), Some(0));
1099        assert_eq!(table.len(), 2);
1100        assert_eq!(table.remove_entry(0, |&x| x == 0), None);
1101        assert_eq!(table.len(), 2);
1102    }
1103}