Skip to main content

kevy_map/
map.rs

1//! The KevyMap implementation: struct, allocation, probing, and the live
2//! lookup / insert / remove API. Helpers (`h2`, `prefetch_t0`, metadata
3//! constants) and the private `ProbeOutcome` enum are all map-scoped.
4//!
5//! Layout (single allocation):
6//!
7//! ```text
8//! +------------+------------+-----+---------------+---------+--------+
9//! | slot[0]    | slot[1]    | ... | slot[cap-1]   | padding | meta   |
10//! +------------+------------+-----+---------------+---------+--------+
11//! ^                                                          ^
12//! slots_ptr                                                  metadata_ptr
13//! ```
14//!
15//! Both pointers are precomputed at `alloc_table` time and never re-derived
16//! in the hot path. The single allocation cuts one alloc/dealloc pair vs
17//! the previous two-`Box<[…]>` layout, and keeps metadata + slots in
18//! adjacent pages (warmer TLB, contiguous OS-prefetch).
19
20use std::alloc::Layout;
21use std::fmt;
22use std::marker::PhantomData;
23use std::mem::MaybeUninit;
24use std::ptr::{self, NonNull};
25
26use kevy_hash::KevyHash;
27
28use crate::iter::{Iter, IterMut, Keys, Values};
29
30/// SIMD group width (16 metadata bytes loaded per probe iteration).
31pub(crate) const GROUP_WIDTH: usize = 16;
32
33/// Metadata byte for an empty slot (top bit set, value bits 1's — distinct
34/// from DELETED so the probe loop can stop at EMPTY but skip DELETED).
35pub(crate) const EMPTY: u8 = 0xFF;
36/// Metadata byte for a tombstone (top bit set, value bits 0).
37pub(crate) const DELETED: u8 = 0x80;
38/// Minimum table size. ≥ 16 (one SSE2 group) so the future SIMD path can run
39/// a full group scan unconditionally.
40pub(crate) const MIN_CAP: usize = 16;
41
42/// Top-7 bits of the hash, used as the per-slot metadata byte for occupied
43/// slots. The top bit is always 0 (so occupancy = `meta & 0x80 == 0`).
44#[inline]
45pub(crate) fn h2(hash: u64) -> u8 {
46    ((hash >> 57) & 0x7F) as u8
47}
48
49/// Issue a hint to fetch the cache line containing `ptr` into L1 ("T0" =
50/// "all levels"). Stable on x86_64 / aarch64; no-op elsewhere AND under
51/// `cfg(miri)` (miri cannot model inline asm / arch intrinsics, so the hint
52/// degrades to a no-op for unsafe-correctness testing — the semantic
53/// contract of `prefetch_t0` is "may do nothing", so this is sound).
54///
55/// `inline(always)` is the point: prefetch only helps when the load-latency
56/// window the call hides is bigger than the call site itself. A non-inlined
57/// call_site / ret pair (~10 ns out-of-order) wipes the benefit.
58#[allow(clippy::inline_always)]
59#[inline(always)]
60fn prefetch_t0(ptr: *const u8) {
61    #[cfg(all(target_arch = "x86_64", not(miri)))]
62    {
63        // SAFETY: _mm_prefetch reads no memory; any aligned/unaligned/
64        // out-of-bounds pointer is permitted by the ISA.
65        unsafe {
66            core::arch::x86_64::_mm_prefetch(
67                ptr as *const i8,
68                core::arch::x86_64::_MM_HINT_T0,
69            );
70        }
71    }
72    #[cfg(all(target_arch = "aarch64", not(miri)))]
73    {
74        // SAFETY: prfm reads no memory; any pointer permitted.
75        unsafe {
76            core::arch::asm!(
77                "prfm pldl1keep, [{p}]",
78                p = in(reg) ptr,
79                options(nostack, preserves_flags, readonly),
80            );
81        }
82    }
83    #[cfg(any(miri, not(any(target_arch = "x86_64", target_arch = "aarch64"))))]
84    {
85        let _ = ptr;
86    }
87}
88
89/// Compute the single-buffer layout for a table of `cap` slots: returns the
90/// combined `Layout` and the byte offset to the metadata array. Panics on
91/// arithmetic overflow (only reachable for cap ≈ usize::MAX which would OOM
92/// anyway).
93///
94/// Metadata size is `cap + GROUP_WIDTH` (hashbrown 0.15 layout): the first
95/// `cap` bytes are the real per-slot metadata, the trailing `GROUP_WIDTH`
96/// bytes mirror the leading ones so the branchless `set_meta` formula
97/// `index2 = ((i - GW) & mask) + GW` always lands inside the buffer (for
98/// `i = GROUP_WIDTH - 1` the formula evaluates to `cap + GROUP_WIDTH - 1`,
99/// the very last byte). That last byte is written by `set_meta` but never
100/// read by `Group::load` — SIMD loads from `group_start ∈ [0, cap)` reach
101/// at most `cap + GROUP_WIDTH - 2`.
102#[inline]
103pub(crate) fn table_layout<KV>(cap: usize) -> (Layout, usize) {
104    let slots = Layout::array::<MaybeUninit<KV>>(cap).expect("slots layout overflow");
105    let meta = Layout::array::<u8>(cap + GROUP_WIDTH).expect("metadata layout overflow");
106    let (combined, meta_offset) = slots.extend(meta).expect("layout extend overflow");
107    (combined.pad_to_align(), meta_offset)
108}
109
110/// An open-addressing Swiss-style hashtable keyed by [`KevyHash`].
111///
112/// Power-of-two capacity (`mask = cap - 1`); 7/8 load factor; linear probing
113/// over the metadata array; full slots' (K, V) live AoS in a parallel slot
114/// array of `MaybeUninit<(K, V)>` co-allocated with the metadata.
115///
116/// When `cap == 0` both pointers are dangling and no allocation is held.
117pub struct KevyMap<K, V> {
118    /// Slot array. `cap` initialised iff the corresponding metadata byte is
119    /// in `0x00..=0x7F`. Dangling when `cap == 0`.
120    pub(crate) slots_ptr: NonNull<MaybeUninit<(K, V)>>,
121    /// Metadata array (`cap + GROUP_WIDTH` bytes; trailing
122    /// `GROUP_WIDTH - 1` bytes mirror the leading ones for SIMD-safe
123    /// wraparound loads — the hashbrown layout). Dangling when `cap == 0`.
124    pub(crate) metadata_ptr: NonNull<u8>,
125    /// Allocated slot count. `0` when no allocation is held.
126    pub(crate) cap: usize,
127    /// `cap - 1` when `cap > 0`; `0` when `cap == 0`.
128    pub(crate) mask: usize,
129    /// Live entries.
130    pub(crate) occupied: usize,
131    /// Tombstones (not yet reclaimed).
132    pub(crate) deleted: usize,
133    /// `true` when the table buffer was obtained from
134    /// [`kevy_madvise::mmap_anon_aligned_2mb`] (large tables that wanted
135    /// THP-aligned storage); `false` when it came from the global allocator.
136    /// Drives the dispatch in [`Drop`] between `munmap_2mb` and `dealloc`.
137    pub(crate) mmap_backed: bool,
138    /// Marker so dropck and variance treat us as owning `(K, V)` like a
139    /// `Box<[MaybeUninit<(K, V)>]>` would.
140    pub(crate) _marker: PhantomData<(K, V)>,
141}
142
143// SAFETY: KevyMap owns its `(K, V)` entries (via the slot allocation). The
144// `NonNull<...>` fields are conceptually `Box<[…]>` and inherit the same
145// Send/Sync bounds: send-K + send-V ⇒ KevyMap is Send. Same for Sync.
146unsafe impl<K: Send, V: Send> Send for KevyMap<K, V> {}
147unsafe impl<K: Sync, V: Sync> Sync for KevyMap<K, V> {}
148
149/// `(metadata, slots)` parallel-slice pair returned by [`KevyMap::as_slices`].
150/// Aliased so the long `(&[u8], &[MaybeUninit<(K, V)>])` signature doesn't
151/// trip clippy's `type_complexity` lint on a member-by-member basis.
152type SlotSlices<'a, K, V> = (&'a [u8], &'a [MaybeUninit<(K, V)>]);
153
154/// Mutable-slot variant of [`SlotSlices`], returned by
155/// [`KevyMap::as_mut_slices`] for [`KevyMap::iter_mut`].
156type SlotSlicesMut<'a, K, V> = (&'a [u8], &'a mut [MaybeUninit<(K, V)>]);
157
158pub(crate) enum ProbeOutcome {
159    Found(usize),
160    NotFound {
161        insert_at: usize,
162        via_tombstone: bool,
163    },
164}
165
166impl<K, V> KevyMap<K, V> {
167    /// Construct an empty map without allocating.
168    pub fn new() -> Self {
169        Self {
170            slots_ptr: NonNull::dangling(),
171            metadata_ptr: NonNull::dangling(),
172            cap: 0,
173            mask: 0,
174            occupied: 0,
175            deleted: 0,
176            mmap_backed: false,
177            _marker: PhantomData,
178        }
179    }
180
181    /// Construct a map sized to hold `cap_hint` entries without growing
182    /// (accounting for the 7/8 load factor).
183    pub fn with_capacity(cap_hint: usize) -> Self {
184        if cap_hint == 0 {
185            return Self::new();
186        }
187        // ceil(cap_hint * 8 / 7) → smallest table where cap_hint fits below 7/8.
188        let needed = cap_hint.saturating_mul(8).div_ceil(7);
189        let cap = needed.next_power_of_two().max(MIN_CAP);
190        Self::alloc_table(cap)
191    }
192
193    // alloc_table + Drop live in `crate::alloc` so this file stays under
194    // the 500-LOC house rule.
195
196    /// Write `v` into metadata slot `i`, also updating the mirror byte
197    /// at `cap + i` when `i < GROUP_WIDTH`. Every metadata mutation goes
198    /// through this helper so the mirror stays consistent with the real
199    /// metadata.
200    ///
201    /// Branchless: the formula `index2 = ((i - GW) & mask) + GW`
202    /// (hashbrown 0.15's `set_ctrl`) yields the real mirror position
203    /// `cap + i` when `i < GW`, and yields `i` itself when `i >= GW`.
204    /// The second write is therefore either to the mirror byte or a
205    /// duplicate write to the same real byte (a no-op). No branch.
206    #[inline]
207    pub(crate) fn set_meta(&mut self, i: usize, v: u8) {
208        debug_assert!(i < self.cap);
209        // SAFETY: i ∈ [0, cap); i2 ∈ [GROUP_WIDTH, cap + GROUP_WIDTH);
210        // both in-bounds since metadata buffer length is cap + GROUP_WIDTH.
211        let i2 = (i.wrapping_sub(GROUP_WIDTH) & self.mask) + GROUP_WIDTH;
212        unsafe {
213            *self.metadata_ptr.as_ptr().add(i) = v;
214            *self.metadata_ptr.as_ptr().add(i2) = v;
215        }
216    }
217
218    /// Live entry count.
219    #[inline]
220    pub fn len(&self) -> usize {
221        self.occupied
222    }
223
224    /// Whether the map has zero live entries.
225    #[inline]
226    pub fn is_empty(&self) -> bool {
227        self.occupied == 0
228    }
229
230    /// Allocated slot count (NOT live entries).
231    #[inline]
232    pub fn capacity(&self) -> usize {
233        self.cap
234    }
235
236    /// Drop every live entry and reset the metadata. Keeps the allocation.
237    pub fn clear(&mut self) {
238        if self.cap == 0 {
239            return;
240        }
241        if std::mem::needs_drop::<(K, V)>() {
242            for i in 0..self.cap {
243                // SAFETY: i < cap ⇒ metadata pointer in-bounds.
244                let meta = unsafe { *self.metadata_ptr.as_ptr().add(i) };
245                if meta & 0x80 == 0 {
246                    // SAFETY: full slot ⇒ initialised.
247                    unsafe {
248                        ptr::drop_in_place(self.slots_ptr.as_ptr().add(i).cast::<(K, V)>());
249                    }
250                }
251            }
252        }
253        // Reset entire metadata buffer (real range + mirror tail) in one memset.
254        // SAFETY: metadata buffer is exactly cap + GROUP_WIDTH bytes wide.
255        unsafe {
256            ptr::write_bytes(self.metadata_ptr.as_ptr(), EMPTY, self.cap + GROUP_WIDTH);
257        }
258        self.occupied = 0;
259        self.deleted = 0;
260    }
261
262    /// `(&K, &V)` over all live entries; order is unspecified.
263    pub fn iter(&self) -> Iter<'_, K, V> {
264        let (metadata, slots) = self.as_slices();
265        Iter::new(metadata, slots)
266    }
267
268    /// `iter` that begins at bucket `start` (clamped to `capacity()`) and
269    /// walks to the end. To sweep the full ring beginning at a random offset
270    /// — the pattern the kevy-store eviction sampler uses — chain it with a
271    /// second `iter_from_bucket(0)` and `take(start)`.
272    pub fn iter_from_bucket(&self, start: usize) -> Iter<'_, K, V> {
273        let (metadata, slots) = self.as_slices();
274        Iter::with_start(metadata, slots, start)
275    }
276
277    /// `(&K, &mut V)` over all live entries; order is unspecified. Keys stay
278    /// shared (mutating a key in place would corrupt its bucket).
279    pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
280        let (metadata, slots) = self.as_mut_slices();
281        IterMut::new(metadata, slots)
282    }
283
284    /// `&K` over all live entries.
285    pub fn keys(&self) -> Keys<'_, K, V> {
286        Keys::new(self.iter())
287    }
288
289    /// `&V` over all live entries.
290    pub fn values(&self) -> Values<'_, K, V> {
291        Values::new(self.iter())
292    }
293
294    /// Borrow the metadata and slots as parallel slices of length `cap`.
295    /// Used by [`KevyMap::iter`] (which only needs the real slot range,
296    /// not the mirror tail). When `cap == 0` returns two empty slices —
297    /// the dangling pointer is never dereferenced.
298    #[inline]
299    fn as_slices(&self) -> SlotSlices<'_, K, V> {
300        if self.cap == 0 {
301            return (&[], &[]);
302        }
303        // SAFETY: cap > 0 ⇒ both pointers are valid for `cap` reads; we hand
304        // out shared borrows tied to `&self`'s lifetime, so the allocation
305        // outlives the returned slices.
306        unsafe {
307            (
308                std::slice::from_raw_parts(self.metadata_ptr.as_ptr(), self.cap),
309                std::slice::from_raw_parts(self.slots_ptr.as_ptr(), self.cap),
310            )
311        }
312    }
313
314    /// [`KevyMap::as_slices`] with mutable slots (metadata stays shared —
315    /// [`KevyMap::iter_mut`] only reads it). The disjoint borrows are sound:
316    /// metadata and slots are separate allocations.
317    #[inline]
318    fn as_mut_slices(&mut self) -> SlotSlicesMut<'_, K, V> {
319        if self.cap == 0 {
320            return (&[], &mut []);
321        }
322        // SAFETY: cap > 0 ⇒ both pointers are valid; `&mut self` guarantees
323        // exclusive access for the lifetime of the returned slices.
324        unsafe {
325            (
326                std::slice::from_raw_parts(self.metadata_ptr.as_ptr(), self.cap),
327                std::slice::from_raw_parts_mut(self.slots_ptr.as_ptr(), self.cap),
328            )
329        }
330    }
331
332    /// Hint the CPU to fetch the bucket cache line that a probe at `hash`
333    /// would start at. The prefetch lever against the bucket-probe DRAM
334    /// miss: the command-batch driver calls this for command N+1 while
335    /// finishing command N, so by the time N+1 actually probes the
336    /// metadata, the line is in L1.
337    ///
338    /// No-op when the table is empty. Cheap when not empty (a single
339    /// `prefetcht0` on x86_64 / `prfm pldl1keep` on aarch64; a regular
340    /// volatile load on other arches via [`std::intrinsics`] — but we
341    /// only use stable intrinsics here, so non-x86/aarch64 architectures
342    /// degrade to a no-op rather than a fake hint).
343    ///
344    /// `inline(always)` is the point — see the `prefetch_t0` rationale.
345    #[allow(clippy::inline_always)]
346    #[inline(always)]
347    pub fn prefetch_for_hash(&self, hash: u64) {
348        if self.cap == 0 {
349            return;
350        }
351        let idx = (hash as usize) & self.mask;
352        // SAFETY: idx < cap ≤ metadata length ⇒ pointer in-bounds; prefetch
353        // reads never trap and never observe values.
354        let ptr = unsafe { self.metadata_ptr.as_ptr().add(idx) };
355        prefetch_t0(ptr);
356    }
357
358    /// 7/8 of the capacity — the inclusive max for `occupied + deleted`.
359    #[inline]
360    pub(crate) fn threshold(&self) -> usize {
361        self.cap - (self.cap / 8)
362    }
363}
364
365
366impl<K, V> Default for KevyMap<K, V> {
367    fn default() -> Self {
368        Self::new()
369    }
370}
371
372/// `m[&q]` panics on missing key (matches `std::HashMap::Index` semantics).
373impl<K, Q, V> std::ops::Index<&Q> for KevyMap<K, V>
374where
375    K: std::borrow::Borrow<Q>,
376    Q: KevyHash + Eq + ?Sized,
377{
378    type Output = V;
379    fn index(&self, key: &Q) -> &V {
380        self.get(key).expect("no entry found for key")
381    }
382}
383
384impl<K: fmt::Debug, V: fmt::Debug> fmt::Debug for KevyMap<K, V> {
385    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
386        f.debug_map().entries(self.iter()).finish()
387    }
388}
389
390impl<K: KevyHash + Eq, V> FromIterator<(K, V)> for KevyMap<K, V> {
391    fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
392        let iter = iter.into_iter();
393        let mut m = match iter.size_hint() {
394            (lo, Some(hi)) if hi <= lo.saturating_mul(2) => Self::with_capacity(hi),
395            (lo, _) => Self::with_capacity(lo),
396        };
397        for (k, v) in iter {
398            m.insert(k, v);
399        }
400        m
401    }
402}
403
404impl<K: KevyHash + Eq, V> Extend<(K, V)> for KevyMap<K, V> {
405    fn extend<I: IntoIterator<Item = (K, V)>>(&mut self, iter: I) {
406        for (k, v) in iter {
407            self.insert(k, v);
408        }
409    }
410}
411
412#[cfg(test)]
413#[path = "map_tests.rs"]
414mod tests;