kevy_map/map.rs
1//! The KevyMap implementation: struct, allocation, probing, and the live
2//! lookup / insert / remove API. Helpers (`h2`, `prefetch_t0`, metadata
3//! constants) and the private `ProbeOutcome` enum are all map-scoped.
4//!
5//! Layout (single allocation):
6//!
7//! ```text
8//! +------------+------------+-----+---------------+---------+--------+
9//! | slot[0] | slot[1] | ... | slot[cap-1] | padding | meta |
10//! +------------+------------+-----+---------------+---------+--------+
11//! ^ ^
12//! slots_ptr metadata_ptr
13//! ```
14//!
15//! Both pointers are precomputed at `alloc_table` time and never re-derived
16//! in the hot path. The single allocation cuts one alloc/dealloc pair vs
17//! the previous two-`Box<[…]>` layout, and keeps metadata + slots in
18//! adjacent pages (warmer TLB, contiguous OS-prefetch).
19
20use std::alloc::{Layout, alloc, dealloc, handle_alloc_error};
21use std::fmt;
22use std::marker::PhantomData;
23use std::mem::MaybeUninit;
24use std::ptr::{self, NonNull};
25
26use kevy_hash::KevyHash;
27
28use crate::iter::{Iter, Keys, Values};
29
30/// SIMD group width (16 metadata bytes loaded per probe iteration).
31pub(crate) const GROUP_WIDTH: usize = 16;
32
33/// Metadata byte for an empty slot (top bit set, value bits 1's — distinct
34/// from DELETED so the probe loop can stop at EMPTY but skip DELETED).
35pub(crate) const EMPTY: u8 = 0xFF;
36/// Metadata byte for a tombstone (top bit set, value bits 0).
37pub(crate) const DELETED: u8 = 0x80;
38/// Minimum table size. ≥ 16 (one SSE2 group) so the future SIMD path can run
39/// a full group scan unconditionally.
40pub(crate) const MIN_CAP: usize = 16;
41
42/// Top-7 bits of the hash, used as the per-slot metadata byte for occupied
43/// slots. The top bit is always 0 (so occupancy = `meta & 0x80 == 0`).
44#[inline]
45pub(crate) fn h2(hash: u64) -> u8 {
46 ((hash >> 57) & 0x7F) as u8
47}
48
49/// Issue a hint to fetch the cache line containing `ptr` into L1 ("T0" =
50/// "all levels"). Stable on x86_64 / aarch64; no-op elsewhere AND under
51/// `cfg(miri)` (miri cannot model inline asm / arch intrinsics, so the hint
52/// degrades to a no-op for unsafe-correctness testing — the semantic
53/// contract of `prefetch_t0` is "may do nothing", so this is sound).
54#[inline(always)]
55fn prefetch_t0(ptr: *const u8) {
56 #[cfg(all(target_arch = "x86_64", not(miri)))]
57 {
58 // SAFETY: _mm_prefetch reads no memory; any aligned/unaligned/
59 // out-of-bounds pointer is permitted by the ISA.
60 unsafe {
61 core::arch::x86_64::_mm_prefetch(
62 ptr as *const i8,
63 core::arch::x86_64::_MM_HINT_T0,
64 );
65 }
66 }
67 #[cfg(all(target_arch = "aarch64", not(miri)))]
68 {
69 // SAFETY: prfm reads no memory; any pointer permitted.
70 unsafe {
71 core::arch::asm!(
72 "prfm pldl1keep, [{p}]",
73 p = in(reg) ptr,
74 options(nostack, preserves_flags, readonly),
75 );
76 }
77 }
78 #[cfg(any(miri, not(any(target_arch = "x86_64", target_arch = "aarch64"))))]
79 {
80 let _ = ptr;
81 }
82}
83
84/// Compute the single-buffer layout for a table of `cap` slots: returns the
85/// combined `Layout` and the byte offset to the metadata array. Panics on
86/// arithmetic overflow (only reachable for cap ≈ usize::MAX which would OOM
87/// anyway).
88///
89/// Metadata size is `cap + GROUP_WIDTH` (hashbrown 0.15 layout): the first
90/// `cap` bytes are the real per-slot metadata, the trailing `GROUP_WIDTH`
91/// bytes mirror the leading ones so the branchless `set_meta` formula
92/// `index2 = ((i - GW) & mask) + GW` always lands inside the buffer (for
93/// `i = GROUP_WIDTH - 1` the formula evaluates to `cap + GROUP_WIDTH - 1`,
94/// the very last byte). That last byte is written by `set_meta` but never
95/// read by `Group::load` — SIMD loads from `group_start ∈ [0, cap)` reach
96/// at most `cap + GROUP_WIDTH - 2`.
97#[inline]
98pub(crate) fn table_layout<KV>(cap: usize) -> (Layout, usize) {
99 let slots = Layout::array::<MaybeUninit<KV>>(cap).expect("slots layout overflow");
100 let meta = Layout::array::<u8>(cap + GROUP_WIDTH).expect("metadata layout overflow");
101 let (combined, meta_offset) = slots.extend(meta).expect("layout extend overflow");
102 (combined.pad_to_align(), meta_offset)
103}
104
105/// An open-addressing Swiss-style hashtable keyed by [`KevyHash`].
106///
107/// Power-of-two capacity (`mask = cap - 1`); 7/8 load factor; linear probing
108/// over the metadata array; full slots' (K, V) live AoS in a parallel slot
109/// array of `MaybeUninit<(K, V)>` co-allocated with the metadata.
110///
111/// When `cap == 0` both pointers are dangling and no allocation is held.
112pub struct KevyMap<K, V> {
113 /// Slot array. `cap` initialised iff the corresponding metadata byte is
114 /// in `0x00..=0x7F`. Dangling when `cap == 0`.
115 pub(crate) slots_ptr: NonNull<MaybeUninit<(K, V)>>,
116 /// Metadata array (`cap + GROUP_WIDTH` bytes; trailing
117 /// `GROUP_WIDTH - 1` bytes mirror the leading ones for SIMD-safe
118 /// wraparound loads — the hashbrown layout). Dangling when `cap == 0`.
119 pub(crate) metadata_ptr: NonNull<u8>,
120 /// Allocated slot count. `0` when no allocation is held.
121 pub(crate) cap: usize,
122 /// `cap - 1` when `cap > 0`; `0` when `cap == 0`.
123 pub(crate) mask: usize,
124 /// Live entries.
125 pub(crate) occupied: usize,
126 /// Tombstones (not yet reclaimed).
127 pub(crate) deleted: usize,
128 /// Marker so dropck and variance treat us as owning `(K, V)` like a
129 /// `Box<[MaybeUninit<(K, V)>]>` would.
130 _marker: PhantomData<(K, V)>,
131}
132
133// SAFETY: KevyMap owns its `(K, V)` entries (via the slot allocation). The
134// `NonNull<...>` fields are conceptually `Box<[…]>` and inherit the same
135// Send/Sync bounds: send-K + send-V ⇒ KevyMap is Send. Same for Sync.
136unsafe impl<K: Send, V: Send> Send for KevyMap<K, V> {}
137unsafe impl<K: Sync, V: Sync> Sync for KevyMap<K, V> {}
138
139/// `(metadata, slots)` parallel-slice pair returned by [`KevyMap::as_slices`].
140/// Aliased so the long `(&[u8], &[MaybeUninit<(K, V)>])` signature doesn't
141/// trip clippy's `type_complexity` lint on a member-by-member basis.
142type SlotSlices<'a, K, V> = (&'a [u8], &'a [MaybeUninit<(K, V)>]);
143
144pub(crate) enum ProbeOutcome {
145 Found(usize),
146 NotFound {
147 insert_at: usize,
148 via_tombstone: bool,
149 },
150}
151
152impl<K, V> KevyMap<K, V> {
153 /// Construct an empty map without allocating.
154 pub fn new() -> Self {
155 Self {
156 slots_ptr: NonNull::dangling(),
157 metadata_ptr: NonNull::dangling(),
158 cap: 0,
159 mask: 0,
160 occupied: 0,
161 deleted: 0,
162 _marker: PhantomData,
163 }
164 }
165
166 /// Construct a map sized to hold `cap_hint` entries without growing
167 /// (accounting for the 7/8 load factor).
168 pub fn with_capacity(cap_hint: usize) -> Self {
169 if cap_hint == 0 {
170 return Self::new();
171 }
172 // ceil(cap_hint * 8 / 7) → smallest table where cap_hint fits below 7/8.
173 let needed = cap_hint.saturating_mul(8).div_ceil(7);
174 let cap = needed.next_power_of_two().max(MIN_CAP);
175 Self::alloc_table(cap)
176 }
177
178 pub(crate) fn alloc_table(cap: usize) -> Self {
179 debug_assert!(cap.is_power_of_two());
180 debug_assert!(cap >= MIN_CAP);
181
182 let (layout, meta_offset) = table_layout::<(K, V)>(cap);
183 // SAFETY: layout has non-zero size (metadata alone is ≥ MIN_CAP +
184 // GROUP_WIDTH - 1 ≥ 31 bytes). alloc returns either a valid
185 // allocation of `layout` or null.
186 let base = unsafe { alloc(layout) };
187 if base.is_null() {
188 handle_alloc_error(layout);
189 }
190 // Initialise the metadata range (real + mirror tail) to EMPTY in a
191 // single memset. The slot array is left uninitialised — slots
192 // become initialised only when their metadata byte transitions
193 // out of the high-bit-set state (EMPTY/DELETED).
194 let meta_byte_ptr = unsafe { base.add(meta_offset) };
195 unsafe { ptr::write_bytes(meta_byte_ptr, EMPTY, cap + GROUP_WIDTH) };
196
197 let slots_ptr = base as *mut MaybeUninit<(K, V)>;
198 let metadata_ptr = meta_byte_ptr;
199
200 // single-buffer redo: hint THP on the entire buffer in
201 // one madvise call. The combined allocation is `meta_offset +
202 // cap + GROUP_WIDTH` bytes (== `layout.size()` minus padding).
203 // On 10M+ key tables the metadata alone is 16 MB — well over the
204 // 2 MB HP boundary, so the kernel's khugepaged can promote it in
205 // place. Cheap on the non-Linux paths (compile-time no-op).
206 kevy_madvise::advise_hugepage(base as *const u8, layout.size());
207
208 Self {
209 // SAFETY: alloc returned non-null; raw pointers are derived
210 // within the same allocation.
211 slots_ptr: unsafe { NonNull::new_unchecked(slots_ptr) },
212 metadata_ptr: unsafe { NonNull::new_unchecked(metadata_ptr) },
213 cap,
214 mask: cap - 1,
215 occupied: 0,
216 deleted: 0,
217 _marker: PhantomData,
218 }
219 }
220
221 /// Write `v` into metadata slot `i`, also updating the mirror byte
222 /// at `cap + i` when `i < GROUP_WIDTH`. Every metadata mutation goes
223 /// through this helper so the mirror stays consistent with the real
224 /// metadata.
225 ///
226 /// Branchless: the formula `index2 = ((i - GW) & mask) + GW`
227 /// (hashbrown 0.15's `set_ctrl`) yields the real mirror position
228 /// `cap + i` when `i < GW`, and yields `i` itself when `i >= GW`.
229 /// The second write is therefore either to the mirror byte or a
230 /// duplicate write to the same real byte (a no-op). No branch.
231 #[inline]
232 pub(crate) fn set_meta(&mut self, i: usize, v: u8) {
233 debug_assert!(i < self.cap);
234 // SAFETY: i ∈ [0, cap); i2 ∈ [GROUP_WIDTH, cap + GROUP_WIDTH);
235 // both in-bounds since metadata buffer length is cap + GROUP_WIDTH.
236 let i2 = (i.wrapping_sub(GROUP_WIDTH) & self.mask) + GROUP_WIDTH;
237 unsafe {
238 *self.metadata_ptr.as_ptr().add(i) = v;
239 *self.metadata_ptr.as_ptr().add(i2) = v;
240 }
241 }
242
243 /// Live entry count.
244 #[inline]
245 pub fn len(&self) -> usize {
246 self.occupied
247 }
248
249 /// Whether the map has zero live entries.
250 #[inline]
251 pub fn is_empty(&self) -> bool {
252 self.occupied == 0
253 }
254
255 /// Allocated slot count (NOT live entries).
256 #[inline]
257 pub fn capacity(&self) -> usize {
258 self.cap
259 }
260
261 /// Drop every live entry and reset the metadata. Keeps the allocation.
262 pub fn clear(&mut self) {
263 if self.cap == 0 {
264 return;
265 }
266 if std::mem::needs_drop::<(K, V)>() {
267 for i in 0..self.cap {
268 // SAFETY: i < cap ⇒ metadata pointer in-bounds.
269 let meta = unsafe { *self.metadata_ptr.as_ptr().add(i) };
270 if meta & 0x80 == 0 {
271 // SAFETY: full slot ⇒ initialised.
272 unsafe {
273 ptr::drop_in_place(self.slots_ptr.as_ptr().add(i) as *mut (K, V));
274 }
275 }
276 }
277 }
278 // Reset entire metadata buffer (real range + mirror tail) in one memset.
279 // SAFETY: metadata buffer is exactly cap + GROUP_WIDTH bytes wide.
280 unsafe {
281 ptr::write_bytes(self.metadata_ptr.as_ptr(), EMPTY, self.cap + GROUP_WIDTH);
282 }
283 self.occupied = 0;
284 self.deleted = 0;
285 }
286
287 /// `(&K, &V)` over all live entries; order is unspecified.
288 pub fn iter(&self) -> Iter<'_, K, V> {
289 let (metadata, slots) = self.as_slices();
290 Iter::new(metadata, slots)
291 }
292
293 /// `iter` that begins at bucket `start` (clamped to `capacity()`) and
294 /// walks to the end. To sweep the full ring beginning at a random offset
295 /// — the pattern the kevy-store eviction sampler uses — chain it with a
296 /// second `iter_from_bucket(0)` and `take(start)`.
297 pub fn iter_from_bucket(&self, start: usize) -> Iter<'_, K, V> {
298 let (metadata, slots) = self.as_slices();
299 Iter::with_start(metadata, slots, start)
300 }
301
302 /// `&K` over all live entries.
303 pub fn keys(&self) -> Keys<'_, K, V> {
304 Keys::new(self.iter())
305 }
306
307 /// `&V` over all live entries.
308 pub fn values(&self) -> Values<'_, K, V> {
309 Values::new(self.iter())
310 }
311
312 /// Borrow the metadata and slots as parallel slices of length `cap`.
313 /// Used by [`KevyMap::iter`] (which only needs the real slot range,
314 /// not the mirror tail). When `cap == 0` returns two empty slices —
315 /// the dangling pointer is never dereferenced.
316 #[inline]
317 fn as_slices(&self) -> SlotSlices<'_, K, V> {
318 if self.cap == 0 {
319 return (&[], &[]);
320 }
321 // SAFETY: cap > 0 ⇒ both pointers are valid for `cap` reads; we hand
322 // out shared borrows tied to `&self`'s lifetime, so the allocation
323 // outlives the returned slices.
324 unsafe {
325 (
326 std::slice::from_raw_parts(self.metadata_ptr.as_ptr(), self.cap),
327 std::slice::from_raw_parts(self.slots_ptr.as_ptr(), self.cap),
328 )
329 }
330 }
331
332 /// Hint the CPU to fetch the bucket cache line that a probe at `hash`
333 /// would start at. The prefetch lever against the bucket-probe DRAM
334 /// miss: the command-batch driver calls this for command N+1 while
335 /// finishing command N, so by the time N+1 actually probes the
336 /// metadata, the line is in L1.
337 ///
338 /// No-op when the table is empty. Cheap when not empty (a single
339 /// `prefetcht0` on x86_64 / `prfm pldl1keep` on aarch64; a regular
340 /// volatile load on other arches via [`std::intrinsics`] — but we
341 /// only use stable intrinsics here, so non-x86/aarch64 architectures
342 /// degrade to a no-op rather than a fake hint).
343 #[inline(always)]
344 pub fn prefetch_for_hash(&self, hash: u64) {
345 if self.cap == 0 {
346 return;
347 }
348 let idx = (hash as usize) & self.mask;
349 // SAFETY: idx < cap ≤ metadata length ⇒ pointer in-bounds; prefetch
350 // reads never trap and never observe values.
351 let ptr = unsafe { self.metadata_ptr.as_ptr().add(idx) };
352 prefetch_t0(ptr);
353 }
354
355 /// 7/8 of the capacity — the inclusive max for `occupied + deleted`.
356 #[inline]
357 pub(crate) fn threshold(&self) -> usize {
358 self.cap - (self.cap / 8)
359 }
360}
361
362
363impl<K, V> Default for KevyMap<K, V> {
364 fn default() -> Self {
365 Self::new()
366 }
367}
368
369/// `m[&q]` panics on missing key (matches `std::HashMap::Index` semantics).
370impl<K, Q, V> std::ops::Index<&Q> for KevyMap<K, V>
371where
372 K: std::borrow::Borrow<Q>,
373 Q: KevyHash + Eq + ?Sized,
374{
375 type Output = V;
376 fn index(&self, key: &Q) -> &V {
377 self.get(key).expect("no entry found for key")
378 }
379}
380
381impl<K, V> Drop for KevyMap<K, V> {
382 fn drop(&mut self) {
383 if self.cap == 0 {
384 return;
385 }
386 if std::mem::needs_drop::<(K, V)>() {
387 for i in 0..self.cap {
388 // SAFETY: i < cap ⇒ in-bounds.
389 let meta = unsafe { *self.metadata_ptr.as_ptr().add(i) };
390 if meta & 0x80 == 0 {
391 // SAFETY: full slot ⇒ initialised.
392 unsafe {
393 ptr::drop_in_place(self.slots_ptr.as_ptr().add(i) as *mut (K, V));
394 }
395 }
396 }
397 }
398 // Free the single combined allocation. `slots_ptr` IS the base of
399 // the allocation (see alloc_table's layout computation: slots are
400 // at offset 0; metadata sits at meta_offset).
401 let (layout, _) = table_layout::<(K, V)>(self.cap);
402 // SAFETY: cap > 0 ⇒ slots_ptr is non-null and was returned by `alloc`
403 // with the same Layout (table_layout is deterministic on cap).
404 unsafe {
405 dealloc(self.slots_ptr.as_ptr() as *mut u8, layout);
406 }
407 }
408}
409
410impl<K: fmt::Debug, V: fmt::Debug> fmt::Debug for KevyMap<K, V> {
411 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
412 f.debug_map().entries(self.iter()).finish()
413 }
414}
415
416impl<K: KevyHash + Eq, V> FromIterator<(K, V)> for KevyMap<K, V> {
417 fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
418 let iter = iter.into_iter();
419 let mut m = match iter.size_hint() {
420 (lo, Some(hi)) if hi <= lo.saturating_mul(2) => Self::with_capacity(hi),
421 (lo, _) => Self::with_capacity(lo),
422 };
423 for (k, v) in iter {
424 m.insert(k, v);
425 }
426 m
427 }
428}
429
430impl<K: KevyHash + Eq, V> Extend<(K, V)> for KevyMap<K, V> {
431 fn extend<I: IntoIterator<Item = (K, V)>>(&mut self, iter: I) {
432 for (k, v) in iter {
433 self.insert(k, v);
434 }
435 }
436}
437
438#[cfg(test)]
439#[path = "map_tests.rs"]
440mod tests;