1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
//! Key-trait-bound `KevyMap` operations: insert/grow/lookup/remove.
//!
//! Split out of [`crate::map`] for file-size hygiene. The raw / non-keyed
//! impl block (allocation, metadata bookkeeping, iter, Drop, trait impls)
//! stays in `map.rs`; everything that needs `K: KevyHash + Eq` or
//! `K: Borrow<Q>, Q: KevyHash + Eq` lives here.
use std::borrow::Borrow;
use std::ptr;
use kevy_hash::KevyHash;
use crate::group::Group;
use crate::map::{DELETED, EMPTY, GROUP_WIDTH, KevyMap, MIN_CAP, ProbeOutcome, h2};
impl<K: KevyHash + Eq, V> KevyMap<K, V> {
/// Insert `(key, value)`. Returns the old value if `key` was already
/// present. Following `std::HashMap` semantics, the existing K is kept on
/// overwrite — only V is replaced.
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
self.maybe_grow();
let hash = key.kevy_hash();
match self.probe_with_key(hash, &key) {
ProbeOutcome::Found(idx) => {
// SAFETY: slot is full ⇒ initialised. We replace only the V
// field; the old K is kept (std HashMap semantics).
let v_ptr = unsafe {
let kv: *mut (K, V) = self.slots_ptr.as_ptr().add(idx) as *mut (K, V);
ptr::addr_of_mut!((*kv).1)
};
let old_v = unsafe { ptr::replace(v_ptr, value) };
drop(key);
Some(old_v)
}
ProbeOutcome::NotFound {
insert_at,
via_tombstone,
} => {
self.set_meta(insert_at, h2(hash));
// SAFETY: insert_at < cap ⇒ slot pointer in-bounds; we write
// (K, V) into a previously uninitialised slot.
unsafe {
(*self.slots_ptr.as_ptr().add(insert_at)).write((key, value));
}
self.occupied += 1;
if via_tombstone {
self.deleted -= 1;
}
None
}
}
}
fn maybe_grow(&mut self) {
if self.cap == 0 || (self.occupied + self.deleted) >= self.threshold() {
self.grow();
}
}
fn grow(&mut self) {
let new_cap = if self.cap == 0 {
MIN_CAP
} else {
self.cap
.checked_mul(2)
.expect("kevy-map: capacity doubling overflow")
};
let mut new_map = Self::alloc_table(new_cap);
// Move every live entry over. After ptr::read'ing a slot we mark its
// metadata DELETED, so any subsequent Drop (incl. panic unwind) won't
// double-free; the old allocation will free with all-DELETED metadata.
//
// Only iterate the real slot range `[0, cap)`; the trailing mirror
// bytes are bookkeeping for SIMD-load wraparound, not real slots.
// Direct metadata writes are safe here because the old `self` table
// is going away (we swap with new_map then drop), so a stale mirror
// doesn't matter.
let old_cap = self.cap;
for i in 0..old_cap {
// SAFETY: i < old_cap ⇒ metadata in-bounds.
let meta = unsafe { *self.metadata_ptr.as_ptr().add(i) };
if meta & 0x80 == 0 {
// SAFETY: full slot ⇒ initialised; we mark DELETED immediately
// so this byte is never re-read as occupied.
let (k, v) = unsafe { ptr::read(self.slots_ptr.as_ptr().add(i) as *const (K, V)) };
unsafe { *self.metadata_ptr.as_ptr().add(i) = DELETED };
let hash = k.kevy_hash();
new_map.insert_known_unique(hash, k, v);
}
}
// All occupied entries are now in new_map; the old self has no live slots.
self.occupied = 0;
self.deleted = 0;
std::mem::swap(self, &mut new_map);
// new_map (now the old self) drops; metadata is all DELETED (or EMPTY
// for previously-empty slots) ⇒ Drop walks but touches no slots.
}
/// Insert under the assumption that the key isn't already present (used
/// by `grow` to repopulate the new table). Skips the duplicate-key
/// check. Uses a 16-slot SIMD group scan to find the first EMPTY.
fn insert_known_unique(&mut self, hash: u64, k: K, v: V) {
let h2v = h2(hash);
let mut group_start = (hash as usize) & self.mask;
loop {
// SAFETY: metadata is `cap + GROUP_WIDTH` bytes; group_start
// is in `[0, cap)`; the load reads 16 bytes which lie inside the
// buffer thanks to the mirror tail.
let g = unsafe { Group::load(self.metadata_ptr.as_ptr().add(group_start)) };
if let Some(m) = g.match_byte(EMPTY).lowest_set() {
let slot = (group_start + m) & self.mask;
self.set_meta(slot, h2v);
// SAFETY: slot < cap.
unsafe {
(*self.slots_ptr.as_ptr().add(slot)).write((k, v));
}
self.occupied += 1;
return;
}
// Linear probing by GROUP_WIDTH (tried triangular — at our 7/8
// load factor and group-scan-aware probe, linear wins on cache
// locality; triangular's anti-clustering only pays off at higher
// load factors than we run).
group_start = (group_start + GROUP_WIDTH) & self.mask;
}
}
fn probe_with_key(&self, hash: u64, key: &K) -> ProbeOutcome {
if self.cap == 0 {
return ProbeOutcome::NotFound {
insert_at: 0,
via_tombstone: false,
};
}
let h2v = h2(hash);
let mut group_start = (hash as usize) & self.mask;
// Fast path: no tombstones in the table ⇒ skip DELETED tracking
// entirely. This trims one SIMD `match_byte` (and one branch) from
// every group iteration; insert workloads with no deletions hit
// this path exclusively.
if self.deleted == 0 {
loop {
// SAFETY: see [insert_known_unique].
let g = unsafe { Group::load(self.metadata_ptr.as_ptr().add(group_start)) };
for m in g.match_byte(h2v).iter() {
let slot = (group_start + m) & self.mask;
// SAFETY: matched h2 ⇒ slot is occupied ⇒ initialised.
let kv = unsafe { (*self.slots_ptr.as_ptr().add(slot)).assume_init_ref() };
if &kv.0 == key {
return ProbeOutcome::Found(slot);
}
}
if let Some(m) = g.match_byte(EMPTY).lowest_set() {
return ProbeOutcome::NotFound {
insert_at: (group_start + m) & self.mask,
via_tombstone: false,
};
}
group_start = (group_start + GROUP_WIDTH) & self.mask;
}
}
// Slow path: tombstones exist; track the first DELETED so insert
// can reclaim it instead of growing the tombstone count.
let mut first_deleted: Option<usize> = None;
loop {
// SAFETY: see [insert_known_unique].
let g = unsafe { Group::load(self.metadata_ptr.as_ptr().add(group_start)) };
for m in g.match_byte(h2v).iter() {
let slot = (group_start + m) & self.mask;
// SAFETY: matched h2 ⇒ slot is occupied ⇒ initialised.
let kv = unsafe { (*self.slots_ptr.as_ptr().add(slot)).assume_init_ref() };
if &kv.0 == key {
return ProbeOutcome::Found(slot);
}
}
if first_deleted.is_none()
&& let Some(m) = g.match_byte(DELETED).lowest_set()
{
first_deleted = Some((group_start + m) & self.mask);
}
if let Some(m) = g.match_byte(EMPTY).lowest_set() {
let probe_empty = (group_start + m) & self.mask;
return ProbeOutcome::NotFound {
insert_at: first_deleted.unwrap_or(probe_empty),
via_tombstone: first_deleted.is_some(),
};
}
group_start = (group_start + GROUP_WIDTH) & self.mask;
}
}
}
impl<K, V> KevyMap<K, V> {
/// Borrow the value for `key`, or `None` if absent.
pub fn get<Q>(&self, key: &Q) -> Option<&V>
where
K: Borrow<Q>,
Q: KevyHash + Eq + ?Sized,
{
let idx = self.find_by_borrow(key)?;
// SAFETY: find_by_borrow only returns indices into full slots.
let kv = unsafe { (*self.slots_ptr.as_ptr().add(idx)).assume_init_ref() };
Some(&kv.1)
}
/// Mutably borrow the value for `key`, or `None` if absent.
pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
where
K: Borrow<Q>,
Q: KevyHash + Eq + ?Sized,
{
let idx = self.find_by_borrow(key)?;
// SAFETY: full slot.
let kv = unsafe { (*self.slots_ptr.as_ptr().add(idx)).assume_init_mut() };
Some(&mut kv.1)
}
/// Whether `key` is present in the map.
pub fn contains_key<Q>(&self, key: &Q) -> bool
where
K: Borrow<Q>,
Q: KevyHash + Eq + ?Sized,
{
self.find_by_borrow(key).is_some()
}
/// Remove `key`'s entry; returns the previous value if present.
pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
where
K: Borrow<Q>,
Q: KevyHash + Eq + ?Sized,
{
let idx = self.find_by_borrow(key)?;
self.set_meta(idx, DELETED);
self.occupied -= 1;
self.deleted += 1;
// SAFETY: slot was full, we just marked it DELETED so it won't be
// read again; ptr::read moves the (K, V) out.
let (_k, v) = unsafe { ptr::read(self.slots_ptr.as_ptr().add(idx) as *const (K, V)) };
Some(v)
}
fn find_by_borrow<Q>(&self, key: &Q) -> Option<usize>
where
K: Borrow<Q>,
Q: KevyHash + Eq + ?Sized,
{
if self.cap == 0 {
return None;
}
let hash = key.kevy_hash();
let h2v = h2(hash);
let mut group_start = (hash as usize) & self.mask;
loop {
// SAFETY: see [insert_known_unique]; group_start ∈ [0, cap),
// metadata length ≥ cap + GROUP_WIDTH.
let g = unsafe { Group::load(self.metadata_ptr.as_ptr().add(group_start)) };
for m in g.match_byte(h2v).iter() {
let slot = (group_start + m) & self.mask;
// SAFETY: matched h2 ⇒ slot occupied ⇒ initialised.
let kv = unsafe { (*self.slots_ptr.as_ptr().add(slot)).assume_init_ref() };
if kv.0.borrow() == key {
return Some(slot);
}
}
// EMPTY in this group ⇒ key cannot be later in the probe.
if !g.match_byte(EMPTY).is_empty() {
return None;
}
group_start = (group_start + GROUP_WIDTH) & self.mask;
}
}
}