luna_core/runtime/table.rs
1//! Lua table: hybrid array + hash.
2//!
3//! Array part uses split tag/payload storage (9 bytes/slot — the Lua 5.5
4//! "compact arrays" layout, bench-validated in benches/value_repr.rs).
5//! Hash part is the PUC node layout: main-position chaining with relocation
6//! (Brent's variation), capacity a power of two, rehash sizing per
7//! luaH_rehash/computesizes.
8
9use crate::runtime::heap::{Gc, GcHeader, Heap, Marker};
10use crate::runtime::value::{RawVal, Value, f2i_exact, raw};
11
12/// Errors that table mutation can raise back to the interpreter.
13#[derive(Clone, Copy, PartialEq, Eq, Debug)]
14pub enum TableError {
15 /// `t[nil] = …` — `nil` is forbidden as a key.
16 NilIndex,
17 /// `t[0/0] = …` — NaN floats are forbidden as keys.
18 NanIndex,
19 /// `next` called with a key not present in the table.
20 InvalidNext,
21 /// PUC `luaH_resizearray` — the array part would have to grow past
22 /// `MAXASIZE`, or the hash part past `MAXHBITS`. Raised back as
23 /// "table overflow" so a runaway `a[i] = i` loop walls within budget
24 /// (5.5/5.4 heavy.lua's `toomanyidx` pcalls exactly this scenario).
25 Overflow,
26}
27
28/// PUC `MAXASIZE` analogue: the highest power of two an array part may
29/// grow to. Choose a cap that comfortably fits in the gate's 60-second
30/// budget (each grow is O(n), so 2^27 entries × 16 bytes ≈ 2 GB is the
31/// effective ceiling). Beyond this `rehash` returns `TableError::Overflow`.
32pub(crate) const MAX_ASIZE: usize = 1 << 27;
33
34/// v2.1 Phase 1I.B — JIT layout constants for table-field IC.
35///
36/// luna-jit's trace lowerer needs to emit direct loads against
37/// `Table.nodes` (the hash part) without paying the helper-call ABI
38/// for each `Op::GetField` / `Op::SetField`. These constants expose
39/// the field offsets so the cranelift IR can be parameterised at
40/// compile time. The `Node` struct itself remains `pub(crate)` — only
41/// the offsets cross the crate boundary.
42///
43/// Layout assumptions:
44/// - `Box<[Node]>` is a fat pointer `(data_ptr, len)` on 64-bit
45/// targets (16 bytes total). The data pointer occupies the low 8
46/// bytes, length the high 8. This is the de-facto Rust ABI for
47/// `Box<[T]>` / `&[T]` but isn't formally guaranteed; the unit
48/// test `phase_1i_b_node_layout_pinned` and the const assertion
49/// on `size_of::<Box<[Node]>>()` catch drift.
50/// - `Value` is `#[repr(C, u8)]` so the discriminant byte sits at
51/// offset 0 and the payload starts at offset 8 (after 7 bytes of
52/// alignment padding). Total size 16 bytes per the existing
53/// `value_is_16_bytes` test in `runtime/value.rs`.
54/// - `Node` is `#[derive(Clone, Copy)]` with field order
55/// `(key: Value, val: Value, next: i32, dead_key: bool)`, so
56/// `key` lives at offset 0 and `val` at offset 16. The trailing
57/// `next + dead_key` fields are not read by the IC.
58pub mod jit_layout {
59 use super::Node;
60 use crate::runtime::Table;
61
62 /// Byte offset of the `nodes: Box<[Node]>` field within `Table`.
63 /// The fat-ptr low word (data ptr) lives at this offset; the
64 /// high word (length) at `TABLE_NODES_OFFSET + 8`. luna-jit
65 /// adds `TABLE_NODES_PTR_OFFSET` / `TABLE_NODES_LEN_OFFSET`
66 /// constants in `jit_backend/mod.rs` to express that split.
67 pub const TABLE_NODES_OFFSET: usize = std::mem::offset_of!(Table, nodes);
68
69 /// Byte offset of `key: Value` within `Node` (= 0).
70 pub const NODE_KEY_OFFSET: usize = std::mem::offset_of!(Node, key);
71
72 /// Byte offset of `val: Value` within `Node` (= 16 — `key` is 16-byte
73 /// `Value`, no inner padding).
74 pub const NODE_VAL_OFFSET: usize = std::mem::offset_of!(Node, val);
75
76 /// Total `Node` size in bytes (= 40 on 64-bit). Used as the stride
77 /// in `node_addr = nodes_ptr + slot_idx * SIZEOF_NODE`.
78 pub const SIZEOF_NODE: usize = std::mem::size_of::<Node>();
79
80 /// Static guard: pin the assumptions luna-jit relies on at compile
81 /// time. Layout drift here breaks IR emit, so trap it at compile
82 /// time rather than at trace-fire time.
83 ///
84 /// `Box<[T]>` is a fat pointer of `2 * usize` — 16 bytes on 64-bit
85 /// targets, 8 bytes on 32-bit (e.g. `wasm32`). Use a width-aware
86 /// expected size so the wasm32-unknown-unknown CI build does not
87 /// trip the assertion. The runtime layout still matters for luna-jit
88 /// IR emit on 64-bit hosts (the only platforms where Cranelift JIT
89 /// runs); the 32-bit branch documents the size in passing.
90 const _: () = {
91 assert!(std::mem::size_of::<Box<[Node]>>() == 2 * std::mem::size_of::<usize>());
92 assert!(NODE_KEY_OFFSET == 0);
93 assert!(NODE_VAL_OFFSET == 16);
94 assert!(SIZEOF_NODE >= 32);
95 };
96}
97
98#[derive(Clone, Copy)]
99pub(crate) struct Node {
100 key: Value,
101 val: Value,
102 /// absolute index of the next node in this chain, or NONE
103 next: i32,
104 /// PUC `setdeadkey` analogue: the key was a collectable that got swept
105 /// out of a weak table. The Gc pointer in `key` is now dangling — its
106 /// memory may have been reused for a new allocation with potentially
107 /// equal content. Marking the node "dead-key" lets `find_node` skip the
108 /// raw_eq probe (which could spuriously match a reallocated object) and
109 /// `insert_new` treat the slot as available for a fresh main-position
110 /// owner while leaving chain back-links intact for traversal.
111 dead_key: bool,
112}
113
114const NONE: i32 = -1;
115
116impl Node {
117 const EMPTY: Node = Node {
118 key: Value::Nil,
119 val: Value::Nil,
120 next: NONE,
121 dead_key: false,
122 };
123}
124
125/// C3 — SoA Robin Hood meta-word layout (Variant A, see
126/// `.dev/rfcs/v2.0-c3-soa-robinhood-rfc.md` §5.1).
127///
128/// Each `meta[idx]` slot encodes the open-addressing slot state in a
129/// single u16:
130/// - bit 15 (`OCCUPIED_BIT`): 0 = empty, 1 = occupied
131/// - bit 14 (`TOMBSTONE_BIT`): 0 = live, 1 = tombstoned-occupied
132/// - bits 13..0 (`PSL_MASK`): probe-sequence length (0..16383)
133///
134/// The 14-bit PSL field is **far** beyond any realistic Robin Hood
135/// max-PSL at load ≤ 0.75 (expected max ~20 on 1024 slots; even the
136/// long-tail outliers seen empirically with luna's existing hash
137/// distributions stay under 200). 2 bytes/slot is still 20× smaller
138/// than the 40-byte Node — the SoA bandwidth gain (§3.5 of the RFC)
139/// is preserved.
140///
141/// Earlier draft used 1 byte with a 6-bit PSL cap of 63; bench under
142/// load 0.676 on cap=1024 produced a long-tail PSL of 64+ with the
143/// LuaStr+mix64 hash distribution, forcing the widening.
144///
145/// Tombstones do NOT free the slot for `find` (probe continues past), but
146/// DO free it for `insert` (write the new entry, clear the tomb bit). They
147/// accumulate; the rehash path compacts them periodically.
148#[allow(dead_code)]
149pub(crate) mod meta_bits {
150 pub const OCCUPIED_BIT: u16 = 0b1000_0000_0000_0000;
151 pub const TOMBSTONE_BIT: u16 = 0b0100_0000_0000_0000;
152 pub const PSL_MASK: u16 = 0b0011_1111_1111_1111;
153 pub const PSL_MAX: u16 = PSL_MASK;
154 /// Empty slot — bit 15 = 0, all others 0.
155 pub const EMPTY: u16 = 0;
156
157 #[inline(always)]
158 pub fn is_occupied(m: u16) -> bool {
159 (m & OCCUPIED_BIT) != 0
160 }
161 #[inline(always)]
162 pub fn is_tombstone(m: u16) -> bool {
163 (m & TOMBSTONE_BIT) != 0
164 }
165 /// Live = occupied AND not tombstoned. `next()` iteration cursor returns
166 /// these. `find_slot_rh` short-circuits on a live match.
167 #[inline(always)]
168 pub fn is_live(m: u16) -> bool {
169 (m & (OCCUPIED_BIT | TOMBSTONE_BIT)) == OCCUPIED_BIT
170 }
171 #[inline(always)]
172 pub fn psl(m: u16) -> u16 {
173 m & PSL_MASK
174 }
175 #[inline(always)]
176 pub fn pack(psl: u16, tomb: bool) -> u16 {
177 debug_assert!(psl <= PSL_MAX);
178 let mut m = OCCUPIED_BIT | (psl & PSL_MASK);
179 if tomb {
180 m |= TOMBSTONE_BIT;
181 }
182 m
183 }
184}
185
186/// P11-S5d.I — inline storage threshold. Tables whose array part has
187/// `asize <= INLINE_ASIZE` keep their atags+avals inside the Table
188/// struct itself (`inline_storage`), skipping the slab Box entirely
189/// — binary_trees's `{nil, nil}` and `{...}` 2-element leaves live
190/// here, sparing one allocator round-trip per NewTable.
191pub(crate) const INLINE_ASIZE: u64 = 2;
192/// `INLINE_ASIZE` u64 slots for avals + `ceil(INLINE_ASIZE / 8)` u64
193/// slots covering the atags bytes (with trailing pad). For
194/// `INLINE_ASIZE = 2`: 2 avals + 1 atags = 3 u64s = 24 bytes.
195pub(crate) const INLINE_U64S: usize = INLINE_ASIZE as usize + INLINE_ASIZE.div_ceil(8) as usize;
196
197/// Lua table — hybrid array + hash storage, with optional metatable and
198/// weak-mode flags.
199#[repr(C)]
200pub struct Table {
201 /// read through raw casts by the GC, not by field access
202 #[allow(dead_code)]
203 pub(crate) hdr: GcHeader,
204 /// P11-S5d.I — single backing pointer for the array part. Points
205 /// to `inline_storage` (asize <= INLINE_ASIZE) or `slab.as_ptr()`
206 /// (asize > INLINE_ASIZE). The JIT inline aset reads this with one
207 /// `load i64`, no branch — the choice between inline and slab is
208 /// already encoded in the pointer. Initialised in `Heap::new_table`
209 /// AFTER the Table reaches its final heap address (so that
210 /// `&mut self.inline_storage` is the stable heap pointer, not a
211 /// stack-local one). Updated by `Table::resize`.
212 pub array_ptr: *mut u8,
213 /// P11-S5d.H — external backing for the array part when
214 /// `asize > INLINE_ASIZE`. Layout: `[avals: asize × 8 bytes][atags:
215 /// asize bytes]`. Empty box (dangling, no alloc) when the inline
216 /// path is in use.
217 pub(crate) slab: Box<[u64]>,
218 /// Length of the array part in slots. u64 (rather than `usize` or
219 /// `u32`) so the JIT can load it with a single `load i64`.
220 pub asize: u64,
221 /// P11-S5d.I — inline backing used when `asize <= INLINE_ASIZE`.
222 /// Same layout as the slab: avals at low addresses (`asize * 8`
223 /// bytes from offset 0), atags at the trailing `asize` bytes.
224 pub(crate) inline_storage: [u64; INLINE_U64S],
225 /// hash part: power-of-two length (or empty)
226 /// hash part: power-of-two length (or empty)
227 /// `pub(crate)` so `Heap::free_obj` (pool recycle path) can reset.
228 pub(crate) nodes: Box<[Node]>,
229 /// free-slot search position, counts down (PUC lastfree).
230 /// `pub(crate)` so `Heap::new_table` can reset on pool recycle.
231 pub(crate) lastfree: u32,
232 /// C3 — SoA Robin Hood hash part (Variant A, parallel to `nodes`
233 /// during Phase B+C transition). After Phase 4 cutover these
234 /// replace `nodes` entirely. Each of `keys` / `vals` / `meta`
235 /// is the same length as `nodes` and is sized in lockstep by
236 /// `resize`. `meta` byte layout per the `meta_bits` module above.
237 /// Empty `Box::new([])` until Phase D cuts over.
238 pub(crate) keys: Box<[Value]>,
239 pub(crate) vals: Box<[Value]>,
240 pub(crate) meta: Box<[u16]>,
241 /// Count of tombstoned-occupied meta slots; rehash trigger.
242 pub(crate) tombstones: u32,
243 /// C3 — iterator-guard counter (R-A3 mitigation). Incremented on
244 /// each `pairs`/`next` entry, decremented on exit. While > 0,
245 /// the SoA path MUST defer rehash (which would rebase slot
246 /// indices and break PUC `nextvar.lua:520-521` invariant).
247 /// Phase F wires this; Phase B initialises to 0.
248 pub(crate) iter_depth: u32,
249 /// P11-S5d.K — visibility lifted to `pub(crate)` so the JIT can
250 /// take its field offset at compile time and emit an inline
251 /// "metatable.is_none()" guard before the inline aget fast path.
252 /// `Option<Gc<Table>>` is 8 bytes via the NonNull-pointer-opt: 0
253 /// ⇔ None, non-zero ⇔ Some.
254 pub metatable: Option<Gc<Table>>,
255 /// reserved for an absent-metamethod cache (PUC `flags`); currently
256 /// unread — luna's mm lookup walks `metatable.get` each time
257 #[allow(dead_code)]
258 pub(crate) flags: u8,
259}
260
261// SAFETY: `array_ptr` looks like an unprotected raw pointer field, but
262// it always refers to memory the same Table owns (either its own inline
263// storage or its `slab` Box). The Table is heap-allocated and never
264// moved post-adoption, so the pointer stays valid for the table's
265// lifetime. No thread-unsafety concern: tables are accessed only
266// through the Vm, single-threaded.
267unsafe impl Send for Table {}
268unsafe impl Sync for Table {}
269
270impl Table {
271 pub(crate) fn new(hdr: GcHeader) -> Table {
272 Table {
273 hdr,
274 // P11-S5d.I — `array_ptr` is fixed up in
275 // `Heap::new_table` after the Table reaches its final heap
276 // address (so that `&inline_storage` is the heap address,
277 // not a stack-local one). Null sentinel here so a
278 // bug-detection invariant flags any pre-fixup read.
279 array_ptr: std::ptr::null_mut(),
280 slab: Box::new([]),
281 asize: 0,
282 inline_storage: [0; INLINE_U64S],
283 nodes: Box::new([]),
284 lastfree: 0,
285 keys: Box::new([]),
286 vals: Box::new([]),
287 meta: Box::new([]),
288 tombstones: 0,
289 iter_depth: 0,
290 metatable: None,
291 flags: 0,
292 }
293 }
294
295 /// P11-S5d.I — set `array_ptr` to the inline storage's stable heap
296 /// address. Called by `Heap::new_table` once the Table is at its
297 /// final location.
298 #[inline]
299 pub(crate) fn init_array_ptr(&mut self) {
300 self.array_ptr = self.inline_storage.as_mut_ptr() as *mut u8;
301 }
302
303 /// P11-S5d.H/I — read view onto the array-part tag bytes. Trails
304 /// the avals portion in the active backing (inline or slab).
305 #[inline(always)]
306 pub(crate) fn atags(&self) -> &[u8] {
307 let n = self.asize as usize;
308 if n == 0 {
309 return &[];
310 }
311 // SAFETY: `array_ptr` always points to a buffer with `n`
312 // RawVal slots followed by `n` u8 tag bytes (either
313 // `inline_storage` of `INLINE_U64S` u64s, or a `slab` of
314 // `asize + ceil(asize/8)` u64s). The tag bytes start at byte
315 // offset `n * 8` from the buffer base.
316 unsafe {
317 let ptr = self.array_ptr.add(n * 8);
318 std::slice::from_raw_parts(ptr, n)
319 }
320 }
321
322 #[inline(always)]
323 pub(crate) fn atags_mut(&mut self) -> &mut [u8] {
324 let n = self.asize as usize;
325 if n == 0 {
326 return &mut [];
327 }
328 // SAFETY: `array_ptr` was allocated by `Heap::init_array_ptr` with `array_cap` slots; the table holds it for its lifetime and the heap is single-threaded so no concurrent writers exist.
329 unsafe {
330 let ptr = self.array_ptr.add(n * 8);
331 std::slice::from_raw_parts_mut(ptr, n)
332 }
333 }
334
335 /// P11-S5d.H/I — read view onto the array-part payload slots. Sits
336 /// at the start of the active backing (u64-aligned, identical size
337 /// and layout to `RawVal`).
338 #[inline(always)]
339 pub(crate) fn avals(&self) -> &[RawVal] {
340 let n = self.asize as usize;
341 if n == 0 {
342 return &[];
343 }
344 // SAFETY: inline_storage / slab both store u64s, so the cast
345 // to `*const RawVal` is alignment-safe (RawVal size = 8,
346 // align = 8). The buffer holds at least `n` such slots.
347 unsafe { std::slice::from_raw_parts(self.array_ptr as *const RawVal, n) }
348 }
349
350 #[inline(always)]
351 pub(crate) fn avals_mut(&mut self) -> &mut [RawVal] {
352 let n = self.asize as usize;
353 if n == 0 {
354 return &mut [];
355 }
356 // SAFETY: `array_ptr` was allocated by `Heap::init_array_ptr` with `array_cap` slots; the table holds it for its lifetime and the heap is single-threaded so no concurrent writers exist.
357 unsafe { std::slice::from_raw_parts_mut(self.array_ptr as *mut RawVal, n) }
358 }
359
360 /// Allocate a fresh external `[avals: asize × 8 bytes][atags: asize
361 /// bytes]` slab. Only used when `asize > INLINE_ASIZE`. The buffer
362 /// is u64-aligned via `Box<[u64]>` and zeroed (avals = `RawVal::
363 /// NIL` aka `0`; atags = `raw::NIL` aka `0`).
364 fn alloc_slab(asize: usize) -> Box<[u64]> {
365 if asize == 0 {
366 return Box::new([]);
367 }
368 let avals_u64s = asize;
369 let atags_u64s = asize.div_ceil(8);
370 let total = avals_u64s + atags_u64s;
371 vec![0u64; total].into_boxed_slice()
372 }
373
374 /// This table's metatable, if any.
375 pub fn metatable(&self) -> Option<Gc<Table>> {
376 self.metatable
377 }
378
379 /// Install (or clear) this table's metatable. Does not perform any
380 /// `__metatable` guarding; that belongs in the Vm-level `setmetatable`.
381 pub fn set_metatable(&mut self, mt: Option<Gc<Table>>) {
382 self.metatable = mt;
383 }
384
385 /// Bytes occupied by the table's *external* internal allocations
386 /// (slab and nodes). Cheap O(1) read — Box len × element size, no
387 /// allocator query. `Heap::free_obj` subtracts this on the way out
388 /// so the credit applied via `set`/`rehash`/`ensure_*` is symmetric.
389 ///
390 /// P11-S5d.I — inline storage doesn't count toward this (it's part
391 /// of the Table struct itself, accounted for by `size_of::<Table>()`
392 /// at adoption time). When the array part lives inline, the slab
393 /// is empty and contributes nothing here.
394 pub(crate) fn internal_bytes(&self) -> usize {
395 let n = self.asize as usize;
396 let array_external = if n > INLINE_ASIZE as usize {
397 n + n * std::mem::size_of::<RawVal>()
398 } else {
399 0
400 };
401 let soa_external = self.keys.len() * std::mem::size_of::<Value>()
402 + self.vals.len() * std::mem::size_of::<Value>()
403 + self.meta.len() * std::mem::size_of::<u16>();
404 array_external + self.nodes.len() * std::mem::size_of::<Node>() + soa_external
405 }
406
407 fn asize(&self) -> usize {
408 self.asize as usize
409 }
410
411 fn aget(&self, idx: usize) -> Value {
412 // SAFETY: callers gate on `idx < self.asize()` before reaching here
413 // (`get_int`, `iter_array`, etc.). atags and avals are sized
414 // identically by `rehash`, so a bound check passed against atags
415 // covers avals too.
416 unsafe {
417 Value::pack(
418 *self.atags().get_unchecked(idx),
419 *self.avals().get_unchecked(idx),
420 )
421 }
422 }
423
424 fn aset(&mut self, idx: usize, v: Value) {
425 let (t, b) = v.unpack();
426 // SAFETY: see `aget`. callers (`set_norm`, `set_int`) gate on
427 // `idx < self.asize()`. The two `*_mut` calls each take a
428 // distinct `&mut self` borrow whose lifetime ends at the
429 // statement boundary, so they don't overlap.
430 unsafe {
431 *self.atags_mut().get_unchecked_mut(idx) = t;
432 *self.avals_mut().get_unchecked_mut(idx) = b;
433 }
434 }
435
436 // ---- reads ----
437
438 /// Raw lookup (no `__index` metamethod). Returns `Value::Nil` when
439 /// the key is absent. `Value::Nil` and NaN floats return `nil` directly.
440 pub fn get(&self, key: Value) -> Value {
441 match key {
442 Value::Int(i) => self.get_int(i),
443 Value::Float(f) => match f2i_exact(f) {
444 Some(i) => self.get_int(i),
445 None => {
446 if f.is_nan() {
447 Value::Nil
448 } else {
449 self.get_hash(key)
450 }
451 }
452 },
453 Value::Nil => Value::Nil,
454 k => self.get_hash(k),
455 }
456 }
457
458 /// Integer-keyed variant of [`Self::get`].
459 pub fn get_int(&self, i: i64) -> Value {
460 if i >= 1 && (i as u64) <= self.asize() as u64 {
461 return self.aget(i as usize - 1);
462 }
463 self.get_hash(Value::Int(i))
464 }
465
466 /// String-keyed variant of [`Self::get`] for v1.2 D4 A1 GetField fast
467 /// path: the GetField interp arm always has a `Gc<LuaStr>` key from
468 /// `Proto.consts`. Skips the outer `Value` match (which would only
469 /// take the `_ => self.get_hash(k)` arm anyway) so the dispatcher
470 /// pays one less branch per call. ~5 GetField/iter × 1000 iters/cell
471 /// on the Redis-Lua-shape workload — every shaved nanosecond shows
472 /// up at the bench level. Counter-validated via
473 /// `examples/diag_opcode_breakdown.rs`.
474 #[inline]
475 pub fn get_str(&self, key: crate::runtime::Gc<crate::runtime::string::LuaStr>) -> Value {
476 self.get_hash(Value::Str(key))
477 }
478
479 fn get_hash(&self, k: Value) -> Value {
480 match self.find_node(k) {
481 Some(idx) => self.nodes[idx].val,
482 None => Value::Nil,
483 }
484 }
485
486 /// v2.1 Phase 1I.B — same logic as [`find_node`] but exposed
487 /// to luna-core's recorder so it can capture the slot index for
488 /// the table-field IC snapshot. luna-jit reads neither the
489 /// `nodes` field nor `Node` directly; only the slot index
490 /// crosses the crate boundary (baked into the IR as a `iconst`).
491 #[allow(dead_code)]
492 pub(crate) fn find_node_idx(&self, k: Value) -> Option<usize> {
493 self.find_node(k)
494 }
495
496 /// v2.1 Phase 1I.B — accessor for the recorder's
497 /// `FieldIcSnapshot` capture: read the slot's value's tag byte
498 /// for the cached_val_tag field. The recorder needs this to
499 /// match the runtime guard the IC emits. Returns None when
500 /// `idx >= nodes.len()`.
501 #[allow(dead_code)]
502 pub(crate) fn node_val_at(&self, idx: usize) -> Option<Value> {
503 self.nodes.get(idx).map(|n| n.val)
504 }
505
506 /// v2.1 Phase 1I.B — accessor for `nodes.len()` so the recorder
507 /// can capture the shape-guard's `nodes_len` field without
508 /// reaching into the private `nodes` member.
509 #[allow(dead_code)]
510 pub(crate) fn nodes_capacity(&self) -> usize {
511 self.nodes.len()
512 }
513
514 /// Walk the chain rooted at the key's main position.
515 fn find_node(&self, k: Value) -> Option<usize> {
516 if self.nodes.is_empty() {
517 return None;
518 }
519 let mut idx = self.main_position(k);
520 loop {
521 let n = &self.nodes[idx];
522 // Dead-key slots carry a dangling Gc pointer whose memory may
523 // have been reallocated to a different live object; raw_eq on
524 // such a key can spuriously match the freshly-reused address.
525 // Skip the comparison and only follow `next` (PUC `setdeadkey`
526 // / `equalkey` short-circuit). 5.5 gc.lua :459-:478 was 12%
527 // flaky on this exact path — a swept B-string's slot kept
528 // chaining into A's slot, so `a[k] = nil` (k = A_string) hit
529 // the dead slot and wrote nil there, leaving A's val untouched.
530 if !n.dead_key && n.key.raw_eq(k) {
531 return Some(idx);
532 }
533 if n.next == NONE {
534 return None;
535 }
536 idx = n.next as usize;
537 }
538 }
539
540 // ---- writes ----
541
542 /// Insert / update `(key, val)`. `heap` is used to credit any internal
543 /// Box growth (rehash) to `heap.bytes` so the counter stays in sync with
544 /// real memory; `free_obj` subtracts `internal_bytes()` on the way out.
545 pub fn set(&mut self, heap: &mut Heap, key: Value, val: Value) -> Result<(), TableError> {
546 let k = normalize_set_key(key)?;
547 self.set_norm(heap, k, val)
548 }
549
550 /// PUC `luaV_fastset` / `luaV_finishfastset` analogue: single-walk
551 /// in-place update for an existing key. Returns `true` iff `key` is
552 /// present with a non-nil value and the slot was overwritten with
553 /// `val`. Returns `false` when the key is absent, the slot holds nil,
554 /// or the key normalisation rejects it — the caller is then expected
555 /// to run the `__newindex` chain or fall back to `set` for the raw
556 /// insert.
557 ///
558 /// Collapses the SetField hot path from two hash-chain walks
559 /// (`get` + `set`) to one. The `__newindex` invariant ("fires iff
560 /// `get` would have returned nil") is preserved because this method
561 /// writes only when the existing slot is non-nil — the exact set the
562 /// prior `tb.get(key).is_nil()` gate already excluded from
563 /// `__newindex` eligibility. See
564 /// `.dev/rfcs/v2.0-pi-phase2-a3-audit.md` §4 for the case-by-case
565 /// semantics check.
566 ///
567 /// The caller is responsible for firing `Heap::barrier_back` after a
568 /// `true` return (same contract as the surrounding `raw_set`
569 /// wrapper).
570 pub fn try_set_existing(&mut self, key: Value, val: Value) -> bool {
571 let k = match normalize_set_key(key) {
572 Ok(k) => k,
573 Err(_) => return false,
574 };
575 if let Value::Int(i) = k
576 && i >= 1
577 && (i as u64) <= self.asize() as u64
578 {
579 let idx = i as usize - 1;
580 // SAFETY: `idx < self.asize()` is guarded by the conditional
581 // above, mirroring the bound on `aget`/`aset`.
582 let tag = unsafe { *self.atags().get_unchecked(idx) };
583 if tag != raw::NIL {
584 // Nil-val on a live slot must follow the same tombstone
585 // discipline as `set_norm` — routed through
586 // `clear_existing_slot` so chain-world / future
587 // data-layout cutovers (Phase E SoA) stay aligned.
588 // See `.dev/known-bugs/fixed/`.
589 if val.is_nil() {
590 self.clear_existing_slot(k);
591 } else {
592 self.aset(idx, val);
593 }
594 return true;
595 }
596 // Array slot present-but-nil → __newindex eligible: do NOT
597 // write. Caller falls through to the metamethod chain.
598 return false;
599 }
600 if let Some(idx) = self.find_node(k)
601 && !self.nodes[idx].val.is_nil()
602 {
603 if val.is_nil() {
604 self.clear_existing_slot(k);
605 } else {
606 self.nodes[idx].val = val;
607 }
608 return true;
609 }
610 false
611 }
612
613 /// Shared "live with val=Nil is illegal" tombstone routine for the
614 /// two write entry points (`set_norm` and `try_set_existing`). The
615 /// slot must already be known live (array slot inside `asize()` /
616 /// node returned by `find_node`).
617 ///
618 /// Chain-world today:
619 /// - array slot → `aset(_, Nil)` clears the atag, so `next()`'s
620 /// `tag != raw::NIL` filter skips the slot.
621 /// - node slot → soft tombstone (key kept, `val = Nil`); chain
622 /// `next()` filter `!n.val.is_nil()` skips it, and `find_node`
623 /// still routes a future re-insert into the same slot without
624 /// a rehash.
625 ///
626 /// Centralising the discipline here lets a future Phase E SoA
627 /// cutover (Variant B linear probe, or any non-Robin-Hood layout
628 /// attack that switches `next()`'s filter to `meta_bits::is_live`)
629 /// migrate both entry points in lockstep — exactly the divergence
630 /// that surfaced as `(key, nil)` zombies in `pairs()` on the C3
631 /// Session 2 cutover branch.
632 fn clear_existing_slot(&mut self, k: Value) {
633 if let Value::Int(i) = k
634 && i >= 1
635 && (i as u64) <= self.asize() as u64
636 {
637 self.aset(i as usize - 1, Value::Nil);
638 return;
639 }
640 if let Some(idx) = self.find_node(k) {
641 self.nodes[idx].val = Value::Nil;
642 }
643 }
644
645 /// Integer-keyed variant of [`Self::set`].
646 pub fn set_int(&mut self, heap: &mut Heap, i: i64, val: Value) -> Result<(), TableError> {
647 self.set_norm(heap, Value::Int(i), val)
648 }
649
650 /// `k` is already normalized (no nil, no NaN, integral floats → Int).
651 fn set_norm(&mut self, heap: &mut Heap, k: Value, v: Value) -> Result<(), TableError> {
652 if let Value::Int(i) = k
653 && i >= 1
654 && (i as u64) <= self.asize() as u64
655 {
656 // Live array slot + Nil write goes through the shared
657 // tombstone routine (see `clear_existing_slot` for the
658 // chain ↔ future-SoA rationale). The non-Nil branch is
659 // identical to a bare `aset` today.
660 if v.is_nil() {
661 self.clear_existing_slot(k);
662 } else {
663 self.aset(i as usize - 1, v);
664 }
665 return Ok(());
666 }
667 if let Some(idx) = self.find_node(k) {
668 if v.is_nil() {
669 self.clear_existing_slot(k);
670 } else {
671 self.nodes[idx].val = v;
672 }
673 return Ok(());
674 }
675 if v.is_nil() {
676 return Ok(()); // absent key set to nil: nothing to record
677 }
678 self.insert_new(heap, k, v)
679 }
680
681 fn insert_new(&mut self, heap: &mut Heap, k: Value, v: Value) -> Result<(), TableError> {
682 if self.nodes.is_empty() {
683 self.rehash(heap, k)?;
684 return self.set_norm(heap, k, v);
685 }
686 let mp = self.main_position(k);
687 // A truly empty slot (key=Nil, !dead_key) is free for direct placement.
688 // A dead-key slot still belongs to some chain (its `next` points to a
689 // live entry the chain reaches), so we treat it as occupied here and
690 // route the new key through the collision path below — that preserves
691 // the back-links into this slot from other nodes' `next` fields.
692 if self.nodes[mp].key.is_nil() && !self.nodes[mp].dead_key {
693 self.nodes[mp] = Node {
694 key: k,
695 val: v,
696 next: NONE,
697 dead_key: false,
698 };
699 return Ok(());
700 }
701 let Some(free) = self.free_pos() else {
702 self.rehash(heap, k)?;
703 return self.set_norm(heap, k, v);
704 };
705 // Dead-key slot: it carries no live key, so by definition nobody else
706 // counts it as "their main position owner". We give it directly to
707 // the new key but preserve `next` so the chain it sits inside still
708 // reaches its downstream entries.
709 if self.nodes[mp].dead_key {
710 let preserved_next = self.nodes[mp].next;
711 self.nodes[mp] = Node {
712 key: k,
713 val: v,
714 next: preserved_next,
715 dead_key: false,
716 };
717 return Ok(());
718 }
719 let other_mp = self.main_position(self.nodes[mp].key);
720 if other_mp != mp {
721 // colliding node is out of its main position: relocate it to the
722 // free slot and take its place
723 let mut prev = other_mp;
724 while self.nodes[prev].next != mp as i32 {
725 prev = self.nodes[prev].next as usize;
726 }
727 self.nodes[prev].next = free as i32;
728 self.nodes[free] = self.nodes[mp];
729 self.nodes[mp] = Node {
730 key: k,
731 val: v,
732 next: NONE,
733 dead_key: false,
734 };
735 } else {
736 // colliding node owns this position: chain the new node behind it
737 self.nodes[free] = Node {
738 key: k,
739 val: v,
740 next: self.nodes[mp].next,
741 dead_key: false,
742 };
743 self.nodes[mp].next = free as i32;
744 }
745 Ok(())
746 }
747
748 fn free_pos(&mut self) -> Option<usize> {
749 while self.lastfree > 0 {
750 self.lastfree -= 1;
751 let n = &self.nodes[self.lastfree as usize];
752 // Dead-key slots are still occupied for chain purposes (their
753 // `next` may be the only path to a downstream entry) — don't
754 // hand them out as free.
755 if n.key.is_nil() && !n.dead_key {
756 return Some(self.lastfree as usize);
757 }
758 }
759 None
760 }
761
762 // ---- rehash (PUC luaH_rehash) ----
763
764 fn rehash(&mut self, heap: &mut Heap, pending: Value) -> Result<(), TableError> {
765 let mut nums = [0usize; 65];
766 let mut int_keys = 0usize;
767 let mut total = 1; // the pending key
768 if let Value::Int(i) = pending
769 && i >= 1
770 {
771 nums[ceil_log2(i as u64)] += 1;
772 int_keys += 1;
773 }
774 let atags = self.atags();
775 for (i, &tag) in atags.iter().enumerate() {
776 if tag != raw::NIL {
777 nums[ceil_log2(i as u64 + 1)] += 1;
778 int_keys += 1;
779 total += 1;
780 }
781 }
782 for n in self.nodes.iter() {
783 if !n.val.is_nil() {
784 total += 1;
785 if let Value::Int(i) = n.key
786 && i >= 1
787 {
788 nums[ceil_log2(i as u64)] += 1;
789 int_keys += 1;
790 }
791 }
792 }
793 // computesizes: optimal array size = largest 2^i with more than 2^(i-1)
794 // integer keys in [1, 2^i]
795 let mut new_asize = 0usize;
796 let mut in_array = 0usize;
797 let mut a = 0usize;
798 let mut two_to_i = 1usize;
799 let mut i = 0usize;
800 while int_keys > two_to_i / 2 {
801 a += nums[i];
802 if a > two_to_i / 2 {
803 new_asize = two_to_i;
804 in_array = a;
805 }
806 i += 1;
807 match two_to_i.checked_mul(2) {
808 Some(n) => two_to_i = n,
809 None => break,
810 }
811 }
812 // PUC `luaH_resizearray` raises "table overflow" when the array part
813 // would have to grow past MAXASIZE. luna mirrors with `MAX_ASIZE`,
814 // checked on both the array and the hash bucket count (the latter is
815 // a power-of-two of total - in_array entries).
816 if new_asize > MAX_ASIZE {
817 return Err(TableError::Overflow);
818 }
819 let hash_entries = total - in_array;
820 if hash_entries > MAX_ASIZE {
821 return Err(TableError::Overflow);
822 }
823 self.resize(heap, new_asize, hash_entries);
824 Ok(())
825 }
826
827 /// Resize the table's array and hash parts. The array part grows
828 /// (or shrinks) to `new_asize` NIL-initialized slots; the hash
829 /// part rounds to the next power of two ≥ `hash_entries`. Any
830 /// existing entries are re-inserted into the new layout. The
831 /// Box growth is debited/credited to `heap.bytes` so `free_obj`
832 /// can subtract the symmetric amount.
833 ///
834 /// P11-S5c.B — `Heap::new_table_sized` calls this on a freshly
835 /// adopted empty table to pre-allocate the array part, sparing
836 /// the table-fill loop from O(log N) intermediate `rehash`es.
837 pub(crate) fn resize(&mut self, heap: &mut Heap, new_asize: usize, hash_entries: usize) {
838 let before = self.internal_bytes();
839 // P11-S5d.H/I — snapshot the old array entries before we
840 // re-install the backing. The active buffer can be inline OR
841 // slab; `array_ptr` already points to whichever it is, so
842 // walking via raw offsets works the same for either case.
843 let old_asize = self.asize as usize;
844 let mut old_pairs: Vec<(u8, RawVal)> = Vec::with_capacity(old_asize);
845 if old_asize > 0 {
846 // SAFETY: `array_ptr` was set up by `Heap::new_table` or
847 // an earlier `resize`; it covers `old_asize * 9` bytes
848 // (avals + atags).
849 let avals_base = self.array_ptr as *const RawVal;
850 let atags_base = unsafe { self.array_ptr.add(old_asize * 8) as *const u8 };
851 for i in 0..old_asize {
852 // SAFETY: `i < array_len` is enforced by the surrounding loop bound; `atags_base` / `avals_base` point into the table's parallel arrays allocated in lockstep by `init_array_ptr`.
853 let tag = unsafe { *atags_base.add(i) };
854 // SAFETY: `i < array_len` is enforced by the surrounding loop bound; `atags_base` / `avals_base` point into the table's parallel arrays allocated in lockstep by `init_array_ptr`.
855 let val = unsafe { *avals_base.add(i) };
856 old_pairs.push((tag, val));
857 }
858 }
859 let old_nodes = std::mem::take(&mut self.nodes);
860
861 // Install the new array backing first, then update `array_ptr`
862 // (before potentially dropping the old slab via the assignment
863 // below) so the JIT never observes a stale pointer.
864 self.asize = new_asize as u64;
865 if new_asize <= INLINE_ASIZE as usize {
866 // Inline path — zero the inline buffer; drop any prior
867 // external slab.
868 for slot in self.inline_storage.iter_mut() {
869 *slot = 0;
870 }
871 self.array_ptr = self.inline_storage.as_mut_ptr() as *mut u8;
872 self.slab = Box::new([]);
873 } else {
874 // External slab — allocate, then re-point `array_ptr`.
875 self.slab = Self::alloc_slab(new_asize);
876 self.array_ptr = self.slab.as_mut_ptr() as *mut u8;
877 }
878
879 let hsize = if hash_entries == 0 {
880 0
881 } else {
882 hash_entries.next_power_of_two()
883 };
884 self.nodes = vec![Node::EMPTY; hsize].into_boxed_slice();
885 self.lastfree = hsize as u32;
886 // PUC `g->GCtotalbytes` analogue: credit (or debit) the box-size
887 // delta so `Heap.bytes` reflects this table's actual internal
888 // memory. `free_obj` subtracts `internal_bytes()` on the way out.
889 let after = self.internal_bytes();
890 heap.apply_bytes_delta(before, after);
891 // Re-insert old array entries via the public set_norm path
892 // (which handles rehashing if the new array shrinks below the
893 // entry count).
894 for (i, (tag, val)) in old_pairs.into_iter().enumerate() {
895 if tag != raw::NIL {
896 // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
897 let v = unsafe { Value::pack(tag, val) };
898 let _ = self.set_norm(heap, Value::Int(i as i64 + 1), v);
899 }
900 }
901 for n in old_nodes.iter() {
902 if !n.val.is_nil() {
903 let _ = self.set_norm(heap, n.key, n.val);
904 }
905 }
906 }
907
908 fn main_position(&self, k: Value) -> usize {
909 debug_assert!(!self.nodes.is_empty());
910 hash_key(k) as usize & (self.nodes.len() - 1)
911 }
912
913 // ---- length / iteration ----
914
915 /// A border: `n` where `t[n]` is non-nil and `t[n+1]` is nil (PUC `luaH_getn`).
916 /// This is Lua `#` semantics, not a container size — an `is_empty`
917 /// counterpart would be meaningless.
918 #[allow(clippy::len_without_is_empty)]
919 pub fn len(&self) -> i64 {
920 let asize = self.asize();
921 let atags = self.atags();
922 if asize > 0 && atags[asize - 1] == raw::NIL {
923 // binary search inside the array part
924 let (mut lo, mut hi) = (0usize, asize);
925 while hi - lo > 1 {
926 let m = lo + (hi - lo) / 2;
927 if atags[m - 1] == raw::NIL {
928 hi = m;
929 } else {
930 lo = m;
931 }
932 }
933 return lo as i64;
934 }
935 if self.nodes.is_empty() {
936 return asize as i64;
937 }
938 // array is full (or absent): unbound search through the hash part
939 let mut lo = asize as i64;
940 let mut hi = lo + 1;
941 while !self.get_int(hi).is_nil() {
942 lo = hi;
943 match hi.checked_mul(2) {
944 Some(n) => hi = n,
945 None => {
946 // pathological sparse keys (the doubling overflowed): scan
947 // linearly from 1 for the first border, as PUC's
948 // unbound_search does — finds a small border fast instead of
949 // returning the huge one.
950 let mut i = 1i64;
951 while !self.get_int(i).is_nil() {
952 i += 1;
953 }
954 return i - 1;
955 }
956 }
957 }
958 while hi - lo > 1 {
959 let m = lo + (hi - lo) / 2;
960 if self.get_int(m).is_nil() {
961 hi = m;
962 } else {
963 lo = m;
964 }
965 }
966 lo
967 }
968
969 /// Lua `next`: iterate array part then hash part.
970 pub fn next(&self, key: Value) -> Result<Option<(Value, Value)>, TableError> {
971 let start = match key {
972 Value::Nil => 0,
973 k => {
974 let k = match k {
975 Value::Float(f) => match f2i_exact(f) {
976 Some(i) => Value::Int(i),
977 None => k,
978 },
979 k => k,
980 };
981 if let Value::Int(i) = k
982 && i >= 1
983 && (i as u64) <= self.asize() as u64
984 {
985 i as usize
986 } else {
987 match self.find_node(k) {
988 Some(idx) => self.asize() + idx + 1,
989 None => return Err(TableError::InvalidNext),
990 }
991 }
992 }
993 };
994 let atags = self.atags();
995 for i in start..self.asize() {
996 if atags[i] != raw::NIL {
997 return Ok(Some((Value::Int(i as i64 + 1), self.aget(i))));
998 }
999 }
1000 let hstart = start.saturating_sub(self.asize());
1001 for (idx, n) in self.nodes.iter().enumerate().skip(hstart) {
1002 if !n.val.is_nil() {
1003 let _ = idx;
1004 return Ok(Some((n.key, n.val)));
1005 }
1006 }
1007 Ok(None)
1008 }
1009
1010 /// `(weak_keys, weak_values)` from the metatable's `__mode` field. Read by
1011 /// scanning the metatable for the `__mode` string (no interned key needed
1012 /// inside the collector).
1013 pub(crate) fn weak_mode(&self) -> (bool, bool) {
1014 let Some(mt) = self.metatable else {
1015 return (false, false);
1016 };
1017 for n in mt.nodes.iter() {
1018 if let (Value::Str(k), Value::Str(mode)) = (n.key, n.val)
1019 && k.as_bytes() == b"__mode"
1020 {
1021 let b = mode.as_bytes();
1022 return (b.contains(&b'k'), b.contains(&b'v'));
1023 }
1024 }
1025 (false, false)
1026 }
1027
1028 /// True when this table holds at least one direct reference (array slot,
1029 /// hash key, or hash value) to a coroutine whose mark bit is still clear.
1030 /// Used by the GC's cycle-finalize check (PUC 5.3 gc.lua :502) to detect
1031 /// the table ↔ thread reference cycle that needs an extra GC round before
1032 /// `__gc` runs. Tag-level scan avoids walking the full reference graph.
1033 pub(crate) fn refs_contain_unmarked_coro(&self) -> bool {
1034 use crate::runtime::heap::header_is_marked;
1035 let atags = self.atags();
1036 let avals = self.avals();
1037 for (i, &tag) in atags.iter().enumerate() {
1038 if tag == raw::CORO {
1039 // SAFETY: raw union access — the tag byte at the same index in `atags` was previously confirmed to be `co` (closure/object pointer) so the `co` variant of `RawVal` holds the valid payload.
1040 let p = unsafe { avals[i].co } as *mut crate::runtime::heap::GcHeader;
1041 if !header_is_marked(p) {
1042 return true;
1043 }
1044 }
1045 }
1046 for n in self.nodes.iter() {
1047 if let Value::Coro(co) = n.key {
1048 if !header_is_marked(co.as_ptr() as *mut crate::runtime::heap::GcHeader) {
1049 return true;
1050 }
1051 }
1052 if let Value::Coro(co) = n.val {
1053 if !header_is_marked(co.as_ptr() as *mut crate::runtime::heap::GcHeader) {
1054 return true;
1055 }
1056 }
1057 }
1058 false
1059 }
1060
1061 pub(crate) fn trace(&self, m: &mut Marker) {
1062 let (wk, wv) = self.weak_mode();
1063 if wk || wv {
1064 m.weak.push(self as *const Table as *mut Table);
1065 }
1066 // weak keys + strong values = an ephemeron table: its hash values are
1067 // marked only if the key proves reachable (deferred to the convergence
1068 // pass), not here. PUC 5.1 predates ephemerons — under `no_ephemeron`
1069 // a weak-key table marks its values strongly during this pass, which
1070 // is what gc.lua's "weak tables" section requires.
1071 let ephemeron = wk && !wv && !m.no_ephemeron;
1072 if ephemeron {
1073 m.ephemeron.push(self as *const Table as *mut Table);
1074 }
1075 // array keys are integers (never weakly collected); skip values only
1076 // when the table has weak values
1077 if !wv {
1078 let atags = self.atags();
1079 let avals = self.avals();
1080 for (i, &tag) in atags.iter().enumerate() {
1081 if raw::is_gc(tag) {
1082 // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
1083 m.value(unsafe { Value::pack(tag, avals[i]) });
1084 }
1085 }
1086 }
1087 for n in self.nodes.iter() {
1088 if !wk {
1089 m.value(n.key);
1090 }
1091 // ephemeron hash values are deferred; otherwise mark strong values
1092 if !wv && !ephemeron {
1093 m.value(n.val);
1094 }
1095 }
1096 if let Some(mt) = self.metatable {
1097 m.value(Value::Table(mt));
1098 }
1099 }
1100
1101 /// Ephemeron pass: mark the value of every hash entry whose key is alive
1102 /// (`alive` decides — strong/marked keys, plus strings/numbers which are
1103 /// never weakly collected). Returns true if any value was newly marked, so
1104 /// the caller can iterate to a fixpoint (PUC `traverseephemeron`).
1105 pub(crate) fn converge_ephemeron(&self, alive: &dyn Fn(Value) -> bool, m: &mut Marker) -> bool {
1106 let mut changed = false;
1107 for n in self.nodes.iter() {
1108 if !n.val.is_nil() && alive(n.key) {
1109 changed |= m.value(n.val);
1110 }
1111 }
1112 changed
1113 }
1114
1115 /// Clear entries whose weak key/value did not survive marking. `is_dead`
1116 /// reports whether a GC value was left unmarked (about to be swept).
1117 /// Clear weak-table entries whose key/value no longer carries a live
1118 /// reference. `is_dead` is a **pure** check (no side effects); the GC
1119 /// uses `mark_string` to resurrect any string that's still reachable via
1120 /// a *surviving* entry — Lua manual §2.5.4 says strings in weak tables
1121 /// are not collected as long as their entry is, and PUC `iscleared`
1122 /// implements that by marking the string during the same scan.
1123 pub(crate) fn clear_weak(
1124 &mut self,
1125 wk: bool,
1126 wv: bool,
1127 is_dead: &dyn Fn(Value) -> bool,
1128 mark_string: &dyn Fn(Value),
1129 ) {
1130 if wv {
1131 let n = self.asize as usize;
1132 for i in 0..n {
1133 let tag = self.atags()[i];
1134 if raw::is_gc(tag) {
1135 // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
1136 let v = unsafe { Value::pack(tag, self.avals()[i]) };
1137 if is_dead(v) {
1138 self.atags_mut()[i] = raw::NIL;
1139 self.avals_mut()[i] = RawVal::NIL;
1140 } else {
1141 mark_string(v);
1142 }
1143 }
1144 }
1145 }
1146 for n in self.nodes.iter_mut() {
1147 if n.val.is_nil() {
1148 continue;
1149 }
1150 let key_dead = wk && is_dead(n.key);
1151 let val_dead = wv && is_dead(n.val);
1152 if key_dead || val_dead {
1153 // entry removed. PUC `setdeadkey`: when the key was a
1154 // collectable, drop the Gc pointer so a later raw_eq cannot
1155 // spuriously match a new object that gets allocated at the
1156 // same freed address. Keep `next` so the chain back-links
1157 // through this node still reach downstream entries; the
1158 // `dead_key` flag tells `find_node` to skip the comparison
1159 // and `insert_new` to treat the slot as a free
1160 // main-position owner that may inherit the chain.
1161 n.val = Value::Nil;
1162 if matches!(
1163 n.key,
1164 Value::Table(_)
1165 | Value::Closure(_)
1166 | Value::Native(_)
1167 | Value::Coro(_)
1168 | Value::Userdata(_)
1169 | Value::Str(_)
1170 ) {
1171 n.key = Value::Nil;
1172 n.dead_key = true;
1173 }
1174 } else {
1175 // entry survives — resurrect any string reachable through it
1176 if wk {
1177 mark_string(n.key);
1178 }
1179 if wv {
1180 mark_string(n.val);
1181 }
1182 }
1183 }
1184 }
1185}
1186
1187// =====================================================================
1188// C3 — SoA + Robin Hood open-addressing hash part (Variant A).
1189//
1190// Parallel to the chain-walk path during Phase B+C+D transition: the
1191// chain `nodes` / `lastfree` is the authoritative read path until
1192// Phase E migrates `next()` and Phase 4 cuts over. These methods
1193// operate only on the `keys` / `vals` / `meta` / `tombstones` SoA
1194// arrays — chain state is never touched.
1195//
1196// Layout invariants the methods below maintain:
1197// - `keys.len() == vals.len() == meta.len()`, all power-of-two
1198// (or zero in the empty-stub state)
1199// - `meta[i] = meta_bits::EMPTY` iff slot i is free
1200// - tombstoned slots are scanned past by find but reused by insert
1201// - `tombstones` counts the meta slots with TOMBSTONE_BIT set
1202// - load factor (live + tombstone) / cap is kept ≤ 0.75 via
1203// `soa_grow_if_needed` (R-A1 mitigation — PSL bound 63)
1204// - rehash is REFUSED when `iter_depth > 0` (R-A3 mitigation —
1205// wired in Phase F; for Phase C the counter is always 0 so the
1206// refusal path is unreachable)
1207//
1208// Refs: `.dev/rfcs/v2.0-c3-soa-robinhood-rfc.md` §5.1 (variant A),
1209// §6.2 Phase 1-3 (impl plan).
1210// =====================================================================
1211
1212/// C3 — initial SoA capacity when growing from empty. Power of two.
1213/// Picked at 4 so a 3-element table doesn't trigger an immediate
1214/// regrowth.
1215#[allow(dead_code)] // wired by Phase D/E/F integration
1216pub(crate) const SOA_INITIAL_CAP: usize = 4;
1217
1218/// C3 — high load-factor threshold (3/4). SoA grow trigger; matches
1219/// the RFC §5.1 recommendation. PSL_MAX is the u16 14-bit value so
1220/// long-tail PSL overruns are recoverable via grow-retry.
1221#[allow(dead_code)]
1222const SOA_LOAD_NUM: usize = 3;
1223#[allow(dead_code)]
1224const SOA_LOAD_DEN: usize = 4;
1225
1226/// C3 — tombstone density threshold (1/4). When tombstones/cap ≥ 25%
1227/// the next non-resize-triggering rehash compacts them.
1228#[allow(dead_code)]
1229const SOA_TOMB_NUM: usize = 1;
1230#[allow(dead_code)]
1231const SOA_TOMB_DEN: usize = 4;
1232
1233#[allow(dead_code)] // wired by Phase D/E/F integration into public set/get/next
1234impl Table {
1235 /// C3 — current SoA hash-part capacity in slots (0 = empty stub).
1236 #[inline]
1237 pub(crate) fn soa_cap(&self) -> usize {
1238 self.meta.len()
1239 }
1240
1241 /// C3 — count of live (occupied & not tombstone) SoA slots.
1242 /// O(n) — only used by the Phase G equivalence test path; the
1243 /// hot rehash trigger uses `live_estimate = cap*3/4 - tombstones`
1244 /// implicitly via `soa_grow_if_needed`.
1245 #[cfg(test)]
1246 pub(crate) fn soa_live_count(&self) -> usize {
1247 self.meta.iter().filter(|&&m| meta_bits::is_live(m)).count()
1248 }
1249
1250 /// C3 — count of occupied (live OR tombstoned) SoA slots; this is
1251 /// the value the load factor compares against `cap * 3/4`.
1252 #[inline]
1253 fn soa_occupied_count(&self) -> usize {
1254 // O(n) sweep — Phase C inserts the count on each call so the
1255 // worst case is bounded by per-insert amortised cost. A future
1256 // polish could maintain a counter incrementally; left as a
1257 // Phase H mini-bench-driven follow-up if PI sample shows it
1258 // contributes > 1 µs/cell.
1259 self.meta
1260 .iter()
1261 .filter(|&&m| meta_bits::is_occupied(m))
1262 .count()
1263 }
1264
1265 /// C3 — Robin Hood lookup. Returns the slot index of a *live*
1266 /// matching key, or None if absent. Walks past tombstones (they
1267 /// preserve probe chains). Returns None if the SoA cap is zero
1268 /// (Phase B stub state). Bound by `cap` probes; in practice
1269 /// expected ≤ 8 at load 0.75.
1270 pub(crate) fn soa_find_slot(&self, k: Value) -> Option<usize> {
1271 let cap = self.meta.len();
1272 if cap == 0 {
1273 return None;
1274 }
1275 let mask = cap - 1;
1276 let mut idx = (hash_key(k) as usize) & mask;
1277 // Walk until empty slot or wrap. The `steps <= cap` bound
1278 // is a safety net: a properly maintained Robin Hood table
1279 // with load < 1 always has at least one empty slot, so a
1280 // full wrap means table invariant violation.
1281 for _ in 0..cap {
1282 let m = self.meta[idx];
1283 if !meta_bits::is_occupied(m) {
1284 return None;
1285 }
1286 if !meta_bits::is_tombstone(m) && self.keys[idx].raw_eq(k) {
1287 return Some(idx);
1288 }
1289 idx = (idx + 1) & mask;
1290 }
1291 None
1292 }
1293
1294 /// C3 — Allocate fresh SoA arrays at `new_cap` (power of two) and
1295 /// re-insert every live entry from the old SoA arrays. Tombstones
1296 /// are dropped (count resets to 0). Used by `soa_grow_if_needed`
1297 /// (new_cap = max(SOA_INITIAL_CAP, 2*cap)) and by Phase D
1298 /// tombstone compaction (new_cap = cap).
1299 ///
1300 /// IMPORTANT: rehash MUST NOT fire while `iter_depth > 0`
1301 /// (R-A3) — wired in Phase F. Phase C callers all enter from
1302 /// non-iteration paths.
1303 fn soa_rehash_to(&mut self, heap: &mut Heap, new_cap: usize) -> Result<(), TableError> {
1304 debug_assert!(new_cap.is_power_of_two() && new_cap > 0);
1305 let before = self.internal_bytes();
1306 // Snapshot old live entries. This list is the canonical
1307 // "must be present after rehash" set; we restart from it on
1308 // any PSL-overflow retry.
1309 let mut survivors: Vec<(Value, Value)> = Vec::with_capacity(self.meta.len());
1310 for i in 0..self.meta.len() {
1311 if meta_bits::is_live(self.meta[i]) {
1312 survivors.push((self.keys[i], self.vals[i]));
1313 }
1314 }
1315 // Install fresh empty arrays at `new_cap`. On PSL overflow
1316 // during the re-insert pass (extremely rare with the 14-bit
1317 // PSL budget — would need a pathological hash distribution),
1318 // double the cap and replay the original `survivors` list
1319 // from scratch. We don't try to salvage partial work — the
1320 // rare-path retry cost is bounded by O(n × max_doublings),
1321 // and max_doublings has a hard MAX_ASIZE ceiling.
1322 let mut cap = new_cap;
1323 loop {
1324 if cap > MAX_ASIZE {
1325 return Err(TableError::Overflow);
1326 }
1327 self.keys = vec![Value::Nil; cap].into_boxed_slice();
1328 self.vals = vec![Value::Nil; cap].into_boxed_slice();
1329 self.meta = vec![meta_bits::EMPTY; cap].into_boxed_slice();
1330 self.tombstones = 0;
1331 let mut overflowed = false;
1332 for (k, v) in survivors.iter().copied() {
1333 if self.soa_place_known_absent(k, v).is_err() {
1334 overflowed = true;
1335 break;
1336 }
1337 }
1338 if !overflowed {
1339 break;
1340 }
1341 cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1342 }
1343 let after = self.internal_bytes();
1344 heap.apply_bytes_delta(before, after);
1345 Ok(())
1346 }
1347
1348 /// C3 — Raw rob-from-rich placement for a key known to be absent
1349 /// from the SoA arrays. Used by `soa_rehash_to` (re-insert pass)
1350 /// and by `soa_insert` (new-key path after the explicit
1351 /// soa_find_slot check). This routine does NOT auto-grow on a
1352 /// load-factor trigger (caller's responsibility), but DOES signal
1353 /// back to the caller via `Err(())` when the PSL bound of 63 is
1354 /// hit before finding an empty slot — Robin Hood's long-tail
1355 /// max-PSL exceeds the 6-bit storage budget at unfavourable hash
1356 /// distributions even under the nominal 0.75 load gate (RFC §6.3
1357 /// R-A1). The caller (`soa_insert`) handles by growing & retrying.
1358 ///
1359 /// On success returns the slot index where the new key landed
1360 /// (after any rob-from-rich shuffle, the original `k` value is at
1361 /// this returned index).
1362 fn soa_place_known_absent(&mut self, k: Value, v: Value) -> Result<usize, (Value, Value)> {
1363 let cap = self.meta.len();
1364 debug_assert!(cap > 0);
1365 let mask = cap - 1;
1366 let landing = (hash_key(k) as usize) & mask;
1367 let mut idx = landing;
1368 let mut cur_psl: u16 = 0;
1369 let mut cur_key = k;
1370 let mut cur_val = v;
1371 let mut placed_at: Option<usize> = None;
1372 for _ in 0..cap {
1373 let m = self.meta[idx];
1374 if !meta_bits::is_occupied(m) || meta_bits::is_tombstone(m) {
1375 if meta_bits::is_tombstone(m) {
1376 self.tombstones = self.tombstones.saturating_sub(1);
1377 }
1378 self.meta[idx] = meta_bits::pack(cur_psl, false);
1379 self.keys[idx] = cur_key;
1380 self.vals[idx] = cur_val;
1381 return Ok(placed_at.unwrap_or(idx));
1382 }
1383 let stored_psl = meta_bits::psl(m);
1384 if cur_psl > stored_psl {
1385 // Rob: swap cur into this slot, evict stored to continue.
1386 std::mem::swap(&mut cur_key, &mut self.keys[idx]);
1387 std::mem::swap(&mut cur_val, &mut self.vals[idx]);
1388 self.meta[idx] = meta_bits::pack(cur_psl, false);
1389 if placed_at.is_none() {
1390 placed_at = Some(idx);
1391 }
1392 cur_psl = stored_psl;
1393 }
1394 idx = (idx + 1) & mask;
1395 cur_psl = cur_psl.saturating_add(1);
1396 if cur_psl > meta_bits::PSL_MAX {
1397 // PSL exceeds the 14-bit storage budget — exceptionally
1398 // rare with 16384 max. Caller (soa_insert / rehash
1399 // outer loop) handles by growing & retrying. Partial
1400 // state: all entries are still in the table EXCEPT
1401 // `(cur_key, cur_val)` which is the latest homeless
1402 // evictee — return it so caller can re-issue.
1403 return Err((cur_key, cur_val));
1404 }
1405 }
1406 // Wrapped cap probes with no free slot — invariant violation
1407 // (load < 1 should guarantee at least one empty). Signal as
1408 // PSL-overflow equivalent so caller grows + retries.
1409 Err((cur_key, cur_val))
1410 }
1411
1412 /// C3 — Grow SoA capacity if the load factor is at or above the
1413 /// 0.75 trigger. Doubles cap; from empty grows to SOA_INITIAL_CAP.
1414 fn soa_grow_if_needed(&mut self, heap: &mut Heap) -> Result<(), TableError> {
1415 // Defer rehash when an iterator is in flight (R-A3). Wired
1416 // in Phase F; in Phase C iter_depth is always 0.
1417 if self.iter_depth > 0 {
1418 return Ok(());
1419 }
1420 let cap = self.meta.len();
1421 if cap == 0 {
1422 return self.soa_rehash_to(heap, SOA_INITIAL_CAP);
1423 }
1424 let occupied = self.soa_occupied_count();
1425 if occupied * SOA_LOAD_DEN >= cap * SOA_LOAD_NUM {
1426 let new_cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1427 return self.soa_rehash_to(heap, new_cap);
1428 }
1429 // Tombstone compaction (same cap, drops tombstones).
1430 if self.tombstones as usize * SOA_TOMB_DEN >= cap * SOA_TOMB_NUM {
1431 return self.soa_rehash_to(heap, cap);
1432 }
1433 Ok(())
1434 }
1435
1436 /// C3 — Insert (or update) `(k, v)` in the SoA hash part. Routes
1437 /// through `soa_find_slot` first so an existing key updates its
1438 /// val in place; otherwise rob-from-rich places a new entry.
1439 /// Auto-rehashes if the load factor would exceed 0.75 OR if the
1440 /// place chain runs into a PSL overflow on a pathological hash
1441 /// distribution.
1442 ///
1443 /// Phase C: this method is callable from outside via the
1444 /// equivalence-test entrypoint (Phase G); not yet hooked into
1445 /// public `set` / `set_norm`.
1446 pub(crate) fn soa_insert(
1447 &mut self,
1448 heap: &mut Heap,
1449 k: Value,
1450 v: Value,
1451 ) -> Result<(), TableError> {
1452 debug_assert!(!matches!(k, Value::Nil));
1453 // 1. Update-in-place if key is already present (live slot).
1454 if let Some(idx) = self.soa_find_slot(k) {
1455 self.vals[idx] = v;
1456 return Ok(());
1457 }
1458 // 2. New key: ensure capacity, then place. On PSL-overflow
1459 // from the place chain (extremely rare with 14-bit PSL budget),
1460 // grow + rehash with the homeless evictee merged in.
1461 // `soa_rehash_with_extra` handles further retries internally,
1462 // bounded by MAX_ASIZE.
1463 self.soa_grow_if_needed(heap)?;
1464 match self.soa_place_known_absent(k, v) {
1465 Ok(_) => Ok(()),
1466 Err(homeless) => {
1467 let cap = self.meta.len();
1468 let new_cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1469 self.soa_rehash_with_extra(heap, new_cap, homeless)
1470 }
1471 }
1472 }
1473
1474 /// C3 — Rehash to `new_cap` while merging in an extra (k, v) pair
1475 /// not currently in the SoA arrays. Used by `soa_insert` to
1476 /// recover from PSL overflow: the homeless evictee from the failed
1477 /// place chain gets appended to the survivor list before the
1478 /// re-insert pass.
1479 fn soa_rehash_with_extra(
1480 &mut self,
1481 heap: &mut Heap,
1482 new_cap: usize,
1483 extra: (Value, Value),
1484 ) -> Result<(), TableError> {
1485 let before = self.internal_bytes();
1486 let mut survivors: Vec<(Value, Value)> = Vec::with_capacity(self.meta.len() + 1);
1487 for i in 0..self.meta.len() {
1488 if meta_bits::is_live(self.meta[i]) {
1489 survivors.push((self.keys[i], self.vals[i]));
1490 }
1491 }
1492 // Avoid duplicating the extra if its key was already placed at
1493 // some slot during the failed rob chain (the rob may have
1494 // landed the original input into a slot before overflowing on
1495 // a downstream evictee — that case the meta-walk above picks
1496 // it up).
1497 if !survivors.iter().any(|(k, _)| k.raw_eq(extra.0)) {
1498 survivors.push(extra);
1499 }
1500 let mut cap = new_cap;
1501 loop {
1502 if cap > MAX_ASIZE {
1503 return Err(TableError::Overflow);
1504 }
1505 self.keys = vec![Value::Nil; cap].into_boxed_slice();
1506 self.vals = vec![Value::Nil; cap].into_boxed_slice();
1507 self.meta = vec![meta_bits::EMPTY; cap].into_boxed_slice();
1508 self.tombstones = 0;
1509 let mut overflowed = false;
1510 for (k, v) in survivors.iter().copied() {
1511 if self.soa_place_known_absent(k, v).is_err() {
1512 overflowed = true;
1513 break;
1514 }
1515 }
1516 if !overflowed {
1517 break;
1518 }
1519 cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1520 }
1521 let after = self.internal_bytes();
1522 heap.apply_bytes_delta(before, after);
1523 Ok(())
1524 }
1525
1526 /// C3 — Read SoA hash part. Mirrors `get_hash` but reads from
1527 /// keys/vals/meta rather than nodes. Used by the Phase G
1528 /// equivalence test; not yet hooked into public `get` / `get_hash`.
1529 pub(crate) fn soa_get(&self, k: Value) -> Value {
1530 match self.soa_find_slot(k) {
1531 Some(idx) => self.vals[idx],
1532 None => Value::Nil,
1533 }
1534 }
1535
1536 /// C3 — Tombstone deletion. Marks the live slot for `k` as
1537 /// tombstoned, preserving the slot index (no backward shift).
1538 /// Slot-index stability is the PUC `next()` iteration invariant
1539 /// — `nextvar.lua:520-521` requires that deleting prior keys
1540 /// during a `pairs` traversal does NOT move unvisited keys.
1541 /// Backward-shift deletion would violate this; tombstones are
1542 /// the standard Robin Hood resolution (see RFC §4.5 + §5.1).
1543 ///
1544 /// keys[idx] / vals[idx] are reset to Nil so the GC marker is
1545 /// not held to the previous entries — only the tombstone bit
1546 /// distinguishes "occupied tombstone" from "free empty".
1547 ///
1548 /// Returns true if the key was found and deleted, false if absent.
1549 ///
1550 /// Phase D: not yet hooked into public `set(k, Nil)` — wired in
1551 /// Phase E alongside the `next()` migration.
1552 pub(crate) fn soa_delete(&mut self, k: Value) -> bool {
1553 if let Some(idx) = self.soa_find_slot(k) {
1554 let psl = meta_bits::psl(self.meta[idx]);
1555 self.meta[idx] = meta_bits::pack(psl, true);
1556 self.keys[idx] = Value::Nil;
1557 self.vals[idx] = Value::Nil;
1558 self.tombstones = self.tombstones.saturating_add(1);
1559 true
1560 } else {
1561 false
1562 }
1563 }
1564}
1565
1566fn normalize_set_key(key: Value) -> Result<Value, TableError> {
1567 match key {
1568 Value::Nil => Err(TableError::NilIndex),
1569 Value::Float(f) => match f2i_exact(f) {
1570 Some(i) => Ok(Value::Int(i)),
1571 None if f.is_nan() => Err(TableError::NanIndex),
1572 None => Ok(key),
1573 },
1574 k => Ok(k),
1575 }
1576}
1577
1578fn hash_key(k: Value) -> u64 {
1579 match k {
1580 Value::Int(i) => i as u64, // identity mod size (PUC hashint)
1581 Value::Float(f) => mix64(f.to_bits()),
1582 Value::Bool(b) => b as u64 + 1,
1583 Value::Str(s) => s.hash() as u64,
1584 Value::Table(t) => mix64(t.as_ptr() as u64),
1585 Value::Closure(c) => mix64(c.as_ptr() as u64),
1586 Value::Native(n) => mix64(n.as_ptr() as u64),
1587 Value::Coro(co) => mix64(co.as_ptr() as u64),
1588 Value::Userdata(u) => mix64(u.as_ptr() as u64),
1589 Value::LightUserdata(p) => mix64(p as u64),
1590 Value::Nil => 0, // unreachable as a stored key
1591 }
1592}
1593
1594/// splitmix64 finalizer.
1595fn mix64(mut x: u64) -> u64 {
1596 x ^= x >> 30;
1597 x = x.wrapping_mul(0xbf58_476d_1ce4_e5b9);
1598 x ^= x >> 27;
1599 x = x.wrapping_mul(0x94d0_49bb_1331_11eb);
1600 x ^ (x >> 31)
1601}
1602
1603/// For k ≥ 1: the bucket l such that k ∈ (2^(l-1), 2^l].
1604fn ceil_log2(k: u64) -> usize {
1605 (u64::BITS - (k - 1).leading_zeros()) as usize
1606}
1607
1608impl Table {
1609 /// Preallocate the array part (table.create); existing contents are
1610 /// preserved.
1611 pub fn ensure_array(&mut self, heap: &mut Heap, n: usize) {
1612 if n > self.asize() {
1613 let hash_entries = self.nodes.iter().filter(|nd| !nd.val.is_nil()).count();
1614 self.resize(heap, n, hash_entries);
1615 }
1616 }
1617}
1618
1619impl Table {
1620 /// Preallocate hash-part capacity (table.create's second size).
1621 pub fn ensure_hash(&mut self, heap: &mut Heap, n: usize) {
1622 let entries = self.nodes.iter().filter(|nd| !nd.val.is_nil()).count();
1623 if n > self.nodes.len() {
1624 self.resize(heap, self.asize(), n.max(entries));
1625 }
1626 }
1627}
1628
1629#[cfg(test)]
1630mod tests {
1631 use super::*;
1632 use crate::runtime::heap::Heap;
1633
1634 fn with_table(f: impl FnOnce(&mut Heap, &mut Table)) {
1635 let mut heap = Heap::new();
1636 let t = heap.new_table();
1637 f(&mut heap, unsafe { t.as_mut() });
1638 }
1639
1640 fn assert_is_border(t: &Table, n: i64) {
1641 if n == 0 {
1642 assert!(t.get_int(1).is_nil(), "border 0 but t[1] non-nil");
1643 } else {
1644 assert!(!t.get_int(n).is_nil(), "border {n} but t[{n}] is nil");
1645 assert!(
1646 t.get_int(n + 1).is_nil(),
1647 "border {n} but t[{}] non-nil",
1648 n + 1
1649 );
1650 }
1651 }
1652
1653 /// v2.1 Phase 1I.B — pin `Box<[Node]>` fat-ptr layout at runtime.
1654 /// The luna-jit table-field IC reads `(ptr, len)` directly out of
1655 /// the `nodes` field assuming the data pointer occupies the low 8
1656 /// bytes and the length the high 8 bytes (de-facto Rust ABI on
1657 /// 64-bit targets but not formally guaranteed). If a future Rust
1658 /// release reorders the fat-ptr, this test fails before IC fires
1659 /// at runtime.
1660 #[test]
1661 #[allow(clippy::assertions_on_constants)]
1662 #[cfg(target_pointer_width = "64")]
1663 fn phase_1i_b_node_layout_pinned() {
1664 use jit_layout::*;
1665 assert_eq!(std::mem::size_of::<Box<[Node]>>(), 16);
1666 assert_eq!(NODE_KEY_OFFSET, 0);
1667 assert_eq!(NODE_VAL_OFFSET, 16);
1668 assert!(SIZEOF_NODE >= 32);
1669
1670 // Construct a real Box<[Node]> with a known length, then
1671 // peek at the fat-pointer's two halves to confirm the
1672 // (data_ptr, len) order. Use a 4-slot box so the length is
1673 // non-zero and the data pointer is heap-allocated.
1674 let b: Box<[Node]> = vec![Node::EMPTY; 4].into_boxed_slice();
1675 let raw_ptr = b.as_ptr();
1676 let raw_len = b.len();
1677 // SAFETY: reading the fat pointer's two words is exactly the
1678 // layout luna-jit's IR assumes; it's the safest possible test
1679 // of that assumption.
1680 let words: [usize; 2] = unsafe { std::mem::transmute_copy(&b) };
1681 assert_eq!(words[0], raw_ptr as usize, "fat-ptr low word = data ptr");
1682 assert_eq!(words[1], raw_len, "fat-ptr high word = len");
1683 drop(b);
1684 }
1685
1686 #[test]
1687 fn sequence_grows_into_array() {
1688 with_table(|heap, t| {
1689 for i in 1..=1000 {
1690 let _ = t.set_int(heap, i, Value::Int(i * 10));
1691 }
1692 for i in 1..=1000 {
1693 assert!(t.get_int(i).raw_eq(Value::Int(i * 10)));
1694 }
1695 assert_eq!(t.len(), 1000);
1696 });
1697 }
1698
1699 #[test]
1700 fn string_and_mixed_keys() {
1701 with_table(|heap, t| {
1702 let k1 = Value::Str(heap.intern(b"alpha"));
1703 let k2 = Value::Str(heap.intern(b"beta"));
1704 t.set(heap, k1, Value::Int(1)).unwrap();
1705 t.set(heap, k2, Value::Int(2)).unwrap();
1706 t.set(heap, Value::Bool(true), Value::Int(3)).unwrap();
1707 t.set(heap, Value::Int(-5), Value::Int(4)).unwrap();
1708 // re-interned key reaches the same slot
1709 let k1b = Value::Str(heap.intern(b"alpha"));
1710 assert!(t.get(k1b).raw_eq(Value::Int(1)));
1711 assert!(t.get(k2).raw_eq(Value::Int(2)));
1712 assert!(t.get(Value::Bool(true)).raw_eq(Value::Int(3)));
1713 assert!(t.get(Value::Int(-5)).raw_eq(Value::Int(4)));
1714 assert!(t.get(Value::Str(heap.intern(b"gamma"))).is_nil());
1715 });
1716 }
1717
1718 #[test]
1719 fn float_keys_normalize_to_int() {
1720 with_table(|heap, t| {
1721 t.set(heap, Value::Float(2.0), Value::Int(22)).unwrap();
1722 assert!(t.get(Value::Int(2)).raw_eq(Value::Int(22)));
1723 t.set(heap, Value::Int(3), Value::Int(33)).unwrap();
1724 assert!(t.get(Value::Float(3.0)).raw_eq(Value::Int(33)));
1725 // -0.0 is key 0
1726 t.set(heap, Value::Float(-0.0), Value::Int(0)).unwrap();
1727 assert!(t.get(Value::Int(0)).raw_eq(Value::Int(0)));
1728 // non-integral floats are their own keys
1729 t.set(heap, Value::Float(0.5), Value::Int(55)).unwrap();
1730 assert!(t.get(Value::Float(0.5)).raw_eq(Value::Int(55)));
1731 assert!(t.get(Value::Int(0)).raw_eq(Value::Int(0)));
1732 });
1733 }
1734
1735 #[test]
1736 fn bad_keys() {
1737 with_table(|heap, t| {
1738 assert_eq!(
1739 t.set(heap, Value::Nil, Value::Int(1)),
1740 Err(TableError::NilIndex)
1741 );
1742 assert_eq!(
1743 t.set(heap, Value::Float(f64::NAN), Value::Int(1)),
1744 Err(TableError::NanIndex)
1745 );
1746 // reads with bad keys are nil, not errors
1747 assert!(t.get(Value::Nil).is_nil());
1748 assert!(t.get(Value::Float(f64::NAN)).is_nil());
1749 });
1750 }
1751
1752 #[test]
1753 fn delete_and_reinsert() {
1754 with_table(|heap, t| {
1755 let k = Value::Str(heap.intern(b"k"));
1756 t.set(heap, k, Value::Int(1)).unwrap();
1757 t.set(heap, k, Value::Nil).unwrap();
1758 assert!(t.get(k).is_nil());
1759 t.set(heap, k, Value::Int(2)).unwrap();
1760 assert!(t.get(k).raw_eq(Value::Int(2)));
1761 // setting an absent key to nil stays absent
1762 let k2 = Value::Str(heap.intern(b"k2"));
1763 t.set(heap, k2, Value::Nil).unwrap();
1764 assert!(t.get(k2).is_nil());
1765 });
1766 }
1767
1768 #[test]
1769 fn borders_with_holes() {
1770 with_table(|heap, t| {
1771 let _ = t.set_int(heap, 1, Value::Int(1));
1772 let _ = t.set_int(heap, 2, Value::Int(2));
1773 assert_eq!(t.len(), 2);
1774 t.set_int(heap, 2, Value::Nil).unwrap();
1775 assert_is_border(t, t.len());
1776 // hash-resident tail
1777 let _ = t.set_int(heap, 1_000_000, Value::Int(1));
1778 assert_is_border(t, t.len());
1779 });
1780 }
1781
1782 #[test]
1783 fn len_on_empty_and_hash_only() {
1784 with_table(|heap, t| {
1785 assert_eq!(t.len(), 0);
1786 let xk = Value::Str(heap.intern(b"x"));
1787 t.set(heap, xk, Value::Int(1)).unwrap();
1788 assert_eq!(t.len(), 0);
1789 });
1790 }
1791
1792 #[test]
1793 fn next_iterates_everything_exactly_once() {
1794 with_table(|heap, t| {
1795 let mut expected = 0i64;
1796 for i in 1..=64 {
1797 let _ = t.set_int(heap, i, Value::Int(i));
1798 expected += i;
1799 }
1800 for i in 0..32 {
1801 let k = Value::Str(heap.intern(format!("s{i}").as_bytes()));
1802 t.set(heap, k, Value::Int(1000 + i)).unwrap();
1803 expected += 1000 + i;
1804 }
1805 t.set(heap, Value::Float(2.5), Value::Int(7)).unwrap();
1806 expected += 7;
1807
1808 let mut sum = 0i64;
1809 let mut count = 0;
1810 let mut key = Value::Nil;
1811 while let Some((k, v)) = t.next(key).unwrap() {
1812 let Value::Int(x) = v else {
1813 panic!("bad value")
1814 };
1815 sum += x;
1816 count += 1;
1817 key = k;
1818 }
1819 assert_eq!(count, 64 + 32 + 1);
1820 assert_eq!(sum, expected);
1821 });
1822 }
1823
1824 #[test]
1825 fn next_skips_nil_values_and_rejects_alien_keys() {
1826 with_table(|heap, t| {
1827 let _ = t.set_int(heap, 1, Value::Int(1));
1828 let _ = t.set_int(heap, 3, Value::Int(3));
1829 let k = Value::Str(heap.intern(b"gone"));
1830 t.set(heap, k, Value::Int(9)).unwrap();
1831 t.set(heap, k, Value::Nil).unwrap();
1832 let mut seen = Vec::new();
1833 let mut key = Value::Nil;
1834 while let Some((k, v)) = t.next(key).unwrap() {
1835 let Value::Int(x) = v else { panic!() };
1836 seen.push(x);
1837 key = k;
1838 }
1839 assert_eq!(seen, vec![1, 3]);
1840 // a key never inserted is invalid for next
1841 let alien = Value::Str(heap.intern(b"never"));
1842 assert!(matches!(t.next(alien), Err(TableError::InvalidNext)));
1843 // ...but a deleted (nil-valued) key is still a valid cursor
1844 assert!(t.next(k).is_ok());
1845 });
1846 }
1847
1848 #[test]
1849 fn collision_relocation_keeps_chains_intact() {
1850 with_table(|heap, t| {
1851 // dense negative ints all land in the hash part; with identity
1852 // hashing they exercise both chain cases heavily
1853 for i in 0..512 {
1854 let _ = t.set_int(heap, -i, Value::Int(i));
1855 }
1856 for i in 0..512 {
1857 assert!(t.get_int(-i).raw_eq(Value::Int(i)), "lost key {}", -i);
1858 }
1859 });
1860 }
1861
1862 // -----------------------------------------------------------------
1863 // C3 SoA Robin Hood equivalence tests (Phase G).
1864 //
1865 // Cross-check the new SoA + RH path against the existing chain-walk
1866 // path: replay the same insert/lookup sequence on a table via
1867 // `set` (chain) and another via `soa_insert` (SoA), then assert
1868 // `get == soa_get` for every key.
1869 //
1870 // Phase B+C scope: insert + read only. Tombstone delete equivalence
1871 // arrives with Phase D.
1872 // -----------------------------------------------------------------
1873
1874 fn replay_chain(heap: &mut Heap, ops: &[(Value, Value)]) -> *mut Table {
1875 let t = heap.new_table();
1876 let tref = unsafe { t.as_mut() };
1877 for (k, v) in ops.iter().copied() {
1878 tref.set(heap, k, v).unwrap();
1879 }
1880 t.as_ptr()
1881 }
1882
1883 fn replay_soa(heap: &mut Heap, ops: &[(Value, Value)]) -> *mut Table {
1884 let t = heap.new_table();
1885 let tref = unsafe { t.as_mut() };
1886 for (k, v) in ops.iter().copied() {
1887 tref.soa_insert(heap, k, v).unwrap();
1888 }
1889 t.as_ptr()
1890 }
1891
1892 #[test]
1893 fn c3_soa_equivalence_string_keys() {
1894 let mut heap = Heap::new();
1895 let mut ops = Vec::new();
1896 for i in 0..40 {
1897 let k = Value::Str(heap.intern(format!("key_{i:03}").as_bytes()));
1898 ops.push((k, Value::Int(i * 7)));
1899 }
1900 let chain = unsafe { &*replay_chain(&mut heap, &ops) };
1901 let soa = unsafe { &*replay_soa(&mut heap, &ops) };
1902 for (k, _) in &ops {
1903 let cv = chain.get(*k);
1904 let sv = soa.soa_get(*k);
1905 assert!(
1906 cv.raw_eq(sv),
1907 "SoA vs chain mismatch on key — chain={:?} soa={:?}",
1908 cv,
1909 sv,
1910 );
1911 }
1912 // Absent key returns nil from both paths.
1913 let absent = Value::Str(heap.intern(b"never"));
1914 assert!(chain.get(absent).is_nil());
1915 assert!(soa.soa_get(absent).is_nil());
1916 }
1917
1918 #[test]
1919 fn c3_soa_equivalence_negative_int_keys() {
1920 // Dense negative ints with identity hashing — same collision
1921 // profile as the existing `collision_relocation_keeps_chains_intact`
1922 // test, but verified through the SoA RH path. Triggers
1923 // rob-from-rich repeatedly.
1924 let mut heap = Heap::new();
1925 let mut ops = Vec::new();
1926 for i in 0..256 {
1927 let k = Value::Int(-i);
1928 ops.push((k, Value::Int(i)));
1929 }
1930 let chain = unsafe { &*replay_chain(&mut heap, &ops) };
1931 let soa = unsafe { &*replay_soa(&mut heap, &ops) };
1932 for (k, _) in &ops {
1933 let cv = chain.get(*k);
1934 let sv = soa.soa_get(*k);
1935 assert!(cv.raw_eq(sv), "SoA mismatch on key {:?}", k);
1936 }
1937 }
1938
1939 #[test]
1940 fn c3_soa_equivalence_mixed_keys_with_updates() {
1941 // Insert, then update the same keys with new values — exercises
1942 // the soa_find_slot in-place update branch.
1943 let mut heap = Heap::new();
1944 let kstr = Value::Str(heap.intern(b"x"));
1945 let kint = Value::Int(42);
1946 let kbool = Value::Bool(true);
1947 let ops: Vec<(Value, Value)> = vec![
1948 (kstr, Value::Int(1)),
1949 (kint, Value::Int(2)),
1950 (kbool, Value::Int(3)),
1951 (kstr, Value::Int(11)), // update
1952 (kint, Value::Int(22)), // update
1953 (kbool, Value::Int(33)), // update
1954 ];
1955 let chain = unsafe { &*replay_chain(&mut heap, &ops) };
1956 let soa = unsafe { &*replay_soa(&mut heap, &ops) };
1957 for k in [kstr, kint, kbool] {
1958 assert!(chain.get(k).raw_eq(soa.soa_get(k)));
1959 }
1960 }
1961
1962 #[test]
1963 fn c3_soa_equivalence_delete_then_read() {
1964 // Phase D: tombstone delete + read on both paths, verify
1965 // matching nil-for-deleted, original-val-for-live.
1966 let mut heap = Heap::new();
1967 let mut ops_insert = Vec::new();
1968 for i in 0..30 {
1969 let k = Value::Str(heap.intern(format!("d_key_{i:03}").as_bytes()));
1970 ops_insert.push((k, Value::Int(i * 11)));
1971 }
1972 let chain = unsafe { &mut *replay_chain(&mut heap, &ops_insert) };
1973 let soa = unsafe { &mut *replay_soa(&mut heap, &ops_insert) };
1974 // Delete every 3rd key.
1975 let mut deleted: Vec<Value> = Vec::new();
1976 for (i, (k, _)) in ops_insert.iter().enumerate() {
1977 if i % 3 == 0 {
1978 // chain: set to Nil is the chain-path's delete equivalent
1979 chain.set(&mut heap, *k, Value::Nil).unwrap();
1980 let was_present = soa.soa_delete(*k);
1981 assert!(was_present, "soa_delete miss on inserted key {:?}", k);
1982 deleted.push(*k);
1983 }
1984 }
1985 // Read each key: deleted → nil, non-deleted → original val.
1986 for (k, v) in &ops_insert {
1987 let cv = chain.get(*k);
1988 let sv = soa.soa_get(*k);
1989 assert!(
1990 cv.raw_eq(sv),
1991 "delete/read mismatch on key {:?} — chain={:?} soa={:?}",
1992 k,
1993 cv,
1994 sv,
1995 );
1996 if deleted.iter().any(|d| d.raw_eq(*k)) {
1997 assert!(cv.is_nil(), "deleted key {:?} chain non-nil", k);
1998 assert!(sv.is_nil(), "deleted key {:?} soa non-nil", k);
1999 } else {
2000 assert!(cv.raw_eq(*v), "live key {:?} chain val drift", k);
2001 }
2002 }
2003 // Deleting an absent key is a no-op (returns false) on SoA.
2004 let absent = Value::Str(heap.intern(b"never_d"));
2005 assert!(!soa.soa_delete(absent));
2006 }
2007
2008 #[test]
2009 fn c3_soa_delete_then_reinsert_uses_tombstone() {
2010 // After delete + reinsert, key is findable with new val. The
2011 // SoA path may reuse the tombstoned slot (preferred) or place
2012 // elsewhere — either is correct as long as soa_get returns
2013 // the new val.
2014 let mut heap = Heap::new();
2015 let t = heap.new_table();
2016 let tref = unsafe { t.as_mut() };
2017 let k = Value::Str(heap.intern(b"reinsert_target"));
2018 tref.soa_insert(&mut heap, k, Value::Int(100)).unwrap();
2019 assert!(tref.soa_get(k).raw_eq(Value::Int(100)));
2020 let pre_tombs = tref.tombstones;
2021 assert!(tref.soa_delete(k));
2022 assert!(tref.tombstones == pre_tombs + 1);
2023 assert!(tref.soa_get(k).is_nil());
2024 // Reinsert with new val.
2025 tref.soa_insert(&mut heap, k, Value::Int(200)).unwrap();
2026 assert!(tref.soa_get(k).raw_eq(Value::Int(200)));
2027 // Tombstone reused — count back to pre_tombs.
2028 assert_eq!(tref.tombstones, pre_tombs);
2029 }
2030
2031 #[test]
2032 fn c3_soa_grows_under_load_pressure() {
2033 // Stress test: insert enough entries to trigger multiple RH
2034 // rehashes (cap doubles at load 0.75). Confirms PSL overflow
2035 // never fires and all keys survive grow cycles.
2036 let mut heap = Heap::new();
2037 let t = heap.new_table();
2038 let tref = unsafe { t.as_mut() };
2039 for i in 0..1024 {
2040 let k = Value::Str(heap.intern(format!("entry_{i:05}").as_bytes()));
2041 tref.soa_insert(&mut heap, k, Value::Int(i)).unwrap();
2042 }
2043 // Verify every key is findable.
2044 for i in 0..1024 {
2045 let k = Value::Str(heap.intern(format!("entry_{i:05}").as_bytes()));
2046 let v = tref.soa_get(k);
2047 assert!(
2048 v.raw_eq(Value::Int(i)),
2049 "SoA lost key entry_{:05} — got {:?}",
2050 i,
2051 v,
2052 );
2053 }
2054 assert!(tref.soa_live_count() == 1024);
2055 // Cap should have grown past the initial SOA_INITIAL_CAP via
2056 // the 0.75 load-factor trigger.
2057 assert!(
2058 tref.soa_cap() >= 2048,
2059 "SoA cap = {} after 1024 inserts — load gate didn't grow",
2060 tref.soa_cap(),
2061 );
2062 }
2063
2064 #[test]
2065 fn rehash_redistributes_into_array() {
2066 with_table(|heap, t| {
2067 // insert 1..n in reverse: starts in hash, rehash must migrate
2068 for i in (1..=256).rev() {
2069 let _ = t.set_int(heap, i, Value::Int(i));
2070 }
2071 assert_eq!(t.len(), 256);
2072 for i in 1..=256 {
2073 assert!(t.get_int(i).raw_eq(Value::Int(i)));
2074 }
2075 });
2076 }
2077}