luna_core/runtime/table.rs
1//! Lua table: hybrid array + hash.
2//!
3//! Array part uses split tag/payload storage (9 bytes/slot — the Lua 5.5
4//! "compact arrays" layout, bench-validated in benches/value_repr.rs).
5//! Hash part is the PUC node layout: main-position chaining with relocation
6//! (Brent's variation), capacity a power of two, rehash sizing per
7//! luaH_rehash/computesizes.
8
9use crate::runtime::heap::{Gc, GcHeader, Heap, Marker};
10use crate::runtime::value::{RawVal, Value, f2i_exact, raw};
11
12/// Errors that table mutation can raise back to the interpreter.
13#[derive(Clone, Copy, PartialEq, Eq, Debug)]
14pub enum TableError {
15 /// `t[nil] = …` — `nil` is forbidden as a key.
16 NilIndex,
17 /// `t[0/0] = …` — NaN floats are forbidden as keys.
18 NanIndex,
19 /// `next` called with a key not present in the table.
20 InvalidNext,
21 /// PUC `luaH_resizearray` — the array part would have to grow past
22 /// `MAXASIZE`, or the hash part past `MAXHBITS`. Raised back as
23 /// "table overflow" so a runaway `a[i] = i` loop walls within budget
24 /// (5.5/5.4 heavy.lua's `toomanyidx` pcalls exactly this scenario).
25 Overflow,
26}
27
28/// PUC `MAXASIZE` analogue: the highest power of two an array part may
29/// grow to. Choose a cap that comfortably fits in the gate's 60-second
30/// budget (each grow is O(n), so 2^27 entries × 16 bytes ≈ 2 GB is the
31/// effective ceiling). Beyond this `rehash` returns `TableError::Overflow`.
32pub(crate) const MAX_ASIZE: usize = 1 << 27;
33
34/// v2.1 Phase 1I.B — JIT layout constants for table-field IC.
35///
36/// luna-jit's trace lowerer needs to emit direct loads against
37/// `Table.nodes` (the hash part) without paying the helper-call ABI
38/// for each `Op::GetField` / `Op::SetField`. These constants expose
39/// the field offsets so the cranelift IR can be parameterised at
40/// compile time. The `Node` struct itself remains `pub(crate)` — only
41/// the offsets cross the crate boundary.
42///
43/// Layout assumptions:
44/// - `Box<[Node]>` is a fat pointer `(data_ptr, len)` on 64-bit
45/// targets (16 bytes total). The data pointer occupies the low 8
46/// bytes, length the high 8. This is the de-facto Rust ABI for
47/// `Box<[T]>` / `&[T]` but isn't formally guaranteed; the unit
48/// test `phase_1i_b_node_layout_pinned` and the const assertion
49/// on `size_of::<Box<[Node]>>()` catch drift.
50/// - `Value` is `#[repr(C, u8)]` so the discriminant byte sits at
51/// offset 0 and the payload starts at offset 8 (after 7 bytes of
52/// alignment padding). Total size 16 bytes per the existing
53/// `value_is_16_bytes` test in `runtime/value.rs`.
54/// - `Node` is `#[derive(Clone, Copy)]` with field order
55/// `(key: Value, val: Value, next: i32, dead_key: bool)`, so
56/// `key` lives at offset 0 and `val` at offset 16. The trailing
57/// `next + dead_key` fields are not read by the IC.
58pub mod jit_layout {
59 use super::Node;
60 use crate::runtime::Table;
61
62 /// Byte offset of the `nodes: Box<[Node]>` field within `Table`.
63 /// The fat-ptr low word (data ptr) lives at this offset; the
64 /// high word (length) at `TABLE_NODES_OFFSET + 8`. luna-jit
65 /// adds `TABLE_NODES_PTR_OFFSET` / `TABLE_NODES_LEN_OFFSET`
66 /// constants in `jit_backend/mod.rs` to express that split.
67 pub const TABLE_NODES_OFFSET: usize = std::mem::offset_of!(Table, nodes);
68
69 /// Byte offset of `key: Value` within `Node` (= 0).
70 pub const NODE_KEY_OFFSET: usize = std::mem::offset_of!(Node, key);
71
72 /// Byte offset of `val: Value` within `Node` (= 16 — `key` is 16-byte
73 /// `Value`, no inner padding).
74 pub const NODE_VAL_OFFSET: usize = std::mem::offset_of!(Node, val);
75
76 /// Total `Node` size in bytes (= 40 on 64-bit). Used as the stride
77 /// in `node_addr = nodes_ptr + slot_idx * SIZEOF_NODE`.
78 pub const SIZEOF_NODE: usize = std::mem::size_of::<Node>();
79
80 /// Static guard: pin the assumptions luna-jit relies on at compile
81 /// time. Layout drift here breaks IR emit, so trap it at compile
82 /// time rather than at trace-fire time.
83 ///
84 /// `Box<[T]>` is a fat pointer of `2 * usize` — 16 bytes on 64-bit
85 /// targets, 8 bytes on 32-bit (e.g. `wasm32`). Use a width-aware
86 /// expected size so the wasm32-unknown-unknown CI build does not
87 /// trip the assertion. The runtime layout still matters for luna-jit
88 /// IR emit on 64-bit hosts (the only platforms where Cranelift JIT
89 /// runs); the 32-bit branch documents the size in passing.
90 const _: () = {
91 assert!(std::mem::size_of::<Box<[Node]>>() == 2 * std::mem::size_of::<usize>());
92 assert!(NODE_KEY_OFFSET == 0);
93 assert!(NODE_VAL_OFFSET == 16);
94 assert!(SIZEOF_NODE >= 32);
95 };
96}
97
98#[derive(Clone, Copy)]
99pub(crate) struct Node {
100 key: Value,
101 val: Value,
102 /// absolute index of the next node in this chain, or NONE
103 next: i32,
104 /// PUC `setdeadkey` analogue: the key was a collectable that got swept
105 /// out of a weak table. The Gc pointer in `key` is now dangling — its
106 /// memory may have been reused for a new allocation with potentially
107 /// equal content. Marking the node "dead-key" lets `find_node` skip the
108 /// raw_eq probe (which could spuriously match a reallocated object) and
109 /// `insert_new` treat the slot as available for a fresh main-position
110 /// owner while leaving chain back-links intact for traversal.
111 dead_key: bool,
112}
113
114const NONE: i32 = -1;
115
116impl Node {
117 const EMPTY: Node = Node {
118 key: Value::Nil,
119 val: Value::Nil,
120 next: NONE,
121 dead_key: false,
122 };
123}
124
125/// C3 — SoA Robin Hood meta-word layout (Variant A, see
126/// `.dev/rfcs/v2.0-c3-soa-robinhood-rfc.md` §5.1).
127///
128/// Each `meta[idx]` slot encodes the open-addressing slot state in a
129/// single u16:
130/// - bit 15 (`OCCUPIED_BIT`): 0 = empty, 1 = occupied
131/// - bit 14 (`TOMBSTONE_BIT`): 0 = live, 1 = tombstoned-occupied
132/// - bits 13..0 (`PSL_MASK`): probe-sequence length (0..16383)
133///
134/// The 14-bit PSL field is **far** beyond any realistic Robin Hood
135/// max-PSL at load ≤ 0.75 (expected max ~20 on 1024 slots; even the
136/// long-tail outliers seen empirically with luna's existing hash
137/// distributions stay under 200). 2 bytes/slot is still 20× smaller
138/// than the 40-byte Node — the SoA bandwidth gain (§3.5 of the RFC)
139/// is preserved.
140///
141/// Earlier draft used 1 byte with a 6-bit PSL cap of 63; bench under
142/// load 0.676 on cap=1024 produced a long-tail PSL of 64+ with the
143/// LuaStr+mix64 hash distribution, forcing the widening.
144///
145/// Tombstones do NOT free the slot for `find` (probe continues past), but
146/// DO free it for `insert` (write the new entry, clear the tomb bit). They
147/// accumulate; the rehash path compacts them periodically.
148#[allow(dead_code)]
149pub(crate) mod meta_bits {
150 pub const OCCUPIED_BIT: u16 = 0b1000_0000_0000_0000;
151 pub const TOMBSTONE_BIT: u16 = 0b0100_0000_0000_0000;
152 pub const PSL_MASK: u16 = 0b0011_1111_1111_1111;
153 pub const PSL_MAX: u16 = PSL_MASK;
154 /// Empty slot — bit 15 = 0, all others 0.
155 pub const EMPTY: u16 = 0;
156
157 #[inline(always)]
158 pub fn is_occupied(m: u16) -> bool {
159 (m & OCCUPIED_BIT) != 0
160 }
161 #[inline(always)]
162 pub fn is_tombstone(m: u16) -> bool {
163 (m & TOMBSTONE_BIT) != 0
164 }
165 /// Live = occupied AND not tombstoned. `next()` iteration cursor returns
166 /// these. `find_slot_rh` short-circuits on a live match.
167 #[inline(always)]
168 pub fn is_live(m: u16) -> bool {
169 (m & (OCCUPIED_BIT | TOMBSTONE_BIT)) == OCCUPIED_BIT
170 }
171 #[inline(always)]
172 pub fn psl(m: u16) -> u16 {
173 m & PSL_MASK
174 }
175 #[inline(always)]
176 pub fn pack(psl: u16, tomb: bool) -> u16 {
177 debug_assert!(psl <= PSL_MAX);
178 let mut m = OCCUPIED_BIT | (psl & PSL_MASK);
179 if tomb {
180 m |= TOMBSTONE_BIT;
181 }
182 m
183 }
184}
185
186/// P11-S5d.I — inline storage threshold. Tables whose array part has
187/// `asize <= INLINE_ASIZE` keep their atags+avals inside the Table
188/// struct itself (`inline_storage`), skipping the slab Box entirely
189/// — binary_trees's `{nil, nil}` and `{...}` 2-element leaves live
190/// here, sparing one allocator round-trip per NewTable.
191pub(crate) const INLINE_ASIZE: u64 = 2;
192/// `INLINE_ASIZE` u64 slots for avals + `ceil(INLINE_ASIZE / 8)` u64
193/// slots covering the atags bytes (with trailing pad). For
194/// `INLINE_ASIZE = 2`: 2 avals + 1 atags = 3 u64s = 24 bytes.
195pub(crate) const INLINE_U64S: usize = INLINE_ASIZE as usize + INLINE_ASIZE.div_ceil(8) as usize;
196
197/// Lua table — hybrid array + hash storage, with optional metatable and
198/// weak-mode flags.
199#[repr(C)]
200pub struct Table {
201 /// read through raw casts by the GC, not by field access
202 #[allow(dead_code)]
203 pub(crate) hdr: GcHeader,
204 /// P11-S5d.I — single backing pointer for the array part. Points
205 /// to `inline_storage` (asize <= INLINE_ASIZE) or `slab.as_ptr()`
206 /// (asize > INLINE_ASIZE). The JIT inline aset reads this with one
207 /// `load i64`, no branch — the choice between inline and slab is
208 /// already encoded in the pointer. Initialised in `Heap::new_table`
209 /// AFTER the Table reaches its final heap address (so that
210 /// `&mut self.inline_storage` is the stable heap pointer, not a
211 /// stack-local one). Updated by `Table::resize`.
212 pub array_ptr: *mut u8,
213 /// P11-S5d.H — external backing for the array part when
214 /// `asize > INLINE_ASIZE`. Layout: `[avals: asize × 8 bytes][atags:
215 /// asize bytes]`. Empty box (dangling, no alloc) when the inline
216 /// path is in use.
217 pub(crate) slab: Box<[u64]>,
218 /// Length of the array part in slots. u64 (rather than `usize` or
219 /// `u32`) so the JIT can load it with a single `load i64`.
220 pub asize: u64,
221 /// P11-S5d.I — inline backing used when `asize <= INLINE_ASIZE`.
222 /// Same layout as the slab: avals at low addresses (`asize * 8`
223 /// bytes from offset 0), atags at the trailing `asize` bytes.
224 ///
225 /// `UnsafeCell` because `array_ptr` is a SELF-REFERENTIAL cached
226 /// pointer into this field. Under Stacked Borrows, every
227 /// `&mut self` method call's function-entry retag re-tags the
228 /// whole `*self` byte range Unique and would pop the cached
229 /// pointer's tag — subsequent `array_ptr` accesses were UB (Miri:
230 /// "retag ... tag does not exist in the borrow stack", v2.13).
231 /// An `UnsafeCell` region instead receives SharedReadWrite on
232 /// retag, which coexists with the pointer derived from
233 /// `UnsafeCell::get`. All reads/writes of the inline bytes MUST
234 /// go through `array_ptr` / `.get()` — never through a direct
235 /// `&`/`&mut` borrow of the array contents.
236 pub(crate) inline_storage: std::cell::UnsafeCell<[u64; INLINE_U64S]>,
237 /// hash part: power-of-two length (or empty)
238 /// hash part: power-of-two length (or empty)
239 /// `pub(crate)` so `Heap::free_obj` (pool recycle path) can reset.
240 pub(crate) nodes: Box<[Node]>,
241 /// free-slot search position, counts down (PUC lastfree).
242 /// `pub(crate)` so `Heap::new_table` can reset on pool recycle.
243 pub(crate) lastfree: u32,
244 /// C3 — SoA Robin Hood hash part (Variant A, parallel to `nodes`
245 /// during Phase B+C transition). After Phase 4 cutover these
246 /// replace `nodes` entirely. Each of `keys` / `vals` / `meta`
247 /// is the same length as `nodes` and is sized in lockstep by
248 /// `resize`. `meta` byte layout per the `meta_bits` module above.
249 /// Empty `Box::new([])` until Phase D cuts over.
250 pub(crate) keys: Box<[Value]>,
251 pub(crate) vals: Box<[Value]>,
252 pub(crate) meta: Box<[u16]>,
253 /// Count of tombstoned-occupied meta slots; rehash trigger.
254 pub(crate) tombstones: u32,
255 /// C3 — iterator-guard counter (R-A3 mitigation). Incremented on
256 /// each `pairs`/`next` entry, decremented on exit. While > 0,
257 /// the SoA path MUST defer rehash (which would rebase slot
258 /// indices and break PUC `nextvar.lua:520-521` invariant).
259 /// Phase F wires this; Phase B initialises to 0.
260 pub(crate) iter_depth: u32,
261 /// P11-S5d.K — visibility lifted to `pub(crate)` so the JIT can
262 /// take its field offset at compile time and emit an inline
263 /// "metatable.is_none()" guard before the inline aget fast path.
264 /// `Option<Gc<Table>>` is 8 bytes via the NonNull-pointer-opt: 0
265 /// ⇔ None, non-zero ⇔ Some.
266 pub metatable: Option<Gc<Table>>,
267 /// reserved for an absent-metamethod cache (PUC `flags`); currently
268 /// unread — luna's mm lookup walks `metatable.get` each time
269 #[allow(dead_code)]
270 pub(crate) flags: u8,
271}
272
273// SAFETY: `array_ptr` looks like an unprotected raw pointer field, but
274// it always refers to memory the same Table owns (either its own inline
275// storage or its `slab` Box). The Table is heap-allocated and never
276// moved post-adoption, so the pointer stays valid for the table's
277// lifetime. No thread-unsafety concern: tables are accessed only
278// through the Vm, single-threaded.
279unsafe impl Send for Table {}
280unsafe impl Sync for Table {}
281
282impl Table {
283 pub(crate) fn new(hdr: GcHeader) -> Table {
284 Table {
285 hdr,
286 // P11-S5d.I — `array_ptr` is fixed up in
287 // `Heap::new_table` after the Table reaches its final heap
288 // address (so that `&inline_storage` is the heap address,
289 // not a stack-local one). Null sentinel here so a
290 // bug-detection invariant flags any pre-fixup read.
291 array_ptr: std::ptr::null_mut(),
292 slab: Box::new([]),
293 asize: 0,
294 inline_storage: std::cell::UnsafeCell::new([0; INLINE_U64S]),
295 nodes: Box::new([]),
296 lastfree: 0,
297 keys: Box::new([]),
298 vals: Box::new([]),
299 meta: Box::new([]),
300 tombstones: 0,
301 iter_depth: 0,
302 metatable: None,
303 flags: 0,
304 }
305 }
306
307 /// P11-S5d.I — set `array_ptr` to the inline storage's stable heap
308 /// address. Called by `Heap::new_table` once the Table is at its
309 /// final location.
310 #[inline]
311 pub(crate) fn init_array_ptr(&mut self) {
312 self.array_ptr = self.inline_storage.get() as *mut u8;
313 }
314
315 /// Freshly-derived base pointer for the array part. Rust-side
316 /// accessors MUST use this instead of the cached `array_ptr`
317 /// field when the backing is inline: a pointer into `*self`
318 /// cached across `&mut self` boundaries is invalidated by every
319 /// function-entry retag under Stacked Borrows — `&mut` retags
320 /// ignore `UnsafeCell` (only `&` retags respect it), so the
321 /// cached tag dies on the next method call (v2.13 Miri finding,
322 /// `retag ... tag does not exist in the borrow stack`). Deriving
323 /// through `UnsafeCell::get()` at each use gives a fresh
324 /// SharedReadWrite tag valid for reads AND writes even from
325 /// `&self`. The slab case keeps the cached pointer: its tag
326 /// lives on the heap allocation, outside `*self`, untouched by
327 /// entry retags. `array_ptr` itself stays maintained for the
328 /// JIT, whose emitted code loads the field directly (no Rust
329 /// borrows involved).
330 #[inline(always)]
331 fn array_base(&self) -> *mut u8 {
332 if self.asize <= INLINE_ASIZE {
333 self.inline_storage.get() as *mut u8
334 } else {
335 self.array_ptr
336 }
337 }
338
339 /// P11-S5d.H/I — read view onto the array-part tag bytes. Trails
340 /// the avals portion in the active backing (inline or slab).
341 #[inline(always)]
342 pub(crate) fn atags(&self) -> &[u8] {
343 let n = self.asize as usize;
344 if n == 0 {
345 return &[];
346 }
347 // SAFETY: `array_ptr` always points to a buffer with `n`
348 // RawVal slots followed by `n` u8 tag bytes (either
349 // `inline_storage` of `INLINE_U64S` u64s, or a `slab` of
350 // `asize + ceil(asize/8)` u64s). The tag bytes start at byte
351 // offset `n * 8` from the buffer base.
352 unsafe {
353 let ptr = self.array_base().add(n * 8);
354 std::slice::from_raw_parts(ptr, n)
355 }
356 }
357
358 #[inline(always)]
359 pub(crate) fn atags_mut(&mut self) -> &mut [u8] {
360 let n = self.asize as usize;
361 if n == 0 {
362 return &mut [];
363 }
364 // SAFETY: `array_ptr` was allocated by `Heap::init_array_ptr` with `array_cap` slots; the table holds it for its lifetime and the heap is single-threaded so no concurrent writers exist.
365 unsafe {
366 let ptr = self.array_base().add(n * 8);
367 std::slice::from_raw_parts_mut(ptr, n)
368 }
369 }
370
371 /// P11-S5d.H/I — read view onto the array-part payload slots. Sits
372 /// at the start of the active backing (u64-aligned, identical size
373 /// and layout to `RawVal`).
374 #[inline(always)]
375 pub(crate) fn avals(&self) -> &[RawVal] {
376 let n = self.asize as usize;
377 if n == 0 {
378 return &[];
379 }
380 // SAFETY: inline_storage / slab both store u64s, so the cast
381 // to `*const RawVal` is alignment-safe (RawVal size = 8,
382 // align = 8). The buffer holds at least `n` such slots.
383 unsafe { std::slice::from_raw_parts(self.array_base() as *const RawVal, n) }
384 }
385
386 #[inline(always)]
387 pub(crate) fn avals_mut(&mut self) -> &mut [RawVal] {
388 let n = self.asize as usize;
389 if n == 0 {
390 return &mut [];
391 }
392 // SAFETY: `array_ptr` was allocated by `Heap::init_array_ptr` with `array_cap` slots; the table holds it for its lifetime and the heap is single-threaded so no concurrent writers exist.
393 unsafe { std::slice::from_raw_parts_mut(self.array_base() as *mut RawVal, n) }
394 }
395
396 /// Allocate a fresh external `[avals: asize × 8 bytes][atags: asize
397 /// bytes]` slab. Only used when `asize > INLINE_ASIZE`. The buffer
398 /// is u64-aligned via `Box<[u64]>` and zeroed (avals = `RawVal::
399 /// NIL` aka `0`; atags = `raw::NIL` aka `0`).
400 fn alloc_slab(asize: usize) -> Box<[u64]> {
401 if asize == 0 {
402 return Box::new([]);
403 }
404 let avals_u64s = asize;
405 let atags_u64s = asize.div_ceil(8);
406 let total = avals_u64s + atags_u64s;
407 vec![0u64; total].into_boxed_slice()
408 }
409
410 /// This table's metatable, if any.
411 pub fn metatable(&self) -> Option<Gc<Table>> {
412 self.metatable
413 }
414
415 /// Install (or clear) this table's metatable. Does not perform any
416 /// `__metatable` guarding; that belongs in the Vm-level `setmetatable`.
417 pub fn set_metatable(&mut self, mt: Option<Gc<Table>>) {
418 self.metatable = mt;
419 }
420
421 /// Bytes occupied by the table's *external* internal allocations
422 /// (slab and nodes). Cheap O(1) read — Box len × element size, no
423 /// allocator query. `Heap::free_obj` subtracts this on the way out
424 /// so the credit applied via `set`/`rehash`/`ensure_*` is symmetric.
425 ///
426 /// P11-S5d.I — inline storage doesn't count toward this (it's part
427 /// of the Table struct itself, accounted for by `size_of::<Table>()`
428 /// at adoption time). When the array part lives inline, the slab
429 /// is empty and contributes nothing here.
430 pub(crate) fn internal_bytes(&self) -> usize {
431 let n = self.asize as usize;
432 let array_external = if n > INLINE_ASIZE as usize {
433 n + n * std::mem::size_of::<RawVal>()
434 } else {
435 0
436 };
437 let soa_external = self.keys.len() * std::mem::size_of::<Value>()
438 + self.vals.len() * std::mem::size_of::<Value>()
439 + self.meta.len() * std::mem::size_of::<u16>();
440 array_external + self.nodes.len() * std::mem::size_of::<Node>() + soa_external
441 }
442
443 fn asize(&self) -> usize {
444 self.asize as usize
445 }
446
447 fn aget(&self, idx: usize) -> Value {
448 // SAFETY: callers gate on `idx < self.asize()` before reaching here
449 // (`get_int`, `iter_array`, etc.). atags and avals are sized
450 // identically by `rehash`, so a bound check passed against atags
451 // covers avals too.
452 unsafe {
453 Value::pack(
454 *self.atags().get_unchecked(idx),
455 *self.avals().get_unchecked(idx),
456 )
457 }
458 }
459
460 fn aset(&mut self, idx: usize, v: Value) {
461 let (t, b) = v.unpack();
462 // SAFETY: see `aget`. callers (`set_norm`, `set_int`) gate on
463 // `idx < self.asize()`. The two `*_mut` calls each take a
464 // distinct `&mut self` borrow whose lifetime ends at the
465 // statement boundary, so they don't overlap.
466 unsafe {
467 *self.atags_mut().get_unchecked_mut(idx) = t;
468 *self.avals_mut().get_unchecked_mut(idx) = b;
469 }
470 }
471
472 // ---- reads ----
473
474 /// Raw lookup (no `__index` metamethod). Returns `Value::Nil` when
475 /// the key is absent. `Value::Nil` and NaN floats return `nil` directly.
476 pub fn get(&self, key: Value) -> Value {
477 match key {
478 Value::Int(i) => self.get_int(i),
479 Value::Float(f) => match f2i_exact(f) {
480 Some(i) => self.get_int(i),
481 None => {
482 if f.is_nan() {
483 Value::Nil
484 } else {
485 self.get_hash(key)
486 }
487 }
488 },
489 Value::Nil => Value::Nil,
490 k => self.get_hash(k),
491 }
492 }
493
494 /// Integer-keyed variant of [`Self::get`].
495 pub fn get_int(&self, i: i64) -> Value {
496 if i >= 1 && (i as u64) <= self.asize() as u64 {
497 return self.aget(i as usize - 1);
498 }
499 self.get_hash(Value::Int(i))
500 }
501
502 /// String-keyed variant of [`Self::get`] for v1.2 D4 A1 GetField fast
503 /// path: the GetField interp arm always has a `Gc<LuaStr>` key from
504 /// `Proto.consts`. Skips the outer `Value` match (which would only
505 /// take the `_ => self.get_hash(k)` arm anyway) so the dispatcher
506 /// pays one less branch per call. ~5 GetField/iter × 1000 iters/cell
507 /// on the Redis-Lua-shape workload — every shaved nanosecond shows
508 /// up at the bench level. Counter-validated via
509 /// `examples/diag_opcode_breakdown.rs`.
510 #[inline]
511 pub fn get_str(&self, key: crate::runtime::Gc<crate::runtime::string::LuaStr>) -> Value {
512 self.get_hash(Value::Str(key))
513 }
514
515 fn get_hash(&self, k: Value) -> Value {
516 match self.find_node(k) {
517 Some(idx) => self.nodes[idx].val,
518 None => Value::Nil,
519 }
520 }
521
522 /// v2.1 Phase 1I.B — same logic as [`find_node`] but exposed
523 /// to luna-core's recorder so it can capture the slot index for
524 /// the table-field IC snapshot. luna-jit reads neither the
525 /// `nodes` field nor `Node` directly; only the slot index
526 /// crosses the crate boundary (baked into the IR as a `iconst`).
527 #[allow(dead_code)]
528 pub(crate) fn find_node_idx(&self, k: Value) -> Option<usize> {
529 self.find_node(k)
530 }
531
532 /// v2.1 Phase 1I.B — accessor for the recorder's
533 /// `FieldIcSnapshot` capture: read the slot's value's tag byte
534 /// for the cached_val_tag field. The recorder needs this to
535 /// match the runtime guard the IC emits. Returns None when
536 /// `idx >= nodes.len()`.
537 #[allow(dead_code)]
538 pub(crate) fn node_val_at(&self, idx: usize) -> Option<Value> {
539 self.nodes.get(idx).map(|n| n.val)
540 }
541
542 /// v2.1 Phase 1I.B — accessor for `nodes.len()` so the recorder
543 /// can capture the shape-guard's `nodes_len` field without
544 /// reaching into the private `nodes` member.
545 #[allow(dead_code)]
546 pub(crate) fn nodes_capacity(&self) -> usize {
547 self.nodes.len()
548 }
549
550 /// Walk the chain rooted at the key's main position.
551 fn find_node(&self, k: Value) -> Option<usize> {
552 // v2.13 WUC read-time probe (gc-verify): both the query key and
553 // every node key compared below must be live. This is the
554 // convergence point of ALL hash lookups, so a dangling string
555 // is named at its dereference site with role attribution.
556 #[cfg(feature = "gc-verify")]
557 {
558 let hdr = |v: Value| -> Option<usize> {
559 match v {
560 Value::Str(s) => Some(s.as_ptr() as usize),
561 Value::Table(t) => Some(t.as_ptr() as usize),
562 _ => None,
563 }
564 };
565 if let Some(p) = hdr(k) {
566 if crate::runtime::gc_verify_probe::is_freed(p) {
567 panic!("[gc-verify] find_node QUERY key {p:#x} is freed (dangling)");
568 }
569 }
570 for (i, n) in self.nodes.iter().enumerate() {
571 // NOTE: tombstones (val nil, key kept) are NOT skipped —
572 // the walk below raw_eq's their keys too.
573 if n.dead_key {
574 continue;
575 }
576 if let Some(p) = hdr(n.key) {
577 if crate::runtime::gc_verify_probe::is_freed(p) {
578 panic!(
579 "[gc-verify] find_node NODE key {p:#x} (slot {i}, \
580 tombstone {}, table {:#x}) is freed (dangling)",
581 n.val.is_nil(),
582 self as *const Table as usize
583 );
584 }
585 }
586 }
587 }
588 if self.nodes.is_empty() {
589 return None;
590 }
591 let mut idx = self.main_position(k);
592 loop {
593 let n = &self.nodes[idx];
594 // Dead-key slots carry a dangling Gc pointer whose memory may
595 // have been reallocated to a different live object; raw_eq on
596 // such a key can spuriously match the freshly-reused address.
597 // Skip the comparison and only follow `next` (PUC `setdeadkey`
598 // / `equalkey` short-circuit). 5.5 gc.lua :459-:478 was 12%
599 // flaky on this exact path — a swept B-string's slot kept
600 // chaining into A's slot, so `a[k] = nil` (k = A_string) hit
601 // the dead slot and wrote nil there, leaving A's val untouched.
602 if !n.dead_key && n.key.raw_eq(k) {
603 return Some(idx);
604 }
605 if n.next == NONE {
606 return None;
607 }
608 idx = n.next as usize;
609 }
610 }
611
612 // ---- writes ----
613
614 /// Insert / update `(key, val)`. `heap` is used to credit any internal
615 /// Box growth (rehash) to `heap.bytes` so the counter stays in sync with
616 /// real memory; `free_obj` subtracts `internal_bytes()` on the way out.
617 pub fn set(&mut self, heap: &mut Heap, key: Value, val: Value) -> Result<(), TableError> {
618 let k = normalize_set_key(key)?;
619 self.set_norm(heap, k, val)
620 }
621
622 /// PUC `luaV_fastset` / `luaV_finishfastset` analogue: single-walk
623 /// in-place update for an existing key. Returns `true` iff `key` is
624 /// present with a non-nil value and the slot was overwritten with
625 /// `val`. Returns `false` when the key is absent, the slot holds nil,
626 /// or the key normalisation rejects it — the caller is then expected
627 /// to run the `__newindex` chain or fall back to `set` for the raw
628 /// insert.
629 ///
630 /// Collapses the SetField hot path from two hash-chain walks
631 /// (`get` + `set`) to one. The `__newindex` invariant ("fires iff
632 /// `get` would have returned nil") is preserved because this method
633 /// writes only when the existing slot is non-nil — the exact set the
634 /// prior `tb.get(key).is_nil()` gate already excluded from
635 /// `__newindex` eligibility. See
636 /// `.dev/rfcs/v2.0-pi-phase2-a3-audit.md` §4 for the case-by-case
637 /// semantics check.
638 ///
639 /// The caller is responsible for firing `Heap::barrier_back` after a
640 /// `true` return (same contract as the surrounding `raw_set`
641 /// wrapper).
642 pub fn try_set_existing(&mut self, key: Value, val: Value) -> bool {
643 let k = match normalize_set_key(key) {
644 Ok(k) => k,
645 Err(_) => return false,
646 };
647 if let Value::Int(i) = k
648 && i >= 1
649 && (i as u64) <= self.asize() as u64
650 {
651 let idx = i as usize - 1;
652 // SAFETY: `idx < self.asize()` is guarded by the conditional
653 // above, mirroring the bound on `aget`/`aset`.
654 let tag = unsafe { *self.atags().get_unchecked(idx) };
655 if tag != raw::NIL {
656 // Nil-val on a live slot must follow the same tombstone
657 // discipline as `set_norm` — routed through
658 // `clear_existing_slot` so chain-world / future
659 // data-layout cutovers (Phase E SoA) stay aligned.
660 // See `.dev/known-bugs/fixed/`.
661 if val.is_nil() {
662 self.clear_existing_slot(k);
663 } else {
664 self.aset(idx, val);
665 }
666 return true;
667 }
668 // Array slot present-but-nil → __newindex eligible: do NOT
669 // write. Caller falls through to the metamethod chain.
670 return false;
671 }
672 if let Some(idx) = self.find_node(k)
673 && !self.nodes[idx].val.is_nil()
674 {
675 if val.is_nil() {
676 self.clear_existing_slot(k);
677 } else {
678 self.nodes[idx].val = val;
679 }
680 return true;
681 }
682 false
683 }
684
685 /// Shared "live with val=Nil is illegal" tombstone routine for the
686 /// two write entry points (`set_norm` and `try_set_existing`). The
687 /// slot must already be known live (array slot inside `asize()` /
688 /// node returned by `find_node`).
689 ///
690 /// Chain-world today:
691 /// - array slot → `aset(_, Nil)` clears the atag, so `next()`'s
692 /// `tag != raw::NIL` filter skips the slot.
693 /// - node slot → soft tombstone (key kept, `val = Nil`); chain
694 /// `next()` filter `!n.val.is_nil()` skips it, and `find_node`
695 /// still routes a future re-insert into the same slot without
696 /// a rehash.
697 ///
698 /// Centralising the discipline here lets a future Phase E SoA
699 /// cutover (Variant B linear probe, or any non-Robin-Hood layout
700 /// attack that switches `next()`'s filter to `meta_bits::is_live`)
701 /// migrate both entry points in lockstep — exactly the divergence
702 /// that surfaced as `(key, nil)` zombies in `pairs()` on the C3
703 /// Session 2 cutover branch.
704 fn clear_existing_slot(&mut self, k: Value) {
705 if let Value::Int(i) = k
706 && i >= 1
707 && (i as u64) <= self.asize() as u64
708 {
709 self.aset(i as usize - 1, Value::Nil);
710 return;
711 }
712 if let Some(idx) = self.find_node(k) {
713 self.nodes[idx].val = Value::Nil;
714 }
715 }
716
717 /// Integer-keyed variant of [`Self::set`].
718 pub fn set_int(&mut self, heap: &mut Heap, i: i64, val: Value) -> Result<(), TableError> {
719 self.set_norm(heap, Value::Int(i), val)
720 }
721
722 /// `k` is already normalized (no nil, no NaN, integral floats → Int).
723 fn set_norm(&mut self, heap: &mut Heap, k: Value, v: Value) -> Result<(), TableError> {
724 if let Value::Int(i) = k
725 && i >= 1
726 && (i as u64) <= self.asize() as u64
727 {
728 // Live array slot + Nil write goes through the shared
729 // tombstone routine (see `clear_existing_slot` for the
730 // chain ↔ future-SoA rationale). The non-Nil branch is
731 // identical to a bare `aset` today.
732 if v.is_nil() {
733 self.clear_existing_slot(k);
734 } else {
735 self.aset(i as usize - 1, v);
736 }
737 return Ok(());
738 }
739 if let Some(idx) = self.find_node(k) {
740 if v.is_nil() {
741 self.clear_existing_slot(k);
742 } else {
743 self.nodes[idx].val = v;
744 }
745 return Ok(());
746 }
747 if v.is_nil() {
748 return Ok(()); // absent key set to nil: nothing to record
749 }
750 self.insert_new(heap, k, v)
751 }
752
753 fn insert_new(&mut self, heap: &mut Heap, k: Value, v: Value) -> Result<(), TableError> {
754 if self.nodes.is_empty() {
755 self.rehash(heap, k)?;
756 return self.set_norm(heap, k, v);
757 }
758 let mp = self.main_position(k);
759 // A truly empty slot (key=Nil, !dead_key) is free for direct placement.
760 // A dead-key slot still belongs to some chain (its `next` points to a
761 // live entry the chain reaches), so we treat it as occupied here and
762 // route the new key through the collision path below — that preserves
763 // the back-links into this slot from other nodes' `next` fields.
764 if self.nodes[mp].key.is_nil() && !self.nodes[mp].dead_key {
765 self.nodes[mp] = Node {
766 key: k,
767 val: v,
768 next: NONE,
769 dead_key: false,
770 };
771 return Ok(());
772 }
773 let Some(free) = self.free_pos() else {
774 self.rehash(heap, k)?;
775 return self.set_norm(heap, k, v);
776 };
777 // Dead-key slot: it carries no live key, so by definition nobody else
778 // counts it as "their main position owner". We give it directly to
779 // the new key but preserve `next` so the chain it sits inside still
780 // reaches its downstream entries.
781 if self.nodes[mp].dead_key {
782 let preserved_next = self.nodes[mp].next;
783 self.nodes[mp] = Node {
784 key: k,
785 val: v,
786 next: preserved_next,
787 dead_key: false,
788 };
789 return Ok(());
790 }
791 let other_mp = self.main_position(self.nodes[mp].key);
792 if other_mp != mp {
793 // colliding node is out of its main position: relocate it to the
794 // free slot and take its place
795 let mut prev = other_mp;
796 while self.nodes[prev].next != mp as i32 {
797 prev = self.nodes[prev].next as usize;
798 }
799 self.nodes[prev].next = free as i32;
800 self.nodes[free] = self.nodes[mp];
801 self.nodes[mp] = Node {
802 key: k,
803 val: v,
804 next: NONE,
805 dead_key: false,
806 };
807 } else {
808 // colliding node owns this position: chain the new node behind it
809 self.nodes[free] = Node {
810 key: k,
811 val: v,
812 next: self.nodes[mp].next,
813 dead_key: false,
814 };
815 self.nodes[mp].next = free as i32;
816 }
817 Ok(())
818 }
819
820 fn free_pos(&mut self) -> Option<usize> {
821 while self.lastfree > 0 {
822 self.lastfree -= 1;
823 let n = &self.nodes[self.lastfree as usize];
824 // Dead-key slots are still occupied for chain purposes (their
825 // `next` may be the only path to a downstream entry) — don't
826 // hand them out as free.
827 if n.key.is_nil() && !n.dead_key {
828 return Some(self.lastfree as usize);
829 }
830 }
831 None
832 }
833
834 // ---- rehash (PUC luaH_rehash) ----
835
836 fn rehash(&mut self, heap: &mut Heap, pending: Value) -> Result<(), TableError> {
837 let mut nums = [0usize; 65];
838 let mut int_keys = 0usize;
839 let mut total = 1; // the pending key
840 if let Value::Int(i) = pending
841 && i >= 1
842 {
843 nums[ceil_log2(i as u64)] += 1;
844 int_keys += 1;
845 }
846 let atags = self.atags();
847 for (i, &tag) in atags.iter().enumerate() {
848 if tag != raw::NIL {
849 nums[ceil_log2(i as u64 + 1)] += 1;
850 int_keys += 1;
851 total += 1;
852 }
853 }
854 for n in self.nodes.iter() {
855 if !n.val.is_nil() {
856 total += 1;
857 if let Value::Int(i) = n.key
858 && i >= 1
859 {
860 nums[ceil_log2(i as u64)] += 1;
861 int_keys += 1;
862 }
863 }
864 }
865 // computesizes: optimal array size = largest 2^i with more than 2^(i-1)
866 // integer keys in [1, 2^i]
867 let mut new_asize = 0usize;
868 let mut in_array = 0usize;
869 let mut a = 0usize;
870 let mut two_to_i = 1usize;
871 let mut i = 0usize;
872 while int_keys > two_to_i / 2 {
873 a += nums[i];
874 if a > two_to_i / 2 {
875 new_asize = two_to_i;
876 in_array = a;
877 }
878 i += 1;
879 match two_to_i.checked_mul(2) {
880 Some(n) => two_to_i = n,
881 None => break,
882 }
883 }
884 // PUC `luaH_resizearray` raises "table overflow" when the array part
885 // would have to grow past MAXASIZE. luna mirrors with `MAX_ASIZE`,
886 // checked on both the array and the hash bucket count (the latter is
887 // a power-of-two of total - in_array entries).
888 if new_asize > MAX_ASIZE {
889 return Err(TableError::Overflow);
890 }
891 let hash_entries = total - in_array;
892 if hash_entries > MAX_ASIZE {
893 return Err(TableError::Overflow);
894 }
895 self.resize(heap, new_asize, hash_entries);
896 Ok(())
897 }
898
899 /// Resize the table's array and hash parts. The array part grows
900 /// (or shrinks) to `new_asize` NIL-initialized slots; the hash
901 /// part rounds to the next power of two ≥ `hash_entries`. Any
902 /// existing entries are re-inserted into the new layout. The
903 /// Box growth is debited/credited to `heap.bytes` so `free_obj`
904 /// can subtract the symmetric amount.
905 ///
906 /// P11-S5c.B — `Heap::new_table_sized` calls this on a freshly
907 /// adopted empty table to pre-allocate the array part, sparing
908 /// the table-fill loop from O(log N) intermediate `rehash`es.
909 pub(crate) fn resize(&mut self, heap: &mut Heap, new_asize: usize, hash_entries: usize) {
910 let before = self.internal_bytes();
911 // P11-S5d.H/I — snapshot the old array entries before we
912 // re-install the backing. The active buffer can be inline OR
913 // slab; `array_ptr` already points to whichever it is, so
914 // walking via raw offsets works the same for either case.
915 let old_asize = self.asize as usize;
916 let mut old_pairs: Vec<(u8, RawVal)> = Vec::with_capacity(old_asize);
917 if old_asize > 0 {
918 // SAFETY: `array_ptr` was set up by `Heap::new_table` or
919 // an earlier `resize`; it covers `old_asize * 9` bytes
920 // (avals + atags).
921 let avals_base = self.array_base() as *const RawVal;
922 let atags_base = unsafe { self.array_base().add(old_asize * 8) as *const u8 };
923 for i in 0..old_asize {
924 // SAFETY: `i < array_len` is enforced by the surrounding loop bound; `atags_base` / `avals_base` point into the table's parallel arrays allocated in lockstep by `init_array_ptr`.
925 let tag = unsafe { *atags_base.add(i) };
926 // SAFETY: `i < array_len` is enforced by the surrounding loop bound; `atags_base` / `avals_base` point into the table's parallel arrays allocated in lockstep by `init_array_ptr`.
927 let val = unsafe { *avals_base.add(i) };
928 old_pairs.push((tag, val));
929 }
930 }
931 let old_nodes = std::mem::take(&mut self.nodes);
932
933 // Install the new array backing first, then update `array_ptr`
934 // (before potentially dropping the old slab via the assignment
935 // below) so the JIT never observes a stale pointer.
936 self.asize = new_asize as u64;
937 if new_asize <= INLINE_ASIZE as usize {
938 // Inline path — zero the inline buffer; drop any prior
939 // external slab.
940 // SAFETY: exclusive &mut self; write through the cell to
941 // stay on the raw-pointer access path (no &mut borrow of
942 // the array contents is ever formed).
943 unsafe {
944 *self.inline_storage.get() = [0; INLINE_U64S];
945 }
946 self.array_ptr = self.inline_storage.get() as *mut u8;
947 self.slab = Box::new([]);
948 } else {
949 // External slab — allocate, then re-point `array_ptr`.
950 self.slab = Self::alloc_slab(new_asize);
951 self.array_ptr = self.slab.as_mut_ptr() as *mut u8;
952 }
953
954 let hsize = if hash_entries == 0 {
955 0
956 } else {
957 hash_entries.next_power_of_two()
958 };
959 self.nodes = vec![Node::EMPTY; hsize].into_boxed_slice();
960 self.lastfree = hsize as u32;
961 // PUC `g->GCtotalbytes` analogue: credit (or debit) the box-size
962 // delta so `Heap.bytes` reflects this table's actual internal
963 // memory. `free_obj` subtracts `internal_bytes()` on the way out.
964 let after = self.internal_bytes();
965 heap.apply_bytes_delta(before, after);
966 // Re-insert old array entries via the public set_norm path
967 // (which handles rehashing if the new array shrinks below the
968 // entry count).
969 for (i, (tag, val)) in old_pairs.into_iter().enumerate() {
970 if tag != raw::NIL {
971 // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
972 let v = unsafe { Value::pack(tag, val) };
973 let _ = self.set_norm(heap, Value::Int(i as i64 + 1), v);
974 }
975 }
976 for n in old_nodes.iter() {
977 if !n.val.is_nil() {
978 let _ = self.set_norm(heap, n.key, n.val);
979 }
980 }
981 }
982
983 fn main_position(&self, k: Value) -> usize {
984 debug_assert!(!self.nodes.is_empty());
985 hash_key(k) as usize & (self.nodes.len() - 1)
986 }
987
988 // ---- length / iteration ----
989
990 /// A border: `n` where `t[n]` is non-nil and `t[n+1]` is nil (PUC `luaH_getn`).
991 /// This is Lua `#` semantics, not a container size — an `is_empty`
992 /// counterpart would be meaningless.
993 #[allow(clippy::len_without_is_empty)]
994 pub fn len(&self) -> i64 {
995 let asize = self.asize();
996 let atags = self.atags();
997 if asize > 0 && atags[asize - 1] == raw::NIL {
998 // binary search inside the array part
999 let (mut lo, mut hi) = (0usize, asize);
1000 while hi - lo > 1 {
1001 let m = lo + (hi - lo) / 2;
1002 if atags[m - 1] == raw::NIL {
1003 hi = m;
1004 } else {
1005 lo = m;
1006 }
1007 }
1008 return lo as i64;
1009 }
1010 if self.nodes.is_empty() {
1011 return asize as i64;
1012 }
1013 // array is full (or absent): unbound search through the hash part
1014 let mut lo = asize as i64;
1015 let mut hi = lo + 1;
1016 while !self.get_int(hi).is_nil() {
1017 lo = hi;
1018 match hi.checked_mul(2) {
1019 Some(n) => hi = n,
1020 None => {
1021 // pathological sparse keys (the doubling overflowed): scan
1022 // linearly from 1 for the first border, as PUC's
1023 // unbound_search does — finds a small border fast instead of
1024 // returning the huge one.
1025 let mut i = 1i64;
1026 while !self.get_int(i).is_nil() {
1027 i += 1;
1028 }
1029 return i - 1;
1030 }
1031 }
1032 }
1033 while hi - lo > 1 {
1034 let m = lo + (hi - lo) / 2;
1035 if self.get_int(m).is_nil() {
1036 hi = m;
1037 } else {
1038 lo = m;
1039 }
1040 }
1041 lo
1042 }
1043
1044 /// Lua `next`: iterate array part then hash part.
1045 pub fn next(&self, key: Value) -> Result<Option<(Value, Value)>, TableError> {
1046 let start = match key {
1047 Value::Nil => 0,
1048 k => {
1049 let k = match k {
1050 Value::Float(f) => match f2i_exact(f) {
1051 Some(i) => Value::Int(i),
1052 None => k,
1053 },
1054 k => k,
1055 };
1056 if let Value::Int(i) = k
1057 && i >= 1
1058 && (i as u64) <= self.asize() as u64
1059 {
1060 i as usize
1061 } else {
1062 match self.find_node(k) {
1063 Some(idx) => self.asize() + idx + 1,
1064 None => return Err(TableError::InvalidNext),
1065 }
1066 }
1067 }
1068 };
1069 let atags = self.atags();
1070 for i in start..self.asize() {
1071 if atags[i] != raw::NIL {
1072 return Ok(Some((Value::Int(i as i64 + 1), self.aget(i))));
1073 }
1074 }
1075 let hstart = start.saturating_sub(self.asize());
1076 for (idx, n) in self.nodes.iter().enumerate().skip(hstart) {
1077 if !n.val.is_nil() {
1078 let _ = idx;
1079 return Ok(Some((n.key, n.val)));
1080 }
1081 }
1082 Ok(None)
1083 }
1084
1085 /// `(weak_keys, weak_values)` from the metatable's `__mode` field. Read by
1086 /// scanning the metatable for the `__mode` string (no interned key needed
1087 /// inside the collector).
1088 pub(crate) fn weak_mode(&self) -> (bool, bool) {
1089 let Some(mt) = self.metatable else {
1090 return (false, false);
1091 };
1092 for n in mt.nodes.iter() {
1093 if let (Value::Str(k), Value::Str(mode)) = (n.key, n.val)
1094 && k.as_bytes() == b"__mode"
1095 {
1096 let b = mode.as_bytes();
1097 return (b.contains(&b'k'), b.contains(&b'v'));
1098 }
1099 }
1100 (false, false)
1101 }
1102
1103 /// True when this table holds at least one direct reference (array slot,
1104 /// hash key, or hash value) to a coroutine whose mark bit is still clear.
1105 /// Used by the GC's cycle-finalize check (PUC 5.3 gc.lua :502) to detect
1106 /// the table ↔ thread reference cycle that needs an extra GC round before
1107 /// `__gc` runs. Tag-level scan avoids walking the full reference graph.
1108 pub(crate) fn refs_contain_unmarked_coro(&self) -> bool {
1109 use crate::runtime::heap::header_is_marked;
1110 let atags = self.atags();
1111 let avals = self.avals();
1112 for (i, &tag) in atags.iter().enumerate() {
1113 if tag == raw::CORO {
1114 // SAFETY: raw union access — the tag byte at the same index in `atags` was previously confirmed to be `co` (closure/object pointer) so the `co` variant of `RawVal` holds the valid payload.
1115 let p = unsafe { avals[i].co } as *mut crate::runtime::heap::GcHeader;
1116 if !header_is_marked(p) {
1117 return true;
1118 }
1119 }
1120 }
1121 for n in self.nodes.iter() {
1122 if let Value::Coro(co) = n.key {
1123 if !header_is_marked(co.as_ptr() as *mut crate::runtime::heap::GcHeader) {
1124 return true;
1125 }
1126 }
1127 if let Value::Coro(co) = n.val {
1128 if !header_is_marked(co.as_ptr() as *mut crate::runtime::heap::GcHeader) {
1129 return true;
1130 }
1131 }
1132 }
1133 false
1134 }
1135
1136 /// v2.13 WUC `gc-verify`: after a completed sweep, every collectable
1137 /// reference this table still holds (array values, node keys/values,
1138 /// metatable) must point at a live heap object. Nodes flagged
1139 /// `dead_key` are the sanctioned exception — their key pointer is
1140 /// documented-dangling and never dereferenced. `describe` receives
1141 /// (what, node-index, tag-byte, ptr) on violation.
1142 #[cfg(feature = "gc-verify")]
1143 pub(crate) fn verify_refs(
1144 &self,
1145 is_live: &dyn Fn(Value) -> bool,
1146 report: &dyn Fn(&str, usize, Value),
1147 ) {
1148 let atags = self.atags();
1149 let avals = self.avals();
1150 for (i, &tag) in atags.iter().enumerate() {
1151 if raw::is_gc(tag) {
1152 // SAFETY: tags/vals parallel arrays kept in sync by all table writers.
1153 let v = unsafe { Value::pack(tag, avals[i]) };
1154 if !is_live(v) {
1155 report("array value", i, v);
1156 }
1157 }
1158 }
1159 for (i, n) in self.nodes.iter().enumerate() {
1160 if n.val.is_nil() {
1161 continue;
1162 }
1163 if !n.dead_key && !is_live(n.key) {
1164 report("node key", i, n.key);
1165 }
1166 if !is_live(n.val) {
1167 report("node value", i, n.val);
1168 }
1169 }
1170 if let Some(mt) = self.metatable {
1171 if !is_live(Value::Table(mt)) {
1172 report("metatable", 0, Value::Table(mt));
1173 }
1174 }
1175 }
1176
1177 pub(crate) fn trace(&self, m: &mut Marker) {
1178 let (wk, wv) = self.weak_mode();
1179 if wk || wv {
1180 m.weak.push(self as *const Table as *mut Table);
1181 }
1182 // weak keys + strong values = an ephemeron table: its hash values are
1183 // marked only if the key proves reachable (deferred to the convergence
1184 // pass), not here. PUC 5.1 predates ephemerons — under `no_ephemeron`
1185 // a weak-key table marks its values strongly during this pass, which
1186 // is what gc.lua's "weak tables" section requires.
1187 let ephemeron = wk && !wv && !m.no_ephemeron;
1188 if ephemeron {
1189 m.ephemeron.push(self as *const Table as *mut Table);
1190 }
1191 // array keys are integers (never weakly collected); skip values only
1192 // when the table has weak values
1193 if !wv {
1194 let atags = self.atags();
1195 let avals = self.avals();
1196 for (i, &tag) in atags.iter().enumerate() {
1197 if raw::is_gc(tag) {
1198 // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
1199 m.value(unsafe { Value::pack(tag, avals[i]) });
1200 }
1201 }
1202 }
1203 for n in self.nodes.iter() {
1204 if !wk {
1205 m.value(n.key);
1206 }
1207 // ephemeron hash values are deferred; otherwise mark strong values
1208 if !wv && !ephemeron {
1209 m.value(n.val);
1210 }
1211 }
1212 if let Some(mt) = self.metatable {
1213 m.value(Value::Table(mt));
1214 }
1215 }
1216
1217 /// Ephemeron pass: mark the value of every hash entry whose key is alive
1218 /// (`alive` decides — strong/marked keys, plus strings/numbers which are
1219 /// never weakly collected). Returns true if any value was newly marked, so
1220 /// the caller can iterate to a fixpoint (PUC `traverseephemeron`).
1221 pub(crate) fn converge_ephemeron(&self, alive: &dyn Fn(Value) -> bool, m: &mut Marker) -> bool {
1222 let mut changed = false;
1223 for n in self.nodes.iter() {
1224 if !n.val.is_nil() && alive(n.key) {
1225 changed |= m.value(n.val);
1226 }
1227 }
1228 changed
1229 }
1230
1231 /// Clear entries whose weak key/value did not survive marking. `is_dead`
1232 /// reports whether a GC value was left unmarked (about to be swept).
1233 /// Clear weak-table entries whose key/value no longer carries a live
1234 /// reference. `is_dead` is a **pure** check (no side effects); the GC
1235 /// uses `mark_string` to resurrect any string that's still reachable via
1236 /// a *surviving* entry — Lua manual §2.5.4 says strings in weak tables
1237 /// are not collected as long as their entry is, and PUC `iscleared`
1238 /// implements that by marking the string during the same scan.
1239 pub(crate) fn clear_weak(
1240 &mut self,
1241 wk: bool,
1242 wv: bool,
1243 is_dead: &dyn Fn(Value) -> bool,
1244 mark_string: &dyn Fn(Value),
1245 ) {
1246 if wv {
1247 let n = self.asize as usize;
1248 for i in 0..n {
1249 let tag = self.atags()[i];
1250 if raw::is_gc(tag) {
1251 // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
1252 let v = unsafe { Value::pack(tag, self.avals()[i]) };
1253 if is_dead(v) {
1254 self.atags_mut()[i] = raw::NIL;
1255 self.avals_mut()[i] = RawVal::NIL;
1256 } else {
1257 mark_string(v);
1258 }
1259 }
1260 }
1261 }
1262 for n in self.nodes.iter_mut() {
1263 if n.val.is_nil() {
1264 // PUC `clearbykeys`/`clearbyvalues` end with
1265 // `if (isempty(gval(n))) clearkey(n)`: an EMPTY entry's
1266 // collectable key must be demoted to a dead key. A
1267 // tombstone (`t[k] = nil` leaves val nil, key kept for
1268 // chain links) is otherwise invisible to this sweep AND
1269 // unmarked by the weak-key trace, so its string key gets
1270 // freed while `find_node` still raw_eq's it walking the
1271 // chain — UAF-C's Linux/ASAN-confirmed read site
1272 // (v2.13 Track WUC).
1273 if !n.dead_key
1274 && matches!(
1275 n.key,
1276 Value::Table(_)
1277 | Value::Closure(_)
1278 | Value::Native(_)
1279 | Value::Coro(_)
1280 | Value::Userdata(_)
1281 | Value::Str(_)
1282 )
1283 {
1284 n.key = Value::Nil;
1285 n.dead_key = true;
1286 }
1287 continue;
1288 }
1289 let key_dead = wk && is_dead(n.key);
1290 let val_dead = wv && is_dead(n.val);
1291 if key_dead || val_dead {
1292 // entry removed. PUC `setdeadkey`: when the key was a
1293 // collectable, drop the Gc pointer so a later raw_eq cannot
1294 // spuriously match a new object that gets allocated at the
1295 // same freed address. Keep `next` so the chain back-links
1296 // through this node still reach downstream entries; the
1297 // `dead_key` flag tells `find_node` to skip the comparison
1298 // and `insert_new` to treat the slot as a free
1299 // main-position owner that may inherit the chain.
1300 n.val = Value::Nil;
1301 if matches!(
1302 n.key,
1303 Value::Table(_)
1304 | Value::Closure(_)
1305 | Value::Native(_)
1306 | Value::Coro(_)
1307 | Value::Userdata(_)
1308 | Value::Str(_)
1309 ) {
1310 n.key = Value::Nil;
1311 n.dead_key = true;
1312 }
1313 } else {
1314 // entry survives — resurrect any string reachable through it
1315 if wk {
1316 mark_string(n.key);
1317 }
1318 if wv {
1319 mark_string(n.val);
1320 }
1321 }
1322 }
1323 }
1324}
1325
1326// =====================================================================
1327// C3 — SoA + Robin Hood open-addressing hash part (Variant A).
1328//
1329// Parallel to the chain-walk path during Phase B+C+D transition: the
1330// chain `nodes` / `lastfree` is the authoritative read path until
1331// Phase E migrates `next()` and Phase 4 cuts over. These methods
1332// operate only on the `keys` / `vals` / `meta` / `tombstones` SoA
1333// arrays — chain state is never touched.
1334//
1335// Layout invariants the methods below maintain:
1336// - `keys.len() == vals.len() == meta.len()`, all power-of-two
1337// (or zero in the empty-stub state)
1338// - `meta[i] = meta_bits::EMPTY` iff slot i is free
1339// - tombstoned slots are scanned past by find but reused by insert
1340// - `tombstones` counts the meta slots with TOMBSTONE_BIT set
1341// - load factor (live + tombstone) / cap is kept ≤ 0.75 via
1342// `soa_grow_if_needed` (R-A1 mitigation — PSL bound 63)
1343// - rehash is REFUSED when `iter_depth > 0` (R-A3 mitigation —
1344// wired in Phase F; for Phase C the counter is always 0 so the
1345// refusal path is unreachable)
1346//
1347// Refs: `.dev/rfcs/v2.0-c3-soa-robinhood-rfc.md` §5.1 (variant A),
1348// §6.2 Phase 1-3 (impl plan).
1349// =====================================================================
1350
1351/// C3 — initial SoA capacity when growing from empty. Power of two.
1352/// Picked at 4 so a 3-element table doesn't trigger an immediate
1353/// regrowth.
1354#[allow(dead_code)] // wired by Phase D/E/F integration
1355pub(crate) const SOA_INITIAL_CAP: usize = 4;
1356
1357/// C3 — high load-factor threshold (3/4). SoA grow trigger; matches
1358/// the RFC §5.1 recommendation. PSL_MAX is the u16 14-bit value so
1359/// long-tail PSL overruns are recoverable via grow-retry.
1360#[allow(dead_code)]
1361const SOA_LOAD_NUM: usize = 3;
1362#[allow(dead_code)]
1363const SOA_LOAD_DEN: usize = 4;
1364
1365/// C3 — tombstone density threshold (1/4). When tombstones/cap ≥ 25%
1366/// the next non-resize-triggering rehash compacts them.
1367#[allow(dead_code)]
1368const SOA_TOMB_NUM: usize = 1;
1369#[allow(dead_code)]
1370const SOA_TOMB_DEN: usize = 4;
1371
1372#[allow(dead_code)] // wired by Phase D/E/F integration into public set/get/next
1373impl Table {
1374 /// C3 — current SoA hash-part capacity in slots (0 = empty stub).
1375 #[inline]
1376 pub(crate) fn soa_cap(&self) -> usize {
1377 self.meta.len()
1378 }
1379
1380 /// C3 — count of live (occupied & not tombstone) SoA slots.
1381 /// O(n) — only used by the Phase G equivalence test path; the
1382 /// hot rehash trigger uses `live_estimate = cap*3/4 - tombstones`
1383 /// implicitly via `soa_grow_if_needed`.
1384 #[cfg(test)]
1385 pub(crate) fn soa_live_count(&self) -> usize {
1386 self.meta.iter().filter(|&&m| meta_bits::is_live(m)).count()
1387 }
1388
1389 /// C3 — count of occupied (live OR tombstoned) SoA slots; this is
1390 /// the value the load factor compares against `cap * 3/4`.
1391 #[inline]
1392 fn soa_occupied_count(&self) -> usize {
1393 // O(n) sweep — Phase C inserts the count on each call so the
1394 // worst case is bounded by per-insert amortised cost. A future
1395 // polish could maintain a counter incrementally; left as a
1396 // Phase H mini-bench-driven follow-up if PI sample shows it
1397 // contributes > 1 µs/cell.
1398 self.meta
1399 .iter()
1400 .filter(|&&m| meta_bits::is_occupied(m))
1401 .count()
1402 }
1403
1404 /// C3 — Robin Hood lookup. Returns the slot index of a *live*
1405 /// matching key, or None if absent. Walks past tombstones (they
1406 /// preserve probe chains). Returns None if the SoA cap is zero
1407 /// (Phase B stub state). Bound by `cap` probes; in practice
1408 /// expected ≤ 8 at load 0.75.
1409 pub(crate) fn soa_find_slot(&self, k: Value) -> Option<usize> {
1410 let cap = self.meta.len();
1411 if cap == 0 {
1412 return None;
1413 }
1414 let mask = cap - 1;
1415 let mut idx = (hash_key(k) as usize) & mask;
1416 // Walk until empty slot or wrap. The `steps <= cap` bound
1417 // is a safety net: a properly maintained Robin Hood table
1418 // with load < 1 always has at least one empty slot, so a
1419 // full wrap means table invariant violation.
1420 for _ in 0..cap {
1421 let m = self.meta[idx];
1422 if !meta_bits::is_occupied(m) {
1423 return None;
1424 }
1425 if !meta_bits::is_tombstone(m) && self.keys[idx].raw_eq(k) {
1426 return Some(idx);
1427 }
1428 idx = (idx + 1) & mask;
1429 }
1430 None
1431 }
1432
1433 /// C3 — Allocate fresh SoA arrays at `new_cap` (power of two) and
1434 /// re-insert every live entry from the old SoA arrays. Tombstones
1435 /// are dropped (count resets to 0). Used by `soa_grow_if_needed`
1436 /// (new_cap = max(SOA_INITIAL_CAP, 2*cap)) and by Phase D
1437 /// tombstone compaction (new_cap = cap).
1438 ///
1439 /// IMPORTANT: rehash MUST NOT fire while `iter_depth > 0`
1440 /// (R-A3) — wired in Phase F. Phase C callers all enter from
1441 /// non-iteration paths.
1442 fn soa_rehash_to(&mut self, heap: &mut Heap, new_cap: usize) -> Result<(), TableError> {
1443 debug_assert!(new_cap.is_power_of_two() && new_cap > 0);
1444 let before = self.internal_bytes();
1445 // Snapshot old live entries. This list is the canonical
1446 // "must be present after rehash" set; we restart from it on
1447 // any PSL-overflow retry.
1448 let mut survivors: Vec<(Value, Value)> = Vec::with_capacity(self.meta.len());
1449 for i in 0..self.meta.len() {
1450 if meta_bits::is_live(self.meta[i]) {
1451 survivors.push((self.keys[i], self.vals[i]));
1452 }
1453 }
1454 // Install fresh empty arrays at `new_cap`. On PSL overflow
1455 // during the re-insert pass (extremely rare with the 14-bit
1456 // PSL budget — would need a pathological hash distribution),
1457 // double the cap and replay the original `survivors` list
1458 // from scratch. We don't try to salvage partial work — the
1459 // rare-path retry cost is bounded by O(n × max_doublings),
1460 // and max_doublings has a hard MAX_ASIZE ceiling.
1461 let mut cap = new_cap;
1462 loop {
1463 if cap > MAX_ASIZE {
1464 return Err(TableError::Overflow);
1465 }
1466 self.keys = vec![Value::Nil; cap].into_boxed_slice();
1467 self.vals = vec![Value::Nil; cap].into_boxed_slice();
1468 self.meta = vec![meta_bits::EMPTY; cap].into_boxed_slice();
1469 self.tombstones = 0;
1470 let mut overflowed = false;
1471 for (k, v) in survivors.iter().copied() {
1472 if self.soa_place_known_absent(k, v).is_err() {
1473 overflowed = true;
1474 break;
1475 }
1476 }
1477 if !overflowed {
1478 break;
1479 }
1480 cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1481 }
1482 let after = self.internal_bytes();
1483 heap.apply_bytes_delta(before, after);
1484 Ok(())
1485 }
1486
1487 /// C3 — Raw rob-from-rich placement for a key known to be absent
1488 /// from the SoA arrays. Used by `soa_rehash_to` (re-insert pass)
1489 /// and by `soa_insert` (new-key path after the explicit
1490 /// soa_find_slot check). This routine does NOT auto-grow on a
1491 /// load-factor trigger (caller's responsibility), but DOES signal
1492 /// back to the caller via `Err(())` when the PSL bound of 63 is
1493 /// hit before finding an empty slot — Robin Hood's long-tail
1494 /// max-PSL exceeds the 6-bit storage budget at unfavourable hash
1495 /// distributions even under the nominal 0.75 load gate (RFC §6.3
1496 /// R-A1). The caller (`soa_insert`) handles by growing & retrying.
1497 ///
1498 /// On success returns the slot index where the new key landed
1499 /// (after any rob-from-rich shuffle, the original `k` value is at
1500 /// this returned index).
1501 fn soa_place_known_absent(&mut self, k: Value, v: Value) -> Result<usize, (Value, Value)> {
1502 let cap = self.meta.len();
1503 debug_assert!(cap > 0);
1504 let mask = cap - 1;
1505 let landing = (hash_key(k) as usize) & mask;
1506 let mut idx = landing;
1507 let mut cur_psl: u16 = 0;
1508 let mut cur_key = k;
1509 let mut cur_val = v;
1510 let mut placed_at: Option<usize> = None;
1511 for _ in 0..cap {
1512 let m = self.meta[idx];
1513 if !meta_bits::is_occupied(m) || meta_bits::is_tombstone(m) {
1514 if meta_bits::is_tombstone(m) {
1515 self.tombstones = self.tombstones.saturating_sub(1);
1516 }
1517 self.meta[idx] = meta_bits::pack(cur_psl, false);
1518 self.keys[idx] = cur_key;
1519 self.vals[idx] = cur_val;
1520 return Ok(placed_at.unwrap_or(idx));
1521 }
1522 let stored_psl = meta_bits::psl(m);
1523 if cur_psl > stored_psl {
1524 // Rob: swap cur into this slot, evict stored to continue.
1525 std::mem::swap(&mut cur_key, &mut self.keys[idx]);
1526 std::mem::swap(&mut cur_val, &mut self.vals[idx]);
1527 self.meta[idx] = meta_bits::pack(cur_psl, false);
1528 if placed_at.is_none() {
1529 placed_at = Some(idx);
1530 }
1531 cur_psl = stored_psl;
1532 }
1533 idx = (idx + 1) & mask;
1534 cur_psl = cur_psl.saturating_add(1);
1535 if cur_psl > meta_bits::PSL_MAX {
1536 // PSL exceeds the 14-bit storage budget — exceptionally
1537 // rare with 16384 max. Caller (soa_insert / rehash
1538 // outer loop) handles by growing & retrying. Partial
1539 // state: all entries are still in the table EXCEPT
1540 // `(cur_key, cur_val)` which is the latest homeless
1541 // evictee — return it so caller can re-issue.
1542 return Err((cur_key, cur_val));
1543 }
1544 }
1545 // Wrapped cap probes with no free slot — invariant violation
1546 // (load < 1 should guarantee at least one empty). Signal as
1547 // PSL-overflow equivalent so caller grows + retries.
1548 Err((cur_key, cur_val))
1549 }
1550
1551 /// C3 — Grow SoA capacity if the load factor is at or above the
1552 /// 0.75 trigger. Doubles cap; from empty grows to SOA_INITIAL_CAP.
1553 fn soa_grow_if_needed(&mut self, heap: &mut Heap) -> Result<(), TableError> {
1554 // Defer rehash when an iterator is in flight (R-A3). Wired
1555 // in Phase F; in Phase C iter_depth is always 0.
1556 if self.iter_depth > 0 {
1557 return Ok(());
1558 }
1559 let cap = self.meta.len();
1560 if cap == 0 {
1561 return self.soa_rehash_to(heap, SOA_INITIAL_CAP);
1562 }
1563 let occupied = self.soa_occupied_count();
1564 if occupied * SOA_LOAD_DEN >= cap * SOA_LOAD_NUM {
1565 let new_cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1566 return self.soa_rehash_to(heap, new_cap);
1567 }
1568 // Tombstone compaction (same cap, drops tombstones).
1569 if self.tombstones as usize * SOA_TOMB_DEN >= cap * SOA_TOMB_NUM {
1570 return self.soa_rehash_to(heap, cap);
1571 }
1572 Ok(())
1573 }
1574
1575 /// C3 — Insert (or update) `(k, v)` in the SoA hash part. Routes
1576 /// through `soa_find_slot` first so an existing key updates its
1577 /// val in place; otherwise rob-from-rich places a new entry.
1578 /// Auto-rehashes if the load factor would exceed 0.75 OR if the
1579 /// place chain runs into a PSL overflow on a pathological hash
1580 /// distribution.
1581 ///
1582 /// Phase C: this method is callable from outside via the
1583 /// equivalence-test entrypoint (Phase G); not yet hooked into
1584 /// public `set` / `set_norm`.
1585 pub(crate) fn soa_insert(
1586 &mut self,
1587 heap: &mut Heap,
1588 k: Value,
1589 v: Value,
1590 ) -> Result<(), TableError> {
1591 debug_assert!(!matches!(k, Value::Nil));
1592 // 1. Update-in-place if key is already present (live slot).
1593 if let Some(idx) = self.soa_find_slot(k) {
1594 self.vals[idx] = v;
1595 return Ok(());
1596 }
1597 // 2. New key: ensure capacity, then place. On PSL-overflow
1598 // from the place chain (extremely rare with 14-bit PSL budget),
1599 // grow + rehash with the homeless evictee merged in.
1600 // `soa_rehash_with_extra` handles further retries internally,
1601 // bounded by MAX_ASIZE.
1602 self.soa_grow_if_needed(heap)?;
1603 match self.soa_place_known_absent(k, v) {
1604 Ok(_) => Ok(()),
1605 Err(homeless) => {
1606 let cap = self.meta.len();
1607 let new_cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1608 self.soa_rehash_with_extra(heap, new_cap, homeless)
1609 }
1610 }
1611 }
1612
1613 /// C3 — Rehash to `new_cap` while merging in an extra (k, v) pair
1614 /// not currently in the SoA arrays. Used by `soa_insert` to
1615 /// recover from PSL overflow: the homeless evictee from the failed
1616 /// place chain gets appended to the survivor list before the
1617 /// re-insert pass.
1618 fn soa_rehash_with_extra(
1619 &mut self,
1620 heap: &mut Heap,
1621 new_cap: usize,
1622 extra: (Value, Value),
1623 ) -> Result<(), TableError> {
1624 let before = self.internal_bytes();
1625 let mut survivors: Vec<(Value, Value)> = Vec::with_capacity(self.meta.len() + 1);
1626 for i in 0..self.meta.len() {
1627 if meta_bits::is_live(self.meta[i]) {
1628 survivors.push((self.keys[i], self.vals[i]));
1629 }
1630 }
1631 // Avoid duplicating the extra if its key was already placed at
1632 // some slot during the failed rob chain (the rob may have
1633 // landed the original input into a slot before overflowing on
1634 // a downstream evictee — that case the meta-walk above picks
1635 // it up).
1636 if !survivors.iter().any(|(k, _)| k.raw_eq(extra.0)) {
1637 survivors.push(extra);
1638 }
1639 let mut cap = new_cap;
1640 loop {
1641 if cap > MAX_ASIZE {
1642 return Err(TableError::Overflow);
1643 }
1644 self.keys = vec![Value::Nil; cap].into_boxed_slice();
1645 self.vals = vec![Value::Nil; cap].into_boxed_slice();
1646 self.meta = vec![meta_bits::EMPTY; cap].into_boxed_slice();
1647 self.tombstones = 0;
1648 let mut overflowed = false;
1649 for (k, v) in survivors.iter().copied() {
1650 if self.soa_place_known_absent(k, v).is_err() {
1651 overflowed = true;
1652 break;
1653 }
1654 }
1655 if !overflowed {
1656 break;
1657 }
1658 cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1659 }
1660 let after = self.internal_bytes();
1661 heap.apply_bytes_delta(before, after);
1662 Ok(())
1663 }
1664
1665 /// C3 — Read SoA hash part. Mirrors `get_hash` but reads from
1666 /// keys/vals/meta rather than nodes. Used by the Phase G
1667 /// equivalence test; not yet hooked into public `get` / `get_hash`.
1668 pub(crate) fn soa_get(&self, k: Value) -> Value {
1669 match self.soa_find_slot(k) {
1670 Some(idx) => self.vals[idx],
1671 None => Value::Nil,
1672 }
1673 }
1674
1675 /// C3 — Tombstone deletion. Marks the live slot for `k` as
1676 /// tombstoned, preserving the slot index (no backward shift).
1677 /// Slot-index stability is the PUC `next()` iteration invariant
1678 /// — `nextvar.lua:520-521` requires that deleting prior keys
1679 /// during a `pairs` traversal does NOT move unvisited keys.
1680 /// Backward-shift deletion would violate this; tombstones are
1681 /// the standard Robin Hood resolution (see RFC §4.5 + §5.1).
1682 ///
1683 /// keys[idx] / vals[idx] are reset to Nil so the GC marker is
1684 /// not held to the previous entries — only the tombstone bit
1685 /// distinguishes "occupied tombstone" from "free empty".
1686 ///
1687 /// Returns true if the key was found and deleted, false if absent.
1688 ///
1689 /// Phase D: not yet hooked into public `set(k, Nil)` — wired in
1690 /// Phase E alongside the `next()` migration.
1691 pub(crate) fn soa_delete(&mut self, k: Value) -> bool {
1692 if let Some(idx) = self.soa_find_slot(k) {
1693 let psl = meta_bits::psl(self.meta[idx]);
1694 self.meta[idx] = meta_bits::pack(psl, true);
1695 self.keys[idx] = Value::Nil;
1696 self.vals[idx] = Value::Nil;
1697 self.tombstones = self.tombstones.saturating_add(1);
1698 true
1699 } else {
1700 false
1701 }
1702 }
1703}
1704
1705fn normalize_set_key(key: Value) -> Result<Value, TableError> {
1706 match key {
1707 Value::Nil => Err(TableError::NilIndex),
1708 Value::Float(f) => match f2i_exact(f) {
1709 Some(i) => Ok(Value::Int(i)),
1710 None if f.is_nan() => Err(TableError::NanIndex),
1711 None => Ok(key),
1712 },
1713 k => Ok(k),
1714 }
1715}
1716
1717fn hash_key(k: Value) -> u64 {
1718 match k {
1719 Value::Int(i) => i as u64, // identity mod size (PUC hashint)
1720 Value::Float(f) => mix64(f.to_bits()),
1721 Value::Bool(b) => b as u64 + 1,
1722 Value::Str(s) => s.hash() as u64,
1723 Value::Table(t) => mix64(t.as_ptr() as u64),
1724 Value::Closure(c) => mix64(c.as_ptr() as u64),
1725 Value::Native(n) => mix64(n.as_ptr() as u64),
1726 Value::Coro(co) => mix64(co.as_ptr() as u64),
1727 Value::Userdata(u) => mix64(u.as_ptr() as u64),
1728 Value::LightUserdata(p) => mix64(p as u64),
1729 Value::Nil => 0, // unreachable as a stored key
1730 }
1731}
1732
1733/// splitmix64 finalizer.
1734fn mix64(mut x: u64) -> u64 {
1735 x ^= x >> 30;
1736 x = x.wrapping_mul(0xbf58_476d_1ce4_e5b9);
1737 x ^= x >> 27;
1738 x = x.wrapping_mul(0x94d0_49bb_1331_11eb);
1739 x ^ (x >> 31)
1740}
1741
1742/// For k ≥ 1: the bucket l such that k ∈ (2^(l-1), 2^l].
1743fn ceil_log2(k: u64) -> usize {
1744 (u64::BITS - (k - 1).leading_zeros()) as usize
1745}
1746
1747impl Table {
1748 /// Preallocate the array part (table.create); existing contents are
1749 /// preserved.
1750 pub fn ensure_array(&mut self, heap: &mut Heap, n: usize) {
1751 if n > self.asize() {
1752 let hash_entries = self.nodes.iter().filter(|nd| !nd.val.is_nil()).count();
1753 self.resize(heap, n, hash_entries);
1754 }
1755 }
1756}
1757
1758impl Table {
1759 /// Preallocate hash-part capacity (table.create's second size).
1760 pub fn ensure_hash(&mut self, heap: &mut Heap, n: usize) {
1761 let entries = self.nodes.iter().filter(|nd| !nd.val.is_nil()).count();
1762 if n > self.nodes.len() {
1763 self.resize(heap, self.asize(), n.max(entries));
1764 }
1765 }
1766}
1767
1768#[cfg(test)]
1769mod tests {
1770 use super::*;
1771 use crate::runtime::heap::Heap;
1772
1773 fn with_table(f: impl FnOnce(&mut Heap, &mut Table)) {
1774 let mut heap = Heap::new();
1775 let t = heap.new_table();
1776 f(&mut heap, unsafe { t.as_mut() });
1777 }
1778
1779 fn assert_is_border(t: &Table, n: i64) {
1780 if n == 0 {
1781 assert!(t.get_int(1).is_nil(), "border 0 but t[1] non-nil");
1782 } else {
1783 assert!(!t.get_int(n).is_nil(), "border {n} but t[{n}] is nil");
1784 assert!(
1785 t.get_int(n + 1).is_nil(),
1786 "border {n} but t[{}] non-nil",
1787 n + 1
1788 );
1789 }
1790 }
1791
1792 /// v2.1 Phase 1I.B — pin `Box<[Node]>` fat-ptr layout at runtime.
1793 /// The luna-jit table-field IC reads `(ptr, len)` directly out of
1794 /// the `nodes` field assuming the data pointer occupies the low 8
1795 /// bytes and the length the high 8 bytes (de-facto Rust ABI on
1796 /// 64-bit targets but not formally guaranteed). If a future Rust
1797 /// release reorders the fat-ptr, this test fails before IC fires
1798 /// at runtime.
1799 #[test]
1800 #[allow(clippy::assertions_on_constants)]
1801 #[cfg(target_pointer_width = "64")]
1802 fn phase_1i_b_node_layout_pinned() {
1803 use jit_layout::*;
1804 assert_eq!(std::mem::size_of::<Box<[Node]>>(), 16);
1805 assert_eq!(NODE_KEY_OFFSET, 0);
1806 assert_eq!(NODE_VAL_OFFSET, 16);
1807 assert!(SIZEOF_NODE >= 32);
1808
1809 // Construct a real Box<[Node]> with a known length, then
1810 // peek at the fat-pointer's two halves to confirm the
1811 // (data_ptr, len) order. Use a 4-slot box so the length is
1812 // non-zero and the data pointer is heap-allocated.
1813 let b: Box<[Node]> = vec![Node::EMPTY; 4].into_boxed_slice();
1814 let raw_ptr = b.as_ptr();
1815 let raw_len = b.len();
1816 // SAFETY: reading the fat pointer's two words is exactly the
1817 // layout luna-jit's IR assumes; it's the safest possible test
1818 // of that assumption.
1819 let words: [usize; 2] = unsafe { std::mem::transmute_copy(&b) };
1820 assert_eq!(words[0], raw_ptr as usize, "fat-ptr low word = data ptr");
1821 assert_eq!(words[1], raw_len, "fat-ptr high word = len");
1822 drop(b);
1823 }
1824
1825 #[test]
1826 fn sequence_grows_into_array() {
1827 with_table(|heap, t| {
1828 for i in 1..=1000 {
1829 let _ = t.set_int(heap, i, Value::Int(i * 10));
1830 }
1831 for i in 1..=1000 {
1832 assert!(t.get_int(i).raw_eq(Value::Int(i * 10)));
1833 }
1834 assert_eq!(t.len(), 1000);
1835 });
1836 }
1837
1838 #[test]
1839 fn string_and_mixed_keys() {
1840 with_table(|heap, t| {
1841 let k1 = Value::Str(heap.intern(b"alpha"));
1842 let k2 = Value::Str(heap.intern(b"beta"));
1843 t.set(heap, k1, Value::Int(1)).unwrap();
1844 t.set(heap, k2, Value::Int(2)).unwrap();
1845 t.set(heap, Value::Bool(true), Value::Int(3)).unwrap();
1846 t.set(heap, Value::Int(-5), Value::Int(4)).unwrap();
1847 // re-interned key reaches the same slot
1848 let k1b = Value::Str(heap.intern(b"alpha"));
1849 assert!(t.get(k1b).raw_eq(Value::Int(1)));
1850 assert!(t.get(k2).raw_eq(Value::Int(2)));
1851 assert!(t.get(Value::Bool(true)).raw_eq(Value::Int(3)));
1852 assert!(t.get(Value::Int(-5)).raw_eq(Value::Int(4)));
1853 assert!(t.get(Value::Str(heap.intern(b"gamma"))).is_nil());
1854 });
1855 }
1856
1857 #[test]
1858 fn float_keys_normalize_to_int() {
1859 with_table(|heap, t| {
1860 t.set(heap, Value::Float(2.0), Value::Int(22)).unwrap();
1861 assert!(t.get(Value::Int(2)).raw_eq(Value::Int(22)));
1862 t.set(heap, Value::Int(3), Value::Int(33)).unwrap();
1863 assert!(t.get(Value::Float(3.0)).raw_eq(Value::Int(33)));
1864 // -0.0 is key 0
1865 t.set(heap, Value::Float(-0.0), Value::Int(0)).unwrap();
1866 assert!(t.get(Value::Int(0)).raw_eq(Value::Int(0)));
1867 // non-integral floats are their own keys
1868 t.set(heap, Value::Float(0.5), Value::Int(55)).unwrap();
1869 assert!(t.get(Value::Float(0.5)).raw_eq(Value::Int(55)));
1870 assert!(t.get(Value::Int(0)).raw_eq(Value::Int(0)));
1871 });
1872 }
1873
1874 #[test]
1875 fn bad_keys() {
1876 with_table(|heap, t| {
1877 assert_eq!(
1878 t.set(heap, Value::Nil, Value::Int(1)),
1879 Err(TableError::NilIndex)
1880 );
1881 assert_eq!(
1882 t.set(heap, Value::Float(f64::NAN), Value::Int(1)),
1883 Err(TableError::NanIndex)
1884 );
1885 // reads with bad keys are nil, not errors
1886 assert!(t.get(Value::Nil).is_nil());
1887 assert!(t.get(Value::Float(f64::NAN)).is_nil());
1888 });
1889 }
1890
1891 #[test]
1892 fn delete_and_reinsert() {
1893 with_table(|heap, t| {
1894 let k = Value::Str(heap.intern(b"k"));
1895 t.set(heap, k, Value::Int(1)).unwrap();
1896 t.set(heap, k, Value::Nil).unwrap();
1897 assert!(t.get(k).is_nil());
1898 t.set(heap, k, Value::Int(2)).unwrap();
1899 assert!(t.get(k).raw_eq(Value::Int(2)));
1900 // setting an absent key to nil stays absent
1901 let k2 = Value::Str(heap.intern(b"k2"));
1902 t.set(heap, k2, Value::Nil).unwrap();
1903 assert!(t.get(k2).is_nil());
1904 });
1905 }
1906
1907 #[test]
1908 fn borders_with_holes() {
1909 with_table(|heap, t| {
1910 let _ = t.set_int(heap, 1, Value::Int(1));
1911 let _ = t.set_int(heap, 2, Value::Int(2));
1912 assert_eq!(t.len(), 2);
1913 t.set_int(heap, 2, Value::Nil).unwrap();
1914 assert_is_border(t, t.len());
1915 // hash-resident tail
1916 let _ = t.set_int(heap, 1_000_000, Value::Int(1));
1917 assert_is_border(t, t.len());
1918 });
1919 }
1920
1921 #[test]
1922 fn len_on_empty_and_hash_only() {
1923 with_table(|heap, t| {
1924 assert_eq!(t.len(), 0);
1925 let xk = Value::Str(heap.intern(b"x"));
1926 t.set(heap, xk, Value::Int(1)).unwrap();
1927 assert_eq!(t.len(), 0);
1928 });
1929 }
1930
1931 #[test]
1932 fn next_iterates_everything_exactly_once() {
1933 with_table(|heap, t| {
1934 let mut expected = 0i64;
1935 for i in 1..=64 {
1936 let _ = t.set_int(heap, i, Value::Int(i));
1937 expected += i;
1938 }
1939 for i in 0..32 {
1940 let k = Value::Str(heap.intern(format!("s{i}").as_bytes()));
1941 t.set(heap, k, Value::Int(1000 + i)).unwrap();
1942 expected += 1000 + i;
1943 }
1944 t.set(heap, Value::Float(2.5), Value::Int(7)).unwrap();
1945 expected += 7;
1946
1947 let mut sum = 0i64;
1948 let mut count = 0;
1949 let mut key = Value::Nil;
1950 while let Some((k, v)) = t.next(key).unwrap() {
1951 let Value::Int(x) = v else {
1952 panic!("bad value")
1953 };
1954 sum += x;
1955 count += 1;
1956 key = k;
1957 }
1958 assert_eq!(count, 64 + 32 + 1);
1959 assert_eq!(sum, expected);
1960 });
1961 }
1962
1963 #[test]
1964 fn next_skips_nil_values_and_rejects_alien_keys() {
1965 with_table(|heap, t| {
1966 let _ = t.set_int(heap, 1, Value::Int(1));
1967 let _ = t.set_int(heap, 3, Value::Int(3));
1968 let k = Value::Str(heap.intern(b"gone"));
1969 t.set(heap, k, Value::Int(9)).unwrap();
1970 t.set(heap, k, Value::Nil).unwrap();
1971 let mut seen = Vec::new();
1972 let mut key = Value::Nil;
1973 while let Some((k, v)) = t.next(key).unwrap() {
1974 let Value::Int(x) = v else { panic!() };
1975 seen.push(x);
1976 key = k;
1977 }
1978 assert_eq!(seen, vec![1, 3]);
1979 // a key never inserted is invalid for next
1980 let alien = Value::Str(heap.intern(b"never"));
1981 assert!(matches!(t.next(alien), Err(TableError::InvalidNext)));
1982 // ...but a deleted (nil-valued) key is still a valid cursor
1983 assert!(t.next(k).is_ok());
1984 });
1985 }
1986
1987 #[test]
1988 fn collision_relocation_keeps_chains_intact() {
1989 with_table(|heap, t| {
1990 // dense negative ints all land in the hash part; with identity
1991 // hashing they exercise both chain cases heavily
1992 for i in 0..512 {
1993 let _ = t.set_int(heap, -i, Value::Int(i));
1994 }
1995 for i in 0..512 {
1996 assert!(t.get_int(-i).raw_eq(Value::Int(i)), "lost key {}", -i);
1997 }
1998 });
1999 }
2000
2001 // -----------------------------------------------------------------
2002 // C3 SoA Robin Hood equivalence tests (Phase G).
2003 //
2004 // Cross-check the new SoA + RH path against the existing chain-walk
2005 // path: replay the same insert/lookup sequence on a table via
2006 // `set` (chain) and another via `soa_insert` (SoA), then assert
2007 // `get == soa_get` for every key.
2008 //
2009 // Phase B+C scope: insert + read only. Tombstone delete equivalence
2010 // arrives with Phase D.
2011 // -----------------------------------------------------------------
2012
2013 fn replay_chain(heap: &mut Heap, ops: &[(Value, Value)]) -> *mut Table {
2014 let t = heap.new_table();
2015 let tref = unsafe { t.as_mut() };
2016 for (k, v) in ops.iter().copied() {
2017 tref.set(heap, k, v).unwrap();
2018 }
2019 t.as_ptr()
2020 }
2021
2022 fn replay_soa(heap: &mut Heap, ops: &[(Value, Value)]) -> *mut Table {
2023 let t = heap.new_table();
2024 let tref = unsafe { t.as_mut() };
2025 for (k, v) in ops.iter().copied() {
2026 tref.soa_insert(heap, k, v).unwrap();
2027 }
2028 t.as_ptr()
2029 }
2030
2031 #[test]
2032 fn c3_soa_equivalence_string_keys() {
2033 let mut heap = Heap::new();
2034 let mut ops = Vec::new();
2035 for i in 0..40 {
2036 let k = Value::Str(heap.intern(format!("key_{i:03}").as_bytes()));
2037 ops.push((k, Value::Int(i * 7)));
2038 }
2039 let chain = unsafe { &*replay_chain(&mut heap, &ops) };
2040 let soa = unsafe { &*replay_soa(&mut heap, &ops) };
2041 for (k, _) in &ops {
2042 let cv = chain.get(*k);
2043 let sv = soa.soa_get(*k);
2044 assert!(
2045 cv.raw_eq(sv),
2046 "SoA vs chain mismatch on key — chain={:?} soa={:?}",
2047 cv,
2048 sv,
2049 );
2050 }
2051 // Absent key returns nil from both paths.
2052 let absent = Value::Str(heap.intern(b"never"));
2053 assert!(chain.get(absent).is_nil());
2054 assert!(soa.soa_get(absent).is_nil());
2055 }
2056
2057 #[test]
2058 fn c3_soa_equivalence_negative_int_keys() {
2059 // Dense negative ints with identity hashing — same collision
2060 // profile as the existing `collision_relocation_keeps_chains_intact`
2061 // test, but verified through the SoA RH path. Triggers
2062 // rob-from-rich repeatedly.
2063 let mut heap = Heap::new();
2064 let mut ops = Vec::new();
2065 for i in 0..256 {
2066 let k = Value::Int(-i);
2067 ops.push((k, Value::Int(i)));
2068 }
2069 let chain = unsafe { &*replay_chain(&mut heap, &ops) };
2070 let soa = unsafe { &*replay_soa(&mut heap, &ops) };
2071 for (k, _) in &ops {
2072 let cv = chain.get(*k);
2073 let sv = soa.soa_get(*k);
2074 assert!(cv.raw_eq(sv), "SoA mismatch on key {:?}", k);
2075 }
2076 }
2077
2078 #[test]
2079 fn c3_soa_equivalence_mixed_keys_with_updates() {
2080 // Insert, then update the same keys with new values — exercises
2081 // the soa_find_slot in-place update branch.
2082 let mut heap = Heap::new();
2083 let kstr = Value::Str(heap.intern(b"x"));
2084 let kint = Value::Int(42);
2085 let kbool = Value::Bool(true);
2086 let ops: Vec<(Value, Value)> = vec![
2087 (kstr, Value::Int(1)),
2088 (kint, Value::Int(2)),
2089 (kbool, Value::Int(3)),
2090 (kstr, Value::Int(11)), // update
2091 (kint, Value::Int(22)), // update
2092 (kbool, Value::Int(33)), // update
2093 ];
2094 let chain = unsafe { &*replay_chain(&mut heap, &ops) };
2095 let soa = unsafe { &*replay_soa(&mut heap, &ops) };
2096 for k in [kstr, kint, kbool] {
2097 assert!(chain.get(k).raw_eq(soa.soa_get(k)));
2098 }
2099 }
2100
2101 #[test]
2102 fn c3_soa_equivalence_delete_then_read() {
2103 // Phase D: tombstone delete + read on both paths, verify
2104 // matching nil-for-deleted, original-val-for-live.
2105 let mut heap = Heap::new();
2106 let mut ops_insert = Vec::new();
2107 for i in 0..30 {
2108 let k = Value::Str(heap.intern(format!("d_key_{i:03}").as_bytes()));
2109 ops_insert.push((k, Value::Int(i * 11)));
2110 }
2111 let chain = unsafe { &mut *replay_chain(&mut heap, &ops_insert) };
2112 let soa = unsafe { &mut *replay_soa(&mut heap, &ops_insert) };
2113 // Delete every 3rd key.
2114 let mut deleted: Vec<Value> = Vec::new();
2115 for (i, (k, _)) in ops_insert.iter().enumerate() {
2116 if i % 3 == 0 {
2117 // chain: set to Nil is the chain-path's delete equivalent
2118 chain.set(&mut heap, *k, Value::Nil).unwrap();
2119 let was_present = soa.soa_delete(*k);
2120 assert!(was_present, "soa_delete miss on inserted key {:?}", k);
2121 deleted.push(*k);
2122 }
2123 }
2124 // Read each key: deleted → nil, non-deleted → original val.
2125 for (k, v) in &ops_insert {
2126 let cv = chain.get(*k);
2127 let sv = soa.soa_get(*k);
2128 assert!(
2129 cv.raw_eq(sv),
2130 "delete/read mismatch on key {:?} — chain={:?} soa={:?}",
2131 k,
2132 cv,
2133 sv,
2134 );
2135 if deleted.iter().any(|d| d.raw_eq(*k)) {
2136 assert!(cv.is_nil(), "deleted key {:?} chain non-nil", k);
2137 assert!(sv.is_nil(), "deleted key {:?} soa non-nil", k);
2138 } else {
2139 assert!(cv.raw_eq(*v), "live key {:?} chain val drift", k);
2140 }
2141 }
2142 // Deleting an absent key is a no-op (returns false) on SoA.
2143 let absent = Value::Str(heap.intern(b"never_d"));
2144 assert!(!soa.soa_delete(absent));
2145 }
2146
2147 #[test]
2148 fn c3_soa_delete_then_reinsert_uses_tombstone() {
2149 // After delete + reinsert, key is findable with new val. The
2150 // SoA path may reuse the tombstoned slot (preferred) or place
2151 // elsewhere — either is correct as long as soa_get returns
2152 // the new val.
2153 let mut heap = Heap::new();
2154 let t = heap.new_table();
2155 let tref = unsafe { t.as_mut() };
2156 let k = Value::Str(heap.intern(b"reinsert_target"));
2157 tref.soa_insert(&mut heap, k, Value::Int(100)).unwrap();
2158 assert!(tref.soa_get(k).raw_eq(Value::Int(100)));
2159 let pre_tombs = tref.tombstones;
2160 assert!(tref.soa_delete(k));
2161 assert!(tref.tombstones == pre_tombs + 1);
2162 assert!(tref.soa_get(k).is_nil());
2163 // Reinsert with new val.
2164 tref.soa_insert(&mut heap, k, Value::Int(200)).unwrap();
2165 assert!(tref.soa_get(k).raw_eq(Value::Int(200)));
2166 // Tombstone reused — count back to pre_tombs.
2167 assert_eq!(tref.tombstones, pre_tombs);
2168 }
2169
2170 #[test]
2171 fn c3_soa_grows_under_load_pressure() {
2172 // Stress test: insert enough entries to trigger multiple RH
2173 // rehashes (cap doubles at load 0.75). Confirms PSL overflow
2174 // never fires and all keys survive grow cycles.
2175 let mut heap = Heap::new();
2176 let t = heap.new_table();
2177 let tref = unsafe { t.as_mut() };
2178 for i in 0..1024 {
2179 let k = Value::Str(heap.intern(format!("entry_{i:05}").as_bytes()));
2180 tref.soa_insert(&mut heap, k, Value::Int(i)).unwrap();
2181 }
2182 // Verify every key is findable.
2183 for i in 0..1024 {
2184 let k = Value::Str(heap.intern(format!("entry_{i:05}").as_bytes()));
2185 let v = tref.soa_get(k);
2186 assert!(
2187 v.raw_eq(Value::Int(i)),
2188 "SoA lost key entry_{:05} — got {:?}",
2189 i,
2190 v,
2191 );
2192 }
2193 assert!(tref.soa_live_count() == 1024);
2194 // Cap should have grown past the initial SOA_INITIAL_CAP via
2195 // the 0.75 load-factor trigger.
2196 assert!(
2197 tref.soa_cap() >= 2048,
2198 "SoA cap = {} after 1024 inserts — load gate didn't grow",
2199 tref.soa_cap(),
2200 );
2201 }
2202
2203 #[test]
2204 fn rehash_redistributes_into_array() {
2205 with_table(|heap, t| {
2206 // insert 1..n in reverse: starts in hash, rehash must migrate
2207 for i in (1..=256).rev() {
2208 let _ = t.set_int(heap, i, Value::Int(i));
2209 }
2210 assert_eq!(t.len(), 256);
2211 for i in 1..=256 {
2212 assert!(t.get_int(i).raw_eq(Value::Int(i)));
2213 }
2214 });
2215 }
2216}