Skip to main content

luna_core/runtime/
table.rs

1//! Lua table: hybrid array + hash.
2//!
3//! Array part uses split tag/payload storage (9 bytes/slot — the Lua 5.5
4//! "compact arrays" layout, bench-validated in benches/value_repr.rs).
5//! Hash part is the PUC node layout: main-position chaining with relocation
6//! (Brent's variation), capacity a power of two, rehash sizing per
7//! luaH_rehash/computesizes.
8
9use crate::runtime::heap::{Gc, GcHeader, Heap, Marker};
10use crate::runtime::value::{RawVal, Value, f2i_exact, raw};
11
12/// Errors that table mutation can raise back to the interpreter.
13#[derive(Clone, Copy, PartialEq, Eq, Debug)]
14pub enum TableError {
15    /// `t[nil] = …` — `nil` is forbidden as a key.
16    NilIndex,
17    /// `t[0/0] = …` — NaN floats are forbidden as keys.
18    NanIndex,
19    /// `next` called with a key not present in the table.
20    InvalidNext,
21    /// PUC `luaH_resizearray` — the array part would have to grow past
22    /// `MAXASIZE`, or the hash part past `MAXHBITS`. Raised back as
23    /// "table overflow" so a runaway `a[i] = i` loop walls within budget
24    /// (5.5/5.4 heavy.lua's `toomanyidx` pcalls exactly this scenario).
25    Overflow,
26}
27
28/// PUC `MAXASIZE` analogue: the highest power of two an array part may
29/// grow to. Choose a cap that comfortably fits in the gate's 60-second
30/// budget (each grow is O(n), so 2^27 entries × 16 bytes ≈ 2 GB is the
31/// effective ceiling). Beyond this `rehash` returns `TableError::Overflow`.
32pub(crate) const MAX_ASIZE: usize = 1 << 27;
33
34/// v2.1 Phase 1I.B — JIT layout constants for table-field IC.
35///
36/// luna-jit's trace lowerer needs to emit direct loads against
37/// `Table.nodes` (the hash part) without paying the helper-call ABI
38/// for each `Op::GetField` / `Op::SetField`. These constants expose
39/// the field offsets so the cranelift IR can be parameterised at
40/// compile time. The `Node` struct itself remains `pub(crate)` — only
41/// the offsets cross the crate boundary.
42///
43/// Layout assumptions:
44/// - `Box<[Node]>` is a fat pointer `(data_ptr, len)` on 64-bit
45///   targets (16 bytes total). The data pointer occupies the low 8
46///   bytes, length the high 8. This is the de-facto Rust ABI for
47///   `Box<[T]>` / `&[T]` but isn't formally guaranteed; the unit
48///   test `phase_1i_b_node_layout_pinned` and the const assertion
49///   on `size_of::<Box<[Node]>>()` catch drift.
50/// - `Value` is `#[repr(C, u8)]` so the discriminant byte sits at
51///   offset 0 and the payload starts at offset 8 (after 7 bytes of
52///   alignment padding). Total size 16 bytes per the existing
53///   `value_is_16_bytes` test in `runtime/value.rs`.
54/// - `Node` is `#[derive(Clone, Copy)]` with field order
55///   `(key: Value, val: Value, next: i32, dead_key: bool)`, so
56///   `key` lives at offset 0 and `val` at offset 16. The trailing
57///   `next + dead_key` fields are not read by the IC.
58pub mod jit_layout {
59    use super::Node;
60    use crate::runtime::Table;
61
62    /// Byte offset of the `nodes: Box<[Node]>` field within `Table`.
63    /// The fat-ptr low word (data ptr) lives at this offset; the
64    /// high word (length) at `TABLE_NODES_OFFSET + 8`. luna-jit
65    /// adds `TABLE_NODES_PTR_OFFSET` / `TABLE_NODES_LEN_OFFSET`
66    /// constants in `jit_backend/mod.rs` to express that split.
67    pub const TABLE_NODES_OFFSET: usize = std::mem::offset_of!(Table, nodes);
68
69    /// Byte offset of `key: Value` within `Node` (= 0).
70    pub const NODE_KEY_OFFSET: usize = std::mem::offset_of!(Node, key);
71
72    /// Byte offset of `val: Value` within `Node` (= 16 — `key` is 16-byte
73    /// `Value`, no inner padding).
74    pub const NODE_VAL_OFFSET: usize = std::mem::offset_of!(Node, val);
75
76    /// Total `Node` size in bytes (= 40 on 64-bit). Used as the stride
77    /// in `node_addr = nodes_ptr + slot_idx * SIZEOF_NODE`.
78    pub const SIZEOF_NODE: usize = std::mem::size_of::<Node>();
79
80    /// Static guard: pin the assumptions luna-jit relies on at compile
81    /// time. Layout drift here breaks IR emit, so trap it at compile
82    /// time rather than at trace-fire time.
83    ///
84    /// `Box<[T]>` is a fat pointer of `2 * usize` — 16 bytes on 64-bit
85    /// targets, 8 bytes on 32-bit (e.g. `wasm32`). Use a width-aware
86    /// expected size so the wasm32-unknown-unknown CI build does not
87    /// trip the assertion. The runtime layout still matters for luna-jit
88    /// IR emit on 64-bit hosts (the only platforms where Cranelift JIT
89    /// runs); the 32-bit branch documents the size in passing.
90    const _: () = {
91        assert!(std::mem::size_of::<Box<[Node]>>() == 2 * std::mem::size_of::<usize>());
92        assert!(NODE_KEY_OFFSET == 0);
93        assert!(NODE_VAL_OFFSET == 16);
94        assert!(SIZEOF_NODE >= 32);
95    };
96}
97
98#[derive(Clone, Copy)]
99pub(crate) struct Node {
100    key: Value,
101    val: Value,
102    /// absolute index of the next node in this chain, or NONE
103    next: i32,
104    /// PUC `setdeadkey` analogue: the key was a collectable that got swept
105    /// out of a weak table. The Gc pointer in `key` is now dangling — its
106    /// memory may have been reused for a new allocation with potentially
107    /// equal content. Marking the node "dead-key" lets `find_node` skip the
108    /// raw_eq probe (which could spuriously match a reallocated object) and
109    /// `insert_new` treat the slot as available for a fresh main-position
110    /// owner while leaving chain back-links intact for traversal.
111    dead_key: bool,
112}
113
114const NONE: i32 = -1;
115
116impl Node {
117    const EMPTY: Node = Node {
118        key: Value::Nil,
119        val: Value::Nil,
120        next: NONE,
121        dead_key: false,
122    };
123}
124
125/// C3 — SoA Robin Hood meta-word layout (Variant A, see
126/// `.dev/rfcs/v2.0-c3-soa-robinhood-rfc.md` §5.1).
127///
128/// Each `meta[idx]` slot encodes the open-addressing slot state in a
129/// single u16:
130/// - bit 15 (`OCCUPIED_BIT`): 0 = empty, 1 = occupied
131/// - bit 14 (`TOMBSTONE_BIT`): 0 = live, 1 = tombstoned-occupied
132/// - bits 13..0 (`PSL_MASK`): probe-sequence length (0..16383)
133///
134/// The 14-bit PSL field is **far** beyond any realistic Robin Hood
135/// max-PSL at load ≤ 0.75 (expected max ~20 on 1024 slots; even the
136/// long-tail outliers seen empirically with luna's existing hash
137/// distributions stay under 200). 2 bytes/slot is still 20× smaller
138/// than the 40-byte Node — the SoA bandwidth gain (§3.5 of the RFC)
139/// is preserved.
140///
141/// Earlier draft used 1 byte with a 6-bit PSL cap of 63; bench under
142/// load 0.676 on cap=1024 produced a long-tail PSL of 64+ with the
143/// LuaStr+mix64 hash distribution, forcing the widening.
144///
145/// Tombstones do NOT free the slot for `find` (probe continues past), but
146/// DO free it for `insert` (write the new entry, clear the tomb bit). They
147/// accumulate; the rehash path compacts them periodically.
148#[allow(dead_code)]
149pub(crate) mod meta_bits {
150    pub const OCCUPIED_BIT: u16 = 0b1000_0000_0000_0000;
151    pub const TOMBSTONE_BIT: u16 = 0b0100_0000_0000_0000;
152    pub const PSL_MASK: u16 = 0b0011_1111_1111_1111;
153    pub const PSL_MAX: u16 = PSL_MASK;
154    /// Empty slot — bit 15 = 0, all others 0.
155    pub const EMPTY: u16 = 0;
156
157    #[inline(always)]
158    pub fn is_occupied(m: u16) -> bool {
159        (m & OCCUPIED_BIT) != 0
160    }
161    #[inline(always)]
162    pub fn is_tombstone(m: u16) -> bool {
163        (m & TOMBSTONE_BIT) != 0
164    }
165    /// Live = occupied AND not tombstoned. `next()` iteration cursor returns
166    /// these. `find_slot_rh` short-circuits on a live match.
167    #[inline(always)]
168    pub fn is_live(m: u16) -> bool {
169        (m & (OCCUPIED_BIT | TOMBSTONE_BIT)) == OCCUPIED_BIT
170    }
171    #[inline(always)]
172    pub fn psl(m: u16) -> u16 {
173        m & PSL_MASK
174    }
175    #[inline(always)]
176    pub fn pack(psl: u16, tomb: bool) -> u16 {
177        debug_assert!(psl <= PSL_MAX);
178        let mut m = OCCUPIED_BIT | (psl & PSL_MASK);
179        if tomb {
180            m |= TOMBSTONE_BIT;
181        }
182        m
183    }
184}
185
186/// P11-S5d.I — inline storage threshold. Tables whose array part has
187/// `asize <= INLINE_ASIZE` keep their atags+avals inside the Table
188/// struct itself (`inline_storage`), skipping the slab Box entirely
189/// — binary_trees's `{nil, nil}` and `{...}` 2-element leaves live
190/// here, sparing one allocator round-trip per NewTable.
191pub(crate) const INLINE_ASIZE: u64 = 2;
192/// `INLINE_ASIZE` u64 slots for avals + `ceil(INLINE_ASIZE / 8)` u64
193/// slots covering the atags bytes (with trailing pad). For
194/// `INLINE_ASIZE = 2`: 2 avals + 1 atags = 3 u64s = 24 bytes.
195pub(crate) const INLINE_U64S: usize = INLINE_ASIZE as usize + INLINE_ASIZE.div_ceil(8) as usize;
196
197/// Lua table — hybrid array + hash storage, with optional metatable and
198/// weak-mode flags.
199#[repr(C)]
200pub struct Table {
201    /// read through raw casts by the GC, not by field access
202    #[allow(dead_code)]
203    pub(crate) hdr: GcHeader,
204    /// P11-S5d.I — single backing pointer for the array part. Points
205    /// to `inline_storage` (asize <= INLINE_ASIZE) or `slab.as_ptr()`
206    /// (asize > INLINE_ASIZE). The JIT inline aset reads this with one
207    /// `load i64`, no branch — the choice between inline and slab is
208    /// already encoded in the pointer. Initialised in `Heap::new_table`
209    /// AFTER the Table reaches its final heap address (so that
210    /// `&mut self.inline_storage` is the stable heap pointer, not a
211    /// stack-local one). Updated by `Table::resize`.
212    pub array_ptr: *mut u8,
213    /// P11-S5d.H — external backing for the array part when
214    /// `asize > INLINE_ASIZE`. Layout: `[avals: asize × 8 bytes][atags:
215    /// asize bytes]`. Empty box (dangling, no alloc) when the inline
216    /// path is in use.
217    pub(crate) slab: Box<[u64]>,
218    /// Length of the array part in slots. u64 (rather than `usize` or
219    /// `u32`) so the JIT can load it with a single `load i64`.
220    pub asize: u64,
221    /// P11-S5d.I — inline backing used when `asize <= INLINE_ASIZE`.
222    /// Same layout as the slab: avals at low addresses (`asize * 8`
223    /// bytes from offset 0), atags at the trailing `asize` bytes.
224    ///
225    /// `UnsafeCell` because `array_ptr` is a SELF-REFERENTIAL cached
226    /// pointer into this field. Under Stacked Borrows, every
227    /// `&mut self` method call's function-entry retag re-tags the
228    /// whole `*self` byte range Unique and would pop the cached
229    /// pointer's tag — subsequent `array_ptr` accesses were UB (Miri:
230    /// "retag ... tag does not exist in the borrow stack", v2.13).
231    /// An `UnsafeCell` region instead receives SharedReadWrite on
232    /// retag, which coexists with the pointer derived from
233    /// `UnsafeCell::get`. All reads/writes of the inline bytes MUST
234    /// go through `array_ptr` / `.get()` — never through a direct
235    /// `&`/`&mut` borrow of the array contents.
236    pub(crate) inline_storage: std::cell::UnsafeCell<[u64; INLINE_U64S]>,
237    /// hash part: power-of-two length (or empty)
238    /// hash part: power-of-two length (or empty)
239    /// `pub(crate)` so `Heap::free_obj` (pool recycle path) can reset.
240    pub(crate) nodes: Box<[Node]>,
241    /// free-slot search position, counts down (PUC lastfree).
242    /// `pub(crate)` so `Heap::new_table` can reset on pool recycle.
243    pub(crate) lastfree: u32,
244    /// C3 — SoA Robin Hood hash part (Variant A, parallel to `nodes`
245    /// during Phase B+C transition). After Phase 4 cutover these
246    /// replace `nodes` entirely. Each of `keys` / `vals` / `meta`
247    /// is the same length as `nodes` and is sized in lockstep by
248    /// `resize`. `meta` byte layout per the `meta_bits` module above.
249    /// Empty `Box::new([])` until Phase D cuts over.
250    pub(crate) keys: Box<[Value]>,
251    pub(crate) vals: Box<[Value]>,
252    pub(crate) meta: Box<[u16]>,
253    /// Count of tombstoned-occupied meta slots; rehash trigger.
254    pub(crate) tombstones: u32,
255    /// C3 — iterator-guard counter (R-A3 mitigation). Incremented on
256    /// each `pairs`/`next` entry, decremented on exit. While > 0,
257    /// the SoA path MUST defer rehash (which would rebase slot
258    /// indices and break PUC `nextvar.lua:520-521` invariant).
259    /// Phase F wires this; Phase B initialises to 0.
260    pub(crate) iter_depth: u32,
261    /// P11-S5d.K — visibility lifted to `pub(crate)` so the JIT can
262    /// take its field offset at compile time and emit an inline
263    /// "metatable.is_none()" guard before the inline aget fast path.
264    /// `Option<Gc<Table>>` is 8 bytes via the NonNull-pointer-opt: 0
265    /// ⇔ None, non-zero ⇔ Some.
266    pub metatable: Option<Gc<Table>>,
267    /// reserved for an absent-metamethod cache (PUC `flags`); currently
268    /// unread — luna's mm lookup walks `metatable.get` each time
269    #[allow(dead_code)]
270    pub(crate) flags: u8,
271}
272
273// SAFETY: `array_ptr` looks like an unprotected raw pointer field, but
274// it always refers to memory the same Table owns (either its own inline
275// storage or its `slab` Box). The Table is heap-allocated and never
276// moved post-adoption, so the pointer stays valid for the table's
277// lifetime. No thread-unsafety concern: tables are accessed only
278// through the Vm, single-threaded.
279unsafe impl Send for Table {}
280unsafe impl Sync for Table {}
281
282impl Table {
283    pub(crate) fn new(hdr: GcHeader) -> Table {
284        Table {
285            hdr,
286            // P11-S5d.I — `array_ptr` is fixed up in
287            // `Heap::new_table` after the Table reaches its final heap
288            // address (so that `&inline_storage` is the heap address,
289            // not a stack-local one). Null sentinel here so a
290            // bug-detection invariant flags any pre-fixup read.
291            array_ptr: std::ptr::null_mut(),
292            slab: Box::new([]),
293            asize: 0,
294            inline_storage: std::cell::UnsafeCell::new([0; INLINE_U64S]),
295            nodes: Box::new([]),
296            lastfree: 0,
297            keys: Box::new([]),
298            vals: Box::new([]),
299            meta: Box::new([]),
300            tombstones: 0,
301            iter_depth: 0,
302            metatable: None,
303            flags: 0,
304        }
305    }
306
307    /// P11-S5d.I — set `array_ptr` to the inline storage's stable heap
308    /// address. Called by `Heap::new_table` once the Table is at its
309    /// final location.
310    #[inline]
311    pub(crate) fn init_array_ptr(&mut self) {
312        self.array_ptr = self.inline_storage.get() as *mut u8;
313    }
314
315    /// Freshly-derived base pointer for the array part. Rust-side
316    /// accessors MUST use this instead of the cached `array_ptr`
317    /// field when the backing is inline: a pointer into `*self`
318    /// cached across `&mut self` boundaries is invalidated by every
319    /// function-entry retag under Stacked Borrows — `&mut` retags
320    /// ignore `UnsafeCell` (only `&` retags respect it), so the
321    /// cached tag dies on the next method call (v2.13 Miri finding,
322    /// `retag ... tag does not exist in the borrow stack`). Deriving
323    /// through `UnsafeCell::get()` at each use gives a fresh
324    /// SharedReadWrite tag valid for reads AND writes even from
325    /// `&self`. The slab case keeps the cached pointer: its tag
326    /// lives on the heap allocation, outside `*self`, untouched by
327    /// entry retags. `array_ptr` itself stays maintained for the
328    /// JIT, whose emitted code loads the field directly (no Rust
329    /// borrows involved).
330    #[inline(always)]
331    fn array_base(&self) -> *mut u8 {
332        if self.asize <= INLINE_ASIZE {
333            self.inline_storage.get() as *mut u8
334        } else {
335            self.array_ptr
336        }
337    }
338
339    /// P11-S5d.H/I — read view onto the array-part tag bytes. Trails
340    /// the avals portion in the active backing (inline or slab).
341    #[inline(always)]
342    pub(crate) fn atags(&self) -> &[u8] {
343        let n = self.asize as usize;
344        if n == 0 {
345            return &[];
346        }
347        // SAFETY: `array_ptr` always points to a buffer with `n`
348        // RawVal slots followed by `n` u8 tag bytes (either
349        // `inline_storage` of `INLINE_U64S` u64s, or a `slab` of
350        // `asize + ceil(asize/8)` u64s). The tag bytes start at byte
351        // offset `n * 8` from the buffer base.
352        unsafe {
353            let ptr = self.array_base().add(n * 8);
354            std::slice::from_raw_parts(ptr, n)
355        }
356    }
357
358    #[inline(always)]
359    pub(crate) fn atags_mut(&mut self) -> &mut [u8] {
360        let n = self.asize as usize;
361        if n == 0 {
362            return &mut [];
363        }
364        // SAFETY: `array_ptr` was allocated by `Heap::init_array_ptr` with `array_cap` slots; the table holds it for its lifetime and the heap is single-threaded so no concurrent writers exist.
365        unsafe {
366            let ptr = self.array_base().add(n * 8);
367            std::slice::from_raw_parts_mut(ptr, n)
368        }
369    }
370
371    /// P11-S5d.H/I — read view onto the array-part payload slots. Sits
372    /// at the start of the active backing (u64-aligned, identical size
373    /// and layout to `RawVal`).
374    #[inline(always)]
375    pub(crate) fn avals(&self) -> &[RawVal] {
376        let n = self.asize as usize;
377        if n == 0 {
378            return &[];
379        }
380        // SAFETY: inline_storage / slab both store u64s, so the cast
381        // to `*const RawVal` is alignment-safe (RawVal size = 8,
382        // align = 8). The buffer holds at least `n` such slots.
383        unsafe { std::slice::from_raw_parts(self.array_base() as *const RawVal, n) }
384    }
385
386    #[inline(always)]
387    pub(crate) fn avals_mut(&mut self) -> &mut [RawVal] {
388        let n = self.asize as usize;
389        if n == 0 {
390            return &mut [];
391        }
392        // SAFETY: `array_ptr` was allocated by `Heap::init_array_ptr` with `array_cap` slots; the table holds it for its lifetime and the heap is single-threaded so no concurrent writers exist.
393        unsafe { std::slice::from_raw_parts_mut(self.array_base() as *mut RawVal, n) }
394    }
395
396    /// Allocate a fresh external `[avals: asize × 8 bytes][atags: asize
397    /// bytes]` slab. Only used when `asize > INLINE_ASIZE`. The buffer
398    /// is u64-aligned via `Box<[u64]>` and zeroed (avals = `RawVal::
399    /// NIL` aka `0`; atags = `raw::NIL` aka `0`).
400    fn alloc_slab(asize: usize) -> Box<[u64]> {
401        if asize == 0 {
402            return Box::new([]);
403        }
404        let avals_u64s = asize;
405        let atags_u64s = asize.div_ceil(8);
406        let total = avals_u64s + atags_u64s;
407        vec![0u64; total].into_boxed_slice()
408    }
409
410    /// This table's metatable, if any.
411    pub fn metatable(&self) -> Option<Gc<Table>> {
412        self.metatable
413    }
414
415    /// Install (or clear) this table's metatable. Does not perform any
416    /// `__metatable` guarding; that belongs in the Vm-level `setmetatable`.
417    pub fn set_metatable(&mut self, mt: Option<Gc<Table>>) {
418        self.metatable = mt;
419    }
420
421    /// Bytes occupied by the table's *external* internal allocations
422    /// (slab and nodes). Cheap O(1) read — Box len × element size, no
423    /// allocator query. `Heap::free_obj` subtracts this on the way out
424    /// so the credit applied via `set`/`rehash`/`ensure_*` is symmetric.
425    ///
426    /// P11-S5d.I — inline storage doesn't count toward this (it's part
427    /// of the Table struct itself, accounted for by `size_of::<Table>()`
428    /// at adoption time). When the array part lives inline, the slab
429    /// is empty and contributes nothing here.
430    pub(crate) fn internal_bytes(&self) -> usize {
431        let n = self.asize as usize;
432        let array_external = if n > INLINE_ASIZE as usize {
433            n + n * std::mem::size_of::<RawVal>()
434        } else {
435            0
436        };
437        let soa_external = self.keys.len() * std::mem::size_of::<Value>()
438            + self.vals.len() * std::mem::size_of::<Value>()
439            + self.meta.len() * std::mem::size_of::<u16>();
440        array_external + self.nodes.len() * std::mem::size_of::<Node>() + soa_external
441    }
442
443    fn asize(&self) -> usize {
444        self.asize as usize
445    }
446
447    fn aget(&self, idx: usize) -> Value {
448        // SAFETY: callers gate on `idx < self.asize()` before reaching here
449        // (`get_int`, `iter_array`, etc.). atags and avals are sized
450        // identically by `rehash`, so a bound check passed against atags
451        // covers avals too.
452        unsafe {
453            Value::pack(
454                *self.atags().get_unchecked(idx),
455                *self.avals().get_unchecked(idx),
456            )
457        }
458    }
459
460    fn aset(&mut self, idx: usize, v: Value) {
461        let (t, b) = v.unpack();
462        // SAFETY: see `aget`. callers (`set_norm`, `set_int`) gate on
463        // `idx < self.asize()`. The two `*_mut` calls each take a
464        // distinct `&mut self` borrow whose lifetime ends at the
465        // statement boundary, so they don't overlap.
466        unsafe {
467            *self.atags_mut().get_unchecked_mut(idx) = t;
468            *self.avals_mut().get_unchecked_mut(idx) = b;
469        }
470    }
471
472    // ---- reads ----
473
474    /// Raw lookup (no `__index` metamethod). Returns `Value::Nil` when
475    /// the key is absent. `Value::Nil` and NaN floats return `nil` directly.
476    pub fn get(&self, key: Value) -> Value {
477        match key {
478            Value::Int(i) => self.get_int(i),
479            Value::Float(f) => match f2i_exact(f) {
480                Some(i) => self.get_int(i),
481                None => {
482                    if f.is_nan() {
483                        Value::Nil
484                    } else {
485                        self.get_hash(key)
486                    }
487                }
488            },
489            Value::Nil => Value::Nil,
490            k => self.get_hash(k),
491        }
492    }
493
494    /// Integer-keyed variant of [`Self::get`].
495    pub fn get_int(&self, i: i64) -> Value {
496        if i >= 1 && (i as u64) <= self.asize() as u64 {
497            return self.aget(i as usize - 1);
498        }
499        self.get_hash(Value::Int(i))
500    }
501
502    /// String-keyed variant of [`Self::get`] for v1.2 D4 A1 GetField fast
503    /// path: the GetField interp arm always has a `Gc<LuaStr>` key from
504    /// `Proto.consts`. Skips the outer `Value` match (which would only
505    /// take the `_ => self.get_hash(k)` arm anyway) so the dispatcher
506    /// pays one less branch per call. ~5 GetField/iter × 1000 iters/cell
507    /// on the Redis-Lua-shape workload — every shaved nanosecond shows
508    /// up at the bench level. Counter-validated via
509    /// `examples/diag_opcode_breakdown.rs`.
510    #[inline]
511    pub fn get_str(&self, key: crate::runtime::Gc<crate::runtime::string::LuaStr>) -> Value {
512        self.get_hash(Value::Str(key))
513    }
514
515    fn get_hash(&self, k: Value) -> Value {
516        match self.find_node(k) {
517            Some(idx) => self.nodes[idx].val,
518            None => Value::Nil,
519        }
520    }
521
522    /// v2.1 Phase 1I.B — same logic as [`find_node`] but exposed
523    /// to luna-core's recorder so it can capture the slot index for
524    /// the table-field IC snapshot. luna-jit reads neither the
525    /// `nodes` field nor `Node` directly; only the slot index
526    /// crosses the crate boundary (baked into the IR as a `iconst`).
527    #[allow(dead_code)]
528    pub(crate) fn find_node_idx(&self, k: Value) -> Option<usize> {
529        self.find_node(k)
530    }
531
532    /// v2.1 Phase 1I.B — accessor for the recorder's
533    /// `FieldIcSnapshot` capture: read the slot's value's tag byte
534    /// for the cached_val_tag field. The recorder needs this to
535    /// match the runtime guard the IC emits. Returns None when
536    /// `idx >= nodes.len()`.
537    #[allow(dead_code)]
538    pub(crate) fn node_val_at(&self, idx: usize) -> Option<Value> {
539        self.nodes.get(idx).map(|n| n.val)
540    }
541
542    /// v2.1 Phase 1I.B — accessor for `nodes.len()` so the recorder
543    /// can capture the shape-guard's `nodes_len` field without
544    /// reaching into the private `nodes` member.
545    #[allow(dead_code)]
546    pub(crate) fn nodes_capacity(&self) -> usize {
547        self.nodes.len()
548    }
549
550    /// Walk the chain rooted at the key's main position.
551    fn find_node(&self, k: Value) -> Option<usize> {
552        // v2.13 WUC read-time probe (gc-verify): both the query key and
553        // every node key compared below must be live. This is the
554        // convergence point of ALL hash lookups, so a dangling string
555        // is named at its dereference site with role attribution.
556        #[cfg(feature = "gc-verify")]
557        {
558            let hdr = |v: Value| -> Option<usize> {
559                match v {
560                    Value::Str(s) => Some(s.as_ptr() as usize),
561                    Value::Table(t) => Some(t.as_ptr() as usize),
562                    _ => None,
563                }
564            };
565            if let Some(p) = hdr(k) {
566                if crate::runtime::gc_verify_probe::is_freed(p) {
567                    panic!("[gc-verify] find_node QUERY key {p:#x} is freed (dangling)");
568                }
569            }
570            for (i, n) in self.nodes.iter().enumerate() {
571                // NOTE: tombstones (val nil, key kept) are NOT skipped —
572                // the walk below raw_eq's their keys too.
573                if n.dead_key {
574                    continue;
575                }
576                if let Some(p) = hdr(n.key) {
577                    if crate::runtime::gc_verify_probe::is_freed(p) {
578                        panic!(
579                            "[gc-verify] find_node NODE key {p:#x} (slot {i}, \
580                             tombstone {}, table {:#x}) is freed (dangling)",
581                            n.val.is_nil(),
582                            self as *const Table as usize
583                        );
584                    }
585                }
586            }
587        }
588        if self.nodes.is_empty() {
589            return None;
590        }
591        let mut idx = self.main_position(k);
592        loop {
593            let n = &self.nodes[idx];
594            // Dead-key slots carry a dangling Gc pointer whose memory may
595            // have been reallocated to a different live object; raw_eq on
596            // such a key can spuriously match the freshly-reused address.
597            // Skip the comparison and only follow `next` (PUC `setdeadkey`
598            // / `equalkey` short-circuit). 5.5 gc.lua :459-:478 was 12%
599            // flaky on this exact path — a swept B-string's slot kept
600            // chaining into A's slot, so `a[k] = nil` (k = A_string) hit
601            // the dead slot and wrote nil there, leaving A's val untouched.
602            if !n.dead_key && n.key.raw_eq(k) {
603                return Some(idx);
604            }
605            if n.next == NONE {
606                return None;
607            }
608            idx = n.next as usize;
609        }
610    }
611
612    // ---- writes ----
613
614    /// Insert / update `(key, val)`. `heap` is used to credit any internal
615    /// Box growth (rehash) to `heap.bytes` so the counter stays in sync with
616    /// real memory; `free_obj` subtracts `internal_bytes()` on the way out.
617    pub fn set(&mut self, heap: &mut Heap, key: Value, val: Value) -> Result<(), TableError> {
618        let k = normalize_set_key(key)?;
619        self.set_norm(heap, k, val)
620    }
621
622    /// PUC `luaV_fastset` / `luaV_finishfastset` analogue: single-walk
623    /// in-place update for an existing key. Returns `true` iff `key` is
624    /// present with a non-nil value and the slot was overwritten with
625    /// `val`. Returns `false` when the key is absent, the slot holds nil,
626    /// or the key normalisation rejects it — the caller is then expected
627    /// to run the `__newindex` chain or fall back to `set` for the raw
628    /// insert.
629    ///
630    /// Collapses the SetField hot path from two hash-chain walks
631    /// (`get` + `set`) to one. The `__newindex` invariant ("fires iff
632    /// `get` would have returned nil") is preserved because this method
633    /// writes only when the existing slot is non-nil — the exact set the
634    /// prior `tb.get(key).is_nil()` gate already excluded from
635    /// `__newindex` eligibility. See
636    /// `.dev/rfcs/v2.0-pi-phase2-a3-audit.md` §4 for the case-by-case
637    /// semantics check.
638    ///
639    /// The caller is responsible for firing `Heap::barrier_back` after a
640    /// `true` return (same contract as the surrounding `raw_set`
641    /// wrapper).
642    pub fn try_set_existing(&mut self, key: Value, val: Value) -> bool {
643        let k = match normalize_set_key(key) {
644            Ok(k) => k,
645            Err(_) => return false,
646        };
647        if let Value::Int(i) = k
648            && i >= 1
649            && (i as u64) <= self.asize() as u64
650        {
651            let idx = i as usize - 1;
652            // SAFETY: `idx < self.asize()` is guarded by the conditional
653            // above, mirroring the bound on `aget`/`aset`.
654            let tag = unsafe { *self.atags().get_unchecked(idx) };
655            if tag != raw::NIL {
656                // Nil-val on a live slot must follow the same tombstone
657                // discipline as `set_norm` — routed through
658                // `clear_existing_slot` so chain-world / future
659                // data-layout cutovers (Phase E SoA) stay aligned.
660                // See `.dev/known-bugs/fixed/`.
661                if val.is_nil() {
662                    self.clear_existing_slot(k);
663                } else {
664                    self.aset(idx, val);
665                }
666                return true;
667            }
668            // Array slot present-but-nil → __newindex eligible: do NOT
669            // write. Caller falls through to the metamethod chain.
670            return false;
671        }
672        if let Some(idx) = self.find_node(k)
673            && !self.nodes[idx].val.is_nil()
674        {
675            if val.is_nil() {
676                self.clear_existing_slot(k);
677            } else {
678                self.nodes[idx].val = val;
679            }
680            return true;
681        }
682        false
683    }
684
685    /// Shared "live with val=Nil is illegal" tombstone routine for the
686    /// two write entry points (`set_norm` and `try_set_existing`). The
687    /// slot must already be known live (array slot inside `asize()` /
688    /// node returned by `find_node`).
689    ///
690    /// Chain-world today:
691    ///   - array slot → `aset(_, Nil)` clears the atag, so `next()`'s
692    ///     `tag != raw::NIL` filter skips the slot.
693    ///   - node slot  → soft tombstone (key kept, `val = Nil`); chain
694    ///     `next()` filter `!n.val.is_nil()` skips it, and `find_node`
695    ///     still routes a future re-insert into the same slot without
696    ///     a rehash.
697    ///
698    /// Centralising the discipline here lets a future Phase E SoA
699    /// cutover (Variant B linear probe, or any non-Robin-Hood layout
700    /// attack that switches `next()`'s filter to `meta_bits::is_live`)
701    /// migrate both entry points in lockstep — exactly the divergence
702    /// that surfaced as `(key, nil)` zombies in `pairs()` on the C3
703    /// Session 2 cutover branch.
704    fn clear_existing_slot(&mut self, k: Value) {
705        if let Value::Int(i) = k
706            && i >= 1
707            && (i as u64) <= self.asize() as u64
708        {
709            self.aset(i as usize - 1, Value::Nil);
710            return;
711        }
712        if let Some(idx) = self.find_node(k) {
713            self.nodes[idx].val = Value::Nil;
714        }
715    }
716
717    /// Integer-keyed variant of [`Self::set`].
718    pub fn set_int(&mut self, heap: &mut Heap, i: i64, val: Value) -> Result<(), TableError> {
719        self.set_norm(heap, Value::Int(i), val)
720    }
721
722    /// `k` is already normalized (no nil, no NaN, integral floats → Int).
723    fn set_norm(&mut self, heap: &mut Heap, k: Value, v: Value) -> Result<(), TableError> {
724        if let Value::Int(i) = k
725            && i >= 1
726            && (i as u64) <= self.asize() as u64
727        {
728            // Live array slot + Nil write goes through the shared
729            // tombstone routine (see `clear_existing_slot` for the
730            // chain ↔ future-SoA rationale). The non-Nil branch is
731            // identical to a bare `aset` today.
732            if v.is_nil() {
733                self.clear_existing_slot(k);
734            } else {
735                self.aset(i as usize - 1, v);
736            }
737            return Ok(());
738        }
739        if let Some(idx) = self.find_node(k) {
740            if v.is_nil() {
741                self.clear_existing_slot(k);
742            } else {
743                self.nodes[idx].val = v;
744            }
745            return Ok(());
746        }
747        if v.is_nil() {
748            return Ok(()); // absent key set to nil: nothing to record
749        }
750        self.insert_new(heap, k, v)
751    }
752
753    fn insert_new(&mut self, heap: &mut Heap, k: Value, v: Value) -> Result<(), TableError> {
754        if self.nodes.is_empty() {
755            self.rehash(heap, k)?;
756            return self.set_norm(heap, k, v);
757        }
758        let mp = self.main_position(k);
759        // A truly empty slot (key=Nil, !dead_key) is free for direct placement.
760        // A dead-key slot still belongs to some chain (its `next` points to a
761        // live entry the chain reaches), so we treat it as occupied here and
762        // route the new key through the collision path below — that preserves
763        // the back-links into this slot from other nodes' `next` fields.
764        if self.nodes[mp].key.is_nil() && !self.nodes[mp].dead_key {
765            self.nodes[mp] = Node {
766                key: k,
767                val: v,
768                next: NONE,
769                dead_key: false,
770            };
771            return Ok(());
772        }
773        let Some(free) = self.free_pos() else {
774            self.rehash(heap, k)?;
775            return self.set_norm(heap, k, v);
776        };
777        // Dead-key slot: it carries no live key, so by definition nobody else
778        // counts it as "their main position owner". We give it directly to
779        // the new key but preserve `next` so the chain it sits inside still
780        // reaches its downstream entries.
781        if self.nodes[mp].dead_key {
782            let preserved_next = self.nodes[mp].next;
783            self.nodes[mp] = Node {
784                key: k,
785                val: v,
786                next: preserved_next,
787                dead_key: false,
788            };
789            return Ok(());
790        }
791        let other_mp = self.main_position(self.nodes[mp].key);
792        if other_mp != mp {
793            // colliding node is out of its main position: relocate it to the
794            // free slot and take its place
795            let mut prev = other_mp;
796            while self.nodes[prev].next != mp as i32 {
797                prev = self.nodes[prev].next as usize;
798            }
799            self.nodes[prev].next = free as i32;
800            self.nodes[free] = self.nodes[mp];
801            self.nodes[mp] = Node {
802                key: k,
803                val: v,
804                next: NONE,
805                dead_key: false,
806            };
807        } else {
808            // colliding node owns this position: chain the new node behind it
809            self.nodes[free] = Node {
810                key: k,
811                val: v,
812                next: self.nodes[mp].next,
813                dead_key: false,
814            };
815            self.nodes[mp].next = free as i32;
816        }
817        Ok(())
818    }
819
820    fn free_pos(&mut self) -> Option<usize> {
821        while self.lastfree > 0 {
822            self.lastfree -= 1;
823            let n = &self.nodes[self.lastfree as usize];
824            // Dead-key slots are still occupied for chain purposes (their
825            // `next` may be the only path to a downstream entry) — don't
826            // hand them out as free.
827            if n.key.is_nil() && !n.dead_key {
828                return Some(self.lastfree as usize);
829            }
830        }
831        None
832    }
833
834    // ---- rehash (PUC luaH_rehash) ----
835
836    fn rehash(&mut self, heap: &mut Heap, pending: Value) -> Result<(), TableError> {
837        let mut nums = [0usize; 65];
838        let mut int_keys = 0usize;
839        let mut total = 1; // the pending key
840        if let Value::Int(i) = pending
841            && i >= 1
842        {
843            nums[ceil_log2(i as u64)] += 1;
844            int_keys += 1;
845        }
846        let atags = self.atags();
847        for (i, &tag) in atags.iter().enumerate() {
848            if tag != raw::NIL {
849                nums[ceil_log2(i as u64 + 1)] += 1;
850                int_keys += 1;
851                total += 1;
852            }
853        }
854        for n in self.nodes.iter() {
855            if !n.val.is_nil() {
856                total += 1;
857                if let Value::Int(i) = n.key
858                    && i >= 1
859                {
860                    nums[ceil_log2(i as u64)] += 1;
861                    int_keys += 1;
862                }
863            }
864        }
865        // computesizes: optimal array size = largest 2^i with more than 2^(i-1)
866        // integer keys in [1, 2^i]
867        let mut new_asize = 0usize;
868        let mut in_array = 0usize;
869        let mut a = 0usize;
870        let mut two_to_i = 1usize;
871        let mut i = 0usize;
872        while int_keys > two_to_i / 2 {
873            a += nums[i];
874            if a > two_to_i / 2 {
875                new_asize = two_to_i;
876                in_array = a;
877            }
878            i += 1;
879            match two_to_i.checked_mul(2) {
880                Some(n) => two_to_i = n,
881                None => break,
882            }
883        }
884        // PUC `luaH_resizearray` raises "table overflow" when the array part
885        // would have to grow past MAXASIZE. luna mirrors with `MAX_ASIZE`,
886        // checked on both the array and the hash bucket count (the latter is
887        // a power-of-two of total - in_array entries).
888        if new_asize > MAX_ASIZE {
889            return Err(TableError::Overflow);
890        }
891        let hash_entries = total - in_array;
892        if hash_entries > MAX_ASIZE {
893            return Err(TableError::Overflow);
894        }
895        self.resize(heap, new_asize, hash_entries);
896        Ok(())
897    }
898
899    /// Resize the table's array and hash parts. The array part grows
900    /// (or shrinks) to `new_asize` NIL-initialized slots; the hash
901    /// part rounds to the next power of two ≥ `hash_entries`. Any
902    /// existing entries are re-inserted into the new layout. The
903    /// Box growth is debited/credited to `heap.bytes` so `free_obj`
904    /// can subtract the symmetric amount.
905    ///
906    /// P11-S5c.B — `Heap::new_table_sized` calls this on a freshly
907    /// adopted empty table to pre-allocate the array part, sparing
908    /// the table-fill loop from O(log N) intermediate `rehash`es.
909    pub(crate) fn resize(&mut self, heap: &mut Heap, new_asize: usize, hash_entries: usize) {
910        let before = self.internal_bytes();
911        // P11-S5d.H/I — snapshot the old array entries before we
912        // re-install the backing. The active buffer can be inline OR
913        // slab; `array_ptr` already points to whichever it is, so
914        // walking via raw offsets works the same for either case.
915        let old_asize = self.asize as usize;
916        let mut old_pairs: Vec<(u8, RawVal)> = Vec::with_capacity(old_asize);
917        if old_asize > 0 {
918            // SAFETY: `array_ptr` was set up by `Heap::new_table` or
919            // an earlier `resize`; it covers `old_asize * 9` bytes
920            // (avals + atags).
921            let avals_base = self.array_base() as *const RawVal;
922            let atags_base = unsafe { self.array_base().add(old_asize * 8) as *const u8 };
923            for i in 0..old_asize {
924                // SAFETY: `i < array_len` is enforced by the surrounding loop bound; `atags_base` / `avals_base` point into the table's parallel arrays allocated in lockstep by `init_array_ptr`.
925                let tag = unsafe { *atags_base.add(i) };
926                // SAFETY: `i < array_len` is enforced by the surrounding loop bound; `atags_base` / `avals_base` point into the table's parallel arrays allocated in lockstep by `init_array_ptr`.
927                let val = unsafe { *avals_base.add(i) };
928                old_pairs.push((tag, val));
929            }
930        }
931        let old_nodes = std::mem::take(&mut self.nodes);
932
933        // Install the new array backing first, then update `array_ptr`
934        // (before potentially dropping the old slab via the assignment
935        // below) so the JIT never observes a stale pointer.
936        self.asize = new_asize as u64;
937        if new_asize <= INLINE_ASIZE as usize {
938            // Inline path — zero the inline buffer; drop any prior
939            // external slab.
940            // SAFETY: exclusive &mut self; write through the cell to
941            // stay on the raw-pointer access path (no &mut borrow of
942            // the array contents is ever formed).
943            unsafe {
944                *self.inline_storage.get() = [0; INLINE_U64S];
945            }
946            self.array_ptr = self.inline_storage.get() as *mut u8;
947            self.slab = Box::new([]);
948        } else {
949            // External slab — allocate, then re-point `array_ptr`.
950            self.slab = Self::alloc_slab(new_asize);
951            self.array_ptr = self.slab.as_mut_ptr() as *mut u8;
952        }
953
954        let hsize = if hash_entries == 0 {
955            0
956        } else {
957            hash_entries.next_power_of_two()
958        };
959        self.nodes = vec![Node::EMPTY; hsize].into_boxed_slice();
960        self.lastfree = hsize as u32;
961        // PUC `g->GCtotalbytes` analogue: credit (or debit) the box-size
962        // delta so `Heap.bytes` reflects this table's actual internal
963        // memory. `free_obj` subtracts `internal_bytes()` on the way out.
964        let after = self.internal_bytes();
965        heap.apply_bytes_delta(before, after);
966        // Re-insert old array entries via the public set_norm path
967        // (which handles rehashing if the new array shrinks below the
968        // entry count).
969        for (i, (tag, val)) in old_pairs.into_iter().enumerate() {
970            if tag != raw::NIL {
971                // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
972                let v = unsafe { Value::pack(tag, val) };
973                let _ = self.set_norm(heap, Value::Int(i as i64 + 1), v);
974            }
975        }
976        for n in old_nodes.iter() {
977            if !n.val.is_nil() {
978                let _ = self.set_norm(heap, n.key, n.val);
979            }
980        }
981    }
982
983    fn main_position(&self, k: Value) -> usize {
984        debug_assert!(!self.nodes.is_empty());
985        hash_key(k) as usize & (self.nodes.len() - 1)
986    }
987
988    // ---- length / iteration ----
989
990    /// A border: `n` where `t[n]` is non-nil and `t[n+1]` is nil (PUC `luaH_getn`).
991    /// This is Lua `#` semantics, not a container size — an `is_empty`
992    /// counterpart would be meaningless.
993    #[allow(clippy::len_without_is_empty)]
994    pub fn len(&self) -> i64 {
995        let asize = self.asize();
996        let atags = self.atags();
997        if asize > 0 && atags[asize - 1] == raw::NIL {
998            // binary search inside the array part
999            let (mut lo, mut hi) = (0usize, asize);
1000            while hi - lo > 1 {
1001                let m = lo + (hi - lo) / 2;
1002                if atags[m - 1] == raw::NIL {
1003                    hi = m;
1004                } else {
1005                    lo = m;
1006                }
1007            }
1008            return lo as i64;
1009        }
1010        if self.nodes.is_empty() {
1011            return asize as i64;
1012        }
1013        // array is full (or absent): unbound search through the hash part
1014        let mut lo = asize as i64;
1015        let mut hi = lo + 1;
1016        while !self.get_int(hi).is_nil() {
1017            lo = hi;
1018            match hi.checked_mul(2) {
1019                Some(n) => hi = n,
1020                None => {
1021                    // pathological sparse keys (the doubling overflowed): scan
1022                    // linearly from 1 for the first border, as PUC's
1023                    // unbound_search does — finds a small border fast instead of
1024                    // returning the huge one.
1025                    let mut i = 1i64;
1026                    while !self.get_int(i).is_nil() {
1027                        i += 1;
1028                    }
1029                    return i - 1;
1030                }
1031            }
1032        }
1033        while hi - lo > 1 {
1034            let m = lo + (hi - lo) / 2;
1035            if self.get_int(m).is_nil() {
1036                hi = m;
1037            } else {
1038                lo = m;
1039            }
1040        }
1041        lo
1042    }
1043
1044    /// Lua `next`: iterate array part then hash part.
1045    pub fn next(&self, key: Value) -> Result<Option<(Value, Value)>, TableError> {
1046        let start = match key {
1047            Value::Nil => 0,
1048            k => {
1049                let k = match k {
1050                    Value::Float(f) => match f2i_exact(f) {
1051                        Some(i) => Value::Int(i),
1052                        None => k,
1053                    },
1054                    k => k,
1055                };
1056                if let Value::Int(i) = k
1057                    && i >= 1
1058                    && (i as u64) <= self.asize() as u64
1059                {
1060                    i as usize
1061                } else {
1062                    match self.find_node(k) {
1063                        Some(idx) => self.asize() + idx + 1,
1064                        None => return Err(TableError::InvalidNext),
1065                    }
1066                }
1067            }
1068        };
1069        let atags = self.atags();
1070        for i in start..self.asize() {
1071            if atags[i] != raw::NIL {
1072                return Ok(Some((Value::Int(i as i64 + 1), self.aget(i))));
1073            }
1074        }
1075        let hstart = start.saturating_sub(self.asize());
1076        for (idx, n) in self.nodes.iter().enumerate().skip(hstart) {
1077            if !n.val.is_nil() {
1078                let _ = idx;
1079                return Ok(Some((n.key, n.val)));
1080            }
1081        }
1082        Ok(None)
1083    }
1084
1085    /// `(weak_keys, weak_values)` from the metatable's `__mode` field. Read by
1086    /// scanning the metatable for the `__mode` string (no interned key needed
1087    /// inside the collector).
1088    pub(crate) fn weak_mode(&self) -> (bool, bool) {
1089        let Some(mt) = self.metatable else {
1090            return (false, false);
1091        };
1092        for n in mt.nodes.iter() {
1093            if let (Value::Str(k), Value::Str(mode)) = (n.key, n.val)
1094                && k.as_bytes() == b"__mode"
1095            {
1096                let b = mode.as_bytes();
1097                return (b.contains(&b'k'), b.contains(&b'v'));
1098            }
1099        }
1100        (false, false)
1101    }
1102
1103    /// True when this table holds at least one direct reference (array slot,
1104    /// hash key, or hash value) to a coroutine whose mark bit is still clear.
1105    /// Used by the GC's cycle-finalize check (PUC 5.3 gc.lua :502) to detect
1106    /// the table ↔ thread reference cycle that needs an extra GC round before
1107    /// `__gc` runs. Tag-level scan avoids walking the full reference graph.
1108    pub(crate) fn refs_contain_unmarked_coro(&self) -> bool {
1109        use crate::runtime::heap::header_is_marked;
1110        let atags = self.atags();
1111        let avals = self.avals();
1112        for (i, &tag) in atags.iter().enumerate() {
1113            if tag == raw::CORO {
1114                // SAFETY: raw union access — the tag byte at the same index in `atags` was previously confirmed to be `co` (closure/object pointer) so the `co` variant of `RawVal` holds the valid payload.
1115                let p = unsafe { avals[i].co } as *mut crate::runtime::heap::GcHeader;
1116                if !header_is_marked(p) {
1117                    return true;
1118                }
1119            }
1120        }
1121        for n in self.nodes.iter() {
1122            if let Value::Coro(co) = n.key {
1123                if !header_is_marked(co.as_ptr() as *mut crate::runtime::heap::GcHeader) {
1124                    return true;
1125                }
1126            }
1127            if let Value::Coro(co) = n.val {
1128                if !header_is_marked(co.as_ptr() as *mut crate::runtime::heap::GcHeader) {
1129                    return true;
1130                }
1131            }
1132        }
1133        false
1134    }
1135
1136    /// v2.13 WUC `gc-verify`: after a completed sweep, every collectable
1137    /// reference this table still holds (array values, node keys/values,
1138    /// metatable) must point at a live heap object. Nodes flagged
1139    /// `dead_key` are the sanctioned exception — their key pointer is
1140    /// documented-dangling and never dereferenced. `describe` receives
1141    /// (what, node-index, tag-byte, ptr) on violation.
1142    #[cfg(feature = "gc-verify")]
1143    pub(crate) fn verify_refs(
1144        &self,
1145        is_live: &dyn Fn(Value) -> bool,
1146        report: &dyn Fn(&str, usize, Value),
1147    ) {
1148        let atags = self.atags();
1149        let avals = self.avals();
1150        for (i, &tag) in atags.iter().enumerate() {
1151            if raw::is_gc(tag) {
1152                // SAFETY: tags/vals parallel arrays kept in sync by all table writers.
1153                let v = unsafe { Value::pack(tag, avals[i]) };
1154                if !is_live(v) {
1155                    report("array value", i, v);
1156                }
1157            }
1158        }
1159        for (i, n) in self.nodes.iter().enumerate() {
1160            if n.val.is_nil() {
1161                continue;
1162            }
1163            if !n.dead_key && !is_live(n.key) {
1164                report("node key", i, n.key);
1165            }
1166            if !is_live(n.val) {
1167                report("node value", i, n.val);
1168            }
1169        }
1170        if let Some(mt) = self.metatable {
1171            if !is_live(Value::Table(mt)) {
1172                report("metatable", 0, Value::Table(mt));
1173            }
1174        }
1175    }
1176
1177    pub(crate) fn trace(&self, m: &mut Marker) {
1178        let (wk, wv) = self.weak_mode();
1179        if wk || wv {
1180            m.weak.push(self as *const Table as *mut Table);
1181        }
1182        // weak keys + strong values = an ephemeron table: its hash values are
1183        // marked only if the key proves reachable (deferred to the convergence
1184        // pass), not here. PUC 5.1 predates ephemerons — under `no_ephemeron`
1185        // a weak-key table marks its values strongly during this pass, which
1186        // is what gc.lua's "weak tables" section requires.
1187        let ephemeron = wk && !wv && !m.no_ephemeron;
1188        if ephemeron {
1189            m.ephemeron.push(self as *const Table as *mut Table);
1190        }
1191        // array keys are integers (never weakly collected); skip values only
1192        // when the table has weak values
1193        if !wv {
1194            let atags = self.atags();
1195            let avals = self.avals();
1196            for (i, &tag) in atags.iter().enumerate() {
1197                if raw::is_gc(tag) {
1198                    // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
1199                    m.value(unsafe { Value::pack(tag, avals[i]) });
1200                }
1201            }
1202        }
1203        for n in self.nodes.iter() {
1204            if !wk {
1205                m.value(n.key);
1206            }
1207            // ephemeron hash values are deferred; otherwise mark strong values
1208            if !wv && !ephemeron {
1209                m.value(n.val);
1210            }
1211        }
1212        if let Some(mt) = self.metatable {
1213            m.value(Value::Table(mt));
1214        }
1215    }
1216
1217    /// Ephemeron pass: mark the value of every hash entry whose key is alive
1218    /// (`alive` decides — strong/marked keys, plus strings/numbers which are
1219    /// never weakly collected). Returns true if any value was newly marked, so
1220    /// the caller can iterate to a fixpoint (PUC `traverseephemeron`).
1221    pub(crate) fn converge_ephemeron(&self, alive: &dyn Fn(Value) -> bool, m: &mut Marker) -> bool {
1222        let mut changed = false;
1223        for n in self.nodes.iter() {
1224            if !n.val.is_nil() && alive(n.key) {
1225                changed |= m.value(n.val);
1226            }
1227        }
1228        changed
1229    }
1230
1231    /// Clear entries whose weak key/value did not survive marking. `is_dead`
1232    /// reports whether a GC value was left unmarked (about to be swept).
1233    /// Clear weak-table entries whose key/value no longer carries a live
1234    /// reference. `is_dead` is a **pure** check (no side effects); the GC
1235    /// uses `mark_string` to resurrect any string that's still reachable via
1236    /// a *surviving* entry — Lua manual §2.5.4 says strings in weak tables
1237    /// are not collected as long as their entry is, and PUC `iscleared`
1238    /// implements that by marking the string during the same scan.
1239    pub(crate) fn clear_weak(
1240        &mut self,
1241        wk: bool,
1242        wv: bool,
1243        is_dead: &dyn Fn(Value) -> bool,
1244        mark_string: &dyn Fn(Value),
1245    ) {
1246        if wv {
1247            let n = self.asize as usize;
1248            for i in 0..n {
1249                let tag = self.atags()[i];
1250                if raw::is_gc(tag) {
1251                    // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
1252                    let v = unsafe { Value::pack(tag, self.avals()[i]) };
1253                    if is_dead(v) {
1254                        self.atags_mut()[i] = raw::NIL;
1255                        self.avals_mut()[i] = RawVal::NIL;
1256                    } else {
1257                        mark_string(v);
1258                    }
1259                }
1260            }
1261        }
1262        for n in self.nodes.iter_mut() {
1263            if n.val.is_nil() {
1264                // PUC `clearbykeys`/`clearbyvalues` end with
1265                // `if (isempty(gval(n))) clearkey(n)`: an EMPTY entry's
1266                // collectable key must be demoted to a dead key. A
1267                // tombstone (`t[k] = nil` leaves val nil, key kept for
1268                // chain links) is otherwise invisible to this sweep AND
1269                // unmarked by the weak-key trace, so its string key gets
1270                // freed while `find_node` still raw_eq's it walking the
1271                // chain — UAF-C's Linux/ASAN-confirmed read site
1272                // (v2.13 Track WUC).
1273                if !n.dead_key
1274                    && matches!(
1275                        n.key,
1276                        Value::Table(_)
1277                            | Value::Closure(_)
1278                            | Value::Native(_)
1279                            | Value::Coro(_)
1280                            | Value::Userdata(_)
1281                            | Value::Str(_)
1282                    )
1283                {
1284                    n.key = Value::Nil;
1285                    n.dead_key = true;
1286                }
1287                continue;
1288            }
1289            let key_dead = wk && is_dead(n.key);
1290            let val_dead = wv && is_dead(n.val);
1291            if key_dead || val_dead {
1292                // entry removed. PUC `setdeadkey`: when the key was a
1293                // collectable, drop the Gc pointer so a later raw_eq cannot
1294                // spuriously match a new object that gets allocated at the
1295                // same freed address. Keep `next` so the chain back-links
1296                // through this node still reach downstream entries; the
1297                // `dead_key` flag tells `find_node` to skip the comparison
1298                // and `insert_new` to treat the slot as a free
1299                // main-position owner that may inherit the chain.
1300                n.val = Value::Nil;
1301                if matches!(
1302                    n.key,
1303                    Value::Table(_)
1304                        | Value::Closure(_)
1305                        | Value::Native(_)
1306                        | Value::Coro(_)
1307                        | Value::Userdata(_)
1308                        | Value::Str(_)
1309                ) {
1310                    n.key = Value::Nil;
1311                    n.dead_key = true;
1312                }
1313            } else {
1314                // entry survives — resurrect any string reachable through it
1315                if wk {
1316                    mark_string(n.key);
1317                }
1318                if wv {
1319                    mark_string(n.val);
1320                }
1321            }
1322        }
1323    }
1324}
1325
1326// =====================================================================
1327// C3 — SoA + Robin Hood open-addressing hash part (Variant A).
1328//
1329// Parallel to the chain-walk path during Phase B+C+D transition: the
1330// chain `nodes` / `lastfree` is the authoritative read path until
1331// Phase E migrates `next()` and Phase 4 cuts over. These methods
1332// operate only on the `keys` / `vals` / `meta` / `tombstones` SoA
1333// arrays — chain state is never touched.
1334//
1335// Layout invariants the methods below maintain:
1336//   - `keys.len() == vals.len() == meta.len()`, all power-of-two
1337//     (or zero in the empty-stub state)
1338//   - `meta[i] = meta_bits::EMPTY` iff slot i is free
1339//   - tombstoned slots are scanned past by find but reused by insert
1340//   - `tombstones` counts the meta slots with TOMBSTONE_BIT set
1341//   - load factor (live + tombstone) / cap is kept ≤ 0.75 via
1342//     `soa_grow_if_needed` (R-A1 mitigation — PSL bound 63)
1343//   - rehash is REFUSED when `iter_depth > 0` (R-A3 mitigation —
1344//     wired in Phase F; for Phase C the counter is always 0 so the
1345//     refusal path is unreachable)
1346//
1347// Refs: `.dev/rfcs/v2.0-c3-soa-robinhood-rfc.md` §5.1 (variant A),
1348// §6.2 Phase 1-3 (impl plan).
1349// =====================================================================
1350
1351/// C3 — initial SoA capacity when growing from empty. Power of two.
1352/// Picked at 4 so a 3-element table doesn't trigger an immediate
1353/// regrowth.
1354#[allow(dead_code)] // wired by Phase D/E/F integration
1355pub(crate) const SOA_INITIAL_CAP: usize = 4;
1356
1357/// C3 — high load-factor threshold (3/4). SoA grow trigger; matches
1358/// the RFC §5.1 recommendation. PSL_MAX is the u16 14-bit value so
1359/// long-tail PSL overruns are recoverable via grow-retry.
1360#[allow(dead_code)]
1361const SOA_LOAD_NUM: usize = 3;
1362#[allow(dead_code)]
1363const SOA_LOAD_DEN: usize = 4;
1364
1365/// C3 — tombstone density threshold (1/4). When tombstones/cap ≥ 25%
1366/// the next non-resize-triggering rehash compacts them.
1367#[allow(dead_code)]
1368const SOA_TOMB_NUM: usize = 1;
1369#[allow(dead_code)]
1370const SOA_TOMB_DEN: usize = 4;
1371
1372#[allow(dead_code)] // wired by Phase D/E/F integration into public set/get/next
1373impl Table {
1374    /// C3 — current SoA hash-part capacity in slots (0 = empty stub).
1375    #[inline]
1376    pub(crate) fn soa_cap(&self) -> usize {
1377        self.meta.len()
1378    }
1379
1380    /// C3 — count of live (occupied & not tombstone) SoA slots.
1381    /// O(n) — only used by the Phase G equivalence test path; the
1382    /// hot rehash trigger uses `live_estimate = cap*3/4 - tombstones`
1383    /// implicitly via `soa_grow_if_needed`.
1384    #[cfg(test)]
1385    pub(crate) fn soa_live_count(&self) -> usize {
1386        self.meta.iter().filter(|&&m| meta_bits::is_live(m)).count()
1387    }
1388
1389    /// C3 — count of occupied (live OR tombstoned) SoA slots; this is
1390    /// the value the load factor compares against `cap * 3/4`.
1391    #[inline]
1392    fn soa_occupied_count(&self) -> usize {
1393        // O(n) sweep — Phase C inserts the count on each call so the
1394        // worst case is bounded by per-insert amortised cost. A future
1395        // polish could maintain a counter incrementally; left as a
1396        // Phase H mini-bench-driven follow-up if PI sample shows it
1397        // contributes > 1 µs/cell.
1398        self.meta
1399            .iter()
1400            .filter(|&&m| meta_bits::is_occupied(m))
1401            .count()
1402    }
1403
1404    /// C3 — Robin Hood lookup. Returns the slot index of a *live*
1405    /// matching key, or None if absent. Walks past tombstones (they
1406    /// preserve probe chains). Returns None if the SoA cap is zero
1407    /// (Phase B stub state). Bound by `cap` probes; in practice
1408    /// expected ≤ 8 at load 0.75.
1409    pub(crate) fn soa_find_slot(&self, k: Value) -> Option<usize> {
1410        let cap = self.meta.len();
1411        if cap == 0 {
1412            return None;
1413        }
1414        let mask = cap - 1;
1415        let mut idx = (hash_key(k) as usize) & mask;
1416        // Walk until empty slot or wrap. The `steps <= cap` bound
1417        // is a safety net: a properly maintained Robin Hood table
1418        // with load < 1 always has at least one empty slot, so a
1419        // full wrap means table invariant violation.
1420        for _ in 0..cap {
1421            let m = self.meta[idx];
1422            if !meta_bits::is_occupied(m) {
1423                return None;
1424            }
1425            if !meta_bits::is_tombstone(m) && self.keys[idx].raw_eq(k) {
1426                return Some(idx);
1427            }
1428            idx = (idx + 1) & mask;
1429        }
1430        None
1431    }
1432
1433    /// C3 — Allocate fresh SoA arrays at `new_cap` (power of two) and
1434    /// re-insert every live entry from the old SoA arrays. Tombstones
1435    /// are dropped (count resets to 0). Used by `soa_grow_if_needed`
1436    /// (new_cap = max(SOA_INITIAL_CAP, 2*cap)) and by Phase D
1437    /// tombstone compaction (new_cap = cap).
1438    ///
1439    /// IMPORTANT: rehash MUST NOT fire while `iter_depth > 0`
1440    /// (R-A3) — wired in Phase F. Phase C callers all enter from
1441    /// non-iteration paths.
1442    fn soa_rehash_to(&mut self, heap: &mut Heap, new_cap: usize) -> Result<(), TableError> {
1443        debug_assert!(new_cap.is_power_of_two() && new_cap > 0);
1444        let before = self.internal_bytes();
1445        // Snapshot old live entries. This list is the canonical
1446        // "must be present after rehash" set; we restart from it on
1447        // any PSL-overflow retry.
1448        let mut survivors: Vec<(Value, Value)> = Vec::with_capacity(self.meta.len());
1449        for i in 0..self.meta.len() {
1450            if meta_bits::is_live(self.meta[i]) {
1451                survivors.push((self.keys[i], self.vals[i]));
1452            }
1453        }
1454        // Install fresh empty arrays at `new_cap`. On PSL overflow
1455        // during the re-insert pass (extremely rare with the 14-bit
1456        // PSL budget — would need a pathological hash distribution),
1457        // double the cap and replay the original `survivors` list
1458        // from scratch. We don't try to salvage partial work — the
1459        // rare-path retry cost is bounded by O(n × max_doublings),
1460        // and max_doublings has a hard MAX_ASIZE ceiling.
1461        let mut cap = new_cap;
1462        loop {
1463            if cap > MAX_ASIZE {
1464                return Err(TableError::Overflow);
1465            }
1466            self.keys = vec![Value::Nil; cap].into_boxed_slice();
1467            self.vals = vec![Value::Nil; cap].into_boxed_slice();
1468            self.meta = vec![meta_bits::EMPTY; cap].into_boxed_slice();
1469            self.tombstones = 0;
1470            let mut overflowed = false;
1471            for (k, v) in survivors.iter().copied() {
1472                if self.soa_place_known_absent(k, v).is_err() {
1473                    overflowed = true;
1474                    break;
1475                }
1476            }
1477            if !overflowed {
1478                break;
1479            }
1480            cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1481        }
1482        let after = self.internal_bytes();
1483        heap.apply_bytes_delta(before, after);
1484        Ok(())
1485    }
1486
1487    /// C3 — Raw rob-from-rich placement for a key known to be absent
1488    /// from the SoA arrays. Used by `soa_rehash_to` (re-insert pass)
1489    /// and by `soa_insert` (new-key path after the explicit
1490    /// soa_find_slot check). This routine does NOT auto-grow on a
1491    /// load-factor trigger (caller's responsibility), but DOES signal
1492    /// back to the caller via `Err(())` when the PSL bound of 63 is
1493    /// hit before finding an empty slot — Robin Hood's long-tail
1494    /// max-PSL exceeds the 6-bit storage budget at unfavourable hash
1495    /// distributions even under the nominal 0.75 load gate (RFC §6.3
1496    /// R-A1). The caller (`soa_insert`) handles by growing & retrying.
1497    ///
1498    /// On success returns the slot index where the new key landed
1499    /// (after any rob-from-rich shuffle, the original `k` value is at
1500    /// this returned index).
1501    fn soa_place_known_absent(&mut self, k: Value, v: Value) -> Result<usize, (Value, Value)> {
1502        let cap = self.meta.len();
1503        debug_assert!(cap > 0);
1504        let mask = cap - 1;
1505        let landing = (hash_key(k) as usize) & mask;
1506        let mut idx = landing;
1507        let mut cur_psl: u16 = 0;
1508        let mut cur_key = k;
1509        let mut cur_val = v;
1510        let mut placed_at: Option<usize> = None;
1511        for _ in 0..cap {
1512            let m = self.meta[idx];
1513            if !meta_bits::is_occupied(m) || meta_bits::is_tombstone(m) {
1514                if meta_bits::is_tombstone(m) {
1515                    self.tombstones = self.tombstones.saturating_sub(1);
1516                }
1517                self.meta[idx] = meta_bits::pack(cur_psl, false);
1518                self.keys[idx] = cur_key;
1519                self.vals[idx] = cur_val;
1520                return Ok(placed_at.unwrap_or(idx));
1521            }
1522            let stored_psl = meta_bits::psl(m);
1523            if cur_psl > stored_psl {
1524                // Rob: swap cur into this slot, evict stored to continue.
1525                std::mem::swap(&mut cur_key, &mut self.keys[idx]);
1526                std::mem::swap(&mut cur_val, &mut self.vals[idx]);
1527                self.meta[idx] = meta_bits::pack(cur_psl, false);
1528                if placed_at.is_none() {
1529                    placed_at = Some(idx);
1530                }
1531                cur_psl = stored_psl;
1532            }
1533            idx = (idx + 1) & mask;
1534            cur_psl = cur_psl.saturating_add(1);
1535            if cur_psl > meta_bits::PSL_MAX {
1536                // PSL exceeds the 14-bit storage budget — exceptionally
1537                // rare with 16384 max. Caller (soa_insert / rehash
1538                // outer loop) handles by growing & retrying. Partial
1539                // state: all entries are still in the table EXCEPT
1540                // `(cur_key, cur_val)` which is the latest homeless
1541                // evictee — return it so caller can re-issue.
1542                return Err((cur_key, cur_val));
1543            }
1544        }
1545        // Wrapped cap probes with no free slot — invariant violation
1546        // (load < 1 should guarantee at least one empty). Signal as
1547        // PSL-overflow equivalent so caller grows + retries.
1548        Err((cur_key, cur_val))
1549    }
1550
1551    /// C3 — Grow SoA capacity if the load factor is at or above the
1552    /// 0.75 trigger. Doubles cap; from empty grows to SOA_INITIAL_CAP.
1553    fn soa_grow_if_needed(&mut self, heap: &mut Heap) -> Result<(), TableError> {
1554        // Defer rehash when an iterator is in flight (R-A3). Wired
1555        // in Phase F; in Phase C iter_depth is always 0.
1556        if self.iter_depth > 0 {
1557            return Ok(());
1558        }
1559        let cap = self.meta.len();
1560        if cap == 0 {
1561            return self.soa_rehash_to(heap, SOA_INITIAL_CAP);
1562        }
1563        let occupied = self.soa_occupied_count();
1564        if occupied * SOA_LOAD_DEN >= cap * SOA_LOAD_NUM {
1565            let new_cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1566            return self.soa_rehash_to(heap, new_cap);
1567        }
1568        // Tombstone compaction (same cap, drops tombstones).
1569        if self.tombstones as usize * SOA_TOMB_DEN >= cap * SOA_TOMB_NUM {
1570            return self.soa_rehash_to(heap, cap);
1571        }
1572        Ok(())
1573    }
1574
1575    /// C3 — Insert (or update) `(k, v)` in the SoA hash part. Routes
1576    /// through `soa_find_slot` first so an existing key updates its
1577    /// val in place; otherwise rob-from-rich places a new entry.
1578    /// Auto-rehashes if the load factor would exceed 0.75 OR if the
1579    /// place chain runs into a PSL overflow on a pathological hash
1580    /// distribution.
1581    ///
1582    /// Phase C: this method is callable from outside via the
1583    /// equivalence-test entrypoint (Phase G); not yet hooked into
1584    /// public `set` / `set_norm`.
1585    pub(crate) fn soa_insert(
1586        &mut self,
1587        heap: &mut Heap,
1588        k: Value,
1589        v: Value,
1590    ) -> Result<(), TableError> {
1591        debug_assert!(!matches!(k, Value::Nil));
1592        // 1. Update-in-place if key is already present (live slot).
1593        if let Some(idx) = self.soa_find_slot(k) {
1594            self.vals[idx] = v;
1595            return Ok(());
1596        }
1597        // 2. New key: ensure capacity, then place. On PSL-overflow
1598        // from the place chain (extremely rare with 14-bit PSL budget),
1599        // grow + rehash with the homeless evictee merged in.
1600        // `soa_rehash_with_extra` handles further retries internally,
1601        // bounded by MAX_ASIZE.
1602        self.soa_grow_if_needed(heap)?;
1603        match self.soa_place_known_absent(k, v) {
1604            Ok(_) => Ok(()),
1605            Err(homeless) => {
1606                let cap = self.meta.len();
1607                let new_cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1608                self.soa_rehash_with_extra(heap, new_cap, homeless)
1609            }
1610        }
1611    }
1612
1613    /// C3 — Rehash to `new_cap` while merging in an extra (k, v) pair
1614    /// not currently in the SoA arrays. Used by `soa_insert` to
1615    /// recover from PSL overflow: the homeless evictee from the failed
1616    /// place chain gets appended to the survivor list before the
1617    /// re-insert pass.
1618    fn soa_rehash_with_extra(
1619        &mut self,
1620        heap: &mut Heap,
1621        new_cap: usize,
1622        extra: (Value, Value),
1623    ) -> Result<(), TableError> {
1624        let before = self.internal_bytes();
1625        let mut survivors: Vec<(Value, Value)> = Vec::with_capacity(self.meta.len() + 1);
1626        for i in 0..self.meta.len() {
1627            if meta_bits::is_live(self.meta[i]) {
1628                survivors.push((self.keys[i], self.vals[i]));
1629            }
1630        }
1631        // Avoid duplicating the extra if its key was already placed at
1632        // some slot during the failed rob chain (the rob may have
1633        // landed the original input into a slot before overflowing on
1634        // a downstream evictee — that case the meta-walk above picks
1635        // it up).
1636        if !survivors.iter().any(|(k, _)| k.raw_eq(extra.0)) {
1637            survivors.push(extra);
1638        }
1639        let mut cap = new_cap;
1640        loop {
1641            if cap > MAX_ASIZE {
1642                return Err(TableError::Overflow);
1643            }
1644            self.keys = vec![Value::Nil; cap].into_boxed_slice();
1645            self.vals = vec![Value::Nil; cap].into_boxed_slice();
1646            self.meta = vec![meta_bits::EMPTY; cap].into_boxed_slice();
1647            self.tombstones = 0;
1648            let mut overflowed = false;
1649            for (k, v) in survivors.iter().copied() {
1650                if self.soa_place_known_absent(k, v).is_err() {
1651                    overflowed = true;
1652                    break;
1653                }
1654            }
1655            if !overflowed {
1656                break;
1657            }
1658            cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1659        }
1660        let after = self.internal_bytes();
1661        heap.apply_bytes_delta(before, after);
1662        Ok(())
1663    }
1664
1665    /// C3 — Read SoA hash part. Mirrors `get_hash` but reads from
1666    /// keys/vals/meta rather than nodes. Used by the Phase G
1667    /// equivalence test; not yet hooked into public `get` / `get_hash`.
1668    pub(crate) fn soa_get(&self, k: Value) -> Value {
1669        match self.soa_find_slot(k) {
1670            Some(idx) => self.vals[idx],
1671            None => Value::Nil,
1672        }
1673    }
1674
1675    /// C3 — Tombstone deletion. Marks the live slot for `k` as
1676    /// tombstoned, preserving the slot index (no backward shift).
1677    /// Slot-index stability is the PUC `next()` iteration invariant
1678    /// — `nextvar.lua:520-521` requires that deleting prior keys
1679    /// during a `pairs` traversal does NOT move unvisited keys.
1680    /// Backward-shift deletion would violate this; tombstones are
1681    /// the standard Robin Hood resolution (see RFC §4.5 + §5.1).
1682    ///
1683    /// keys[idx] / vals[idx] are reset to Nil so the GC marker is
1684    /// not held to the previous entries — only the tombstone bit
1685    /// distinguishes "occupied tombstone" from "free empty".
1686    ///
1687    /// Returns true if the key was found and deleted, false if absent.
1688    ///
1689    /// Phase D: not yet hooked into public `set(k, Nil)` — wired in
1690    /// Phase E alongside the `next()` migration.
1691    pub(crate) fn soa_delete(&mut self, k: Value) -> bool {
1692        if let Some(idx) = self.soa_find_slot(k) {
1693            let psl = meta_bits::psl(self.meta[idx]);
1694            self.meta[idx] = meta_bits::pack(psl, true);
1695            self.keys[idx] = Value::Nil;
1696            self.vals[idx] = Value::Nil;
1697            self.tombstones = self.tombstones.saturating_add(1);
1698            true
1699        } else {
1700            false
1701        }
1702    }
1703}
1704
1705fn normalize_set_key(key: Value) -> Result<Value, TableError> {
1706    match key {
1707        Value::Nil => Err(TableError::NilIndex),
1708        Value::Float(f) => match f2i_exact(f) {
1709            Some(i) => Ok(Value::Int(i)),
1710            None if f.is_nan() => Err(TableError::NanIndex),
1711            None => Ok(key),
1712        },
1713        k => Ok(k),
1714    }
1715}
1716
1717fn hash_key(k: Value) -> u64 {
1718    match k {
1719        Value::Int(i) => i as u64, // identity mod size (PUC hashint)
1720        Value::Float(f) => mix64(f.to_bits()),
1721        Value::Bool(b) => b as u64 + 1,
1722        Value::Str(s) => s.hash() as u64,
1723        Value::Table(t) => mix64(t.as_ptr() as u64),
1724        Value::Closure(c) => mix64(c.as_ptr() as u64),
1725        Value::Native(n) => mix64(n.as_ptr() as u64),
1726        Value::Coro(co) => mix64(co.as_ptr() as u64),
1727        Value::Userdata(u) => mix64(u.as_ptr() as u64),
1728        Value::LightUserdata(p) => mix64(p as u64),
1729        Value::Nil => 0, // unreachable as a stored key
1730    }
1731}
1732
1733/// splitmix64 finalizer.
1734fn mix64(mut x: u64) -> u64 {
1735    x ^= x >> 30;
1736    x = x.wrapping_mul(0xbf58_476d_1ce4_e5b9);
1737    x ^= x >> 27;
1738    x = x.wrapping_mul(0x94d0_49bb_1331_11eb);
1739    x ^ (x >> 31)
1740}
1741
1742/// For k ≥ 1: the bucket l such that k ∈ (2^(l-1), 2^l].
1743fn ceil_log2(k: u64) -> usize {
1744    (u64::BITS - (k - 1).leading_zeros()) as usize
1745}
1746
1747impl Table {
1748    /// Preallocate the array part (table.create); existing contents are
1749    /// preserved.
1750    pub fn ensure_array(&mut self, heap: &mut Heap, n: usize) {
1751        if n > self.asize() {
1752            let hash_entries = self.nodes.iter().filter(|nd| !nd.val.is_nil()).count();
1753            self.resize(heap, n, hash_entries);
1754        }
1755    }
1756}
1757
1758impl Table {
1759    /// Preallocate hash-part capacity (table.create's second size).
1760    pub fn ensure_hash(&mut self, heap: &mut Heap, n: usize) {
1761        let entries = self.nodes.iter().filter(|nd| !nd.val.is_nil()).count();
1762        if n > self.nodes.len() {
1763            self.resize(heap, self.asize(), n.max(entries));
1764        }
1765    }
1766}
1767
1768#[cfg(test)]
1769mod tests {
1770    use super::*;
1771    use crate::runtime::heap::Heap;
1772
1773    fn with_table(f: impl FnOnce(&mut Heap, &mut Table)) {
1774        let mut heap = Heap::new();
1775        let t = heap.new_table();
1776        f(&mut heap, unsafe { t.as_mut() });
1777    }
1778
1779    fn assert_is_border(t: &Table, n: i64) {
1780        if n == 0 {
1781            assert!(t.get_int(1).is_nil(), "border 0 but t[1] non-nil");
1782        } else {
1783            assert!(!t.get_int(n).is_nil(), "border {n} but t[{n}] is nil");
1784            assert!(
1785                t.get_int(n + 1).is_nil(),
1786                "border {n} but t[{}] non-nil",
1787                n + 1
1788            );
1789        }
1790    }
1791
1792    /// v2.1 Phase 1I.B — pin `Box<[Node]>` fat-ptr layout at runtime.
1793    /// The luna-jit table-field IC reads `(ptr, len)` directly out of
1794    /// the `nodes` field assuming the data pointer occupies the low 8
1795    /// bytes and the length the high 8 bytes (de-facto Rust ABI on
1796    /// 64-bit targets but not formally guaranteed). If a future Rust
1797    /// release reorders the fat-ptr, this test fails before IC fires
1798    /// at runtime.
1799    #[test]
1800    #[allow(clippy::assertions_on_constants)]
1801    #[cfg(target_pointer_width = "64")]
1802    fn phase_1i_b_node_layout_pinned() {
1803        use jit_layout::*;
1804        assert_eq!(std::mem::size_of::<Box<[Node]>>(), 16);
1805        assert_eq!(NODE_KEY_OFFSET, 0);
1806        assert_eq!(NODE_VAL_OFFSET, 16);
1807        assert!(SIZEOF_NODE >= 32);
1808
1809        // Construct a real Box<[Node]> with a known length, then
1810        // peek at the fat-pointer's two halves to confirm the
1811        // (data_ptr, len) order. Use a 4-slot box so the length is
1812        // non-zero and the data pointer is heap-allocated.
1813        let b: Box<[Node]> = vec![Node::EMPTY; 4].into_boxed_slice();
1814        let raw_ptr = b.as_ptr();
1815        let raw_len = b.len();
1816        // SAFETY: reading the fat pointer's two words is exactly the
1817        // layout luna-jit's IR assumes; it's the safest possible test
1818        // of that assumption.
1819        let words: [usize; 2] = unsafe { std::mem::transmute_copy(&b) };
1820        assert_eq!(words[0], raw_ptr as usize, "fat-ptr low word = data ptr");
1821        assert_eq!(words[1], raw_len, "fat-ptr high word = len");
1822        drop(b);
1823    }
1824
1825    #[test]
1826    fn sequence_grows_into_array() {
1827        with_table(|heap, t| {
1828            for i in 1..=1000 {
1829                let _ = t.set_int(heap, i, Value::Int(i * 10));
1830            }
1831            for i in 1..=1000 {
1832                assert!(t.get_int(i).raw_eq(Value::Int(i * 10)));
1833            }
1834            assert_eq!(t.len(), 1000);
1835        });
1836    }
1837
1838    #[test]
1839    fn string_and_mixed_keys() {
1840        with_table(|heap, t| {
1841            let k1 = Value::Str(heap.intern(b"alpha"));
1842            let k2 = Value::Str(heap.intern(b"beta"));
1843            t.set(heap, k1, Value::Int(1)).unwrap();
1844            t.set(heap, k2, Value::Int(2)).unwrap();
1845            t.set(heap, Value::Bool(true), Value::Int(3)).unwrap();
1846            t.set(heap, Value::Int(-5), Value::Int(4)).unwrap();
1847            // re-interned key reaches the same slot
1848            let k1b = Value::Str(heap.intern(b"alpha"));
1849            assert!(t.get(k1b).raw_eq(Value::Int(1)));
1850            assert!(t.get(k2).raw_eq(Value::Int(2)));
1851            assert!(t.get(Value::Bool(true)).raw_eq(Value::Int(3)));
1852            assert!(t.get(Value::Int(-5)).raw_eq(Value::Int(4)));
1853            assert!(t.get(Value::Str(heap.intern(b"gamma"))).is_nil());
1854        });
1855    }
1856
1857    #[test]
1858    fn float_keys_normalize_to_int() {
1859        with_table(|heap, t| {
1860            t.set(heap, Value::Float(2.0), Value::Int(22)).unwrap();
1861            assert!(t.get(Value::Int(2)).raw_eq(Value::Int(22)));
1862            t.set(heap, Value::Int(3), Value::Int(33)).unwrap();
1863            assert!(t.get(Value::Float(3.0)).raw_eq(Value::Int(33)));
1864            // -0.0 is key 0
1865            t.set(heap, Value::Float(-0.0), Value::Int(0)).unwrap();
1866            assert!(t.get(Value::Int(0)).raw_eq(Value::Int(0)));
1867            // non-integral floats are their own keys
1868            t.set(heap, Value::Float(0.5), Value::Int(55)).unwrap();
1869            assert!(t.get(Value::Float(0.5)).raw_eq(Value::Int(55)));
1870            assert!(t.get(Value::Int(0)).raw_eq(Value::Int(0)));
1871        });
1872    }
1873
1874    #[test]
1875    fn bad_keys() {
1876        with_table(|heap, t| {
1877            assert_eq!(
1878                t.set(heap, Value::Nil, Value::Int(1)),
1879                Err(TableError::NilIndex)
1880            );
1881            assert_eq!(
1882                t.set(heap, Value::Float(f64::NAN), Value::Int(1)),
1883                Err(TableError::NanIndex)
1884            );
1885            // reads with bad keys are nil, not errors
1886            assert!(t.get(Value::Nil).is_nil());
1887            assert!(t.get(Value::Float(f64::NAN)).is_nil());
1888        });
1889    }
1890
1891    #[test]
1892    fn delete_and_reinsert() {
1893        with_table(|heap, t| {
1894            let k = Value::Str(heap.intern(b"k"));
1895            t.set(heap, k, Value::Int(1)).unwrap();
1896            t.set(heap, k, Value::Nil).unwrap();
1897            assert!(t.get(k).is_nil());
1898            t.set(heap, k, Value::Int(2)).unwrap();
1899            assert!(t.get(k).raw_eq(Value::Int(2)));
1900            // setting an absent key to nil stays absent
1901            let k2 = Value::Str(heap.intern(b"k2"));
1902            t.set(heap, k2, Value::Nil).unwrap();
1903            assert!(t.get(k2).is_nil());
1904        });
1905    }
1906
1907    #[test]
1908    fn borders_with_holes() {
1909        with_table(|heap, t| {
1910            let _ = t.set_int(heap, 1, Value::Int(1));
1911            let _ = t.set_int(heap, 2, Value::Int(2));
1912            assert_eq!(t.len(), 2);
1913            t.set_int(heap, 2, Value::Nil).unwrap();
1914            assert_is_border(t, t.len());
1915            // hash-resident tail
1916            let _ = t.set_int(heap, 1_000_000, Value::Int(1));
1917            assert_is_border(t, t.len());
1918        });
1919    }
1920
1921    #[test]
1922    fn len_on_empty_and_hash_only() {
1923        with_table(|heap, t| {
1924            assert_eq!(t.len(), 0);
1925            let xk = Value::Str(heap.intern(b"x"));
1926            t.set(heap, xk, Value::Int(1)).unwrap();
1927            assert_eq!(t.len(), 0);
1928        });
1929    }
1930
1931    #[test]
1932    fn next_iterates_everything_exactly_once() {
1933        with_table(|heap, t| {
1934            let mut expected = 0i64;
1935            for i in 1..=64 {
1936                let _ = t.set_int(heap, i, Value::Int(i));
1937                expected += i;
1938            }
1939            for i in 0..32 {
1940                let k = Value::Str(heap.intern(format!("s{i}").as_bytes()));
1941                t.set(heap, k, Value::Int(1000 + i)).unwrap();
1942                expected += 1000 + i;
1943            }
1944            t.set(heap, Value::Float(2.5), Value::Int(7)).unwrap();
1945            expected += 7;
1946
1947            let mut sum = 0i64;
1948            let mut count = 0;
1949            let mut key = Value::Nil;
1950            while let Some((k, v)) = t.next(key).unwrap() {
1951                let Value::Int(x) = v else {
1952                    panic!("bad value")
1953                };
1954                sum += x;
1955                count += 1;
1956                key = k;
1957            }
1958            assert_eq!(count, 64 + 32 + 1);
1959            assert_eq!(sum, expected);
1960        });
1961    }
1962
1963    #[test]
1964    fn next_skips_nil_values_and_rejects_alien_keys() {
1965        with_table(|heap, t| {
1966            let _ = t.set_int(heap, 1, Value::Int(1));
1967            let _ = t.set_int(heap, 3, Value::Int(3));
1968            let k = Value::Str(heap.intern(b"gone"));
1969            t.set(heap, k, Value::Int(9)).unwrap();
1970            t.set(heap, k, Value::Nil).unwrap();
1971            let mut seen = Vec::new();
1972            let mut key = Value::Nil;
1973            while let Some((k, v)) = t.next(key).unwrap() {
1974                let Value::Int(x) = v else { panic!() };
1975                seen.push(x);
1976                key = k;
1977            }
1978            assert_eq!(seen, vec![1, 3]);
1979            // a key never inserted is invalid for next
1980            let alien = Value::Str(heap.intern(b"never"));
1981            assert!(matches!(t.next(alien), Err(TableError::InvalidNext)));
1982            // ...but a deleted (nil-valued) key is still a valid cursor
1983            assert!(t.next(k).is_ok());
1984        });
1985    }
1986
1987    #[test]
1988    fn collision_relocation_keeps_chains_intact() {
1989        with_table(|heap, t| {
1990            // dense negative ints all land in the hash part; with identity
1991            // hashing they exercise both chain cases heavily
1992            for i in 0..512 {
1993                let _ = t.set_int(heap, -i, Value::Int(i));
1994            }
1995            for i in 0..512 {
1996                assert!(t.get_int(-i).raw_eq(Value::Int(i)), "lost key {}", -i);
1997            }
1998        });
1999    }
2000
2001    // -----------------------------------------------------------------
2002    // C3 SoA Robin Hood equivalence tests (Phase G).
2003    //
2004    // Cross-check the new SoA + RH path against the existing chain-walk
2005    // path: replay the same insert/lookup sequence on a table via
2006    // `set` (chain) and another via `soa_insert` (SoA), then assert
2007    // `get == soa_get` for every key.
2008    //
2009    // Phase B+C scope: insert + read only. Tombstone delete equivalence
2010    // arrives with Phase D.
2011    // -----------------------------------------------------------------
2012
2013    fn replay_chain(heap: &mut Heap, ops: &[(Value, Value)]) -> *mut Table {
2014        let t = heap.new_table();
2015        let tref = unsafe { t.as_mut() };
2016        for (k, v) in ops.iter().copied() {
2017            tref.set(heap, k, v).unwrap();
2018        }
2019        t.as_ptr()
2020    }
2021
2022    fn replay_soa(heap: &mut Heap, ops: &[(Value, Value)]) -> *mut Table {
2023        let t = heap.new_table();
2024        let tref = unsafe { t.as_mut() };
2025        for (k, v) in ops.iter().copied() {
2026            tref.soa_insert(heap, k, v).unwrap();
2027        }
2028        t.as_ptr()
2029    }
2030
2031    #[test]
2032    fn c3_soa_equivalence_string_keys() {
2033        let mut heap = Heap::new();
2034        let mut ops = Vec::new();
2035        for i in 0..40 {
2036            let k = Value::Str(heap.intern(format!("key_{i:03}").as_bytes()));
2037            ops.push((k, Value::Int(i * 7)));
2038        }
2039        let chain = unsafe { &*replay_chain(&mut heap, &ops) };
2040        let soa = unsafe { &*replay_soa(&mut heap, &ops) };
2041        for (k, _) in &ops {
2042            let cv = chain.get(*k);
2043            let sv = soa.soa_get(*k);
2044            assert!(
2045                cv.raw_eq(sv),
2046                "SoA vs chain mismatch on key — chain={:?} soa={:?}",
2047                cv,
2048                sv,
2049            );
2050        }
2051        // Absent key returns nil from both paths.
2052        let absent = Value::Str(heap.intern(b"never"));
2053        assert!(chain.get(absent).is_nil());
2054        assert!(soa.soa_get(absent).is_nil());
2055    }
2056
2057    #[test]
2058    fn c3_soa_equivalence_negative_int_keys() {
2059        // Dense negative ints with identity hashing — same collision
2060        // profile as the existing `collision_relocation_keeps_chains_intact`
2061        // test, but verified through the SoA RH path. Triggers
2062        // rob-from-rich repeatedly.
2063        let mut heap = Heap::new();
2064        let mut ops = Vec::new();
2065        for i in 0..256 {
2066            let k = Value::Int(-i);
2067            ops.push((k, Value::Int(i)));
2068        }
2069        let chain = unsafe { &*replay_chain(&mut heap, &ops) };
2070        let soa = unsafe { &*replay_soa(&mut heap, &ops) };
2071        for (k, _) in &ops {
2072            let cv = chain.get(*k);
2073            let sv = soa.soa_get(*k);
2074            assert!(cv.raw_eq(sv), "SoA mismatch on key {:?}", k);
2075        }
2076    }
2077
2078    #[test]
2079    fn c3_soa_equivalence_mixed_keys_with_updates() {
2080        // Insert, then update the same keys with new values — exercises
2081        // the soa_find_slot in-place update branch.
2082        let mut heap = Heap::new();
2083        let kstr = Value::Str(heap.intern(b"x"));
2084        let kint = Value::Int(42);
2085        let kbool = Value::Bool(true);
2086        let ops: Vec<(Value, Value)> = vec![
2087            (kstr, Value::Int(1)),
2088            (kint, Value::Int(2)),
2089            (kbool, Value::Int(3)),
2090            (kstr, Value::Int(11)),  // update
2091            (kint, Value::Int(22)),  // update
2092            (kbool, Value::Int(33)), // update
2093        ];
2094        let chain = unsafe { &*replay_chain(&mut heap, &ops) };
2095        let soa = unsafe { &*replay_soa(&mut heap, &ops) };
2096        for k in [kstr, kint, kbool] {
2097            assert!(chain.get(k).raw_eq(soa.soa_get(k)));
2098        }
2099    }
2100
2101    #[test]
2102    fn c3_soa_equivalence_delete_then_read() {
2103        // Phase D: tombstone delete + read on both paths, verify
2104        // matching nil-for-deleted, original-val-for-live.
2105        let mut heap = Heap::new();
2106        let mut ops_insert = Vec::new();
2107        for i in 0..30 {
2108            let k = Value::Str(heap.intern(format!("d_key_{i:03}").as_bytes()));
2109            ops_insert.push((k, Value::Int(i * 11)));
2110        }
2111        let chain = unsafe { &mut *replay_chain(&mut heap, &ops_insert) };
2112        let soa = unsafe { &mut *replay_soa(&mut heap, &ops_insert) };
2113        // Delete every 3rd key.
2114        let mut deleted: Vec<Value> = Vec::new();
2115        for (i, (k, _)) in ops_insert.iter().enumerate() {
2116            if i % 3 == 0 {
2117                // chain: set to Nil is the chain-path's delete equivalent
2118                chain.set(&mut heap, *k, Value::Nil).unwrap();
2119                let was_present = soa.soa_delete(*k);
2120                assert!(was_present, "soa_delete miss on inserted key {:?}", k);
2121                deleted.push(*k);
2122            }
2123        }
2124        // Read each key: deleted → nil, non-deleted → original val.
2125        for (k, v) in &ops_insert {
2126            let cv = chain.get(*k);
2127            let sv = soa.soa_get(*k);
2128            assert!(
2129                cv.raw_eq(sv),
2130                "delete/read mismatch on key {:?} — chain={:?} soa={:?}",
2131                k,
2132                cv,
2133                sv,
2134            );
2135            if deleted.iter().any(|d| d.raw_eq(*k)) {
2136                assert!(cv.is_nil(), "deleted key {:?} chain non-nil", k);
2137                assert!(sv.is_nil(), "deleted key {:?} soa non-nil", k);
2138            } else {
2139                assert!(cv.raw_eq(*v), "live key {:?} chain val drift", k);
2140            }
2141        }
2142        // Deleting an absent key is a no-op (returns false) on SoA.
2143        let absent = Value::Str(heap.intern(b"never_d"));
2144        assert!(!soa.soa_delete(absent));
2145    }
2146
2147    #[test]
2148    fn c3_soa_delete_then_reinsert_uses_tombstone() {
2149        // After delete + reinsert, key is findable with new val. The
2150        // SoA path may reuse the tombstoned slot (preferred) or place
2151        // elsewhere — either is correct as long as soa_get returns
2152        // the new val.
2153        let mut heap = Heap::new();
2154        let t = heap.new_table();
2155        let tref = unsafe { t.as_mut() };
2156        let k = Value::Str(heap.intern(b"reinsert_target"));
2157        tref.soa_insert(&mut heap, k, Value::Int(100)).unwrap();
2158        assert!(tref.soa_get(k).raw_eq(Value::Int(100)));
2159        let pre_tombs = tref.tombstones;
2160        assert!(tref.soa_delete(k));
2161        assert!(tref.tombstones == pre_tombs + 1);
2162        assert!(tref.soa_get(k).is_nil());
2163        // Reinsert with new val.
2164        tref.soa_insert(&mut heap, k, Value::Int(200)).unwrap();
2165        assert!(tref.soa_get(k).raw_eq(Value::Int(200)));
2166        // Tombstone reused — count back to pre_tombs.
2167        assert_eq!(tref.tombstones, pre_tombs);
2168    }
2169
2170    #[test]
2171    fn c3_soa_grows_under_load_pressure() {
2172        // Stress test: insert enough entries to trigger multiple RH
2173        // rehashes (cap doubles at load 0.75). Confirms PSL overflow
2174        // never fires and all keys survive grow cycles.
2175        let mut heap = Heap::new();
2176        let t = heap.new_table();
2177        let tref = unsafe { t.as_mut() };
2178        for i in 0..1024 {
2179            let k = Value::Str(heap.intern(format!("entry_{i:05}").as_bytes()));
2180            tref.soa_insert(&mut heap, k, Value::Int(i)).unwrap();
2181        }
2182        // Verify every key is findable.
2183        for i in 0..1024 {
2184            let k = Value::Str(heap.intern(format!("entry_{i:05}").as_bytes()));
2185            let v = tref.soa_get(k);
2186            assert!(
2187                v.raw_eq(Value::Int(i)),
2188                "SoA lost key entry_{:05} — got {:?}",
2189                i,
2190                v,
2191            );
2192        }
2193        assert!(tref.soa_live_count() == 1024);
2194        // Cap should have grown past the initial SOA_INITIAL_CAP via
2195        // the 0.75 load-factor trigger.
2196        assert!(
2197            tref.soa_cap() >= 2048,
2198            "SoA cap = {} after 1024 inserts — load gate didn't grow",
2199            tref.soa_cap(),
2200        );
2201    }
2202
2203    #[test]
2204    fn rehash_redistributes_into_array() {
2205        with_table(|heap, t| {
2206            // insert 1..n in reverse: starts in hash, rehash must migrate
2207            for i in (1..=256).rev() {
2208                let _ = t.set_int(heap, i, Value::Int(i));
2209            }
2210            assert_eq!(t.len(), 256);
2211            for i in 1..=256 {
2212                assert!(t.get_int(i).raw_eq(Value::Int(i)));
2213            }
2214        });
2215    }
2216}