Skip to main content

luna_core/runtime/
table.rs

1//! Lua table: hybrid array + hash.
2//!
3//! Array part uses split tag/payload storage (9 bytes/slot — the Lua 5.5
4//! "compact arrays" layout, bench-validated in benches/value_repr.rs).
5//! Hash part is the PUC node layout: main-position chaining with relocation
6//! (Brent's variation), capacity a power of two, rehash sizing per
7//! luaH_rehash/computesizes.
8
9use crate::runtime::heap::{Gc, GcHeader, Heap, Marker};
10use crate::runtime::value::{RawVal, Value, f2i_exact, raw};
11
12/// Errors that table mutation can raise back to the interpreter.
13#[derive(Clone, Copy, PartialEq, Eq, Debug)]
14pub enum TableError {
15    /// `t[nil] = …` — `nil` is forbidden as a key.
16    NilIndex,
17    /// `t[0/0] = …` — NaN floats are forbidden as keys.
18    NanIndex,
19    /// `next` called with a key not present in the table.
20    InvalidNext,
21    /// PUC `luaH_resizearray` — the array part would have to grow past
22    /// `MAXASIZE`, or the hash part past `MAXHBITS`. Raised back as
23    /// "table overflow" so a runaway `a[i] = i` loop walls within budget
24    /// (5.5/5.4 heavy.lua's `toomanyidx` pcalls exactly this scenario).
25    Overflow,
26}
27
28/// PUC `MAXASIZE` analogue: the highest power of two an array part may
29/// grow to. Choose a cap that comfortably fits in the gate's 60-second
30/// budget (each grow is O(n), so 2^27 entries × 16 bytes ≈ 2 GB is the
31/// effective ceiling). Beyond this `rehash` returns `TableError::Overflow`.
32pub(crate) const MAX_ASIZE: usize = 1 << 27;
33
34/// v2.1 Phase 1I.B — JIT layout constants for table-field IC.
35///
36/// luna-jit's trace lowerer needs to emit direct loads against
37/// `Table.nodes` (the hash part) without paying the helper-call ABI
38/// for each `Op::GetField` / `Op::SetField`. These constants expose
39/// the field offsets so the cranelift IR can be parameterised at
40/// compile time. The `Node` struct itself remains `pub(crate)` — only
41/// the offsets cross the crate boundary.
42///
43/// Layout assumptions:
44/// - `Box<[Node]>` is a fat pointer `(data_ptr, len)` on 64-bit
45///   targets (16 bytes total). The data pointer occupies the low 8
46///   bytes, length the high 8. This is the de-facto Rust ABI for
47///   `Box<[T]>` / `&[T]` but isn't formally guaranteed; the unit
48///   test `phase_1i_b_node_layout_pinned` and the const assertion
49///   on `size_of::<Box<[Node]>>()` catch drift.
50/// - `Value` is `#[repr(C, u8)]` so the discriminant byte sits at
51///   offset 0 and the payload starts at offset 8 (after 7 bytes of
52///   alignment padding). Total size 16 bytes per the existing
53///   `value_is_16_bytes` test in `runtime/value.rs`.
54/// - `Node` is `#[derive(Clone, Copy)]` with field order
55///   `(key: Value, val: Value, next: i32, dead_key: bool)`, so
56///   `key` lives at offset 0 and `val` at offset 16. The trailing
57///   `next + dead_key` fields are not read by the IC.
58pub mod jit_layout {
59    use super::Node;
60    use crate::runtime::Table;
61
62    /// Byte offset of the `nodes: Box<[Node]>` field within `Table`.
63    /// The fat-ptr low word (data ptr) lives at this offset; the
64    /// high word (length) at `TABLE_NODES_OFFSET + 8`. luna-jit
65    /// adds `TABLE_NODES_PTR_OFFSET` / `TABLE_NODES_LEN_OFFSET`
66    /// constants in `jit_backend/mod.rs` to express that split.
67    pub const TABLE_NODES_OFFSET: usize = std::mem::offset_of!(Table, nodes);
68
69    /// Byte offset of `key: Value` within `Node` (= 0).
70    pub const NODE_KEY_OFFSET: usize = std::mem::offset_of!(Node, key);
71
72    /// Byte offset of `val: Value` within `Node` (= 16 — `key` is 16-byte
73    /// `Value`, no inner padding).
74    pub const NODE_VAL_OFFSET: usize = std::mem::offset_of!(Node, val);
75
76    /// Total `Node` size in bytes (= 40 on 64-bit). Used as the stride
77    /// in `node_addr = nodes_ptr + slot_idx * SIZEOF_NODE`.
78    pub const SIZEOF_NODE: usize = std::mem::size_of::<Node>();
79
80    /// Static guard: pin the assumptions luna-jit relies on at compile
81    /// time. Layout drift here breaks IR emit, so trap it at compile
82    /// time rather than at trace-fire time.
83    ///
84    /// `Box<[T]>` is a fat pointer of `2 * usize` — 16 bytes on 64-bit
85    /// targets, 8 bytes on 32-bit (e.g. `wasm32`). Use a width-aware
86    /// expected size so the wasm32-unknown-unknown CI build does not
87    /// trip the assertion. The runtime layout still matters for luna-jit
88    /// IR emit on 64-bit hosts (the only platforms where Cranelift JIT
89    /// runs); the 32-bit branch documents the size in passing.
90    const _: () = {
91        assert!(std::mem::size_of::<Box<[Node]>>() == 2 * std::mem::size_of::<usize>());
92        assert!(NODE_KEY_OFFSET == 0);
93        assert!(NODE_VAL_OFFSET == 16);
94        assert!(SIZEOF_NODE >= 32);
95    };
96}
97
98#[derive(Clone, Copy)]
99pub(crate) struct Node {
100    key: Value,
101    val: Value,
102    /// absolute index of the next node in this chain, or NONE
103    next: i32,
104    /// PUC `setdeadkey` analogue: the key was a collectable that got swept
105    /// out of a weak table. The Gc pointer in `key` is now dangling — its
106    /// memory may have been reused for a new allocation with potentially
107    /// equal content. Marking the node "dead-key" lets `find_node` skip the
108    /// raw_eq probe (which could spuriously match a reallocated object) and
109    /// `insert_new` treat the slot as available for a fresh main-position
110    /// owner while leaving chain back-links intact for traversal.
111    dead_key: bool,
112}
113
114const NONE: i32 = -1;
115
116impl Node {
117    const EMPTY: Node = Node {
118        key: Value::Nil,
119        val: Value::Nil,
120        next: NONE,
121        dead_key: false,
122    };
123}
124
125/// C3 — SoA Robin Hood meta-word layout (Variant A, see
126/// `.dev/rfcs/v2.0-c3-soa-robinhood-rfc.md` §5.1).
127///
128/// Each `meta[idx]` slot encodes the open-addressing slot state in a
129/// single u16:
130/// - bit 15 (`OCCUPIED_BIT`): 0 = empty, 1 = occupied
131/// - bit 14 (`TOMBSTONE_BIT`): 0 = live, 1 = tombstoned-occupied
132/// - bits 13..0 (`PSL_MASK`): probe-sequence length (0..16383)
133///
134/// The 14-bit PSL field is **far** beyond any realistic Robin Hood
135/// max-PSL at load ≤ 0.75 (expected max ~20 on 1024 slots; even the
136/// long-tail outliers seen empirically with luna's existing hash
137/// distributions stay under 200). 2 bytes/slot is still 20× smaller
138/// than the 40-byte Node — the SoA bandwidth gain (§3.5 of the RFC)
139/// is preserved.
140///
141/// Earlier draft used 1 byte with a 6-bit PSL cap of 63; bench under
142/// load 0.676 on cap=1024 produced a long-tail PSL of 64+ with the
143/// LuaStr+mix64 hash distribution, forcing the widening.
144///
145/// Tombstones do NOT free the slot for `find` (probe continues past), but
146/// DO free it for `insert` (write the new entry, clear the tomb bit). They
147/// accumulate; the rehash path compacts them periodically.
148#[allow(dead_code)]
149pub(crate) mod meta_bits {
150    pub const OCCUPIED_BIT: u16 = 0b1000_0000_0000_0000;
151    pub const TOMBSTONE_BIT: u16 = 0b0100_0000_0000_0000;
152    pub const PSL_MASK: u16 = 0b0011_1111_1111_1111;
153    pub const PSL_MAX: u16 = PSL_MASK;
154    /// Empty slot — bit 15 = 0, all others 0.
155    pub const EMPTY: u16 = 0;
156
157    #[inline(always)]
158    pub fn is_occupied(m: u16) -> bool {
159        (m & OCCUPIED_BIT) != 0
160    }
161    #[inline(always)]
162    pub fn is_tombstone(m: u16) -> bool {
163        (m & TOMBSTONE_BIT) != 0
164    }
165    /// Live = occupied AND not tombstoned. `next()` iteration cursor returns
166    /// these. `find_slot_rh` short-circuits on a live match.
167    #[inline(always)]
168    pub fn is_live(m: u16) -> bool {
169        (m & (OCCUPIED_BIT | TOMBSTONE_BIT)) == OCCUPIED_BIT
170    }
171    #[inline(always)]
172    pub fn psl(m: u16) -> u16 {
173        m & PSL_MASK
174    }
175    #[inline(always)]
176    pub fn pack(psl: u16, tomb: bool) -> u16 {
177        debug_assert!(psl <= PSL_MAX);
178        let mut m = OCCUPIED_BIT | (psl & PSL_MASK);
179        if tomb {
180            m |= TOMBSTONE_BIT;
181        }
182        m
183    }
184}
185
186/// P11-S5d.I — inline storage threshold. Tables whose array part has
187/// `asize <= INLINE_ASIZE` keep their atags+avals inside the Table
188/// struct itself (`inline_storage`), skipping the slab Box entirely
189/// — binary_trees's `{nil, nil}` and `{...}` 2-element leaves live
190/// here, sparing one allocator round-trip per NewTable.
191pub(crate) const INLINE_ASIZE: u64 = 2;
192/// `INLINE_ASIZE` u64 slots for avals + `ceil(INLINE_ASIZE / 8)` u64
193/// slots covering the atags bytes (with trailing pad). For
194/// `INLINE_ASIZE = 2`: 2 avals + 1 atags = 3 u64s = 24 bytes.
195pub(crate) const INLINE_U64S: usize = INLINE_ASIZE as usize + INLINE_ASIZE.div_ceil(8) as usize;
196
197/// Lua table — hybrid array + hash storage, with optional metatable and
198/// weak-mode flags.
199#[repr(C)]
200pub struct Table {
201    /// read through raw casts by the GC, not by field access
202    #[allow(dead_code)]
203    pub(crate) hdr: GcHeader,
204    /// P11-S5d.I — single backing pointer for the array part. Points
205    /// to `inline_storage` (asize <= INLINE_ASIZE) or `slab.as_ptr()`
206    /// (asize > INLINE_ASIZE). The JIT inline aset reads this with one
207    /// `load i64`, no branch — the choice between inline and slab is
208    /// already encoded in the pointer. Initialised in `Heap::new_table`
209    /// AFTER the Table reaches its final heap address (so that
210    /// `&mut self.inline_storage` is the stable heap pointer, not a
211    /// stack-local one). Updated by `Table::resize`.
212    pub array_ptr: *mut u8,
213    /// P11-S5d.H — external backing for the array part when
214    /// `asize > INLINE_ASIZE`. Layout: `[avals: asize × 8 bytes][atags:
215    /// asize bytes]`. Empty box (dangling, no alloc) when the inline
216    /// path is in use.
217    pub(crate) slab: Box<[u64]>,
218    /// Length of the array part in slots. u64 (rather than `usize` or
219    /// `u32`) so the JIT can load it with a single `load i64`.
220    pub asize: u64,
221    /// P11-S5d.I — inline backing used when `asize <= INLINE_ASIZE`.
222    /// Same layout as the slab: avals at low addresses (`asize * 8`
223    /// bytes from offset 0), atags at the trailing `asize` bytes.
224    pub(crate) inline_storage: [u64; INLINE_U64S],
225    /// hash part: power-of-two length (or empty)
226    /// hash part: power-of-two length (or empty)
227    /// `pub(crate)` so `Heap::free_obj` (pool recycle path) can reset.
228    pub(crate) nodes: Box<[Node]>,
229    /// free-slot search position, counts down (PUC lastfree).
230    /// `pub(crate)` so `Heap::new_table` can reset on pool recycle.
231    pub(crate) lastfree: u32,
232    /// C3 — SoA Robin Hood hash part (Variant A, parallel to `nodes`
233    /// during Phase B+C transition). After Phase 4 cutover these
234    /// replace `nodes` entirely. Each of `keys` / `vals` / `meta`
235    /// is the same length as `nodes` and is sized in lockstep by
236    /// `resize`. `meta` byte layout per the `meta_bits` module above.
237    /// Empty `Box::new([])` until Phase D cuts over.
238    pub(crate) keys: Box<[Value]>,
239    pub(crate) vals: Box<[Value]>,
240    pub(crate) meta: Box<[u16]>,
241    /// Count of tombstoned-occupied meta slots; rehash trigger.
242    pub(crate) tombstones: u32,
243    /// C3 — iterator-guard counter (R-A3 mitigation). Incremented on
244    /// each `pairs`/`next` entry, decremented on exit. While > 0,
245    /// the SoA path MUST defer rehash (which would rebase slot
246    /// indices and break PUC `nextvar.lua:520-521` invariant).
247    /// Phase F wires this; Phase B initialises to 0.
248    pub(crate) iter_depth: u32,
249    /// P11-S5d.K — visibility lifted to `pub(crate)` so the JIT can
250    /// take its field offset at compile time and emit an inline
251    /// "metatable.is_none()" guard before the inline aget fast path.
252    /// `Option<Gc<Table>>` is 8 bytes via the NonNull-pointer-opt: 0
253    /// ⇔ None, non-zero ⇔ Some.
254    pub metatable: Option<Gc<Table>>,
255    /// reserved for an absent-metamethod cache (PUC `flags`); currently
256    /// unread — luna's mm lookup walks `metatable.get` each time
257    #[allow(dead_code)]
258    pub(crate) flags: u8,
259}
260
261// SAFETY: `array_ptr` looks like an unprotected raw pointer field, but
262// it always refers to memory the same Table owns (either its own inline
263// storage or its `slab` Box). The Table is heap-allocated and never
264// moved post-adoption, so the pointer stays valid for the table's
265// lifetime. No thread-unsafety concern: tables are accessed only
266// through the Vm, single-threaded.
267unsafe impl Send for Table {}
268unsafe impl Sync for Table {}
269
270impl Table {
271    pub(crate) fn new(hdr: GcHeader) -> Table {
272        Table {
273            hdr,
274            // P11-S5d.I — `array_ptr` is fixed up in
275            // `Heap::new_table` after the Table reaches its final heap
276            // address (so that `&inline_storage` is the heap address,
277            // not a stack-local one). Null sentinel here so a
278            // bug-detection invariant flags any pre-fixup read.
279            array_ptr: std::ptr::null_mut(),
280            slab: Box::new([]),
281            asize: 0,
282            inline_storage: [0; INLINE_U64S],
283            nodes: Box::new([]),
284            lastfree: 0,
285            keys: Box::new([]),
286            vals: Box::new([]),
287            meta: Box::new([]),
288            tombstones: 0,
289            iter_depth: 0,
290            metatable: None,
291            flags: 0,
292        }
293    }
294
295    /// P11-S5d.I — set `array_ptr` to the inline storage's stable heap
296    /// address. Called by `Heap::new_table` once the Table is at its
297    /// final location.
298    #[inline]
299    pub(crate) fn init_array_ptr(&mut self) {
300        self.array_ptr = self.inline_storage.as_mut_ptr() as *mut u8;
301    }
302
303    /// P11-S5d.H/I — read view onto the array-part tag bytes. Trails
304    /// the avals portion in the active backing (inline or slab).
305    #[inline(always)]
306    pub(crate) fn atags(&self) -> &[u8] {
307        let n = self.asize as usize;
308        if n == 0 {
309            return &[];
310        }
311        // SAFETY: `array_ptr` always points to a buffer with `n`
312        // RawVal slots followed by `n` u8 tag bytes (either
313        // `inline_storage` of `INLINE_U64S` u64s, or a `slab` of
314        // `asize + ceil(asize/8)` u64s). The tag bytes start at byte
315        // offset `n * 8` from the buffer base.
316        unsafe {
317            let ptr = self.array_ptr.add(n * 8);
318            std::slice::from_raw_parts(ptr, n)
319        }
320    }
321
322    #[inline(always)]
323    pub(crate) fn atags_mut(&mut self) -> &mut [u8] {
324        let n = self.asize as usize;
325        if n == 0 {
326            return &mut [];
327        }
328        // SAFETY: `array_ptr` was allocated by `Heap::init_array_ptr` with `array_cap` slots; the table holds it for its lifetime and the heap is single-threaded so no concurrent writers exist.
329        unsafe {
330            let ptr = self.array_ptr.add(n * 8);
331            std::slice::from_raw_parts_mut(ptr, n)
332        }
333    }
334
335    /// P11-S5d.H/I — read view onto the array-part payload slots. Sits
336    /// at the start of the active backing (u64-aligned, identical size
337    /// and layout to `RawVal`).
338    #[inline(always)]
339    pub(crate) fn avals(&self) -> &[RawVal] {
340        let n = self.asize as usize;
341        if n == 0 {
342            return &[];
343        }
344        // SAFETY: inline_storage / slab both store u64s, so the cast
345        // to `*const RawVal` is alignment-safe (RawVal size = 8,
346        // align = 8). The buffer holds at least `n` such slots.
347        unsafe { std::slice::from_raw_parts(self.array_ptr as *const RawVal, n) }
348    }
349
350    #[inline(always)]
351    pub(crate) fn avals_mut(&mut self) -> &mut [RawVal] {
352        let n = self.asize as usize;
353        if n == 0 {
354            return &mut [];
355        }
356        // SAFETY: `array_ptr` was allocated by `Heap::init_array_ptr` with `array_cap` slots; the table holds it for its lifetime and the heap is single-threaded so no concurrent writers exist.
357        unsafe { std::slice::from_raw_parts_mut(self.array_ptr as *mut RawVal, n) }
358    }
359
360    /// Allocate a fresh external `[avals: asize × 8 bytes][atags: asize
361    /// bytes]` slab. Only used when `asize > INLINE_ASIZE`. The buffer
362    /// is u64-aligned via `Box<[u64]>` and zeroed (avals = `RawVal::
363    /// NIL` aka `0`; atags = `raw::NIL` aka `0`).
364    fn alloc_slab(asize: usize) -> Box<[u64]> {
365        if asize == 0 {
366            return Box::new([]);
367        }
368        let avals_u64s = asize;
369        let atags_u64s = asize.div_ceil(8);
370        let total = avals_u64s + atags_u64s;
371        vec![0u64; total].into_boxed_slice()
372    }
373
374    /// This table's metatable, if any.
375    pub fn metatable(&self) -> Option<Gc<Table>> {
376        self.metatable
377    }
378
379    /// Install (or clear) this table's metatable. Does not perform any
380    /// `__metatable` guarding; that belongs in the Vm-level `setmetatable`.
381    pub fn set_metatable(&mut self, mt: Option<Gc<Table>>) {
382        self.metatable = mt;
383    }
384
385    /// Bytes occupied by the table's *external* internal allocations
386    /// (slab and nodes). Cheap O(1) read — Box len × element size, no
387    /// allocator query. `Heap::free_obj` subtracts this on the way out
388    /// so the credit applied via `set`/`rehash`/`ensure_*` is symmetric.
389    ///
390    /// P11-S5d.I — inline storage doesn't count toward this (it's part
391    /// of the Table struct itself, accounted for by `size_of::<Table>()`
392    /// at adoption time). When the array part lives inline, the slab
393    /// is empty and contributes nothing here.
394    pub(crate) fn internal_bytes(&self) -> usize {
395        let n = self.asize as usize;
396        let array_external = if n > INLINE_ASIZE as usize {
397            n + n * std::mem::size_of::<RawVal>()
398        } else {
399            0
400        };
401        let soa_external = self.keys.len() * std::mem::size_of::<Value>()
402            + self.vals.len() * std::mem::size_of::<Value>()
403            + self.meta.len() * std::mem::size_of::<u16>();
404        array_external + self.nodes.len() * std::mem::size_of::<Node>() + soa_external
405    }
406
407    fn asize(&self) -> usize {
408        self.asize as usize
409    }
410
411    fn aget(&self, idx: usize) -> Value {
412        // SAFETY: callers gate on `idx < self.asize()` before reaching here
413        // (`get_int`, `iter_array`, etc.). atags and avals are sized
414        // identically by `rehash`, so a bound check passed against atags
415        // covers avals too.
416        unsafe {
417            Value::pack(
418                *self.atags().get_unchecked(idx),
419                *self.avals().get_unchecked(idx),
420            )
421        }
422    }
423
424    fn aset(&mut self, idx: usize, v: Value) {
425        let (t, b) = v.unpack();
426        // SAFETY: see `aget`. callers (`set_norm`, `set_int`) gate on
427        // `idx < self.asize()`. The two `*_mut` calls each take a
428        // distinct `&mut self` borrow whose lifetime ends at the
429        // statement boundary, so they don't overlap.
430        unsafe {
431            *self.atags_mut().get_unchecked_mut(idx) = t;
432            *self.avals_mut().get_unchecked_mut(idx) = b;
433        }
434    }
435
436    // ---- reads ----
437
438    /// Raw lookup (no `__index` metamethod). Returns `Value::Nil` when
439    /// the key is absent. `Value::Nil` and NaN floats return `nil` directly.
440    pub fn get(&self, key: Value) -> Value {
441        match key {
442            Value::Int(i) => self.get_int(i),
443            Value::Float(f) => match f2i_exact(f) {
444                Some(i) => self.get_int(i),
445                None => {
446                    if f.is_nan() {
447                        Value::Nil
448                    } else {
449                        self.get_hash(key)
450                    }
451                }
452            },
453            Value::Nil => Value::Nil,
454            k => self.get_hash(k),
455        }
456    }
457
458    /// Integer-keyed variant of [`Self::get`].
459    pub fn get_int(&self, i: i64) -> Value {
460        if i >= 1 && (i as u64) <= self.asize() as u64 {
461            return self.aget(i as usize - 1);
462        }
463        self.get_hash(Value::Int(i))
464    }
465
466    /// String-keyed variant of [`Self::get`] for v1.2 D4 A1 GetField fast
467    /// path: the GetField interp arm always has a `Gc<LuaStr>` key from
468    /// `Proto.consts`. Skips the outer `Value` match (which would only
469    /// take the `_ => self.get_hash(k)` arm anyway) so the dispatcher
470    /// pays one less branch per call. ~5 GetField/iter × 1000 iters/cell
471    /// on the Redis-Lua-shape workload — every shaved nanosecond shows
472    /// up at the bench level. Counter-validated via
473    /// `examples/diag_opcode_breakdown.rs`.
474    #[inline]
475    pub fn get_str(&self, key: crate::runtime::Gc<crate::runtime::string::LuaStr>) -> Value {
476        self.get_hash(Value::Str(key))
477    }
478
479    fn get_hash(&self, k: Value) -> Value {
480        match self.find_node(k) {
481            Some(idx) => self.nodes[idx].val,
482            None => Value::Nil,
483        }
484    }
485
486    /// v2.1 Phase 1I.B — same logic as [`find_node`] but exposed
487    /// to luna-core's recorder so it can capture the slot index for
488    /// the table-field IC snapshot. luna-jit reads neither the
489    /// `nodes` field nor `Node` directly; only the slot index
490    /// crosses the crate boundary (baked into the IR as a `iconst`).
491    #[allow(dead_code)]
492    pub(crate) fn find_node_idx(&self, k: Value) -> Option<usize> {
493        self.find_node(k)
494    }
495
496    /// v2.1 Phase 1I.B — accessor for the recorder's
497    /// `FieldIcSnapshot` capture: read the slot's value's tag byte
498    /// for the cached_val_tag field. The recorder needs this to
499    /// match the runtime guard the IC emits. Returns None when
500    /// `idx >= nodes.len()`.
501    #[allow(dead_code)]
502    pub(crate) fn node_val_at(&self, idx: usize) -> Option<Value> {
503        self.nodes.get(idx).map(|n| n.val)
504    }
505
506    /// v2.1 Phase 1I.B — accessor for `nodes.len()` so the recorder
507    /// can capture the shape-guard's `nodes_len` field without
508    /// reaching into the private `nodes` member.
509    #[allow(dead_code)]
510    pub(crate) fn nodes_capacity(&self) -> usize {
511        self.nodes.len()
512    }
513
514    /// Walk the chain rooted at the key's main position.
515    fn find_node(&self, k: Value) -> Option<usize> {
516        if self.nodes.is_empty() {
517            return None;
518        }
519        let mut idx = self.main_position(k);
520        loop {
521            let n = &self.nodes[idx];
522            // Dead-key slots carry a dangling Gc pointer whose memory may
523            // have been reallocated to a different live object; raw_eq on
524            // such a key can spuriously match the freshly-reused address.
525            // Skip the comparison and only follow `next` (PUC `setdeadkey`
526            // / `equalkey` short-circuit). 5.5 gc.lua :459-:478 was 12%
527            // flaky on this exact path — a swept B-string's slot kept
528            // chaining into A's slot, so `a[k] = nil` (k = A_string) hit
529            // the dead slot and wrote nil there, leaving A's val untouched.
530            if !n.dead_key && n.key.raw_eq(k) {
531                return Some(idx);
532            }
533            if n.next == NONE {
534                return None;
535            }
536            idx = n.next as usize;
537        }
538    }
539
540    // ---- writes ----
541
542    /// Insert / update `(key, val)`. `heap` is used to credit any internal
543    /// Box growth (rehash) to `heap.bytes` so the counter stays in sync with
544    /// real memory; `free_obj` subtracts `internal_bytes()` on the way out.
545    pub fn set(&mut self, heap: &mut Heap, key: Value, val: Value) -> Result<(), TableError> {
546        let k = normalize_set_key(key)?;
547        self.set_norm(heap, k, val)
548    }
549
550    /// PUC `luaV_fastset` / `luaV_finishfastset` analogue: single-walk
551    /// in-place update for an existing key. Returns `true` iff `key` is
552    /// present with a non-nil value and the slot was overwritten with
553    /// `val`. Returns `false` when the key is absent, the slot holds nil,
554    /// or the key normalisation rejects it — the caller is then expected
555    /// to run the `__newindex` chain or fall back to `set` for the raw
556    /// insert.
557    ///
558    /// Collapses the SetField hot path from two hash-chain walks
559    /// (`get` + `set`) to one. The `__newindex` invariant ("fires iff
560    /// `get` would have returned nil") is preserved because this method
561    /// writes only when the existing slot is non-nil — the exact set the
562    /// prior `tb.get(key).is_nil()` gate already excluded from
563    /// `__newindex` eligibility. See
564    /// `.dev/rfcs/v2.0-pi-phase2-a3-audit.md` §4 for the case-by-case
565    /// semantics check.
566    ///
567    /// The caller is responsible for firing `Heap::barrier_back` after a
568    /// `true` return (same contract as the surrounding `raw_set`
569    /// wrapper).
570    pub fn try_set_existing(&mut self, key: Value, val: Value) -> bool {
571        let k = match normalize_set_key(key) {
572            Ok(k) => k,
573            Err(_) => return false,
574        };
575        if let Value::Int(i) = k
576            && i >= 1
577            && (i as u64) <= self.asize() as u64
578        {
579            let idx = i as usize - 1;
580            // SAFETY: `idx < self.asize()` is guarded by the conditional
581            // above, mirroring the bound on `aget`/`aset`.
582            let tag = unsafe { *self.atags().get_unchecked(idx) };
583            if tag != raw::NIL {
584                // Nil-val on a live slot must follow the same tombstone
585                // discipline as `set_norm` — routed through
586                // `clear_existing_slot` so chain-world / future
587                // data-layout cutovers (Phase E SoA) stay aligned.
588                // See `.dev/known-bugs/fixed/`.
589                if val.is_nil() {
590                    self.clear_existing_slot(k);
591                } else {
592                    self.aset(idx, val);
593                }
594                return true;
595            }
596            // Array slot present-but-nil → __newindex eligible: do NOT
597            // write. Caller falls through to the metamethod chain.
598            return false;
599        }
600        if let Some(idx) = self.find_node(k)
601            && !self.nodes[idx].val.is_nil()
602        {
603            if val.is_nil() {
604                self.clear_existing_slot(k);
605            } else {
606                self.nodes[idx].val = val;
607            }
608            return true;
609        }
610        false
611    }
612
613    /// Shared "live with val=Nil is illegal" tombstone routine for the
614    /// two write entry points (`set_norm` and `try_set_existing`). The
615    /// slot must already be known live (array slot inside `asize()` /
616    /// node returned by `find_node`).
617    ///
618    /// Chain-world today:
619    ///   - array slot → `aset(_, Nil)` clears the atag, so `next()`'s
620    ///     `tag != raw::NIL` filter skips the slot.
621    ///   - node slot  → soft tombstone (key kept, `val = Nil`); chain
622    ///     `next()` filter `!n.val.is_nil()` skips it, and `find_node`
623    ///     still routes a future re-insert into the same slot without
624    ///     a rehash.
625    ///
626    /// Centralising the discipline here lets a future Phase E SoA
627    /// cutover (Variant B linear probe, or any non-Robin-Hood layout
628    /// attack that switches `next()`'s filter to `meta_bits::is_live`)
629    /// migrate both entry points in lockstep — exactly the divergence
630    /// that surfaced as `(key, nil)` zombies in `pairs()` on the C3
631    /// Session 2 cutover branch.
632    fn clear_existing_slot(&mut self, k: Value) {
633        if let Value::Int(i) = k
634            && i >= 1
635            && (i as u64) <= self.asize() as u64
636        {
637            self.aset(i as usize - 1, Value::Nil);
638            return;
639        }
640        if let Some(idx) = self.find_node(k) {
641            self.nodes[idx].val = Value::Nil;
642        }
643    }
644
645    /// Integer-keyed variant of [`Self::set`].
646    pub fn set_int(&mut self, heap: &mut Heap, i: i64, val: Value) -> Result<(), TableError> {
647        self.set_norm(heap, Value::Int(i), val)
648    }
649
650    /// `k` is already normalized (no nil, no NaN, integral floats → Int).
651    fn set_norm(&mut self, heap: &mut Heap, k: Value, v: Value) -> Result<(), TableError> {
652        if let Value::Int(i) = k
653            && i >= 1
654            && (i as u64) <= self.asize() as u64
655        {
656            // Live array slot + Nil write goes through the shared
657            // tombstone routine (see `clear_existing_slot` for the
658            // chain ↔ future-SoA rationale). The non-Nil branch is
659            // identical to a bare `aset` today.
660            if v.is_nil() {
661                self.clear_existing_slot(k);
662            } else {
663                self.aset(i as usize - 1, v);
664            }
665            return Ok(());
666        }
667        if let Some(idx) = self.find_node(k) {
668            if v.is_nil() {
669                self.clear_existing_slot(k);
670            } else {
671                self.nodes[idx].val = v;
672            }
673            return Ok(());
674        }
675        if v.is_nil() {
676            return Ok(()); // absent key set to nil: nothing to record
677        }
678        self.insert_new(heap, k, v)
679    }
680
681    fn insert_new(&mut self, heap: &mut Heap, k: Value, v: Value) -> Result<(), TableError> {
682        if self.nodes.is_empty() {
683            self.rehash(heap, k)?;
684            return self.set_norm(heap, k, v);
685        }
686        let mp = self.main_position(k);
687        // A truly empty slot (key=Nil, !dead_key) is free for direct placement.
688        // A dead-key slot still belongs to some chain (its `next` points to a
689        // live entry the chain reaches), so we treat it as occupied here and
690        // route the new key through the collision path below — that preserves
691        // the back-links into this slot from other nodes' `next` fields.
692        if self.nodes[mp].key.is_nil() && !self.nodes[mp].dead_key {
693            self.nodes[mp] = Node {
694                key: k,
695                val: v,
696                next: NONE,
697                dead_key: false,
698            };
699            return Ok(());
700        }
701        let Some(free) = self.free_pos() else {
702            self.rehash(heap, k)?;
703            return self.set_norm(heap, k, v);
704        };
705        // Dead-key slot: it carries no live key, so by definition nobody else
706        // counts it as "their main position owner". We give it directly to
707        // the new key but preserve `next` so the chain it sits inside still
708        // reaches its downstream entries.
709        if self.nodes[mp].dead_key {
710            let preserved_next = self.nodes[mp].next;
711            self.nodes[mp] = Node {
712                key: k,
713                val: v,
714                next: preserved_next,
715                dead_key: false,
716            };
717            return Ok(());
718        }
719        let other_mp = self.main_position(self.nodes[mp].key);
720        if other_mp != mp {
721            // colliding node is out of its main position: relocate it to the
722            // free slot and take its place
723            let mut prev = other_mp;
724            while self.nodes[prev].next != mp as i32 {
725                prev = self.nodes[prev].next as usize;
726            }
727            self.nodes[prev].next = free as i32;
728            self.nodes[free] = self.nodes[mp];
729            self.nodes[mp] = Node {
730                key: k,
731                val: v,
732                next: NONE,
733                dead_key: false,
734            };
735        } else {
736            // colliding node owns this position: chain the new node behind it
737            self.nodes[free] = Node {
738                key: k,
739                val: v,
740                next: self.nodes[mp].next,
741                dead_key: false,
742            };
743            self.nodes[mp].next = free as i32;
744        }
745        Ok(())
746    }
747
748    fn free_pos(&mut self) -> Option<usize> {
749        while self.lastfree > 0 {
750            self.lastfree -= 1;
751            let n = &self.nodes[self.lastfree as usize];
752            // Dead-key slots are still occupied for chain purposes (their
753            // `next` may be the only path to a downstream entry) — don't
754            // hand them out as free.
755            if n.key.is_nil() && !n.dead_key {
756                return Some(self.lastfree as usize);
757            }
758        }
759        None
760    }
761
762    // ---- rehash (PUC luaH_rehash) ----
763
764    fn rehash(&mut self, heap: &mut Heap, pending: Value) -> Result<(), TableError> {
765        let mut nums = [0usize; 65];
766        let mut int_keys = 0usize;
767        let mut total = 1; // the pending key
768        if let Value::Int(i) = pending
769            && i >= 1
770        {
771            nums[ceil_log2(i as u64)] += 1;
772            int_keys += 1;
773        }
774        let atags = self.atags();
775        for (i, &tag) in atags.iter().enumerate() {
776            if tag != raw::NIL {
777                nums[ceil_log2(i as u64 + 1)] += 1;
778                int_keys += 1;
779                total += 1;
780            }
781        }
782        for n in self.nodes.iter() {
783            if !n.val.is_nil() {
784                total += 1;
785                if let Value::Int(i) = n.key
786                    && i >= 1
787                {
788                    nums[ceil_log2(i as u64)] += 1;
789                    int_keys += 1;
790                }
791            }
792        }
793        // computesizes: optimal array size = largest 2^i with more than 2^(i-1)
794        // integer keys in [1, 2^i]
795        let mut new_asize = 0usize;
796        let mut in_array = 0usize;
797        let mut a = 0usize;
798        let mut two_to_i = 1usize;
799        let mut i = 0usize;
800        while int_keys > two_to_i / 2 {
801            a += nums[i];
802            if a > two_to_i / 2 {
803                new_asize = two_to_i;
804                in_array = a;
805            }
806            i += 1;
807            match two_to_i.checked_mul(2) {
808                Some(n) => two_to_i = n,
809                None => break,
810            }
811        }
812        // PUC `luaH_resizearray` raises "table overflow" when the array part
813        // would have to grow past MAXASIZE. luna mirrors with `MAX_ASIZE`,
814        // checked on both the array and the hash bucket count (the latter is
815        // a power-of-two of total - in_array entries).
816        if new_asize > MAX_ASIZE {
817            return Err(TableError::Overflow);
818        }
819        let hash_entries = total - in_array;
820        if hash_entries > MAX_ASIZE {
821            return Err(TableError::Overflow);
822        }
823        self.resize(heap, new_asize, hash_entries);
824        Ok(())
825    }
826
827    /// Resize the table's array and hash parts. The array part grows
828    /// (or shrinks) to `new_asize` NIL-initialized slots; the hash
829    /// part rounds to the next power of two ≥ `hash_entries`. Any
830    /// existing entries are re-inserted into the new layout. The
831    /// Box growth is debited/credited to `heap.bytes` so `free_obj`
832    /// can subtract the symmetric amount.
833    ///
834    /// P11-S5c.B — `Heap::new_table_sized` calls this on a freshly
835    /// adopted empty table to pre-allocate the array part, sparing
836    /// the table-fill loop from O(log N) intermediate `rehash`es.
837    pub(crate) fn resize(&mut self, heap: &mut Heap, new_asize: usize, hash_entries: usize) {
838        let before = self.internal_bytes();
839        // P11-S5d.H/I — snapshot the old array entries before we
840        // re-install the backing. The active buffer can be inline OR
841        // slab; `array_ptr` already points to whichever it is, so
842        // walking via raw offsets works the same for either case.
843        let old_asize = self.asize as usize;
844        let mut old_pairs: Vec<(u8, RawVal)> = Vec::with_capacity(old_asize);
845        if old_asize > 0 {
846            // SAFETY: `array_ptr` was set up by `Heap::new_table` or
847            // an earlier `resize`; it covers `old_asize * 9` bytes
848            // (avals + atags).
849            let avals_base = self.array_ptr as *const RawVal;
850            let atags_base = unsafe { self.array_ptr.add(old_asize * 8) as *const u8 };
851            for i in 0..old_asize {
852                // SAFETY: `i < array_len` is enforced by the surrounding loop bound; `atags_base` / `avals_base` point into the table's parallel arrays allocated in lockstep by `init_array_ptr`.
853                let tag = unsafe { *atags_base.add(i) };
854                // SAFETY: `i < array_len` is enforced by the surrounding loop bound; `atags_base` / `avals_base` point into the table's parallel arrays allocated in lockstep by `init_array_ptr`.
855                let val = unsafe { *avals_base.add(i) };
856                old_pairs.push((tag, val));
857            }
858        }
859        let old_nodes = std::mem::take(&mut self.nodes);
860
861        // Install the new array backing first, then update `array_ptr`
862        // (before potentially dropping the old slab via the assignment
863        // below) so the JIT never observes a stale pointer.
864        self.asize = new_asize as u64;
865        if new_asize <= INLINE_ASIZE as usize {
866            // Inline path — zero the inline buffer; drop any prior
867            // external slab.
868            for slot in self.inline_storage.iter_mut() {
869                *slot = 0;
870            }
871            self.array_ptr = self.inline_storage.as_mut_ptr() as *mut u8;
872            self.slab = Box::new([]);
873        } else {
874            // External slab — allocate, then re-point `array_ptr`.
875            self.slab = Self::alloc_slab(new_asize);
876            self.array_ptr = self.slab.as_mut_ptr() as *mut u8;
877        }
878
879        let hsize = if hash_entries == 0 {
880            0
881        } else {
882            hash_entries.next_power_of_two()
883        };
884        self.nodes = vec![Node::EMPTY; hsize].into_boxed_slice();
885        self.lastfree = hsize as u32;
886        // PUC `g->GCtotalbytes` analogue: credit (or debit) the box-size
887        // delta so `Heap.bytes` reflects this table's actual internal
888        // memory. `free_obj` subtracts `internal_bytes()` on the way out.
889        let after = self.internal_bytes();
890        heap.apply_bytes_delta(before, after);
891        // Re-insert old array entries via the public set_norm path
892        // (which handles rehashing if the new array shrinks below the
893        // entry count).
894        for (i, (tag, val)) in old_pairs.into_iter().enumerate() {
895            if tag != raw::NIL {
896                // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
897                let v = unsafe { Value::pack(tag, val) };
898                let _ = self.set_norm(heap, Value::Int(i as i64 + 1), v);
899            }
900        }
901        for n in old_nodes.iter() {
902            if !n.val.is_nil() {
903                let _ = self.set_norm(heap, n.key, n.val);
904            }
905        }
906    }
907
908    fn main_position(&self, k: Value) -> usize {
909        debug_assert!(!self.nodes.is_empty());
910        hash_key(k) as usize & (self.nodes.len() - 1)
911    }
912
913    // ---- length / iteration ----
914
915    /// A border: `n` where `t[n]` is non-nil and `t[n+1]` is nil (PUC `luaH_getn`).
916    /// This is Lua `#` semantics, not a container size — an `is_empty`
917    /// counterpart would be meaningless.
918    #[allow(clippy::len_without_is_empty)]
919    pub fn len(&self) -> i64 {
920        let asize = self.asize();
921        let atags = self.atags();
922        if asize > 0 && atags[asize - 1] == raw::NIL {
923            // binary search inside the array part
924            let (mut lo, mut hi) = (0usize, asize);
925            while hi - lo > 1 {
926                let m = lo + (hi - lo) / 2;
927                if atags[m - 1] == raw::NIL {
928                    hi = m;
929                } else {
930                    lo = m;
931                }
932            }
933            return lo as i64;
934        }
935        if self.nodes.is_empty() {
936            return asize as i64;
937        }
938        // array is full (or absent): unbound search through the hash part
939        let mut lo = asize as i64;
940        let mut hi = lo + 1;
941        while !self.get_int(hi).is_nil() {
942            lo = hi;
943            match hi.checked_mul(2) {
944                Some(n) => hi = n,
945                None => {
946                    // pathological sparse keys (the doubling overflowed): scan
947                    // linearly from 1 for the first border, as PUC's
948                    // unbound_search does — finds a small border fast instead of
949                    // returning the huge one.
950                    let mut i = 1i64;
951                    while !self.get_int(i).is_nil() {
952                        i += 1;
953                    }
954                    return i - 1;
955                }
956            }
957        }
958        while hi - lo > 1 {
959            let m = lo + (hi - lo) / 2;
960            if self.get_int(m).is_nil() {
961                hi = m;
962            } else {
963                lo = m;
964            }
965        }
966        lo
967    }
968
969    /// Lua `next`: iterate array part then hash part.
970    pub fn next(&self, key: Value) -> Result<Option<(Value, Value)>, TableError> {
971        let start = match key {
972            Value::Nil => 0,
973            k => {
974                let k = match k {
975                    Value::Float(f) => match f2i_exact(f) {
976                        Some(i) => Value::Int(i),
977                        None => k,
978                    },
979                    k => k,
980                };
981                if let Value::Int(i) = k
982                    && i >= 1
983                    && (i as u64) <= self.asize() as u64
984                {
985                    i as usize
986                } else {
987                    match self.find_node(k) {
988                        Some(idx) => self.asize() + idx + 1,
989                        None => return Err(TableError::InvalidNext),
990                    }
991                }
992            }
993        };
994        let atags = self.atags();
995        for i in start..self.asize() {
996            if atags[i] != raw::NIL {
997                return Ok(Some((Value::Int(i as i64 + 1), self.aget(i))));
998            }
999        }
1000        let hstart = start.saturating_sub(self.asize());
1001        for (idx, n) in self.nodes.iter().enumerate().skip(hstart) {
1002            if !n.val.is_nil() {
1003                let _ = idx;
1004                return Ok(Some((n.key, n.val)));
1005            }
1006        }
1007        Ok(None)
1008    }
1009
1010    /// `(weak_keys, weak_values)` from the metatable's `__mode` field. Read by
1011    /// scanning the metatable for the `__mode` string (no interned key needed
1012    /// inside the collector).
1013    pub(crate) fn weak_mode(&self) -> (bool, bool) {
1014        let Some(mt) = self.metatable else {
1015            return (false, false);
1016        };
1017        for n in mt.nodes.iter() {
1018            if let (Value::Str(k), Value::Str(mode)) = (n.key, n.val)
1019                && k.as_bytes() == b"__mode"
1020            {
1021                let b = mode.as_bytes();
1022                return (b.contains(&b'k'), b.contains(&b'v'));
1023            }
1024        }
1025        (false, false)
1026    }
1027
1028    /// True when this table holds at least one direct reference (array slot,
1029    /// hash key, or hash value) to a coroutine whose mark bit is still clear.
1030    /// Used by the GC's cycle-finalize check (PUC 5.3 gc.lua :502) to detect
1031    /// the table ↔ thread reference cycle that needs an extra GC round before
1032    /// `__gc` runs. Tag-level scan avoids walking the full reference graph.
1033    pub(crate) fn refs_contain_unmarked_coro(&self) -> bool {
1034        use crate::runtime::heap::header_is_marked;
1035        let atags = self.atags();
1036        let avals = self.avals();
1037        for (i, &tag) in atags.iter().enumerate() {
1038            if tag == raw::CORO {
1039                // SAFETY: raw union access — the tag byte at the same index in `atags` was previously confirmed to be `co` (closure/object pointer) so the `co` variant of `RawVal` holds the valid payload.
1040                let p = unsafe { avals[i].co } as *mut crate::runtime::heap::GcHeader;
1041                if !header_is_marked(p) {
1042                    return true;
1043                }
1044            }
1045        }
1046        for n in self.nodes.iter() {
1047            if let Value::Coro(co) = n.key {
1048                if !header_is_marked(co.as_ptr() as *mut crate::runtime::heap::GcHeader) {
1049                    return true;
1050                }
1051            }
1052            if let Value::Coro(co) = n.val {
1053                if !header_is_marked(co.as_ptr() as *mut crate::runtime::heap::GcHeader) {
1054                    return true;
1055                }
1056            }
1057        }
1058        false
1059    }
1060
1061    pub(crate) fn trace(&self, m: &mut Marker) {
1062        let (wk, wv) = self.weak_mode();
1063        if wk || wv {
1064            m.weak.push(self as *const Table as *mut Table);
1065        }
1066        // weak keys + strong values = an ephemeron table: its hash values are
1067        // marked only if the key proves reachable (deferred to the convergence
1068        // pass), not here. PUC 5.1 predates ephemerons — under `no_ephemeron`
1069        // a weak-key table marks its values strongly during this pass, which
1070        // is what gc.lua's "weak tables" section requires.
1071        let ephemeron = wk && !wv && !m.no_ephemeron;
1072        if ephemeron {
1073            m.ephemeron.push(self as *const Table as *mut Table);
1074        }
1075        // array keys are integers (never weakly collected); skip values only
1076        // when the table has weak values
1077        if !wv {
1078            let atags = self.atags();
1079            let avals = self.avals();
1080            for (i, &tag) in atags.iter().enumerate() {
1081                if raw::is_gc(tag) {
1082                    // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
1083                    m.value(unsafe { Value::pack(tag, avals[i]) });
1084                }
1085            }
1086        }
1087        for n in self.nodes.iter() {
1088            if !wk {
1089                m.value(n.key);
1090            }
1091            // ephemeron hash values are deferred; otherwise mark strong values
1092            if !wv && !ephemeron {
1093                m.value(n.val);
1094            }
1095        }
1096        if let Some(mt) = self.metatable {
1097            m.value(Value::Table(mt));
1098        }
1099    }
1100
1101    /// Ephemeron pass: mark the value of every hash entry whose key is alive
1102    /// (`alive` decides — strong/marked keys, plus strings/numbers which are
1103    /// never weakly collected). Returns true if any value was newly marked, so
1104    /// the caller can iterate to a fixpoint (PUC `traverseephemeron`).
1105    pub(crate) fn converge_ephemeron(&self, alive: &dyn Fn(Value) -> bool, m: &mut Marker) -> bool {
1106        let mut changed = false;
1107        for n in self.nodes.iter() {
1108            if !n.val.is_nil() && alive(n.key) {
1109                changed |= m.value(n.val);
1110            }
1111        }
1112        changed
1113    }
1114
1115    /// Clear entries whose weak key/value did not survive marking. `is_dead`
1116    /// reports whether a GC value was left unmarked (about to be swept).
1117    /// Clear weak-table entries whose key/value no longer carries a live
1118    /// reference. `is_dead` is a **pure** check (no side effects); the GC
1119    /// uses `mark_string` to resurrect any string that's still reachable via
1120    /// a *surviving* entry — Lua manual §2.5.4 says strings in weak tables
1121    /// are not collected as long as their entry is, and PUC `iscleared`
1122    /// implements that by marking the string during the same scan.
1123    pub(crate) fn clear_weak(
1124        &mut self,
1125        wk: bool,
1126        wv: bool,
1127        is_dead: &dyn Fn(Value) -> bool,
1128        mark_string: &dyn Fn(Value),
1129    ) {
1130        if wv {
1131            let n = self.asize as usize;
1132            for i in 0..n {
1133                let tag = self.atags()[i];
1134                if raw::is_gc(tag) {
1135                    // SAFETY: `tag` and the raw value come from this table's parallel `atags` / `avals` arrays, which the table writers always keep in sync — the tag byte matches the raw payload's discriminator (see `runtime::value` `raw` module).
1136                    let v = unsafe { Value::pack(tag, self.avals()[i]) };
1137                    if is_dead(v) {
1138                        self.atags_mut()[i] = raw::NIL;
1139                        self.avals_mut()[i] = RawVal::NIL;
1140                    } else {
1141                        mark_string(v);
1142                    }
1143                }
1144            }
1145        }
1146        for n in self.nodes.iter_mut() {
1147            if n.val.is_nil() {
1148                continue;
1149            }
1150            let key_dead = wk && is_dead(n.key);
1151            let val_dead = wv && is_dead(n.val);
1152            if key_dead || val_dead {
1153                // entry removed. PUC `setdeadkey`: when the key was a
1154                // collectable, drop the Gc pointer so a later raw_eq cannot
1155                // spuriously match a new object that gets allocated at the
1156                // same freed address. Keep `next` so the chain back-links
1157                // through this node still reach downstream entries; the
1158                // `dead_key` flag tells `find_node` to skip the comparison
1159                // and `insert_new` to treat the slot as a free
1160                // main-position owner that may inherit the chain.
1161                n.val = Value::Nil;
1162                if matches!(
1163                    n.key,
1164                    Value::Table(_)
1165                        | Value::Closure(_)
1166                        | Value::Native(_)
1167                        | Value::Coro(_)
1168                        | Value::Userdata(_)
1169                        | Value::Str(_)
1170                ) {
1171                    n.key = Value::Nil;
1172                    n.dead_key = true;
1173                }
1174            } else {
1175                // entry survives — resurrect any string reachable through it
1176                if wk {
1177                    mark_string(n.key);
1178                }
1179                if wv {
1180                    mark_string(n.val);
1181                }
1182            }
1183        }
1184    }
1185}
1186
1187// =====================================================================
1188// C3 — SoA + Robin Hood open-addressing hash part (Variant A).
1189//
1190// Parallel to the chain-walk path during Phase B+C+D transition: the
1191// chain `nodes` / `lastfree` is the authoritative read path until
1192// Phase E migrates `next()` and Phase 4 cuts over. These methods
1193// operate only on the `keys` / `vals` / `meta` / `tombstones` SoA
1194// arrays — chain state is never touched.
1195//
1196// Layout invariants the methods below maintain:
1197//   - `keys.len() == vals.len() == meta.len()`, all power-of-two
1198//     (or zero in the empty-stub state)
1199//   - `meta[i] = meta_bits::EMPTY` iff slot i is free
1200//   - tombstoned slots are scanned past by find but reused by insert
1201//   - `tombstones` counts the meta slots with TOMBSTONE_BIT set
1202//   - load factor (live + tombstone) / cap is kept ≤ 0.75 via
1203//     `soa_grow_if_needed` (R-A1 mitigation — PSL bound 63)
1204//   - rehash is REFUSED when `iter_depth > 0` (R-A3 mitigation —
1205//     wired in Phase F; for Phase C the counter is always 0 so the
1206//     refusal path is unreachable)
1207//
1208// Refs: `.dev/rfcs/v2.0-c3-soa-robinhood-rfc.md` §5.1 (variant A),
1209// §6.2 Phase 1-3 (impl plan).
1210// =====================================================================
1211
1212/// C3 — initial SoA capacity when growing from empty. Power of two.
1213/// Picked at 4 so a 3-element table doesn't trigger an immediate
1214/// regrowth.
1215#[allow(dead_code)] // wired by Phase D/E/F integration
1216pub(crate) const SOA_INITIAL_CAP: usize = 4;
1217
1218/// C3 — high load-factor threshold (3/4). SoA grow trigger; matches
1219/// the RFC §5.1 recommendation. PSL_MAX is the u16 14-bit value so
1220/// long-tail PSL overruns are recoverable via grow-retry.
1221#[allow(dead_code)]
1222const SOA_LOAD_NUM: usize = 3;
1223#[allow(dead_code)]
1224const SOA_LOAD_DEN: usize = 4;
1225
1226/// C3 — tombstone density threshold (1/4). When tombstones/cap ≥ 25%
1227/// the next non-resize-triggering rehash compacts them.
1228#[allow(dead_code)]
1229const SOA_TOMB_NUM: usize = 1;
1230#[allow(dead_code)]
1231const SOA_TOMB_DEN: usize = 4;
1232
1233#[allow(dead_code)] // wired by Phase D/E/F integration into public set/get/next
1234impl Table {
1235    /// C3 — current SoA hash-part capacity in slots (0 = empty stub).
1236    #[inline]
1237    pub(crate) fn soa_cap(&self) -> usize {
1238        self.meta.len()
1239    }
1240
1241    /// C3 — count of live (occupied & not tombstone) SoA slots.
1242    /// O(n) — only used by the Phase G equivalence test path; the
1243    /// hot rehash trigger uses `live_estimate = cap*3/4 - tombstones`
1244    /// implicitly via `soa_grow_if_needed`.
1245    #[cfg(test)]
1246    pub(crate) fn soa_live_count(&self) -> usize {
1247        self.meta.iter().filter(|&&m| meta_bits::is_live(m)).count()
1248    }
1249
1250    /// C3 — count of occupied (live OR tombstoned) SoA slots; this is
1251    /// the value the load factor compares against `cap * 3/4`.
1252    #[inline]
1253    fn soa_occupied_count(&self) -> usize {
1254        // O(n) sweep — Phase C inserts the count on each call so the
1255        // worst case is bounded by per-insert amortised cost. A future
1256        // polish could maintain a counter incrementally; left as a
1257        // Phase H mini-bench-driven follow-up if PI sample shows it
1258        // contributes > 1 µs/cell.
1259        self.meta
1260            .iter()
1261            .filter(|&&m| meta_bits::is_occupied(m))
1262            .count()
1263    }
1264
1265    /// C3 — Robin Hood lookup. Returns the slot index of a *live*
1266    /// matching key, or None if absent. Walks past tombstones (they
1267    /// preserve probe chains). Returns None if the SoA cap is zero
1268    /// (Phase B stub state). Bound by `cap` probes; in practice
1269    /// expected ≤ 8 at load 0.75.
1270    pub(crate) fn soa_find_slot(&self, k: Value) -> Option<usize> {
1271        let cap = self.meta.len();
1272        if cap == 0 {
1273            return None;
1274        }
1275        let mask = cap - 1;
1276        let mut idx = (hash_key(k) as usize) & mask;
1277        // Walk until empty slot or wrap. The `steps <= cap` bound
1278        // is a safety net: a properly maintained Robin Hood table
1279        // with load < 1 always has at least one empty slot, so a
1280        // full wrap means table invariant violation.
1281        for _ in 0..cap {
1282            let m = self.meta[idx];
1283            if !meta_bits::is_occupied(m) {
1284                return None;
1285            }
1286            if !meta_bits::is_tombstone(m) && self.keys[idx].raw_eq(k) {
1287                return Some(idx);
1288            }
1289            idx = (idx + 1) & mask;
1290        }
1291        None
1292    }
1293
1294    /// C3 — Allocate fresh SoA arrays at `new_cap` (power of two) and
1295    /// re-insert every live entry from the old SoA arrays. Tombstones
1296    /// are dropped (count resets to 0). Used by `soa_grow_if_needed`
1297    /// (new_cap = max(SOA_INITIAL_CAP, 2*cap)) and by Phase D
1298    /// tombstone compaction (new_cap = cap).
1299    ///
1300    /// IMPORTANT: rehash MUST NOT fire while `iter_depth > 0`
1301    /// (R-A3) — wired in Phase F. Phase C callers all enter from
1302    /// non-iteration paths.
1303    fn soa_rehash_to(&mut self, heap: &mut Heap, new_cap: usize) -> Result<(), TableError> {
1304        debug_assert!(new_cap.is_power_of_two() && new_cap > 0);
1305        let before = self.internal_bytes();
1306        // Snapshot old live entries. This list is the canonical
1307        // "must be present after rehash" set; we restart from it on
1308        // any PSL-overflow retry.
1309        let mut survivors: Vec<(Value, Value)> = Vec::with_capacity(self.meta.len());
1310        for i in 0..self.meta.len() {
1311            if meta_bits::is_live(self.meta[i]) {
1312                survivors.push((self.keys[i], self.vals[i]));
1313            }
1314        }
1315        // Install fresh empty arrays at `new_cap`. On PSL overflow
1316        // during the re-insert pass (extremely rare with the 14-bit
1317        // PSL budget — would need a pathological hash distribution),
1318        // double the cap and replay the original `survivors` list
1319        // from scratch. We don't try to salvage partial work — the
1320        // rare-path retry cost is bounded by O(n × max_doublings),
1321        // and max_doublings has a hard MAX_ASIZE ceiling.
1322        let mut cap = new_cap;
1323        loop {
1324            if cap > MAX_ASIZE {
1325                return Err(TableError::Overflow);
1326            }
1327            self.keys = vec![Value::Nil; cap].into_boxed_slice();
1328            self.vals = vec![Value::Nil; cap].into_boxed_slice();
1329            self.meta = vec![meta_bits::EMPTY; cap].into_boxed_slice();
1330            self.tombstones = 0;
1331            let mut overflowed = false;
1332            for (k, v) in survivors.iter().copied() {
1333                if self.soa_place_known_absent(k, v).is_err() {
1334                    overflowed = true;
1335                    break;
1336                }
1337            }
1338            if !overflowed {
1339                break;
1340            }
1341            cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1342        }
1343        let after = self.internal_bytes();
1344        heap.apply_bytes_delta(before, after);
1345        Ok(())
1346    }
1347
1348    /// C3 — Raw rob-from-rich placement for a key known to be absent
1349    /// from the SoA arrays. Used by `soa_rehash_to` (re-insert pass)
1350    /// and by `soa_insert` (new-key path after the explicit
1351    /// soa_find_slot check). This routine does NOT auto-grow on a
1352    /// load-factor trigger (caller's responsibility), but DOES signal
1353    /// back to the caller via `Err(())` when the PSL bound of 63 is
1354    /// hit before finding an empty slot — Robin Hood's long-tail
1355    /// max-PSL exceeds the 6-bit storage budget at unfavourable hash
1356    /// distributions even under the nominal 0.75 load gate (RFC §6.3
1357    /// R-A1). The caller (`soa_insert`) handles by growing & retrying.
1358    ///
1359    /// On success returns the slot index where the new key landed
1360    /// (after any rob-from-rich shuffle, the original `k` value is at
1361    /// this returned index).
1362    fn soa_place_known_absent(&mut self, k: Value, v: Value) -> Result<usize, (Value, Value)> {
1363        let cap = self.meta.len();
1364        debug_assert!(cap > 0);
1365        let mask = cap - 1;
1366        let landing = (hash_key(k) as usize) & mask;
1367        let mut idx = landing;
1368        let mut cur_psl: u16 = 0;
1369        let mut cur_key = k;
1370        let mut cur_val = v;
1371        let mut placed_at: Option<usize> = None;
1372        for _ in 0..cap {
1373            let m = self.meta[idx];
1374            if !meta_bits::is_occupied(m) || meta_bits::is_tombstone(m) {
1375                if meta_bits::is_tombstone(m) {
1376                    self.tombstones = self.tombstones.saturating_sub(1);
1377                }
1378                self.meta[idx] = meta_bits::pack(cur_psl, false);
1379                self.keys[idx] = cur_key;
1380                self.vals[idx] = cur_val;
1381                return Ok(placed_at.unwrap_or(idx));
1382            }
1383            let stored_psl = meta_bits::psl(m);
1384            if cur_psl > stored_psl {
1385                // Rob: swap cur into this slot, evict stored to continue.
1386                std::mem::swap(&mut cur_key, &mut self.keys[idx]);
1387                std::mem::swap(&mut cur_val, &mut self.vals[idx]);
1388                self.meta[idx] = meta_bits::pack(cur_psl, false);
1389                if placed_at.is_none() {
1390                    placed_at = Some(idx);
1391                }
1392                cur_psl = stored_psl;
1393            }
1394            idx = (idx + 1) & mask;
1395            cur_psl = cur_psl.saturating_add(1);
1396            if cur_psl > meta_bits::PSL_MAX {
1397                // PSL exceeds the 14-bit storage budget — exceptionally
1398                // rare with 16384 max. Caller (soa_insert / rehash
1399                // outer loop) handles by growing & retrying. Partial
1400                // state: all entries are still in the table EXCEPT
1401                // `(cur_key, cur_val)` which is the latest homeless
1402                // evictee — return it so caller can re-issue.
1403                return Err((cur_key, cur_val));
1404            }
1405        }
1406        // Wrapped cap probes with no free slot — invariant violation
1407        // (load < 1 should guarantee at least one empty). Signal as
1408        // PSL-overflow equivalent so caller grows + retries.
1409        Err((cur_key, cur_val))
1410    }
1411
1412    /// C3 — Grow SoA capacity if the load factor is at or above the
1413    /// 0.75 trigger. Doubles cap; from empty grows to SOA_INITIAL_CAP.
1414    fn soa_grow_if_needed(&mut self, heap: &mut Heap) -> Result<(), TableError> {
1415        // Defer rehash when an iterator is in flight (R-A3). Wired
1416        // in Phase F; in Phase C iter_depth is always 0.
1417        if self.iter_depth > 0 {
1418            return Ok(());
1419        }
1420        let cap = self.meta.len();
1421        if cap == 0 {
1422            return self.soa_rehash_to(heap, SOA_INITIAL_CAP);
1423        }
1424        let occupied = self.soa_occupied_count();
1425        if occupied * SOA_LOAD_DEN >= cap * SOA_LOAD_NUM {
1426            let new_cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1427            return self.soa_rehash_to(heap, new_cap);
1428        }
1429        // Tombstone compaction (same cap, drops tombstones).
1430        if self.tombstones as usize * SOA_TOMB_DEN >= cap * SOA_TOMB_NUM {
1431            return self.soa_rehash_to(heap, cap);
1432        }
1433        Ok(())
1434    }
1435
1436    /// C3 — Insert (or update) `(k, v)` in the SoA hash part. Routes
1437    /// through `soa_find_slot` first so an existing key updates its
1438    /// val in place; otherwise rob-from-rich places a new entry.
1439    /// Auto-rehashes if the load factor would exceed 0.75 OR if the
1440    /// place chain runs into a PSL overflow on a pathological hash
1441    /// distribution.
1442    ///
1443    /// Phase C: this method is callable from outside via the
1444    /// equivalence-test entrypoint (Phase G); not yet hooked into
1445    /// public `set` / `set_norm`.
1446    pub(crate) fn soa_insert(
1447        &mut self,
1448        heap: &mut Heap,
1449        k: Value,
1450        v: Value,
1451    ) -> Result<(), TableError> {
1452        debug_assert!(!matches!(k, Value::Nil));
1453        // 1. Update-in-place if key is already present (live slot).
1454        if let Some(idx) = self.soa_find_slot(k) {
1455            self.vals[idx] = v;
1456            return Ok(());
1457        }
1458        // 2. New key: ensure capacity, then place. On PSL-overflow
1459        // from the place chain (extremely rare with 14-bit PSL budget),
1460        // grow + rehash with the homeless evictee merged in.
1461        // `soa_rehash_with_extra` handles further retries internally,
1462        // bounded by MAX_ASIZE.
1463        self.soa_grow_if_needed(heap)?;
1464        match self.soa_place_known_absent(k, v) {
1465            Ok(_) => Ok(()),
1466            Err(homeless) => {
1467                let cap = self.meta.len();
1468                let new_cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1469                self.soa_rehash_with_extra(heap, new_cap, homeless)
1470            }
1471        }
1472    }
1473
1474    /// C3 — Rehash to `new_cap` while merging in an extra (k, v) pair
1475    /// not currently in the SoA arrays. Used by `soa_insert` to
1476    /// recover from PSL overflow: the homeless evictee from the failed
1477    /// place chain gets appended to the survivor list before the
1478    /// re-insert pass.
1479    fn soa_rehash_with_extra(
1480        &mut self,
1481        heap: &mut Heap,
1482        new_cap: usize,
1483        extra: (Value, Value),
1484    ) -> Result<(), TableError> {
1485        let before = self.internal_bytes();
1486        let mut survivors: Vec<(Value, Value)> = Vec::with_capacity(self.meta.len() + 1);
1487        for i in 0..self.meta.len() {
1488            if meta_bits::is_live(self.meta[i]) {
1489                survivors.push((self.keys[i], self.vals[i]));
1490            }
1491        }
1492        // Avoid duplicating the extra if its key was already placed at
1493        // some slot during the failed rob chain (the rob may have
1494        // landed the original input into a slot before overflowing on
1495        // a downstream evictee — that case the meta-walk above picks
1496        // it up).
1497        if !survivors.iter().any(|(k, _)| k.raw_eq(extra.0)) {
1498            survivors.push(extra);
1499        }
1500        let mut cap = new_cap;
1501        loop {
1502            if cap > MAX_ASIZE {
1503                return Err(TableError::Overflow);
1504            }
1505            self.keys = vec![Value::Nil; cap].into_boxed_slice();
1506            self.vals = vec![Value::Nil; cap].into_boxed_slice();
1507            self.meta = vec![meta_bits::EMPTY; cap].into_boxed_slice();
1508            self.tombstones = 0;
1509            let mut overflowed = false;
1510            for (k, v) in survivors.iter().copied() {
1511                if self.soa_place_known_absent(k, v).is_err() {
1512                    overflowed = true;
1513                    break;
1514                }
1515            }
1516            if !overflowed {
1517                break;
1518            }
1519            cap = cap.checked_mul(2).ok_or(TableError::Overflow)?;
1520        }
1521        let after = self.internal_bytes();
1522        heap.apply_bytes_delta(before, after);
1523        Ok(())
1524    }
1525
1526    /// C3 — Read SoA hash part. Mirrors `get_hash` but reads from
1527    /// keys/vals/meta rather than nodes. Used by the Phase G
1528    /// equivalence test; not yet hooked into public `get` / `get_hash`.
1529    pub(crate) fn soa_get(&self, k: Value) -> Value {
1530        match self.soa_find_slot(k) {
1531            Some(idx) => self.vals[idx],
1532            None => Value::Nil,
1533        }
1534    }
1535
1536    /// C3 — Tombstone deletion. Marks the live slot for `k` as
1537    /// tombstoned, preserving the slot index (no backward shift).
1538    /// Slot-index stability is the PUC `next()` iteration invariant
1539    /// — `nextvar.lua:520-521` requires that deleting prior keys
1540    /// during a `pairs` traversal does NOT move unvisited keys.
1541    /// Backward-shift deletion would violate this; tombstones are
1542    /// the standard Robin Hood resolution (see RFC §4.5 + §5.1).
1543    ///
1544    /// keys[idx] / vals[idx] are reset to Nil so the GC marker is
1545    /// not held to the previous entries — only the tombstone bit
1546    /// distinguishes "occupied tombstone" from "free empty".
1547    ///
1548    /// Returns true if the key was found and deleted, false if absent.
1549    ///
1550    /// Phase D: not yet hooked into public `set(k, Nil)` — wired in
1551    /// Phase E alongside the `next()` migration.
1552    pub(crate) fn soa_delete(&mut self, k: Value) -> bool {
1553        if let Some(idx) = self.soa_find_slot(k) {
1554            let psl = meta_bits::psl(self.meta[idx]);
1555            self.meta[idx] = meta_bits::pack(psl, true);
1556            self.keys[idx] = Value::Nil;
1557            self.vals[idx] = Value::Nil;
1558            self.tombstones = self.tombstones.saturating_add(1);
1559            true
1560        } else {
1561            false
1562        }
1563    }
1564}
1565
1566fn normalize_set_key(key: Value) -> Result<Value, TableError> {
1567    match key {
1568        Value::Nil => Err(TableError::NilIndex),
1569        Value::Float(f) => match f2i_exact(f) {
1570            Some(i) => Ok(Value::Int(i)),
1571            None if f.is_nan() => Err(TableError::NanIndex),
1572            None => Ok(key),
1573        },
1574        k => Ok(k),
1575    }
1576}
1577
1578fn hash_key(k: Value) -> u64 {
1579    match k {
1580        Value::Int(i) => i as u64, // identity mod size (PUC hashint)
1581        Value::Float(f) => mix64(f.to_bits()),
1582        Value::Bool(b) => b as u64 + 1,
1583        Value::Str(s) => s.hash() as u64,
1584        Value::Table(t) => mix64(t.as_ptr() as u64),
1585        Value::Closure(c) => mix64(c.as_ptr() as u64),
1586        Value::Native(n) => mix64(n.as_ptr() as u64),
1587        Value::Coro(co) => mix64(co.as_ptr() as u64),
1588        Value::Userdata(u) => mix64(u.as_ptr() as u64),
1589        Value::LightUserdata(p) => mix64(p as u64),
1590        Value::Nil => 0, // unreachable as a stored key
1591    }
1592}
1593
1594/// splitmix64 finalizer.
1595fn mix64(mut x: u64) -> u64 {
1596    x ^= x >> 30;
1597    x = x.wrapping_mul(0xbf58_476d_1ce4_e5b9);
1598    x ^= x >> 27;
1599    x = x.wrapping_mul(0x94d0_49bb_1331_11eb);
1600    x ^ (x >> 31)
1601}
1602
1603/// For k ≥ 1: the bucket l such that k ∈ (2^(l-1), 2^l].
1604fn ceil_log2(k: u64) -> usize {
1605    (u64::BITS - (k - 1).leading_zeros()) as usize
1606}
1607
1608impl Table {
1609    /// Preallocate the array part (table.create); existing contents are
1610    /// preserved.
1611    pub fn ensure_array(&mut self, heap: &mut Heap, n: usize) {
1612        if n > self.asize() {
1613            let hash_entries = self.nodes.iter().filter(|nd| !nd.val.is_nil()).count();
1614            self.resize(heap, n, hash_entries);
1615        }
1616    }
1617}
1618
1619impl Table {
1620    /// Preallocate hash-part capacity (table.create's second size).
1621    pub fn ensure_hash(&mut self, heap: &mut Heap, n: usize) {
1622        let entries = self.nodes.iter().filter(|nd| !nd.val.is_nil()).count();
1623        if n > self.nodes.len() {
1624            self.resize(heap, self.asize(), n.max(entries));
1625        }
1626    }
1627}
1628
1629#[cfg(test)]
1630mod tests {
1631    use super::*;
1632    use crate::runtime::heap::Heap;
1633
1634    fn with_table(f: impl FnOnce(&mut Heap, &mut Table)) {
1635        let mut heap = Heap::new();
1636        let t = heap.new_table();
1637        f(&mut heap, unsafe { t.as_mut() });
1638    }
1639
1640    fn assert_is_border(t: &Table, n: i64) {
1641        if n == 0 {
1642            assert!(t.get_int(1).is_nil(), "border 0 but t[1] non-nil");
1643        } else {
1644            assert!(!t.get_int(n).is_nil(), "border {n} but t[{n}] is nil");
1645            assert!(
1646                t.get_int(n + 1).is_nil(),
1647                "border {n} but t[{}] non-nil",
1648                n + 1
1649            );
1650        }
1651    }
1652
1653    /// v2.1 Phase 1I.B — pin `Box<[Node]>` fat-ptr layout at runtime.
1654    /// The luna-jit table-field IC reads `(ptr, len)` directly out of
1655    /// the `nodes` field assuming the data pointer occupies the low 8
1656    /// bytes and the length the high 8 bytes (de-facto Rust ABI on
1657    /// 64-bit targets but not formally guaranteed). If a future Rust
1658    /// release reorders the fat-ptr, this test fails before IC fires
1659    /// at runtime.
1660    #[test]
1661    #[allow(clippy::assertions_on_constants)]
1662    #[cfg(target_pointer_width = "64")]
1663    fn phase_1i_b_node_layout_pinned() {
1664        use jit_layout::*;
1665        assert_eq!(std::mem::size_of::<Box<[Node]>>(), 16);
1666        assert_eq!(NODE_KEY_OFFSET, 0);
1667        assert_eq!(NODE_VAL_OFFSET, 16);
1668        assert!(SIZEOF_NODE >= 32);
1669
1670        // Construct a real Box<[Node]> with a known length, then
1671        // peek at the fat-pointer's two halves to confirm the
1672        // (data_ptr, len) order. Use a 4-slot box so the length is
1673        // non-zero and the data pointer is heap-allocated.
1674        let b: Box<[Node]> = vec![Node::EMPTY; 4].into_boxed_slice();
1675        let raw_ptr = b.as_ptr();
1676        let raw_len = b.len();
1677        // SAFETY: reading the fat pointer's two words is exactly the
1678        // layout luna-jit's IR assumes; it's the safest possible test
1679        // of that assumption.
1680        let words: [usize; 2] = unsafe { std::mem::transmute_copy(&b) };
1681        assert_eq!(words[0], raw_ptr as usize, "fat-ptr low word = data ptr");
1682        assert_eq!(words[1], raw_len, "fat-ptr high word = len");
1683        drop(b);
1684    }
1685
1686    #[test]
1687    fn sequence_grows_into_array() {
1688        with_table(|heap, t| {
1689            for i in 1..=1000 {
1690                let _ = t.set_int(heap, i, Value::Int(i * 10));
1691            }
1692            for i in 1..=1000 {
1693                assert!(t.get_int(i).raw_eq(Value::Int(i * 10)));
1694            }
1695            assert_eq!(t.len(), 1000);
1696        });
1697    }
1698
1699    #[test]
1700    fn string_and_mixed_keys() {
1701        with_table(|heap, t| {
1702            let k1 = Value::Str(heap.intern(b"alpha"));
1703            let k2 = Value::Str(heap.intern(b"beta"));
1704            t.set(heap, k1, Value::Int(1)).unwrap();
1705            t.set(heap, k2, Value::Int(2)).unwrap();
1706            t.set(heap, Value::Bool(true), Value::Int(3)).unwrap();
1707            t.set(heap, Value::Int(-5), Value::Int(4)).unwrap();
1708            // re-interned key reaches the same slot
1709            let k1b = Value::Str(heap.intern(b"alpha"));
1710            assert!(t.get(k1b).raw_eq(Value::Int(1)));
1711            assert!(t.get(k2).raw_eq(Value::Int(2)));
1712            assert!(t.get(Value::Bool(true)).raw_eq(Value::Int(3)));
1713            assert!(t.get(Value::Int(-5)).raw_eq(Value::Int(4)));
1714            assert!(t.get(Value::Str(heap.intern(b"gamma"))).is_nil());
1715        });
1716    }
1717
1718    #[test]
1719    fn float_keys_normalize_to_int() {
1720        with_table(|heap, t| {
1721            t.set(heap, Value::Float(2.0), Value::Int(22)).unwrap();
1722            assert!(t.get(Value::Int(2)).raw_eq(Value::Int(22)));
1723            t.set(heap, Value::Int(3), Value::Int(33)).unwrap();
1724            assert!(t.get(Value::Float(3.0)).raw_eq(Value::Int(33)));
1725            // -0.0 is key 0
1726            t.set(heap, Value::Float(-0.0), Value::Int(0)).unwrap();
1727            assert!(t.get(Value::Int(0)).raw_eq(Value::Int(0)));
1728            // non-integral floats are their own keys
1729            t.set(heap, Value::Float(0.5), Value::Int(55)).unwrap();
1730            assert!(t.get(Value::Float(0.5)).raw_eq(Value::Int(55)));
1731            assert!(t.get(Value::Int(0)).raw_eq(Value::Int(0)));
1732        });
1733    }
1734
1735    #[test]
1736    fn bad_keys() {
1737        with_table(|heap, t| {
1738            assert_eq!(
1739                t.set(heap, Value::Nil, Value::Int(1)),
1740                Err(TableError::NilIndex)
1741            );
1742            assert_eq!(
1743                t.set(heap, Value::Float(f64::NAN), Value::Int(1)),
1744                Err(TableError::NanIndex)
1745            );
1746            // reads with bad keys are nil, not errors
1747            assert!(t.get(Value::Nil).is_nil());
1748            assert!(t.get(Value::Float(f64::NAN)).is_nil());
1749        });
1750    }
1751
1752    #[test]
1753    fn delete_and_reinsert() {
1754        with_table(|heap, t| {
1755            let k = Value::Str(heap.intern(b"k"));
1756            t.set(heap, k, Value::Int(1)).unwrap();
1757            t.set(heap, k, Value::Nil).unwrap();
1758            assert!(t.get(k).is_nil());
1759            t.set(heap, k, Value::Int(2)).unwrap();
1760            assert!(t.get(k).raw_eq(Value::Int(2)));
1761            // setting an absent key to nil stays absent
1762            let k2 = Value::Str(heap.intern(b"k2"));
1763            t.set(heap, k2, Value::Nil).unwrap();
1764            assert!(t.get(k2).is_nil());
1765        });
1766    }
1767
1768    #[test]
1769    fn borders_with_holes() {
1770        with_table(|heap, t| {
1771            let _ = t.set_int(heap, 1, Value::Int(1));
1772            let _ = t.set_int(heap, 2, Value::Int(2));
1773            assert_eq!(t.len(), 2);
1774            t.set_int(heap, 2, Value::Nil).unwrap();
1775            assert_is_border(t, t.len());
1776            // hash-resident tail
1777            let _ = t.set_int(heap, 1_000_000, Value::Int(1));
1778            assert_is_border(t, t.len());
1779        });
1780    }
1781
1782    #[test]
1783    fn len_on_empty_and_hash_only() {
1784        with_table(|heap, t| {
1785            assert_eq!(t.len(), 0);
1786            let xk = Value::Str(heap.intern(b"x"));
1787            t.set(heap, xk, Value::Int(1)).unwrap();
1788            assert_eq!(t.len(), 0);
1789        });
1790    }
1791
1792    #[test]
1793    fn next_iterates_everything_exactly_once() {
1794        with_table(|heap, t| {
1795            let mut expected = 0i64;
1796            for i in 1..=64 {
1797                let _ = t.set_int(heap, i, Value::Int(i));
1798                expected += i;
1799            }
1800            for i in 0..32 {
1801                let k = Value::Str(heap.intern(format!("s{i}").as_bytes()));
1802                t.set(heap, k, Value::Int(1000 + i)).unwrap();
1803                expected += 1000 + i;
1804            }
1805            t.set(heap, Value::Float(2.5), Value::Int(7)).unwrap();
1806            expected += 7;
1807
1808            let mut sum = 0i64;
1809            let mut count = 0;
1810            let mut key = Value::Nil;
1811            while let Some((k, v)) = t.next(key).unwrap() {
1812                let Value::Int(x) = v else {
1813                    panic!("bad value")
1814                };
1815                sum += x;
1816                count += 1;
1817                key = k;
1818            }
1819            assert_eq!(count, 64 + 32 + 1);
1820            assert_eq!(sum, expected);
1821        });
1822    }
1823
1824    #[test]
1825    fn next_skips_nil_values_and_rejects_alien_keys() {
1826        with_table(|heap, t| {
1827            let _ = t.set_int(heap, 1, Value::Int(1));
1828            let _ = t.set_int(heap, 3, Value::Int(3));
1829            let k = Value::Str(heap.intern(b"gone"));
1830            t.set(heap, k, Value::Int(9)).unwrap();
1831            t.set(heap, k, Value::Nil).unwrap();
1832            let mut seen = Vec::new();
1833            let mut key = Value::Nil;
1834            while let Some((k, v)) = t.next(key).unwrap() {
1835                let Value::Int(x) = v else { panic!() };
1836                seen.push(x);
1837                key = k;
1838            }
1839            assert_eq!(seen, vec![1, 3]);
1840            // a key never inserted is invalid for next
1841            let alien = Value::Str(heap.intern(b"never"));
1842            assert!(matches!(t.next(alien), Err(TableError::InvalidNext)));
1843            // ...but a deleted (nil-valued) key is still a valid cursor
1844            assert!(t.next(k).is_ok());
1845        });
1846    }
1847
1848    #[test]
1849    fn collision_relocation_keeps_chains_intact() {
1850        with_table(|heap, t| {
1851            // dense negative ints all land in the hash part; with identity
1852            // hashing they exercise both chain cases heavily
1853            for i in 0..512 {
1854                let _ = t.set_int(heap, -i, Value::Int(i));
1855            }
1856            for i in 0..512 {
1857                assert!(t.get_int(-i).raw_eq(Value::Int(i)), "lost key {}", -i);
1858            }
1859        });
1860    }
1861
1862    // -----------------------------------------------------------------
1863    // C3 SoA Robin Hood equivalence tests (Phase G).
1864    //
1865    // Cross-check the new SoA + RH path against the existing chain-walk
1866    // path: replay the same insert/lookup sequence on a table via
1867    // `set` (chain) and another via `soa_insert` (SoA), then assert
1868    // `get == soa_get` for every key.
1869    //
1870    // Phase B+C scope: insert + read only. Tombstone delete equivalence
1871    // arrives with Phase D.
1872    // -----------------------------------------------------------------
1873
1874    fn replay_chain(heap: &mut Heap, ops: &[(Value, Value)]) -> *mut Table {
1875        let t = heap.new_table();
1876        let tref = unsafe { t.as_mut() };
1877        for (k, v) in ops.iter().copied() {
1878            tref.set(heap, k, v).unwrap();
1879        }
1880        t.as_ptr()
1881    }
1882
1883    fn replay_soa(heap: &mut Heap, ops: &[(Value, Value)]) -> *mut Table {
1884        let t = heap.new_table();
1885        let tref = unsafe { t.as_mut() };
1886        for (k, v) in ops.iter().copied() {
1887            tref.soa_insert(heap, k, v).unwrap();
1888        }
1889        t.as_ptr()
1890    }
1891
1892    #[test]
1893    fn c3_soa_equivalence_string_keys() {
1894        let mut heap = Heap::new();
1895        let mut ops = Vec::new();
1896        for i in 0..40 {
1897            let k = Value::Str(heap.intern(format!("key_{i:03}").as_bytes()));
1898            ops.push((k, Value::Int(i * 7)));
1899        }
1900        let chain = unsafe { &*replay_chain(&mut heap, &ops) };
1901        let soa = unsafe { &*replay_soa(&mut heap, &ops) };
1902        for (k, _) in &ops {
1903            let cv = chain.get(*k);
1904            let sv = soa.soa_get(*k);
1905            assert!(
1906                cv.raw_eq(sv),
1907                "SoA vs chain mismatch on key — chain={:?} soa={:?}",
1908                cv,
1909                sv,
1910            );
1911        }
1912        // Absent key returns nil from both paths.
1913        let absent = Value::Str(heap.intern(b"never"));
1914        assert!(chain.get(absent).is_nil());
1915        assert!(soa.soa_get(absent).is_nil());
1916    }
1917
1918    #[test]
1919    fn c3_soa_equivalence_negative_int_keys() {
1920        // Dense negative ints with identity hashing — same collision
1921        // profile as the existing `collision_relocation_keeps_chains_intact`
1922        // test, but verified through the SoA RH path. Triggers
1923        // rob-from-rich repeatedly.
1924        let mut heap = Heap::new();
1925        let mut ops = Vec::new();
1926        for i in 0..256 {
1927            let k = Value::Int(-i);
1928            ops.push((k, Value::Int(i)));
1929        }
1930        let chain = unsafe { &*replay_chain(&mut heap, &ops) };
1931        let soa = unsafe { &*replay_soa(&mut heap, &ops) };
1932        for (k, _) in &ops {
1933            let cv = chain.get(*k);
1934            let sv = soa.soa_get(*k);
1935            assert!(cv.raw_eq(sv), "SoA mismatch on key {:?}", k);
1936        }
1937    }
1938
1939    #[test]
1940    fn c3_soa_equivalence_mixed_keys_with_updates() {
1941        // Insert, then update the same keys with new values — exercises
1942        // the soa_find_slot in-place update branch.
1943        let mut heap = Heap::new();
1944        let kstr = Value::Str(heap.intern(b"x"));
1945        let kint = Value::Int(42);
1946        let kbool = Value::Bool(true);
1947        let ops: Vec<(Value, Value)> = vec![
1948            (kstr, Value::Int(1)),
1949            (kint, Value::Int(2)),
1950            (kbool, Value::Int(3)),
1951            (kstr, Value::Int(11)),  // update
1952            (kint, Value::Int(22)),  // update
1953            (kbool, Value::Int(33)), // update
1954        ];
1955        let chain = unsafe { &*replay_chain(&mut heap, &ops) };
1956        let soa = unsafe { &*replay_soa(&mut heap, &ops) };
1957        for k in [kstr, kint, kbool] {
1958            assert!(chain.get(k).raw_eq(soa.soa_get(k)));
1959        }
1960    }
1961
1962    #[test]
1963    fn c3_soa_equivalence_delete_then_read() {
1964        // Phase D: tombstone delete + read on both paths, verify
1965        // matching nil-for-deleted, original-val-for-live.
1966        let mut heap = Heap::new();
1967        let mut ops_insert = Vec::new();
1968        for i in 0..30 {
1969            let k = Value::Str(heap.intern(format!("d_key_{i:03}").as_bytes()));
1970            ops_insert.push((k, Value::Int(i * 11)));
1971        }
1972        let chain = unsafe { &mut *replay_chain(&mut heap, &ops_insert) };
1973        let soa = unsafe { &mut *replay_soa(&mut heap, &ops_insert) };
1974        // Delete every 3rd key.
1975        let mut deleted: Vec<Value> = Vec::new();
1976        for (i, (k, _)) in ops_insert.iter().enumerate() {
1977            if i % 3 == 0 {
1978                // chain: set to Nil is the chain-path's delete equivalent
1979                chain.set(&mut heap, *k, Value::Nil).unwrap();
1980                let was_present = soa.soa_delete(*k);
1981                assert!(was_present, "soa_delete miss on inserted key {:?}", k);
1982                deleted.push(*k);
1983            }
1984        }
1985        // Read each key: deleted → nil, non-deleted → original val.
1986        for (k, v) in &ops_insert {
1987            let cv = chain.get(*k);
1988            let sv = soa.soa_get(*k);
1989            assert!(
1990                cv.raw_eq(sv),
1991                "delete/read mismatch on key {:?} — chain={:?} soa={:?}",
1992                k,
1993                cv,
1994                sv,
1995            );
1996            if deleted.iter().any(|d| d.raw_eq(*k)) {
1997                assert!(cv.is_nil(), "deleted key {:?} chain non-nil", k);
1998                assert!(sv.is_nil(), "deleted key {:?} soa non-nil", k);
1999            } else {
2000                assert!(cv.raw_eq(*v), "live key {:?} chain val drift", k);
2001            }
2002        }
2003        // Deleting an absent key is a no-op (returns false) on SoA.
2004        let absent = Value::Str(heap.intern(b"never_d"));
2005        assert!(!soa.soa_delete(absent));
2006    }
2007
2008    #[test]
2009    fn c3_soa_delete_then_reinsert_uses_tombstone() {
2010        // After delete + reinsert, key is findable with new val. The
2011        // SoA path may reuse the tombstoned slot (preferred) or place
2012        // elsewhere — either is correct as long as soa_get returns
2013        // the new val.
2014        let mut heap = Heap::new();
2015        let t = heap.new_table();
2016        let tref = unsafe { t.as_mut() };
2017        let k = Value::Str(heap.intern(b"reinsert_target"));
2018        tref.soa_insert(&mut heap, k, Value::Int(100)).unwrap();
2019        assert!(tref.soa_get(k).raw_eq(Value::Int(100)));
2020        let pre_tombs = tref.tombstones;
2021        assert!(tref.soa_delete(k));
2022        assert!(tref.tombstones == pre_tombs + 1);
2023        assert!(tref.soa_get(k).is_nil());
2024        // Reinsert with new val.
2025        tref.soa_insert(&mut heap, k, Value::Int(200)).unwrap();
2026        assert!(tref.soa_get(k).raw_eq(Value::Int(200)));
2027        // Tombstone reused — count back to pre_tombs.
2028        assert_eq!(tref.tombstones, pre_tombs);
2029    }
2030
2031    #[test]
2032    fn c3_soa_grows_under_load_pressure() {
2033        // Stress test: insert enough entries to trigger multiple RH
2034        // rehashes (cap doubles at load 0.75). Confirms PSL overflow
2035        // never fires and all keys survive grow cycles.
2036        let mut heap = Heap::new();
2037        let t = heap.new_table();
2038        let tref = unsafe { t.as_mut() };
2039        for i in 0..1024 {
2040            let k = Value::Str(heap.intern(format!("entry_{i:05}").as_bytes()));
2041            tref.soa_insert(&mut heap, k, Value::Int(i)).unwrap();
2042        }
2043        // Verify every key is findable.
2044        for i in 0..1024 {
2045            let k = Value::Str(heap.intern(format!("entry_{i:05}").as_bytes()));
2046            let v = tref.soa_get(k);
2047            assert!(
2048                v.raw_eq(Value::Int(i)),
2049                "SoA lost key entry_{:05} — got {:?}",
2050                i,
2051                v,
2052            );
2053        }
2054        assert!(tref.soa_live_count() == 1024);
2055        // Cap should have grown past the initial SOA_INITIAL_CAP via
2056        // the 0.75 load-factor trigger.
2057        assert!(
2058            tref.soa_cap() >= 2048,
2059            "SoA cap = {} after 1024 inserts — load gate didn't grow",
2060            tref.soa_cap(),
2061        );
2062    }
2063
2064    #[test]
2065    fn rehash_redistributes_into_array() {
2066        with_table(|heap, t| {
2067            // insert 1..n in reverse: starts in hash, rehash must migrate
2068            for i in (1..=256).rev() {
2069                let _ = t.set_int(heap, i, Value::Int(i));
2070            }
2071            assert_eq!(t.len(), 256);
2072            for i in 1..=256 {
2073                assert!(t.get_int(i).raw_eq(Value::Int(i)));
2074            }
2075        });
2076    }
2077}