Skip to main content

luna_core/runtime/
heap.rs

1//! GC heap v1: precise stop-the-world mark & sweep over an intrusive
2//! all-objects list (PUC `allgc` shape). All unsafe object plumbing is
3//! confined to this module and `string`/`table` internals.
4//!
5//! `Gc<T>` safety contract: the runtime is single-threaded; a `Gc` pointer is
6//! valid until a `collect()` call that does not reach it from the given
7//! roots. Callers must root every value they keep across a collect.
8
9use std::fmt;
10use std::ops::Deref;
11use std::ptr::{self, NonNull};
12
13use crate::runtime::function::{LuaClosure, NativeClosure, Proto, UpvalState, Upvalue};
14use crate::runtime::string::{self, LuaStr, StringTable};
15use crate::runtime::table::Table;
16use crate::runtime::userdata::{Userdata, UserdataPayload};
17use crate::runtime::value::Value;
18
19/// Discriminator the GC stores in every [`GcHeader`] so a raw header pointer
20/// can be cast back to the right object kind during tracing and sweeping.
21#[derive(Clone, Copy, PartialEq, Eq, Debug)]
22#[repr(u8)]
23pub enum ObjTag {
24    /// [`crate::runtime::string::LuaStr`].
25    Str,
26    /// [`crate::runtime::table::Table`].
27    Table,
28    /// [`crate::runtime::function::Proto`].
29    Proto,
30    /// [`crate::runtime::function::LuaClosure`].
31    Closure,
32    /// [`crate::runtime::function::Upvalue`].
33    Upvalue,
34    /// [`crate::runtime::function::NativeClosure`].
35    Native,
36    /// [`crate::runtime::coroutine::Coro`].
37    Coro,
38    /// [`crate::runtime::userdata::Userdata`].
39    Userdata,
40}
41
42/// Header prefix on every GC-managed object: intrusive next-link + type tag +
43/// mark bits. Always at offset 0 of the containing struct (`#[repr(C)]`).
44#[repr(C)]
45pub struct GcHeader {
46    next: *mut GcHeader,
47    tag: ObjTag,
48    /// tricolor + finalizer state. PUC `gch.marked` layout (lgc.h):
49    ///   bit 0 WHITE0 — current-white-A
50    ///   bit 1 WHITE1 — current-white-B (the unused white in any given cycle
51    ///                  is the "other-white" / dead-white at sweep time)
52    ///   bit 2 BLACK  — propagated; outgoing refs already traced
53    ///   bit 3 FIN    — registered for `__gc` (tracked in `finalize`)
54    ///   bit 4 FINALIZED — already enqueued or finalized once this lifetime
55    ///   bit 5 DEFERRED  — 5.3 cycle-finalize deferral marker (gc.lua :502)
56    /// Gray = no white bits, no BLACK; that is the in-stack state between the
57    /// time a Marker visits an object and the time it traces it.
58    flags: u8,
59}
60
61const WHITE0: u8 = 1;
62const WHITE1: u8 = 2;
63const BLACK: u8 = 4;
64const WHITE_BITS: u8 = WHITE0 | WHITE1;
65const COLOR_BITS: u8 = WHITE_BITS | BLACK;
66
67/// registered for finalization (`__gc`): the object is tracked in `finalize`.
68const FIN: u8 = 8;
69/// finalization already scheduled/run: never finalize this object again (PUC
70/// FINALIZEDBIT). Set when it moves to `tobefnz`.
71const FINALIZED: u8 = 16;
72/// resurrected once because a reference cycle through a coroutine kept the
73/// finalizable alive (PUC 5.3 gc.lua :502 "two collections are needed to
74/// break cycle"). The next time the object is found unreachable it is moved
75/// to `tobefnz` without re-deferring.
76const DEFERRED: u8 = 32;
77
78#[inline(always)]
79fn is_white(flags: u8) -> bool {
80    flags & WHITE_BITS != 0
81}
82#[inline(always)]
83fn is_black(flags: u8) -> bool {
84    flags & BLACK != 0
85}
86
87/// True when an object header has been reached by marking (gray or black).
88/// `pub(crate)` so other runtime modules (e.g. `Table::refs_contain_unmarked_coro`)
89/// can probe reachability without owning the bit constants. Equivalent to
90/// `isgray(o) || isblack(o)` in PUC.
91pub(crate) fn header_is_marked(h: *mut GcHeader) -> bool {
92    // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
93    unsafe { !is_white((*h).flags) }
94}
95
96impl GcHeader {
97    pub(crate) fn new(tag: ObjTag) -> GcHeader {
98        GcHeader {
99            next: ptr::null_mut(),
100            tag,
101            flags: 0,
102        }
103    }
104}
105
106/// `Copy` handle to a heap-allocated GC-managed object. Layout is a single
107/// `NonNull<T>`; the GC walks reachability via root scanning and intrusive
108/// linkage on [`GcHeader`], not via reference counts.
109pub struct Gc<T> {
110    ptr: NonNull<T>,
111}
112
113impl<T> Clone for Gc<T> {
114    fn clone(&self) -> Self {
115        *self
116    }
117}
118impl<T> Copy for Gc<T> {}
119
120impl<T> Gc<T> {
121    #[doc(hidden)]
122    pub fn from_ptr(p: *mut T) -> Gc<T> {
123        Gc {
124            ptr: NonNull::new(p).expect("gc pointer must be non-null"),
125        }
126    }
127
128    /// Raw pointer to the referent. Always non-null; valid for the lifetime
129    /// of the [`Heap`] that allocated it as long as the object is reachable.
130    pub fn as_ptr(self) -> *mut T {
131        self.ptr.as_ptr()
132    }
133
134    /// Pointer-identity equality (PUC `rawequal` for reference types).
135    pub fn ptr_eq(self, other: Gc<T>) -> bool {
136        self.ptr == other.ptr
137    }
138
139    /// SAFETY: caller must ensure no other live reference to the object and
140    /// no collect() while the borrow is held (single-threaded runtime).
141    ///
142    /// `#[doc(hidden)]` (Track A4 — pub-surface 0 unsafe): embedders should
143    /// not see this in rustdoc. The safe path for mutating freshly-allocated
144    /// tables is the `TableBuilder` / `vm.table_of(...)` API (Track B3).
145    /// Cross-crate access from `luna` (e.g. `jit_backend`, `capi`) keeps
146    /// working — `#[doc(hidden)] pub` doesn't demote visibility, just docs.
147    #[doc(hidden)]
148    pub unsafe fn as_mut<'a>(self) -> &'a mut T {
149        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
150        unsafe { &mut *self.ptr.as_ptr() }
151    }
152}
153
154impl<T> Deref for Gc<T> {
155    type Target = T;
156    fn deref(&self) -> &T {
157        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
158        unsafe { self.ptr.as_ref() }
159    }
160}
161
162impl<T> fmt::Debug for Gc<T> {
163    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
164        write!(f, "Gc({:p})", self.ptr.as_ptr())
165    }
166}
167
168/// Incremental GC phase.
169///   * `Pause`     — no cycle in progress; all objects current-white.
170///   * `Propagate` — gray queue + propagate-state populated; mutator runs
171///                   alongside `gc_step_propagate(budget)` calls. Born objects
172///                   stamp the current-white; barriers re-gray modified
173///                   parents. Transitions to `Sweep` via `gc_finish_atomic`.
174///   * `Sweep`     — `sweep_cur` is the detached old heap being budget-swept.
175///
176/// `mark_all` (the STW path used by `collect_ex`) sequences start_propagate +
177/// drain_all + finish_atomic inline, never crossing a step boundary.
178#[derive(Clone, Copy, PartialEq, Eq, Debug)]
179enum GcPhase {
180    Pause,
181    Propagate,
182    Sweep,
183}
184
185/// Cross-step traversal state for the incremental Propagate phase. Owned by
186/// `Heap.propagate` (`Some` between `gc_start_propagate` and `gc_finish_atomic`,
187/// `None` otherwise). The gray queue itself lives in `Heap.gray` so write
188/// barriers can push directly without going through the Option.
189struct PropagateState {
190    weak: Vec<*mut Table>,
191    ephemeron: Vec<*mut Table>,
192    cached_protos: Vec<*mut Proto>,
193    no_ephemeron: bool,
194}
195
196/// luna's incremental mark-sweep GC heap. Owns every [`Gc<T>`] allocation
197/// (one per Vm); produces handles via the `new_*` constructors and traces
198/// reachability through registered roots. Holds the string-intern table and
199/// the auto-GC pacing state.
200pub struct Heap {
201    all: *mut GcHeader,
202    strings: StringTable,
203    seed: u32,
204    live: usize,
205    /// approximate allocated bytes — shells only. Each `link()` adds
206    /// `size_of::<T>` for its tag; each `free_obj` subtracts the same.
207    /// Internal Vec/Box growth (table array/hash parts, proto code,
208    /// closure upvals slice) is NOT auto-tracked, so this is a lower
209    /// bound on real memory. PUC's `g->GCtotalbytes` is exact because
210    /// `lmem.c` routes every malloc/free through one helper; luna pays
211    /// for that uniformity in exchange for a smaller, drift-free count.
212    bytes: usize,
213    /// byte threshold at which the VM should run a collection (auto-GC pacing)
214    next_gc: usize,
215    /// PUC `g->currentwhite`: which white bit (WHITE0 or WHITE1) means
216    /// "born / surviving this cycle". The other white is the dead-white that
217    /// sweep collects. Flipped at the end of each mark cycle (`atomic`).
218    current_white: u8,
219    /// Persistent gray queue: holds objects grayed by write barriers between
220    /// the time the marker first reached them and the next propagate step.
221    /// Lives outside `propagate` so barriers can push without going through
222    /// the Option; `gc_step_propagate` and `gc_finish_atomic` drain it.
223    gray: Vec<*mut GcHeader>,
224    /// Incremental traversal state. `Some` between `gc_start_propagate` and
225    /// `gc_finish_atomic` (and inline within `mark_all`); `None` otherwise.
226    propagate: Option<PropagateState>,
227    /// incremental-sweep phase (Pause unless a `step` cycle is mid-sweep)
228    phase: GcPhase,
229    /// the remaining detached object list being swept during `GcPhase::Sweep`;
230    /// survivors are spliced back onto `all`, garbage is freed
231    sweep_cur: *mut GcHeader,
232    /// `collectgarbage("stop")`: auto-GC is suspended while true
233    gc_stopped: bool,
234    /// objects registered for finalization (a live `__gc` metamethod was set);
235    /// parallel-tracked — ownership stays on `all` (PUC `finobj`).
236    finalize: Vec<*mut GcHeader>,
237    /// dead finalizables resurrected this cycle, awaiting their `__gc` call by
238    /// the VM (PUC `tobefnz`). Drained via `take_tobefnz`.
239    tobefnz: Vec<*mut GcHeader>,
240    /// PUC 5.1 has no ephemeron pass: a `__mode='k'` table marks its values
241    /// strongly during traversal, so entries like `a[t]=t` (key and value the
242    /// same fresh object) survive even with nothing else referencing `t`.
243    /// 5.2+ replaced that with ephemeron convergence. gc.lua's "weak tables"
244    /// section in 5.1 asserts 3*lim survivors, 5.4 only 2*lim — the loop2
245    /// pair was retired from the newer test as a result.
246    pub(crate) no_ephemeron: bool,
247    /// PUC 5.3 finalizes a table caught in a cycle through an unreachable
248    /// coroutine one GC round later than the unreachability is detected
249    /// ("two collections are needed to break cycle", gc.lua :502). 5.4 and 5.5
250    /// rewrote the GC and finalize the same cycle in a single pass (their
251    /// gc.lua :544 asserts collected after one `collectgarbage()`). 5.1/5.2
252    /// don't exercise this path. Set by the VM at construction.
253    pub(crate) defer_thread_cycle_finalize: bool,
254    /// P17-D v2 layer-15 attack: pool of freed Table allocations.
255    /// btrees-style workloads create + free ~32k tables per iter;
256    /// jemalloc's malloc/free roundtrip costs ~30ns per table = ~960µs
257    /// total per iter. Pool recycle: free_obj pushes the raw Table
258    /// pointer here instead of dropping; new_table pops + resets fields.
259    /// Cap at 4096 entries to avoid unbounded growth (worst-case: 4096
260    /// × sizeof(Table) ≈ 460 KB resident memory in idle pool).
261    table_pool: Vec<std::ptr::NonNull<crate::runtime::table::Table>>,
262    /// P09 embedding memory cap. When `Some(n)`, the VM's run loop watches
263    /// `bytes` between dispatch turns and, on overshoot, runs a full collect
264    /// and (still overshooting) raises a catchable "memory cap exceeded"
265    /// Lua error. A soft cap, not a hard alloc-time refusal: a single
266    /// allocation can briefly push `bytes` past `n`, but the embedder gets
267    /// control back at the next safe point — host policy.
268    pub(crate) mem_cap: Option<usize>,
269}
270
271/// Initial auto-GC threshold and floor (PUC GCSTEPSIZE-ish pacing).
272const GC_MIN_THRESHOLD: usize = 1 << 20;
273
274impl Heap {
275    /// Build a fresh empty heap with default GC pacing and no memory cap.
276    pub fn new() -> Heap {
277        Heap {
278            all: ptr::null_mut(),
279            strings: StringTable::new(),
280            seed: make_seed(),
281            live: 0,
282            bytes: 0,
283            next_gc: GC_MIN_THRESHOLD,
284            current_white: WHITE0,
285            gray: Vec::new(),
286            propagate: None,
287            phase: GcPhase::Pause,
288            sweep_cur: ptr::null_mut(),
289            gc_stopped: false,
290            finalize: Vec::new(),
291            tobefnz: Vec::new(),
292            no_ephemeron: false,
293            defer_thread_cycle_finalize: false,
294            mem_cap: None,
295            table_pool: Vec::new(),
296        }
297    }
298
299    fn link(&mut self, h: *mut GcHeader) {
300        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
301        unsafe {
302            (*h).next = self.all;
303            // Born color depends on phase:
304            //   * Pause / Sweep — born current-white (PUC `luaC_white(g)`);
305            //     reachable from roots gets marked next cycle.
306            //   * Propagate     — born BLACK (PUC `LUAGCRYOUNG` / sasimpl
307            //     of new-during-cycle). Born-current-white during Propagate
308            //     would lose the WHITE bits at the upcoming atomic flip and
309            //     be swept this same cycle even when reachable from a
310            //     barrier-grayed root. Born BLACK skips the trace and
311            //     transitions to current-white at sweep, matching the
312            //     reachable-survivor flow.
313            let born = if self.phase == GcPhase::Propagate {
314                BLACK
315            } else {
316                self.current_white
317            };
318            (*h).flags = ((*h).flags & !COLOR_BITS) | born;
319        }
320        self.all = h;
321        self.live += 1;
322    }
323
324    /// Take ownership of a boxed object and put it under GC management.
325    /// SAFETY-by-convention: `T` must be `repr(C)` with a `GcHeader` first
326    /// field whose tag matches `T` (enforced by the typed constructors).
327    pub(crate) fn adopt<T>(&mut self, obj: Box<T>) -> Gc<T> {
328        let p = Box::into_raw(obj);
329        self.link(p as *mut GcHeader);
330        self.bytes += std::mem::size_of::<T>();
331        Gc::from_ptr(p)
332    }
333
334    /// Allocate and adopt a fresh empty [`Table`].
335    pub fn new_table(&mut self) -> Gc<Table> {
336        // P17-D v2 layer-15 attack — table_pool fast path. When btrees-
337        // style alloc bursts have left freed Tables in the pool, pop a
338        // recycled one and reset its fields instead of mallocing fresh.
339        // Saves ~30ns per alloc (malloc roundtrip elided).
340        let p = if let Some(ptr) = self.table_pool.pop() {
341            let t = ptr.as_ptr();
342            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
343            unsafe {
344                // Reset to fresh-Table state. Box-owned slab/nodes/
345                // metatable were already cleared in `free_obj` before
346                // pool push, so we only reset stack-resident fields here.
347                (*t).hdr = GcHeader::new(ObjTag::Table);
348                (*t).array_ptr = std::ptr::null_mut();
349                (*t).asize = 0;
350                (*t).inline_storage = [0; crate::runtime::table::INLINE_U64S];
351                (*t).lastfree = 0;
352                (*t).flags = 0;
353            }
354            t
355        } else {
356            Box::into_raw(Box::new(Table::new(GcHeader::new(ObjTag::Table))))
357        };
358        // Link + bytes accounting (same as adopt path).
359        self.link(p as *mut GcHeader);
360        self.bytes += std::mem::size_of::<Table>();
361        let g = Gc::from_ptr(p);
362        // P11-S5d.I — the Table is now at its final heap address; wire
363        // `array_ptr` to point at the inline storage that lives inside
364        // the boxed Table.
365        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
366        unsafe { g.as_mut() }.init_array_ptr();
367        g
368    }
369
370    /// P11-S5c.B — adopt an empty table and pre-allocate `asize`
371    /// NIL slots in the array part. Equivalent to `new_table()`
372    /// followed by `set_int(N, Nil)` worth of `rehash`es, except
373    /// the array reaches its final size in one allocation rather
374    /// than O(log N) doubling rounds.
375    ///
376    /// `asize == 0` is identical to `new_table()`. Larger sizes
377    /// are clamped at the array part's hard ceiling
378    /// `MAX_ASIZE = 2^27`; requests beyond that fall back to the
379    /// empty table, which the interpreter would have grown
380    /// gracefully via rehash anyway.
381    pub fn new_table_sized(&mut self, asize: usize) -> Gc<Table> {
382        const MAX_ASIZE_HINT: usize = 1 << 27;
383        let g = self.new_table();
384        let clamped = asize.min(MAX_ASIZE_HINT);
385        if clamped > 0 {
386            // SAFETY: the freshly adopted table has no live borrow
387            // anywhere else; we hold the only `Gc<Table>` handle.
388            unsafe { g.as_mut() }.resize(self, clamped, 0);
389        }
390        g
391    }
392
393    /// Adopt a compiler-built prototype (its `hdr` must carry ObjTag::Proto).
394    pub fn adopt_proto(&mut self, proto: Proto) -> Gc<Proto> {
395        debug_assert!(proto.hdr.tag == ObjTag::Proto);
396        self.adopt(Box::new(proto))
397    }
398
399    /// P11-S5d.M — back-compat constructor for callers that already
400    /// built a `Box<[Gc<Upvalue>]>`. Internally re-routes through
401    /// `new_closure_inline` so small-upval cases also pick the
402    /// inline path (the input Box is freed after the copy).
403    pub fn new_closure(&mut self, proto: Gc<Proto>, upvals: Box<[Gc<Upvalue>]>) -> Gc<LuaClosure> {
404        use crate::runtime::function::INLINE_UPVALS_N;
405        let n = upvals.len();
406        if n <= INLINE_UPVALS_N {
407            let g = self.new_closure_inline(proto, &upvals);
408            drop(upvals);
409            g
410        } else {
411            // Large closure — store the input Box directly in
412            // `overflow`, no copy.
413            self.adopt_closure_with(proto, n as u32, |c| {
414                c.overflow = upvals;
415            })
416        }
417    }
418
419    /// P11-S5d.M — hot-path constructor for the `Op::Closure` handler.
420    /// Takes a slice (typically backed by a stack array) so the caller
421    /// doesn't allocate a Vec/Box just to hand it over. Upvals are
422    /// copied into `inline_storage` for small closures, or into a
423    /// freshly-allocated `Box<[..]>` for the rare overflow case.
424    pub fn new_closure_inline(
425        &mut self,
426        proto: Gc<Proto>,
427        upvals: &[Gc<Upvalue>],
428    ) -> Gc<LuaClosure> {
429        use crate::runtime::function::INLINE_UPVALS_N;
430        let n = upvals.len();
431        self.adopt_closure_with(proto, n as u32, |c| {
432            if n <= INLINE_UPVALS_N {
433                for (i, &uv) in upvals.iter().enumerate() {
434                    c.inline_storage[i] = std::mem::MaybeUninit::new(uv);
435                }
436            } else {
437                c.overflow = upvals.to_vec().into_boxed_slice();
438            }
439        })
440    }
441
442    fn adopt_closure_with<F: FnOnce(&mut LuaClosure)>(
443        &mut self,
444        proto: Gc<Proto>,
445        upvals_len: u32,
446        fill: F,
447    ) -> Gc<LuaClosure> {
448        let mut boxed = Box::new(LuaClosure {
449            hdr: GcHeader::new(ObjTag::Closure),
450            proto,
451            upvals_ptr: std::ptr::null_mut(),
452            upvals_len,
453            inline_storage: [std::mem::MaybeUninit::<Gc<Upvalue>>::uninit();
454                crate::runtime::function::INLINE_UPVALS_N],
455            overflow: Box::new([]),
456        });
457        // Box is heap-stable now — populate storage at the final
458        // address so `upvals_ptr` will be valid.
459        fill(&mut boxed);
460        let g = self.adopt(boxed);
461        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
462        unsafe { g.as_mut() }.init_upvals_ptr();
463        g
464    }
465
466    /// Allocate a [`NativeClosure`] wrapping host function `f` with the
467    /// given captured upvalues.
468    pub fn new_native(
469        &mut self,
470        f: crate::runtime::value::NativeFn,
471        upvals: Box<[Value]>,
472    ) -> Gc<NativeClosure> {
473        self.adopt(Box::new(NativeClosure {
474            hdr: GcHeader::new(ObjTag::Native),
475            f,
476            upvals,
477            is_async: false,
478        }))
479    }
480
481    /// v1.1 B10 Stage 2 — like [`Heap::new_native`] but tags the
482    /// closure with `is_async = true`. The dispatcher's native-call
483    /// path then transmutes `f` to `AsyncNativeFn` and routes through
484    /// the cooperative-yield path. The caller is responsible for
485    /// having transmuted the `AsyncNativeFn` pointer to `NativeFn`
486    /// shape (both are `fn` pointers of the same size); see
487    /// [`crate::vm::async_drive`] for the helper that does this.
488    pub fn new_async_native(
489        &mut self,
490        f: crate::runtime::value::NativeFn,
491        upvals: Box<[Value]>,
492    ) -> Gc<NativeClosure> {
493        self.adopt(Box::new(NativeClosure {
494            hdr: GcHeader::new(ObjTag::Native),
495            f,
496            upvals,
497            is_async: true,
498        }))
499    }
500
501    /// Allocate a fresh [`Upvalue`] cell in the given `state` (open / closed).
502    pub fn new_upvalue(&mut self, state: UpvalState) -> Gc<Upvalue> {
503        self.adopt(Box::new(Upvalue {
504            hdr: GcHeader::new(ObjTag::Upvalue),
505            state,
506        }))
507    }
508
509    /// Create a fresh suspended coroutine wrapping `body`. The new thread
510    /// inherits the creator's globals table; a `setfenv(0, env)` inside it
511    /// will retune that copy without affecting the creator.
512    pub fn new_coro(
513        &mut self,
514        body: Value,
515        globals: Gc<crate::runtime::Table>,
516    ) -> Gc<crate::runtime::Coro> {
517        self.adopt(Box::new(crate::runtime::Coro {
518            hdr: GcHeader::new(ObjTag::Coro),
519            status: crate::runtime::CoroStatus::Suspended,
520            body,
521            started: false,
522            resumer: None,
523            resume_at: None,
524            error_value: None,
525            error_traceback: None,
526            stack: Vec::new(),
527            frames: Vec::new(),
528            open_upvals: Vec::new(),
529            tbc: Vec::new(),
530            top: 0,
531            pcall_depth: 0,
532            hook: crate::vm::exec::HookState::default(),
533            globals,
534        }))
535    }
536
537    /// Create a userdata (an io file handle — luna's only userdata) with no
538    /// metatable yet; the io library installs the shared `FILE*` metatable.
539    pub fn new_userdata(&mut self, payload: UserdataPayload, writable: bool) -> Gc<Userdata> {
540        self.adopt(Box::new(Userdata::new(
541            GcHeader::new(ObjTag::Userdata),
542            payload,
543            writable,
544        )))
545    }
546
547    /// Create (or find) a string. Short strings (≤ 40 bytes) are interned.
548    pub fn intern(&mut self, bytes: &[u8]) -> Gc<LuaStr> {
549        if bytes.len() <= string::MAX_SHORT_LEN {
550            let (p, is_new) = self.strings.intern(bytes, self.seed);
551            if is_new {
552                self.link(p as *mut GcHeader);
553                self.bytes += string::alloc_size(bytes.len());
554            } else {
555                // PUC `luaS_new` resurrect guard (lstring.c).
556                // The bucket-chain is walked open-loop without consulting GC
557                // color; during incremental sweep an existing entry may be
558                // dead-white (in `sweep_cur`, scheduled for `free_obj`). If we
559                // hand its pointer back, the budget-paced sweep frees it out
560                // from under the mutator and the next bucket walk dereferences
561                // a libc-recycled slot — the symptom recorded in
562                // `.dev/known-bugs/stringtable-intern-uaf.md` (misaligned ptr
563                // `0x800002a80000002d` deep in `StringTable::intern`).
564                //
565                // Flip the white bits to `current_white` so the upcoming sweep
566                // skips it (PUC `changewhite`). Black / not-white objects are
567                // already safe and untouched.
568                // SAFETY: `p` came from `StringTable::intern` and is a valid
569                // `LuaStr` header (its bucket chain is consistent under our
570                // single-threaded heap).
571                unsafe {
572                    let f = (*(p as *mut GcHeader)).flags;
573                    if is_white(f) && (f & self.current_white) == 0 {
574                        (*(p as *mut GcHeader)).flags = (f & !WHITE_BITS) | self.current_white;
575                    }
576                }
577            }
578            Gc::from_ptr(p)
579        } else {
580            let p = string::alloc_long(bytes, self.seed);
581            self.link(p as *mut GcHeader);
582            self.bytes += string::alloc_size(bytes.len());
583            Gc::from_ptr(p)
584        }
585    }
586
587    /// Number of GC-managed objects currently linked into the heap (live + not
588    /// yet swept). Useful for `collectgarbage("count")`-style introspection.
589    pub fn live_objects(&self) -> usize {
590        self.live
591    }
592
593    /// Approximate heap size in bytes.
594    pub fn bytes(&self) -> usize {
595        self.bytes
596    }
597
598    /// v2.0 Track TL — pure-read walk over the intrusive `all`
599    /// objects list, invoking `visit(tag)` once per live (or
600    /// not-yet-swept) GC-managed object. Used by `luna-tools`'s
601    /// `luna-heap-dump` to build a per-type histogram; embedders
602    /// can reuse it for ad-hoc heap introspection.
603    ///
604    /// # Read-only contract
605    ///
606    /// The callback receives only the [`ObjTag`] discriminant and
607    /// is invoked under a `&self` borrow on the heap: no pointer
608    /// to the GC payload escapes, no `as_mut`-style aliasing is
609    /// available, and the walk performs zero allocation in the
610    /// loop. Safe to call between dispatch ticks (the only allocs
611    /// happen in the caller's bookkeeping).
612    ///
613    /// The walk visits both the live `all` list and the
614    /// `sweep_cur` detached list so a mid-cycle invocation reports
615    /// the same total as [`Heap::live_objects`].
616    pub fn walk_objects(&self, mut visit: impl FnMut(ObjTag)) {
617        for head in [self.all, self.sweep_cur] {
618            let mut cur = head;
619            while !cur.is_null() {
620                // SAFETY: pointers come from the runtime's
621                // intrusive all-objects list. `&self` borrow on
622                // the heap prevents concurrent mutation; the GC
623                // cannot run while this walk holds the borrow,
624                // so every `next` link is valid until consumed.
625                let (tag, next) = unsafe { ((*cur).tag, (*cur).next) };
626                visit(tag);
627                cur = next;
628            }
629        }
630    }
631
632    /// Whether allocation has crossed the auto-GC threshold (cheap safe-point
633    /// check for the interpreter loop).
634    #[inline(always)]
635    pub fn gc_due(&self) -> bool {
636        !self.gc_stopped && self.bytes >= self.next_gc
637    }
638
639    /// `collectgarbage("stop"/"restart")`: suspend or resume auto-GC.
640    pub(crate) fn gc_is_stopped(&self) -> bool {
641        self.gc_stopped
642    }
643
644    pub(crate) fn gc_set_stopped(&mut self, stopped: bool) {
645        self.gc_stopped = stopped;
646    }
647
648    /// Re-arm with caller-supplied `pause` (PUC param, % of live bytes). The
649    /// next cycle fires once `bytes >= live * pause / 100`. `pause=200` (PUC
650    /// default) waits for the heap to double; `pause=100` fires immediately
651    /// when alloc resumes; `pause=300` is 3× — lower pause = more aggressive.
652    pub fn rearm_gc_pause(&mut self, pause: i64) {
653        let pause = pause.max(0) as usize;
654        let target = self
655            .bytes
656            .saturating_mul(pause)
657            .saturating_div(100)
658            .max(GC_MIN_THRESHOLD);
659        self.next_gc = target;
660    }
661
662    /// Re-arm the auto-GC threshold after a collection (PUC pause-style: next
663    /// collection once the live set roughly doubles).
664    pub fn rearm_gc(&mut self) {
665        self.next_gc = self.bytes.saturating_mul(2).max(GC_MIN_THRESHOLD);
666    }
667
668    /// Apply a `before → after` box-size delta from a Table mutation
669    /// (`set`/`rehash`/`ensure_*`). Grows credit `Heap.bytes`; shrinks
670    /// debit it. `free_obj` for `ObjTag::Table` then subtracts the table's
671    /// final `internal_bytes()` so the round-trip is symmetric across the
672    /// table's whole lifetime.
673    pub(crate) fn apply_bytes_delta(&mut self, before: usize, after: usize) {
674        if after > before {
675            self.bytes += after - before;
676        } else if before > after {
677            self.bytes = self.bytes.saturating_sub(before - after);
678        }
679    }
680
681    /// Mark from `roots`, sweep everything unreachable. Returns the number of
682    /// objects freed.
683    pub fn collect(&mut self, roots: &[Value]) -> usize {
684        self.collect_ex(roots, &[])
685    }
686
687    /// Like `collect`, with additional bare-object roots (e.g. the VM's open
688    /// upvalues, which are not first-class Values).
689    pub(crate) fn collect_ex(&mut self, roots: &[Value], extra: &[*mut GcHeader]) -> usize {
690        // a full STW collection subsumes any in-flight incremental cycle:
691        // drive it to completion (Propagate → atomic → Sweep → Pause) so `all`
692        // holds the whole heap again with all marks cleared, then run a fresh
693        // STW cycle. Any tobefnz from the finished cycle stays queued and is
694        // re-marked (kept alive) by the upcoming mark_all so the VM's
695        // run_finalizers can still see them.
696        if self.phase == GcPhase::Propagate {
697            self.gc_finish_atomic();
698        }
699        if self.phase == GcPhase::Sweep {
700            self.gc_sweep_step(usize::MAX);
701        }
702        self.mark_all(roots, extra);
703        self.full_sweep()
704    }
705
706    /// Stop-the-world mark from `roots`/`extra`. Builds an ephemeral marker,
707    /// seeds from roots + extra + tobefnz + any barrier-carried gray queue,
708    /// propagates to completion, then runs the atomic tail (weak / ephemeron
709    /// / finalizer resurrection / current-white flip). After return all
710    /// reachable objects are BLACK and `current_white` has flipped, so the
711    /// caller's sweep tests `other-white` for dead. Does NOT change `phase`.
712    fn mark_all(&mut self, roots: &[Value], extra: &[*mut GcHeader]) {
713        let mut m = Marker {
714            stack: Vec::new(),
715            weak: Vec::new(),
716            ephemeron: Vec::new(),
717            no_ephemeron: self.no_ephemeron,
718            cached_protos: Vec::new(),
719        };
720        // Drain any barrier-grayed objects carried over: each was demoted from
721        // BLACK back to gray by a write barrier and is awaiting (re-)trace.
722        m.stack.append(&mut self.gray);
723        for &r in roots {
724            m.value(r);
725        }
726        for &h in extra {
727            m.header(h);
728        }
729        // objects already queued for finalization but not yet run must stay
730        // alive until the VM calls their `__gc` (they may be unreachable now).
731        for &h in &self.tobefnz {
732            m.header(h);
733        }
734        drain_marker(&mut m);
735        // ephemeron convergence: a weak-key entry's value is reachable only if
736        // the key is. Marking a value can make another key reachable, so repeat
737        // until no value is newly marked (PUC convergeephemerons).
738        if !m.ephemeron.is_empty() {
739            loop {
740                let mut changed = false;
741                let eph = m.ephemeron.clone();
742                for t in eph {
743                    // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
744                    changed |= unsafe { (*t).converge_ephemeron(&weak_key_alive, &mut m) };
745                }
746                drain_marker(&mut m);
747                if !changed {
748                    break;
749                }
750            }
751        }
752        self.atomic_tail(&mut m);
753    }
754
755    /// PUC `atomic()` tail: weak-table value-clear, finalizer resurrection,
756    /// post-resurrection ephemeron convergence, proto cache, key-clear, late
757    /// value-clear, and current-white flip. Marker is consumed; `weak` is
758    /// empty on return.
759    ///
760    /// Shared between the STW path (`mark_all`) and the incremental path
761    /// (`gc_finish_atomic`). PUC 5.5 `lgc.c::atomic` mirror:
762    ///   propagate → remarkupvals → convergeephemerons
763    ///   → clearbyvalues(weak, NULL)            ─ early value-clear
764    ///   → clearbyvalues(allweak, NULL)         ─ (same pass under luna)
765    ///   → origweak = g->weak                   ─ snapshot pre-resurrection
766    ///   → separatetobefnz(0) + markbeingfnz    ─ separate_finalizables
767    ///   → propagateall + convergeephemerons    ─ post-resurrection
768    ///   → clearbykeys(ephemeron) + clearbykeys(allweak)
769    ///   → clearbyvalues(weak, origweak)        ─ NEW (post-resurrect) only
770    ///   → clearbyvalues(allweak, origall)      ─ (same)
771    /// The `origweak` split matters because finalizer resurrection can
772    /// re-trace fresh proto/closure → new weak tables joining `m.weak`;
773    /// PUC limits the late value-clear to those new heads.
774    fn atomic_tail(&mut self, m: &mut Marker) {
775        let early_is_dead = |v: Value| -> bool {
776            let h = match v {
777                Value::Str(_) => return false,
778                Value::Table(t) => t.as_ptr() as *mut GcHeader,
779                Value::Closure(c) => c.as_ptr() as *mut GcHeader,
780                Value::Native(n) => n.as_ptr() as *mut GcHeader,
781                Value::Coro(c) => c.as_ptr() as *mut GcHeader,
782                Value::Userdata(u) => u.as_ptr() as *mut GcHeader,
783                _ => return false,
784            };
785            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
786            unsafe { is_white((*h).flags) }
787        };
788        let mark_string = |v: Value| {
789            if let Value::Str(s) = v {
790                // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
791                unsafe {
792                    let h = s.as_ptr() as *mut GcHeader;
793                    // strings are leaves: skip gray and go straight to black
794                    (*h).flags = ((*h).flags & !COLOR_BITS) | BLACK;
795                }
796            }
797        };
798        // (1) early clearbyvalues — drop dead-value entries from every weak
799        // table on `m.weak` (PUC's combined `clearbyvalues(weak, NULL) +
800        // clearbyvalues(allweak, NULL)`). Keys are deferred to the
801        // post-resurrection sweep below.
802        for t in &m.weak {
803            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
804            unsafe {
805                let (_wk, wv) = (**t).weak_mode();
806                if wv {
807                    (**t).clear_weak(false, true, &early_is_dead, &mark_string);
808                }
809            }
810        }
811        // (2) `origweak` snapshot — PUC takes the list head; luna's `m.weak`
812        // is a Vec, so the equivalent is its length before resurrection.
813        // Anything appended past this index is a "NEW" weak table that the
814        // resurrection pass brought into view.
815        let origweak_n = m.weak.len();
816        // (3) separate + markbeingfnz — resurrect every registered finalizable
817        // that ended up unmarked. `m.header(h)` enqueues each into the marker
818        // so the following drain_marker propagates through it.
819        self.separate_finalizables(m);
820        drain_marker(m);
821        // (4) post-resurrection ephemeron convergence — a resurrected
822        // finalizable may bring new keys into reach, which in turn marks new
823        // ephemeron values.
824        if !m.ephemeron.is_empty() {
825            loop {
826                let mut changed = false;
827                let eph = m.ephemeron.clone();
828                for t in eph {
829                    // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
830                    changed |= unsafe { (*t).converge_ephemeron(&weak_key_alive, m) };
831                }
832                drain_marker(m);
833                if !changed {
834                    break;
835                }
836            }
837        }
838        // (5) closure-cache weak refs — PUC `traverseproto` clears
839        // `Proto.cache` when the cached LClosure ended the cycle unmarked.
840        // Without this, an LClosure whose only outstanding reference is the
841        // proto's cache would survive forever and its upvalues' `__gc`
842        // finalisers would never run.
843        for &p in &m.cached_protos {
844            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
845            unsafe {
846                if let Some(c) = (*p).cache.get() {
847                    let h = c.as_ptr() as *mut GcHeader;
848                    if is_white((*h).flags) {
849                        (*p).cache.set(None);
850                    }
851                }
852            }
853        }
854        // (6) clearbykeys — drop entries whose weak key did not survive
855        // marking, across every weak table (PUC's `clearbykeys(ephemeron)
856        // + clearbykeys(allweak)`). Pure key sweep — value-dead entries are
857        // either already nil from step (1) or wait for step (7).
858        for t in &m.weak {
859            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
860            unsafe {
861                let (wk, _wv) = (**t).weak_mode();
862                if wk {
863                    (**t).clear_weak(true, false, &early_is_dead, &mark_string);
864                }
865            }
866        }
867        // (7) late clearbyvalues — PUC's `clearbyvalues(weak, origweak) +
868        // clearbyvalues(allweak, origall)`. Limit the sweep to NEW heads so
869        // we don't redo work already done in step (1) for the pre-resurrect
870        // tables (they were drained by then and re-marking happens through
871        // mark_string in step (6)). resurrected weak tables joining `m.weak`
872        // past `origweak_n` get their first value-clear here.
873        let weak_snapshot = std::mem::take(&mut m.weak);
874        for t in &weak_snapshot[origweak_n..] {
875            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
876            unsafe {
877                let (_wk, wv) = (**t).weak_mode();
878                if wv {
879                    (**t).clear_weak(false, true, &early_is_dead, &mark_string);
880                }
881            }
882        }
883        // PUC 5.5 `atomic` end: flip currentwhite so survivors (presently
884        // BLACK) get transitioned into the *new* current-white during sweep,
885        // and the pre-flip current-white becomes the dead-white (the bit the
886        // sweep tests for). Born-during-sweep allocations stamp the new
887        // current-white via `Heap::link`, so they survive this cycle.
888        self.current_white ^= WHITE_BITS;
889    }
890
891    /// Borrow Heap's persistent propagate state as an ephemeral Marker.
892    /// Caller MUST call `stash_marker` with the same Marker after work to
893    /// write the (potentially mutated) state back. Used by the incremental
894    /// Propagate path to avoid lifetime entanglement between `&mut self` and
895    /// `&mut self.propagate`.
896    fn loan_marker(&mut self) -> Marker {
897        let mut prop = self
898            .propagate
899            .take()
900            .expect("propagate state taken outside Propagate phase");
901        Marker {
902            stack: std::mem::take(&mut self.gray),
903            weak: std::mem::take(&mut prop.weak),
904            ephemeron: std::mem::take(&mut prop.ephemeron),
905            no_ephemeron: prop.no_ephemeron,
906            cached_protos: std::mem::take(&mut prop.cached_protos),
907        }
908    }
909
910    fn stash_marker(&mut self, m: Marker) {
911        let no_ephemeron = m.no_ephemeron;
912        self.gray = m.stack;
913        self.propagate = Some(PropagateState {
914            weak: m.weak,
915            ephemeron: m.ephemeron,
916            cached_protos: m.cached_protos,
917            no_ephemeron,
918        });
919    }
920
921    /// Begin an incremental mark cycle: seed the persistent gray queue from
922    /// roots + extra + tobefnz + any barrier-carried gray, install a fresh
923    /// PropagateState, and enter `GcPhase::Propagate`. Precondition: `Pause`.
924    pub(crate) fn gc_start_propagate(&mut self, roots: &[Value], extra: &[*mut GcHeader]) {
925        debug_assert!(self.phase == GcPhase::Pause);
926        self.phase = GcPhase::Propagate;
927        self.propagate = Some(PropagateState {
928            weak: Vec::new(),
929            ephemeron: Vec::new(),
930            cached_protos: Vec::new(),
931            no_ephemeron: self.no_ephemeron,
932        });
933        let mut m = self.loan_marker();
934        for &r in roots {
935            m.value(r);
936        }
937        for &h in extra {
938            m.header(h);
939        }
940        for &h in &self.tobefnz {
941            m.header(h);
942        }
943        self.stash_marker(m);
944    }
945
946    /// Drain up to `budget` gray objects (blacken + trace). Returns true if
947    /// the gray queue is now empty (caller should follow up with
948    /// `gc_finish_atomic`). PUC `propagatemark` budgeted loop.
949    pub(crate) fn gc_step_propagate(&mut self, budget: usize) -> bool {
950        debug_assert!(self.phase == GcPhase::Propagate);
951        let mut m = self.loan_marker();
952        let mut n = 0;
953        while n < budget {
954            let Some(h) = m.stack.pop() else {
955                break;
956            };
957            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
958            unsafe {
959                (*h).flags = ((*h).flags & !WHITE_BITS) | BLACK;
960                match (*h).tag {
961                    ObjTag::Str => {}
962                    ObjTag::Table => (*(h as *mut Table)).trace(&mut m),
963                    ObjTag::Proto => (*(h as *mut Proto)).trace(&mut m),
964                    ObjTag::Closure => (*(h as *mut LuaClosure)).trace(&mut m),
965                    ObjTag::Upvalue => (*(h as *mut Upvalue)).trace(&mut m),
966                    ObjTag::Native => (*(h as *mut NativeClosure)).trace(&mut m),
967                    ObjTag::Coro => (*(h as *mut crate::runtime::Coro)).trace(&mut m),
968                    ObjTag::Userdata => (*(h as *mut Userdata)).trace(&mut m),
969                }
970            }
971            n += 1;
972        }
973        let exhausted = m.stack.is_empty();
974        self.stash_marker(m);
975        exhausted
976    }
977
978    /// Conclude a Propagate cycle: drain any residual gray, run the atomic
979    /// tail (weak / ephemeron / finalizer / proto-cache / flip), detach `all`
980    /// into `sweep_cur`, and enter `GcPhase::Sweep`. Releases `propagate`.
981    /// PUC `atomic` + `entersweep` transition.
982    pub(crate) fn gc_finish_atomic(&mut self) {
983        debug_assert!(self.phase == GcPhase::Propagate);
984        let mut m = self.loan_marker();
985        // any residual gray (caller may not have drained to empty)
986        drain_marker(&mut m);
987        // pre-atomic ephemeron convergence
988        if !m.ephemeron.is_empty() {
989            loop {
990                let mut changed = false;
991                let eph = m.ephemeron.clone();
992                for t in eph {
993                    // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
994                    changed |= unsafe { (*t).converge_ephemeron(&weak_key_alive, &mut m) };
995                }
996                drain_marker(&mut m);
997                if !changed {
998                    break;
999                }
1000            }
1001        }
1002        self.atomic_tail(&mut m);
1003        // PropagateState consumed; transition to Sweep phase by detaching
1004        // the whole heap into sweep_cur (mirrors gc_mark_atomic). Anything
1005        // allocated past this point links onto fresh `all` and survives.
1006        self.propagate = None;
1007        debug_assert!(self.gray.is_empty(), "gray queue not drained at atomic");
1008        self.sweep_cur = std::mem::replace(&mut self.all, ptr::null_mut());
1009        self.phase = GcPhase::Sweep;
1010    }
1011
1012    /// Phase peek (for the VM-side step driver).
1013    pub(crate) fn gc_phase_is_pause(&self) -> bool {
1014        self.phase == GcPhase::Pause
1015    }
1016    pub(crate) fn gc_phase_is_propagate(&self) -> bool {
1017        self.phase == GcPhase::Propagate
1018    }
1019    #[allow(dead_code)] // public phase-peek API trio; sweep variant unused internally
1020    pub(crate) fn gc_phase_is_sweep(&self) -> bool {
1021        self.phase == GcPhase::Sweep
1022    }
1023
1024    /// Forward write barrier: when a BLACK `parent` acquires a fresh reference
1025    /// to a WHITE `child`, gray the child (strings go straight to BLACK as
1026    /// leaves) and push onto the persistent gray queue so the next propagate
1027    /// step traces it. Mirrors PUC `luaC_barrier_`. No-op outside Propagate
1028    /// (parent is gray or white — the mutator never sees a BLACK object live
1029    /// outside an incremental cycle).
1030    #[allow(clippy::not_unsafe_ptr_arg_deref)] // Internal GC barrier; caller (Gc<T>::write_*) guarantees ptr validity per SAFETY below.
1031    pub fn barrier_forward(&mut self, parent: *mut GcHeader, child: Value) {
1032        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1033        unsafe {
1034            if !is_black((*parent).flags) {
1035                return;
1036            }
1037            let ch = match child {
1038                Value::Str(s) => s.as_ptr() as *mut GcHeader,
1039                Value::Table(t) => t.as_ptr() as *mut GcHeader,
1040                Value::Closure(c) => c.as_ptr() as *mut GcHeader,
1041                Value::Native(n) => n.as_ptr() as *mut GcHeader,
1042                Value::Coro(c) => c.as_ptr() as *mut GcHeader,
1043                Value::Userdata(u) => u.as_ptr() as *mut GcHeader,
1044                _ => return,
1045            };
1046            let cf = (*ch).flags;
1047            if !is_white(cf) {
1048                return;
1049            }
1050            if (*ch).tag == ObjTag::Str {
1051                (*ch).flags = (cf & !COLOR_BITS) | BLACK;
1052            } else {
1053                (*ch).flags = cf & !WHITE_BITS;
1054                self.gray.push(ch);
1055            }
1056        }
1057    }
1058
1059    /// Backward write barrier for objects with many fields (tables, threads):
1060    /// demote the parent itself back to gray so propagate re-traces it.
1061    /// Mirrors PUC `luaC_barrierback_`. One call covers any number of
1062    /// subsequent stores until the next propagate finishes — much cheaper for
1063    /// tables than per-child forward barriers. No-op outside Propagate.
1064    #[allow(clippy::not_unsafe_ptr_arg_deref)] // Internal GC barrier; caller (Gc<T>::write_*) guarantees ptr validity per SAFETY below.
1065    pub fn barrier_back(&mut self, parent: *mut GcHeader) {
1066        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1067        unsafe {
1068            let f = (*parent).flags;
1069            if !is_black(f) {
1070                return;
1071            }
1072            (*parent).flags = f & !COLOR_BITS;
1073            self.gray.push(parent);
1074        }
1075    }
1076
1077    /// Move every registered finalizable that is now unreachable to `tobefnz`
1078    /// and resurrect it (mark it via `m`) so it — and the data its `__gc` needs
1079    /// — survives this cycle. Survivors stay registered in `finalize`. PUC's
1080    /// `separatetobefnz` walks `g->finobj` head-first, but `g->finobj` is a
1081    /// linked list that registration *prepends* to — so dead objects end up
1082    /// in `tobefnz` in reverse registration order, and `__gc` ultimately
1083    /// runs LIFO. luna's `finalize` is a Vec that grows forward, so iterate
1084    /// it in reverse here to match the LIFO contract (gc.lua's userdata
1085    /// section asserts the finalizers fire from value 10 back to 0).
1086    fn separate_finalizables(&mut self, m: &mut Marker) {
1087        let mut i = self.finalize.len();
1088        while i > 0 {
1089            i -= 1;
1090            let h = self.finalize[i];
1091            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1092            if unsafe { is_white((*h).flags) } {
1093                // Two-pass cycle-finalize (PUC 5.3 gc.lua :502): when a
1094                // finalizable table holds onto an unreachable coroutine, the
1095                // cycle (table → coroutine.stack → closure → table) keeps the
1096                // mark phase from reaching the table even though it is still
1097                // logically alive for one more GC pass. PUC's mark-sweep wakes
1098                // it via `markbeingfnz` *after* sweeping, so the actual `__gc`
1099                // call lands one cycle later. luna mirrors this by resurrecting
1100                // the table on the first sighting and only enqueuing it for
1101                // `__gc` on the second.
1102                let in_thread_cycle = self.defer_thread_cycle_finalize
1103                    // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1104                    && unsafe { (*h).tag } == ObjTag::Table
1105                    && {
1106                        let t = h as *mut Table;
1107                        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1108                        unsafe { (*t).refs_contain_unmarked_coro() }
1109                    };
1110                // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1111                let already_deferred = unsafe { (*h).flags & DEFERRED != 0 };
1112                if in_thread_cycle && !already_deferred {
1113                    // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1114                    unsafe { (*h).flags |= DEFERRED };
1115                    m.header(h);
1116                    continue;
1117                }
1118                // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1119                unsafe { (*h).flags = ((*h).flags & !(FIN | DEFERRED)) | FINALIZED };
1120                self.tobefnz.push(h);
1121                m.header(h);
1122                self.finalize.swap_remove(i);
1123            } else {
1124                // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1125                unsafe { (*h).flags &= !DEFERRED };
1126            }
1127        }
1128    }
1129
1130    /// Register a table for finalization (a live `__gc` metamethod was just set
1131    /// via setmetatable). No-op if it is already pending a finalize (FIN bit).
1132    /// PUC 5.5 reference manual §2.5.3: "An object can be marked again for
1133    /// finalization by calling setmetatable with a different metatable, or
1134    /// with the same metatable but with a different __gc field" — so a
1135    /// previously finalized object (FINALIZED bit set then reset by
1136    /// `take_tobefnz`) can re-register. PUC's `luaC_checkfinalizer` is gated
1137    /// on `tofinalize(o)` only, which mirrors checking the FIN bit.
1138    pub(crate) fn register_finalizable(&mut self, t: Gc<Table>) {
1139        let h = t.as_ptr() as *mut GcHeader;
1140        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1141        unsafe {
1142            if (*h).flags & FIN == 0 {
1143                (*h).flags |= FIN;
1144                self.finalize.push(h);
1145            }
1146        }
1147    }
1148
1149    /// Register a userdata for finalization. PUC 5.1 `newproxy(true)` plus a
1150    /// metatable carrying `__gc` lets a Lua script attach a finalizer to a
1151    /// proxy object — gc.lua's "testing userdata" section binds this
1152    /// behaviour together with weak tables.
1153    pub(crate) fn register_finalizable_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
1154        let h = u.as_ptr() as *mut GcHeader;
1155        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1156        unsafe {
1157            if (*h).flags & FIN == 0 {
1158                (*h).flags |= FIN;
1159                self.finalize.push(h);
1160            }
1161        }
1162    }
1163
1164    /// Take the objects awaiting their `__gc` call (the VM runs the
1165    /// finalizers). Each entry is dispatched on its `ObjTag` so the caller
1166    /// can look up `__gc` for either a table or a proxy userdata.
1167    ///
1168    /// Mirrors PUC 5.5 `udata2finalize`: the FINALIZED bit is reset on the
1169    /// way out so a `setmetatable(obj, mt_with___gc)` inside (or after) the
1170    /// finalizer can re-register the object for a future round, per the Lua
1171    /// 5.5 reference manual §2.5.3 re-finalize semantics.
1172    pub(crate) fn take_tobefnz(&mut self) -> Vec<crate::runtime::Value> {
1173        use crate::runtime::Value;
1174        std::mem::take(&mut self.tobefnz)
1175            .into_iter()
1176            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1177            .map(|h| unsafe {
1178                (*h).flags &= !FINALIZED;
1179                match (*h).tag {
1180                    ObjTag::Table => Value::Table(Gc::from_ptr(h as *mut Table)),
1181                    ObjTag::Userdata => {
1182                        Value::Userdata(Gc::from_ptr(h as *mut crate::runtime::Userdata))
1183                    }
1184                    _ => unreachable!("non-finalizable object queued for finalization"),
1185                }
1186            })
1187            .collect()
1188    }
1189
1190    /// Move ALL still-registered finalizables to the pending queue regardless of
1191    /// reachability (PUC separatetobefnz(g, 1) at state close), so the VM can run
1192    /// every `__gc` before the heap is torn down.
1193    pub(crate) fn queue_all_finalizers(&mut self) {
1194        for h in std::mem::take(&mut self.finalize) {
1195            // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1196            unsafe { (*h).flags = ((*h).flags & !FIN) | FINALIZED };
1197            self.tobefnz.push(h);
1198        }
1199    }
1200
1201    /// Sweep the whole `all` list in one pass: free dead-white objects,
1202    /// transition survivors (BLACK or current-white) to the new current-white
1203    /// so the next cycle can re-mark them. Returns the number of objects freed.
1204    fn full_sweep(&mut self) -> usize {
1205        // detach the list first so freeing (which needs &mut self for the
1206        // string table) never aliases a pointer into self
1207        let mut freed = 0;
1208        let new_white = self.current_white;
1209        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1210        unsafe {
1211            let mut cur = std::mem::replace(&mut self.all, ptr::null_mut());
1212            let mut kept_head: *mut GcHeader = ptr::null_mut();
1213            let mut kept_tail: *mut GcHeader = ptr::null_mut();
1214            while !cur.is_null() {
1215                let next = (*cur).next;
1216                let f = (*cur).flags;
1217                // dead = other-white (i.e. white but not current-white).
1218                // Survivors are BLACK (just-marked) or current-white (born
1219                // during the sweep itself).
1220                let dead = is_white(f) && (f & new_white) == 0;
1221                if !dead {
1222                    (*cur).flags = (f & !COLOR_BITS) | new_white;
1223                    (*cur).next = ptr::null_mut();
1224                    if kept_tail.is_null() {
1225                        kept_head = cur;
1226                    } else {
1227                        (*kept_tail).next = cur;
1228                    }
1229                    kept_tail = cur;
1230                } else {
1231                    self.free_obj(cur);
1232                    freed += 1;
1233                }
1234                cur = next;
1235            }
1236            self.all = kept_head;
1237        }
1238        self.live -= freed;
1239        freed
1240    }
1241
1242    /// Sweep up to `budget` objects from the detached `sweep_cur` list: free
1243    /// unmarked ones, splice marked survivors back onto `all` (clearing their
1244    /// MARK bit). Returns true once the list is exhausted (cycle complete →
1245    /// back to `Pause`). Safe with no write barrier: marking was atomic, so any
1246    /// object still unmarked here was unreachable at mark time and the mutator
1247    /// holds no reference that could have resurrected it.
1248    pub(crate) fn gc_sweep_step(&mut self, budget: usize) -> bool {
1249        let mut n = 0;
1250        let new_white = self.current_white;
1251        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1252        unsafe {
1253            while n < budget && !self.sweep_cur.is_null() {
1254                let cur = self.sweep_cur;
1255                let next = (*cur).next;
1256                let f = (*cur).flags;
1257                let dead = is_white(f) && (f & new_white) == 0;
1258                if !dead {
1259                    (*cur).flags = (f & !COLOR_BITS) | new_white;
1260                    (*cur).next = self.all;
1261                    self.all = cur;
1262                } else {
1263                    self.free_obj(cur);
1264                    self.live -= 1;
1265                }
1266                self.sweep_cur = next;
1267                n += 1;
1268            }
1269        }
1270        if self.sweep_cur.is_null() {
1271            self.phase = GcPhase::Pause;
1272            true
1273        } else {
1274            false
1275        }
1276    }
1277
1278    unsafe fn free_obj(&mut self, h: *mut GcHeader) {
1279        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1280        unsafe {
1281            match (*h).tag {
1282                ObjTag::Table => {
1283                    let t = h as *mut Table;
1284                    let internal = (*t).internal_bytes();
1285                    self.bytes = self
1286                        .bytes
1287                        .saturating_sub(std::mem::size_of::<Table>() + internal);
1288                    // P17-D v2 layer-15 attack — pool recycle. Drop the
1289                    // Box-owned interior (slab, nodes, metatable) so the
1290                    // Table struct itself can be re-handed-out by a
1291                    // future `new_table` without re-mallocing. Cap pool
1292                    // at 4096 entries to bound idle memory.
1293                    const TABLE_POOL_CAP: usize = 4096;
1294                    if self.table_pool.len() < TABLE_POOL_CAP {
1295                        // Free interior heap allocations now (slab + nodes).
1296                        // Each Box::new([]) is a dangling no-alloc empty
1297                        // slice, so reassigning is just a pointer move.
1298                        (*t).slab = Box::new([]);
1299                        (*t).nodes = Box::new([]);
1300                        // C3 — drop SoA Robin Hood parallel arrays too.
1301                        // These are Box::new([]) dangling stubs until
1302                        // Phase D cuts over (Phase B initial state).
1303                        (*t).keys = Box::new([]);
1304                        (*t).vals = Box::new([]);
1305                        (*t).meta = Box::new([]);
1306                        (*t).tombstones = 0;
1307                        (*t).iter_depth = 0;
1308                        (*t).metatable = None;
1309                        // Stash the raw pointer for future reuse.
1310                        // SAFETY: t is non-null (came from a live Gc<Table>);
1311                        // pool owns it until reuse or Heap::Drop.
1312                        self.table_pool.push(std::ptr::NonNull::new_unchecked(t));
1313                    } else {
1314                        drop(Box::from_raw(t));
1315                    }
1316                }
1317                ObjTag::Proto => {
1318                    self.bytes = self.bytes.saturating_sub(std::mem::size_of::<Proto>());
1319                    drop(Box::from_raw(h as *mut Proto));
1320                }
1321                ObjTag::Closure => {
1322                    self.bytes = self.bytes.saturating_sub(std::mem::size_of::<LuaClosure>());
1323                    drop(Box::from_raw(h as *mut LuaClosure));
1324                }
1325                ObjTag::Upvalue => {
1326                    self.bytes = self.bytes.saturating_sub(std::mem::size_of::<Upvalue>());
1327                    drop(Box::from_raw(h as *mut Upvalue));
1328                }
1329                ObjTag::Native => {
1330                    self.bytes = self
1331                        .bytes
1332                        .saturating_sub(std::mem::size_of::<NativeClosure>());
1333                    drop(Box::from_raw(h as *mut NativeClosure));
1334                }
1335                ObjTag::Coro => {
1336                    self.bytes = self
1337                        .bytes
1338                        .saturating_sub(std::mem::size_of::<crate::runtime::Coro>());
1339                    drop(Box::from_raw(h as *mut crate::runtime::Coro));
1340                }
1341                ObjTag::Userdata => {
1342                    self.bytes = self.bytes.saturating_sub(std::mem::size_of::<Userdata>());
1343                    drop(Box::from_raw(h as *mut Userdata));
1344                }
1345                ObjTag::Str => {
1346                    let s = h as *mut LuaStr;
1347                    self.bytes = self.bytes.saturating_sub(string::alloc_size((*s).len()));
1348                    if (*s).is_short() {
1349                        self.strings.remove(s);
1350                    }
1351                    string::free(s);
1352                }
1353            }
1354        }
1355    }
1356}
1357
1358impl Drop for Heap {
1359    fn drop(&mut self) {
1360        // free everything regardless of reachability, including any list still
1361        // detached for an in-flight incremental sweep
1362        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1363        unsafe {
1364            for mut cur in [self.all, self.sweep_cur] {
1365                while !cur.is_null() {
1366                    let next = (*cur).next;
1367                    self.free_obj(cur);
1368                    cur = next;
1369                }
1370            }
1371            // P17-D v2 layer-15 attack — release the table_pool's
1372            // dangling Box<Table> ptrs. Each was Box::into_raw'd into
1373            // the pool (via free_obj recycle path); without this, the
1374            // Tables would leak. The pool's Tables had their interior
1375            // Box-owned fields (slab/nodes/metatable) already cleared
1376            // when they were recycled, so dropping the Table now only
1377            // releases the Table struct itself.
1378            for ptr in self.table_pool.drain(..) {
1379                drop(Box::from_raw(ptr.as_ptr()));
1380            }
1381        }
1382    }
1383}
1384
1385impl Default for Heap {
1386    fn default() -> Heap {
1387        Heap::new()
1388    }
1389}
1390
1391/// Mark accumulator: gray stack plus entry points for Values and bare
1392/// object headers (Protos/Upvalues are not first-class Values).
1393pub(crate) struct Marker {
1394    stack: Vec<*mut GcHeader>,
1395    /// live tables with a weak `__mode`, collected during marking and processed
1396    /// (dead weak entries cleared) before the sweep
1397    pub(crate) weak: Vec<*mut Table>,
1398    /// ephemeron tables (weak keys, strong values): their hash values are not
1399    /// marked during trace but in a fixpoint pass keyed on key-reachability
1400    pub(crate) ephemeron: Vec<*mut Table>,
1401    /// PUC 5.1 mode: skip ephemeron handling — `__mode='k'` tables mark their
1402    /// values strongly during the normal trace pass (see [`Heap::no_ephemeron`]).
1403    pub(crate) no_ephemeron: bool,
1404    /// Protos with a non-null closure cache (PUC `Proto.cache`). After
1405    /// marking is done, any cached LClosure that ended the cycle unmarked is
1406    /// cleared so the sweep can collect it — the cache is a *weak* reference
1407    /// (PUC `traverseproto` checks `iswhite(cache)`). Seen via [`Proto::trace`].
1408    pub(crate) cached_protos: Vec<*mut crate::runtime::Proto>,
1409}
1410
1411/// Drain the gray stack: pop each marked object and trace its children until
1412/// the worklist is empty (iterative, so deep graphs don't overflow the Rust
1413/// stack). Shared by the root mark and the post-resurrection remark.
1414fn drain_marker(m: &mut Marker) {
1415    while let Some(h) = m.stack.pop() {
1416        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1417        unsafe {
1418            // PUC `propagatemark`: gray → black before scanning children, so a
1419            // child that points back at us (cycle) re-traces us as already
1420            // black and does not loop. White bits were cleared on push.
1421            (*h).flags = ((*h).flags & !WHITE_BITS) | BLACK;
1422            match (*h).tag {
1423                ObjTag::Str => {}
1424                ObjTag::Table => (*(h as *mut Table)).trace(m),
1425                ObjTag::Proto => (*(h as *mut Proto)).trace(m),
1426                ObjTag::Closure => (*(h as *mut LuaClosure)).trace(m),
1427                ObjTag::Upvalue => (*(h as *mut Upvalue)).trace(m),
1428                ObjTag::Native => (*(h as *mut NativeClosure)).trace(m),
1429                ObjTag::Coro => (*(h as *mut crate::runtime::Coro)).trace(m),
1430                ObjTag::Userdata => (*(h as *mut Userdata)).trace(m),
1431            }
1432        }
1433    }
1434}
1435
1436impl Marker {
1437    /// Mark a value, returning true if it was newly marked (was white).
1438    pub(crate) fn value(&mut self, v: Value) -> bool {
1439        let h = match v {
1440            Value::Str(s) => s.as_ptr() as *mut GcHeader,
1441            Value::Table(t) => t.as_ptr() as *mut GcHeader,
1442            Value::Closure(c) => c.as_ptr() as *mut GcHeader,
1443            Value::Native(n) => n.as_ptr() as *mut GcHeader,
1444            Value::Coro(c) => c.as_ptr() as *mut GcHeader,
1445            Value::Userdata(u) => u.as_ptr() as *mut GcHeader,
1446            _ => return false,
1447        };
1448        self.header(h)
1449    }
1450
1451    /// Mark a bare header, returning true if it was newly marked (was white).
1452    /// Transitions white → gray (in PUC `reallymarkobject` terms): clears the
1453    /// current-white bit and pushes onto the gray stack. `drain_marker` later
1454    /// pops it, traces children, and stamps it BLACK.
1455    pub(crate) fn header(&mut self, h: *mut GcHeader) -> bool {
1456        // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1457        unsafe {
1458            let f = (*h).flags;
1459            if is_white(f) {
1460                (*h).flags = f & !WHITE_BITS;
1461                self.stack.push(h);
1462                true
1463            } else {
1464                false
1465            }
1466        }
1467    }
1468}
1469
1470/// Whether a value is "alive" for ephemeron key purposes: non-collectable
1471/// values and strings are always alive (strings are never weakly cleared);
1472/// a collectable object is alive only once marked (gray or black).
1473fn weak_key_alive(v: Value) -> bool {
1474    let h = match v {
1475        Value::Table(t) => t.as_ptr() as *mut GcHeader,
1476        Value::Closure(c) => c.as_ptr() as *mut GcHeader,
1477        Value::Native(n) => n.as_ptr() as *mut GcHeader,
1478        Value::Coro(c) => c.as_ptr() as *mut GcHeader,
1479        Value::Userdata(u) => u.as_ptr() as *mut GcHeader,
1480        _ => return true, // strings, numbers, booleans: never weak-collected
1481    };
1482    // SAFETY: `h` is a GcHeader pointer drawn from the runtime's all-objects intrusive list (or from a live `Gc<T>` cast above); it is non-null and remains live for the duration of this GC step (heap.rs:5-7).
1483    unsafe { !is_white((*h).flags) }
1484}
1485
1486/// Hash seed from address entropy (ASLR) and clock, luaL_makeseed style.
1487fn make_seed() -> u32 {
1488    let stack_var = 0u8;
1489    let mut h = &stack_var as *const u8 as u64;
1490    h ^= (make_seed as *const () as u64) << 16;
1491    if let Ok(d) = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
1492        h ^= (d.subsec_nanos() as u64) << 32 ^ d.as_secs();
1493    }
1494    h ^= h >> 33;
1495    h = h.wrapping_mul(0xff51_afd7_ed55_8ccd);
1496    h ^= h >> 33;
1497    h as u32
1498}
1499
1500#[cfg(test)]
1501mod tests {
1502    use super::*;
1503    use crate::vm::isa::{Inst, Op};
1504
1505    #[test]
1506    fn collect_traces_function_objects() {
1507        let mut heap = Heap::new();
1508        let source = heap.intern(b"@test");
1509        let kstr = heap.intern(b"a-constant-string");
1510        let inner = Proto {
1511            hdr: GcHeader::new(ObjTag::Proto),
1512            code: Box::new([Inst::iabc(Op::Return0, 0, 0, 0, false)]),
1513            consts: Box::new([]),
1514            protos: Box::new([]),
1515            upvals: Box::new([]),
1516            num_params: 0,
1517            is_vararg: false,
1518            has_vararg_table_pseudo: false,
1519            has_compat_vararg_arg: false,
1520            max_stack: 2,
1521            lines: Box::new([1]),
1522            source,
1523            line_defined: 1,
1524            last_line_defined: 1,
1525            locvars: Box::new([]),
1526            cache: std::cell::Cell::new(None),
1527            jit: std::cell::Cell::new(crate::runtime::function::JitProtoState::Untried),
1528            env_upval_idx: u8::MAX,
1529            trace_hot_count: std::cell::Cell::new(0),
1530            call_hot_count: std::cell::Cell::new(0),
1531            trace_discard_count: std::cell::Cell::new(0),
1532            trace_gave_up: std::cell::Cell::new(false),
1533            traces: crate::jit::send_compat::TRefLock::new(Vec::new()),
1534        };
1535        let inner = heap.adopt_proto(inner);
1536        let outer = Proto {
1537            hdr: GcHeader::new(ObjTag::Proto),
1538            code: Box::new([Inst::iabc(Op::Return0, 0, 0, 0, false)]),
1539            consts: Box::new([Value::Str(kstr)]),
1540            protos: Box::new([inner]),
1541            upvals: Box::new([]),
1542            num_params: 0,
1543            is_vararg: true,
1544            has_vararg_table_pseudo: false,
1545            has_compat_vararg_arg: false,
1546            max_stack: 2,
1547            lines: Box::new([1]),
1548            source,
1549            line_defined: 0,
1550            last_line_defined: 0,
1551            locvars: Box::new([]),
1552            cache: std::cell::Cell::new(None),
1553            jit: std::cell::Cell::new(crate::runtime::function::JitProtoState::Untried),
1554            env_upval_idx: u8::MAX,
1555            trace_hot_count: std::cell::Cell::new(0),
1556            call_hot_count: std::cell::Cell::new(0),
1557            trace_discard_count: std::cell::Cell::new(0),
1558            trace_gave_up: std::cell::Cell::new(false),
1559            traces: crate::jit::send_compat::TRefLock::new(Vec::new()),
1560        };
1561        let outer = heap.adopt_proto(outer);
1562        let captured = heap.intern(b"captured-value-string-xxxxxxxxxxxxxxxxxxxxxxxxx");
1563        let uv = heap.new_upvalue(UpvalState::Closed(Value::Str(captured)));
1564        let cl = heap.new_closure(outer, Box::new([uv]));
1565        // objects: source, kstr, inner, outer, captured, uv, cl
1566        assert_eq!(heap.live_objects(), 7);
1567        // rooting the closure keeps the whole graph alive
1568        assert_eq!(heap.collect(&[Value::Closure(cl)]), 0);
1569        assert_eq!(heap.live_objects(), 7);
1570        assert_eq!(heap.collect(&[]), 7);
1571        assert_eq!(heap.live_objects(), 0);
1572    }
1573
1574    #[test]
1575    fn collect_unreachable() {
1576        let mut heap = Heap::new();
1577        let s = heap.intern(b"hello");
1578        let t = heap.new_table();
1579        assert_eq!(heap.live_objects(), 2);
1580        // both rooted: nothing freed
1581        assert_eq!(heap.collect(&[Value::Str(s), Value::Table(t)]), 0);
1582        // only table rooted: string freed
1583        assert_eq!(heap.collect(&[Value::Table(t)]), 1);
1584        assert_eq!(heap.live_objects(), 1);
1585        // nothing rooted
1586        assert_eq!(heap.collect(&[]), 1);
1587        assert_eq!(heap.live_objects(), 0);
1588    }
1589
1590    #[test]
1591    fn collect_traces_table_contents() {
1592        let mut heap = Heap::new();
1593        let t = heap.new_table();
1594        let k = heap.intern(b"key-string-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); // long
1595        let v = heap.intern(b"val");
1596        unsafe { t.as_mut() }
1597            .set(&mut heap, Value::Str(k), Value::Str(v))
1598            .unwrap();
1599        let inner = heap.new_table();
1600        unsafe { t.as_mut() }
1601            .set(&mut heap, Value::Int(1), Value::Table(inner))
1602            .unwrap();
1603        unsafe { inner.as_mut() }.set_metatable(Some(t));
1604        assert_eq!(heap.live_objects(), 4);
1605        // root only the outer table: everything reachable through it survives
1606        assert_eq!(heap.collect(&[Value::Table(t)]), 0);
1607        assert_eq!(heap.live_objects(), 4);
1608        assert_eq!(heap.collect(&[]), 4);
1609    }
1610
1611    #[test]
1612    fn interned_string_reclaimed_and_reinternable() {
1613        let mut heap = Heap::new();
1614        heap.intern(b"transient");
1615        assert_eq!(heap.collect(&[]), 1);
1616        let s2 = heap.intern(b"transient");
1617        assert_eq!(s2.as_bytes(), b"transient");
1618        assert_eq!(heap.live_objects(), 1);
1619    }
1620
1621    #[test]
1622    fn bytes_and_live_round_trip_to_zero() {
1623        // Memory-invariant audit: after a churn of table allocation, rehash-
1624        // driven growth, and full collection of an empty root set, both
1625        // `heap.bytes` and `heap.live_objects` must return to 0. Catches any
1626        // alloc / free asymmetry in the Table internal-Box delta tracking
1627        // (4bab3c5) or the live counter (link/sweep symmetry).
1628        let mut heap = Heap::new();
1629        assert_eq!(heap.bytes(), 0);
1630        assert_eq!(heap.live_objects(), 0);
1631        // Build a churn: 50 tables, each filled with 200 int keys (forces
1632        // multiple rehashes); plus interned strings spliced through the
1633        // hash part. Bytes should grow well past the empty baseline.
1634        let mut roots: Vec<Value> = Vec::new();
1635        for ti in 0..50 {
1636            let t = heap.new_table();
1637            for k in 1..=200 {
1638                let _ =
1639                    unsafe { t.as_mut() }.set(&mut heap, Value::Int(k), Value::Int(ti * 1000 + k));
1640            }
1641            for sk in 0..32 {
1642                let key = Value::Str(heap.intern(format!("k{ti}-{sk}").as_bytes()));
1643                let _ = unsafe { t.as_mut() }.set(&mut heap, key, Value::Int(sk));
1644            }
1645            roots.push(Value::Table(t));
1646        }
1647        let live_peak = heap.live_objects();
1648        let bytes_peak = heap.bytes();
1649        assert!(live_peak > 0, "live should be >0 after churn");
1650        assert!(bytes_peak > 0, "bytes should be >0 after churn");
1651        // Root only half — the other half should be collected.
1652        let half = roots.len() / 2;
1653        let freed = heap.collect(&roots[..half]);
1654        assert!(freed > 0, "some objects should have been freed");
1655        assert!(
1656            heap.bytes() < bytes_peak,
1657            "bytes must drop after partial collect"
1658        );
1659        assert!(
1660            heap.live_objects() < live_peak,
1661            "live must drop after partial collect"
1662        );
1663        // Drop everything: counters must return to 0 exactly.
1664        drop(roots);
1665        let _ = heap.collect(&[]);
1666        assert_eq!(heap.live_objects(), 0, "live not zero after full collect");
1667        assert_eq!(
1668            heap.bytes(),
1669            0,
1670            "bytes not zero after full collect — asymmetric alloc/free"
1671        );
1672    }
1673
1674    /// Regression for `.dev/known-bugs/stringtable-intern-uaf.md`:
1675    /// `StringTable::intern` must NOT return a dead-white (about-to-be-swept)
1676    /// short-string pointer. Mirrors PUC `luaS_new`'s resurrect-on-hit guard
1677    /// (lstring.c — `if (isdead(g, ts)) changewhite(ts);`).
1678    ///
1679    /// Without the guard, the bucket-chain still references the unswept
1680    /// short string after the atomic flip; a re-`intern` of the same bytes
1681    /// hands back that pointer; the budget-paced sweep then frees it and
1682    /// the next bucket walk dereferences libc-recycled garbage (the
1683    /// `0x800002a80000002d` misaligned pointer seen in the audit).
1684    #[test]
1685    fn intern_resurrects_dead_white_short_string() {
1686        let mut heap = Heap::new();
1687        let alive = heap.intern(b"keep-me-alive-1");
1688        let dying = heap.intern(b"transient-x");
1689        let dying_ptr = dying.as_ptr();
1690        let dying_bytes = dying.as_bytes().to_vec();
1691        // Drive an incremental cycle by hand to reproduce the race:
1692        //   1. mark-propagate with `alive` only as a root → `dying` stays white
1693        //   2. atomic flip → `dying` becomes dead-white, bucket still points at it
1694        //   3. RE-INTERN the same bytes BEFORE sweep clears the bucket
1695        //   4. fix must either (a) skip the dead entry & alloc fresh, or
1696        //      (b) resurrect dying back to current-white
1697        let alive_root = [Value::Str(alive)];
1698        heap.gc_start_propagate(&alive_root, &[]);
1699        while !heap.gc_step_propagate(usize::MAX) {}
1700        heap.gc_finish_atomic();
1701        // At this point sweep_cur holds the detached old-heap list and the
1702        // dying string is dead-white. The bucket chain in `self.strings`
1703        // still references it.
1704        let resurrected = heap.intern(&dying_bytes);
1705        // Two valid outcomes:
1706        //   * resurrect: same pointer, but flagged current-white so sweep
1707        //     will keep it alive (PUC luaS_new shape)
1708        //   * skip-and-alloc-fresh: different pointer, dying gets swept
1709        //     normally as it should
1710        // Either way, after completing the sweep the heap must NOT crash
1711        // when we try to intern more short strings (which walks bucket chains).
1712        while !heap.gc_sweep_step(usize::MAX) {}
1713        // Smoke: bucket-chain walk for a fresh string must not deref a
1714        // freed pointer.
1715        let _fresh = heap.intern(b"after-sweep-canary");
1716        // If the fix is "resurrect", same pointer + bytes preserved:
1717        if resurrected.as_ptr() == dying_ptr {
1718            assert_eq!(resurrected.as_bytes(), dying_bytes.as_slice());
1719        }
1720        // Final cleanup: full collect must complete without UAF.
1721        drop(heap);
1722    }
1723
1724    #[test]
1725    fn deep_table_chain_marks_iteratively() {
1726        // deep chain: explicit mark stack must not overflow (smaller under
1727        // miri — the interpreter makes 100k tables take ~30 minutes)
1728        let n = if cfg!(miri) { 2_000 } else { 100_000 };
1729        let mut heap = Heap::new();
1730        let head = heap.new_table();
1731        let mut cur = head;
1732        for _ in 0..n {
1733            let next = heap.new_table();
1734            unsafe { cur.as_mut() }
1735                .set(&mut heap, Value::Int(1), Value::Table(next))
1736                .unwrap();
1737            cur = next;
1738        }
1739        assert_eq!(heap.collect(&[Value::Table(head)]), 0);
1740        assert_eq!(heap.collect(&[]), n + 1);
1741    }
1742}