Skip to main content

lua_gc/
heap.rs

1//! Phase D mark-and-sweep garbage collector.
2//!
3//! This module is the production GC for the Lua runtime, replacing the
4//! `Rc<T>`-backed `GcRef<T>` placeholder used through Phase B/C. It is a
5//! single-threaded, stop-the-world, precise tracing collector with a
6//! forward write barrier. Incremental marking is a future enhancement;
7//! the current implementation does a full collect each time `step` decides
8//! it's time.
9//!
10//! # Vocabulary
11//!
12//! - **Gc<T>**: a pointer-sized handle. `Copy + Clone`. Replaces `GcRef<T>`.
13//! - **GcBox<T>**: the heap allocation; contains a header and the value.
14//! - **GcHeader**: per-object metadata (color, finalized flag, intrusive
15//!   `next` pointer for the allgc list).
16//! - **Trace**: trait every GC-rooted type implements. The `trace` method
17//!   walks all `Gc<_>` fields and calls `Marker::mark` on each.
18//! - **Marker**: passed to `trace`; carries the gray queue.
19//! - **Heap**: owns the allgc list head, byte counters, GC state machine.
20//!
21//! # Safety model
22//!
23//! All `unsafe` is confined to this crate (per `harness/unsafe-budgets.toml`).
24//! The invariants are:
25//!
26//! 1. Every `Gc<T>` points to a valid, allocated, not-yet-swept `GcBox<T>`.
27//! 2. The allgc intrusive list is consistent: traversing `header.next` from
28//!    `Heap.head` reaches every live `GcBox` exactly once.
29//! 3. After `Heap::full_collect(roots)`, every `Gc<T>` reachable from `roots`
30//!    is still valid; unreachable boxes are dropped and deallocated.
31//!
32//! # Migration shape
33//!
34//! Existing code holds `GcRef<T>` (which after Phase D is a type alias for
35//! `Gc<T>`). Legacy call sites like `GcRef::new(value)` route through
36//! `Gc::new_uncollected` which allocates a `GcBox` but does NOT register it
37//! in any heap. Phase D-1b agent work converts these to
38//! `state.heap().allocate(value)` so the new box joins the allgc chain.
39
40use std::cell::{Cell, RefCell};
41use std::marker::PhantomData;
42use std::ptr::NonNull;
43
44// ──────────────────────────────────────────────────────────────────────────
45// Phase D-1c — scoped thread-local HeapGuard
46//
47// Lua's C API supports multiple `lua_State`s on one OS thread (sandbox-per-
48// state is a real embedding pattern). We honor that by stacking the
49// currently-active heap rather than holding a single slot. `HeapGuard::push`
50// activates a heap; drop pops it.
51//
52// `with_current_heap(...)` exposes the top of the stack only for the dynamic
53// extent of a closure — used by `GcRef::new` call sites that don't have
54// `&mut LuaState` in scope.
55// ──────────────────────────────────────────────────────────────────────────
56
57thread_local! {
58    static CURRENT_HEAP_STACK: RefCell<Vec<NonNull<Heap>>> = const { RefCell::new(Vec::new()) };
59}
60
61/// A scoped guard for the currently-active heap. Pushed at entry to
62/// `state.run()` / `state.protected_call()` / `state.load()`; popped on
63/// drop. Supports nesting (multiple LuaStates on one thread).
64pub struct HeapGuard {
65    // Anchor a NonNull so the user can't accidentally drop the guard while
66    // an inner Lua state is still active. We rely on RAII.
67    _private: (),
68}
69
70impl HeapGuard {
71    /// Push `heap` onto the active stack. Returns a guard; dropping it pops.
72    ///
73    /// # Safety
74    ///
75    /// The pointer must remain valid for the lifetime of the guard. Callers
76    /// typically pass `&state.global.heap`, which lives as long as the
77    /// `GlobalState` (an `Rc<RefCell<_>>`); the guard must drop before the
78    /// state is dropped.
79    pub fn push(heap: &Heap) -> Self {
80        let ptr = NonNull::from(heap);
81        CURRENT_HEAP_STACK.with(|stack| stack.borrow_mut().push(ptr));
82        HeapGuard { _private: () }
83    }
84}
85
86impl Drop for HeapGuard {
87    fn drop(&mut self) {
88        CURRENT_HEAP_STACK.with(|stack| {
89            let popped = stack.borrow_mut().pop();
90            debug_assert!(popped.is_some(), "HeapGuard::drop with empty stack");
91        });
92    }
93}
94
95/// Runs `f` with a reference to the currently-active heap, or `None` if no
96/// `HeapGuard` is in scope.
97///
98/// The heap reference is deliberately scoped to the closure. This avoids the
99/// previous `current_heap() -> Option<&'static Heap>` lifetime lie while still
100/// supporting legacy `GcRef::new` call sites that do not receive `&mut LuaState`.
101pub fn with_current_heap<R>(f: impl for<'a> FnOnce(Option<&'a Heap>) -> R) -> R {
102    CURRENT_HEAP_STACK.with(|stack| {
103        let ptr = stack.borrow().last().copied();
104        // SAFETY: the top NonNull was produced from a live `&Heap` whose
105        // lifetime is bounded by the corresponding `HeapGuard`. The reference
106        // is only handed to `f`, and cannot escape through the return type.
107        let heap = ptr.map(|ptr| unsafe { &*ptr.as_ptr() });
108        f(heap)
109    })
110}
111
112/// A traced color in the tri-color invariant.
113#[derive(Copy, Clone, PartialEq, Eq, Debug)]
114pub enum Color {
115    /// Not yet visited this cycle. Candidate for sweep.
116    White,
117    /// Visited; outgoing references not yet traced.
118    Gray,
119    /// Fully traced; no outgoing pointers to white objects.
120    Black,
121}
122
123/// Per-object GC metadata. Lives at the start of every `GcBox`.
124#[repr(C)]
125pub struct GcHeader {
126    color: Cell<Color>,
127    /// Set true after this object's finalizer (`__gc` metamethod) has run.
128    /// Phase D defers finalizers; this is reserved.
129    finalized: Cell<bool>,
130    /// Intrusive link into the heap's allgc chain.
131    next: Cell<Option<NonNull<GcBox<dyn Trace>>>>,
132    /// Rough byte size of the contained value; used for memory accounting.
133    size: usize,
134}
135
136impl GcHeader {
137    fn new_white(size: usize) -> Self {
138        Self {
139            color: Cell::new(Color::White),
140            finalized: Cell::new(false),
141            next: Cell::new(None),
142            size,
143        }
144    }
145}
146
147/// A heap-allocated, GC-tracked value plus its header.
148#[repr(C)]
149pub struct GcBox<T: ?Sized> {
150    header: GcHeader,
151    value: T,
152}
153
154impl<T: ?Sized> GcBox<T> {
155    pub fn header(&self) -> &GcHeader {
156        &self.header
157    }
158    pub fn value(&self) -> &T {
159        &self.value
160    }
161}
162
163/// A GC-managed pointer. Copy + Clone (one-machine-word). Replaces `GcRef<T>`.
164pub struct Gc<T: ?Sized> {
165    ptr: NonNull<GcBox<T>>,
166    /// Marker so `Gc<T>` is treated as if it owns `T` for variance.
167    _marker: PhantomData<T>,
168}
169
170// SAFETY: Gc is just a pointer. The Cell-based interior mutability of the
171// header is single-threaded (the entire Lua runtime is single-threaded), so
172// no Send/Sync impls are needed and we don't provide them.
173impl<T: ?Sized> Copy for Gc<T> {}
174impl<T: ?Sized> Clone for Gc<T> {
175    fn clone(&self) -> Self {
176        *self
177    }
178}
179
180impl<T: ?Sized> PartialEq for Gc<T> {
181    fn eq(&self, other: &Self) -> bool {
182        std::ptr::addr_eq(self.ptr.as_ptr(), other.ptr.as_ptr())
183    }
184}
185impl<T: ?Sized> Eq for Gc<T> {}
186
187impl<T: ?Sized> std::hash::Hash for Gc<T> {
188    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
189        self.ptr.as_ptr().hash(state)
190    }
191}
192
193impl<T: ?Sized> std::fmt::Debug for Gc<T> {
194    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195        write!(f, "Gc({:p})", self.ptr.as_ptr())
196    }
197}
198
199impl<T: Trace + 'static> Gc<T> {
200    /// Allocate a `GcBox<T>` outside any heap registry. Used by legacy
201    /// `GcRef::new` call sites until Phase D-1b migrates them. The returned
202    /// `Gc<T>` is reachable only through the caller's own retention path;
203    /// without joining a heap's allgc chain, it will never be swept (so
204    /// effectively leaks until process exit — same as Rc behavior).
205    pub fn new_uncollected(value: T) -> Self {
206        let size = std::mem::size_of::<T>();
207        let boxed = Box::new(GcBox {
208            header: GcHeader::new_white(size),
209            value,
210        });
211        Gc {
212            ptr: NonNull::new(Box::into_raw(boxed)).expect("Box::into_raw is non-null"),
213            _marker: PhantomData,
214        }
215    }
216}
217
218impl<T: ?Sized> Gc<T> {
219    /// Two `Gc<T>`s are identity-equal iff they point at the same box.
220    pub fn ptr_eq(a: Self, b: Self) -> bool {
221        std::ptr::addr_eq(a.ptr.as_ptr(), b.ptr.as_ptr())
222    }
223
224    /// Identity as a usize — usable as a hash table key for "is the *same
225    /// object*" lookups.
226    pub fn identity(self) -> usize {
227        self.ptr.as_ptr() as *const () as usize
228    }
229
230    /// Access the underlying value. Returns `&T` so callers can read fields
231    /// without taking the `Gc` apart. Interior mutability lives inside T's
232    /// own fields (Cell, RefCell, etc.).
233    fn as_box(&self) -> &GcBox<T> {
234        // SAFETY: A Gc<T> is constructed only by allocate() or
235        // new_uncollected(), both of which produce a valid GcBox. The box
236        // outlives the Gc until sweep, which only frees boxes not reachable
237        // from any root — so as long as this Gc is on the stack or in a
238        // traced field, the box is live.
239        unsafe { self.ptr.as_ref() }
240    }
241
242    fn header(&self) -> &GcHeader {
243        &self.as_box().header
244    }
245}
246
247impl<T: ?Sized> std::ops::Deref for Gc<T> {
248    type Target = T;
249    fn deref(&self) -> &T {
250        &self.as_box().value
251    }
252}
253
254impl<T: ?Sized> AsRef<T> for Gc<T> {
255    fn as_ref(&self) -> &T {
256        &self.as_box().value
257    }
258}
259
260/// Every GC-rooted type implements `Trace` to expose its `Gc<_>` fields.
261///
262/// The `trace` method visits every reachable `Gc<_>` and calls
263/// `Marker::mark` on it. For container fields (`Vec<LuaValue>`, etc.) call
264/// `field.trace(m)` to delegate.
265///
266/// # Mechanical pattern
267///
268/// ```ignore
269/// impl Trace for LuaTable {
270///     fn trace(&self, m: &mut Marker) {
271///         for v in self.array.iter() { v.trace(m); }
272///         if let Some(mt) = self.metatable { m.mark(mt); }
273///     }
274/// }
275/// ```
276pub trait Trace {
277    fn trace(&self, m: &mut Marker);
278}
279
280// Common blanket impls so most container types Just Work.
281impl<T: Trace> Trace for Vec<T> {
282    fn trace(&self, m: &mut Marker) {
283        for item in self.iter() {
284            item.trace(m);
285        }
286    }
287}
288
289impl<T: Trace> Trace for Option<T> {
290    fn trace(&self, m: &mut Marker) {
291        if let Some(v) = self {
292            v.trace(m);
293        }
294    }
295}
296
297impl<T: Trace + ?Sized> Trace for Box<T> {
298    fn trace(&self, m: &mut Marker) {
299        (**self).trace(m);
300    }
301}
302
303impl<T: Trace + ?Sized> Trace for std::rc::Rc<T> {
304    fn trace(&self, m: &mut Marker) {
305        (**self).trace(m);
306    }
307}
308
309impl<T: Trace> Trace for RefCell<T> {
310    fn trace(&self, m: &mut Marker) {
311        self.borrow().trace(m);
312    }
313}
314
315/// `Gc<T>` is itself traceable: marking it forwards to the contained `T`.
316impl<T: Trace + 'static> Trace for Gc<T> {
317    fn trace(&self, m: &mut Marker) {
318        m.mark(*self);
319    }
320}
321
322// Trivially-traceable primitive types: visiting does nothing.
323macro_rules! trace_noop {
324    ($($t:ty),*) => {
325        $(impl Trace for $t {
326            fn trace(&self, _m: &mut Marker) {}
327        })*
328    };
329}
330trace_noop!(
331    bool, u8, u16, u32, u64, u128, usize,
332    i8, i16, i32, i64, i128, isize,
333    f32, f64, char, String, str
334);
335
336impl<T> Trace for std::marker::PhantomData<T> {
337    fn trace(&self, _m: &mut Marker) {}
338}
339
340/// Holds the gray queue during a mark phase. Passed to `Trace::trace`.
341pub struct Marker {
342    gray_queue: Vec<NonNull<GcBox<dyn Trace>>>,
343    visited: std::collections::HashSet<usize>,
344}
345
346impl Marker {
347    fn new() -> Self {
348        Self {
349            gray_queue: Vec::with_capacity(256),
350            visited: std::collections::HashSet::new(),
351        }
352    }
353
354    /// Mark a `Gc<T>` as gray (reachable, but its outgoing edges not yet
355    /// traced). Called by `Trace::trace` implementations.
356    ///
357    /// Per-cycle dedup uses `visited` (a HashSet of box identities) rather
358    /// than the color flag. Color-based dedup would silently skip
359    /// `new_uncollected` boxes left Black by the previous cycle — those
360    /// allocations are NOT on the heap's allgc chain, so the start-of-mark
361    /// "reset all allgc to White" loop does not reach them, and a Black
362    /// uncollected box would be skipped without re-tracing its children
363    /// (causing reachable allgc descendants to be swept). The visited set
364    /// is rebuilt every `full_collect` (Marker::new), so this dedup is
365    /// always per-cycle.
366    pub fn mark<T: Trace + 'static>(&mut self, gc: Gc<T>) {
367        let id = gc.identity();
368        if self.visited.insert(id) {
369            let header = gc.header();
370            header.color.set(Color::Gray);
371            let ptr: NonNull<GcBox<dyn Trace>> = gc.ptr;
372            self.gray_queue.push(ptr);
373        }
374    }
375
376    /// Record that an Rc-backed object (`GcRef<T>` in Phase A-D-0) has been
377    /// visited and return whether this is the first visit. Used by recursive
378    /// `Trace` impls to break cycles while the real `Gc<T>` gray-queue path is
379    /// not yet wired (e.g. `_G._G == _G` would otherwise infinitely recurse).
380    pub fn try_visit(&mut self, addr: usize) -> bool {
381        self.visited.insert(addr)
382    }
383
384    /// True iff `id` was reached during the mark phase. Used by the
385    /// post-mark hook (`Heap::full_collect_with_post_mark`) to decide whether
386    /// a weak-table entry's target is still strongly reachable. Read-only —
387    /// callers cannot add entries.
388    pub fn is_visited(&self, id: usize) -> bool {
389        self.visited.contains(&id)
390    }
391
392    /// Number of objects marked so far. Used by the post-mark hook's
393    /// ephemeron-convergence fixed-point loop to detect when an iteration
394    /// added no new reachable objects and the loop can terminate.
395    pub fn visited_count(&self) -> usize {
396        self.visited.len()
397    }
398
399    /// Drain the gray queue, transitively marking children. Each gray box
400    /// becomes black; its `Trace::trace` is called so the children it points
401    /// at get pushed onto the queue. Repeats until the queue is empty.
402    ///
403    /// Exposed for the post-mark hook so it can run ephemeron convergence:
404    /// after marking new values via [`Marker::mark`] (or `value.trace(self)`),
405    /// the hook calls `drain_gray_queue` to propagate the new reachability,
406    /// then re-checks for fixed-point.
407    pub fn drain_gray_queue(&mut self) {
408        while let Some(gray_ptr) = self.gray_queue.pop() {
409            unsafe {
410                let bx = gray_ptr.as_ref();
411                bx.header.color.set(Color::Black);
412                bx.value.trace(self);
413            }
414        }
415    }
416}
417
418/// Phases of the incremental collection cycle.
419///
420/// The state machine matches a simplified subset of C-Lua's `lgc.c` FSM and
421/// is driven by [`Heap::incremental_step_with_post_mark`].
422///
423/// Transitions:
424/// - `Pause` → `Propagate` (on first step: reset colors, trace roots).
425/// - `Propagate` → `Atomic` (when the gray queue empties).
426/// - `Atomic` → `Sweep` (post-mark hook has run; sweep cursor is initialized).
427/// - `Sweep` → `Finalize` (sweep cursor reached the end of allgc).
428/// - `Finalize` → `Pause` (any pending finalize work has been drained).
429///
430/// `Collecting` is kept as a compatibility alias for the old API (used by
431/// `barrier`) — it means "anything but Pause."
432#[derive(Copy, Clone, PartialEq, Eq, Debug)]
433pub enum GcState {
434    Pause,
435    Propagate,
436    Atomic,
437    Sweep,
438    Finalize,
439}
440
441impl GcState {
442    pub fn is_pause(self) -> bool {
443        matches!(self, GcState::Pause)
444    }
445    pub fn is_propagate(self) -> bool {
446        matches!(self, GcState::Propagate)
447    }
448    pub fn is_sweep(self) -> bool {
449        matches!(self, GcState::Sweep)
450    }
451}
452
453/// Outcome of one call to [`Heap::incremental_step_with_post_mark`].
454#[derive(Copy, Clone, PartialEq, Eq, Debug)]
455pub enum StepOutcome {
456    /// The step finished a cycle. The heap is back at `GcState::Pause`.
457    Paused,
458    /// The step performed work but the cycle is not finished. Caller may
459    /// step again.
460    InProgress,
461    /// The heap is paused (via [`Heap::pause`]) or the caller asked for zero
462    /// budget while no cycle was in progress and no work was needed.
463    SkippedStopped,
464}
465
466/// Work budget for one incremental step.
467///
468/// `remaining_work` counts down by one for each unit of work performed (one
469/// gray object traced, one swept node visited, one finalizer dispatched).
470/// `max_credit` caps how negative `remaining_work` may be allowed to go — a
471/// step that overshoots its budget rolls the overrun into the caller's debt
472/// rather than letting unbounded work happen in one call.
473#[derive(Copy, Clone, Debug)]
474pub struct StepBudget {
475    pub remaining_work: isize,
476    pub max_credit: isize,
477}
478
479impl StepBudget {
480    /// Build a budget from a number of allowed work units.
481    pub fn from_work(work: isize) -> Self {
482        Self { remaining_work: work.max(1), max_credit: work.max(1) }
483    }
484}
485
486/// The heap. One per `GlobalState`. Owns the intrusive allgc list of every
487/// allocated `GcBox`, tracks total bytes, and runs collections.
488pub struct Heap {
489    /// Head of the singly-linked allgc list (every live `GcBox`).
490    head: Cell<Option<NonNull<GcBox<dyn Trace>>>>,
491    /// Total bytes allocated (sum of header sizes; rough).
492    bytes: Cell<usize>,
493    /// Threshold above which `step` triggers a collection.
494    threshold: Cell<usize>,
495    /// Multiplier on bytes_used to set next threshold after collection.
496    pause_multiplier: Cell<usize>,
497    /// State machine for the incremental collector.
498    state: Cell<GcState>,
499    /// If true, `step` and `barrier` are no-ops (for bootstrap before the
500    /// world is consistent).
501    paused: Cell<bool>,
502    /// Counter of full collections performed (for diagnostics).
503    collections: Cell<usize>,
504    /// In-progress marker state for incremental cycles. `Some` between
505    /// `Propagate` start and `Sweep` start; `None` otherwise.
506    marker: RefCell<Option<Marker>>,
507    /// Sweep cursor. Points at the `Cell` whose `Option<NonNull>` is the
508    /// "current" link being inspected during the sweep phase. Encoded as a
509    /// raw pointer because the cell lives inside a `GcHeader` (Cell, not Cell<Cell>).
510    /// `None` means: sweep starts from `self.head`.
511    sweep_prev_next: Cell<Option<NonNull<Cell<Option<NonNull<GcBox<dyn Trace>>>>>>>,
512}
513
514impl Default for Heap {
515    fn default() -> Self {
516        Self::new()
517    }
518}
519
520impl Heap {
521    pub fn new() -> Self {
522        Self {
523            head: Cell::new(None),
524            bytes: Cell::new(0),
525            threshold: Cell::new(64 * 1024), // initial threshold: 64 KB
526            pause_multiplier: Cell::new(200), // 200% = collect when bytes 2x threshold
527            state: Cell::new(GcState::Pause),
528            paused: Cell::new(true), // start paused; caller enables when world is consistent
529            collections: Cell::new(0),
530            marker: RefCell::new(None),
531            sweep_prev_next: Cell::new(None),
532        }
533    }
534
535    /// Enable collection. Until this is called, `step` is a no-op (so the
536    /// runtime can bootstrap without prematurely freeing objects).
537    pub fn unpause(&self) {
538        self.paused.set(false);
539    }
540
541    pub fn is_paused(&self) -> bool {
542        self.paused.get()
543    }
544
545    /// Allocate a new `GcBox<T>` and prepend it to the allgc chain.
546    pub fn allocate<T: Trace + 'static>(&self, value: T) -> Gc<T> {
547        let size = std::mem::size_of::<GcBox<T>>();
548        let boxed = Box::new(GcBox {
549            header: GcHeader::new_white(size),
550            value,
551        });
552        boxed.header.next.set(self.head.get());
553        let raw: *mut GcBox<T> = Box::into_raw(boxed);
554        let ptr: NonNull<GcBox<T>> =
555            NonNull::new(raw).expect("Box::into_raw is non-null");
556        let dyn_ptr: NonNull<GcBox<dyn Trace>> = ptr;
557        self.head.set(Some(dyn_ptr));
558        self.bytes.set(self.bytes.get() + size);
559        Gc {
560            ptr,
561            _marker: PhantomData,
562        }
563    }
564
565    /// Bytes currently retained by GC-tracked objects (rough estimate).
566    pub fn bytes_used(&self) -> usize {
567        self.bytes.get()
568    }
569
570    /// Current collection threshold in bytes. When `bytes_used() >= threshold_bytes()`,
571    /// the next `step()` will run a full collection (unless paused). Used by
572    /// callers that want to short-circuit expensive prep work (e.g. snapshotting
573    /// weak tables / pending finalizers) when no collection will actually fire.
574    pub fn threshold_bytes(&self) -> usize {
575        self.threshold.get()
576    }
577
578    /// Cheap predicate: would a `step()` actually do work? Equivalent to
579    /// `!paused && bytes_used() >= threshold_bytes()`. Callers that build
580    /// snapshot state before invoking the heap should gate on this.
581    pub fn would_collect(&self) -> bool {
582        !self.paused.get() && self.bytes.get() >= self.threshold.get()
583    }
584
585    pub fn collections(&self) -> usize {
586        self.collections.get()
587    }
588
589    /// Forward write barrier: invoked when `parent` (already-traced black
590    /// object) gains a new reference to `child`. To preserve the tri-color
591    /// invariant ("no black points to white"), we mark the child gray
592    /// immediately. Cheap: one branch + maybe one queue push.
593    ///
594    /// During incremental mode this prevents the marking phase from missing
595    /// the new edge. In current stop-the-world mode it's still correct (a
596    /// no-op when the collection is idle), so call sites can be wired now
597    /// and the incremental upgrade is mechanical later.
598    pub fn barrier<P, C>(&self, parent: Gc<P>, child: Gc<C>)
599    where
600        P: Trace + 'static,
601        C: Trace + 'static,
602    {
603        if self.paused.get() || self.state.get().is_pause() {
604            return;
605        }
606        if parent.header().color.get() != Color::Black {
607            return;
608        }
609        if child.header().color.get() != Color::White {
610            return;
611        }
612        child.header().color.set(Color::Gray);
613        if let Ok(mut m_opt) = self.marker.try_borrow_mut() {
614            if let Some(m) = m_opt.as_mut() {
615                let ptr: NonNull<GcBox<dyn Trace>> = child.ptr;
616                m.gray_queue.push(ptr);
617                m.visited.insert(child.identity());
618            }
619        }
620    }
621
622    /// Possibly run a collection. Trigger: bytes_used > threshold.
623    /// Caller passes the root set (the runtime — typically `GlobalState`
624    /// implementing `Trace`).
625    pub fn step(&self, roots: &dyn Trace) {
626        self.step_with_post_mark(roots, |_: &mut Marker| {});
627    }
628
629    /// Like [`step`] but invokes a `post_mark` hook when a collection
630    /// actually fires (threshold reached). Hook is a no-op on the
631    /// short-circuit path. The runtime uses this to bridge weak-table
632    /// pruning into implicit GC steps fired from inside the VM loop.
633    pub fn step_with_post_mark<F: FnMut(&mut Marker)>(
634        &self,
635        roots: &dyn Trace,
636        post_mark: F,
637    ) {
638        if self.paused.get() {
639            return;
640        }
641        if self.bytes.get() < self.threshold.get() {
642            return;
643        }
644        self.full_collect_with_post_mark(roots, post_mark);
645    }
646
647    /// Stop-the-world full collect. Marks every reachable object from
648    /// `roots`, then sweeps white (unreachable) boxes from the allgc chain.
649    pub fn full_collect(&self, roots: &dyn Trace) {
650        self.full_collect_with_post_mark(roots, |_: &mut Marker| {});
651    }
652
653    /// Run only the mark/atomic hook portion of a collection, without sweeping.
654    ///
655    /// This is used by runtimes that need an atomic reachability snapshot for
656    /// weak-table cleanup while they are deliberately avoiding object freeing.
657    pub fn mark_only_with_post_mark<F: FnMut(&mut Marker)>(
658        &self,
659        roots: &dyn Trace,
660        mut post_mark: F,
661    ) {
662        if self.paused.get() {
663            return;
664        }
665        let mut cursor = self.head.get();
666        while let Some(ptr) = cursor {
667            let header = unsafe { &(*ptr.as_ptr()).header };
668            header.color.set(Color::White);
669            cursor = header.next.get();
670        }
671        let mut marker = Marker::new();
672        roots.trace(&mut marker);
673        marker.drain_gray_queue();
674        post_mark(&mut marker);
675        marker.drain_gray_queue();
676    }
677
678    /// Stop-the-world full collect with a post-mark hook.
679    ///
680    /// Internally drives the incremental state machine to completion with
681    /// an unbounded budget — equivalent to repeatedly calling
682    /// [`incremental_step_with_post_mark`] until it returns `Paused`. The
683    /// post-mark hook is invoked exactly once, during the atomic transition.
684    pub fn full_collect_with_post_mark<F: FnMut(&mut Marker)>(
685        &self,
686        roots: &dyn Trace,
687        mut post_mark: F,
688    ) {
689        if self.paused.get() {
690            return;
691        }
692        if !self.state.get().is_pause() {
693            self.abort_cycle();
694        }
695        let unlimited = StepBudget { remaining_work: isize::MAX, max_credit: isize::MAX };
696        loop {
697            let outcome = self.incremental_step_with_post_mark(roots, unlimited, &mut post_mark);
698            if matches!(outcome, StepOutcome::Paused | StepOutcome::SkippedStopped) {
699                break;
700            }
701        }
702    }
703
704    /// Run one budgeted step of the incremental collector.
705    ///
706    /// The state machine advances `Pause → Propagate → Atomic → Sweep →
707    /// Finalize → Pause`. Each phase consumes budget; the call returns when
708    /// the budget runs out or the cycle reaches `Pause`. The `post_mark`
709    /// hook is invoked exactly once per cycle, during the `Atomic`
710    /// transition (after the initial gray-queue drain, before sweep starts).
711    ///
712    /// Returns:
713    /// - [`StepOutcome::Paused`] — the cycle completed.
714    /// - [`StepOutcome::InProgress`] — budget exhausted before the cycle
715    ///   finished; caller may step again.
716    /// - [`StepOutcome::SkippedStopped`] — heap is paused; nothing happened.
717    pub fn incremental_step_with_post_mark<F: FnMut(&mut Marker)>(
718        &self,
719        roots: &dyn Trace,
720        mut budget: StepBudget,
721        mut post_mark: F,
722    ) -> StepOutcome {
723        if self.paused.get() {
724            return StepOutcome::SkippedStopped;
725        }
726        self.run_budgeted(roots, &mut budget, &mut post_mark);
727        if self.state.get().is_pause() {
728            StepOutcome::Paused
729        } else {
730            StepOutcome::InProgress
731        }
732    }
733
734    fn run_budgeted(
735        &self,
736        roots: &dyn Trace,
737        budget: &mut StepBudget,
738        post_mark: &mut dyn FnMut(&mut Marker),
739    ) -> bool {
740        let mut did_work = false;
741        loop {
742            if budget.remaining_work <= -budget.max_credit {
743                return did_work;
744            }
745            match self.state.get() {
746                GcState::Pause => {
747                    self.start_cycle(roots);
748                    self.state.set(GcState::Propagate);
749                    budget.remaining_work -= 1;
750                    did_work = true;
751                }
752                GcState::Propagate => {
753                    let work = self.drain_gray_budgeted(budget.remaining_work.max(1));
754                    budget.remaining_work -= work as isize;
755                    did_work = did_work || work > 0;
756                    let empty = {
757                        let m = self.marker.borrow();
758                        m.as_ref().map(|m| m.gray_queue.is_empty()).unwrap_or(true)
759                    };
760                    if empty {
761                        self.state.set(GcState::Atomic);
762                    } else if budget.remaining_work <= 0 {
763                        return did_work;
764                    }
765                }
766                GcState::Atomic => {
767                    self.run_atomic(post_mark);
768                    self.state.set(GcState::Sweep);
769                    budget.remaining_work -= 1;
770                    did_work = true;
771                }
772                GcState::Sweep => {
773                    let work = self.sweep_budgeted(budget.remaining_work.max(1));
774                    budget.remaining_work -= work as isize;
775                    did_work = did_work || work > 0;
776                    if self.sweep_prev_next.get().is_none() {
777                        self.state.set(GcState::Finalize);
778                    } else if budget.remaining_work <= 0 {
779                        return did_work;
780                    }
781                }
782                GcState::Finalize => {
783                    self.finish_cycle();
784                    self.state.set(GcState::Pause);
785                    return did_work;
786                }
787            }
788        }
789    }
790
791    fn start_cycle(&self, roots: &dyn Trace) {
792        let mut cursor = self.head.get();
793        while let Some(ptr) = cursor {
794            let header = unsafe { &(*ptr.as_ptr()).header };
795            header.color.set(Color::White);
796            cursor = header.next.get();
797        }
798        let mut marker = Marker::new();
799        roots.trace(&mut marker);
800        *self.marker.borrow_mut() = Some(marker);
801        self.sweep_prev_next.set(None);
802    }
803
804    fn drain_gray_budgeted(&self, max_units: isize) -> usize {
805        let mut m_opt = self.marker.borrow_mut();
806        let marker = match m_opt.as_mut() {
807            Some(m) => m,
808            None => return 0,
809        };
810        let mut work = 0usize;
811        let mut budget = max_units;
812        while budget > 0 {
813            let next = match marker.gray_queue.pop() {
814                Some(p) => p,
815                None => break,
816            };
817            unsafe {
818                let bx = next.as_ref();
819                bx.header.color.set(Color::Black);
820                bx.value.trace(marker);
821            }
822            work += 1;
823            budget -= 1;
824        }
825        work
826    }
827
828    fn run_atomic(&self, post_mark: &mut dyn FnMut(&mut Marker)) {
829        let mut m_opt = self.marker.borrow_mut();
830        if let Some(marker) = m_opt.as_mut() {
831            marker.drain_gray_queue();
832            post_mark(marker);
833            marker.drain_gray_queue();
834        }
835        self.sweep_prev_next.set(Some(NonNull::from(&self.head)));
836    }
837
838    fn sweep_budgeted(&self, max_units: isize) -> usize {
839        let mut work = 0usize;
840        let mut budget = max_units;
841        let mut freed_bytes = 0usize;
842        let mut prev_next_ptr = match self.sweep_prev_next.get() {
843            Some(p) => p,
844            None => return 0,
845        };
846        while budget > 0 {
847            let prev_cell = unsafe { prev_next_ptr.as_ref() };
848            let cursor = prev_cell.get();
849            let ptr = match cursor {
850                Some(p) => p,
851                None => {
852                    self.sweep_prev_next.set(None);
853                    break;
854                }
855            };
856            let header = unsafe { &(*ptr.as_ptr()).header };
857            let next = header.next.get();
858            match header.color.get() {
859                Color::White => {
860                    prev_cell.set(next);
861                    freed_bytes += header.size;
862                    unsafe {
863                        let _ = Box::from_raw(ptr.as_ptr());
864                    }
865                }
866                Color::Black | Color::Gray => {
867                    header.color.set(Color::White);
868                    prev_next_ptr = unsafe {
869                        NonNull::from(&(*ptr.as_ptr()).header.next)
870                    };
871                    self.sweep_prev_next.set(Some(prev_next_ptr));
872                }
873            }
874            work += 1;
875            budget -= 1;
876        }
877        if freed_bytes > 0 {
878            self.bytes.set(self.bytes.get().saturating_sub(freed_bytes));
879        }
880        work
881    }
882
883    fn finish_cycle(&self) {
884        *self.marker.borrow_mut() = None;
885        self.sweep_prev_next.set(None);
886        self.threshold
887            .set(self.bytes.get() * self.pause_multiplier.get() / 100);
888        self.collections.set(self.collections.get() + 1);
889    }
890
891    fn abort_cycle(&self) {
892        if !self.state.get().is_pause() {
893            *self.marker.borrow_mut() = None;
894            self.sweep_prev_next.set(None);
895            self.state.set(GcState::Pause);
896        }
897    }
898
899    /// Returns the current state of the incremental collector.
900    pub fn gc_state(&self) -> GcState {
901        self.state.get()
902    }
903
904    /// Approximate number of live GC boxes, computed by walking the allgc
905    /// chain. Linear in heap size; used for cycle-cost estimation and tests.
906    pub fn allgc_count(&self) -> usize {
907        let mut count = 0usize;
908        let mut cursor = self.head.get();
909        while let Some(ptr) = cursor {
910            count += 1;
911            let header = unsafe { &(*ptr.as_ptr()).header };
912            cursor = header.next.get();
913        }
914        count
915    }
916
917    /// Drop every allocation, ignoring reachability. Called at shutdown.
918    /// After this returns, every outstanding `Gc<T>` is dangling — callers
919    /// must ensure no `Gc<T>` outlives the `Heap`.
920    pub fn drop_all(&self) {
921        *self.marker.borrow_mut() = None;
922        self.sweep_prev_next.set(None);
923        self.state.set(GcState::Pause);
924        let mut cursor = self.head.get();
925        self.head.set(None);
926        while let Some(ptr) = cursor {
927            // SAFETY: same chain invariant as full_collect's sweep.
928            let next = unsafe { (*ptr.as_ptr()).header.next.get() };
929            unsafe {
930                let _ = Box::from_raw(ptr.as_ptr());
931            }
932            cursor = next;
933        }
934        self.bytes.set(0);
935    }
936}
937
938impl Drop for Heap {
939    fn drop(&mut self) {
940        self.drop_all();
941    }
942}
943
944// ──────────────────────────────────────────────────────────────────────────
945// Tests — confirm the skeleton's invariants before agents ever touch it.
946// ──────────────────────────────────────────────────────────────────────────
947
948#[cfg(test)]
949mod tests {
950    use super::*;
951
952    /// A tiny GC-tracked type for the smoke test.
953    struct Cell0 {
954        next: Cell<Option<Gc<Cell0>>>,
955        marker_calls: Cell<usize>,
956    }
957
958    impl Trace for Cell0 {
959        fn trace(&self, m: &mut Marker) {
960            self.marker_calls.set(self.marker_calls.get() + 1);
961            if let Some(n) = self.next.get() {
962                m.mark(n);
963            }
964        }
965    }
966
967    /// Roots for tests: just a single Gc<Cell0>, or none.
968    struct OneRoot(Option<Gc<Cell0>>);
969    impl Trace for OneRoot {
970        fn trace(&self, m: &mut Marker) {
971            if let Some(g) = self.0 {
972                m.mark(g);
973            }
974        }
975    }
976
977    #[test]
978    fn alloc_and_drop_all() {
979        let heap = Heap::new();
980        heap.unpause();
981        let _a = heap.allocate(Cell0 {
982            next: Cell::new(None),
983            marker_calls: Cell::new(0),
984        });
985        let _b = heap.allocate(Cell0 {
986            next: Cell::new(None),
987            marker_calls: Cell::new(0),
988        });
989        assert!(heap.bytes_used() > 0);
990        heap.drop_all();
991        assert_eq!(heap.bytes_used(), 0);
992    }
993
994    #[test]
995    fn collect_unreachable_frees_bytes() {
996        let heap = Heap::new();
997        heap.unpause();
998        let _a = heap.allocate(Cell0 {
999            next: Cell::new(None),
1000            marker_calls: Cell::new(0),
1001        });
1002        let bytes_before = heap.bytes_used();
1003        assert!(bytes_before > 0);
1004        // No roots — everything should sweep.
1005        heap.full_collect(&OneRoot(None));
1006        assert_eq!(heap.bytes_used(), 0);
1007        assert_eq!(heap.collections(), 1);
1008    }
1009
1010    #[test]
1011    fn collect_keeps_reachable() {
1012        let heap = Heap::new();
1013        heap.unpause();
1014        let root = heap.allocate(Cell0 {
1015            next: Cell::new(None),
1016            marker_calls: Cell::new(0),
1017        });
1018        let bytes_before = heap.bytes_used();
1019        heap.full_collect(&OneRoot(Some(root)));
1020        assert_eq!(heap.bytes_used(), bytes_before);
1021        assert_eq!(root.marker_calls.get(), 1);
1022    }
1023
1024    #[test]
1025    fn collect_traverses_cycles() {
1026        let heap = Heap::new();
1027        heap.unpause();
1028        let a = heap.allocate(Cell0 {
1029            next: Cell::new(None),
1030            marker_calls: Cell::new(0),
1031        });
1032        let b = heap.allocate(Cell0 {
1033            next: Cell::new(Some(a)),
1034            marker_calls: Cell::new(0),
1035        });
1036        a.next.set(Some(b)); // cycle
1037        // With a as root, both should survive.
1038        heap.full_collect(&OneRoot(Some(a)));
1039        assert_eq!(a.marker_calls.get(), 1);
1040        assert_eq!(b.marker_calls.get(), 1);
1041        // Drop the only root path; cycle should now be collected.
1042        // (Note: `a` and `b` are still on the stack as Gc<Cell0> handles, but
1043        // they're not in a Trace-visible position.)
1044        heap.full_collect(&OneRoot(None));
1045        assert_eq!(heap.bytes_used(), 0);
1046    }
1047
1048    #[test]
1049    fn heap_guard_stacks() {
1050        assert!(with_current_heap(|heap| heap.is_none()), "no guard initially");
1051        let h1 = Heap::new();
1052        h1.unpause();
1053        {
1054            let _g1 = HeapGuard::push(&h1);
1055            assert!(with_current_heap(|heap| heap.is_some()));
1056            let h2 = Heap::new();
1057            h2.unpause();
1058            {
1059                let _g2 = HeapGuard::push(&h2);
1060                // top of stack is h2
1061                with_current_heap(|heap| {
1062                    assert!(std::ptr::addr_eq(
1063                        heap.unwrap() as *const _,
1064                        &h2 as *const _,
1065                    ));
1066                });
1067            }
1068            // _g2 dropped — top is back to h1
1069            with_current_heap(|heap| {
1070                assert!(std::ptr::addr_eq(
1071                    heap.unwrap() as *const _,
1072                    &h1 as *const _,
1073                ));
1074            });
1075        }
1076        assert!(
1077            with_current_heap(|heap| heap.is_none()),
1078            "stack empty after all guards drop"
1079        );
1080    }
1081
1082    #[test]
1083    fn step_threshold_triggers_collect() {
1084        let heap = Heap::new();
1085        heap.unpause();
1086        // Allocate enough boxes to exceed the default 64KB threshold.
1087        let mut keeps = Vec::new();
1088        for _ in 0..64 {
1089            // ~1KB per box (Cell0 is small, but allocating many headers
1090            // accumulates). For the threshold test we'd typically allocate
1091            // larger objects; this is a smoke test.
1092            keeps.push(heap.allocate(Cell0 {
1093                next: Cell::new(None),
1094                marker_calls: Cell::new(0),
1095            }));
1096        }
1097        // Build roots that retain all of keeps via individual marks.
1098        struct ManyRoots<'a>(&'a [Gc<Cell0>]);
1099        impl<'a> Trace for ManyRoots<'a> {
1100            fn trace(&self, m: &mut Marker) {
1101                for g in self.0.iter() {
1102                    m.mark(*g);
1103                }
1104            }
1105        }
1106        heap.step(&ManyRoots(&keeps));
1107        // step is a no-op below threshold; all roots retained.
1108        assert!(heap.bytes_used() > 0);
1109    }
1110
1111    fn build_chain(heap: &Heap, len: usize) -> Gc<Cell0> {
1112        let head = heap.allocate(Cell0 {
1113            next: Cell::new(None),
1114            marker_calls: Cell::new(0),
1115        });
1116        let mut cur = head;
1117        for _ in 1..len {
1118            let n = heap.allocate(Cell0 {
1119                next: Cell::new(None),
1120                marker_calls: Cell::new(0),
1121            });
1122            cur.next.set(Some(n));
1123            cur = n;
1124        }
1125        head
1126    }
1127
1128    #[test]
1129    fn budget_zero_does_some_work() {
1130        let heap = Heap::new();
1131        heap.unpause();
1132        let head = build_chain(&heap, 8);
1133        let roots = OneRoot(Some(head));
1134        let budget = StepBudget::from_work(0);
1135        let outcome = heap.incremental_step_with_post_mark(&roots, budget, |_| {});
1136        assert_ne!(outcome, StepOutcome::SkippedStopped);
1137        assert_ne!(heap.gc_state(), GcState::Pause);
1138    }
1139
1140    #[test]
1141    fn larger_budget_drains_more_gray_than_smaller() {
1142        let small_heap = Heap::new();
1143        small_heap.unpause();
1144        let h1 = build_chain(&small_heap, 64);
1145        let r1 = OneRoot(Some(h1));
1146        let mut small_calls = 0;
1147        loop {
1148            small_calls += 1;
1149            let outcome = small_heap.incremental_step_with_post_mark(
1150                &r1,
1151                StepBudget::from_work(2),
1152                |_| {},
1153            );
1154            if outcome == StepOutcome::Paused {
1155                break;
1156            }
1157            assert!(small_calls < 10_000, "did not converge");
1158        }
1159
1160        let big_heap = Heap::new();
1161        big_heap.unpause();
1162        let h2 = build_chain(&big_heap, 64);
1163        let r2 = OneRoot(Some(h2));
1164        let mut big_calls = 0;
1165        loop {
1166            big_calls += 1;
1167            let outcome = big_heap.incremental_step_with_post_mark(
1168                &r2,
1169                StepBudget::from_work(64),
1170                |_| {},
1171            );
1172            if outcome == StepOutcome::Paused {
1173                break;
1174            }
1175            assert!(big_calls < 10_000, "did not converge");
1176        }
1177
1178        assert!(
1179            big_calls < small_calls,
1180            "expected big_calls={} < small_calls={}",
1181            big_calls,
1182            small_calls
1183        );
1184    }
1185
1186    #[test]
1187    fn sweep_can_pause_and_resume() {
1188        let heap = Heap::new();
1189        heap.unpause();
1190        for _ in 0..16 {
1191            let _ = heap.allocate(Cell0 {
1192                next: Cell::new(None),
1193                marker_calls: Cell::new(0),
1194            });
1195        }
1196        let roots = OneRoot(None);
1197        let bytes_before = heap.bytes_used();
1198        assert!(bytes_before > 0);
1199        let mut step_count = 0;
1200        let mut saw_in_progress_during_sweep = false;
1201        loop {
1202            step_count += 1;
1203            let outcome = heap.incremental_step_with_post_mark(
1204                &roots,
1205                StepBudget::from_work(2),
1206                |_| {},
1207            );
1208            if heap.gc_state() == GcState::Sweep && outcome == StepOutcome::InProgress {
1209                saw_in_progress_during_sweep = true;
1210            }
1211            if outcome == StepOutcome::Paused {
1212                break;
1213            }
1214            assert!(step_count < 10_000, "did not converge");
1215        }
1216        assert!(saw_in_progress_during_sweep, "sweep never paused mid-list");
1217        assert_eq!(heap.bytes_used(), 0);
1218    }
1219
1220    #[test]
1221    fn post_mark_runs_once_per_atomic() {
1222        let heap = Heap::new();
1223        heap.unpause();
1224        for _ in 0..32 {
1225            let _ = heap.allocate(Cell0 {
1226                next: Cell::new(None),
1227                marker_calls: Cell::new(0),
1228            });
1229        }
1230        let roots = OneRoot(None);
1231        let call_count = std::cell::Cell::new(0);
1232        let mut step_count = 0;
1233        loop {
1234            step_count += 1;
1235            let outcome = heap.incremental_step_with_post_mark(
1236                &roots,
1237                StepBudget::from_work(2),
1238                |_| {
1239                    call_count.set(call_count.get() + 1);
1240                },
1241            );
1242            if outcome == StepOutcome::Paused {
1243                break;
1244            }
1245            assert!(step_count < 10_000, "did not converge");
1246        }
1247        assert_eq!(call_count.get(), 1, "post_mark must run exactly once per cycle");
1248    }
1249
1250    #[test]
1251    fn full_collect_equivalent_to_incremental_to_pause() {
1252        let h1 = Heap::new();
1253        h1.unpause();
1254        let head1 = h1.allocate(Cell0 {
1255            next: Cell::new(None),
1256            marker_calls: Cell::new(0),
1257        });
1258        let _orphan1 = h1.allocate(Cell0 {
1259            next: Cell::new(None),
1260            marker_calls: Cell::new(0),
1261        });
1262        let _orphan2 = h1.allocate(Cell0 {
1263            next: Cell::new(None),
1264            marker_calls: Cell::new(0),
1265        });
1266        let roots1 = OneRoot(Some(head1));
1267        h1.full_collect(&roots1);
1268        let bytes_full = h1.bytes_used();
1269
1270        let h2 = Heap::new();
1271        h2.unpause();
1272        let head2 = h2.allocate(Cell0 {
1273            next: Cell::new(None),
1274            marker_calls: Cell::new(0),
1275        });
1276        let _orphan3 = h2.allocate(Cell0 {
1277            next: Cell::new(None),
1278            marker_calls: Cell::new(0),
1279        });
1280        let _orphan4 = h2.allocate(Cell0 {
1281            next: Cell::new(None),
1282            marker_calls: Cell::new(0),
1283        });
1284        let roots2 = OneRoot(Some(head2));
1285        loop {
1286            let outcome = h2.incremental_step_with_post_mark(
1287                &roots2,
1288                StepBudget::from_work(1),
1289                |_| {},
1290            );
1291            if outcome == StepOutcome::Paused {
1292                break;
1293            }
1294        }
1295        assert_eq!(h2.bytes_used(), bytes_full);
1296    }
1297}
1298
1299// ──────────────────────────────────────────────────────────────────────────────
1300// PORT STATUS
1301//   source:        production Rust heap/collector substrate
1302//   target_crate:  lua-gc
1303//   confidence:    medium
1304//   todos:         0
1305//   port_notes:    0
1306//   unsafe_blocks: 13
1307//   notes:         Mark-and-sweep collector heap + the Gc<T> raw-pointer substrate. Uses
1308//                  NonNull<GcBox<T>> with linked-list allgc walking; unsafe is
1309//                  required for raw pointer ops and Box::into_raw/from_raw. See
1310//                  unsafe-budgets.toml for the per-crate ceiling.
1311// ──────────────────────────────────────────────────────────────────────────────