Skip to main content

luna_core/vm/
exec.rs

1//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
2//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
3//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
4//!
5//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
6//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
7//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
8//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16    AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17    MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
24/// A Lua virtual machine: one OS thread's worth of Lua state.
25///
26/// # Threading model
27///
28/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
29/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
30/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
31/// design. Embedders that want concurrency spawn one `Vm` per OS
32/// thread (or per single-thread Tokio worker) and exchange data via
33/// channels. See [`docs/threading.md`](../../docs/threading.md) for
34/// canonical embedding patterns including Tokio `current_thread`,
35/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
36///
37/// The constraint is enforced at compile time:
38///
39/// ```compile_fail
40/// fn must_be_send<T: Send>() {}
41/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
42/// ```
43///
44/// A future `feature = "send"` (post-v1.1 sprint) will gate an
45/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
46/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
47pub struct Vm {
48    /// The GC heap owned by this VM. Embedders normally interact via the
49    /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
50    /// the heap directly.
51    pub heap: Heap,
52    stack: Vec<Value>,
53    frames: Vec<CallFrame>,
54    /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
55    /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
56    /// helpers (debug-asserted on use). NOT consumed by readers yet;
57    /// week 1 is pure scaffold. Week 2-N migrations replace readers
58    /// one slice at a time, then remove `frames: Vec<CallFrame>` in
59    /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
60    frames_top: u32,
61    /// open upvalues, sorted ascending by stack slot
62    open_upvals: Vec<(u32, Gc<Upvalue>)>,
63    /// to-be-closed slots, ascending
64    tbc: Vec<u32>,
65    /// logical stack top for multi-result sequences
66    pub(crate) top: u32,
67    globals: Gc<Table>,
68    /// shared metatable for all strings (populated by the string lib, P04)
69    /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
70    /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
71    /// own. Settable via debug.setmetatable.
72    type_mt: [Option<Gc<Table>>; 5],
73    /// pre-interned metamethod event names, indexed by `Mm`
74    mm_names: Vec<Gc<crate::runtime::LuaStr>>,
75    /// native↔Lua nesting depth (PUC C-stack guard analogue)
76    c_depth: u32,
77    /// number of live pcall/xpcall continuation frames on the running thread
78    /// (PUC counts these against nCcalls). Bounds protected-call recursion the
79    /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
80    /// with the coroutine context, since continuations survive a yield.
81    pcall_depth: u32,
82    /// number of non-yieldable C calls in flight on the running thread (PUC's
83    /// `L->nny`). A library callback that runs via synchronous Rust recursion
84    /// (sort comparator, gsub replacement) cannot be continued across a yield,
85    /// so it bumps this for its duration; `coroutine.yield` inside hits the
86    /// C-call boundary and errors. Always 0 at a suspend point (a yield can
87    /// never cross such a call), so it needs no per-thread save/restore.
88    nny: u32,
89    /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
90    /// stack-overflow that surfaces *inside* the handler is reported as PUC's
91    /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
92    /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
93    /// loop)` then matches. PUC tracks this via the soft-cap window
94    /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
95    /// scope explicitly.
96    msgh_depth: u32,
97    /// set by a coroutine closing itself (`coroutine.close()` on the running
98    /// thread): the to-be-closed handlers have already run; the thread must now
99    /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
100    /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
101    /// unwind, so a protecting pcall cannot catch it) the termination.
102    terminating: Option<Option<Value>>,
103    /// xoshiro256** state (math.random)
104    rng: [u64; 4],
105    /// VM creation time (os.clock)
106    started: std::time::Instant,
107    version: LuaVersion,
108    /// error object being threaded through a chain of __close handlers; a GC
109    /// root for the duration (a handler may trigger collection)
110    closing_err: Option<Value>,
111    /// the coroutine whose context is currently live in the fields above;
112    /// `None` while the main thread runs (P05)
113    current: Option<Gc<crate::runtime::Coro>>,
114    /// the main thread's saved execution context while a coroutine runs
115    main_ctx: Option<SavedCtx>,
116    /// set by `coroutine.yield` to suspend the running coroutine: the yielded
117    /// values plus the slot/result-count needed to finish the yielding call on
118    /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
119    yielding: Option<(Vec<Value>, u32, i32)>,
120    /// results expected by the in-flight native call (so `yield` knows how many
121    /// values its call site wants when it suspends)
122    native_nresults: i32,
123    /// identity object for the main thread, returned by `coroutine.running`
124    /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
125    main_coro: Option<Gc<Coro>>,
126    /// `collectgarbage` mode name ("incremental"/"generational"). The collector
127    /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
128    /// switches report the previous one, as PUC does.
129    gc_mode: &'static str,
130    /// the live-register boundary of the running thread for GC rooting (PUC's
131    /// `L->top`): set precisely at each GC safe point so freed temporary
132    /// registers above it are not rooted. Without this the collector roots the
133    /// whole stack window, pinning weak-table values stranded in stale temps
134    /// (e.g. closure.lua's `while x[1]` GC-detection loop).
135    pub(crate) gc_top: u32,
136    /// `collectgarbage("param", name [,value])` pacing parameters. The collector
137    /// is still stop-the-world, so these are stored/returned for API fidelity
138    /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
139    /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
140    gc_pause: i64,
141    gc_stepmul: i64,
142    gc_stepsize: i64,
143    /// true while `__gc` finalizers are being run, so a finalizer that calls
144    /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
145    /// `collectgarbage` yields fail).
146    gc_finalizing: bool,
147    /// C ABI scratch (`capi` module): the host-visible value stack that C
148    /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
149    /// Kept here (instead of in a separate `LuaState` wrapper) so the
150    /// trampoline that bridges to a `LuaCFunction` can safely cast the
151    /// Vm pointer it already holds to the public `*mut LuaState` type
152    /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
153    pub capi_stack: Vec<crate::runtime::Value>,
154    /// Pinned CString backing the pointer last returned by `lua_tostring`;
155    /// valid until the next `lua_tostring` on the same Vm.
156    pub capi_cstr_pin: Option<std::ffi::CString>,
157    /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
158    /// concatenate across continuation calls until a non-`tocont` call
159    /// flushes; the default warnf recognises `@on`/`@off` control messages
160    /// and starts disabled. luna's `emit_warn` mirrors the default warnf
161    /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
162    /// keep the older raise semantics).
163    pub(crate) warn_state: WarnState,
164    pub(crate) warn_buf: Vec<u8>,
165    /// P09 embedding cooperative budget: a per-Vm tick counter that the run
166    /// loop decrements once per dispatch turn. When it hits zero the loop
167    /// raises a catchable "instruction budget exceeded" error so the embedder
168    /// can yield control back to its caller (short-script eval, game
169    /// frame budgets). `None` = unbounded; reset on each call via
170    /// `set_instr_budget`.
171    pub(crate) instr_budget: Option<i64>,
172    // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
173    // `self.jit` below + `crate::vm::jit_state` for field docs.
174    // (Was: jit_enabled here.)
175    // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
176    // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
177    // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
178    // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
179    // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
180    // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
181    // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
182    // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
183    // materialize_emit,closure_emit}_count — all moved to JitState.
184    /// Bytecode-loading gate. Default `true`. Sandbox embedders should
185    /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
186    /// precompiled chunks (which bypass the parser's depth / opcode
187    /// limits). When `false`, the loader rejects any source whose first
188    /// byte is the bytecode signature `\27` ("`\27Lua`").
189    pub(crate) bytecode_loading: bool,
190    /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
191    /// a strictly larger trust surface than luna's own dump format
192    /// (third-party toolchain bugs, malformed chunks, unknown opcode
193    /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
194    /// through the per-dialect PUC translators in `crate::vm::dump::puc`
195    /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
196    /// Embedder toggles via `set_puc_bytecode_loading`.
197    pub(crate) puc_bytecode_loading: bool,
198    /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
199    /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
200    /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
201    /// source exceeds this, the loader returns the PUC-shaped
202    /// `not enough memory` error before the host allocator is asked to
203    /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
204    /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
205    /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
206    /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
207    /// Embedders that genuinely need to load > 256 MiB sources widen the
208    /// cap via [`Vm::set_loader_input_budget`].
209    pub(crate) loader_input_budget: usize,
210    /// In-process log of fully-emitted warnings (each entry = one flushed
211    /// message, sans the "Lua warning: " prefix and trailing newline). Lets
212    /// tests assert what was warned without scraping stderr.
213    pub(crate) warn_log: Vec<Vec<u8>>,
214    /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
215    /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
216    /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
217    /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
218    /// shape stub for db.lua :328; if other registry-keyed APIs land later
219    /// they can share this table.
220    pub(crate) registry: Option<Gc<Table>>,
221    /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
222    /// registry entry); attached to every file userdata the io library makes
223    pub(crate) file_mt: Option<Gc<Table>>,
224    /// io library default input/output streams (PUC registry IO_INPUT/IO_OUTPUT)
225    pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
226    pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
227    /// the running thread's debug hook state (`debug.sethook`); per-thread,
228    /// swapped with the execution context on a coroutine resume/yield
229    pub(crate) hook: HookState,
230    /// true while the hook itself runs, so its own execution fires no events
231    /// (PUC clears the mask for the duration)
232    pub(crate) in_hook: bool,
233    /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
234    /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
235    /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
236    /// accumulate the count instead of capping at 1.
237    pub(crate) pending_tailcalls: u32,
238    /// Name of the C native that just propagated an error (captured before
239    /// the native is popped from `running_natives`). Lets a dying coroutine
240    /// preserve `[C]: in function '<name>'` at the top of its traceback
241    /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
242    /// luna's native frames are off-stack so we stash the name explicitly.
243    pub(crate) errored_native: Option<String>,
244    /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
245    /// (relative to the activation's func slot) and the number transferred.
246    /// Set just before firing a call/return hook, read by `getinfo("r")`.
247    pub(crate) hook_ftransfer: u16,
248    pub(crate) hook_ntransfer: u16,
249    /// metamethod event tag (e.g. "close") to attach to the next Lua frame
250    /// pushed by `push_frame`; `close_slots` sets this before calling a
251    /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
252    /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
253    pending_tm: Option<&'static str>,
254    /// `true` when the next `push_frame` is the user hook function itself,
255    /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
256    /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
257    pending_is_hook: bool,
258    /// traceback snapshot taken at the error point (the first `unwind` entry
259    /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
260    /// the failed frames are popped — can still see the error point's stack
261    /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
262    /// stack intact; we approximate by snapshotting the string and letting
263    /// `d_traceback` consume it. Cleared on Cont catch and at host-level
264    /// `call_value` entry (`public_call_depth == 0`).
265    pub(crate) error_traceback: Option<Vec<u8>>,
266    /// nesting depth of public `call_value` entries (host vs. internal). The
267    /// outermost entry (depth 0) resets per-error state (`error_traceback`);
268    /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
269    public_call_depth: u32,
270    /// stack of native (`Value::Native`) closures currently running on the
271    /// Rust call stack. `begin_call` pushes the closure before invoking
272    /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
273    /// native call (PUC `ar.name == NULL` at level 0 because the level-0
274    /// caller is C, not Lua) and qualify the running function's name via
275    /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
276    pub(crate) running_natives: Vec<Gc<NativeClosure>>,
277    /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
278    /// the native's argument-window head and width, so `debug.getlocal`
279    /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
280    pub(crate) running_native_slots: Vec<(u32, u32)>,
281    // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
282    // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
283    // trace_compiler — all moved to JitState. See `jit` below.
284    /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
285    /// when `chunk_compiler` / `trace_compiler` are
286    /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
287    ///
288    /// `#[doc(hidden)] pub` so the `luna` crate's
289    /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
290    /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
291    /// field). Not part of the embedder-facing API surface.
292    #[doc(hidden)]
293    pub jit: crate::vm::jit_state::JitState,
294
295    /// B12 host roots — append-only `Vec<Value>` traced as an extra
296    /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
297    /// `LuaRoot`) hold indices into this vector so the underlying
298    /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
299    ///
300    /// v1.1 strategy: append-only with explicit `unpin_all` / new Vm.
301    /// Slot recycling lands in Phase 3 alongside B8 LuaUserdata, when
302    /// the trade-offs between `Drop` plumbing and append-only memory
303    /// growth have a richer ergonomics envelope to live in.
304    pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
305    /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
306    /// back if non-empty, else extends `host_roots`. Generation
307    /// overflow at `u32::MAX` retires the slot (NOT pushed here).
308    pub(crate) host_roots_free: Vec<u32>,
309
310    /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
311    /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
312    /// callback). Each entry is one in-flight working buffer; `gc_roots`
313    /// extends with every contained `Value` so a `collectgarbage()`
314    /// inside the comparator cannot free strings/tables snapshotted
315    /// here. Nested sorts push a new buffer on entry, pop on exit
316    /// (sort.lua's `load(..)(); collectgarbage()` compare callback
317    /// regression).
318    pub(crate) sort_scratch: Vec<Vec<Value>>,
319
320    /// v1.3 Phase ML — MacroLua compile-time macro registry.
321    /// Pre-populated with built-in macros (`@quote` / `@unquote` /
322    /// `@if` / `@gensym`) at construction time when `version ==
323    /// LuaVersion::MacroLua`; embedders register custom macros via
324    /// [`Vm::define_macro`]. The expander runs once per `load()` call
325    /// between lexing and parsing (only when `is_macro_lua()`).
326    pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,
327
328    /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
329    /// by `TypeId::of::<T>()` for embedder types implementing
330    /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
331    /// [`Vm::register_userdata`]; metatables are pinned via
332    /// [`Vm::pin_host`] at registration time so the entry's
333    /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
334    pub(crate) userdata_metatables:
335        std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,
336
337    /// B6 — classification of the most recent error raised on this Vm.
338    /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
339    /// at well-known sites (syntax errors, instr-budget trips, native
340    /// callback errors, type errors).
341    pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,
342
343    /// B6 — `(source_name, line)` of the most recent error. Set by the
344    /// dispatcher / lexer / parser; cleared when a new call_value
345    /// enters cleanly.
346    pub(crate) last_error_source: Option<(String, u32)>,
347
348    /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
349    /// the dispatcher hot loop yields cooperatively (sets
350    /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
351    /// to `EvalFuture::poll`) instead of returning a real
352    /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
353    /// for the duration of the future; restored to `false` on
354    /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
355    /// leave it `false` so v1.0 behavior is preserved exactly.
356    pub(crate) async_mode: bool,
357
358    /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
359    /// before driving a slice. The dispatcher itself does not call it
360    /// (the future's poll loop does `wake_by_ref` after observing
361    /// `BudgetExhausted`), but storing the waker keeps the door open
362    /// for Stage 2 async natives to wake the host directly from a
363    /// helper future.
364    pub(crate) async_waker: Option<std::task::Waker>,
365
366    /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
367    /// `instr_budget` at the start of each `EvalFuture::poll` slice.
368    /// Default 10_000 (RFC §D5). Tunable via
369    /// [`Vm::set_async_slice`].
370    pub(crate) async_slice_size: i64,
371
372    /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
373    /// budget exhaustion fires; checked by `exec_with` (so the
374    /// sentinel propagates without `unwind` running, mirroring
375    /// `yielding.is_some()`) and by `call_value_impl` (so the call
376    /// frames survive for the next poll). Cleared by `drive_one`
377    /// after translating it to `DispatchOutcome::BudgetExhausted`.
378    pub(crate) host_yield_pending: bool,
379
380    /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
381    /// when an async-marked [`NativeClosure`] is invoked under
382    /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
383    /// mechanism as `host_yield_pending` — see `exec_with` +
384    /// `call_value_impl`), stashes the in-flight future +
385    /// post-completion context here, and surfaces them to
386    /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
387    /// once the future is moved out into a
388    /// `DispatchOutcome::AsyncNativeAwaiting`.
389    pub(crate) pending_async_native_fut:
390        Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,
391
392    /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
393    /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
394    /// commit the future's eventual `Ok(nret)` back into the calling
395    /// frame's expected result slots. Recorded by the dispatcher;
396    /// consumed by [`Vm::commit_async_native_result`] after the
397    /// future resolves.
398    pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
399}
400
/// v1.1 B10 Stage 2 — the call-site context that must survive the
/// cooperative-yield boundary while an async native is in flight.
///
/// The dispatcher fills this in when it sends a `NativeClosure` with
/// `is_async == true` down the cooperative path. Once the awaited future
/// resolves, `EvalFuture::poll` passes it back to
/// [`Vm::commit_async_native_result`], so `finish_results` (and the
/// post-call GC checkpoint) can run as though the native had completed
/// synchronously.
#[derive(Clone, Copy)]
pub(crate) struct AsyncNativeCallCtx {
    pub func_slot: u32,
    /// Kept for parity with the sync native-call path's
    /// `native_nresults`/`gc_top` bookkeeping and reserved for Stage 3+ hook
    /// firing + traceback shaping. Stage 2 does not read it yet, hence the
    /// `dead_code` allowance.
    #[allow(dead_code)]
    pub nargs: u32,
    pub nresults: i32,
    /// Kept for Stage 3+ traceback + GC-root-window auditing. Stage 2 reads
    /// `Vm.gc_top` directly after resuming, so this is unread today. It is
    /// carried so a Stage 3 audit can confirm the pre-suspend root window
    /// matches the post-resume one.
    #[allow(dead_code)]
    pub gc_top: u32,
}
425
426/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
427/// hookcount). `func` is the Lua hook; the booleans are the PUC mask bits.
428#[derive(Clone, Copy, Default)]
429pub struct HookState {
430    /// the hook function (`None` when no hook is installed)
431    pub func: Option<Value>,
432    /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
433    /// (Rust first); both can be installed simultaneously, but most
434    /// embedders pick one.
435    pub rust_func: Option<RustDebugHook>,
436    /// LUA_MASKCALL — fire on function entry
437    pub call: bool,
438    /// LUA_MASKRET — fire on function return
439    pub ret: bool,
440    /// LUA_MASKLINE — fire on source-line change
441    pub line: bool,
442    /// LUA_MASKCOUNT — fire every `count_base` instructions
443    pub count: bool,
444    /// instruction count between count events (PUC basehookcount)
445    pub count_base: i64,
446    /// instructions left until the next count event (PUC hookcount)
447    pub count_left: i64,
448}
449
450/// Rust-side debug hook callback (B11). Receives the `Vm` plus a
451/// classified event. The callback runs synchronously in the
452/// dispatcher; the hook flag (`in_hook`) is set for its duration so
453/// hook recursion is suppressed.
454pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
/// The kind of debug event a [`RustDebugHook`] is being invoked for.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RustHookEvent {
    /// A function was entered (`hook_call` analogue).
    Call,
    /// A function returned (`hook_return` analogue).
    Return,
    /// A tail call was entered (PUC 5.2+ reports this separately from a
    /// plain `Call`).
    TailCall,
    /// The source line changed; the payload is the 1-based line number.
    Line(u32),
    /// Instruction-count event (fires every `count_base` instructions).
    Count,
}
470
/// Mask flag for [`Vm::set_rust_debug_hook`]: subscribe to function-call
/// events. The `HOOK_MASK_*` flags are distinct bits; OR them together to
/// subscribe to several event categories with a single hook installation.
pub const HOOK_MASK_CALL: u32 = 0b0001;
/// Mask flag: subscribe to function-return events.
pub const HOOK_MASK_RETURN: u32 = 0b0010;
/// Mask flag: subscribe to line-change events.
pub const HOOK_MASK_LINE: u32 = 0b0100;
/// Mask flag: subscribe to instruction-count events.
pub const HOOK_MASK_COUNT: u32 = 0b1000;
480
481/// A thread's swapped-out execution context (PUC per-thread stack state).
482struct SavedCtx {
483    stack: Vec<Value>,
484    frames: Vec<CallFrame>,
485    open_upvals: Vec<(u32, Gc<Upvalue>)>,
486    tbc: Vec<u32>,
487    top: u32,
488    pcall_depth: u32,
489    hook: HookState,
490    /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
491    /// the rest of the suspended state so each thread can keep its own
492    /// `setfenv(0, env)` rewire without the swap leaking into another
493    /// thread (5.1 closure.lua :177).
494    globals: Gc<Table>,
495}
496
497/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
498enum Unwound {
499    /// caught by a pcall/xpcall continuation; resume running its caller
500    Caught,
501    /// caught by a continuation that was the entry-level activation; these are
502    /// the call's (wrapped) results
503    CaughtReturn(Vec<Value>),
504    /// no protecting continuation up to `entry_depth`; propagate the error
505    Propagated(LuaError),
506}
507
/// A debug stack level after resolution: either a real Lua frame (given by
/// its index into `frames`) or a synthetic level standing in for a
/// call_value boundary or a collapsed tail call.
pub(crate) enum DbgKind {
    /// A real Lua frame; the payload is its index into `frames`.
    Lua(usize),
    /// A synthetic C level. The payload is the `from_c` Lua frame this level
    /// sits below, used to name the native via its invoking call
    /// instruction.
    C(usize),
    /// PUC `CIST_TAIL` placeholder. A Lua-to-Lua tail call collapsed the
    /// caller's activation, so `debug.getinfo(level)` at this slot reports
    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
    /// `func = nil`, and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
    /// both shapes). The payload points at the *tail-called* frame whose
    /// `is_tail` flag produced this synthetic level.
    Tail(#[allow(dead_code)] usize),
}
523
524/// Outcome of an index/newindex/comparison fast path: either a directly
525/// computed result, or a metamethod (with the receiver it resolved against) the
526/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
527enum MmOut {
528    /// index → the looked-up value; newindex → done (raw set performed);
529    /// comparison → the boolean result already known
530    Done(Value),
531    /// a metamethod to call; `recv` is the chain element it was found on (the
532    /// extra args — key / value — are supplied by the caller)
533    Mm { func: Value, recv: Value },
534    /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
535    /// carries `__le` — `op_compare` swaps the args and negates the result.
536    /// Lives separate from `Mm` so the synth path can stay yieldable without
537    /// every other Mm caller learning a swap flag they would never set.
538    CompareSynth { func: Value },
539}
540
/// Metamethod events. Each discriminant is the event's index into
/// `Vm::mm_names`; the values are spelled out so the mapping is visible at
/// a glance.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
pub(crate) enum Mm {
    Index = 0,
    NewIndex = 1,
    Call = 2,
    ToString = 3,
    Metatable = 4,
    Name = 5,
    Eq = 6,
    Lt = 7,
    Le = 8,
    Concat = 9,
    Len = 10,
    Add = 11,
    Sub = 12,
    Mul = 13,
    Div = 14,
    Mod = 15,
    Pow = 16,
    IDiv = 17,
    BAnd = 18,
    BOr = 19,
    BXor = 20,
    Shl = 21,
    Shr = 22,
    Unm = 23,
    BNot = 24,
    Close = 25,
    Gc = 26,
    Pairs = 27,
}
574
/// Metamethod event names as Lua spells them (with the `__` prefix), listed
/// in the same order as the `Mm` variants.
const MM_NAMES: [&str; 28] = [
    // access, call and identity
    "__index",
    "__newindex",
    "__call",
    "__tostring",
    "__metatable",
    "__name",
    // comparison
    "__eq",
    "__lt",
    "__le",
    // concatenation and length
    "__concat",
    "__len",
    // arithmetic
    "__add",
    "__sub",
    "__mul",
    "__div",
    "__mod",
    "__pow",
    "__idiv",
    // bitwise
    "__band",
    "__bor",
    "__bxor",
    "__shl",
    "__shr",
    // unary
    "__unm",
    "__bnot",
    // lifecycle and iteration
    "__close",
    "__gc",
    "__pairs",
];
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614    if version <= LuaVersion::Lua53 {
615        format!("__{tm}")
616    } else if tm == "gc" {
617        "__gc".to_string()
618    } else {
619        tm.to_string()
620    }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626    use crate::vm::isa::Op;
627    Some(match op {
628        Op::Add => "add",
629        Op::Sub => "sub",
630        Op::Mul => "mul",
631        Op::Div => "div",
632        Op::Mod => "mod",
633        Op::Pow => "pow",
634        Op::IDiv => "idiv",
635        Op::BAnd => "band",
636        Op::BOr => "bor",
637        Op::BXor => "bxor",
638        Op::Shl => "shl",
639        Op::Shr => "shr",
640        Op::Unm => "unm",
641        Op::BNot => "bnot",
642        Op::Concat => "concat",
643        Op::Len => "len",
644        Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645        Op::SetField | Op::SetTable | Op::SetI => "newindex",
646        Op::Eq | Op::EqK => "eq",
647        Op::Lt => "lt",
648        Op::Le => "le",
649        _ => return None,
650    })
651}
652
/// Longest `__index` / `__newindex` chain that is followed before giving up
/// (PUC `MAXTAGLOOP`).
const MAX_TAG_LOOP: u32 = 2_000;
/// Longest `__call` metamethod chain (PUC `MAXCCMT`, lvm.c). 200 is more
/// than any reasonable program needs and matches PUC 5.4/5.5; an earlier
/// value of `15` was tight enough to fire on calls.lua :194 (N=20).
const MAX_CCMT: u32 = 200;
/// Bound on native↔Lua nesting depth (analogue of PUC `LUAI_MAXCCALLS`).
const MAX_C_DEPTH: u32 = 200;
/// luna's own cap on the VM stack, consulted by call-site overflow checks.
/// Kept slightly above PUC's `LUAI_MAXSTACK` so engine internals have a
/// little headroom over any single library push.
const MAX_LUA_STACK: u32 = 1024 * 1024;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults via
/// `lua_checkstack` before a multi-value push (`table.unpack` returning N
/// values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins this
/// at one million — `for j in {lim-10, …}` expects every j ≥ lim-10 to fail
/// because the few slots already consumed in the coroutine push the
/// effective cap below lim-10.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// Delivery state of the default `warn` handler (PUC 5.4+ warnf).
///
/// The base library's `warn` switches between the two states on the `@on` /
/// `@off` control messages. Any other `@<word>` control message is ignored,
/// mirroring `lauxlib.c::checkcontrol`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WarnState {
    /// Warnings are dropped without output (the state after `warn("@off")`).
    Off,
    /// Warnings are written to stderr (the state after `warn("@on")`).
    On,
}
683
/// Pull a printable message out of a `catch_unwind` payload, best effort.
///
/// A payload that is a `String` (e.g. from a formatted `panic!`) or a
/// `&'static str` (e.g. from a `panic!` with a plain literal) is returned as
/// text. Any other payload type degrades to `"<non-string panic>"`. The
/// native-call `catch_unwind` uses this to fold a panic into a Lua error.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|literal| (*literal).to_owned())
        })
        .unwrap_or_else(|| "<non-string panic>".to_owned())
}
697
698/// Combined error type returned by [`Vm::eval`] and friends — either the
699/// chunk failed to parse / compile, or it raised at runtime.
700#[derive(Debug)]
701pub enum Error {
702    /// Parse or compile failure.
703    Syntax(SyntaxError),
704    /// Runtime error raised during execution.
705    Runtime(LuaError),
706}
707
708impl From<SyntaxError> for Error {
709    fn from(e: SyntaxError) -> Error {
710        Error::Syntax(e)
711    }
712}
713
714impl From<LuaError> for Error {
715    fn from(e: LuaError) -> Error {
716        Error::Runtime(e)
717    }
718}
719
720impl Drop for Vm {
721    fn drop(&mut self) {
722        // state close: run `__gc` for every still-registered finalizable before
723        // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
724        // single pass — objects created by a closing finalizer are not
725        // re-finalized (they go to the heap's free list directly).
726        self.heap.queue_all_finalizers();
727        self.run_finalizers();
728    }
729}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742    frames.push(cf);
743    // Shadow maintenance is debug-only: release builds skip the
744    // increment + assertion entirely. The shadow's purpose in Week 1
745    // is to VERIFY the assumed invariant (frames_top == frames.len())
746    // across all push/pop sites; once Week 2+ migrates readers to
747    // consume the shadow, release will run the increment unconditionally.
748    #[cfg(debug_assertions)]
749    {
750        *frames_top += 1;
751        debug_assert_eq!(
752            *frames_top as usize,
753            frames.len(),
754            "P17-D frames_top out of sync after push",
755        );
756    }
757    #[cfg(not(debug_assertions))]
758    let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763    let r = frames.pop();
764    #[cfg(debug_assertions)]
765    {
766        if r.is_some() {
767            *frames_top = frames_top.saturating_sub(1);
768        }
769        debug_assert_eq!(
770            *frames_top as usize,
771            frames.len(),
772            "P17-D frames_top out of sync after pop",
773        );
774    }
775    #[cfg(not(debug_assertions))]
776    let _ = frames_top;
777    r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — one-time read of the
/// `LUNA_AOT_PROBE` environment variable.
///
/// Returns `true` iff the variable is set to any non-empty value. The
/// variable is read once, on the first call; the answer is cached in a
/// `OnceLock`, so later changes to the environment are not observed and the
/// dispatcher's hot path only pays for the cached read. Off by default —
/// production deploys don't bleed diagnostic prints.
fn jit_probe_enabled() -> bool {
    static PROBE_ON: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        // `var_os`, not `var`: `var` reports a value that is not valid
        // Unicode as an error, which would make a set, non-empty variable
        // read as "unset". It also avoids validating and copying the value
        // into a `String` just to test for emptiness.
        std::env::var_os("LUNA_AOT_PROBE").is_some_and(|value| !value.is_empty())
    })
}
794
795impl Vm {
796    /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797    /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798    /// the Vec replacement to keep the shadow valid.
799    #[inline(always)]
800    fn frames_resync(&mut self) {
801        // Debug-only Week 1 — see `frames_push_sync` comment.
802        #[cfg(debug_assertions)]
803        {
804            self.frames_top = self.frames.len() as u32;
805        }
806    }
807
808    // ====================================================================
809    // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810    //
811    // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812    // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813    // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814    // sites to consume them. Phase 4 removes Vec<CallFrame>.
815    //
816    // Preconditions (debug-asserted):
817    // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818    // - self.stack.len() > base + max_stack (caller has grown stack)
819    // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820    // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821    //
822    // No release-build cost when unused (LTO strips dead methods).
823    // ====================================================================
824
825    /// Write a Lua frame's closure pointer into `stack[base-2]`.
826    /// The caller must ensure `base >= 2` and the slot is within the
827    /// stack's allocated range.
828    #[inline]
829    #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830    fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831        debug_assert!(
832            base >= 2,
833            "frame closure slot needs base >= 2; got {}",
834            base
835        );
836        let idx = (base - 2) as usize;
837        debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838        self.stack[idx] = Value::Closure(cl);
839    }
840
841    /// Read a Lua frame's closure pointer from `stack[base-2]`.
842    /// Returns `None` if the slot doesn't hold a closure (caller is
843    /// expected to treat that as a corrupt frame).
844    ///
845    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846    /// to avoid the enum-match cost on the hot path. Tag check via
847    /// 1-byte load + branch + `as_closure_unchecked` payload load.
848    #[inline]
849    #[allow(dead_code)]
850    fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851        debug_assert!(base >= 2);
852        let v = self.stack.get((base - 2) as usize)?;
853        if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854            // SAFETY: tag byte just verified == CLOSURE.
855            Some(unsafe { v.as_closure_unchecked() })
856        } else {
857            None
858        }
859    }
860
861    /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862    /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863    /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864    /// cleanly through the value stack without losing bits.
865    #[inline]
866    #[allow(dead_code)]
867    fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868        debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869        let idx = (base - 1) as usize;
870        debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871        self.stack[idx] = Value::Int(marker.to_raw());
872    }
873
874    /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875    /// `None` if the slot isn't a `Value::Int` (caller treats as a
876    /// corrupt frame); the kind tag itself may still be invalid, in
877    /// which case [`FrameMarker::kind`] returns `None` on the result.
878    ///
879    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880    /// for the tag check + `as_int_unchecked` for the payload load.
881    #[inline]
882    #[allow(dead_code)]
883    fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884        debug_assert!(base >= 1);
885        let v = self.stack.get((base - 1) as usize)?;
886        if v.tag_byte() == crate::runtime::value::tag::INT {
887            // SAFETY: tag byte just verified == INT.
888            Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889                unsafe { v.as_int_unchecked() },
890            ))
891        } else {
892            None
893        }
894    }
895
    /// Build the raw `Vm` struct without main coroutine / RNG seed / library
    /// setup. Private helper behind the public constructors: `Vm::new_minimal`
    /// calls it directly and `Vm::new` reaches it through `new_minimal`. The
    /// caller is responsible for the rest of the bring-up.
    ///
    /// `version` is stored on the Vm and additionally decides three things
    /// here: the two dialect-specific heap flags and whether the macro
    /// registry starts out with the MacroLua built-ins. Every other field is
    /// set to a fixed default.
    fn new_inner(version: LuaVersion) -> Vm {
        let mut heap = Heap::new();
        // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
        // values strongly. gc.lua's "weak tables" section relies on that.
        heap.no_ephemeron = version <= LuaVersion::Lua51;
        // PUC 5.3 needs two GC cycles to finalize a table caught in a
        // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
        // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
        heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
        let globals = heap.new_table();
        // Intern every metamethod key once, in `MM_NAMES` order, so the
        // resulting collection can be indexed by `Mm as usize`.
        let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();

        Vm {
            heap,
            stack: Vec::new(),
            frames: Vec::new(),
            // Shadow of `frames.len()`; starts in sync with the empty Vec.
            frames_top: 0,
            open_upvals: Vec::new(),
            tbc: Vec::new(),
            top: 0,
            globals,
            type_mt: [None; 5],
            mm_names,
            c_depth: 0,
            pcall_depth: 0,
            nny: 0,
            msgh_depth: 0,
            terminating: None,
            // All-zero until the caller seeds it (`new_minimal` does so).
            rng: [0; 4],
            started: std::time::Instant::now(),
            version,
            closing_err: None,
            current: None,
            main_ctx: None,
            yielding: None,
            native_nresults: -1,
            // Left empty here; `new_minimal` allocates the main coroutine.
            main_coro: None,
            gc_mode: "incremental",
            gc_top: 0,
            gc_pause: 200,
            gc_stepmul: 100,
            gc_stepsize: 13,
            gc_finalizing: false,
            capi_stack: Vec::new(),
            capi_cstr_pin: None,
            warn_state: WarnState::Off,
            warn_buf: Vec::new(),
            warn_log: Vec::new(),
            instr_budget: None,
            bytecode_loading: true,
            puc_bytecode_loading: false,
            loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
            registry: None,
            file_mt: None,
            io_input: None,
            io_output: None,
            hook: HookState::default(),
            in_hook: false,
            pending_tailcalls: 0,
            errored_native: None,
            hook_ftransfer: 0,
            hook_ntransfer: 0,
            pending_tm: None,
            pending_is_hook: false,
            error_traceback: None,
            public_call_depth: 0,
            running_natives: Vec::new(),
            running_native_slots: Vec::new(),
            // v1.1 A2 — JIT-specific state factored into `JitState`
            // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
            // `install_jit_backend` / `luaL_newstate` swap in
            // `CraneliftBackend` for callers that want JIT acceleration.
            jit: crate::vm::jit_state::JitState::with_null_backend(),
            // v1.1 B12 — host roots ticket pool for the `Lua` facade.
            host_roots: Vec::new(),
            // v1.3 Phase ML — MacroLua registry. Pre-populated with
            // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
            // when this Vm is constructed under `LuaVersion::MacroLua`.
            macro_registry: if version == LuaVersion::MacroLua {
                crate::frontend::macro_expander::MacroRegistry::with_builtins()
            } else {
                crate::frontend::macro_expander::MacroRegistry::new()
            },
            host_roots_free: Vec::new(),
            sort_scratch: Vec::new(),
            // v1.2 Track B — LuaUserdata trait sugar's per-Vm
            // metatable cache. Populated lazily by register_userdata.
            userdata_metatables: std::collections::HashMap::new(),
            // v1.1 B6 — error classification metadata. Defaults to
            // Runtime; set at known sites (syntax / budget trip /
            // native error / type error).
            last_error_kind: crate::vm::error::LuaErrorKind::default(),
            last_error_source: None,
            // v1.1 B10 Stage 1 — async embedder fields. Defaults
            // preserve sync behavior bit-for-bit (`async_mode = false`
            // means the budget hot loop errors out exactly as v1.0).
            async_mode: false,
            async_waker: None,
            async_slice_size: 10_000,
            host_yield_pending: false,
            // v1.1 B10 Stage 2 — pending async-native state. Empty by
            // default; populated only by the dispatcher when an
            // async-marked NativeClosure is invoked under async_mode.
            pending_async_native_fut: None,
            pending_async_native_ctx: None,
        }
    }
1006
1007    /// Build a fully-loaded Vm — the default for embedders that want PUC's
1008    /// standard library surface. Equivalent to `Vm::new_minimal(version)`
1009    /// followed by `vm.open_all_libs()`.
1010    pub fn new(version: LuaVersion) -> Vm {
1011        let mut vm = Vm::new_minimal(version);
1012        vm.open_all_libs();
1013        vm
1014    }
1015
1016    /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1017    /// that want a sandbox (Redis-style scripts, in-game scripting with
1018    /// a curated API) call this and then `open_base` / `open_math` / etc.
1019    /// selectively. The Vm is otherwise fully initialized (main coroutine,
1020    /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1021    pub fn new_minimal(version: LuaVersion) -> Vm {
1022        let mut vm = Vm::new_inner(version);
1023        let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1024        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1025        unsafe { mc.as_mut() }.status = CoroStatus::Running;
1026        vm.main_coro = Some(mc);
1027        let (a, b) = vm.rng_auto_seed();
1028        vm.rng_seed(a as u64, b as u64);
1029        vm
1030    }
1031
1032    /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
1033    /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
1034    /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
1035    /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
1036    /// closures already populated `Proto.jit: JitProtoState::Compiled`
1037    /// does NOT evict those cached entries — call right after
1038    /// construction for a clean swap.
1039    ///
1040    /// Naming: `install_jit_backend` (not `install_default_jit`)
1041    /// because the "default" in luna-core is `NullJitBackend`; the
1042    /// "default JIT" lives in the `luna` crate.
1043    pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
1044    where
1045        C: crate::jit::IntChunkCompiler + 'static,
1046        T: crate::jit::TraceCompiler + 'static,
1047    {
1048        self.jit.chunk_compiler = Box::new(chunk);
1049        self.jit.trace_compiler = Box::new(trace);
1050    }
1051
1052    /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
1053    /// storage holder. Default is [`crate::jit::NullJitStorage`];
1054    /// the `luna_jit` crate's `install_default_jit` pairs this with
1055    /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
1056    /// also install a fresh `CraneliftJitStorage`. Storage holds
1057    /// the per-`Vm` JIT cache + handle collections that used to be
1058    /// `thread_local!`s in `luna_jit::jit_backend`.
1059    ///
1060    /// Idempotency: re-installing storage on a Vm that already
1061    /// holds compiled-trace pointers WILL evict their owners (the
1062    /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
1063    /// pages). Call right after construction for a clean swap.
1064    pub fn install_jit_storage<S>(&mut self, storage: S)
1065    where
1066        S: crate::jit::JitStorage + 'static,
1067    {
1068        self.jit.storage = Box::new(storage);
1069    }
1070
1071    /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
1072    /// reports "skipped" so every closure stays on the interpreter
1073    /// path, and the trace recorder's compile attempt always returns
1074    /// `None`. Intended for tests that want to verify the trait
1075    /// boundary works in a JIT-free configuration, and for the future
1076    /// `luna-core` build path that ships without Cranelift.
1077    ///
1078    /// Calling this on a Vm whose closures already populated
1079    /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
1080    /// cached entries — the dispatcher will still call into them. For
1081    /// a truly JIT-free run, call this immediately after construction.
1082    pub fn install_null_jit(&mut self) {
1083        self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
1084        self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
1085    }
1086
1087    /// Open the entire 5.5 standard library on a `new_minimal`-built Vm.
1088    /// `Vm::new` calls this; sandboxed embedders open libraries one at a
1089    /// time instead (`open_base`, `open_math`, `open_table`, …).
1090    pub fn open_all_libs(&mut self) {
1091        self.open_base();
1092        self.open_math();
1093        self.open_table();
1094        self.open_string();
1095        self.open_utf8();
1096        self.open_os_io();
1097        self.open_debug();
1098        self.open_coroutine();
1099        self.open_package();
1100        // PUC 5.2 introduced `bit32` and 5.3 retired it (the native bitwise
1101        // operators replace it on 64-bit integers). Only expose it under 5.2
1102        // so bitwise.lua's first line (`bit32.band(...)`) resolves without
1103        // leaking the global into newer dialects.
1104        if self.version == LuaVersion::Lua52 {
1105            self.open_bit32();
1106        }
1107    }
1108
1109    /// Install the base library (`print`, `type`, `pairs`, `tostring`,
1110    /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
1111    /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
1112    /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
1113    /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
1114    /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
1115    /// once per Vm.
1116    pub fn open_base(&mut self) {
1117        crate::vm::builtins::open_base(self);
1118    }
1119    /// Install the `math` standard library.
1120    pub fn open_math(&mut self) {
1121        crate::vm::lib_math::open_math(self);
1122    }
1123    /// Install the `table` standard library.
1124    pub fn open_table(&mut self) {
1125        crate::vm::lib_table::open_table(self);
1126    }
1127    /// Install the `string` standard library (and the shared string metatable).
1128    pub fn open_string(&mut self) {
1129        crate::vm::lib_string::open_string(self);
1130    }
1131    /// Install the `utf8` standard library (5.3+).
1132    pub fn open_utf8(&mut self) {
1133        crate::vm::lib_utf8::open_utf8(self);
1134    }
1135    /// `os` and `io` are merged because file userdata shares state with both
1136    /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
1137    /// wraps `os.execute`'s shell).
1138    pub fn open_os_io(&mut self) {
1139        crate::vm::lib_os_io::open_os_io(self);
1140    }
1141    /// Install the `debug` standard library (introspection / hooks). Off by
1142    /// default for sandbox embedders.
1143    pub fn open_debug(&mut self) {
1144        crate::vm::lib_debug::open_debug(self);
1145    }
1146    /// Install the `coroutine` standard library.
1147    pub fn open_coroutine(&mut self) {
1148        crate::vm::lib_coroutine::open_coroutine(self);
1149    }
1150    /// `package` plus the 5.1-only `module` and `package.seeall` aliases.
1151    pub fn open_package(&mut self) {
1152        crate::vm::lib_os_io::open_package(self);
1153    }
1154    /// 5.2-only `bit32` library (5.3+ retired in favour of native bitwise
1155    /// ops on 64-bit integers).
1156    pub fn open_bit32(&mut self) {
1157        crate::vm::lib_bit32::open_bit32(self);
1158    }
1159
1160    /// xoshiro256** next.
1161    pub(crate) fn rng_next(&mut self) -> u64 {
1162        let s = &mut self.rng;
1163        let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1164        let t = s[1] << 17;
1165        s[2] ^= s[0];
1166        s[3] ^= s[1];
1167        s[1] ^= s[2];
1168        s[0] ^= s[3];
1169        s[2] ^= t;
1170        s[3] = s[3].rotate_left(45);
1171        result
1172    }
1173
1174    /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1175    pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1176        // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1177        // state), then 16 discards to spread the seed. Matches PUC's exact
1178        // sequence so the low-level conformance test passes.
1179        self.rng = [a, 0xff, b, 0];
1180        for _ in 0..16 {
1181            self.rng_next();
1182        }
1183    }
1184
1185    /// Wall-clock since VM creation (os.clock approximation).
1186    pub(crate) fn uptime(&self) -> std::time::Duration {
1187        self.started.elapsed()
1188    }
1189
1190    /// Entropy for math.randomseed() with no arguments.
1191    pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1192        let t = std::time::SystemTime::now()
1193            .duration_since(std::time::UNIX_EPOCH)
1194            .map(|d| d.as_nanos() as u64)
1195            .unwrap_or(0);
1196        let addr = &self.rng as *const _ as u64;
1197        (t as i64, addr as i64)
1198    }
1199
1200    /// Allocate a native function object (no upvalues): builtin registration.
1201    pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1202        Value::Native(self.heap.new_native(f, Box::new([])))
1203    }
1204
1205    /// Allocate a native function object with captured upvalues.
1206    pub fn native_with(
1207        &mut self,
1208        f: crate::runtime::value::NativeFn,
1209        upvals: Box<[Value]>,
1210    ) -> Value {
1211        Value::Native(self.heap.new_native(f, upvals))
1212    }
1213
1214    /// Install the shared string metatable (string library, P04).
1215    pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
1216        self.type_mt[3] = mt;
1217    }
1218
1219    /// The current globals table (`_G` / `_ENV` source for new chunks).
1220    pub fn globals(&self) -> Gc<Table> {
1221        self.globals
1222    }
1223
1224    /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1225    /// Library code that pushes a known number of fresh slots — e.g.
1226    /// `table.unpack` returning N values — consults this to refuse when
1227    /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1228    /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1229    /// coroutine's already-built table eats a few slots, so an unpack of
1230    /// ~lim values can't fit.
1231    pub(crate) fn stack_room(&self) -> i64 {
1232        PUC_MAXSTACK - (self.stack.len() as i64)
1233    }
1234
1235    /// Repoint the thread's "global table" used by *future* `Vm::load` calls
1236    /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
1237    /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
1238    /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
1239    pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
1240        self.globals = env;
1241    }
1242
1243    /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
1244    /// 5.5). Determines numeric semantics, available standard libraries, and
1245    /// metamethod behavior.
1246    pub fn version(&self) -> LuaVersion {
1247        self.version
1248    }
1249
1250    /// Set a global by name. `v` may be any `IntoValue`: a primitive
1251    /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1252    /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1253    /// `Gc<NativeClosure>` handle.
1254    ///
1255    /// Returns `Err(LuaError)` only if the globals table overflows
1256    /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1257    /// String interning + key construction cannot fail.
1258    ///
1259    /// ```
1260    /// # use luna_core::vm::Vm;
1261    /// # use luna_core::version::LuaVersion;
1262    /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1263    /// vm.set_global("answer", 42).unwrap();
1264    /// vm.set_global("ratio", 0.5_f64).unwrap();
1265    /// vm.set_global("hello", "world").unwrap();
1266    /// let r = vm.eval("return answer, ratio, hello").unwrap();
1267    /// assert_eq!(r.len(), 3);
1268    /// ```
1269    pub fn set_global<V: crate::vm::IntoValue>(
1270        &mut self,
1271        name: &str,
1272        v: V,
1273    ) -> Result<(), LuaError> {
1274        let v = v.into_value(self);
1275        let k = Value::Str(self.heap.intern(name.as_bytes()));
1276        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1277        unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1278        self.heap
1279            .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1280        Ok(())
1281    }
1282
1283    /// Backward write barrier shorthand for native lib code: demote `t` from
1284    /// BLACK back to gray so the next propagate step re-traces its fields.
1285    /// No-op outside Propagate (parent is never BLACK at mutation time).
1286    pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1287        self.heap
1288            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1289    }
1290
1291    /// Forward write barrier shorthand: a closed upvalue is a single-slot
1292    /// container — `barrier_forward` is cheaper than `barrier_back` here.
1293    /// No-op outside Propagate.
1294    pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1295        self.heap
1296            .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1297    }
1298
1299    /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
1300    /// under non-MacroLua dialects (the macro is stored but the load
1301    /// path only consults the registry when
1302    /// `self.version == LuaVersion::MacroLua`).
1303    ///
1304    /// `name` is stored without the leading `@` — source code writes
1305    /// `@double(x)` to invoke a macro registered as `"double"`.
1306    pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
1307        self.macro_registry.register(name, m);
1308    }
1309
1310    /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom).
1311    /// Mostly useful for tests / dogfood resets.
1312    pub fn clear_macros(&mut self) {
1313        self.macro_registry.clear();
1314    }
1315
1316    /// Parse + compile a chunk and close it over the globals table.
1317    pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1318        // Reject oversize input *before* handing the parser/lexer a
1319        // potentially multi-GB slice. The PUC-shaped `not enough memory`
1320        // message keeps `heavy.lua::loadrep` compatibility: that test
1321        // accepts either `string length overflow` or `not enough memory`
1322        // as the failure mode for a feeder loop that outruns the host
1323        // allocator. See `set_loader_input_budget`.
1324        if src.len() > self.loader_input_budget {
1325            return Err(SyntaxError {
1326                line: 0,
1327                msg: b"not enough memory".to_vec(),
1328            });
1329        }
1330        // a precompiled (binary) chunk is undumped; source is parsed + compiled
1331        let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1332        if is_bytecode && !self.bytecode_loading {
1333            return Err(SyntaxError {
1334                line: 0,
1335                msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1336            });
1337        }
1338        let proto = if is_bytecode {
1339            let allow_puc = self.puc_bytecode_loading;
1340            crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1341                |msg| SyntaxError {
1342                    line: 0,
1343                    msg: msg.into_bytes(),
1344                },
1345            )?
1346        } else if self.version.is_macro_lua() {
1347            // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1348            // token vec, run the macro expander pre-pass against the
1349            // per-Vm registry, then hand the rewritten stream to
1350            // `parse_tokens`. The AST + compiler are dialect-agnostic
1351            // because by this point all `@`/quote tokens are gone.
1352            let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1353            let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1354            loop {
1355                let t = lexer.next_token()?;
1356                let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1357                raw.push(t);
1358                if eof {
1359                    break;
1360                }
1361            }
1362            // Drop the trailing Eof — expander operates on the body and
1363            // `parse_tokens` reinserts Eof when it runs out of tokens.
1364            raw.pop();
1365            let expanded = self.macro_registry.expand(raw)?;
1366            let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1367            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1368        } else {
1369            let ast = parse(src, self.version)?;
1370            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1371        };
1372        // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1373        // upvalue with the globals table when the closure has *exactly* one
1374        // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1375        // function with two-or-more upvalues keeps every cell at nil; the
1376        // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1377        // :293's `assert(x() == nil)` pins this contract.
1378        let n = proto.upvals.len();
1379        let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1380        if n == 0 {
1381            // synthetic main chunk has no declared upvalues, but the engine
1382            // still expects at least one cell so the host can probe via
1383            // `debug.upvalueid` etc. Match the historical luna shape.
1384            ups.push(
1385                self.heap
1386                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1387            );
1388        } else if n == 1 {
1389            ups.push(
1390                self.heap
1391                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1392            );
1393        } else {
1394            for _ in 0..n {
1395                ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1396            }
1397        }
1398        Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1399    }
1400
1401    /// Compile and run `src` as an anonymous chunk; return its results.
1402    /// Source name in the traceback is `"=eval"`. Syntax errors are
1403    /// surfaced as `LuaError` carrying the formatted PUC-style message
1404    /// (interned through the heap so the error value composes with
1405    /// `pcall` / `error_text` like any runtime error).
1406    pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
1407        self.eval_chunk(src, "=eval")
1408    }
1409
1410    /// Render an error value for messages/tests. Non-string errors —
1411    /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1412    /// (`"(error object is a table value)"`); embedders that need
1413    /// structured payloads should inspect `e.0` directly. Errors whose
1414    /// text starts with `"native panic:"` indicate a Rust panic
1415    /// crossed `catch_unwind` — the Vm may be inconsistent and should
1416    /// be dropped (do not reuse).
1417    pub fn error_text(&self, e: &LuaError) -> String {
1418        match e.0 {
1419            Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1420            v => format!("(error object is a {} value)", v.type_name()),
1421        }
1422    }
1423
1424    /// Call any callable value from the host (or from natives like pcall).
1425    pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1426        // host-level entry (no enclosing exec): drop any error state from a
1427        // prior call that propagated uncaught (`error_traceback` would
1428        // otherwise leak into the next debug.traceback call).
1429        if self.public_call_depth == 0 {
1430            self.error_traceback = None;
1431        }
1432        self.public_call_depth += 1;
1433        // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1434        // chunk whose body fits the S1 int-arith whitelist short-circuits
1435        // the whole interpreter dispatch and runs straight through the
1436        // mmap'd native code. The lookup is one Cell::get + one match —
1437        // the slow path (compile attempt on first reach) is paid once per
1438        // Proto.
1439        if args.is_empty()
1440            && let Value::Closure(cl) = f
1441            && let Some(vs) = self.try_jit_call(cl)
1442        {
1443            self.public_call_depth -= 1;
1444            return Ok(vs);
1445        }
1446        let r = self.call_value_impl(f, args, true);
1447        self.public_call_depth -= 1;
1448        r
1449    }
1450
1451    /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1452    /// `Some(values)` when the cached native fn is callable for a
1453    /// zero-arg call. (Non-zero-arg dispatch is handled by
1454    /// `try_jit_call_op` from inside `begin_call`.)
1455    fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1456        use crate::runtime::function::JitProtoState;
1457        if !self.jit.enabled {
1458            return None;
1459        }
1460        let proto = cl.proto;
1461        if let JitProtoState::Untried = proto.jit.get() {
1462            self.populate_jit_cache(proto);
1463        }
1464        match proto.jit.get() {
1465            JitProtoState::Compiled {
1466                entry,
1467                num_args: 0,
1468                returns_one,
1469                arg_float_mask: _,
1470                arg_table_mask: _,
1471                ret_is_float,
1472                ret_is_table,
1473            } => {
1474                // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1475                let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1476                // P11-S5c / S5d.J — install the active Vm + closure
1477                // for any Rust helper the JIT'd code may call (e.g.
1478                // `luna_jit_new_table`, `luna_jit_upval_get`) via
1479                // cranelift `Linkage::Import`. RAII clear on return.
1480                // Chunks with no upvalue reads don't touch the closure
1481                // slot, paying nothing.
1482                // v1.1 A1 Session A — route through chunk_compiler so
1483                // the NullJitBackend path stays inert. Raw-ptr arg
1484                // avoids the &mut self borrow conflict against the
1485                // shared self.jit.chunk_compiler read.
1486                let vm_ptr: *mut Vm = self;
1487                let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1488                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1489                let r = unsafe { f() };
1490                drop(_jit_vm_guard);
1491                // P11-S5d.E' — a JIT helper may have detected a metatable
1492                // on a table operand and parked a deopt request here.
1493                // Discard the sentinel value and return None so the caller
1494                // re-runs the call through the interpreter, which honours
1495                // __index/__newindex.
1496                if self.jit.pending_err.take().is_some() {
1497                    return None;
1498                }
1499                Some(if returns_one {
1500                    let v = if ret_is_float {
1501                        Value::Float(f64::from_bits(r as u64))
1502                    } else if ret_is_table {
1503                        Value::Table(crate::runtime::Gc::from_ptr(
1504                            r as *mut crate::runtime::Table,
1505                        ))
1506                    } else {
1507                        Value::Int(r)
1508                    };
1509                    vec![v]
1510                } else {
1511                    Vec::new()
1512                })
1513            }
1514            // Non-zero-arg Compiled state: call_value's empty-args
1515            // fast path can't drive it. Op::Call handles those.
1516            JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1517        }
1518    }
1519
1520    /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1521    /// `Compiled { … }` or `Failed`; idempotent on already-populated
1522    /// states (call sites guard with a get before invoking).
1523    ///
1524    /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1525    /// `proto.code`. Compiled artefacts live in the thread-local
1526    /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1527    /// `Vm`s loading the same source skip the cranelift compile step
1528    /// entirely.
1529    fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1530        use crate::runtime::function::JitProtoState;
1531        let version = self.version();
1532        let pre53 = version <= crate::version::LuaVersion::Lua53;
1533        // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1534        // are Float). The JIT's `GetUpval` ValueRead path uses this
1535        // to default-pin upvalue reads to Float without a tag check.
1536        let float_only = version <= crate::version::LuaVersion::Lua52;
1537        // v2.0 Track J sub-step J-B — split-borrow JitState so the
1538        // trait method can take `&mut dyn JitStorage` without
1539        // double-borrowing self.jit.
1540        let jit = &mut self.jit;
1541        let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1542        match jit
1543            .chunk_compiler
1544            .try_compile(storage, proto, pre53, float_only)
1545        {
1546            crate::jit::CompileResult::Compiled {
1547                entry,
1548                num_args,
1549                returns_one,
1550                arg_float_mask,
1551                arg_table_mask,
1552                ret_is_float,
1553                ret_is_table,
1554            } => {
1555                proto.jit.set(JitProtoState::Compiled {
1556                    entry,
1557                    num_args,
1558                    returns_one,
1559                    arg_float_mask,
1560                    arg_table_mask,
1561                    ret_is_float,
1562                    ret_is_table,
1563                });
1564            }
1565            crate::jit::CompileResult::Skipped => {
1566                proto.jit.set(JitProtoState::Failed);
1567            }
1568        }
1569    }
1570
1571    /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
1572    /// before `push_frame`. Returns `true` when the call was handled
1573    /// in-place (no new Lua frame). Constraints: every arg slot must
1574    /// be `Value::Int`, the cached arity must match the call site's
1575    /// `nargs`, the host wanted-count `wanted` is honoured by
1576    /// `finish_results`. Also bails when a debug hook is armed —
1577    /// JIT'd code does not fire line / call / return hooks, so any
1578    /// active hook makes the interpreter the source of truth.
1579    fn try_jit_call_op(
1580        &mut self,
1581        cl: Gc<LuaClosure>,
1582        func_slot: u32,
1583        nargs: u32,
1584        wanted: i32,
1585    ) -> bool {
1586        use crate::runtime::function::JitProtoState;
1587        if !self.jit.enabled {
1588            return false;
1589        }
1590        // Any active debug hook means the interpreter has to run the
1591        // call so the hook gets the expected events.
1592        if self.hook.func.is_some() || self.hook.rust_func.is_some() {
1593            return false;
1594        }
1595        let proto = cl.proto;
1596        if let JitProtoState::Untried = proto.jit.get() {
1597            self.populate_jit_cache(proto);
1598        }
1599        let JitProtoState::Compiled {
1600            entry,
1601            num_args,
1602            returns_one,
1603            arg_float_mask,
1604            arg_table_mask,
1605            ret_is_float,
1606            ret_is_table,
1607        } = proto.jit.get()
1608        else {
1609            return false;
1610        };
1611        if num_args as u32 != nargs {
1612            return false;
1613        }
1614        // Pack args into i64 bit-patterns per the per-slot expected
1615        // kind. A Float-typed slot accepts Value::Float verbatim and
1616        // promotes Value::Int(x) via i64 → f64; a Table-typed slot
1617        // accepts only Value::Table and passes the raw Gc ptr; an
1618        // Int-typed slot accepts only Value::Int. Any other shape
1619        // bails to the interpreter so the call's actual dynamics
1620        // (metamethod dispatch / type-coerce) take over.
1621        let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
1622            [0; crate::jit::MAX_JIT_ARITY as usize];
1623        for i in 0..num_args as usize {
1624            let v = self.stack[(func_slot + 1) as usize + i];
1625            let want_float = (arg_float_mask >> i) & 1 == 1;
1626            let want_table = (arg_table_mask >> i) & 1 == 1;
1627            args[i] = match (want_table, want_float, v) {
1628                (true, _, Value::Table(t)) => t.as_ptr() as i64,
1629                (false, false, Value::Int(x)) => x,
1630                (false, true, Value::Float(f)) => f.to_bits() as i64,
1631                (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
1632                _ => return false,
1633            };
1634        }
1635        // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
1636        // matching guard in `try_jit_call`.
1637        // v1.1 A1 Session A — route through chunk_compiler.
1638        let vm_ptr: *mut Vm = self;
1639        let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1640        // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1641        let r = unsafe {
1642            match num_args {
1643                0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
1644                1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
1645                2 => {
1646                    (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
1647                }
1648                3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
1649                    args[0], args[1], args[2],
1650                ),
1651                4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
1652                    args[0], args[1], args[2], args[3],
1653                ),
1654                _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
1655            }
1656        };
1657        drop(_jit_vm_guard);
1658        // P11-S5d.E' — see matching path in `try_jit_call`. A helper
1659        // flagged a metatable on a table operand; bail to the interpreter
1660        // so `push_frame` runs the call from scratch.
1661        if self.jit.pending_err.take().is_some() {
1662            return false;
1663        }
1664        // Write result at func_slot, replacing the closure value, then
1665        // hand to finish_results to pad/truncate per the call site's
1666        // `wanted` count.
1667        if returns_one {
1668            let v = if ret_is_float {
1669                Value::Float(f64::from_bits(r as u64))
1670            } else if ret_is_table {
1671                Value::Table(crate::runtime::Gc::from_ptr(
1672                    r as *mut crate::runtime::Table,
1673                ))
1674            } else {
1675                Value::Int(r)
1676            };
1677            self.stack[func_slot as usize] = v;
1678            self.finish_results(func_slot, 1, wanted);
1679        } else {
1680            self.finish_results(func_slot, 0, wanted);
1681        }
1682        true
1683    }
1684
1685    /// `call_value` with control over the `from_c` debug boundary. A `__close`
1686    /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1687    /// invokes it inside that ci), so it is called with `from_c = false`: its
1688    /// debug parent is the closing function, not a synthetic C level.
1689    fn call_value_impl(
1690        &mut self,
1691        f: Value,
1692        args: &[Value],
1693        from_c: bool,
1694    ) -> Result<Vec<Value>, LuaError> {
1695        if self.c_depth >= MAX_C_DEPTH {
1696            return Err(self.rt_err("stack overflow"));
1697        }
1698        self.c_depth += 1;
1699        let func_slot = self.stack.len() as u32;
1700        self.stack.push(f);
1701        self.stack.extend_from_slice(args);
1702        self.top = self.stack.len() as u32;
1703        let r = self.call_at(func_slot, args.len() as u32, from_c);
1704        self.c_depth -= 1;
1705        if r.is_err()
1706            && self.yielding.is_none()
1707            && self.terminating.is_none()
1708            && !self.host_yield_pending
1709            && self.pending_async_native_fut.is_none()
1710        {
1711            // A `coroutine.yield` in flight raises a sentinel error to unwind the
1712            // Rust stack, but the suspended coroutine's frames/registers (which
1713            // sit at/above `func_slot`) must survive for the next resume — so we
1714            // only truncate on a real error. A self-close termination is in the
1715            // same boat: the dying thread's state is discarded wholesale.
1716            // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1717            // the same boat as `yielding`: the next `EvalFuture::poll`
1718            // resumes the same call, so the in-flight frames must
1719            // survive.
1720            self.stack.truncate(func_slot as usize);
1721            self.top = func_slot;
1722        }
1723        r
1724    }
1725
1726    /// Invoke `f` with the running thread marked non-yieldable for the duration
1727    /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1728    /// boundary and errors instead of suspending. Used by library callbacks
1729    /// (sort comparator, gsub replacement) that run via synchronous Rust
1730    /// recursion and so could not be re-entered after a yield.
1731    pub(crate) fn call_noyield(
1732        &mut self,
1733        f: Value,
1734        args: &[Value],
1735    ) -> Result<Vec<Value>, LuaError> {
1736        self.nny += 1;
1737        let r = self.call_value(f, args);
1738        self.nny -= 1;
1739        r
1740    }
1741
1742    // ---- coroutines (P05) ----
1743
1744    pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1745        // The new coroutine inherits the creating thread's current globals
1746        // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1747        // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1748        // it here picks the creator regardless of which coro is running.
1749        self.heap.new_coro(body, self.globals)
1750    }
1751
1752    /// Is `t` the thread whose context is currently live in the VM?
1753    pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1754        match (self.current, t) {
1755            (None, None) => true,
1756            (Some(a), Some(b)) => a.ptr_eq(b),
1757            _ => false,
1758        }
1759    }
1760
1761    /// Read an open-upvalue slot from its owning thread's stack (the live VM
1762    /// stack if that thread is current, else its saved context).
1763    #[doc(hidden)]
1764    pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1765        let s = slot as usize;
1766        if self.is_current_thread(thread) {
1767            self.stack[s]
1768        } else {
1769            match thread {
1770                Some(co) => co.stack[s],
1771                None => self.main_ctx.as_ref().expect("main context").stack[s],
1772            }
1773        }
1774    }
1775
1776    fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1777        let s = slot as usize;
1778        if self.is_current_thread(thread) {
1779            self.stack[s] = v;
1780        } else {
1781            match thread {
1782                Some(co) => {
1783                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1784                    unsafe { co.as_mut() }.stack[s] = v;
1785                    // co.stack is traced by Coro::trace; demote co back to
1786                    // gray so propagate re-traces this slot if it was
1787                    // already black.
1788                    self.heap
1789                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1790                }
1791                None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1792            }
1793        }
1794    }
1795
1796    /// Whether `co` is the main thread's identity object.
1797    pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1798        self.main_coro.is_some_and(|m| m.ptr_eq(co))
1799    }
1800
1801    /// The status of `co` from the caller's view. The main thread's identity
1802    /// object has no stored status — it is "running" when nothing else runs,
1803    /// else "normal" (it resumed the active coroutine).
1804    pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1805        if self.is_main_coro(co) {
1806            if self.current.is_none() {
1807                CoroStatus::Running
1808            } else {
1809                CoroStatus::Normal
1810            }
1811        } else {
1812            co.status
1813        }
1814    }
1815
1816    /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1817    /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1818    /// context. Handlers see the coroutine's death error (if it died by error)
1819    /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1820    /// with error `e` and no handler overrode it; `Err` means a handler raised.
1821    pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1822        // re-entrant close: a __close handler closed its own coroutine while the
1823        // outer close is mid-flight (its context is live). Report success and let
1824        // the outer close finish — re-entering the swap would corrupt the stack.
1825        if self.current.is_some_and(|c| c.ptr_eq(co)) {
1826            return Ok(None);
1827        }
1828        // A chain of coroutines whose `__close` handlers each close the previous
1829        // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1830        // The calling handler's `call_value` has already pushed `c_depth` to the
1831        // cap, so here it reads as full first — report PUC's "C stack overflow"
1832        // before the next handler call would surface the plainer "stack overflow".
1833        if self.c_depth >= MAX_C_DEPTH {
1834            return Err(self.rt_err("C stack overflow"));
1835        }
1836        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1837        let death_err = unsafe { co.as_mut() }.error_value.take();
1838        // swap the caller's live context out (into a GC-rooted home) and the
1839        // coroutine's in, mirroring resume_coro, so the __close handlers run on
1840        // the coroutine's stack while everything stays rooted.
1841        let resumer = self.current;
1842        let rctx = self.take_ctx();
1843        match resumer {
1844            Some(r) => {
1845                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1846                let m = unsafe { r.as_mut() };
1847                m.stack = rctx.stack;
1848                m.frames = rctx.frames;
1849                m.open_upvals = rctx.open_upvals;
1850                m.tbc = rctx.tbc;
1851                m.top = rctx.top;
1852                m.pcall_depth = rctx.pcall_depth;
1853            }
1854            None => self.main_ctx = Some(rctx),
1855        }
1856        self.load_coro_ctx(co);
1857        self.current = Some(co);
1858        let result = self.close_slots(0, death_err);
1859        // discard the (now-closed) coroutine context and restore the caller
1860        let _ = self.take_ctx();
1861        match resumer {
1862            Some(r) => {
1863                self.load_coro_ctx(r);
1864                self.current = Some(r);
1865            }
1866            None => {
1867                let m = self.main_ctx.take().expect("main context saved");
1868                self.put_ctx(m);
1869                self.current = None;
1870            }
1871        }
1872        {
1873            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1874            let m = unsafe { co.as_mut() };
1875            m.status = CoroStatus::Dead;
1876            m.stack = Vec::new();
1877            m.frames = Vec::new();
1878            m.open_upvals = Vec::new();
1879            m.tbc = Vec::new();
1880            m.top = 0;
1881            m.pcall_depth = 0;
1882            m.resume_at = None;
1883            m.error_value = None;
1884        }
1885        result.map(|()| death_err)
1886    }
1887
1888    /// `coroutine.running`: the running thread plus whether it is the main one.
1889    pub(crate) fn running_thread(&self) -> (Value, bool) {
1890        match self.current {
1891            Some(co) => (Value::Coro(co), false),
1892            None => (Value::Coro(self.main_coro.expect("main coro")), true),
1893        }
1894    }
1895
1896    /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1897    /// thread) can yield. The main thread never can; any other coroutine can
1898    /// unless it is dead.
1899    pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1900        match co {
1901            Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1902            // the running thread can yield only outside any non-yieldable C call
1903            None => self.current.is_some() && self.nny == 0,
1904        }
1905    }
1906
1907    /// Why `coroutine.yield` may not suspend the running thread right now, as a
1908    /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1909    /// from "inside an unyieldable C call" (sort/gsub callback).
1910    pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1911        if self.current.is_none() {
1912            Some("attempt to yield from outside a coroutine")
1913        } else if self.nny > 0 {
1914            Some("attempt to yield across a C-call boundary")
1915        } else {
1916            None
1917        }
1918    }
1919
1920    /// The coroutine whose context is currently live (`None` on the main thread).
1921    pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
1922        self.current
1923    }
1924
1925    /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1926    /// its pending `__close` handlers, then signal termination. The handlers run
1927    /// here, in place, with the thread still non-yieldable (a yield in one hits
1928    /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1929    /// way a yield does — `exec_with` propagates it past any protecting pcall
1930    /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1931    /// clean death (or, if a handler raised, the coroutine's error).
1932    pub(crate) fn close_running(&mut self) -> LuaError {
1933        let death = match self.close_slots(0, None) {
1934            Ok(()) => None,
1935            Err(e) => Some(e.0),
1936        };
1937        self.terminating = Some(death);
1938        LuaError(Value::Nil)
1939    }
1940
1941    /// `coroutine.status` as seen by the caller.
1942    pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1943        match self.effective_coro_status(co) {
1944            CoroStatus::Suspended => "suspended",
1945            CoroStatus::Running => "running",
1946            CoroStatus::Normal => "normal",
1947            CoroStatus::Dead => "dead",
1948        }
1949    }
1950
1951    fn take_ctx(&mut self) -> SavedCtx {
1952        let saved = SavedCtx {
1953            stack: std::mem::take(&mut self.stack),
1954            frames: std::mem::take(&mut self.frames),
1955            open_upvals: std::mem::take(&mut self.open_upvals),
1956            tbc: std::mem::take(&mut self.tbc),
1957            top: self.top,
1958            pcall_depth: self.pcall_depth,
1959            hook: self.hook,
1960            globals: self.globals,
1961        };
1962        self.frames_resync(); // P17-D Week 1 — frames now empty.
1963        saved
1964    }
1965
1966    fn put_ctx(&mut self, c: SavedCtx) {
1967        self.stack = c.stack;
1968        self.frames = c.frames;
1969        self.open_upvals = c.open_upvals;
1970        self.tbc = c.tbc;
1971        self.top = c.top;
1972        self.pcall_depth = c.pcall_depth;
1973        self.hook = c.hook;
1974        self.globals = c.globals;
1975        self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
1976    }
1977
1978    /// Move a coroutine's saved context into the live VM fields.
1979    fn load_coro_ctx(&mut self, co: Gc<Coro>) {
1980        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1981        let m = unsafe { co.as_mut() };
1982        self.stack = std::mem::take(&mut m.stack);
1983        self.frames = std::mem::take(&mut m.frames);
1984        self.open_upvals = std::mem::take(&mut m.open_upvals);
1985        self.tbc = std::mem::take(&mut m.tbc);
1986        self.top = m.top;
1987        self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
1988        self.pcall_depth = m.pcall_depth;
1989        self.hook = m.hook;
1990        self.globals = m.globals;
1991    }
1992
1993    /// Save the live VM context back into a coroutine object.
1994    fn store_coro_ctx(&mut self, co: Gc<Coro>) {
1995        let c = self.take_ctx();
1996        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1997        let m = unsafe { co.as_mut() };
1998        m.stack = c.stack;
1999        m.frames = c.frames;
2000        m.open_upvals = c.open_upvals;
2001        m.tbc = c.tbc;
2002        m.top = c.top;
2003        m.pcall_depth = c.pcall_depth;
2004        m.hook = c.hook;
2005        m.globals = c.globals;
2006        // bulk-overwrite of every collectable field traced by Coro::trace:
2007        // demote the coro back to gray so propagate re-traces its new state.
2008        self.heap
2009            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2010    }
2011
2012    /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
2013    /// or errors. Ok(values) carries yielded or returned values; Err carries an
2014    /// error raised inside the coroutine (the coroutine becomes dead).
2015    pub(crate) fn resume_coro(
2016        &mut self,
2017        co: Gc<Coro>,
2018        args: Vec<Value>,
2019    ) -> Result<Vec<Value>, LuaError> {
2020        match co.status {
2021            CoroStatus::Suspended => {}
2022            CoroStatus::Dead => return Err(self.rt_err("cannot resume dead coroutine")),
2023            _ => return Err(self.rt_err("cannot resume non-suspended coroutine")),
2024        }
2025        if self.c_depth >= MAX_C_DEPTH {
2026            return Err(self.rt_err("C stack overflow"));
2027        }
2028        self.c_depth += 1;
2029        let resumer = self.current;
2030        // save the resumer's live context away
2031        let rctx = self.take_ctx();
2032        match resumer {
2033            Some(r) => {
2034                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2035                let m = unsafe { r.as_mut() };
2036                m.stack = rctx.stack;
2037                m.frames = rctx.frames;
2038                m.open_upvals = rctx.open_upvals;
2039                m.tbc = rctx.tbc;
2040                m.top = rctx.top;
2041                m.pcall_depth = rctx.pcall_depth;
2042                m.globals = rctx.globals;
2043                m.status = CoroStatus::Normal;
2044                // bulk overwrite of every traced field on r — mirror
2045                // store_coro_ctx's barrier_back so propagate re-traces r.
2046                self.heap
2047                    .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
2048            }
2049            None => self.main_ctx = Some(rctx),
2050        }
2051        // swap the coroutine in
2052        self.load_coro_ctx(co);
2053        {
2054            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2055            let m = unsafe { co.as_mut() };
2056            m.status = CoroStatus::Running;
2057            m.resumer = resumer;
2058        }
2059        // co.resumer is a traced Gc field; barrier_back covers the new
2060        // resumer reference and any future field writes during this call.
2061        self.heap
2062            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2063        self.current = Some(co);
2064
2065        // drive it
2066        let drive = if co.started {
2067            self.coro_continue(&args)
2068        } else {
2069            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2070            unsafe { co.as_mut() }.started = true;
2071            self.coro_first(co.body, &args)
2072        };
2073
2074        // classify: a self-close termination or a pending yield each win over
2075        // the (sentinel) error they raised to unwind the Rust stack.
2076        let (outcome, status) = if let Some(death) = self.terminating.take() {
2077            // the coroutine closed itself: it dies now, cleanly or with the
2078            // error a `__close` handler raised.
2079            match death {
2080                Some(e) => {
2081                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2082                    unsafe { co.as_mut() }.error_value = Some(e);
2083                    self.heap
2084                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2085                    (Err(LuaError(e)), CoroStatus::Dead)
2086                }
2087                None => (Ok(Vec::new()), CoroStatus::Dead),
2088            }
2089        } else {
2090            match self.yielding.take() {
2091                Some((vals, fslot, nres)) => {
2092                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2093                    unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2094                    (Ok(vals), CoroStatus::Suspended)
2095                }
2096                None => {
2097                    // died: a return is clean, an error is remembered so a later
2098                    // `coroutine.close` can report it (PUC lua_closethread).
2099                    // Capture the error-point traceback (set by `unwind` before
2100                    // popping the failing frames) and prepend a synthetic
2101                    // top entry for the C native that initiated the error
2102                    // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2103                    // on the dead coroutine still shows the error site
2104                    // (db.lua :848 family).
2105                    if drive.is_err() {
2106                        let mut tb = self.error_traceback.take().unwrap_or_default();
2107                        if let Some(nm) = self.errored_native.take() {
2108                            let mut prefixed: Vec<u8> = Vec::new();
2109                            prefixed.extend_from_slice(
2110                                format!("\n\t[C]: in function '{nm}'").as_bytes(),
2111                            );
2112                            prefixed.extend(tb);
2113                            tb = prefixed;
2114                        }
2115                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2116                        unsafe { co.as_mut() }.error_traceback = Some(tb);
2117                    }
2118                    if let Err(e) = drive {
2119                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2120                        unsafe { co.as_mut() }.error_value = Some(e.0);
2121                        self.heap
2122                            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2123                    }
2124                    (drive, CoroStatus::Dead)
2125                }
2126            }
2127        };
2128
2129        // save the coroutine's context back and restore the resumer
2130        self.store_coro_ctx(co);
2131        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2132        unsafe { co.as_mut() }.status = status;
2133        match resumer {
2134            Some(r) => {
2135                self.load_coro_ctx(r);
2136                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2137                unsafe { r.as_mut() }.status = CoroStatus::Running;
2138                self.current = Some(r);
2139            }
2140            None => {
2141                let m = self.main_ctx.take().expect("main context saved");
2142                self.put_ctx(m);
2143                self.current = None;
2144            }
2145        }
2146        self.c_depth -= 1;
2147        outcome
2148    }
2149
2150    /// First resume: install the body function at slot 0 and run.
2151    fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2152        self.stack.clear();
2153        self.stack.push(body);
2154        self.stack.extend_from_slice(args);
2155        self.top = self.stack.len() as u32;
2156        match self.begin_call(0, Some(args.len() as u32), -1, true) {
2157            Ok(true) => self.exec_with(1),
2158            Ok(false) => Ok(self.take_results(0)),
2159            Err(e) => Err(e),
2160        }
2161    }
2162
2163    /// Resume after a yield: deliver `args` as the results of the call that
2164    /// yielded, then continue the suspended thread.
2165    fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2166        let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2167        let n = args.len() as u32;
2168        // Restore the full register window of the suspended top frame: a yield
2169        // that unwound through a native (call_value) may have left the stack
2170        // shorter than the frame needs. `base + max_stack` is what push_frame
2171        // allocates; `fslot + n` covers the delivered yield results.
2172        let frame_need = self
2173            .frames
2174            .last()
2175            .and_then(CallFrame::lua)
2176            .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2177            .unwrap_or(0);
2178        let need = frame_need.max((fslot + n) as usize);
2179        if self.stack.len() < need {
2180            self.stack.resize(need, Value::Nil);
2181        }
2182        for (i, &v) in args.iter().enumerate() {
2183            self.stack[fslot as usize + i] = v;
2184        }
2185        self.finish_results(fslot, n, nres);
2186        // the suspended `coroutine.yield` (a C call) now returns its resume
2187        // values: fire the matching "return" hook PUC defers until the resume.
2188        self.hook_return(true, 1, n)?;
2189        self.exec_with(1)
2190    }
2191
2192    /// `coroutine.yield`: suspend the running coroutine, recording where to
2193    /// resume. Errors if called outside a coroutine. Returns a sentinel error
2194    /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2195    pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2196        let nres = self.native_nresults;
2197        self.yielding = Some((vals, func_slot, nres));
2198        // value is irrelevant: resume_coro consults `self.yielding`, not this
2199        LuaError(Value::Nil)
2200    }
2201
2202    /// Install or clear the debug hook on the running thread (`debug.sethook`
2203    /// without a thread argument). Arms the calling frame's `oldpc` to the
2204    /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2205    /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2206    /// native return: the very next traceexec compares against the sethook
2207    /// CALL's line. When the install statement and the following statement are
2208    /// on different source lines (db.lua :322), `changedline` fires for that
2209    /// first statement; when they share a line (db.lua :25 wrapper), they do
2210    /// not, so the wrapper line is not re-fired.
2211    pub(crate) fn install_hook(&mut self, hook: HookState) {
2212        self.hook = hook;
2213        if self.hook.line
2214            && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2215        {
2216            f.hook_oldpc = f.pc.saturating_sub(1);
2217        }
2218    }
2219
2220    /// Install a hook on `target` (`None`/current thread → the live VM fields;
2221    /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2222    /// with an optional thread argument.
2223    ///
2224    /// `target == None` means "no explicit thread argument" — PUC binds that
2225    /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2226    /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2227    /// of whether that's the main thread or a currently-resumed coroutine
2228    /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2229    /// `store_coro_ctx`). So a `None` target should always route to
2230    /// `install_hook` on the live fields. The pre-fix predicate gate
2231    /// `is_current_thread(target)` returned `false` when running inside a
2232    /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2233    /// and silently dropped the hook on the floor — the install happened on
2234    /// no thread at all.
2235    pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2236        if target.is_none() || self.is_current_thread(target) {
2237            self.install_hook(state);
2238        } else if let Some(co) = target {
2239            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2240            let m = unsafe { co.as_mut() };
2241            m.hook = state;
2242            if state.line
2243                && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2244            {
2245                f.hook_oldpc = u32::MAX;
2246            }
2247            // co.hook.func is a traced Value (Coro::trace covers it); demote
2248            // co back to gray so propagate sees the new hook function.
2249            self.heap
2250                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2251        }
2252    }
2253
2254    /// The hook state of `target` (`None`/current → the live VM state).
2255    pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2256        match target {
2257            t if self.is_current_thread(t) => self.hook,
2258            Some(co) => co.hook,
2259            None => self.hook,
2260        }
2261    }
2262
2263    /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
2264    /// hooks disabled (PUC clears the mask) and its results/stack growth are
2265    /// discarded so the interrupted frame's register window is untouched.
2266    /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
2267    fn run_hook(
2268        &mut self,
2269        event: &[u8],
2270        line: Option<i64>,
2271        from_native: bool,
2272    ) -> Result<(), LuaError> {
2273        // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
2274        // synchronous fn pointer call). Both Rust and Lua hooks may be
2275        // installed; both observe each event.
2276        if let Some(rh) = self.hook.rust_func {
2277            let evt = match event {
2278                b"call" => Some(RustHookEvent::Call),
2279                b"return" => Some(RustHookEvent::Return),
2280                b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
2281                b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
2282                b"count" => Some(RustHookEvent::Count),
2283                _ => None,
2284            };
2285            if let Some(evt) = evt {
2286                let was_in_hook = self.in_hook;
2287                self.in_hook = true;
2288                rh(self, evt);
2289                self.in_hook = was_in_hook;
2290            }
2291        }
2292        let Some(hook) = self.hook.func else {
2293            return Ok(());
2294        };
2295        let saved_top = self.top;
2296        let saved_len = self.stack.len();
2297        let name = Value::Str(self.heap.intern(event));
2298        let lv = line.map_or(Value::Nil, Value::Int);
2299        self.in_hook = true;
2300        // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
2301        // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
2302        // ci sat below `hookf` (the function being hooked). When that hooked
2303        // function is native, no Lua frame for it exists in luna's `frames`;
2304        // model it as a synthetic C level by pushing the hook with
2305        // `from_c = true` (then `c_frame_name` reads the caller's call
2306        // instruction → e.g. `name = "sethook"`). When the hooked function is
2307        // Lua (its frame is still on the stack), push with `from_c = false`
2308        // so the level descent lands on it directly. The hook's own frame
2309        // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
2310        // (PUC `CIST_HOOKED`).
2311        self.pending_is_hook = true;
2312        let r = self.call_value_impl(hook, &[name, lv], from_native);
2313        self.pending_is_hook = false;
2314        self.in_hook = false;
2315        self.stack.truncate(saved_len);
2316        self.top = saved_top;
2317        r.map(|_| ())
2318    }
2319
2320    /// Fire the "call" hook on entry to a function, if armed and not already in
2321    /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2322    /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2323    /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2324    /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2325    fn hook_call_with(
2326        &mut self,
2327        from_native: bool,
2328        nargs: u32,
2329        is_tail: bool,
2330    ) -> Result<(), LuaError> {
2331        if self.hook.call
2332            && !self.in_hook
2333            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2334        {
2335            self.hook_ftransfer = 1;
2336            self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2337            // PUC 5.1 didn't distinguish tail-call events — every call,
2338            // including tail-calls, fired plain `"call"`. 5.2 introduced
2339            // the separate `"tail call"` event (mask `"c"` covers both).
2340            // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2341            // "return","tail return","return","tail return"}`.
2342            let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2343                b"tail call"
2344            } else {
2345                b"call"
2346            };
2347            self.run_hook(event, None, from_native)?;
2348        }
2349        Ok(())
2350    }
2351
2352    pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2353        self.hook_call_with(from_native, nargs, false)
2354    }
2355
2356    /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2357    /// the first result slot relative to the activation's func slot, ntransfer
2358    /// the number of results.
2359    pub(crate) fn hook_return(
2360        &mut self,
2361        from_native: bool,
2362        ftransfer: u32,
2363        nresults: u32,
2364    ) -> Result<(), LuaError> {
2365        if self.hook.ret
2366            && !self.in_hook
2367            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2368        {
2369            self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2370            self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2371            self.run_hook(b"return", None, from_native)?;
2372        }
2373        Ok(())
2374    }
2375
2376    /// PUC "tail return" event — fires once per tail call that collapsed
2377    /// into the activation now returning, *after* its own "return" event.
2378    /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2379    fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2380        if self.hook.ret
2381            && !self.in_hook
2382            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2383        {
2384            self.run_hook(b"tail return", None, false)?;
2385        }
2386        Ok(())
2387    }
2388
2389    /// Call a metamethod with a single expected result.
2390    fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2391        let mut r = self.call_value(f, args)?;
2392        Ok(if r.is_empty() {
2393            Value::Nil
2394        } else {
2395            r.swap_remove(0)
2396        })
2397    }
2398
2399    /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2400    /// driven through the interpreter loop with a `Meta` continuation, so a
2401    /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2402    /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2403    /// Returns to the caller with the call set up — the opcode arm must do no
2404    /// further work on the running frame and let the loop iterate. `tm` is
2405    /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2406    /// born from this call inherits it via `pending_tm`, so
2407    /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2408    /// (db.lua :878).
2409    fn begin_meta_call(
2410        &mut self,
2411        func: Value,
2412        args: &[Value],
2413        action: MetaAction,
2414        tm: &'static str,
2415    ) -> Result<(), LuaError> {
2416        let saved_top = self.top;
2417        let cont_slot = self.stack.len() as u32;
2418        self.stack.push(func);
2419        self.stack.extend_from_slice(args);
2420        self.top = self.stack.len() as u32;
2421        frames_push_sync(
2422            &mut self.frames,
2423            &mut self.frames_top,
2424            CallFrame::Cont(NativeCont {
2425                kind: ContKind::Meta(MetaCont { action, saved_top }),
2426                func_slot: cont_slot,
2427                nresults: 1,
2428            }),
2429        );
2430        let saved_tm = self.pending_tm.replace(tm);
2431        // begin_call drives a Lua metamethod through the loop (returns true) or
2432        // runs a native one inline (returns false, leaving results at cont_slot
2433        // for the loop head to pick up); either way the Meta cont resolves there.
2434        let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2435        // Native callees never consumed pending_tm (push_frame is only hit on
2436        // a Lua callee); restore so it doesn't leak to a later push_frame.
2437        self.pending_tm = saved_tm;
2438        r?;
2439        Ok(())
2440    }
2441
2442    /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2443    fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2444        match self.index_step(t, key)? {
2445            MmOut::Done(v) => self.stack[dst as usize] = v,
2446            MmOut::Mm { func, recv } => {
2447                self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2448            }
2449            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2450        }
2451        Ok(())
2452    }
2453
2454    /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2455    fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2456        match self.newindex_step(t, key, v)? {
2457            MmOut::Done(_) => {}
2458            MmOut::Mm { func, recv } => {
2459                self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2460            }
2461            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2462        }
2463        Ok(())
2464    }
2465
2466    /// Apply a comparison opcode's outcome: a known boolean drives the
2467    /// conditional skip directly; a metamethod is called yieldably, its
2468    /// truthiness driving the skip on return.
2469    fn op_compare(
2470        &mut self,
2471        step: MmOut,
2472        l: Value,
2473        r: Value,
2474        k: bool,
2475        tm: &'static str,
2476    ) -> Result<(), LuaError> {
2477        match step {
2478            MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2479            MmOut::Mm { func, .. } => {
2480                self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2481            }
2482            MmOut::CompareSynth { func } => {
2483                // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2484                // negation are driven through `MetaAction::Compare` so the
2485                // metamethod call can yield like any other compare.
2486                self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2487            }
2488        }
2489        Ok(())
2490    }
2491
2492    /// Complete a VM instruction whose metamethod just returned `result` (PUC
2493    /// `luaV_finishOp`). The running frame is already back on top.
2494    fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2495        match action {
2496            MetaAction::Store { dst } => self.stack[dst as usize] = result,
2497            MetaAction::Discard => {}
2498            MetaAction::Compare { k, negate } => {
2499                let t = if negate {
2500                    !result.truthy()
2501                } else {
2502                    result.truthy()
2503                };
2504                self.cond_skip(t, k);
2505            }
2506            MetaAction::Concat { dst, base_a } => {
2507                self.stack[dst as usize] = result;
2508                self.top = dst + 1;
2509                self.concat_run(base_a)?;
2510            }
2511        }
2512        Ok(())
2513    }
2514
2515    // ---- metatables ----
2516
2517    pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2518        match v {
2519            Value::Table(t) => t.metatable(),
2520            Value::Userdata(u) => u.metatable(),
2521            v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2522        }
2523    }
2524
2525    /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2526    /// non-table). No-op for tables (they carry their own).
2527    pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2528        if let Some(i) = type_mt_slot(v) {
2529            self.type_mt[i] = mt;
2530        }
2531    }
2532
2533    /// The metamethod of `v` for `mm`, or nil.
2534    pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2535        match self.metatable_of(v) {
2536            Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2537            None => Value::Nil,
2538        }
2539    }
2540
2541    /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2542    /// only fires when both operands carry a metatable that exposes the same
2543    /// implementation. Returns the metamethod to call, or `Nil` when no
2544    /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2545    /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2546    pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2547        let mt1 = self.metatable_of(l);
2548        let Some(mt1) = mt1 else { return Value::Nil };
2549        let key = Value::Str(self.mm_names[mm as usize]);
2550        let tm1 = mt1.get(key);
2551        if tm1.is_nil() {
2552            return Value::Nil;
2553        }
2554        let mt2 = self.metatable_of(r);
2555        let Some(mt2) = mt2 else { return Value::Nil };
2556        if mt1.as_ptr() == mt2.as_ptr() {
2557            return tm1;
2558        }
2559        let tm2 = mt2.get(key);
2560        if tm2.is_nil() {
2561            return Value::Nil;
2562        }
2563        if tm1.raw_eq(tm2) {
2564            return tm1;
2565        }
2566        Value::Nil
2567    }
2568
2569    /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2570    /// or full userdata whose metatable carries a string `__name` reports that
2571    /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2572    pub(crate) fn obj_typename(&self, v: Value) -> String {
2573        if matches!(v, Value::Table(_) | Value::Userdata(_))
2574            && let Value::Str(s) = self.get_mm(v, Mm::Name)
2575        {
2576            return String::from_utf8_lossy(s.as_bytes()).into_owned();
2577        }
2578        v.type_name().to_string()
2579    }
2580
2581    fn call_at(
2582        &mut self,
2583        func_slot: u32,
2584        nargs: u32,
2585        from_c: bool,
2586    ) -> Result<Vec<Value>, LuaError> {
2587        if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2588            self.exec()
2589        } else {
2590            // native completed inline; results at func_slot..top
2591            Ok(self.take_results(func_slot))
2592        }
2593    }
2594
2595    /// Switch the `collectgarbage` mode, returning the previous mode name.
2596    pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2597        std::mem::replace(&mut self.gc_mode, new)
2598    }
2599
2600    /// Whether the current `collectgarbage` mode is "generational" (where a
2601    /// "step" is a minor collection — a full atomic pass — rather than a paced
2602    /// incremental sweep).
2603    pub(crate) fn gc_mode_is_generational(&self) -> bool {
2604        self.gc_mode == "generational"
2605    }
2606
2607    /// Current `stepsize` pacing parameter (PUC: 0 means an unbounded step that
2608    /// completes a whole cycle at once).
2609    pub(crate) fn gc_stepsize(&self) -> i64 {
2610        self.gc_stepsize
2611    }
2612
2613    /// `collectgarbage("param", name [,value])`: read (or set, returning the
2614    /// previous value of) a pacing parameter. Returns `None` for an unknown
2615    /// name so the caller can raise PUC's `invalid parameter` error. The
2616    /// collector is stop-the-world, so these only round-trip for API fidelity.
2617    pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2618        let slot = match name {
2619            b"pause" => &mut self.gc_pause,
2620            b"stepmul" => &mut self.gc_stepmul,
2621            b"stepsize" => &mut self.gc_stepsize,
2622            _ => return None,
2623        };
2624        let prev = *slot;
2625        if let Some(v) = set {
2626            *slot = v;
2627        }
2628        Some(prev)
2629    }
2630
2631    /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2632    /// paced sweep via `Vm::gc_step`.
2633    ///
2634    /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2635    /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2636    /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2637    /// suspected UAF is structurally closed — born objects no longer become
2638    /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2639    ///
2640    /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2641    /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2642    /// O(SWEEP_DIVISOR) safe-points regardless of size.
2643    #[inline(always)]
2644    pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2645        if self.gc_finalizing {
2646            return;
2647        }
2648        if !self.heap.gc_due() {
2649            return;
2650        }
2651        // v2.5 P1B-2E: tighten to bare `live_top`. The v2.2.0
2652        // `live_top.max(self.top)` workaround is now obsoleted by
2653        // v2.3's `finish_results` slot-clear + v2.5 P1B-2A
2654        // (Op::TailCall collapse slot-clear) + v2.5 P1B-2B
2655        // (pcall unwind slot-clear). PUC L->top discipline is now
2656        // mirrored at every frame-pop site.
2657        self.gc_top = live_top;
2658        // PUC stepmul: % of allocation rate. Higher = more GC work per
2659        // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2660        // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2661        const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2662        const MIN_BUDGET: usize = 64_000;
2663        let stepmul = self.gc_stepmul.max(1) as usize;
2664        let divisor = (SWEEP_BASE / stepmul).max(1);
2665        let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2666        if self.gc_step(budget) {
2667            self.heap.rearm_gc_pause(self.gc_pause);
2668        }
2669    }
2670
2671    /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2672    /// roots (open upvalues, which are not first-class Values). Shared by the
2673    /// full collector and the incremental-sweep driver so both snapshot the
2674    /// exact same live set.
2675    fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2676        let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2677        roots.push(Value::Table(self.globals));
2678        for mt in self.type_mt.into_iter().flatten() {
2679            roots.push(Value::Table(mt));
2680        }
2681        for &n in &self.mm_names {
2682            roots.push(Value::Str(n));
2683        }
2684        // root only the running thread's live registers (PUC marks [stack, top)):
2685        // freed temporaries above `gc_top` are excluded so weak values stranded
2686        // there are not pinned. Suspended threads (main_ctx, other coroutines)
2687        // stay whole-rooted below — safe over-rooting, and they are not the
2688        // thread whose weak-table loop is under test.
2689        let live = (self.gc_top as usize).min(self.stack.len());
2690        roots.extend_from_slice(&self.stack[..live]);
2691        for cf in &self.frames {
2692            match cf {
2693                CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2694                CallFrame::Cont(NativeCont {
2695                    kind: ContKind::Xpcall { handler },
2696                    ..
2697                }) => roots.push(*handler),
2698                CallFrame::Cont(NativeCont {
2699                    kind: ContKind::Close(cc),
2700                    ..
2701                }) => {
2702                    // Root the error threaded through this close chain so a
2703                    // `collectgarbage()` inside a sibling `__close` handler
2704                    // does not free it before the next handler is invoked
2705                    // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2706                    if let Some(e) = cc.pending {
2707                        roots.push(e);
2708                    }
2709                    if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2710                        roots.push(err);
2711                    }
2712                }
2713                CallFrame::Cont(_) => {}
2714            }
2715        }
2716        if let Some(e) = self.closing_err {
2717            roots.push(e);
2718        }
2719        // B12 host roots — Lua-facade handles keep their referenced
2720        // values alive across calls/yields. Trace the whole vector;
2721        // unused slots (post-`unpin_all`) carry Value::Nil which the
2722        // GC ignores.
2723        for slot in &self.host_roots {
2724            // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2725            roots.push(slot.value);
2726        }
2727        // v2.1 — `table.sort` and similar builtins stash their working
2728        // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2729        // comparator callback doesn't free strings/tables snapshotted
2730        // off the live table (sort.lua's `load(..)(); collectgarbage()`
2731        // compare regression).
2732        for buf in &self.sort_scratch {
2733            roots.extend_from_slice(buf);
2734        }
2735        // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2736        // mid-execution. Without rooting them here, a `collectgarbage()`
2737        // invoked inside the running native (sort.lua AA `load(..)();
2738        // collectgarbage()` compare callback regression) sweeps the
2739        // closure that's actively executing, leaving `nc.upvals`
2740        // dangling and the Rust local `nc` pointing at recycled memory
2741        // — the SIGSEGV pops on the very next field access or pop.
2742        for &nc in &self.running_natives {
2743            roots.push(Value::Native(nc));
2744        }
2745        // the running thread's debug hook (suspended threads root theirs via
2746        // Coro::trace / the main_ctx sweep below)
2747        if let Some(h) = self.hook.func {
2748            roots.push(h);
2749        }
2750        // the running coroutine (its saved-context fields live in the VM, but
2751        // the object itself + its resumer chain must stay reachable)
2752        if let Some(co) = self.current {
2753            roots.push(Value::Coro(co));
2754        }
2755        if let Some(mc) = self.main_coro {
2756            roots.push(Value::Coro(mc));
2757        }
2758        // debug.getregistry() and io library state
2759        if let Some(r) = self.registry {
2760            roots.push(Value::Table(r));
2761        }
2762        if let Some(mt) = self.file_mt {
2763            roots.push(Value::Table(mt));
2764        }
2765        if let Some(f) = self.io_input {
2766            roots.push(Value::Userdata(f));
2767        }
2768        if let Some(f) = self.io_output {
2769            roots.push(Value::Userdata(f));
2770        }
2771        // the main thread's saved context while a coroutine runs
2772        if let Some(m) = &self.main_ctx {
2773            roots.extend_from_slice(&m.stack);
2774            if let Some(h) = m.hook.func {
2775                roots.push(h);
2776            }
2777            for cf in &m.frames {
2778                match cf {
2779                    CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2780                    CallFrame::Cont(NativeCont {
2781                        kind: ContKind::Xpcall { handler },
2782                        ..
2783                    }) => roots.push(*handler),
2784                    CallFrame::Cont(_) => {}
2785                }
2786            }
2787        }
2788        let mut extra: Vec<*mut GcHeader> = self
2789            .open_upvals
2790            .iter()
2791            .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2792            .collect();
2793        if let Some(m) = &self.main_ctx {
2794            extra.extend(
2795                m.open_upvals
2796                    .iter()
2797                    .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2798            );
2799        }
2800        (roots, extra)
2801    }
2802
2803    /// Run a full collection with the VM's roots, then run any `__gc`
2804    /// finalizers the collection scheduled. A no-op (returns 0) when already
2805    /// inside a finalizer — the collector is not reentrant (PUC).
2806    pub fn collect_garbage(&mut self) -> usize {
2807        if self.gc_finalizing {
2808            return 0;
2809        }
2810        let (roots, extra) = self.gc_roots();
2811        let freed = self.heap.collect_ex(&roots, &extra);
2812        self.run_finalizers();
2813        freed
2814    }
2815
2816    /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2817    /// threw; gc.lua's "errors during collection" probe relies on it. This
2818    /// variant runs the same cycle but propagates the captured finalizer
2819    /// error to the explicit caller.
2820    pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2821        if self.gc_finalizing {
2822            return Ok(0);
2823        }
2824        let (roots, extra) = self.gc_roots();
2825        let freed = self.heap.collect_ex(&roots, &extra);
2826        self.run_finalizers_or_err()?;
2827        Ok(freed)
2828    }
2829
2830    /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
2831    /// should report fail rather than collect).
2832    pub(crate) fn gc_is_finalizing(&self) -> bool {
2833        self.gc_finalizing
2834    }
2835
2836    /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2837    /// = true indicates more pieces follow (concatenated until the first
2838    /// `to_cont = false` call flushes the whole line). Mirrors
2839    /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2840    ///   * If the buffer is fresh, `to_cont` is false, and the message is
2841    ///     `@<word>`, treat as a control message — only `@on` / `@off` are
2842    ///     recognised; any other `@…` is silently ignored.
2843    ///   * Otherwise, while the state is `Off`, drop the piece; while `On`,
2844    ///     accumulate, and flush to stderr + `warn_log` on the
2845    ///     non-continuation call.
2846    pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
2847        if self.warn_buf.is_empty()
2848            && !to_cont
2849            && let Some(b'@') = msg.first().copied()
2850        {
2851            match &msg[1..] {
2852                b"on" => self.warn_state = WarnState::On,
2853                b"off" => self.warn_state = WarnState::Off,
2854                _ => {} // unknown control — silently ignored (PUC checkcontrol)
2855            }
2856            return;
2857        }
2858        if self.warn_state == WarnState::Off {
2859            // drop continuation pieces too — PUC `warnfoff` is the trampoline
2860            return;
2861        }
2862        self.warn_buf.extend_from_slice(msg);
2863        if !to_cont {
2864            let line = std::mem::take(&mut self.warn_buf);
2865            eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
2866            self.warn_log.push(line);
2867        }
2868    }
2869
2870    /// Drain the in-process warning log (one entry per emitted message, sans
2871    /// `"Lua warning: "` prefix and newline). For test harnesses that want to
2872    /// assert on warn output without scraping stderr.
2873    pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
2874        std::mem::take(&mut self.warn_log)
2875    }
2876
2877    /// Arm the cooperative instruction budget (P09 embedding). The run loop
2878    /// decrements this once per dispatch turn; on zero it raises a catchable
2879    /// `"instruction budget exceeded"` error and disarms itself so the host
2880    /// can resume with a fresh budget on the next call. `None` removes the
2881    /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
2882    /// short-script semantics.
2883    pub fn set_instr_budget(&mut self, budget: Option<i64>) {
2884        self.instr_budget = budget;
2885    }
2886
2887    /// Remaining instruction budget (None when unbounded).
2888    pub fn instr_budget_remaining(&self) -> Option<i64> {
2889        self.instr_budget
2890    }
2891
2892    /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
2893    /// **must** disable JIT when relying on `instr_budget` — see the
2894    /// `jit_enabled` field doc for the rationale.
2895    pub fn set_jit_enabled(&mut self, enabled: bool) {
2896        self.jit.enabled = enabled;
2897    }
2898
2899    /// Current JIT enable state.
2900    pub fn jit_enabled(&self) -> bool {
2901        self.jit.enabled
2902    }
2903
2904    /// Toggle the trace JIT (P12). Off by default while the sprint
2905    /// develops. When enabled, hot back-edges are counted on
2906    /// `Proto.trace_hot_count`; once the counter passes
2907    /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
2908    /// mode at the back-edge target. Stays a no-op until S2's
2909    /// trace lowerer and S3's dispatcher land.
2910    pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
2911        self.jit.trace_enabled = enabled;
2912    }
2913
2914    /// P16-A — opt-in flag for the self-link cycle catch. See field
2915    /// docs for the correctness blocker. Default `false`.
2916    pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
2917        self.jit.p16_self_link_enabled = enabled;
2918    }
2919
2920    /// Current state of the P16-A self-link cycle catch.
2921    pub fn p16_self_link_enabled(&self) -> bool {
2922        self.jit.p16_self_link_enabled
2923    }
2924
2925    /// Current trace-JIT enable state.
2926    pub fn trace_jit_enabled(&self) -> bool {
2927        self.jit.trace_enabled
2928    }
2929
2930    /// Number of traces that have closed cleanly (looped back to the
2931    /// head PC) since this Vm was constructed. Cumulative; used by
2932    /// tests + tuning. Will become the dominant signal once S2's
2933    /// compile + cache lands.
2934    pub fn trace_closed_count(&self) -> u64 {
2935        self.jit.counters.closed
2936    }
2937
2938    /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
2939    /// hit an un-recordable op — the latter lands at S2).
2940    pub fn trace_aborted_count(&self) -> u64 {
2941        self.jit.counters.aborted
2942    }
2943
2944    /// P13-S13-G v2 — number of compiled traces whose close shape
2945    /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
2946    /// pin `dispatchable=false` because the dispatcher can't
2947    /// resume at a depth>0 PC without the matching CallFrames.
2948    /// S4-step4b's frame-mat helper could synthesise those, but
2949    /// the InlineAbort emit path isn't wired up yet — fresh
2950    /// pickup work for S13-G v2-full.
2951    pub fn trace_inline_abort_count(&self) -> u64 {
2952        self.jit.counters.inline_abort
2953    }
2954
2955    /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
2956    pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
2957        &self.jit.counters.dispatch_off_reasons
2958    }
2959
2960    /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
2961    pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
2962        &self.jit.counters.compile_failed_reasons
2963    }
2964
2965    /// P13-S13-H — see `JitCounters::closed_lens`. Returns
2966    /// `(is_call_triggered, ops_len)` for every trace that closed.
2967    pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
2968        &self.jit.counters.closed_lens
2969    }
2970
2971    /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
2972    /// Per-reason close-cause counts (recorder-side abort/discard +
2973    /// lowerer-side dispatch_off labels) keyed by `&'static str`.
2974    pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
2975        &self.jit.counters.close_cause_counts
2976    }
2977
2978    /// v2.0 Track-R R3b — number of compiled traces whose
2979    /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
2980    /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
2981    /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
2982    /// hot loop with p16-on. R3b keeps `dispatchable = false` even
2983    /// when this count bumps; R3d will lift it.
2984    pub fn trace_downrec_link_compiled_count(&self) -> u64 {
2985        self.jit.counters.downrec_link_compiled
2986    }
2987
2988    /// v2.0 Track-R R3c — see
2989    /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
2990    /// of times the dispatcher's `is_downrec_sentinel` arm fired and
2991    /// classified the return as a caller-pc-guard HIT.
2992    pub fn trace_downrec_dispatched_count(&self) -> u64 {
2993        self.jit.counters.downrec_dispatched
2994    }
2995
2996    /// v2.0 Track-R R3c — see
2997    /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
2998    /// times the dispatcher entered a `downrec_link`-bearing trace and
2999    /// the trace returned via the lowerer's deopt block (caller-pc
3000    /// guard MISS), or the dispatcher itself force-deopted via the
3001    /// stitch-cycle checkpoint.
3002    pub fn trace_downrec_deopt_count(&self) -> u64 {
3003        self.jit.counters.downrec_deopt
3004    }
3005
3006    /// v2.0 Track-R R3d — see
3007    /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3008    /// Number of compiled traces whose lowerer emitted a multi-way
3009    /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3010    /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3011    pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3012        self.jit.counters.multi_way_guard_emitted
3013    }
3014
3015    /// P12-S2.C — number of closed traces the lowerer compiled and
3016    /// parked on `Proto.traces`. Re-records of the same head_pc are
3017    /// deduped (the second close finds the head_pc already cached
3018    /// and skips compile), so this never exceeds `trace_closed_count`.
3019    pub fn trace_compiled_count(&self) -> u64 {
3020        self.jit.counters.compiled
3021    }
3022
3023    /// v2.1 Phase 1I.B — number of times the recorder captured a
3024    /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3025    /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3026    /// by the Phase 1I.B opt-in fire test to verify the env gate
3027    /// wiring round-trips end-to-end (env -> recorder -> snapshot
3028    /// -> counter -> getter -> assertion).
3029    pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3030        self.jit.counters.field_ic_snapshot_captured
3031    }
3032
3033    /// P12-S2.C — number of closed traces the lowerer rejected
3034    /// (any of the bail conditions in
3035    /// `crate::jit::trace::try_compile_trace`).
3036    pub fn trace_compile_failed_count(&self) -> u64 {
3037        self.jit.counters.compile_failed
3038    }
3039
3040    /// P12-S3 — number of times the dispatcher jumped into a
3041    /// compiled trace. Bumps on every entry; `trace_deopt_count`
3042    /// counts the subset where the trace returned with a parked
3043    /// `jit_pending_err`.
3044    pub fn trace_dispatched_count(&self) -> u64 {
3045        self.jit.counters.dispatched
3046    }
3047
3048    /// P12-S3 — number of trace entries that came back with
3049    /// `jit_pending_err` set (typically a metatable shadowed an
3050    /// index inside a helper, forcing the dispatcher to fall back
3051    /// to the interpreter without committing the trace's result).
3052    pub fn trace_deopt_count(&self) -> u64 {
3053        self.jit.counters.deopt
3054    }
3055
3056    /// P15-A v1 — number of times the dispatcher started a side
3057    /// trace recording (an `exit_hit_counts` slot crossed
3058    /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3059    /// was None and trace JIT was enabled). Each unit is exactly one
3060    /// `start_side_trace` call; the actual compile success counts
3061    /// under [`Self::trace_compiled_count`] like any other trace.
3062    /// Probe use: distinguishes the "side-trace pipeline fired"
3063    /// signal from the "primary back-edge / call-trigger fired"
3064    /// signal so v0-v3 architectural progress is visible without
3065    /// reading per-counter histograms.
3066    pub fn trace_side_trace_started_count(&self) -> u64 {
3067        self.jit.counters.side_trace_started
3068    }
3069
3070    /// P15-A v2-A — number of side-trace recordings that closed,
3071    /// compiled successfully, AND patched their parent's
3072    /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3073    /// dispatch through these ptrs yet (v2-B/C job), but the
3074    /// counter + ptr write proves the compile + link pipeline is
3075    /// complete end-to-end.
3076    pub fn trace_side_trace_compiled_count(&self) -> u64 {
3077        self.jit.counters.side_trace_compiled
3078    }
3079
3080    /// P15-A v2-C-A5-C — number of side traces that compiled
3081    /// successfully but were SHEDDED by the close-handler shape-
3082    /// match gate (`exit_tags_match_entry_tags`). High ratios
3083    /// vs. `trace_side_trace_compiled_count` indicate the
3084    /// architecture is shedding lots of would-be side traces;
3085    /// useful as a tuning probe for future relaxation of the
3086    /// gate or for child-IR re-specialisation against parent's
3087    /// exit shape.
3088    pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3089        self.jit.counters.side_trace_shape_mismatch
3090    }
3091
3092    /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3093    /// classified as `crate::jit::trace::EscapeState::Sinkable`
3094    /// across every successfully compiled trace on this Vm. The
3095    /// count is post-demotion: sites pre-emit drops back to Escaped
3096    /// for not meeting v1 sunk-emit criteria are NOT counted.
3097    /// `trace_sunk_alloc_count` matches one-for-one today (every
3098    /// surviving Sinkable site goes through sunk emit).
3099    pub fn trace_sinkable_seen_count(&self) -> u64 {
3100        self.jit.counters.sinkable_seen
3101    }
3102
3103    /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3104    pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3105        self.jit.counters.accum_bufferable_seen
3106    }
3107
3108    /// P15-prep — total dispatch hits across all known traces,
3109    /// broken into hot-exit telemetry (max single-exit count,
3110    /// total dispatches, exit count). Used by probes to identify
3111    /// hot side-exits as side-trace candidates.
3112    ///
3113    /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3114    /// recursively, so inner functions' traces are reported.
3115    pub fn trace_exit_hit_summary(
3116        &self,
3117        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3118    ) -> Vec<(u32, Vec<u32>)> {
3119        fn walk(
3120            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3121            out: &mut Vec<(u32, Vec<u32>)>,
3122        ) {
3123            for ct in proto.traces.borrow().iter() {
3124                let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3125                out.push((ct.head_pc, counts));
3126            }
3127            for inner in proto.protos.iter() {
3128                walk(*inner, out);
3129            }
3130        }
3131        let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3132        walk(cl.proto, &mut out);
3133        out
3134    }
3135
3136    /// P15-A v0 — surface every side-exit slot whose hit count is
3137    /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3138    /// `cl.proto` (recursively walking `proto.protos`). Returned
3139    /// entries are side-trace candidates: each carries the parent
3140    /// trace's `(head_proto, head_pc)`, the exit's index in the
3141    /// parent's `exit_hit_counts`, and the side trace's natural
3142    /// entry shape (`cont_pc` + `exit_tags`).
3143    ///
3144    /// Layout of `exit_hit_counts` (mirrored by the iter):
3145    /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3146    ///   window-sized exit_tags).
3147    /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3148    ///   → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3149    /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3150    ///   exit_tags = `ct.exit_tags`).
3151    pub fn hot_exit_iter(
3152        &self,
3153        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3154    ) -> Vec<crate::jit::trace::HotExitInfo> {
3155        use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3156        fn walk(
3157            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3158            out: &mut Vec<HotExitInfo>,
3159        ) {
3160            for ct in proto.traces.borrow().iter() {
3161                let inline_n = ct.per_exit_inline.len();
3162                let tags_n = ct.per_exit_tags.len();
3163                debug_assert_eq!(
3164                    ct.exit_hit_counts.len(),
3165                    inline_n + tags_n + 1,
3166                    "exit_hit_counts layout invariant violated"
3167                );
3168                for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3169                    let hits = cell.get();
3170                    if hits < HOTEXIT_THRESHOLD {
3171                        continue;
3172                    }
3173                    let (cont_pc, exit_tags) = if idx < inline_n {
3174                        let ent = &ct.per_exit_inline[idx];
3175                        (ent.cont_pc, ent.exit_tags.clone())
3176                    } else if idx < inline_n + tags_n {
3177                        let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3178                        (*pc, tags.clone())
3179                    } else {
3180                        (ct.head_pc, ct.exit_tags.clone())
3181                    };
3182                    out.push(HotExitInfo {
3183                        head_proto: proto,
3184                        head_pc: ct.head_pc,
3185                        exit_idx: idx,
3186                        hits,
3187                        cont_pc,
3188                        exit_tags,
3189                    });
3190                }
3191            }
3192            for inner in proto.protos.iter() {
3193                walk(*inner, out);
3194            }
3195        }
3196        let mut out: Vec<HotExitInfo> = Vec::new();
3197        walk(cl.proto, &mut out);
3198        out
3199    }
3200
3201    /// P12-S5-B — sum of NewTable sites that actually took the
3202    /// sunk-emit path across every successfully compiled trace on
3203    /// this Vm. Each counted site skips its heap `Gc<Table>`
3204    /// allocation per dispatch; the array part lives as Cranelift
3205    /// `Variable`s for the duration of the trace.
3206    pub fn trace_sunk_alloc_count(&self) -> u64 {
3207        self.jit.counters.sunk_alloc
3208    }
3209
3210    /// P12-S5-C — sum of materialise-helper emit sites across every
3211    /// successfully compiled trace on this Vm. Each unit is a
3212    /// (site × cmp side-exit) pair whose IR reconstructs a heap
3213    /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3214    /// emit is wiring materialise into the right side-exits.
3215    pub fn trace_materialize_emit_count(&self) -> u64 {
3216        self.jit.counters.materialize_emit
3217    }
3218
3219    /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3220    /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3221    /// replaces a `Heap::new_closure_inline` call on the dispatch
3222    /// path; the count is static (one per matching op per compiled
3223    /// trace), summed at compile success.
3224    pub fn trace_closure_emit_count(&self) -> u64 {
3225        self.jit.counters.closure_emit
3226    }
3227
3228    /// v2.0 Stage 7 polish 6 fire experiment — see
3229    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3230    /// Number of compiled traces whose `per_exit_inline.len() > 0`
3231    /// (depth>0 inlined cmp side-exits emitted).
3232    pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3233        self.jit.counters.per_exit_inline_compiled
3234    }
3235
3236    /// v2.0 Stage 7 polish 6 fire experiment — see
3237    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3238    /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3239    /// `dispatchable == true` — i.e. the count of compiled traces
3240    /// that would actually exercise the AOT polish 6 chain-reloc +
3241    /// deploy-resolver path.
3242    pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3243        self.jit.counters.per_exit_inline_dispatchable
3244    }
3245
3246    /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3247    /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3248    /// whether a self-recursive function actually walked the depth
3249    /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3250    /// across traces and Vm activations; reset only on `Vm::new`.
3251    pub fn trace_max_depth_seen(&self) -> u8 {
3252        self.jit.max_depth_seen
3253    }
3254
3255    /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3256    /// dispatch time). The frame-materialization helper reads `.base`
3257    /// to compute offsets for each inlined frame's window.
3258    #[doc(hidden)]
3259    pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3260        match self.frames.last() {
3261            Some(CallFrame::Lua(f)) => Some(*f),
3262            _ => None,
3263        }
3264    }
3265
3266    /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3267    /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3268    /// by `luna-tools` (`luna-profile`'s sampler walks this from
3269    /// inside a `Count` hook). Sibling-module scope: not part of the
3270    /// public embedder surface, but `inspect::frames_for_profile` is.
3271    #[doc(hidden)]
3272    pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3273        &self.frames
3274    }
3275
3276    /// P12-S4-step4b — ensure the value stack covers indices
3277    /// `[0..need)`. Extends with Nil if shorter. Called by the
3278    /// frame-materialization helper before pushing an inlined frame
3279    /// whose register window may exceed the current stack length.
3280    #[doc(hidden)]
3281    pub fn jit_ensure_stack(&mut self, need: usize) {
3282        if self.stack.len() < need {
3283            self.stack.resize(need, Value::Nil);
3284        }
3285    }
3286
3287    /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3288    /// `__close` handlers would run (any active tbc slot ≥ from
3289    /// holding a non-nil/false Value); if so, parks a deopt sentinel
3290    /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3291    /// IR branches to the deopt block. Otherwise performs the safe
3292    /// part of close — `close_from(from)` to close open upvals +
3293    /// drop any drained tbc entries ≥ from — and returns 0.
3294    ///
3295    /// Returns are i64-shaped so the cranelift import sig stays
3296    /// trivial (i64 → i64 mapping).
3297    #[doc(hidden)]
3298    pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3299        if self.jit.pending_err.is_some() {
3300            return 1;
3301        }
3302        let Some(f) = self.jit_last_lua_frame() else {
3303            self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3304            return 1;
3305        };
3306        let from = f.base + start_offset;
3307        let has_handler = self.tbc.iter().any(|&s| {
3308            s >= from && {
3309                let v = self.stack[s as usize];
3310                !matches!(v, Value::Nil | Value::Bool(false))
3311            }
3312        });
3313        if has_handler {
3314            self.jit.pending_err =
3315                Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3316            return 1;
3317        }
3318        self.close_from(from);
3319        // Drain any tbc entries ≥ from (they're nil/false stubs the
3320        // interpreter's drive_close would have skipped silently).
3321        while let Some(&s) = self.tbc.last() {
3322            if s < from {
3323                break;
3324            }
3325            self.tbc.pop();
3326        }
3327        0
3328    }
3329
3330    /// P12-S7-B — spill the trace's current value for a register to
3331    /// the underlying `vm.stack[base + slot_offset]`. Required before
3332    /// an `Op::Closure` whose inner proto has an `in_stack: true`
3333    /// upval at `slot_offset` — the helper's `find_or_create_upval`
3334    /// captures a live pointer to `vm.stack[base + slot_offset]`,
3335    /// which must hold the right value at call time (trace IR's
3336    /// Variable hasn't yet been written back).
3337    ///
3338    /// Parameters arrive as i64 from the IR: `slot_offset` is the
3339    /// caller-frame register index (`u32` in practice, depth=0
3340    /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3341    /// `crate::runtime::value::raw` byte for the slot's RegKind;
3342    /// `raw_bits` is the trace Variable's `use_var` payload
3343    /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3344    /// raw `Gc::as_ptr` cast).
3345    #[doc(hidden)]
3346    pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3347        let Some(f) = self.jit_last_lua_frame() else {
3348            self.jit.pending_err =
3349                Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3350            return;
3351        };
3352        let idx = (f.base as usize) + (slot_offset as usize);
3353        if self.stack.len() <= idx {
3354            self.stack.resize(idx + 1, Value::Nil);
3355        }
3356        // SAFETY: caller (trace JIT IR emit) provides matching
3357        // `(tag, raw_bits)` — same shape produced by Value::unpack.
3358        let v = unsafe {
3359            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3360        };
3361        self.stack[idx] = v;
3362    }
3363
3364    /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3365    /// Mirrors the interp arm (this file ~L5316): copies the
3366    /// generator/state/control triple from `R[A..=A+2]` to
3367    /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3368    /// the iterator function via `begin_call`. v2 only handles
3369    /// `Value::Native` iterators (the canonical `ipairs_iter` /
3370    /// `next` builtins) — a Lua-closure iterator would push a Lua
3371    /// frame mid-trace, breaking `recording_frame_base`, so we
3372    /// deopt by parking a `pending_err` and returning `-1`.
3373    ///
3374    /// `slot_offset` is the caller-frame register index (=
3375    /// `inst.a()` decoded from a u32-wide field). `nvars` is
3376    /// `inst.c() as i32` — the caller's expected return count.
3377    /// P12-S12-C v1 — refresh only the raw payload of
3378    /// `vm.stack[base + slot_offset]`, preserving its existing
3379    /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3380    /// uses this when the slot's `RegKind` is `Unset` (no compile-
3381    /// time tag info; commonly `Str` slots which the trace doesn't
3382    /// model). The interp's previous execution of the same op
3383    /// already populated the slot with the right tag — the trace
3384    /// only needs to swap in its current raw value.
3385    #[doc(hidden)]
3386    pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3387        let Some(f) = self.jit_last_lua_frame() else {
3388            return;
3389        };
3390        let idx = (f.base as usize) + (slot_offset as usize);
3391        if idx >= self.stack.len() {
3392            return;
3393        }
3394        let (tag, _) = self.stack[idx].unpack();
3395        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3396        self.stack[idx] = unsafe {
3397            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3398        };
3399    }
3400
3401    /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3402    ///
3403    /// Mirrors the interp arm (this file ~L5112): `self.top =
3404    /// base + a + n; concat_run(base + a)`. Result lands at
3405    /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3406    /// deopt (any error from `concat_run` OR detection that the
3407    /// metamethod path was taken — `concat_run` returns `Ok(())`
3408    /// after `begin_meta_call` which has pushed a Lua frame the
3409    /// trace can't safely continue past).
3410    ///
3411    /// The frame-push detection uses `pre/post frames.len()` and
3412    /// unwinds any pushed frames before deopting, so the
3413    /// dispatcher's existing deopt path sees a clean stack.
3414    #[doc(hidden)]
3415    pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3416        if self.jit.pending_err.is_some() {
3417            return -1;
3418        }
3419        let Some(f) = self.jit_last_lua_frame() else {
3420            self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3421            return -1;
3422        };
3423        let abs_a = f.base + slot_offset;
3424        self.top = abs_a + n as u32;
3425        let pre_frames = self.frames.len();
3426        let result = self.concat_run(abs_a);
3427        let post_frames = self.frames.len();
3428        // Frame-push = metamethod path taken (begin_meta_call pushed
3429        // a Lua frame). The trace can't continue past it; unwind +
3430        // deopt so interp redoes Op::Concat in the slow path.
3431        while self.frames.len() > pre_frames {
3432            frames_pop_sync(&mut self.frames, &mut self.frames_top);
3433        }
3434        if let Err(e) = result {
3435            self.jit.pending_err = Some(e);
3436            return -1;
3437        }
3438        if post_frames > pre_frames {
3439            self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3440            return -1;
3441        }
3442        0
3443    }
3444
3445    /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3446    /// accumulator buffer pool, returning a raw pointer. The trace
3447    /// fn's IR holds this pointer in a stack slot through the loop
3448    /// and calls `jit_str_buf_extend` per iter. If the pool is
3449    /// empty, allocate fresh.
3450    ///
3451    /// Safety: the returned pointer is valid until
3452    /// `jit_str_buf_release` is called or the Vm is dropped. The
3453    /// caller MUST not retain it across `enter_jit` boundaries.
3454    #[doc(hidden)]
3455    pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3456        let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3457        // Move into a Box so the pointer is stable until release.
3458        Box::into_raw(Box::new(buf))
3459    }
3460
3461    /// P14-S14-B v2 — return a previously-acquired buffer to the
3462    /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3463    /// buffer is `clear`ed (capacity retained) so the next acquire
3464    /// gets a ready-to-extend Vec.
3465    ///
3466    /// Safety: `buf` must have been returned by a prior
3467    /// `jit_str_buf_acquire` on the same Vm.
3468    #[doc(hidden)]
3469    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3470    pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3471        if buf.is_null() {
3472            return;
3473        }
3474        // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3475        let mut owned = unsafe { Box::from_raw(buf) };
3476        owned.clear();
3477        if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3478            self.jit.str_buf_pool.push(*owned);
3479        }
3480        // Else: drop the buffer.
3481    }
3482
3483    /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3484    /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3485    /// the piece slot) and passes it through; we treat it as a
3486    /// `*mut LuaStr` and append its bytes.
3487    ///
3488    /// Returns 0 on success, -1 if the piece isn't a Str (would
3489    /// trip __concat metamethod path → deopt to interp).
3490    ///
3491    /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3492    /// trace's piece slot raw bits.
3493    #[doc(hidden)]
3494    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3495    pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3496        if buf.is_null() || str_ptr == 0 {
3497            return -1;
3498        }
3499        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3500        let buf = unsafe { &mut *buf };
3501        let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3502        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3503        let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3504        buf.extend_from_slice(bytes);
3505        0
3506    }
3507
3508    /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3509    /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3510    /// the trace to write into the accumulator slot.
3511    ///
3512    /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3513    /// (the v2 hard cap; the trace deopts).
3514    ///
3515    /// Safety: `buf` from prior `acquire`. The buffer is left
3516    /// CLEAR (drained) ready for `release`.
3517    #[doc(hidden)]
3518    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3519    pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3520        if buf.is_null() {
3521            return 0;
3522        }
3523        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3524        let buf = unsafe { &mut *buf };
3525        let bytes = std::mem::take(buf);
3526        // v2 hard cap at 256KB per RFC Q3.
3527        if bytes.len() > 256 * 1024 {
3528            return 0;
3529        }
3530        let gc = self.heap.intern(&bytes);
3531        gc.as_ptr() as i64
3532    }
3533
    /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
    ///
    /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
    /// v3: ipairs `inext` fast path at the top — skip begin_call
    ///     when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
    ///     R[A+2]=Int.
    /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
    ///     caller-provided buffers + return R[A+4]'s tag byte. Lets
    ///     emit skip 3 separate `luna_jit_stack_load` calls and 1
    ///     `luna_jit_stack_tag` call by reading the buffer via
    ///     cranelift `stack_load` IR instead. Returns -1 on deopt.
    ///
    /// Parameters:
    /// * `slot_offset` — register index `A`, relative to the last Lua
    ///   frame's `base`.
    /// * `nvars` — number of loop variables the caller expects; it is
    ///   forwarded to `begin_call` and bounds the fast path's writes.
    ///   NOTE(review): it is cast with `as usize`, so a negative value
    ///   would turn into a huge count — presumably callers only pass a
    ///   non-negative operand; confirm at the emit site.
    /// * `ctrl_out` / `key_out` / `val_out` — out-pointers that receive
    ///   the raw payloads of R[A+2], R[A+4] and R[A+5] (`val_out` gets
    ///   `0` when `nvars < 2`). They are written unconditionally on the
    ///   non-deopt path and are not null-checked.
    ///
    /// Deopt (`-1`, with `jit.pending_err` set) happens when an error is
    /// already pending, when there is no Lua frame, when the iterator is
    /// not a `Value::Native` on the slow path, or when `begin_call`
    /// fails.
    #[doc(hidden)]
    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
    pub fn jit_op_tforcall(
        &mut self,
        slot_offset: u32,
        nvars: i32,
        ctrl_out: *mut i64,
        key_out: *mut i64,
        val_out: *mut i64,
    ) -> i64 {
        if self.jit.pending_err.is_some() {
            return -1;
        }
        let Some(f) = self.jit_last_lua_frame() else {
            self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
            return -1;
        };
        let abs = f.base + slot_offset;
        // Slots R[A]..=R[A+6] are all touched below; make them valid.
        let need = (abs + 7) as usize;
        if self.stack.len() < need {
            self.stack.resize(need, Value::Nil);
        }
        // v3 fast path.
        let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
            && std::ptr::fn_addr_eq(
                n.f,
                crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
            )
            && let Value::Table(t) = self.stack[(abs + 1) as usize]
            && t.metatable().is_none()
            && let Value::Int(i) = self.stack[(abs + 2) as usize]
        {
            let next_i = i.wrapping_add(1);
            let v = t.get_int(next_i);
            if v.is_nil() {
                // End of iteration: only the key slot is set (to nil).
                self.stack[(abs + 4) as usize] = Value::Nil;
            } else {
                self.stack[(abs + 4) as usize] = Value::Int(next_i);
                if (nvars as usize) >= 2 {
                    self.stack[(abs + 5) as usize] = v;
                }
                // Any loop variables beyond key/value are nil-filled,
                // but only where the slot already exists on the stack.
                for j in 2..nvars as usize {
                    let slot = abs + 4 + j as u32;
                    if (slot as usize) < self.stack.len() {
                        self.stack[slot as usize] = Value::Nil;
                    }
                }
            }
            true
        } else {
            false
        };
        if !took_fast_path {
            // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
            // route through begin_call. Lua-closure iters would push
            // a Lua frame mid-trace → deopt.
            // Note: the copy happens before the Native check, so the
            // three slots are already overwritten when that deopt fires.
            self.stack[(abs + 4) as usize] = self.stack[abs as usize];
            self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
            self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
            if !matches!(self.stack[abs as usize], Value::Native(_)) {
                self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
                return -1;
            }
            if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
                self.jit.pending_err = Some(e);
                return -1;
            }
        }
        // v4 batched writeback — fill the caller's buffers with the
        // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
        // reload via cranelift `stack_load` instead of separate
        // `luna_jit_stack_load` helper calls.
        // SAFETY: reads the `zero` field of the `RawVal` returned by
        // `unpack` as plain bits. NOTE(review): presumably every
        // `RawVal` variant initializes those bits — confirm in
        // `runtime::value`.
        let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
        let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
        // SAFETY: same `RawVal::zero` bit read as for `ctrl_raw` above.
        let key_raw = unsafe { key_rv.zero };
        let val_raw = if (nvars as usize) >= 2 {
            // SAFETY: same `RawVal::zero` bit read as for `ctrl_raw` above.
            unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
        } else {
            0u64
        };
        // SAFETY: relies on the caller passing valid, writable, aligned
        // `i64` out-pointers (per the attribute note on this function:
        // caller-stack buffers from the Cranelift-emitted prologue).
        // They are not null-checked here.
        unsafe {
            ctrl_out.write(ctrl_raw as i64);
            key_out.write(key_raw as i64);
            val_out.write(val_raw as i64);
        }
        key_tag as i64
    }
3636
3637    /// P12-S12-B-v2 — load the raw `i64` payload of
3638    /// `vm.stack[base + slot_offset]` for the active trace's head
3639    /// Lua frame. Used to reload trace IR `Variable`s after a
3640    /// helper has written to `vm.stack` directly (e.g. TForCall's
3641    /// iter results land at `R[A+4..A+4+nvars]`).
3642    #[doc(hidden)]
3643    pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3644        let Some(f) = self.jit_last_lua_frame() else {
3645            return 0;
3646        };
3647        let idx = (f.base as usize) + (slot_offset as usize);
3648        if idx >= self.stack.len() {
3649            return 0;
3650        }
3651        let v = self.stack[idx];
3652        let (_, raw) = v.unpack();
3653        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3654        unsafe { raw.zero as i64 }
3655    }
3656
3657    /// P12-S12-B-v2 — read the tag byte of
3658    /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3659    /// to dispatch on the iterator's return-key tag at runtime
3660    /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3661    /// deopt for v2).
3662    #[doc(hidden)]
3663    pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3664        let Some(f) = self.jit_last_lua_frame() else {
3665            return crate::runtime::value::raw::NIL;
3666        };
3667        let idx = (f.base as usize) + (slot_offset as usize);
3668        if idx >= self.stack.len() {
3669            return crate::runtime::value::raw::NIL;
3670        }
3671        self.stack[idx].unpack().0
3672    }
3673
3674    /// P12-S4-step4b — push a Lua frame onto the call stack with
3675    /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3676    /// at trace side-exits to recreate the inlined call activations
3677    /// the lowerer compiled past. The contract (enforced by the
3678    /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3679    /// `nresults` is the caller's expected count (today always 1
3680    /// because the lowerer bails Op::Call C != 2), and the caller
3681    /// has already called `jit_ensure_stack` to cover
3682    /// `[0..base + cl.proto.max_stack)`.
3683    #[doc(hidden)]
3684    pub fn jit_push_inlined_frame(
3685        &mut self,
3686        cl: Gc<LuaClosure>,
3687        base: u32,
3688        pc: u32,
3689        nresults: i32,
3690    ) {
3691        frames_push_sync(
3692            &mut self.frames,
3693            &mut self.frames_top,
3694            CallFrame::Lua(Frame {
3695                closure: cl,
3696                base,
3697                pc,
3698                // Lua call ABI: callee R[0] sits at caller R[A+1], so
3699                // callee.base = caller.base + A + 1; func_slot is
3700                // caller.base + A = callee.base - 1.
3701                func_slot: base - 1,
3702                n_varargs: 0,
3703                nresults,
3704                hook_oldpc: u32::MAX,
3705                from_c: false,
3706                tm: None,
3707                is_hook: false,
3708                tailcalls: 0,
3709            }),
3710        );
3711    }
3712
    /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
    /// should set to `false` so `load`/`loadstring` reject bytecode input
    /// (which bypasses parser limits and could exploit verifier gaps).
    ///
    /// This only stores the flag; read it back with
    /// [`Vm::bytecode_loading`].
    pub fn set_bytecode_loading(&mut self, enabled: bool) {
        self.bytecode_loading = enabled;
    }
3719
    /// Current bytecode-loading gate state: the value last stored by
    /// [`Vm::set_bytecode_loading`].
    pub fn bytecode_loading(&self) -> bool {
        self.bytecode_loading
    }
3724
    /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
    /// bytecode is a strictly larger trust surface than luna's own dump
    /// format (third-party toolchain bugs, malformed chunks, unknown
    /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
    /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
    ///
    /// This only stores the flag; read it back with
    /// [`Vm::puc_bytecode_loading`].
    pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
        self.puc_bytecode_loading = enabled;
    }
3733
    /// Current PUC bytecode-loading gate state: the value last stored
    /// by [`Vm::set_puc_bytecode_loading`].
    pub fn puc_bytecode_loading(&self) -> bool {
        self.puc_bytecode_loading
    }
3738
    /// Default loader input budget — 256 MiB (`256 * 1024 * 1024`
    /// bytes).
    ///
    /// `Vm::load` and the Lua-level `load(reader, ...)` both refuse
    /// sources whose byte length crosses this cap, returning the
    /// PUC-shaped `not enough memory` error rather than letting the
    /// host allocator try (and crash) to hold the next chunk.
    pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 * 1024 * 1024;
3746
    /// Set the loader input byte budget (see
    /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
    /// effectively disable. Smaller caps are honored verbatim — a 0
    /// cap rejects every non-empty source.
    ///
    /// The value is stored as given; no clamping happens here.
    pub fn set_loader_input_budget(&mut self, bytes: usize) {
        self.loader_input_budget = bytes;
    }
3754
    /// Current loader input byte budget, in bytes: the value last
    /// stored by [`Vm::set_loader_input_budget`].
    pub fn loader_input_budget(&self) -> usize {
        self.loader_input_budget
    }
3759
3760    /// Take the error traceback captured at the latest error point and
3761    /// reset it. Embedders should call this immediately after a failed
3762    /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3763    /// entry clears it. Returns `None` if no error was in flight.
3764    pub fn take_error_traceback(&mut self) -> Option<String> {
3765        self.error_traceback
3766            .take()
3767            .map(|b| String::from_utf8_lossy(&b).into_owned())
3768    }
3769
    /// Arm the soft memory cap (P09 embedding). The run loop checks the
    /// heap's tracked byte usage between dispatch turns; on overshoot it
    /// first runs a full collect, and if `bytes` still exceeds the cap it
    /// raises a catchable `"memory cap exceeded"` Lua error and disarms
    /// itself (fire-once: re-arm before the next `call_value` if reusing
    /// the Vm across requests). `None` removes the cap. The accounting is
    /// approximate — internal Vec/Box capacity overhead is not tracked,
    /// so embedders should size the cap with ~2× margin over the desired
    /// hard limit and additionally bound the Vm's lifetime (drop after
    /// each request).
    ///
    /// This method itself only stores `cap` in the heap's `mem_cap`
    /// field; the enforcement described above happens elsewhere.
    pub fn set_memory_cap(&mut self, cap: Option<usize>) {
        self.heap.mem_cap = cap;
    }
3783
    /// Approximate bytes the heap is currently holding, as reported by
    /// `Heap::bytes`. Object shells plus every table's internal
    /// array/hash boxes (tracked via `Heap::apply_bytes_delta` in
    /// `set`/`rehash`/`ensure_*`). Proto bytecode and closure upvalue
    /// slices still go uncounted — this is a lower bound, not a precise
    /// `malloc_stats`-style total.
    pub fn memory_used(&self) -> usize {
        self.heap.bytes()
    }
3792
3793    /// Read upvalue slot `i` of the native function currently on top of the
3794    /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3795    /// when no native is running. Public so the C ABI trampoline can fetch
3796    /// the host C function pointer it stashed there at registration time.
3797    pub fn running_native_upvalue(&self, i: usize) -> Value {
3798        match self.running_natives.last() {
3799            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3800            Some(nc) => unsafe {
3801                let upvals = &(*nc.as_ptr()).upvals;
3802                upvals.get(i).copied().unwrap_or(Value::Nil)
3803            },
3804            None => Value::Nil,
3805        }
3806    }
3807
3808    /// Register a table for finalization if its (just-set) metatable carries a
3809    /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3810    /// `__gc` to the metatable afterwards does not retroactively register).
3811    pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3812        if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3813            self.heap.register_finalizable(t);
3814        }
3815    }
3816
3817    /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3818    /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3819    /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3820    /// metatable that `newproxy` returned, which then needs to flow through.
3821    /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3822    /// would re-check at metatable-set time); luna's only userdata
3823    /// finalizables today come via `newproxy`, which registers itself.
3824    #[allow(dead_code)]
3825    pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3826        if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3827            self.heap.register_finalizable_userdata(u);
3828        }
3829    }
3830
    /// Run pending `__gc` finalizers (objects the collector resurrected for
    /// finalization). Finalizer errors are swallowed — PUC turns them into a
    /// warning; they must never propagate to the mutator. Reentrancy-guarded.
    ///
    /// Thin wrapper: delegates to `run_finalizers_or_err` and discards
    /// its `Result`.
    fn run_finalizers(&mut self) {
        let _ = self.run_finalizers_or_err();
    }
3837
    /// Fallible core of `run_finalizers`: takes the heap's pending
    /// to-be-finalized list and calls each object's `__gc` handler.
    ///
    /// Returns `Ok(())` right away when a finalizer pass is already in
    /// progress (`gc_finalizing`) or nothing is pending. Otherwise every
    /// pending object is visited; a failing handler never stops the loop.
    /// On `Lua54` and later the error is routed to `emit_warn`; on older
    /// versions the first error is remembered (wrapped as
    /// `error in __gc metamethod (…)` from `Lua52` on) and returned as
    /// `Err` once all objects have been processed.
    fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
        if self.gc_finalizing {
            return Ok(());
        }
        let pending = self.heap.take_tobefnz();
        if pending.is_empty() {
            return Ok(());
        }
        // Guard against re-entry for the whole pass; cleared at the end.
        self.gc_finalizing = true;
        let mut first_err: Option<LuaError> = None;
        for obj in pending {
            let gc = self.get_mm(obj, Mm::Gc);
            // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
            // schedule the object for finalization (`__gc = true` is the
            // canonical placeholder); only call it at finalize time when it
            // is actually a function. gc.lua 5.2 :412 wires up exactly this
            // sentinel and then expects no call.
            let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
            if callable {
                // PUC `GCTM` sets `CIST_FIN` on the new ci so
                // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
                // `name = "__gc"`. luna threads the same outcome through the
                // generic `pending_tm` slot: the Lua frame born from this
                // call consumes it in `push_frame`. Saved/restored around the
                // call in case the handler is a native (which never pops it).
                // Bare event name; `frame_name` / `c_frame_name` add the
                // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
                // the convention used by `__close`, `__index`, …
                let saved_tm = self.pending_tm.replace("gc");
                // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
                // pcall, so `getinfo(2).namewhat` inside the finalizer reads
                // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
                // luna mirrors by temporarily tagging the topmost Lua
                // frame's `tm` with the bare event name "gc" for the
                // duration of the call.
                let caller_tm_idx = self
                    .frames
                    .iter()
                    .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
                // `Some(prev)` holds the frame's previous `tm` (itself an
                // Option); `None` means there was no Lua frame to tag.
                let saved_caller_tm = caller_tm_idx.and_then(|i| {
                    if let CallFrame::Lua(fr) = &mut self.frames[i] {
                        let prev = fr.tm;
                        fr.tm = Some("gc");
                        Some(prev)
                    } else {
                        None
                    }
                });
                if let Err(e) = self.call_value(gc, &[obj]) {
                    // PUC 5.1 GCTM raised the finalizer's error to the
                    // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
                    // baselines on `not pcall(collectgarbage)`). 5.2/5.3
                    // wrapped it in `error in __gc metamethod (msg)` first
                    // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
                    // introduced the warning system and switched to "warn
                    // then continue" — never re-raise, just route the
                    // wrapped message through `warn`. gc.lua 5.5 :378 wires
                    // up `_WARN` capture under the `if T then …` block to
                    // baseline on the same wrapped string.
                    if self.version >= LuaVersion::Lua54 {
                        let inner = self.error_text(&e);
                        let msg = format!("error in __gc metamethod ({inner})");
                        self.emit_warn(msg.as_bytes(), false);
                    } else if first_err.is_none() {
                        // Only the first failure is kept; later ones are
                        // dropped.
                        let wrapped = if self.version >= LuaVersion::Lua52 {
                            let inner = self.error_text(&e);
                            let msg = format!("error in __gc metamethod ({inner})");
                            let s = Value::Str(self.heap.intern(msg.as_bytes()));
                            LuaError(s)
                        } else {
                            e
                        };
                        first_err = Some(wrapped);
                    }
                }
                // Undo both temporary tags, whether or not the call failed.
                self.pending_tm = saved_tm;
                if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
                    && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
                {
                    fr.tm = prev; // prev is Option<&'static str>; restore exactly
                }
            }
        }
        self.gc_finalizing = false;
        match first_err {
            Some(e) => Err(e),
            None => Ok(()),
        }
    }
3926
3927    /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
3928    /// Crosses up to three phases per call:
3929    ///   1. Pause      → seed Propagate (`gc_start_propagate`)
3930    ///   2. Propagate  → drain gray up to `budget`; on exhaustion run atomic
3931    ///                   (`gc_finish_atomic` → tobefnz populated; finalizers
3932    ///                   run via `run_finalizers`) and enter Sweep
3933    ///   3. Sweep      → `gc_sweep_step` up to (residual) `budget`
3934    /// Returns true when this call completed the cycle's sweep (back to
3935    /// Pause). The budget is spent generously across phases — a large `n`
3936    /// can finish a whole cycle in one call (PUC stop-the-world step).
3937    pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
3938        // Re-entry guard: never recurse — `run_finalizers` calls Lua code
3939        // that may hit a safe point and try to step again. Re-entry was OK
3940        // under STW (collect_garbage had its own guard) but here the
3941        // intermediate phase state would corrupt.
3942        if self.gc_finalizing {
3943            return false;
3944        }
3945        if self.heap.gc_phase_is_pause() {
3946            let (roots, extra) = self.gc_roots();
3947            self.heap.gc_start_propagate(&roots, &extra);
3948        }
3949        if self.heap.gc_phase_is_propagate() {
3950            if !self.heap.gc_step_propagate(budget) {
3951                return false;
3952            }
3953            self.heap.gc_finish_atomic();
3954            // any __gc scheduled by atomic — run before sweep so a finalizer
3955            // re-registering `self` re-enters the next cycle, not this sweep
3956            self.run_finalizers();
3957        }
3958        // either we just transitioned, or we entered already in Sweep, or
3959        // a finalizer started a new cycle (gc_sweep_step is a no-op then)
3960        self.heap.gc_sweep_step(budget)
3961    }
3962
3963    // ---- frames & calls ----
3964
    /// Begin calling the value in `stack[func_slot]`.
    ///
    /// # Parameters
    /// * `func_slot` — stack index of the callee; its arguments sit directly
    ///   above it at `func_slot + 1..`.
    /// * `nargs` — number of arguments. `None` means "everything up to
    ///   `self.top`", i.e. `self.top - (func_slot + 1)`.
    /// * `nresults` — the caller's result-count expectation. It is handed on
    ///   to whichever path services the call (`push_frame`, the JIT fast
    ///   path, the `pcall`/`xpcall`/`pairs` helpers, or `finish_results`).
    /// * `from_c` — forwarded unchanged to `push_frame` for a Lua closure.
    ///
    /// # Dispatch
    /// The callee is resolved in a loop:
    /// * `Value::Closure` — try the JIT call fast path; otherwise push a Lua
    ///   frame and, when trace JIT is enabled, possibly start recording a
    ///   call-triggered trace.
    /// * `Value::Native` — async natives stash their future and return a
    ///   sentinel error; `pcall`, `xpcall` and `pairs` (with `__pairs`) are
    ///   routed to `begin_pcall` / `begin_xpcall` / `begin_pairs`; any other
    ///   native runs inline under `catch_unwind`, bracketed by the call and
    ///   return hooks.
    /// * anything else — look up `__call`, insert the handler at `func_slot`
    ///   (the original value becomes the first argument) and loop again.
    ///
    /// # Returns
    /// `Ok(true)` when a frame was pushed for the dispatch loop to continue
    /// with (a Lua frame, or the result of one of the `begin_*` helpers);
    /// `Ok(false)` when the call completed inline (plain native, or the JIT
    /// fast path).
    ///
    /// # Errors
    /// Returns `Err` when the value is not callable (`call_err`), when the
    /// `__call` chain exceeds the version-dependent cap, when an async native
    /// is called outside async mode, when a hook or the native itself fails
    /// (a native panic is converted to a `"native panic: …"` error), or when
    /// `push_frame` fails. The async path also returns
    /// `Err(LuaError(Value::Nil))` as a non-user-visible suspension sentinel.
    fn begin_call(
        &mut self,
        func_slot: u32,
        nargs: Option<u32>,
        nresults: i32,
        from_c: bool,
    ) -> Result<bool, LuaError> {
        let mut nargs = match nargs {
            Some(n) => n,
            None => self.top - (func_slot + 1),
        };
        // Consume `pending_tailcalls` at the boundary: a tail-call op sets it
        // only for the immediately-following Lua activation. Native dispatch
        // (or `__call` resolution) below must not let it leak to the next
        // begin_call's frame; restore it just before push_frame for the Lua
        // arm so its meaning is preserved across __call chaining.
        let tailcalls = std::mem::take(&mut self.pending_tailcalls);
        // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
        // is inserted before the value so it becomes the first argument, and a
        // chain of `__call` tables resolves down to a real function.
        let mut chain = 0u32;
        loop {
            match self.stack[func_slot as usize] {
                Value::Closure(cl) => {
                    // P11-S2c.B JIT fast path: if the Proto's body fits
                    // the int-arith whitelist, every arg is `Value::Int`,
                    // and the cached arity matches, skip frame setup and
                    // run the cached native fn in-place.
                    // NOTE(review): this arm puts `tailcalls` back into
                    // `pending_tailcalls` although no frame is pushed to
                    // consume it, whereas the native arm below drops it.
                    // Confirm the tail-call op clears the field on an
                    // `Ok(false)` return, otherwise it can leak to a later
                    // frame.
                    if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
                        self.pending_tailcalls = tailcalls;
                        return Ok(false);
                    }
                    // `push_frame` takes `pending_tailcalls` when it builds
                    // the new frame, so restore it first.
                    self.pending_tailcalls = tailcalls;
                    self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
                    // P12-S4-step0 — trace-on-call trigger. The frame
                    // we just pushed is the callee whose body the
                    // recorder will trace. Bump the per-Proto call
                    // counter; once it crosses `CALL_HOT_THRESHOLD`
                    // and no other trace is in flight, snapshot the
                    // callee's register window (R[0..max_stack]) and
                    // begin recording at `pc=0`. This is what unlocks
                    // tracing for functions whose body has no negative
                    // `Op::Jmp` back-edge (`fib`, recursive helpers).
                    //
                    // Gated on `self.jit.trace_enabled`, so the default
                    // dispatch pays a single not-taken branch.
                    if self.jit.trace_enabled {
                        let proto = cl.proto;
                        // `c` is the count as it stood BEFORE this call's
                        // bump; the threshold test below uses that value.
                        // The counter stops increasing at `u32::MAX / 2`.
                        let c = proto.call_hot_count.get();
                        if c < u32::MAX / 2 {
                            proto.call_hot_count.set(c + 1);
                        }
                        // P13-S13-H — relaxed call-trigger:
                        // `c >= THRESHOLD` (was `c == THRESHOLD`) +
                        // `!already_cached` short-circuit. Lets a
                        // discarded short call-trigger close retry
                        // on the next call (fib(10/15/20/25)
                        // pathology — first capture is base-case
                        // [Lt,Jmp,Return1]; coverage-heuristic
                        // discards; next call gets to record at a
                        // potentially deeper recursion point).
                        // Without `already_cached`, the relaxed
                        // condition would re-record over a cached
                        // trace every call.
                        //
                        // P13-S13-K — additionally short-circuit on
                        // `proto.trace_gave_up`. The S13-I discard
                        // cap force-compiles a partial trace and
                        // flips this flag; subsequent calls into
                        // this Proto skip the RefCell borrow + Vec
                        // scan entirely.
                        if proto.trace_gave_up.get() {
                            return Ok(true);
                        }
                        // A trace whose head is pc 0 is a call-triggered
                        // trace for this Proto.
                        let call_already_cached =
                            proto.traces.borrow().iter().any(|t| t.head_pc == 0);
                        if c >= crate::jit::trace::CALL_HOT_THRESHOLD
                            && self.jit.active_trace.is_none()
                            && !call_already_cached
                        {
                            // The new frame is on top: index in
                            // `self.frames` is `len() - 1`.
                            let frame_idx = self.frames.len() - 1;
                            // Snapshot R[0..max_stack] at the callee's
                            // base. `push_frame` resized `self.stack`
                            // to `base + max_stack`, so this window is
                            // guaranteed in-bounds.
                            let f = match &self.frames[frame_idx] {
                                CallFrame::Lua(f) => f,
                                _ => unreachable!("push_frame just pushed a Lua frame"),
                            };
                            let max_stack = cl.proto.max_stack as usize;
                            let base_us = f.base as usize;
                            // Only the tag half of each unpacked register
                            // is recorded; the payload is discarded.
                            let mut entry_tags = Vec::with_capacity(max_stack);
                            for i in 0..max_stack {
                                let (tag, _) = self.stack[base_us + i].unpack();
                                entry_tags.push(tag);
                            }
                            self.jit.active_trace =
                                Some(Box::new(crate::jit::trace::TraceRecord::start(
                                    cl.proto, 0, entry_tags, true,
                                )));
                            self.jit.recording_frame_base = frame_idx;
                        }
                    }
                    return Ok(true);
                }
                Value::Native(nc) => {
                    // v1.1 B10 Stage 2 — async-marked NativeClosure.
                    // Route through the cooperative-yield mechanism
                    // when async_mode is on; reject when called from
                    // a sync `eval`/`call_value` path (would have no
                    // executor to drive the returned future).
                    if nc.is_async {
                        if !self.async_mode {
                            let s = Value::Str(
                                self.heap.intern(b"async native called in sync context"),
                            );
                            self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
                            return Err(LuaError(s));
                        }
                        // Same root-up bookkeeping as the sync path:
                        // pin args + result-count expectation so a
                        // collection across the suspend boundary
                        // keeps the arg window live.
                        self.native_nresults = nresults;
                        self.gc_top = func_slot + nargs + 1;
                        // v1.3 Phase AS — fire the "call" hook BEFORE
                        // building the future. Mirrors the sync native
                        // path's `hook_call(true, nargs)` site
                        // (`exec.rs` further down) so embedders with a
                        // Rust debug hook installed see a Call event
                        // for async natives identical to the sync
                        // path. The matching "return" hook fires from
                        // `commit_async_native_result` in
                        // `async_drive.rs` after the future resolves.
                        // Placement follows audit §"Open questions"
                        // Q6: after the `native_nresults` / `gc_top`
                        // pin, before the future is constructed, so a
                        // hook body that triggers GC observes the
                        // correct pinned window. On hook error the
                        // sentinel never returns and
                        // `pending_async_native_*` remain `None` —
                        // the executor sees `DispatchOutcome::Error`
                        // (audit §A.1 edge cases).
                        self.hook_call(true, nargs)?;
                        // Transmute the stored NativeFn back to its
                        // real AsyncNativeFn shape. Sound because
                        // `set_async_native` / `create_async_native`
                        // installed an AsyncNativeFn through the
                        // identically-sized fn-pointer slot, and the
                        // `is_async` marker bit is what records that
                        // fact.
                        let async_fn: crate::vm::async_drive::AsyncNativeFn =
                            // SAFETY: same-size fn pointers; provenance
                            // preserved through `mem::transmute`. The
                            // `is_async` marker is the only safe-to-call
                            // gate, set exclusively by
                            // `Vm::create_async_native`.
                            unsafe { std::mem::transmute(nc.f) };
                        let vm_ptr: *mut Vm = self;
                        let fut = async_fn(vm_ptr, func_slot, nargs);
                        // Stash the future + post-call context for
                        // `drive_one` to surface to `EvalFuture::poll`.
                        self.pending_async_native_fut = Some(fut);
                        self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
                            func_slot,
                            nargs,
                            nresults,
                            gc_top: self.gc_top,
                        });
                        // Sentinel Err walked up to `drive_one` (same
                        // shape as `host_yield_pending`'s budget yield).
                        // Value::Nil — never seen by user code.
                        return Err(LuaError(Value::Nil));
                    }
                    // pcall/xpcall are yieldable: rather than calling the
                    // protected function through the Rust stack (which cannot be
                    // suspended), push a continuation frame and drive the call
                    // through the interpreter loop (PUC lua_pcallk). A yield
                    // inside it is preserved with the thread's saved frames.
                    // These builtins are recognised by comparing the native's
                    // function pointer against the known implementations.
                    use crate::runtime::value::NativeFn;
                    if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
                        return self.begin_pcall(func_slot, nargs, nresults);
                    }
                    if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
                        return self.begin_xpcall(func_slot, nargs, nresults);
                    }
                    // pairs(t) with a __pairs metamethod calls it yieldably (PUC
                    // luaB_pairs); without one, fall through to the plain native.
                    if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
                        let arg = self.stack[(func_slot + 1) as usize];
                        if !self.get_mm(arg, Mm::Pairs).is_nil() {
                            return self.begin_pairs(func_slot, nresults);
                        }
                    }
                    // a native that collects (e.g. `collectgarbage`) roots up to
                    // its own arguments — the caller's live registers all sit
                    // below `func_slot` and stay rooted.
                    self.native_nresults = nresults;
                    self.gc_top = func_slot + nargs + 1;
                    // Push the native onto the running-natives chain BEFORE
                    // firing the call hook so that `debug.getinfo(level)` and
                    // `arg_error` from inside the hook see this native as the
                    // currently-running C function (db.lua :344 reads
                    // `getinfo(2, "f").func` for the just-entered callee).
                    // Popped after the matching return hook fires — even on
                    // error, the pop must happen, so every exit path below
                    // pops both vectors explicitly before returning.
                    self.running_natives.push(nc);
                    self.running_native_slots.push((func_slot, nargs));
                    // PUC luaD_precall fires the "call" hook for C functions too.
                    // A yield inside the native (coroutine.yield) propagates an
                    // Err and the matching "return" hook fires on resume instead.
                    if let Err(e) = self.hook_call(true, nargs) {
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        return Err(e);
                    }
                    // P09: trap a Rust panic in the native and surface it as
                    // a Lua error rather than letting it unwind through the
                    // VM into the embedder. The VM's internal state may still
                    // be inconsistent after a panic (half-pushed args,
                    // dangling GC references), so embedders that catch this
                    // class of error should drop and re-create the Vm — but
                    // it's still better than tearing the host process down.
                    // `AssertUnwindSafe` is sound because the caller is the
                    // dispatch loop and any half-done state is fenced behind
                    // the immediate Err return below.
                    use std::panic::{AssertUnwindSafe, catch_unwind};
                    let result =
                        match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
                            Ok(r) => r,
                            Err(payload) => {
                                let msg = panic_payload_str(&payload);
                                let s = Value::Str(
                                    self.heap.intern(format!("native panic: {msg}").as_bytes()),
                                );
                                Err(LuaError(s))
                            }
                        };
                    // `nret` is the count the native reported on success.
                    let nret = match result {
                        Ok(n) => n,
                        Err(e) => {
                            // Stash the offending native's name BEFORE the
                            // pop so a dying coroutine's traceback snapshot
                            // can prepend `[C]: in function '<name>'`. Use
                            // pushglobalfuncname (PUC walks package.loaded
                            // to qualify); fall back to "?".
                            self.errored_native =
                                Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
                            self.running_natives.pop();
                            self.running_native_slots.pop();
                            return Err(e);
                        }
                    };
                    // PUC `luaD_poscall` fires the return hook BEFORE moving
                    // results into the function's slot — at that point args
                    // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
                    // results above them at `[func_slot + 1 + nargs, …)`.
                    // luna's `nat_return` has already written the results
                    // into `[func_slot, func_slot + nret)`, so we replay PUC's
                    // layout by copying the results up past the preserved
                    // args, firing the hook (with ftransfer = nargs + 1, so
                    // `getlocal(2, ftransfer..)` reads results), and then
                    // copying back for `finish_results`. db.lua :541 reads
                    // `getinfo("r").ftransfer` + `getlocal` to inspect a
                    // returning native's results this way.
                    if self.hook.ret
                        && !self.in_hook
                        && (self.hook.func.is_some() || self.hook.rust_func.is_some())
                    {
                        let res_dst = func_slot + nargs + 1;
                        let need = (res_dst + nret) as usize;
                        if self.stack.len() < need {
                            self.stack.resize(need, Value::Nil);
                        }
                        // Source and destination ranges may overlap and the
                        // destination is higher, so copy from the top down.
                        for i in (0..nret).rev() {
                            self.stack[(res_dst + i) as usize] =
                                self.stack[(func_slot + i) as usize];
                        }
                        // widen the C-frame's argument window for getlocal
                        if let Some(slot) = self.running_native_slots.last_mut() {
                            slot.1 = nargs + nret;
                        }
                        // The hook's outcome is held in `hr` so the cleanup
                        // below runs before any error is propagated.
                        let hr = self.hook_return(true, nargs + 1, nret);
                        if let Some(slot) = self.running_native_slots.last_mut() {
                            slot.1 = nargs;
                        }
                        // restore results into the slot finish_results expects
                        // (destination is lower here, so copy bottom-up)
                        for i in 0..nret {
                            self.stack[(func_slot + i) as usize] =
                                self.stack[(res_dst + i) as usize];
                        }
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        hr?;
                    } else {
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                    }
                    self.finish_results(func_slot, nret, nresults);
                    // the native may have allocated; collect with the results as
                    // the live boundary (PUC checks GC after a call returns).
                    self.maybe_collect_garbage(self.top);
                    return Ok(false);
                }
                v => {
                    // Neither a closure nor a native: the value is only
                    // callable through a `__call` metamethod.
                    let mm = self.get_mm(v, Mm::Call);
                    if mm.is_nil() {
                        return Err(self.call_err(v));
                    }
                    chain += 1;
                    // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
                    // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
                    // and the 5.5 test exercises the new tight bound directly
                    // (calls.lua :225 builds a 16-deep chain and expects the
                    // 16th to error). 5.4 calls.lua :194 instead builds a 20-
                    // deep chain and expects it to succeed.
                    let cap = if self.version >= crate::version::LuaVersion::Lua55 {
                        15
                    } else {
                        MAX_CCMT
                    };
                    if chain > cap {
                        return Err(self.rt_err("'__call' chain too long"));
                    }
                    // slots above shift by one; at a call site those are dead
                    // temps of the current frame
                    self.stack.insert(func_slot as usize, mm);
                    if self.top > func_slot {
                        self.top += 1;
                    }
                    // the original value is now the handler's first argument
                    nargs += 1;
                }
            }
        }
    }
4306
4307    fn push_frame(
4308        &mut self,
4309        cl: Gc<LuaClosure>,
4310        func_slot: u32,
4311        nargs: u32,
4312        nresults: i32,
4313        from_c: bool,
4314    ) -> Result<(), LuaError> {
4315        if func_slot + 256 > MAX_LUA_STACK {
4316            // PUC `stackerror`: a stack overflow that surfaces while the
4317            // current activation is inside an xpcall message handler is
4318            // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
4319            // error handling". errors.lua :606 expects the inner pcall(loop)
4320            // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
4321            // message matching "error handling".
4322            let msg = if self.msgh_depth > 0 {
4323                "error in error handling"
4324            } else {
4325                "stack overflow"
4326            };
4327            return Err(self.rt_err(msg));
4328        }
4329        let proto = cl.proto;
4330        let nparams = proto.num_params as u32;
4331        // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
4332        // stack just below the new `base`, so a named vararg can be indexed
4333        // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
4334        // `[e1..em][p1..pn]` so the fixed params land at the new base.
4335        let n_varargs = if proto.is_vararg {
4336            nargs.saturating_sub(nparams)
4337        } else {
4338            0
4339        };
4340        if n_varargs > 0 {
4341            let s = (func_slot + 1) as usize;
4342            self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
4343        }
4344        let base = func_slot + 1 + n_varargs;
4345        let need = (base + proto.max_stack as u32) as usize;
4346        if self.stack.len() < need {
4347            self.stack.resize(need, Value::Nil);
4348        }
4349        // wipe the register window beyond the kept parameters (stale values —
4350        // required for GC-safety and codegen). The varargs below `base` survive.
4351        let kept = nargs.saturating_sub(n_varargs).min(nparams);
4352        // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
4353        // need` since `base + nparams <= base + max_stack = need` and `kept <=
4354        // nparams`. `slice::fill` lowers to a single memset on Copy types.
4355        unsafe {
4356            self.stack
4357                .get_unchecked_mut((base + kept) as usize..need)
4358                .fill(Value::Nil);
4359        }
4360        frames_push_sync(
4361            &mut self.frames,
4362            &mut self.frames_top,
4363            CallFrame::Lua(Frame {
4364                closure: cl,
4365                base,
4366                pc: 0,
4367                func_slot,
4368                nresults,
4369                hook_oldpc: u32::MAX,
4370                from_c,
4371                n_varargs,
4372                // single-shot consume: `close_slots` sets pending_tm before each
4373                // handler call; the next Lua frame born is that handler's.
4374                tm: self.pending_tm.take(),
4375                // `run_hook` sets `pending_is_hook` before dispatching the user
4376                // hook so its frame reports `namewhat = "hook"` via getinfo.
4377                is_hook: std::mem::take(&mut self.pending_is_hook),
4378                tailcalls: std::mem::take(&mut self.pending_tailcalls),
4379            }),
4380        );
4381        // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
4382        // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
4383        // the slot at `base + nparams`; the extras sit just below `base` from
4384        // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
4385        // hook; vararg.lua's contradictory expectations were already going to
4386        // fail either way (some asserts want `arg == nil`).
4387        if proto.has_compat_vararg_arg {
4388            let arg_slot = (base + nparams) as usize;
4389            let t = self.heap.new_table();
4390            {
4391                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4392                let tm = unsafe { t.as_mut() };
4393                for i in 0..n_varargs {
4394                    let v = self.stack[(base - n_varargs + i) as usize];
4395                    // bounded by `n_varargs` (≤ MAXUPVAL territory), well
4396                    // below `MAX_ASIZE`
4397                    let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
4398                }
4399                let nk = Value::Str(self.heap.intern(b"n"));
4400                tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
4401                    .expect("'n' key");
4402            }
4403            // once-per-table barrier mirrors SETLIST: t is born BLACK during
4404            // Propagate and the bulk `set_int`/`set` calls above don't barrier
4405            self.heap
4406                .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
4407            self.stack[arg_slot] = Value::Table(t);
4408        }
4409        // PUC luaD_precall fires the "call" hook with the new frame current, so
4410        // a hook calling debug.getinfo(2) sees the entered function. For a Lua
4411        // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
4412        // fixed params count — extras already live below `base`).
4413        // A frame born via OP_TailCall fires "tail call" instead (PUC
4414        // luaD_pretailcall) and skips the matching "return" hook on exit.
4415        let is_tail = self
4416            .frames
4417            .last()
4418            .and_then(|f| f.lua())
4419            .is_some_and(|f| f.tailcalls > 0);
4420        self.hook_call_with(false, nparams, is_tail)?;
4421        Ok(())
4422    }
4423
4424    /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4425    /// the protected call `f` through the interpreter loop. The protected
4426    /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4427    /// at `func_slot+1` lets its results land one slot above the continuation —
4428    /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4429    /// Always returns `Ok(true)`: a continuation is now on the stack to be
4430    /// resolved by the loop (even when `f` is a native that already ran inline).
4431    fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4432        if nargs == 0 {
4433            return Err(crate::vm::builtins::raise_str(
4434                self,
4435                "bad argument #1 to 'pcall' (value expected)",
4436            ));
4437        }
4438        if self.pcall_depth >= MAX_C_DEPTH {
4439            return Err(self.rt_err("C stack overflow"));
4440        }
4441        self.pcall_depth += 1;
4442        frames_push_sync(
4443            &mut self.frames,
4444            &mut self.frames_top,
4445            CallFrame::Cont(NativeCont {
4446                kind: ContKind::Pcall,
4447                func_slot,
4448                nresults,
4449            }),
4450        );
4451        // call f (slot func_slot+1) with the remaining args, asking for all
4452        // results; a yield or error inside propagates with the continuation kept
4453        // on the stack (caught by `unwind` / preserved across a yield).
4454        self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4455        Ok(true)
4456    }
4457
4458    /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4459    /// message handler is stashed in the continuation and the arguments are
4460    /// shifted down over the handler's slot so `f`'s args are contiguous.
4461    fn begin_xpcall(
4462        &mut self,
4463        func_slot: u32,
4464        nargs: u32,
4465        nresults: i32,
4466    ) -> Result<bool, LuaError> {
4467        if nargs < 2 {
4468            return Err(crate::vm::builtins::raise_str(
4469                self,
4470                "bad argument #2 to 'xpcall' (value expected)",
4471            ));
4472        }
4473        if self.pcall_depth >= MAX_C_DEPTH {
4474            return Err(self.rt_err("C stack overflow"));
4475        }
4476        self.pcall_depth += 1;
4477        // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4478        // close its gap so f's args become [f@+1, a1@+2, ...].
4479        let handler = self.stack[(func_slot + 2) as usize];
4480        let nfargs = nargs - 2;
4481        for i in 0..nfargs {
4482            self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4483        }
4484        self.top = func_slot + 2 + nfargs;
4485        frames_push_sync(
4486            &mut self.frames,
4487            &mut self.frames_top,
4488            CallFrame::Cont(NativeCont {
4489                kind: ContKind::Xpcall { handler },
4490                func_slot,
4491                nresults,
4492            }),
4493        );
4494        self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4495        Ok(true)
4496    }
4497
4498    /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4499    /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4500    /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4501    /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4502    /// results land exactly where `pairs`'s results belong.
4503    fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4504        let arg = self.stack[(func_slot + 1) as usize];
4505        let mm = self.get_mm(arg, Mm::Pairs);
4506        // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4507        self.stack[func_slot as usize] = mm;
4508        self.top = func_slot + 2;
4509        frames_push_sync(
4510            &mut self.frames,
4511            &mut self.frames_top,
4512            CallFrame::Cont(NativeCont {
4513                kind: ContKind::Pairs,
4514                func_slot,
4515                nresults,
4516            }),
4517        );
4518        self.begin_call(func_slot, Some(1), 4, true)?;
4519        Ok(true)
4520    }
4521
4522    /// The running (top) Lua frame. The interpreter only reads this while a Lua
4523    /// frame is on top — a continuation frame is never the running frame (it is
4524    /// consumed the instant the call it protects unwinds onto it).
4525    #[inline]
4526    fn top_frame(&self) -> &Frame {
4527        self.frames
4528            .last()
4529            .and_then(CallFrame::lua)
4530            .expect("running Lua frame")
4531    }
4532
4533    #[inline]
4534    fn top_frame_mut(&mut self) -> &mut Frame {
4535        self.frames
4536            .last_mut()
4537            .and_then(CallFrame::lua_mut)
4538            .expect("running Lua frame")
4539    }
4540
4541    /// Pad/announce results sitting at func_slot.
4542    pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4543        // v2.3 P1B-A: capture the call's high-water-mark before
4544        // setting the new top so we can Nil-clear slots that the
4545        // call temporarily wrote but no longer holds — matching
4546        // PUC's `L->top` discipline (slots past L->top are "free"
4547        // and the next push overwrites them). Without this clear,
4548        // a stale `Value::Closure` (e.g. the called function
4549        // itself, when wanted = 0) sits at `func_slot` and a
4550        // later GC with wider `gc_top` traces it after the
4551        // closure has been freed by a previous narrow safe-point
4552        // GC → heap-buffer-overflow in `Marker::header` (UAF-A
4553        // sort.lua AA case).
4554        let prev_top = self.top as usize;
4555        if wanted < 0 {
4556            self.top = func_slot + nret;
4557        } else {
4558            let wanted = wanted as u32;
4559            let need = (func_slot + wanted) as usize;
4560            if self.stack.len() < need {
4561                self.stack.resize(need, Value::Nil);
4562            }
4563            for i in nret..wanted {
4564                self.stack[(func_slot + i) as usize] = Value::Nil;
4565            }
4566            self.top = func_slot + wanted;
4567        }
4568        let new_top = self.top as usize;
4569        let clear_end = prev_top.min(self.stack.len());
4570        if new_top < clear_end {
4571            for slot in &mut self.stack[new_top..clear_end] {
4572                *slot = Value::Nil;
4573            }
4574        }
4575    }
4576
    /// v1.1 B10 Stage 1 — current call-frame depth (read-only).
    ///
    /// Returns the length of the frame stack. That stack holds continuation
    /// (`CallFrame::Cont`) entries as well as Lua frames, so both kinds are
    /// counted. Used by `EvalFuture` on the bootstrap poll to compute the
    /// `entry_depth` it will pass to subsequent resume slices.
    pub(crate) fn frame_count(&self) -> usize {
        self.frames.len()
    }
4583
4584    fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4585        let nret = self.top - func_slot;
4586        let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4587        self.stack.truncate(func_slot as usize);
4588        self.top = func_slot;
4589        out
4590    }
4591
4592    // ---- open upvalues ----
4593
4594    #[doc(hidden)]
4595    pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4596        match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4597            Ok(i) => self.open_upvals[i].1,
4598            Err(i) => {
4599                let uv = self.heap.new_upvalue(UpvalState::Open {
4600                    slot,
4601                    thread: self.current,
4602                });
4603                self.open_upvals.insert(i, (slot, uv));
4604                uv
4605            }
4606        }
4607    }
4608
4609    pub(crate) fn close_from(&mut self, slot: u32) {
4610        while let Some(&(s, uv)) = self.open_upvals.last() {
4611            if s < slot {
4612                break;
4613            }
4614            let v = self.stack[s as usize];
4615            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4616            unsafe { uv.as_mut() }.set_closed(v);
4617            self.heap
4618                .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4619            self.open_upvals.pop();
4620        }
4621    }
4622
4623    /// Register a to-be-closed slot (TBC op / generic-for closing value).
4624    fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4625        let v = self.stack[slot as usize];
4626        if matches!(v, Value::Nil | Value::Bool(false)) {
4627            return Ok(()); // nil and false are silently ignored
4628        }
4629        if self.get_mm(v, Mm::Close).is_nil() {
4630            // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4631            // (a <type> value)"; the local's name comes from the running
4632            // frame's locvars at this pc.
4633            let tn = v.type_name();
4634            let f = self.top_frame();
4635            let reg = slot - f.base;
4636            let pc = (f.pc as usize).saturating_sub(1);
4637            let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4638                Some(n) => format!("variable '{n}'"),
4639                None => "to-be-closed slot".to_string(),
4640            };
4641            return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4642        }
4643        debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4644        self.tbc.push(slot);
4645        Ok(())
4646    }
4647
    /// Close upvalues and run `__close` handlers for slots ≥ `from`
    /// (handlers in reverse registration order; PUC luaF_close).
    ///
    /// `err` is the error object in flight, if any; it is handed to each
    /// handler as its second argument until a handler raises, at which point
    /// the handler's error replaces it for the remaining handlers.
    ///
    /// Returns `Ok(())` when no handler raised — even if `err` was `Some`, so
    /// the caller remains responsible for propagating its own error — and
    /// otherwise the error of the last handler that raised. `closing_err` is
    /// restored to its entry value before returning.
    ///
    /// NOTE(review): `error_close` is derived once from the incoming `err`,
    /// whereas `drive_close` recomputes it from `pending` for every handler.
    /// After a handler raises during a normal close the two therefore pass
    /// different flags to later handlers — confirm which one is intended.
    fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
        self.close_from(from);
        // PUC: handlers run in reverse declaration order; an error raised by a
        // handler becomes the error object passed to the remaining ones, and
        // the rest are still closed. The last raised error propagates.
        let mut pending = err;
        let mut result = Ok(());
        let saved_err = self.closing_err;
        // On a normal close the handler runs within the closing function's
        // activation (debug parent = that function); during error unwinding the
        // function's frame is already gone, so the handler sits at the C
        // boundary instead (PUC: luaF_close runs after the ci is restored).
        let error_close = err.is_some();
        while let Some(&s) = self.tbc.last() {
            if s < from {
                break;
            }
            self.tbc.pop();
            let v = self.stack[s as usize];
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                // PUC `prepclosingmethod`: the __close metamethod was present
                // at OP_TBC (else we would have errored there) but has since
                // been removed/replaced. Treat as a non-callable target.
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                pending = Some(e.0);
                result = Err(e);
                continue;
            }
            // root the pending error: a handler may trigger a collection
            self.closing_err = pending;
            // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
            // getinfo report the handler as "in metamethod 'close'". Saved/
            // restored around the call to cover the path where `mm` is a
            // native (`push_frame` never consumes it) or it raises before
            // reaching push_frame.
            let saved_tm = self.pending_tm.replace("close");
            // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
            // is nil on a normal close (5.4 locals.lua :875's
            // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
            // through the yield). PUC 5.5 dropped the trailing nil: a clean
            // close passes only `obj`, the error case still passes both
            // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
            // normal-close arms, n=2 for the error arm).
            let call = match pending {
                Some(e) => self.call_value_impl(mm, &[v, e], error_close),
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        self.call_value_impl(mm, &[v], error_close)
                    } else {
                        self.call_value_impl(mm, &[v, Value::Nil], error_close)
                    }
                }
            };
            self.pending_tm = saved_tm;
            if let Err(e) = call {
                // The handler's own error supersedes whatever was pending.
                pending = Some(e.0);
                result = Err(e);
            }
        }
        self.closing_err = saved_err;
        result
    }
4719
    /// Yieldable variant of `close_slots`: drive the chain of `__close`
    /// handlers for slots ≥ `from` through the interpreter loop with a
    /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
    /// suspends cleanly (the close iteration's state rides on the thread's
    /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
    /// applied to `luaF_close`. `after` runs when every slot is closed; if
    /// `after` is `Return` and we've returned past `entry_depth`,
    /// `Ok(Some(vals))` carries the result up to the host caller.
    ///
    /// Open upvalues at slots ≥ `from` are closed first (`close_from`); the
    /// handler chain itself is then handed to `drive_close`, whose result is
    /// returned unchanged. `err` is the error object in flight, if any.
    fn begin_close(
        &mut self,
        from: u32,
        err: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        self.close_from(from);
        self.drive_close(from, err, after, entry_depth)
    }
4738
    /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
    /// non-callable-mm error for an `__close` that was reset to a bad value
    /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
    /// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
    /// the interpreter then drives the handler and re-enters this driver via
    /// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
    /// the threaded error (if any) propagates or `after` fires.
    ///
    /// `pending` is the error object threaded through the chain so far; it is
    /// passed to the handler as its second argument and stored in the pushed
    /// `CloseCont` together with `from` and `after`. `entry_depth` is only
    /// forwarded to `finish_close_after` once the chain is drained.
    ///
    /// Unlike `close_slots`, a synthesised missing-`__close` error here only
    /// updates `pending`; nothing is returned until the chain is drained or a
    /// handler is dispatched.
    fn drive_close(
        &mut self,
        from: u32,
        mut pending: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        loop {
            // Nothing left at or above `from`: the close chain is complete.
            let drained = match self.tbc.last() {
                None => true,
                Some(&s) => s < from,
            };
            if drained {
                return self.finish_close_after(after, pending, entry_depth);
            }
            let s = self.tbc.pop().expect("tbc non-empty");
            let v = self.stack[s as usize];
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                pending = Some(e.0);
                continue;
            }
            // A real handler: stage [mm, v, (err?)] above the current top,
            // record the close iteration state in a Cont::Close, and let the
            // interpreter dispatch the handler. On return the run() head
            // re-enters this driver via the Cont::Close consumer.
            let func_slot = self.top;
            let error_close = pending.is_some();
            // Room for the handler plus at most two arguments.
            let need = (func_slot + 3) as usize;
            if self.stack.len() < need {
                self.stack.resize(need, Value::Nil);
            }
            self.stack[func_slot as usize] = mm;
            self.stack[func_slot as usize + 1] = v;
            // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
            // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
            let nargs = match pending {
                Some(e) => {
                    self.stack[func_slot as usize + 2] = e;
                    2u32
                }
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        1u32
                    } else {
                        self.stack[func_slot as usize + 2] = Value::Nil;
                        2u32
                    }
                }
            };
            self.top = func_slot + 1 + nargs;
            // Root the pending error during the call (a handler may collect).
            let saved_err = self.closing_err;
            self.closing_err = pending;
            // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
            // for traceback / getinfo.
            let saved_tm = self.pending_tm.replace("close");
            frames_push_sync(
                &mut self.frames,
                &mut self.frames_top,
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Close(CloseCont {
                        from,
                        pending,
                        after,
                    }),
                    func_slot,
                    nresults: 0,
                }),
            );
            // PUC luaF_close runs a normal close *within* the closing
            // function's activation (debug parent = that function); during an
            // error unwind the function's frame is already gone and the
            // handler sits at the C boundary instead.
            let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
            // Restore both saved fields before looking at `r`, so they are
            // reset on the error path as well.
            self.pending_tm = saved_tm;
            self.closing_err = saved_err;
            r?;
            return Ok(None);
        }
    }
4834
4835    /// Fire `after` once every `__close` handler has run. `Block` propagates
4836    /// any remaining error or simply continues; `Return` performs OP_Return's
4837    /// tail (hook + frame pop + result delivery) and may surface results to
4838    /// the host when the function whose return triggered the close was the
4839    /// entry activation, but only on a clean drain — a pending error skips
4840    /// the return tail and propagates instead. `ResumeUnwind` pops the
4841    /// deferred Lua frame and re-raises, letting a handler's own error win
4842    /// over the original propagating one (PUC luaF_close).
4843    fn finish_close_after(
4844        &mut self,
4845        after: AfterClose,
4846        pending: Option<Value>,
4847        entry_depth: usize,
4848    ) -> Result<Option<Vec<Value>>, LuaError> {
4849        match after {
4850            AfterClose::Block => match pending {
4851                Some(e) => Err(LuaError(e)),
4852                None => Ok(None),
4853            },
4854            AfterClose::Return {
4855                abs_a,
4856                nret,
4857                from_native,
4858            } => match pending {
4859                Some(e) => Err(LuaError(e)),
4860                None => self.complete_return(abs_a, nret, from_native, entry_depth),
4861            },
4862            AfterClose::ResumeUnwind { func_slot, err } => {
4863                // The aborting Lua frame was popped before `begin_close`;
4864                // restore the catcher's stack window down to `func_slot` and
4865                // re-raise — preferring a handler-raised error over the
4866                // original (PUC luaF_close).
4867                self.stack.truncate(func_slot as usize);
4868                self.top = func_slot;
4869                self.tbc.retain(|&s| s < func_slot);
4870                Err(LuaError(pending.unwrap_or(err)))
4871            }
4872        }
4873    }
4874
4875    /// OP_Return's post-close tail: fire the "return" hook (frame still
4876    /// current), pop the Lua frame, slide results into `func_slot`, then
4877    /// either hand them to the host (`Ok(Some(vals))` when we've returned
4878    /// past `entry_depth`), leave them contiguous for an exposed
4879    /// pcall/xpcall continuation, or finish into the caller's expected
4880    /// result slot. Mirrors the synchronous OP_Return tail so both paths
4881    /// share semantics — the `from_native` flag selects the right "return"
4882    /// hook context for `hook_return`.
4883    fn complete_return(
4884        &mut self,
4885        abs_a: u32,
4886        nret: u32,
4887        from_native: bool,
4888        entry_depth: usize,
4889    ) -> Result<Option<Vec<Value>>, LuaError> {
4890        // ftransfer is the local index (1-based) of the first result, as
4891        // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
4892        // exposes locals starting at `frame.base` (= func_slot + 1 +
4893        // n_varargs for a vararg call), so the conversion is the absolute
4894        // result slot minus base, plus one to make it 1-based. db.lua 5.4
4895        // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
4896        // shape end-to-end.
4897        let ftransfer = self
4898            .frames
4899            .last()
4900            .and_then(CallFrame::lua)
4901            .map(|fr| {
4902                let raw = abs_a.saturating_sub(fr.base) + 1;
4903                // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
4904                // local injected at index `numparams + 1`, so getlocal
4905                // numbering shifts results past it (5.5 db.lua :539
4906                // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
4907                if fr.closure.proto.has_vararg_table_pseudo {
4908                    raw + 1
4909                } else {
4910                    raw
4911                }
4912            })
4913            .unwrap_or(1);
4914        // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
4915        // per tail call that collapsed into this activation, *after* its
4916        // own "return". `tailcalls` tracks that count exactly (PUC
4917        // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
4918        // "return" hook fires once even when the activation absorbed
4919        // multiple tail calls — only `istailcall` on getinfo surfaces the
4920        // collapse. 5.1 db.lua :366 pins the event ordering.
4921        let tailcalls = if self.version <= LuaVersion::Lua51 {
4922            self.frames
4923                .last()
4924                .and_then(|f| f.lua())
4925                .map(|f| f.tailcalls)
4926                .unwrap_or(0)
4927        } else {
4928            0
4929        };
4930        self.hook_return(from_native, ftransfer, nret)?;
4931        for _ in 0..tailcalls {
4932            self.hook_tail_return()?;
4933        }
4934        let CallFrame::Lua(fr) =
4935            frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
4936        else {
4937            unreachable!("returning from a non-Lua frame")
4938        };
4939        for i in 0..nret {
4940            self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
4941        }
4942        if self.frames.len() < entry_depth {
4943            self.top = fr.func_slot + nret;
4944            return Ok(Some(self.take_results(fr.func_slot)));
4945        } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
4946            self.top = fr.func_slot + nret;
4947        } else {
4948            self.finish_results(fr.func_slot, nret, fr.nresults);
4949        }
4950        Ok(None)
4951    }
4952
4953    #[doc(hidden)]
4954    pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
4955        match cl.upvals()[idx as usize].state() {
4956            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
4957            UpvalState::Closed(v) => v,
4958        }
4959    }
4960
4961    fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
4962        let uv = cl.upvals()[idx as usize];
4963        match uv.state() {
4964            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
4965            UpvalState::Closed(_) => {
4966                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4967                unsafe { uv.as_mut() }.set_closed(v);
4968                // forward barrier: a closed upvalue is single-slot, so the
4969                // forward variant is cheaper than barrier_back (PUC uses
4970                // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
4971                // tables / threads).
4972                self.heap
4973                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4974            }
4975        }
4976    }
4977
4978    // ---- register / error helpers ----
4979
    /// Read the value at stack index `base + i`, i.e. register `i` of a frame
    /// whose registers start at `base`. The access is not bounds-checked.
    #[inline(always)]
    fn r(&self, base: u32, i: u32) -> Value {
        // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
        // at frame entry (`push_frame` sizes the stack up to base + max_stack),
        // and every bytecode-generated reference falls within `[0, max_stack)`.
        // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
        // reason. The bounds check would re-validate this invariant on every
        // op — the dispatch hot path can't afford it.
        unsafe { *self.stack.get_unchecked((base + i) as usize) }
    }
4991
    /// Store `v` at stack index `base + i`, i.e. register `i` of a frame
    /// whose registers start at `base`. The access is not bounds-checked.
    #[inline(always)]
    fn set_r(&mut self, base: u32, i: u32, v: Value) {
        // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
        // frame-entry contract.
        unsafe {
            *self.stack.get_unchecked_mut((base + i) as usize) = v;
        }
    }
5000
5001    #[doc(hidden)]
5002    pub fn rt_err(&mut self, msg: &str) -> LuaError {
5003        let text = match self.position_prefix() {
5004            Some(p) => format!("{p}{msg}"),
5005            None => msg.to_string(),
5006        };
5007        LuaError(Value::Str(self.heap.intern(text.as_bytes())))
5008    }
5009
5010    pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
5011        let extra = self.subject_varinfo(v);
5012        let tn = self.obj_typename(v);
5013        self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
5014    }
5015
5016    /// Name the offending operand of the current instruction (PUC varinfo) for
5017    /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
5018    /// to the instruction's subject register(s); a native-raised error whose
5019    /// current instruction doesn't hold `bad` simply yields "".
5020    fn subject_varinfo(&self, bad: Value) -> String {
5021        use crate::vm::isa::Op;
5022        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5023            return String::new();
5024        };
5025        let proto = f.closure.proto;
5026        let p: &crate::runtime::Proto = &proto;
5027        let pc = f.pc as usize;
5028        if pc == 0 || pc > p.code.len() {
5029            return String::new();
5030        }
5031        let instr = p.code[pc - 1];
5032        let mut cands: Vec<u32> = Vec::new();
5033        match instr.op() {
5034            // indexed reads / length / method: the table/object is in B
5035            Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5036                cands.push(instr.b());
5037            }
5038            // indexed writes / calls: the table/function is in A
5039            Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5040                cands.push(instr.a());
5041            }
5042            // arithmetic/bitwise: a register operand (B, and C unless constant)
5043            Op::Add
5044            | Op::Sub
5045            | Op::Mul
5046            | Op::Div
5047            | Op::Mod
5048            | Op::Pow
5049            | Op::IDiv
5050            | Op::BAnd
5051            | Op::BOr
5052            | Op::BXor
5053            | Op::Shl
5054            | Op::Shr => {
5055                cands.push(instr.b());
5056                if !instr.k() {
5057                    cands.push(instr.c());
5058                }
5059            }
5060            Op::Unm | Op::BNot => cands.push(instr.b()),
5061            Op::Concat => {
5062                let a = instr.a();
5063                for r in a..a + instr.b() {
5064                    cands.push(r);
5065                }
5066            }
5067            _ => {}
5068        }
5069        for reg in cands {
5070            if self.r(f.base, reg).raw_eq(bad) {
5071                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5072                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5073                    None => String::new(),
5074                };
5075            }
5076        }
5077        String::new()
5078    }
5079
5080    /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5081    /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5082    /// 'add')" when the call is a metamethod dispatched by the current opcode.
5083    fn call_err(&mut self, v: Value) -> LuaError {
5084        let extra = self.call_target_varinfo(v);
5085        let tn = self.obj_typename(v);
5086        self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5087    }
5088
5089    /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5090    /// frame before the call, so the opcode that triggered it lives in the
5091    /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5092    /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5093    fn call_target_varinfo(&self, bad: Value) -> String {
5094        use crate::vm::isa::Op;
5095        let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5096            return String::new();
5097        };
5098        let proto = f.closure.proto;
5099        let p: &crate::runtime::Proto = &proto;
5100        let pc = f.pc as usize;
5101        if pc == 0 || pc > p.code.len() {
5102            return String::new();
5103        }
5104        let instr = p.code[pc - 1];
5105        match instr.op() {
5106            Op::Call | Op::TailCall => {
5107                let reg = instr.a();
5108                if self.r(f.base, reg).raw_eq(bad) {
5109                    match crate::vm::objname::getobjname(p, pc - 1, reg) {
5110                        Some((kind, name)) => format!(" ({kind} '{name}')"),
5111                        None => String::new(),
5112                    }
5113                } else {
5114                    String::new()
5115                }
5116            }
5117            op => match mm_event_name(op) {
5118                Some(ev) => format!(" (metamethod '{ev}')"),
5119                None => String::new(),
5120            },
5121        }
5122    }
5123
5124    /// "number has no integer representation", enriched (PUC luaG_tointerror)
5125    /// with a "(field 'x')"-style suffix naming the offending operand of the
5126    /// current arithmetic instruction when it can be recovered from bytecode.
5127    fn no_int_rep_err(&mut self) -> LuaError {
5128        let extra = self.bad_operand_varinfo();
5129        self.rt_err(&format!("number{extra} has no integer representation"))
5130    }
5131
5132    /// Inspect the current frame's faulting instruction: find the register
5133    /// operand holding a float with no integer representation and name it.
5134    fn bad_operand_varinfo(&self) -> String {
5135        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5136            return String::new();
5137        };
5138        let proto = f.closure.proto;
5139        let p: &crate::runtime::Proto = &proto;
5140        let pc = f.pc as usize;
5141        if pc == 0 || pc > p.code.len() {
5142            return String::new();
5143        }
5144        let instr = p.code[pc - 1];
5145        let mut regs = vec![instr.b()];
5146        if !instr.k() {
5147            regs.push(instr.c());
5148        }
5149        for reg in regs {
5150            let v = self.r(f.base, reg);
5151            if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5152                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5153                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5154                    None => String::new(),
5155                };
5156            }
5157        }
5158        String::new()
5159    }
5160
5161    /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5162    /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5163    /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5164    /// native call), PUC pushes no prefix — match that by looking only at the
5165    /// topmost frame directly and bailing if it is anything but a Lua frame.
5166    pub(crate) fn position_prefix(&self) -> Option<String> {
5167        let f = self.frames.last().and_then(CallFrame::lua)?;
5168        let proto = f.closure.proto;
5169        if proto.source.as_bytes().is_empty() {
5170            return Some(self.stripped_prefix());
5171        }
5172        if proto.lines.is_empty() {
5173            return None;
5174        }
5175        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5176        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5177        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5178        let display = crate::vm::lib_debug::chunk_id(raw);
5179        let src = String::from_utf8_lossy(&display).into_owned();
5180        Some(format!("{src}:{line}: "))
5181    }
5182
5183    /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5184    /// for the source and renders the line as "?" (so the prefix reads
5185    /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5186    /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5187    /// matches `^%?:%-1:`).
5188    fn stripped_prefix(&self) -> String {
5189        if self.version >= crate::version::LuaVersion::Lua55 {
5190            "?:?: ".to_string()
5191        } else {
5192            "?:-1: ".to_string()
5193        }
5194    }
5195
5196    /// Position prefix of the Lua frame `level` steps up from the running C
5197    /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5198    /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5199    /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5200    /// caller's frame is reported even across pcall/xpcall continuations.
5201    pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5202        let fi = match self.dbg_frame(level)? {
5203            DbgKind::Lua(fi) => fi,
5204            DbgKind::C(_) | DbgKind::Tail(_) => return None,
5205        };
5206        let f = self.frames[fi].lua()?;
5207        let proto = f.closure.proto;
5208        // PUC luaG_addinfo: a stripped chunk has no source — see
5209        // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5210        if proto.source.as_bytes().is_empty() {
5211            return Some(self.stripped_prefix());
5212        }
5213        // a stripped chunk carries no per-instruction line info
5214        if proto.lines.is_empty() {
5215            return None;
5216        }
5217        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5218        // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5219        // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5220        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5221        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5222        let display = crate::vm::lib_debug::chunk_id(raw);
5223        let src = String::from_utf8_lossy(&display).into_owned();
5224        Some(format!("{src}:{line}: "))
5225    }
5226
5227    // ---- the interpreter ----
5228
5229    fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5230        let entry_depth = self.frames.len();
5231        self.exec_with(entry_depth)
5232    }
5233
5234    /// Run from the current top frame down to (but not past) `entry_depth`
5235    /// frames. Coroutine driving passes `entry_depth = 1` so the whole thread
5236    /// runs to completion or a yield.
5237    /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
5238    /// `entry_depth` (captured pre-yield by `drive_one`). Called by
5239    /// `EvalFuture::poll` on every poll after the first to walk the
5240    /// existing call frames until the next `BudgetExhausted` or
5241    /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
5242    /// embedder reaches it through `Vm::eval_async`.
5243    pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5244        self.exec_with(entry_depth)
5245    }
5246
5247    fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5248        loop {
5249            let r = self.run(entry_depth);
5250            if r.is_err()
5251                && (self.yielding.is_some()
5252                    || self.terminating.is_some()
5253                    || self.host_yield_pending
5254                    || self.pending_async_native_fut.is_some())
5255            {
5256                // a `coroutine.yield` is in flight: keep the frames intact (they
5257                // are the suspended coroutine's saved state) and propagate to
5258                // resume. A self-close termination propagates the same way, so a
5259                // protecting pcall on the way out cannot catch (unwind) it.
5260                // v1.1 B10 — `host_yield_pending` is the async-mode
5261                // analogue: the sentinel must reach `drive_one` without
5262                // a protecting `pcall` swallowing it.
5263                return r;
5264            }
5265            match r {
5266                Ok(vals) => return Ok(vals),
5267                // unwind toward `entry_depth`. A protecting pcall/xpcall
5268                // continuation caught along the way turns the error into
5269                // `false, msg` and the loop resumes running its caller; an
5270                // uncaught error propagates out.
5271                Err(e) => match self.unwind(e.0, entry_depth) {
5272                    Unwound::Caught => continue,
5273                    Unwound::CaughtReturn(vals) => return Ok(vals),
5274                    Unwound::Propagated(err) => return Err(err),
5275                },
5276            }
5277        }
5278    }
5279
    /// Unwind the call stack from the error point toward `entry_depth`, running
    /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
    /// continuation frame at/above `entry_depth` (the error is *caught*: its
    /// slot receives `false, msg`); if none is reached, the error propagates.
    ///
    /// `err` is the error value in flight; it is replaced whenever a `__close`
    /// drive or handler raises a new error during the unwind.
    ///
    /// Outcomes:
    /// * `Unwound::Caught` — a pcall/xpcall continuation caught the error and
    ///   frames at/above `entry_depth` remain, or a close drive returned
    ///   `Ok(None)`; the caller re-enters the run loop.
    /// * `Unwound::CaughtReturn(vals)` — the catcher was popped and fewer
    ///   than `entry_depth` frames remain, so `false, msg` is returned as
    ///   host values.
    /// * `Unwound::Propagated(err)` — the frame count dropped below
    ///   `entry_depth` without any catcher.
    fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
        // PUC 5.5 `luaG_errormsg` substitutes "<no error object>" when the
        // error object is nil — so `pcall(function() error(nil) end)` returns
        // that string instead of nil, and `assert(nil, nil)` (whose path
        // throws nil via `lua_settop(L, 1)`) also surfaces a string. Earlier
        // dialects (5.4 and below) keep the nil — 5.4 errors.lua :49 asserts
        // `doit("error()") == nil` and luna would fail that if it always
        // substituted. luna's native `error()` still does its own conversion
        // for direct callers.
        if matches!(err, Value::Nil) && self.version >= crate::version::LuaVersion::Lua55 {
            err = Value::Str(self.heap.intern(b"<no error object>"));
        }
        // The protected call runs in-place among the caller frames' registers,
        // so truncating the failed frames here cuts into caller windows below
        // the catcher. Record the live length at the error point. After a
        // catch the stack is resized to the catcher's caller window (see the
        // pcall/xpcall arm below); this length is only the fallback used when
        // no Lua frame remains.
        // PUC handles overflow recovery via a separate EXTRA_STACK reserve.
        let saved_len = self.stack.len();
        // Snapshot the traceback at the error point — before any frame is
        // popped — so an `xpcall` msgh (which runs after the failed frames are
        // gone) can still describe the error site. The handler frame about to
        // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
        // visible here; once popped, `debug.traceback` would miss it.
        // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
        // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
        // recovery — locals.lua:659) re-overflows. Capture-once propagates
        // through nested unwinds (inner→outer) without re-running msgh.
        if self.error_traceback.is_none() {
            self.error_traceback = Some(self.traceback_bytes(1));
        }
        while self.frames.len() >= entry_depth {
            match *self.frames.last().expect("frame") {
                // a yieldable-metamethod continuation does not catch: discard the
                // abandoned instruction and keep unwinding (PUC drops the partial
                // op on error).
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Meta(mc),
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = mc.saved_top.min(func_slot);
                    self.tbc.retain(|&s| s < func_slot);
                }
                // a __pairs continuation does not catch either: an error inside
                // the metamethod propagates past `pairs`.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Pairs,
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = func_slot;
                    self.tbc.retain(|&s| s < func_slot);
                }
                // a __close continuation does not catch: drop the half-run
                // handler's window, then continue the close yieldably with
                // the new error threaded as `pending`. Preserve `cc.after`
                // verbatim — `Return`/`Block` originating from an aborting
                // OP_Return/OP_Close will be short-circuited by
                // `finish_close_after` (pending propagates as Err); a
                // `ResumeUnwind` originated by our own Lua-frame handler
                // must keep its deferred frame-pop semantics so that frame
                // is not orphaned. If a fresh handler yields, `drive_close`
                // pushes another `Cont::Close` and we return `Caught` so
                // `exec_with` re-enters the run loop.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Close(cc),
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = func_slot;
                    self.tbc.retain(|&s| s < func_slot);
                    match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
                        Ok(Some(_)) => {
                            unreachable!(
                                "Block / Return / ResumeUnwind never return host values mid-unwind"
                            )
                        }
                        Ok(None) => return Unwound::Caught,
                        // the close drive raised: that error replaces the
                        // one in flight and unwinding continues.
                        Err(e) => {
                            err = e.0;
                            continue;
                        }
                    }
                }
                // every remaining continuation kind is a pcall/xpcall
                // catcher: the error stops here.
                CallFrame::Cont(nc) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.pcall_depth -= 1;
                    // `result` is the message delivered after `false`: the
                    // raw error for pcall, the handler's first return value
                    // for xpcall.
                    let result = match nc.kind {
                        ContKind::Pcall => err,
                        ContKind::Xpcall { handler } => {
                            // PUC keeps `L->errfunc` set across the handler's
                            // call: `luaG_errormsg` re-fires the handler when
                            // it raises (so `xpcall(error, err, 170)` lets the
                            // chain bottom out at err(0) → "END"). luna mirrors
                            // that by looping until the handler returns or
                            // luna's `iters` cap forces termination.
                            //
                            // The cap models PUC's nCcalls soft window
                            // (MAXCCALLS/10*11): once tripped, `stackerror`
                            // raises "C stack overflow" via `luaG_runerror`
                            // which itself re-enters `luaG_errormsg`, so the
                            // handler runs once more with that string and
                            // naturally returns it (errors.lua :637 at N=300).
                            // We count iterations per Cont::Xpcall rather than
                            // a global counter — nested xpcalls each get their
                            // own budget, matching the way PUC's stack frames
                            // accumulate per dispatch path.
                            const MSGH_CAP: u32 = MAX_C_DEPTH;
                            let mut cur_err = err;
                            let mut iters: u32 = 0;
                            let mut capped = false;
                            loop {
                                // on reaching the cap, swap the error for the
                                // synthesized overflow message exactly once.
                                if iters >= MSGH_CAP && !capped {
                                    cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
                                    capped = true;
                                }
                                iters += 1;
                                self.msgh_depth += 1;
                                let r = self.call_value(handler, &[cur_err]);
                                self.msgh_depth -= 1;
                                match r {
                                    // handler returned: its first value (or
                                    // nil when it returned nothing) is the
                                    // message.
                                    Ok(hr) => {
                                        break hr.first().copied().unwrap_or(Value::Nil);
                                    }
                                    Err(_) if capped => {
                                        // the handler still errored on the
                                        // synthesized "C stack overflow"; fall
                                        // back to PUC's LUA_ERRERR string.
                                        break Value::Str(
                                            self.heap.intern(b"error in error handling"),
                                        );
                                    }
                                    // handler raised below the cap: re-fire
                                    // it with the new error.
                                    Err(e) => {
                                        cur_err = e.0;
                                    }
                                }
                            }
                        }
                        ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
                            unreachable!("Meta/Pairs/Close cont handled above")
                        }
                    };
                    // the error has been caught (pcall/xpcall): the captured
                    // traceback was for that error and is no longer in flight.
                    self.error_traceback = None;
                    // write `false, result` at the catcher's slot, growing
                    // the stack first if it was truncated below that.
                    let fs = nc.func_slot as usize;
                    if self.stack.len() < fs + 2 {
                        self.stack.resize(fs + 2, Value::Nil);
                    }
                    self.stack[fs] = Value::Bool(false);
                    self.stack[fs + 1] = result;
                    self.top = nc.func_slot + 2;
                    self.tbc.retain(|&s| s < nc.func_slot);
                    // the catcher sat at the entry boundary: return the two
                    // values to the host caller instead of a Lua caller.
                    if self.frames.len() < entry_depth {
                        return Unwound::CaughtReturn(self.take_results(nc.func_slot));
                    }
                    self.finish_results(nc.func_slot, 2, nc.nresults);
                    // Restore the stack to the register window of the
                    // topmost remaining Lua frame (opcodes index that window
                    // directly): its `base + max_stack` plus a 256-slot
                    // reserve. The stack is resized to exactly this length,
                    // growing or shrinking as needed. That covers both cases
                    // the original notes call out: an overflow-recovery
                    // catch that would otherwise leave the stack near
                    // `MAX_LUA_STACK` (so the next `call_value_impl`, e.g. a
                    // `__close` in the catcher's errorh, locals.lua:659,
                    // would pick `func_slot = stack.len()` above the limit
                    // and re-overflow), and a yieldable-unwind re-entry
                    // where a prior `ResumeUnwind` truncated the stack below
                    // the window. `saved_len` is only the fallback when no
                    // Lua frame remains.
                    let restore = self
                        .frames
                        .iter()
                        .rev()
                        .find_map(CallFrame::lua)
                        .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
                        .unwrap_or(saved_len);
                    if self.stack.len() < restore {
                        self.stack.resize(restore, Value::Nil);
                    } else if self.stack.len() > restore {
                        self.stack.truncate(restore);
                    }
                    // v2.5 P1B-2B: clear slots vacated by the popped
                    // frames the unwind walked over. finish_results
                    // above clears `[nc.func_slot + nresults ..
                    // nc.func_slot + 2)`, which only covers the
                    // pcall's own result region — the unwind-popped
                    // frames' locals in `[nc.func_slot + 2 .. restore)`
                    // are still in place with whatever Gc-bearing
                    // Values they last held. Without this clear, a
                    // later GC marks the stale pointers (UAF-A family
                    // analog of the v2.3 Op::Return finish_results
                    // path). PUC's `luaD_pcall` similarly truncates
                    // L->top to the catcher's level — luna's
                    // truncate above resizes the Vec but doesn't
                    // touch slots [func_slot+2..restore) that were
                    // already present.
                    // Both bounds are clamped to the live stack length so
                    // the slice below cannot go out of range.
                    let clear_lo = (nc.func_slot as usize + 2).min(self.stack.len());
                    let clear_hi = restore.min(self.stack.len());
                    if clear_lo < clear_hi {
                        for slot in &mut self.stack[clear_lo..clear_hi] {
                            *slot = Value::Nil;
                        }
                    }
                    return Unwound::Caught;
                }
                CallFrame::Lua(f) => {
                    // Yieldable error-unwind close, PUC luaG_errormsg shape:
                    // (1) pop the Lua frame immediately so each `__close`
                    // handler runs at the C boundary above — `debug.getinfo`
                    // sees the next outer Lua frame's call site (typically
                    // `pcall`), not this aborting function (locals.lua:480).
                    // (2) drive the close yieldably with
                    // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
                    // it truncates to `func_slot` and re-raises (letting a
                    // handler-raised error win over `err`). If a handler
                    // yields, `drive_close` pushes `Cont::Close` and we
                    // return `Caught` so `exec_with` re-enters the run loop;
                    // a synchronous drain returns Err exactly as the old
                    // path did.
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    let after = AfterClose::ResumeUnwind {
                        func_slot: f.func_slot,
                        err,
                    };
                    match self.begin_close(f.base, Some(err), after, entry_depth) {
                        Ok(Some(_)) => {
                            unreachable!("ResumeUnwind never returns host values")
                        }
                        Ok(None) => return Unwound::Caught,
                        // the error coming out of the close replaces the one
                        // in flight; keep unwinding the next frame down.
                        Err(e) => {
                            err = e.0;
                            continue;
                        }
                    }
                }
            }
        }
        // fell below `entry_depth` without meeting a catcher.
        Unwound::Propagated(LuaError(err))
    }
5540
5541    fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5542        loop {
5543            // Fast-path slow-check gate: most embedders run with both
5544            // `instr_budget` and `mem_cap` as None, so a single combined
5545            // is_some test lets the hot loop skip both branches with one
5546            // load + branch instead of two.
5547            if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5548                if let Some(b) = self.instr_budget.as_mut() {
5549                    *b -= 1;
5550                    if *b <= 0 {
5551                        self.instr_budget = None;
5552                        // v1.1 B10 Stage 1 — async-mode cooperative
5553                        // yield. Set a sentinel flag so `exec_with`
5554                        // propagates the Err without `unwind` running
5555                        // (mirroring the `yielding.is_some()` path),
5556                        // and `call_value_impl` preserves the call
5557                        // frames for the next `poll`. Translation back
5558                        // to `DispatchOutcome::BudgetExhausted` happens
5559                        // in `drive_one`. The Err value itself is
5560                        // `Value::Nil` — a pure sentinel, never seen by
5561                        // user code.
5562                        if self.async_mode {
5563                            self.host_yield_pending = true;
5564                            return Err(LuaError(Value::Nil));
5565                        }
5566                        // B6: classify the trip so embedders can
5567                        // distinguish budget exhaustion from a
5568                        // generic Runtime error and retry / give up
5569                        // accordingly.
5570                        self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5571                        let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5572                        return Err(LuaError(s));
5573                    }
5574                }
5575                if let Some(cap) = self.heap.mem_cap
5576                    && self.heap.bytes() > cap
5577                {
5578                    // First try a full collect — embedders set tight caps
5579                    // and the overshoot may be reclaimable (closures kept
5580                    // by short-lived frames, intermediate strings). Only
5581                    // disarm + raise if the cap is still breached after
5582                    // collection. PUC's `LUA_GCEMERGENCY` path matches.
5583                    //
5584                    // v2.6 A.2: tighten mem-cap-fire over-root from
5585                    // entire `self.stack.len()` (whole heap) to the
5586                    // deepest Lua frame's `base + max_stack` window
5587                    // (covers register operands the current opcode
5588                    // might reference). The cap fires during table
5589                    // mutation in a tight `a[i] = i` loop where `a`
5590                    // lives at a frame-register slot past `self.top`
5591                    // (OP_NEWINDEX doesn't advance top); the deepest
5592                    // frame's max_stack window provably covers it
5593                    // since `a` is a register of the executing proto.
5594                    //
5595                    // Still over-roots caller frames' dead regs
5596                    // (slots between caller.base and the callee
5597                    // func_slot are live; slots past callee
5598                    // func_slot in caller's frame are dead until
5599                    // caller resumes). For fire-once cap path this
5600                    // residual over-root is acceptable; full
5601                    // per-frame walk was canceled per
5602                    // `.dev/rfcs/v2.6-plan-state.md` amendments log
5603                    // (charter §2.1's strong/weak pass split is
5604                    // semantically impossible — weak pass depends on
5605                    // strong-pass marks).
5606                    let cap_root_top = self
5607                        .frames
5608                        .iter()
5609                        .rev()
5610                        .find_map(CallFrame::lua)
5611                        .map(|f| f.base + f.closure.proto.max_stack as u32)
5612                        .unwrap_or(self.top);
5613                    self.gc_top = cap_root_top.max(self.top);
5614                    self.collect_garbage();
5615                    if self.heap.bytes() > cap {
5616                        self.heap.mem_cap = None;
5617                        let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5618                        return Err(LuaError(s));
5619                    }
5620                }
5621            }
5622            // Single combined frame fetch: continuation arm OR Lua arm. Saves
5623            // a second `self.frames.last()` slice access vs the prior split
5624            // form (LLVM doesn't always CSE these across the cont branch).
5625            // A continuation frame on top means the call it protected just
5626            // delivered its results — wrap as `true, results…` and hand to
5627            // the pcall/xpcall caller. The error path is handled by `unwind`;
5628            // this branch is only reached on success/resume completion.
5629            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5630            let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5631            if let &CallFrame::Cont(nc) = frame_peek {
5632                // a yieldable metamethod returned: complete the interrupted
5633                // instruction (PUC luaV_finishOp) and resume the running frame.
5634                if let ContKind::Meta(mc) = nc.kind {
5635                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5636                    let result = if self.top > nc.func_slot {
5637                        self.stack[nc.func_slot as usize]
5638                    } else {
5639                        Value::Nil
5640                    };
5641                    self.stack.truncate(nc.func_slot as usize);
5642                    self.top = mc.saved_top;
5643                    self.finish_meta(mc.action, result)?;
5644                    continue;
5645                }
5646                // a __close handler returned successfully: discard its
5647                // results, restore `top` to the slot the handler was called
5648                // at (the surrounding frame's register window above this slot
5649                // must stay alloc'd — never truncate the underlying stack),
5650                // then continue the close chain (next slot, or fire
5651                // AfterClose). When the close ends an entry activation,
5652                // drive_close hands the results up to exec_with directly.
5653                if let ContKind::Close(cc) = nc.kind {
5654                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5655                    self.top = nc.func_slot;
5656                    if let Some(vals) =
5657                        self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5658                    {
5659                        return Ok(vals);
5660                    }
5661                    continue;
5662                }
5663                // __pairs returned: normalize its results to exactly four
5664                // (iterator, state, control, closing) at pairs's slot, where
5665                // the metamethod was called, and hand them to pairs's caller.
5666                if let ContKind::Pairs = nc.kind {
5667                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5668                    let total = 4u32;
5669                    let need = (nc.func_slot + total) as usize;
5670                    if self.stack.len() < need {
5671                        self.stack.resize(need, Value::Nil);
5672                    }
5673                    for s in self.top..(nc.func_slot + total) {
5674                        self.stack[s as usize] = Value::Nil;
5675                    }
5676                    self.top = nc.func_slot + total;
5677                    if self.frames.len() < entry_depth {
5678                        return Ok(self.take_results(nc.func_slot));
5679                    }
5680                    self.finish_results(nc.func_slot, total, nc.nresults);
5681                    continue;
5682                }
5683                frames_pop_sync(&mut self.frames, &mut self.frames_top);
5684                self.pcall_depth -= 1;
5685                // f's results sit at nc.func_slot+1.. (f was called one slot
5686                // above the continuation), so writing `true` at the slot makes
5687                // `true, results…` already contiguous.
5688                let nret = self.top - (nc.func_slot + 1);
5689                self.stack[nc.func_slot as usize] = Value::Bool(true);
5690                let total = 1 + nret;
5691                self.top = nc.func_slot + total;
5692                if self.frames.len() < entry_depth {
5693                    return Ok(self.take_results(nc.func_slot));
5694                }
5695                self.finish_results(nc.func_slot, total, nc.nresults);
5696                continue;
5697            }
5698            // GC runs only at the allocation safe points below (PUC's
5699            // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5700            // no longer collects, so a stale full-window `gc_top` cannot leak in.
5701            //
5702            // Hot-path frame fetch: the Cont arm above continues the loop,
5703            // so reaching here means `frame_peek` is the Lua frame. Reuse it
5704            // rather than re-fetching `self.frames.last()`.
5705            let f = match frame_peek {
5706                CallFrame::Lua(f) => f,
5707                _ => unreachable!("Cont frame survived the dispatch loop head"),
5708            };
5709            let cl = f.closure;
5710            let base = f.base;
5711            let func_slot = f.func_slot;
5712            let n_varargs = f.n_varargs;
5713            let pc = f.pc;
5714            let oldpc = f.hook_oldpc;
5715
5716            // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5717            // — every branch / call op only sets `pc` to a valid index, and
5718            // function entry initialises pc=0 with a non-empty body. PUC's
5719            // `vmfetch` uses the equivalent unchecked load.
5720            let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5721
5722            // P12-S1.C/D — trace recording append + close detection.
5723            // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5724            // so default dispatch keeps a single not-taken branch.
5725            //
5726            // - At the head PC with a non-empty record, the trace has
5727            //   looped back to its start: mark `closed = true` and
5728            //   take the record (S2 will compile + cache).
5729            // - Otherwise, capture the op. If the record overflows
5730            //   MAX_TRACE_LEN, abort by dropping it.
5731            if self.jit.trace_enabled
5732                && let Some(_rec) = self.jit.active_trace.as_mut()
5733            {
5734                // P12-S4 — depth tracking. The trace head's frame is
5735                // at index `recording_frame_base`; every Op::Call that
5736                // pushes a new frame bumps the live depth, every
5737                // Op::Return that pops one decrements it.
5738                //
5739                // **Three clean-close conditions** (P12-S4-step4a):
5740                // - `at_head`: cur_depth == 0 AND about-to-execute the
5741                //   trace's head_pc on its head_proto (loop closed back
5742                //   to start). Same for loop-triggered and call-triggered
5743                //   traces — step4a unified the gating so call-triggered
5744                //   no longer closes on the first re-entry (that left
5745                //   fib's body at 7 depth=0 ops; step4a lets it inline
5746                //   up to MAX_INLINE_DEPTH levels before any close).
5747                // - `returned_past_head`: trace head's frame is gone
5748                //   (callee returned past it, or the call-trigger
5749                //   started a recording inside a callee that has now
5750                //   returned). Whatever ops were recorded form the
5751                //   trace body; the lowerer treats the partial trace
5752                //   the same as InlineAbort (dispatchable=false until
5753                //   step4b's frame materialization lands).
5754                // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5755                //   Recording any deeper would just bloat the IR; close
5756                //   with the body we have. Lowerer's existing length
5757                //   gate + InlineAbort path handles short bodies.
5758                let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5759                let cur_depth = if returned_past_head {
5760                    0
5761                } else {
5762                    self.frames.len() - 1 - self.jit.recording_frame_base
5763                };
5764                let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5765                let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5766                let at_head_loop = cur_depth == 0
5767                    && !rec.ops.is_empty()
5768                    && !returned_past_head
5769                    && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5770                    && pc == rec.head_pc;
5771                // P16-A — self-link cycle catch (mirrors LuaJIT's
5772                // `check_call_unroll` at `lj_record.c:1869`). Trips when:
5773                //   1. We're about to execute the head_pc on head_proto
5774                //      at depth > 0 (we're re-entering the trace head
5775                //      from inside an inlined recursion level — UpRec).
5776                //   2. The count of ancestor frames in the recording
5777                //      window that share `head_proto` exceeds
5778                //      [`RECUNROLL_THRESHOLD`] (default 2).
5779                // For fib(N): head_pc=0, head_proto=fib. After 2 inline
5780                // recursion levels are captured, the recorder enters
5781                // the 3rd nested fib frame, sees cur_depth=3 > 2, and
5782                // trips this catch — closing with `SelfRecKind::UpRec`.
5783                // The lowerer's `TraceEnd::SelfLink` tail emits the
5784                // bump-base + branch-to-self loop body.
5785                //
5786                // TailRec vs UpRec: LJ distinguishes via
5787                // `framedepth + retdepth == 0`. luna doesn't track
5788                // retdepth separately; cur_depth == 0 with a non-empty
5789                // call chain in tail position is rare (would require
5790                // explicit Lua TCO). We use cur_depth > 0 as the UpRec
5791                // condition (fib's case); cur_depth == 0 with positive
5792                // ancestor count would route to TailRec, but luna's
5793                // recorder doesn't currently produce that shape because
5794                // tail-call elision pops the caller frame and we'd
5795                // hit `at_head_loop` instead.
5796                let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
5797                    if self.jit.p16_self_link_enabled
5798                        && !returned_past_head
5799                        && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5800                        && pc == rec.head_pc
5801                        && cur_depth > 0
5802                    {
5803                        // Count ancestor frames sharing head_proto.
5804                        // self.frames[recording_frame_base..] currently
5805                        // includes the just-pushed frame at the top
5806                        // (the one about to execute head_pc). Ancestors
5807                        // = the slice excluding the top frame.
5808                        let head_proto_ptr = rec.head_proto.as_ptr();
5809                        let last_idx = self.frames.len() - 1;
5810                        let mut count = 0usize;
5811                        for i in self.jit.recording_frame_base..last_idx {
5812                            if let CallFrame::Lua(f) = &self.frames[i]
5813                                && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
5814                            {
5815                                count += 1;
5816                            }
5817                        }
5818                        if count > crate::jit::trace::RECUNROLL_THRESHOLD {
5819                            // cur_depth > 0 → UpRec (fib pattern).
5820                            // cur_depth == 0 wouldn't reach this arm.
5821                            Some(crate::jit::trace::SelfRecKind::UpRec)
5822                        } else {
5823                            None
5824                        }
5825                    } else {
5826                        None
5827                    }
5828                };
5829                if let Some(kind) = self_link_trip {
5830                    // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
5831                    // self-recursive patterns at frame depth >= 2.
5832                    //
5833                    // Pre sub-0: a SelfLink trip at the head_pc re-entry
5834                    // unconditionally stamped `self_link_kind`. The
5835                    // R3a `downrec_close` marker can only fire from the
5836                    // depth>0 Op::Return path (`rec.retfs` chain),
5837                    // which never reaches the recorder for fib(28)-like
5838                    // shapes that hit the SelfLink cycle catch BEFORE
5839                    // any base-case Return — leaving `downrec_close`
5840                    // None and routing the trace through R1's safe
5841                    // `dispatchable=false` `"self-link-retf-r1"` path
5842                    // (audit measured `trace_dispatched = 0`).
5843                    //
5844                    // Sub-0 lift: when the SelfLink trip fires AND
5845                    // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
5846                    // gate already requires this — kept explicit as a
5847                    // safety floor), route the close through `downrec_
5848                    // close` INSTEAD of `self_link_kind`. The recorder
5849                    // synthesises the close marker from the most
5850                    // recent Op::Call at depth `cur_depth - 1`:
5851                    //   - `return_pc` = `call.pc + 1` (caller's resume
5852                    //     PC after the recursive call returns; mirror
5853                    //     of R3a's `caller_pc` derivation at the
5854                    //     depth>0 Op::Return capture path below).
5855                    //   - `target_proto` = `call.proto` (caller's
5856                    //     proto; equals `rec.head_proto` for self-
5857                    //     recursion).
5858                    //   - `depth_delta` = `1` (today's recorder always
5859                    //     unrolls one level; R3a uses the same
5860                    //     constant).
5861                    //
5862                    // The lowerer's `end_idx` picker (`trace.rs:3729`)
5863                    // routes through `TraceEnd::DownRec` ahead of the
5864                    // `self_link_kind` arm; the R3b/R3d lowerer arm
5865                    // emits the stitch-sentinel + caller-pc-guard
5866                    // scaffold. Single-candidate guard chain (sub-0's
5867                    // recorder produces 1 caller_pc candidate because
5868                    // `rec.retfs` is empty) keeps `dispatchable=false`
5869                    // + `"downrec-stitch-pending"` label (per R3d's
5870                    // `multi_way_candidate_count >= 2` gate at
5871                    // `trace.rs:7385`). Net behaviour: trace compiles
5872                    // under DownRec routing; interp runs the
5873                    // recursion naturally → result 317811.
5874                    //
5875                    // The `cur_depth >= 2` gate is automatically
5876                    // satisfied by the count > RECUNROLL_THRESHOLD=2
5877                    // trip condition (3 ancestor frames sharing
5878                    // head_proto implies cur_depth >= 3), kept
5879                    // explicit so a future RECUNROLL_THRESHOLD tweak
5880                    // doesn't silently flip shallow-recursion
5881                    // shapes (cur_depth == 1) onto the DownRec arm.
5882                    //
5883                    // R3.3+ sub-1/2/3/4 will replace the depth-baked
5884                    // op_offsets[] addressing with runtime base_var
5885                    // threading so the trace's recorded body is
5886                    // depth-relative and the DownRec dispatch
5887                    // becomes wall-clock-positive. Sub-0 is the
5888                    // routing scaffold; it does not aim for gain.
5889                    let _ = kind;
5890                    let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
5891                        let caller_depth_u8 = (cur_depth - 1) as u8;
5892                        if let Some(call_op) = rec.ops.iter().rev().find(|r| {
5893                            r.inline_depth == caller_depth_u8
5894                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
5895                        }) {
5896                            rec.downrec_close = Some(crate::jit::trace::DownRecClose {
5897                                return_pc: call_op.pc + 1,
5898                                target_proto: call_op.proto,
5899                                depth_delta: 1,
5900                            });
5901                            true
5902                        } else {
5903                            false
5904                        }
5905                    };
5906                    if relaxed_to_downrec {
5907                        // R2 close-cause taxonomy: tag the lift so
5908                        // probes can tally the fire rate. Mirrors
5909                        // R3a's `"downrec-restart"` bump for the
5910                        // depth>0 Op::Return path (different trip
5911                        // origin, same downstream routing). The
5912                        // existing `"self-link-retf-r1"` label still
5913                        // fires for trips that DON'T relax (no
5914                        // candidate Op::Call ancestor in rec.ops, or
5915                        // cur_depth < 2) via the lowerer's
5916                        // dispatch_off_reason mirror at the close
5917                        // handler — kept as a regression safety net.
5918                        self.jit
5919                            .counters
5920                            .bump_close_cause("selflink-yields-to-downrec");
5921                    } else {
5922                        rec.self_link_kind = Some(kind);
5923                    }
5924                }
5925                let should_close =
5926                    at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
5927                if should_close {
5928                    // P13-S13-H — long-trace bias: a call-triggered
5929                    // recording that closed with a very short body
5930                    // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
5931                    // binary_trees `make(0)`: 4 ops) is pathological.
5932                    // Compiling + caching it pins `Proto.traces` to a
5933                    // trace that the length gate will refuse to
5934                    // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
5935                    // = 40`), AND blocks the back-edge / longer-call
5936                    // path from re-recording the same head_pc (the
5937                    // dedup `already_cached` check below short-
5938                    // circuits). The fix: discard the short call-
5939                    // triggered recording WITHOUT caching. An earlier
5940                    // draft also biased the proto's `call_hot_count`
5941                    // back to `THRESHOLD - HOT_RETRY_WINDOW` to retry
5942                    // at a different (hopefully deeper) recursion
5943                    // point; the discard path no longer does that.
5944                    //
5945                    // Back-edge triggered traces are exempt — a
5946                    // tight numeric-for loop's body is legitimately
5947                    // 3 ops (`Add`, ForLoop) and DOES dispatch
5948                    // usefully when re-entered many times.
5949                    // P13-S13-H — coverage heuristic to detect
5950                    // pathologically partial call-triggered traces:
5951                    // for self-recursive / branchy protos like
5952                    // `fib` (~17 bytecode ops) or
5953                    // `binary_trees.make` (~26 ops), the recorder
5954                    // can fire at a BASE-case entry (`fib(0)` or
5955                    // `make(0)`) producing a 3–4 op trace that
5956                    // covers a tiny fraction of the proto's code.
5957                    // That trace is doomed by the length gate
5958                    // post-compile AND blocks any longer follow-up
5959                    // (the dedup `already_cached` check below). The
5960                    // fix: discard call-triggered closes where
5961                    // `rec.ops.len() * 2 < head_proto.code.len()`
5962                    // (less than half the proto's bytecode), so the
5963                    // back-edge / longer call path can take over.
5964                    //
5965                    // Why coverage > raw length: protos with
5966                    // intrinsically short bodies (closure
5967                    // factories: `Closure + Return1` = 2 ops,
5968                    // simple wrappers: `LoadI + Return1` = 2 ops)
5969                    // record 100% coverage even at length 2 — those
5970                    // ARE legitimately short and the closure /
5971                    // sunk-emit lowering paths (S7-A / S9-C) make
5972                    // them worth compiling. The heuristic admits
5973                    // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
5974                    // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
5975                    // ~26) get discarded.
5976                    //
5977                    // Back-edge triggered traces are unaffected —
5978                    // a tight numeric-for body legitimately covers
5979                    // 3 of ~3 proto ops it can dispatch from
5980                    // (`Add + ForLoop`) and the recorder fires on
5981                    // the back-edge, not call entry.
5982                    //
5983                    // `call_hot_count` is intentionally NOT reset
5984                    // (an earlier draft tried `THRESHOLD - 32` but
5985                    // caused active_trace contention with the
5986                    // outer back-edge trigger — see
5987                    // setlist_b_zero_with_call_c_zero_sunk_emits).
5988                    // We give up on dispatching the pathological
5989                    // shape on the same proto; the back-edge or a
5990                    // longer call path on a deeper recursion point
5991                    // can still record + cache a real trace.
5992                    let proto_code_len = rec.head_proto.code.len();
5993                    let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
5994                    // P13-S13-I — per-Proto discard cap. The S13-H
5995                    // relaxed trigger condition (`c >= THRESHOLD &&
5996                    // !already_cached`) means a Proto whose every
5997                    // recording is partial-coverage will re-fire the
5998                    // trigger every call indefinitely (1500+ in
5999                    // `binary_trees`-pattern test). The cap stops
6000                    // discarding after `MAX_DISCARDS_PER_PROTO` —
6001                    // the next close falls through to compile (even
6002                    // if partial), caches the trace, and the
6003                    // `already_cached` short-circuit kills the
6004                    // storm. Dispatch may still be refused
6005                    // post-compile (length gate), but the recorder
6006                    // stops churning.
6007                    const MAX_DISCARDS_PER_PROTO: u32 = 5;
6008                    let prior_discards = rec.head_proto.trace_discard_count.get();
6009                    let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
6010                    // P13-S13-K — flip the `gave_up` flag the
6011                    // moment cap is reached (BEFORE the close-
6012                    // dispatching branch below). The trigger gates
6013                    // short-circuit on this flag, skipping the
6014                    // RefCell + linear `already_cached` scan on
6015                    // every subsequent call to this Proto. Useful
6016                    // for `binary_trees_pattern`-class loads where
6017                    // a single Proto sees ~20k calls post-cap.
6018                    if cap_reached
6019                        && rec.is_call_triggered
6020                        && is_partial_coverage
6021                        && !rec.head_proto.trace_gave_up.get()
6022                    {
6023                        rec.head_proto.trace_gave_up.set(true);
6024                    }
6025                    if rec.is_call_triggered && is_partial_coverage && !cap_reached {
6026                        // Tally as closed (for visibility) but DROP
6027                        // without compile/cache. Use the existing
6028                        // closed-lens accumulator so probes can
6029                        // observe the discarded shape.
6030                        // P13-S13-I — bump discard count BEFORE
6031                        // dropping the recording so the next
6032                        // close sees the updated counter.
6033                        rec.head_proto.trace_discard_count.set(prior_discards + 1);
6034                        self.jit.counters.closed += 1;
6035                        self.jit
6036                            .counters
6037                            .closed_lens
6038                            .push((rec.is_call_triggered, rec.ops.len()));
6039                        // v2.0 Track-R R2 — partial-coverage discard
6040                        // close path. Pre-R2 this site bumped `closed`
6041                        // + `closed_lens` (visibility) but no per-
6042                        // reason label, so probes couldn't separate a
6043                        // real successful close from a discard tally.
6044                        // Tag explicitly to make the recorder-side
6045                        // close-cause taxonomy single-source.
6046                        self.jit
6047                            .counters
6048                            .bump_close_cause("partial-coverage-discard");
6049                        self.jit.active_trace = None;
6050                        // Continue with interp loop — don't
6051                        // fall through to compile path.
6052                        // The op at `pc` hasn't dispatched yet;
6053                        // the outer loop iteration handles it.
6054                    } else {
6055                        rec.closed = true;
6056                        // P12-S2.C — detach the closed record, then try
6057                        // to compile it. Dedup by `head_pc`: a Proto
6058                        // already carrying a CompiledTrace for this PC
6059                        // skips recompile (the hot counter caps
6060                        // re-recording at `u32::MAX / 2` anyway, but
6061                        // explicit dedup keeps `Proto.traces` short
6062                        // for the S3 dispatcher's linear scan).
6063                        //
6064                        // No `Vm::run` change for failure: we just bump
6065                        // the failed counter and drop the record. S3
6066                        // will read `Proto.traces` to decide whether to
6067                        // dispatch — until then, this is bookkeeping.
6068                        let head_pc_val = rec.head_pc;
6069                        let closed_record = self
6070                            .jit
6071                            .active_trace
6072                            .take()
6073                            .expect("active_trace was Some this branch");
6074                        self.jit.counters.closed += 1;
6075                        self.jit
6076                            .counters
6077                            .closed_lens
6078                            .push((closed_record.is_call_triggered, closed_record.ops.len()));
6079                        // P12-S5-B fix: cache the trace on the
6080                        // recorder's *head proto*, not the current
6081                        // closure's proto. For non-recursive
6082                        // call-triggered traces, close fires after
6083                        // `Return1` pops the callee frame — `cl` at
6084                        // that point is the CALLER's closure, while
6085                        // `closed_record.head_proto` is the CALLEE's
6086                        // proto (the one we actually want the trace
6087                        // to be discoverable from on the next call).
6088                        // Self-recursive fib closed via depth-cap
6089                        // mid-recursion so `cl.proto == head_proto`
6090                        // happened to coincide — this fix makes that
6091                        // accidental coincidence intentional.
6092                        let head_proto = closed_record.head_proto;
6093                        let already_cached = head_proto
6094                            .traces
6095                            .borrow()
6096                            .iter()
6097                            .any(|t| t.head_pc == head_pc_val);
6098                        if !already_cached {
6099                            // Internal-loop = true: the trace runs in
6100                            // a native loop until a cmp side-exits, so
6101                            // the dispatcher's per-entry marshal cost
6102                            // amortizes across the whole run of
6103                            // iterations the loop's recorded direction
6104                            // stays valid. The lowerer auto-downgrades
6105                            // to one-shot for cmp-less or Call-truncating
6106                            // traces.
6107                            // P15-A v2-C-A6-5 — side traces MUST NOT
6108                            // internal-loop. The parent's recorded prefix
6109                            // (ops at PCs < side trace's head_pc) defines
6110                            // values for registers the child's body reads
6111                            // without re-writing each iter — e.g. for
6112                            // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6113                            // + R[11]` sets R[12], and the child trace
6114                            // (head_pc=24) re-runs `pc=20 Move R[1] =
6115                            // R[12]` each iter via its outer ForLoop
6116                            // internal-loop, ALWAYS reading the stale
6117                            // entry-time R[12]. The parent's Add never
6118                            // re-runs during child's loop, so R[1] gets
6119                            // pinned to one stale value. Force one-shot
6120                            // for side traces: each parent-exit round-
6121                            // trips through dispatcher → parent's Add
6122                            // runs → side trace runs ONE iter → return.
6123                            let opts = crate::jit::trace::CompileOptions {
6124                                internal_loop: closed_record.side_trace_parent.is_none(),
6125                                pre53: self.version() <= LuaVersion::Lua53,
6126                                aot: false,
6127                            };
6128                            // v1.1 A1 Session A — route through trace_compiler.
6129                            // v2.0 Track J sub-step J-B — split-borrow JitState
6130                            // so the trait method can take `&mut dyn JitStorage`.
6131                            let result = {
6132                                let jit = &mut self.jit;
6133                                let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6134                                jit.trace_compiler
6135                                    .try_compile_trace(storage, &closed_record, opts)
6136                            };
6137                            match result {
6138                                Some(mut ct) => {
6139                                    // P12-S5-A/B/C — tally Sinkable sites
6140                                    // + actually-sunk-emit sites + materialise
6141                                    // emit sites before moving `ct` into
6142                                    // Proto.traces.
6143                                    self.jit.counters.sinkable_seen +=
6144                                        ct.sinkable_sites_seen as u64;
6145                                    self.jit.counters.accum_bufferable_seen +=
6146                                        ct.accum_bufferable_seen as u64;
6147                                    self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6148                                    self.jit.counters.materialize_emit +=
6149                                        ct.materialize_emit_count as u64;
6150                                    self.jit.counters.closure_emit += ct.closure_seen as u64;
6151                                    if ct.is_inline_abort_close {
6152                                        self.jit.counters.inline_abort += 1;
6153                                    }
6154                                    // v2.0 Stage 7 polish 6 fire
6155                                    // experiment — split tally so a
6156                                    // probe can answer the AOT
6157                                    // `accepted_with_per_exit_inline`
6158                                    // gate's question at the JIT
6159                                    // surface too: how many compiled
6160                                    // traces emitted depth>0 cmp
6161                                    // side-exits, and how many of
6162                                    // those survived all the
6163                                    // `dispatchable = false` pins
6164                                    // (`InlineAbort-gate`,
6165                                    // `self-link-retf-r1`,
6166                                    // `downrec-stitch-pending`, etc.).
6167                                    if !ct.per_exit_inline.is_empty() {
6168                                        self.jit.counters.per_exit_inline_compiled += 1;
6169                                        if ct.dispatchable {
6170                                            self.jit.counters.per_exit_inline_dispatchable += 1;
6171                                        }
6172                                    }
6173                                    if let Some(reason) = ct.dispatch_off_reason {
6174                                        self.jit.counters.dispatch_off_reasons.push(reason);
6175                                        // v2.0 Track-R R2 — mirror
6176                                        // the ordered Vec push into
6177                                        // the per-reason HashMap so
6178                                        // probes can answer "how many
6179                                        // of each dispatch_off label
6180                                        // fired" in O(1) without
6181                                        // walking the Vec. Same
6182                                        // bucket as the recorder-side
6183                                        // abort/discard tags above.
6184                                        self.jit.counters.bump_close_cause(reason);
6185                                    }
6186                                    // v2.0 Track-R R3b — count
6187                                    // compiled traces that carry a
6188                                    // down-recursion stitch link.
6189                                    // Bumped here (not at the lowerer
6190                                    // emit site) because the Vm's
6191                                    // JitCounters live on the Vm,
6192                                    // and the lowerer doesn't have a
6193                                    // Vm handle. R3b's regression
6194                                    // pin reads this via
6195                                    // `Vm::trace_downrec_link_compiled_count`.
6196                                    if ct.downrec_link.is_some() {
6197                                        self.jit.counters.downrec_link_compiled += 1;
6198                                    }
6199                                    // v2.0 Track-R R3d — multi-way
6200                                    // guard emit counter. Bumped when
6201                                    // the lowerer's R3d arm collected
6202                                    // >= 2 distinct caller_pc candidates
6203                                    // and lifted `dispatchable=true`.
6204                                    // R3c's single-CMP shape stores
6205                                    // `1` here without bumping; non-
6206                                    // DownRec closes store `0`.
6207                                    if ct.downrec_multi_way_count >= 2 {
6208                                        self.jit.counters.multi_way_guard_emitted += 1;
6209                                    }
6210                                    // P15-A v2-A — side-trace finalisation.
6211                                    // Pin `dispatchable=false` so the
6212                                    // primary lookup `traces.find(|t|
6213                                    // t.head_pc == pc && t.dispatchable)`
6214                                    // never matches this entry — the
6215                                    // side trace is meant to be entered
6216                                    // ONLY through the parent's exit
6217                                    // indirection (v2-B/C IR), not the
6218                                    // back-edge / call-trigger paths.
6219                                    // Then write the entry fn ptr into
6220                                    // the parent's `exit_side_trace_ptrs`
6221                                    // slot so v2-B/C IR can read it.
6222                                    if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6223                                        closed_record.side_trace_parent
6224                                    {
6225                                        ct.dispatchable = false;
6226                                        let entry_ptr = ct.entry as *const () as *const u8;
6227                                        let _side_trace_head_pc = closed_record.head_pc;
6228                                        let parent_traces = parent_proto.traces.borrow();
6229                                        if let Some(parent_ct) = parent_traces
6230                                            .iter()
6231                                            .find(|t| t.head_pc == parent_head_pc)
6232                                        {
6233                                            // P15-A v2-C-A5-C — shape-match
6234                                            // gate. Find the parent's per-exit
6235                                            // tag snapshot at the wired exit
6236                                            // (inline / tag / global) and
6237                                            // check the child's entry_tags
6238                                            // match. If not, leave the cell
6239                                            // null + skip cache populate so
6240                                            // the future v2-C-A2 IR's
6241                                            // `call_indirect` stays inert at
6242                                            // this exit (the child's
6243                                            // shape-specialised IR would
6244                                            // mis-interpret raw bits the
6245                                            // parent writes to reg_state).
6246                                            let inline_n = parent_ct.per_exit_inline.len();
6247                                            let tags_n = parent_ct.per_exit_tags.len();
6248                                            let parent_exit_tags_slice: &[
6249                                            crate::jit::trace::ExitTag
6250                                        ] = if parent_exit_idx < inline_n {
6251                                            &parent_ct.per_exit_inline
6252                                                [parent_exit_idx]
6253                                                .exit_tags
6254                                        } else if parent_exit_idx
6255                                            < inline_n + tags_n
6256                                        {
6257                                            &parent_ct.per_exit_tags
6258                                                [parent_exit_idx - inline_n]
6259                                                .1
6260                                        } else {
6261                                            &parent_ct.exit_tags
6262                                        };
6263                                            let shape_ok =
6264                                                crate::jit::trace::exit_tags_match_entry_tags(
6265                                                    &ct.entry_tags,
6266                                                    parent_exit_tags_slice,
6267                                                    &parent_ct.entry_tags,
6268                                                );
6269                                            if !shape_ok {
6270                                                self.jit.counters.side_trace_shape_mismatch += 1;
6271                                            }
6272                                            // P15-A v2-C-A4 — write the child's
6273                                            // entry fn ptr to BOTH the legacy
6274                                            // v2-A `exit_side_trace_ptrs[idx]`
6275                                            // cell (kept so v2-A's
6276                                            // walk_any_side_ptr_non_null tests
6277                                            // stay green) AND the per-kind cell
6278                                            // whose heap address the parent's
6279                                            // IR baked (v2-C-A2). The IR-baked
6280                                            // cell is what the call_indirect
6281                                            // gate actually reads. Only write
6282                                            // when A5-C shape gate passes.
6283                                            if shape_ok {
6284                                                if let Some(cell) = parent_ct
6285                                                    .exit_side_trace_ptrs
6286                                                    .get(parent_exit_idx)
6287                                                {
6288                                                    cell.set(entry_ptr);
6289                                                }
6290                                                // Compute (kind, local) for the
6291                                                // IR-baked cell. Layout follows
6292                                                // exit_hit_counts: inline first,
6293                                                // then per_exit_tags, then the
6294                                                // global tail slot.
6295                                                let (sent_kind, sent_local) = if parent_exit_idx
6296                                                    < inline_n
6297                                                {
6298                                                    parent_ct.per_exit_inline[parent_exit_idx]
6299                                                        .side_trace_ptr
6300                                                        .set(entry_ptr);
6301                                                    (
6302                                                        crate::jit::trace::SIDE_SENT_KIND_INLINE,
6303                                                        parent_exit_idx as u32,
6304                                                    )
6305                                                } else if parent_exit_idx < inline_n + tags_n {
6306                                                    let local = parent_exit_idx - inline_n;
6307                                                    if let Some(b) =
6308                                                        parent_ct.tags_side_trace_ptrs.get(local)
6309                                                    {
6310                                                        b.set(entry_ptr);
6311                                                    }
6312                                                    (
6313                                                        crate::jit::trace::SIDE_SENT_KIND_TAG,
6314                                                        local as u32,
6315                                                    )
6316                                                } else {
6317                                                    parent_ct.global_side_trace_ptr.set(entry_ptr);
6318                                                    (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6319                                                };
6320                                                self.jit.counters.side_trace_compiled += 1;
6321                                                // P15-A v2-D-A8 — flip the
6322                                                // parent's fast-path hint so
6323                                                // the dispatcher knows to do
6324                                                // the tentative decode + cell
6325                                                // check on subsequent
6326                                                // dispatches. Set once and
6327                                                // stays true (we never unwire
6328                                                // a side trace today).
6329                                                parent_ct.has_any_side_wired.set(true);
6330
6331                                                // P15-A v2-C-A1/A4 — populate
6332                                                // the O(1) lookup cache the
6333                                                // dispatcher consults on
6334                                                // sentinel-bit-set returns.
6335                                                // Key is the encoded sentinel
6336                                                // (same encoding the IR ORs
6337                                                // into bits 56..=62 of the
6338                                                // child's i64 return).
6339                                                let sentinel =
6340                                                    crate::jit::trace::encode_side_sentinel(
6341                                                        sent_kind, sent_local,
6342                                                    );
6343                                                let predicted_idx = if std::ptr::eq(
6344                                                    parent_proto.as_ptr(),
6345                                                    head_proto.as_ptr(),
6346                                                ) {
6347                                                    parent_traces.len() as u32
6348                                                } else {
6349                                                    head_proto.traces.borrow().len() as u32
6350                                                };
6351                                                parent_ct
6352                                                    .side_trace_cache
6353                                                    .borrow_mut()
6354                                                    .insert(sentinel, predicted_idx);
6355                                            }
6356                                        }
6357                                        drop(parent_traces);
6358                                    }
6359                                    head_proto.traces.borrow_mut().push(TArc::new(ct));
6360                                    self.jit.counters.compiled += 1;
6361                                }
6362                                None => {
6363                                    self.jit.counters.compile_failed += 1;
6364                                    self.jit
6365                                        .counters
6366                                        .compile_failed_reasons
6367                                        .push(self.jit.trace_compiler.last_compile_checkpoint());
6368                                }
6369                            }
6370                        }
6371                    } // P13-S13-H — close the long-trace-bias else branch
6372                } else {
6373                    // P12-S4-step1 + step4a — depth-aware push at the
6374                    // current `cur_depth`. The `depth_cap_hit` /
6375                    // `returned_past_head` early-exit is handled by
6376                    // the `should_close` branch above; reaching here
6377                    // means `cur_depth <= MAX_INLINE_DEPTH` and the
6378                    // trace head's frame is still live.
6379                    let depth_u8 = cur_depth as u8;
6380                    if depth_u8 > self.jit.max_depth_seen {
6381                        self.jit.max_depth_seen = depth_u8;
6382                    }
6383                    // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6384                    // return / variable return count). Recorder pushed
6385                    // it with var_count=None before the call dispatched;
6386                    // now that the call has returned and we're about to
6387                    // push the next op, top reflects the actual return
6388                    // count. Snapshot top - (caller.base + call.a).
6389                    if let Some(last) = rec.ops.last_mut()
6390                        && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6391                        && last.inst.c() == 0
6392                        && last.var_count.is_none()
6393                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6394                    {
6395                        let from = f.base + last.inst.a();
6396                        if self.top >= from {
6397                            last.var_count = Some(self.top - from);
6398                        }
6399                    }
6400                    // P12-S9-A/C — for SetList B=0, snapshot the source
6401                    // count = top - A - 1 (mirrors Lua's `n = top - ra
6402                    // - 1` from lvm.c OP_SETLIST). Sources are
6403                    // R[A+1..top), exclusive top. For Call C=0's
6404                    // var_count (the return count = top - A inclusive),
6405                    // see the prior-op fix-up above; here we
6406                    // initialise the current Call op to None and let
6407                    // the fix-up on the next op's push populate it.
6408                    let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6409                        && inst.b() == 0
6410                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6411                    {
6412                        let from = f.base + inst.a();
6413                        if self.top > from {
6414                            Some(self.top - from - 1)
6415                        } else {
6416                            None
6417                        }
6418                    } else {
6419                        None
6420                    };
6421                    let op = crate::jit::trace::RecordedOp {
6422                        proto: cl.proto,
6423                        pc,
6424                        inst,
6425                        inline_depth: depth_u8,
6426                        var_count,
6427                    };
6428                    // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6429                    // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6430                    // Captured as a side-channel `RetfRecord` parallel to
6431                    // `ops` when `p16_self_link_enabled` is on. R3's
6432                    // down-rec stitch consumes these to guard side-trace
6433                    // inlined-frame topology against the recorded shape.
6434                    // Gated on the same flag as the cycle catch so the
6435                    // ship-default path (p16 off) sees zero behavior
6436                    // change. `caller_pc` is the recorded enclosing Call's
6437                    // pc + 1 — interp's resume point after the inlined
6438                    // frame pops.
6439                    if self.jit.p16_self_link_enabled
6440                        && depth_u8 > 0
6441                        && matches!(
6442                            inst.op(),
6443                            crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6444                        )
6445                    {
6446                        let results: u8 = match inst.op() {
6447                            crate::vm::isa::Op::Return0 => 0,
6448                            crate::vm::isa::Op::Return1 => 1,
6449                            _ => 0,
6450                        };
6451                        // Most recent Op::Call recorded at the caller's
6452                        // depth (`depth_u8 - 1`) is the frame this Return
6453                        // is unwinding from. Reverse scan stops at the
6454                        // first match.
6455                        let caller_depth = depth_u8 - 1;
6456                        let caller_call = rec.ops.iter().rev().find(|r| {
6457                            r.inline_depth == caller_depth
6458                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6459                        });
6460                        let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6461                        // v2.0 Track-R R3a — capture the caller's proto
6462                        // for the RetfRecord. LuaJIT `IR_RETF.op1`
6463                        // equivalent. For fib(28) the caller's proto
6464                        // equals the trace head; for future mutual
6465                        // recursion the recorded Op::Call's proto is the
6466                        // right target. Fallback to head_proto when no
6467                        // enclosing Call op was captured (mirrors
6468                        // `caller_pc`'s fallback to the Return's own pc).
6469                        let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6470                        rec.retfs.push(crate::jit::trace::RetfRecord {
6471                            from_depth: depth_u8,
6472                            to_depth: caller_depth,
6473                            results,
6474                            caller_pc,
6475                            proto: caller_proto,
6476                        });
6477                        // v2.0 Track-R R3a — DownRec close trigger:
6478                        // count RetfRecords on this recording whose
6479                        // `proto` matches `caller_proto` (LuaJIT
6480                        // `check_downrec_unroll` chain filter
6481                        // `op1 == ptref`). Threshold mirrors
6482                        // RECUNROLL_THRESHOLD; first trip stamps the
6483                        // `downrec_close` marker, subsequent retfs
6484                        // keep the marker without overwrite. The
6485                        // lowerer's end_idx picker routes through
6486                        // TraceEnd::DownRec when the marker is set;
6487                        // R3a's tail emit still falls through to R1's
6488                        // safe deopt path so fib(28) result stays
6489                        // 317_811. R3b lifts.
6490                        if rec.downrec_close.is_none() {
6491                            let caller_proto_ptr = caller_proto.as_ptr();
6492                            let prior_match_count = rec
6493                                .retfs
6494                                .iter()
6495                                .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6496                                .count();
6497                            // Strictly-greater-than threshold matches
6498                            // LuaJIT `count + J->tailcalled > recunroll`.
6499                            // The newly-pushed retf is already counted.
6500                            if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6501                                rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6502                                    return_pc: caller_pc,
6503                                    target_proto: caller_proto,
6504                                    depth_delta: 1,
6505                                });
6506                                // R2 close-cause taxonomy: tag the
6507                                // restart with `"downrec-restart"`. R3b
6508                                // adds `"downrec-stitch-failed"` when
6509                                // the lifted back-edge falls back to
6510                                // deopt.
6511                                self.jit.counters.bump_close_cause("downrec-restart");
6512                            }
6513                        }
6514                    }
6515                    // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6516                    // FIRST eligible Op::GetField site under env-gate
6517                    // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6518                    //   - R[B] is Value::Table with metatable.is_none()
6519                    //   - K[C] is Value::Str
6520                    //   - The string key actually occupies a hash slot
6521                    //     (so the IC's slot_idx is a real index, not
6522                    //     a probe sentinel).
6523                    // Once captured, subsequent GetFields skip this
6524                    // logic (rec.field_ic_snapshot.is_some() short-
6525                    // circuits). Env-OFF short-circuits on the cached
6526                    // atomic check inside field_ic_enabled().
6527                    if rec.field_ic_snapshot.is_none()
6528                        && matches!(inst.op(), crate::vm::isa::Op::GetField)
6529                        && crate::jit::trace_types::field_ic_enabled()
6530                    {
6531                        let b = inst.b();
6532                        let c_idx = inst.c() as usize;
6533                        let r_b = self.stack[(base + b) as usize];
6534                        if let Value::Table(g) = r_b
6535                            && g.metatable().is_none()
6536                            && c_idx < cl.proto.consts.len()
6537                            && let Value::Str(s) = cl.proto.consts[c_idx]
6538                        {
6539                            let key = Value::Str(s);
6540                            let tbl_ref = &*g;
6541                            if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6542                                && let Some(val) = tbl_ref.node_val_at(slot_idx)
6543                            {
6544                                let op_idx = rec.ops.len() as u32;
6545                                rec.field_ic_snapshot =
6546                                    Some(crate::jit::trace_types::FieldIcSnapshot {
6547                                        op_idx,
6548                                        nodes_len: tbl_ref.nodes_capacity() as u64,
6549                                        slot_idx: slot_idx as u64,
6550                                        key_ptr_bits: s.as_ptr() as u64,
6551                                        cached_val_tag: val.tag_byte(),
6552                                    });
6553                                self.jit.counters.field_ic_snapshot_captured += 1;
6554                            }
6555                        }
6556                    }
6557                    if !rec.push(op) {
6558                        // v2.0 Track-R R2 — recorder overflow
6559                        // (MAX_TRACE_LEN). Pre-R2 this site bumped
6560                        // `aborted` with no reason label, leaving the
6561                        // overflow indistinguishable from any other
6562                        // abort cause that might be added later.
6563                        // Tag it explicitly under the close-cause
6564                        // bucket so probes can tally overflow vs
6565                        // other abort causes in O(1).
6566                        self.jit.active_trace = None;
6567                        self.jit.counters.aborted += 1;
6568                        self.jit.counters.bump_close_cause("trace-overflow");
6569                    }
6570                }
6571            }
6572
6573            // P12-S3 — trace JIT dispatcher.
6574            //
6575            // When the dispatch loop is about to execute the op at
6576            // `pc` and there's a `numeric_only` CompiledTrace cached
6577            // for that `head_pc`, marshal the live regs into an
6578            // i64 buffer, jump into the trace, and resume the
6579            // interpreter at the returned continuation PC.
6580            //
6581            // Skipped (zero overhead) when `self.jit.trace_enabled` is
6582            // false; the lookup is a borrow + scan over
6583            // `cl.proto.traces`, which is a `Vec` whose size is at
6584            // most one entry per back-edge per Proto in practice.
6585            //
6586            // Marshalling contract — only Int slots survive the
6587            // round-trip cleanly (the reg_state ABI is `*mut i64`
6588            // with no tag info). Any non-Int slot in the affected
6589            // window forces a skip; interp takes over for one op
6590            // and the back-edge brings us back to try again next
6591            // pass (slots that were Nil/Float at one moment can
6592            // settle to Int by the time the next back-edge fires).
6593            //
6594            // A trace that comes back with `vm.jit.pending_err`
6595            // parked is treated as a deopt: clear the err, leave
6596            // the stack as the trace wrote it, and let the
6597            // interpreter run from the same `pc`. The trace itself
6598            // is left cached — a future entry might find no
6599            // metatable in the way and succeed.
6600            // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6601            // of 6 per-field Rc clones. proto.traces is now
6602            // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6603            // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6604            // operations per dispatch × 434k = ~2.2M refcount ops
6605            // (~1-2% gain measured separately).
6606            // v2.0 Track-R R3c — one-shot consume of the
6607            // `suppress_downrec_admit_once` flag. Set by the R3c
6608            // downrec post-invoke arm below when it force-deopts the
6609            // trace (caller-pc guard miss OR cycle-budget exhausted)
6610            // so the NEXT interpreter loop iteration skips the
6611            // downrec admit, lets interp run the op at `head_pc`,
6612            // advances `pc` past `head_pc`, and breaks the otherwise-
6613            // infinite admit loop. Reading + clearing here means a
6614            // single dispatch tick consumes the suppression — the
6615            // following tick re-admits naturally (with the budget
6616            // also reset by the deopt site).
6617            let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6618            self.jit.suppress_downrec_admit_once = false;
6619            if self.jit.trace_enabled
6620                && let Some(ct) = {
6621                    let traces = cl.proto.traces.borrow();
6622                    traces
6623                        .iter()
6624                        .find(|t| {
6625                            if t.head_pc != pc {
6626                                return false;
6627                            }
6628                            let is_downrec = t.downrec_link.is_some();
6629                            // v2.0 Track-R R3c — the one-shot suppress
6630                            // flag blocks any admit (primary or fallback)
6631                            // for `downrec_link`-bearing traces so the
6632                            // next interp iter can run the natural op
6633                            // at `head_pc` and advance past it. R3d's
6634                            // `dispatchable=true` lift means the suppress
6635                            // must also cover the primary `t.dispatchable`
6636                            // arm — otherwise the lifted lookup would
6637                            // immediately re-admit after a force-deopt
6638                            // and the infinite loop returns.
6639                            if is_downrec && downrec_admit_blocked {
6640                                return false;
6641                            }
6642                            // Primary arm: `dispatchable=true` traces
6643                            // (R3d-lifted DownRec or normal traces).
6644                            // Fallback arm: R3c-shape `dispatchable=false`
6645                            // DownRec traces (single-CMP guard kept
6646                            // pinned because the 90% miss-rate would
6647                            // make blind admit perf-negative).
6648                            t.dispatchable || is_downrec
6649                        })
6650                        .cloned()
6651                }
6652            {
6653                // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6654                // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6655                // across the entire dispatch block so the fields outlive
6656                // all consumers. Saves 5 Rc::clone per dispatch.
6657                let entry_fn = ct.entry;
6658                let head_pc_val = ct.head_pc;
6659                let window_size = ct.window_size;
6660                let exit_tags = &ct.exit_tags;
6661                let per_exit_tags = &ct.per_exit_tags;
6662                let per_exit_inline = &ct.per_exit_inline;
6663                let compile_entry_tags = &ct.entry_tags;
6664                let global_tag_res_kind = ct.global_tag_res_kind;
6665                let exit_hit_counts = &ct.exit_hit_counts;
6666                let max_stack = cl.proto.max_stack as usize;
6667                let window_size_us = window_size as usize;
6668                let base_us = base as usize;
6669                // P12-S4-step3a — `reg_state` sized to the trace's
6670                // `window_size`, which today equals max_stack but
6671                // S4-step3b will expand for inlined frames.
6672                // Marshal-in still only writes [0..max_stack); slots
6673                // [max_stack..window_size) are zero-initialised and
6674                // filled by the trace's own GetUpval / arith.
6675                // P13-S13-D — reuse the Vm's amortised buffers
6676                // instead of allocating fresh Vecs each dispatch.
6677                // mem::take leaves an empty placeholder we restore
6678                // at the end of the dispatch block (success +
6679                // deopt paths both fall through to the restore).
6680                let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6681                entry_tags.clear();
6682                entry_tags.reserve(max_stack);
6683                // v2.0 Track-R R3c — this trace was admitted via the
6684                // `downrec_link.is_some()` arm rather than the normal
6685                // `dispatchable=true` arm. The pre-invoke path
6686                // populates a reserved saved-PC slot just past the
6687                // normal register window so R3b's lowerer guard load
6688                // (`reg_state[window_size]`) compares the runtime
6689                // saved caller PC against the recorded `dr_return_pc`.
6690                //
6691                // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6692                // gate. After R3d lifts `dispatchable = true` for
6693                // multi-way guards, the trace's body still emits the
6694                // R3b/R3d sentinel shape on return — the saved-PC slot
6695                // and post-invoke classifier must keep firing.
6696                // `downrec_link.is_some()` is the unique structural
6697                // signal that the trace closes via DownRec.
6698                let is_downrec_entry = ct.downrec_link.is_some();
6699                let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6700                reg_state.clear();
6701                // v2.0 Track-R R3c — when admitting a downrec trace,
6702                // size the buffer to `window_size + 1` so the lowerer
6703                // can `load(I64, ..., reg_state, window_size * 8)`
6704                // for the saved caller PC guard input. The extra slot
6705                // is the LAST element so cranelift's existing
6706                // `0..window_size` accesses are unaffected.
6707                let reg_state_len = if is_downrec_entry {
6708                    window_size_us + 1
6709                } else {
6710                    window_size_us
6711                };
6712                reg_state.resize(reg_state_len, 0i64);
6713                let mut dispatch_ok = true;
6714                for i in 0..max_stack {
6715                    let v = self.stack[base_us + i];
6716                    let (tag, raw) = v.unpack();
6717                    entry_tags.push(tag);
6718                    // P12-S12-C v3 — entry tag guard. The trace's IR
6719                    // is specialised to the compile-time entry tags
6720                    // (via current_kinds propagation from
6721                    // from_entry_tag). A runtime tag mismatch means
6722                    // body ops would mis-interpret raw bits (e.g.
6723                    // treat a Str pointer as Int payload → garbage).
6724                    // Skip dispatch on mismatch so interp handles
6725                    // this entry shape; the trace stays cached for
6726                    // future entries that match.
6727                    if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6728                        dispatch_ok = false;
6729                        break;
6730                    }
6731                    match tag {
6732                        // Int / Float / Table / Closure / Nil all
6733                        // marshal to raw payload cleanly; the trace's
6734                        // IR treats the 8-byte slot as an i64 (with
6735                        // f64 ops bitcasting around the boundary).
6736                        crate::runtime::value::raw::INT
6737                        | crate::runtime::value::raw::FLOAT
6738                        | crate::runtime::value::raw::TABLE
6739                        | crate::runtime::value::raw::CLOSURE
6740                        // P12-S12-B-v2 — Native iter slots (e.g.
6741                        // R[A] = ipairs_iter) are present in
6742                        // generic-for traces; the raw bits are a
6743                        // valid `*mut NativeClosure` and round-trip
6744                        // cleanly.
6745                        | crate::runtime::value::raw::NATIVE
6746                        // P12-S12-C v1 — Str slots show up in
6747                        // string-concat traces; raw bits = `*mut
6748                        // LuaStr` (interned, GC-managed). Round-
6749                        // trips cleanly as a heap pointer.
6750                        | crate::runtime::value::raw::STR
6751                        | crate::runtime::value::raw::NIL => {
6752                            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6753                            reg_state[i] = unsafe { raw.zero as i64 };
6754                        }
6755                        _ => {
6756                            dispatch_ok = false;
6757                            break;
6758                        }
6759                    }
6760                }
6761
6762                if dispatch_ok {
6763                    debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6764                    self.jit.pending_err = None;
6765                    // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6766                    // count. A cmp@d>0 side-exit calls the materialize
6767                    // helper which pushes inlined frames onto
6768                    // `vm.frames`; on deopt those frames must be popped
6769                    // before falling through to the interpreter, else
6770                    // the stack grows unboundedly per deopted dispatch.
6771                    let pre_frames = self.frames.len();
6772                    // v2.0 Track-R R3c — saved-PC slot population. The
6773                    // recorded `dr_return_pc` on the closing trace is
6774                    // the caller's resume PC captured at a depth>0
6775                    // Return push (recorder push site, see R3a verdict
6776                    // §3). The natural runtime analogue for self-
6777                    // stitch is the dispatching frame's PARENT frame's
6778                    // PC: the trace's head_pc sits inside a Lua frame,
6779                    // and the parent (caller) frame's `pc` is what
6780                    // luna would observe as `[base-8]` in the LJ
6781                    // `asm_retf` shape (`lj_asm_arm64.h:565`). When
6782                    // the parent isn't a Lua frame (top-level dispatch
6783                    // — first invocation through `call_value`), no
6784                    // saved PC exists; we write 0, which can never
6785                    // match the recorded `dr_return_pc` because R3b
6786                    // pins the invariant `dr_return_pc != 0`
6787                    // (`crates/luna-jit/src/jit_backend/trace.rs:7206
6788                    // debug_assert!(dr_return_pc != 0, ...)`).
6789                    if is_downrec_entry {
6790                        let saved_pc: i64 = if pre_frames >= 2 {
6791                            match &self.frames[pre_frames - 2] {
6792                                CallFrame::Lua(parent) => parent.pc as i64,
6793                                CallFrame::Cont(_) => 0,
6794                            }
6795                        } else {
6796                            0
6797                        };
6798                        reg_state[window_size_us] = saved_pc;
6799                    }
6800                    // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
6801                    // diagnostic hook. The probe fires only while
6802                    // `counters.dispatched` is still 0, i.e. on the first trace
6803                    // dispatch counted (regardless of JIT vs AOT origin — both go
6804                    // through this arm), letting the AOT smoke test verify mcode
6805                    // actually executed. Guarded behind `OnceLock` so the env read
6806                    // is a one-time cost per process; the counter gate gives the
6807                    // smoke test a single deterministic `aot_trace_fired pc=N` line.
6808                    if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
6809                        eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
6810                    }
6811                    let continuation_pc = {
6812                        // v1.1 A1 Session A — chunk_compiler.enter
6813                        // (CraneliftBackend delegates to enter_jit;
6814                        // NullJitBackend returns an inert guard).
6815                        let vm_ptr: *mut Vm = self;
6816                        let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
6817                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6818                        unsafe { entry_fn(reg_state.as_mut_ptr()) }
6819                    };
6820                    self.jit.counters.dispatched += 1;
6821
6822                    if self.jit.pending_err.is_some() {
6823                        self.jit.pending_err = None;
6824                        self.jit.counters.deopt += 1;
6825                        // P12-S4-step4b-C-2 — unwind any helper-pushed
6826                        // inlined frames before the interpreter resumes.
6827                        // Don't restore reg_state — the trace's partial
6828                        // writes are discarded; interp re-executes from
6829                        // the original `pc`.
6830                        while self.frames.len() > pre_frames {
6831                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
6832                        }
6833                        if is_downrec_entry {
6834                            // v2.0 Track-R R3c — pending_err observed
6835                            // mid-trace inside a downrec admit. Treat
6836                            // it as a guard miss: bump `downrec_deopt`
6837                            // and suppress the next downrec admit so
6838                            // interp can advance past `head_pc` and
6839                            // the same trace doesn't immediately re-
6840                            // fire on the next loop iteration.
6841                            self.jit.counters.downrec_deopt += 1;
6842                            self.jit.suppress_downrec_admit_once = true;
6843                        }
6844                    } else if is_downrec_entry && {
6845                        // v2.0 Track-R R3d — only enter the R3c/R3d
6846                        // downrec classifier for returns whose shape
6847                        // matches the lowerer's `downrec_idx_opt` tail
6848                        // emit: either the stitch_blk DOWNREC sentinel
6849                        // (HIT) or the deopt_blk GLOBAL-sentinel-with-
6850                        // body==head_pc (MISS via guard fail). Any
6851                        // other return from a downrec trace (intermediate
6852                        // body cmp side-exit, GetField inference fail,
6853                        // etc.) carries a different sentinel/body shape
6854                        // and means the body exited BEFORE reaching the
6855                        // downrec close — classify those through the
6856                        // normal decode path (else branch below) so
6857                        // reg_state restores + pc advances correctly.
6858                        // The pre-R3d behavior (R3c) classified them all
6859                        // as MISS and skipped the normal restore, which
6860                        // inflated `downrec_deopt` with non-downrec
6861                        // events and lost the trace's mid-flight writes.
6862                        let raw_ret = continuation_pc as u64;
6863                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
6864                        let sentinel_code = if from_side_trace {
6865                            ((raw_ret >> 56) & 0x7F) as u32
6866                        } else {
6867                            0
6868                        };
6869                        let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6870                        let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
6871                            crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
6872                            0,
6873                        );
6874                        from_side_trace
6875                            && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
6876                                || (sentinel_code == global_deopt_code
6877                                    && raw_body == head_pc_val as u64))
6878                    } {
6879                        // R3d downrec event classifier.
6880                        let raw_ret = continuation_pc as u64;
6881                        let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6882                        if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
6883                            // Guard HIT — saved_pc matched one of the
6884                            // baked candidates and the trace's
6885                            // `stitch_blk` arm returned the DOWNREC
6886                            // sentinel. Cycle-safety checkpoint:
6887                            // decrement budget; on underflow,
6888                            // reclassify as deopt + reset budget.
6889                            // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
6890                            // ~all natural HITs in a hot loop fire
6891                            // before reset pressure.
6892                            if self.jit.stitch_depth_remaining > 0 {
6893                                self.jit.stitch_depth_remaining -= 1;
6894                                self.jit.counters.downrec_dispatched += 1;
6895                            } else {
6896                                self.jit.counters.downrec_deopt += 1;
6897                                self.jit.stitch_depth_remaining =
6898                                    crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
6899                            }
6900                        } else {
6901                            // Guard MISS via the lowerer's deopt_blk
6902                            // arm (GLOBAL sentinel + body == head_pc).
6903                            // The deopt_blk emit performs the
6904                            // store-back via `emit_store_back_and_return_pc`,
6905                            // so the live stack already reflects the
6906                            // body's writes; no extra restore needed
6907                            // from the dispatcher side.
6908                            self.jit.counters.downrec_deopt += 1;
6909                        }
6910                        self.jit.suppress_downrec_admit_once = true;
6911                        // Pop helper-pushed inlined frames (defensive —
6912                        // R3d's emit shape doesn't push frames in the
6913                        // tail, but a body side-exit before reaching
6914                        // the tail may have via the materialize helper).
6915                        while self.frames.len() > pre_frames {
6916                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
6917                        }
6918                        self.jit.reg_state_buf = reg_state;
6919                        self.jit.entry_tags_buf = entry_tags;
6920                        continue;
6921                    } else {
6922                        // Restore each slot using the trace's
6923                        // exit-tag analysis (see ExitTag docs).
6924                        // P12-S4-step4b-C-2 — decode the IR's
6925                        // side-exit shape. Upper 32 bits = (site_idx
6926                        // + 1) for inline cmp side-exits, 0 for
6927                        // legacy clean-tail / non-inline exits.
6928                        // P15-A v2-C-A0 — decode lives in
6929                        // `crate::jit::trace::decode_exit_shape` so
6930                        // v2-C-A3 can reuse it with the SIDE TRACE's
6931                        // shape inputs when the sentinel bit
6932                        // (v2-C-A2) is set on `raw_ret`.
6933                        let raw_ret = continuation_pc as u64;
6934                        // P15-A v2-C-A3 — side-trace return decode.
6935                        // Bit 63 of `raw_ret` is the side-trace
6936                        // marker the parent's IR OR'd in when it
6937                        // tail-called into a wired child trace.
6938                        // Bits 56..=62 carry the sentinel code (the
6939                        // cache key into the parent's
6940                        // `side_trace_cache`); bits 0..=55 are the
6941                        // child's own return value (encoded site or
6942                        // plain cont_pc) which we MUST decode using
6943                        // the CHILD's per_exit_inline / per_exit_tags
6944                        // / exit_tags / exit_hit_counts — not the
6945                        // parent's. The dispatcher snapshot read
6946                        // above holds the parent's shapes; when bit
6947                        // 63 is set we re-fetch the child's via the
6948                        // sentinel-keyed cache.
6949                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
6950                        let (
6951                            decode_inline,
6952                            decode_tags,
6953                            decode_exit_tags,
6954                            decode_hit_counts,
6955                            decode_body,
6956                        ) = if from_side_trace {
6957                            let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6958                            let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6959                            let traces = cl.proto.traces.borrow();
6960                            let child_idx = traces
6961                                .iter()
6962                                .find(|t| t.head_pc == head_pc_val)
6963                                .and_then(|pct| {
6964                                    pct.side_trace_cache.borrow().get(&sentinel_code).copied()
6965                                });
6966                            if let Some(idx) = child_idx
6967                                && let Some(child) = traces.get(idx as usize)
6968                            {
6969                                if crate::jit::trace::v2c_probe_enabled() {
6970                                    eprintln!(
6971                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
6972                                        sentinel_code,
6973                                        body,
6974                                        idx,
6975                                        child.n_ops,
6976                                        child.head_pc,
6977                                        child.window_size,
6978                                        pc,
6979                                        window_size,
6980                                        child.dispatchable,
6981                                        child.is_inline_abort_close,
6982                                    );
6983                                }
6984                                (
6985                                    child.per_exit_inline.clone(),
6986                                    child.per_exit_tags.clone(),
6987                                    child.exit_tags.clone(),
6988                                    child.exit_hit_counts.clone(),
6989                                    body,
6990                                )
6991                            } else {
6992                                if crate::jit::trace::v2c_probe_enabled() {
6993                                    eprintln!(
6994                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
6995                                        sentinel_code, body,
6996                                    );
6997                                }
6998                                // Cache miss — fall back to parent
6999                                // shapes with the body bits. Best-
7000                                // effort; the trace_side_trace_
7001                                // shape_mismatch_count records this
7002                                // path indirectly (close-handler
7003                                // skips wiring on mismatch so we
7004                                // shouldn't reach here when shape
7005                                // gate held).
7006                                (
7007                                    per_exit_inline.clone(),
7008                                    per_exit_tags.clone(),
7009                                    exit_tags.clone(),
7010                                    exit_hit_counts.clone(),
7011                                    body,
7012                                )
7013                            }
7014                        } else {
7015                            // P15-A v2-D — dispatcher-level side-trace
7016                            // invocation. Replaces v2-C's universal IR
7017                            // gate (`load + icmp + brif` at every
7018                            // emit_store_back callsite, which A6/A7
7019                            // measured as a net perf regression).
7020                            // History — A8 fast-path (reverted, see below): it
7021                            // skipped the tentative decode + child lookup
7022                            // entirely when `has_any_side_wired == false` (the
7023                            // common case until the first side trace compiles
7024                            // for this parent). For fib_10_x10k and other tight
7025                            // short-trace workloads where most parent traces
7026                            // never get a wired child, it was meant to collapse
7027                            // the v2-D overhead to a single `Cell::get()` on the
7028                            // cold path. The code below no longer does this.
7029                            // A8-revert: A8 had `parent_has_side` short-
7030                            // circuit + snapshot hoist; mini N=3 showed
7031                            // A8 lost the btrees_d8 1.02× win (dropped
7032                            // to 0.95×) WITHOUT helping fib_10 (same
7033                            // 0.86×). Drop A8 — accept the always-run
7034                            // v2-D path; the tentative decode + cell
7035                            // load is cheaper than the cost A8 added.
7036                            {
7037                                let tentative = crate::jit::trace::decode_exit_shape(
7038                                    raw_ret,
7039                                    per_exit_inline,
7040                                    per_exit_tags,
7041                                    exit_tags,
7042                                );
7043                                let tentative_exit_idx = tentative.exit_hit_idx;
7044                                let child_invoke = {
7045                                    let traces = cl.proto.traces.borrow();
7046                                    traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
7047                                        |pct| {
7048                                            let cell =
7049                                                pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
7050                                            let fn_ptr = cell.get();
7051                                            if fn_ptr.is_null() {
7052                                                return None;
7053                                            }
7054                                            traces
7055                                                .iter()
7056                                                .find(|t| {
7057                                                    t.entry as *const () as *const u8 == fn_ptr
7058                                                })
7059                                                .map(|child| {
7060                                                    (
7061                                                        child.entry,
7062                                                        child.per_exit_inline.clone(),
7063                                                        child.per_exit_tags.clone(),
7064                                                        child.exit_tags.clone(),
7065                                                        child.exit_hit_counts.clone(),
7066                                                    )
7067                                                })
7068                                        },
7069                                    )
7070                                };
7071                                if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
7072                                    let child_raw_ret = {
7073                                        // v1.1 A1 Session A — chunk_compiler.enter
7074                                        // (side-trace entry).
7075                                        let vm_ptr: *mut Vm = self;
7076                                        let _guard =
7077                                            self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
7078                                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7079                                        unsafe { cent(reg_state.as_mut_ptr()) }
7080                                    };
7081                                    (cpi, cpt, cet, chc, child_raw_ret as u64)
7082                                } else {
7083                                    (
7084                                        per_exit_inline.clone(),
7085                                        per_exit_tags.clone(),
7086                                        exit_tags.clone(),
7087                                        exit_hit_counts.clone(),
7088                                        raw_ret,
7089                                    )
7090                                }
7091                            }
7092                        };
7093                        let decoded = crate::jit::trace::decode_exit_shape(
7094                            decode_body,
7095                            &decode_inline,
7096                            &decode_tags,
7097                            &decode_exit_tags,
7098                        );
7099                        let site_id = decoded.site_id;
7100                        let cont_pc = decoded.cont_pc;
7101                        let exit_hit_idx = decoded.exit_hit_idx;
7102                        let exit_tags_for_pc = decoded.exit_tags_for_pc;
7103                        // P15-A v2-C-A3 — for side-trace returns
7104                        // force using_global_exit_tags=false so the
7105                        // restore loop always takes the per-tag slow
7106                        // path (the child's global_tag_res_kind
7107                        // classification isn't plumbed through yet
7108                        // — TODO for a future polish step).
7109                        let using_global_exit_tags = if from_side_trace {
7110                            false
7111                        } else {
7112                            decoded.using_global_exit_tags
7113                        };
7114                        // P15-prep — increment the counter (saturate at u32::MAX to avoid
7115                        // wrap on long runs). NOTE(review): the `v + 1` threshold test below
7116                        // is not saturating — at `v == u32::MAX` it overflows (panics with
7117                        // overflow checks on, wraps to 0 otherwise); consider `checked_add`.
7118                        // P15-A v1 — track whether this increment is the one that crossed
7119                        // `HOTEXIT_THRESHOLD` (transition: previous v < threshold, new v ==
7120                        // threshold). The side-trace start is deferred to just before
7121                        // `continue;` so vm.stack and frame.pc are fully restored (the
7122                        // snapshot reads post-restore values).
7123                        let mut side_trace_should_start = false;
7124                        // P15-A v2-C-A3 — for side-trace returns the
7125                        // counter to bump is the CHILD's (decoded
7126                        // shape lookup) — `exit_hit_idx` is into the
7127                        // decoded layout, so use the matching
7128                        // `decode_hit_counts`. For parent decode
7129                        // they're aliased (clone of the parent's
7130                        // own Rc).
7131                        if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7132                            let v = c.get();
7133                            if v < u32::MAX {
7134                                c.set(v + 1);
7135                            }
7136                            if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7137                                && self.jit.active_trace.is_none()
7138                                && self.jit.trace_enabled
7139                            {
7140                                side_trace_should_start = true;
7141                            }
7142                        }
7143                        // P12-S4-step4b-C-2 — at an inline cmp@d>0
7144                        // side-exit, the helper has pushed N frames on
7145                        // top of the trace head's frame and
7146                        // `exit_tags_for_pc.len()` covers the full
7147                        // window (caller + each inlined frame's
7148                        // window). Slots beyond `max_stack` belong to
7149                        // an inlined frame: their `Untouched` entries
7150                        // default to Nil (no entry-tag fallback —
7151                        // marshal-in only captured caller slots) and
7152                        // we write to interp stack at `base + i` which
7153                        // mirrors `op_offsets`-derived layout.
7154                        let slot_count = exit_tags_for_pc.len();
7155                        // P12-S4-step4b-C-2 — the helper only extends
7156                        // vm.stack up to the deepest pushed frame's
7157                        // window, but the exit_tags snapshot covers
7158                        // the trace's full `window_size` (which
7159                        // includes depth-N+1 scratch slots that the
7160                        // trace's IR may have written without a
7161                        // matching pushed frame). Extend with Nil so
7162                        // the write at the tail doesn't panic; these
7163                        // slots get overwritten by the writeback loop
7164                        // and won't leak meaningful data past the
7165                        // pushed frames' R[0..max_stack) windows.
7166                        if self.stack.len() < base_us + slot_count {
7167                            self.stack
7168                                .resize(base_us + slot_count, crate::runtime::Value::Nil);
7169                        }
7170                        // P13-S13-E — fast-path restore loop. When
7171                        // we landed on the global `exit_tags`,
7172                        // dispatch on the compile-time
7173                        // classification: skip the loop entirely
7174                        // for `AllUntouched`, do a tag-free
7175                        // `Value::Int(...)` write per slot for
7176                        // `AllInt`, otherwise fall through to the
7177                        // general match-arm loop. site_id > 0
7178                        // (inline frame mat) and per_exit_tags
7179                        // hits always take the general path —
7180                        // their per-side-exit shapes aren't
7181                        // pre-classified yet.
7182                        let fast_path_taken = if using_global_exit_tags {
7183                            match global_tag_res_kind {
7184                                crate::jit::trace::TagResKind::AllUntouched => {
7185                                    // No-op: vm.stack already
7186                                    // matches the trace's post-
7187                                    // entry state for these
7188                                    // slots (entry values not
7189                                    // overridden, or already
7190                                    // spilled by helpers).
7191                                    true
7192                                }
7193                                crate::jit::trace::TagResKind::AllInt => {
7194                                    for i in 0..slot_count {
7195                                        self.stack[base_us + i] =
7196                                            crate::runtime::Value::Int(reg_state[i]);
7197                                    }
7198                                    true
7199                                }
7200                                crate::jit::trace::TagResKind::Mixed => false,
7201                            }
7202                        } else {
7203                            false
7204                        };
7205                        if !fast_path_taken {
7206                            for i in 0..slot_count {
7207                                let tag = match exit_tags_for_pc[i] {
7208                                    crate::jit::trace::ExitTag::Untouched => {
7209                                        if i < max_stack {
7210                                            entry_tags[i]
7211                                        } else {
7212                                            crate::runtime::value::raw::NIL
7213                                        }
7214                                    }
7215                                    crate::jit::trace::ExitTag::Int => {
7216                                        crate::runtime::value::raw::INT
7217                                    }
7218                                    crate::jit::trace::ExitTag::Float => {
7219                                        crate::runtime::value::raw::FLOAT
7220                                    }
7221                                    crate::jit::trace::ExitTag::Table => {
7222                                        crate::runtime::value::raw::TABLE
7223                                    }
7224                                    crate::jit::trace::ExitTag::Closure => {
7225                                        crate::runtime::value::raw::CLOSURE
7226                                    }
7227                                    // P12-S6-A1 — trace actively wrote Nil
7228                                    // to this slot (e.g. via Op::LoadNil).
7229                                    // Restore as Nil regardless of the entry
7230                                    // tag, since the i64 payload is 0 and
7231                                    // packing as the entry tag (e.g. INT)
7232                                    // would mis-type the slot.
7233                                    crate::jit::trace::ExitTag::Nil => {
7234                                        crate::runtime::value::raw::NIL
7235                                    }
7236                                    // P12-S12-C v2 — trace wrote a Str ptr
7237                                    // to this slot (LoadK Str / Move from
7238                                    // Str / Concat result). Restore as
7239                                    // Value::Str with raw bits round-
7240                                    // tripped.
7241                                    crate::jit::trace::ExitTag::Str => {
7242                                        crate::runtime::value::raw::STR
7243                                    }
7244                                };
7245                                // SAFETY: tag is from a verified slot
7246                                // (entry validated above) or pinned by
7247                                // the exit-tag analysis (INT/FLOAT/TABLE/CLOSURE/NIL/STR).
7248                                // The raw payload sits in reg_state[i].
7249                                // Stack was extended by the materialize
7250                                // helper for inline frames.
7251                                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7252                                self.stack[base_us + i] = unsafe {
7253                                    Value::pack(
7254                                        tag,
7255                                        crate::runtime::value::RawVal {
7256                                            zero: reg_state[i] as u64,
7257                                        },
7258                                    )
7259                                };
7260                            }
7261                        }
7262                        // P12-S4-step4b-C-2 — for non-inline exits the
7263                        // helper was never called (no metas chain for
7264                        // this cont_pc), so `frames.last()` is the
7265                        // trace head's frame and we set its pc to
7266                        // cont_pc as before. For inline exits the
7267                        // helper baked the side-exit PC into the
7268                        // innermost frame's `pc` at push time
7269                        // (chain.last().pc was overridden at emit),
7270                        // so this assignment to `frames.last_mut().pc
7271                        // = cont_pc` is a redundant-but-correct
7272                        // confirmation.
7273                        let _ = &per_exit_inline; // hold the Rc alive across dispatch
7274                        // P12-S4-step4b-C-2 — for inline side-exits the
7275                        // helper has pushed N frames on top. The trace
7276                        // head frame is at `pre_frames - 1`; set its
7277                        // pc to `head_resume_pc` so when the chain
7278                        // eventually pops back to it, interp resumes
7279                        // PAST the trace's depth-0 Op::Call instead of
7280                        // restarting from `head_pc` and re-triggering
7281                        // dispatch (infinite loop). The innermost
7282                        // (helper-pushed) frame already has its pc
7283                        // baked in at compile time, but we still
7284                        // assign `cont_pc` below for parity with the
7285                        // non-inline path (no-op).
7286                        if site_id > 0 {
7287                            let idx = (site_id - 1) as usize;
7288                            let head_resume_pc = decode_inline[idx].head_resume_pc;
7289                            if pre_frames > 0 {
7290                                if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7291                                    f.pc = head_resume_pc;
7292                                }
7293                            }
7294                        }
7295                        let frames_len_now = self.frames.len();
7296                        // SAFETY: `unwrap_unchecked` requires `self.frames` to be non-empty; the trace head's frame (plus, for inline exits, the helper-pushed frames on top of it) is still on the frame stack here, so `last_mut()` is `Some`.
7297                        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7298                            CallFrame::Lua(fmut) => {
7299                                if crate::jit::trace::v2c_probe_enabled() {
7300                                    eprintln!(
7301                                        "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
7302                                        from_side_trace,
7303                                        raw_ret,
7304                                        fmut.pc,
7305                                        cont_pc,
7306                                        site_id,
7307                                        frames_len_now,
7308                                        pre_frames,
7309                                        max_stack,
7310                                    );
7311                                }
7312                                fmut.pc = cont_pc;
7313                            }
7314                            _ => unreachable!("Cont frame at trace dispatch"),
7315                        }
7316                        // P15-A v1 — deferred side-trace start. The
7317                        // increment block above flagged this exit's
7318                        // hit count crossing HOTEXIT_THRESHOLD; now
7319                        // that vm.stack is restored and frame.pc is
7320                        // settled, snapshot entry_tags from the
7321                        // resume frame's window and create the
7322                        // recorder. The recorder's first push fires
7323                        // on the next interp iteration at cont_pc.
7324                        //
7325                        // `head_proto` for the side trace = cl.proto
7326                        // (trace JIT only inlines self-recursive
7327                        // calls today, so cont_pc always lands in
7328                        // the same proto as the parent). Frame base
7329                        // is the resume frame (top of `self.frames`
7330                        // — inline-pushed frames moved this).
7331                        if side_trace_should_start {
7332                            let (resume_base, resume_proto) = match self.frames.last() {
7333                                Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7334                                _ => (base_us, cl.proto),
7335                            };
7336                            let resume_max_stack = resume_proto.max_stack as usize;
7337                            let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7338                            // Extend stack if cont_pc's frame window
7339                            // overhangs the current stack len (rare,
7340                            // but inline-pushed frame stack writes
7341                            // only covered the trace's writeback).
7342                            if self.stack.len() < resume_base + resume_max_stack {
7343                                self.stack.resize(
7344                                    resume_base + resume_max_stack,
7345                                    crate::runtime::Value::Nil,
7346                                );
7347                            }
7348                            for i in 0..resume_max_stack {
7349                                let (tag, _) = self.stack[resume_base + i].unpack();
7350                                side_entry_tags.push(tag);
7351                            }
7352                            self.jit.active_trace =
7353                                Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7354                                    resume_proto,
7355                                    cont_pc,
7356                                    side_entry_tags,
7357                                    cl.proto,
7358                                    head_pc_val,
7359                                    exit_hit_idx,
7360                                )));
7361                            self.jit.recording_frame_base = self.frames.len() - 1;
7362                            self.jit.counters.side_trace_started += 1;
7363                        }
7364                        // P13-S13-D — put the dispatch buffers back
7365                        // before the `continue;` so the next
7366                        // dispatch picks up the same allocation.
7367                        self.jit.reg_state_buf = reg_state;
7368                        self.jit.entry_tags_buf = entry_tags;
7369                        continue;
7370                    }
7371                }
7372                // P13-S13-D — !dispatch_ok / deopt path / non-cont
7373                // exit also restore the buffers before falling
7374                // through to the interp.
7375                self.jit.reg_state_buf = reg_state;
7376                self.jit.entry_tags_buf = entry_tags;
7377            }
7378
7379            // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
7380            // hook code that consults `currentpc = savedpc - 1` lands on the
7381            // instruction now executing. luna mirrors that by advancing
7382            // `f.pc` to `pc + 1` before the hook block — local_at /
7383            // getinfo / line attribution all read f.pc, and the existing
7384            // `pc - 1` convention in those helpers then yields the current
7385            // instruction's pc (db.lua :696: local `A` visible at the
7386            // chunk's return line once OP_CLOSURE has advanced pc).
7387            //
7388            // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
7389            // (cont frames drained above) so the and_then/Option layers are
7390            // dead weight.
7391            // SAFETY: `unwrap_unchecked` requires `self.frames` to be non-empty; the top frame is the Lua frame currently being executed (cont frames were drained earlier in this iteration), so `last_mut()` is `Some`.
7392            match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7393                CallFrame::Lua(fmut) => fmut.pc = pc + 1,
7394                _ => unreachable!("Cont frame at pc bump"),
7395            }
7396
7397            // count + line hooks (PUC traceexec): before executing the
7398            // instruction. Skipped while the hook itself runs.
7399            // (Parens here are load-bearing — without them `&&` binds tighter
7400            // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7401            // letting a Lua line hook recurse into itself → stack overflow
7402            // on db.lua line-hook assertions. Matches the `hook_call_with` /
7403            // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7404            if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7405                let lines = &cl.proto.lines;
7406                let cur_line = if lines.is_empty() {
7407                    None
7408                } else {
7409                    Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7410                };
7411                // count hook: fire every `count_base` instructions
7412                if self.hook.count {
7413                    self.hook.count_left -= 1;
7414                    if self.hook.count_left <= 0 {
7415                        self.hook.count_left = self.hook.count_base;
7416                        // hooked function is the running Lua frame: its frame
7417                        // is on the stack, so no synthetic C level is needed.
7418                        self.run_hook(b"count", cur_line, false)?;
7419                    }
7420                }
7421                // line hook: fire on a fresh frame, a backward jump (loop), or a
7422                // change of source line.
7423                if self.hook.line {
7424                    if lines.is_empty() {
7425                        // PUC: a stripped chunk has no line info, so
7426                        // `getfuncline` returns -1. The line hook still fires
7427                        // on the first instruction of the new frame (where
7428                        // `npci <= oldpc` holds at oldpc=0), with the line
7429                        // pushed as `nil` instead of an integer (db.lua :1030
7430                        // "hook called without debug info for 1st instruction").
7431                        if oldpc == u32::MAX {
7432                            self.run_hook(b"line", None, false)?;
7433                            self.top_frame_mut().hook_oldpc = pc;
7434                        }
7435                    } else {
7436                        let newline = lines[(pc as usize).min(lines.len() - 1)];
7437                        // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7438                        // on a backward jump (`pc < oldpc` — strict; an equal pc
7439                        // would re-fire the install-site after `oldpc = pc`),
7440                        // or when the source line changes.
7441                        let fire = oldpc == u32::MAX
7442                            || pc < oldpc
7443                            || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7444                        if fire {
7445                            self.run_hook(b"line", Some(newline as i64), false)?;
7446                        }
7447                        self.top_frame_mut().hook_oldpc = pc;
7448                    }
7449                }
7450            }
7451
7452            match inst.op() {
7453                Op::Move => {
7454                    let v = self.r(base, inst.b());
7455                    self.set_r(base, inst.a(), v);
7456                }
7457                Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7458                Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7459                Op::LoadK => {
7460                    let v = cl.proto.consts[inst.bx() as usize];
7461                    self.set_r(base, inst.a(), v);
7462                }
7463                Op::LoadKx => {
7464                    let extra = cl.proto.code[self.pc_of_top() as usize];
7465                    self.bump_pc();
7466                    let v = cl.proto.consts[extra.ax() as usize];
7467                    self.set_r(base, inst.a(), v);
7468                }
7469                Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7470                Op::LFalseSkip => {
7471                    self.set_r(base, inst.a(), Value::Bool(false));
7472                    self.bump_pc();
7473                }
7474                Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7475                Op::LoadNil => {
7476                    let a = inst.a();
7477                    for i in 0..=inst.b() {
7478                        self.set_r(base, a + i, Value::Nil);
7479                    }
7480                }
7481                Op::GetUpval => {
7482                    let v = self.upval_get(cl, inst.b());
7483                    self.set_r(base, inst.a(), v);
7484                }
7485                Op::SetUpval => {
7486                    let v = self.r(base, inst.a());
7487                    self.upval_set(cl, inst.b(), v);
7488                }
7489                Op::GetTabUp => {
7490                    let t = self.upval_get(cl, inst.b());
7491                    let key = cl.proto.consts[inst.c() as usize];
7492                    self.op_index(t, key, base + inst.a())?;
7493                }
7494                Op::GetTable => {
7495                    let t = self.r(base, inst.b());
7496                    let key = self.r(base, inst.c());
7497                    self.op_index(t, key, base + inst.a())?;
7498                }
7499                Op::GetI => {
7500                    let t = self.r(base, inst.b());
7501                    self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7502                }
7503                Op::GetField => {
7504                    let t = self.r(base, inst.b());
7505                    let key = cl.proto.consts[inst.c() as usize];
7506                    // v1.2 D4 A1 — fast path: known-Str const key + no
7507                    // metatable on the table → skip `op_index` /
7508                    // `index_step`'s MAX_TAG_LOOP setup and the outer
7509                    // `Value` match. Falls through to the slow path
7510                    // unchanged when either invariant breaks (so
7511                    // `__index` metamethods, non-Table receivers, and
7512                    // non-Str keys behave exactly as before).
7513                    if let Value::Table(tb) = t
7514                        && tb.metatable().is_none()
7515                        && let Value::Str(s) = key
7516                    {
7517                        let v = tb.get_str(s);
7518                        self.stack[(base + inst.a()) as usize] = v;
7519                    } else {
7520                        self.op_index(t, key, base + inst.a())?;
7521                    }
7522                }
7523                Op::SetTabUp => {
7524                    let t = self.upval_get(cl, inst.a());
7525                    let key = cl.proto.consts[inst.b() as usize];
7526                    let v = self.r(base, inst.c());
7527                    self.op_newindex(t, key, v)?;
7528                }
7529                Op::SetTable => {
7530                    let t = self.r(base, inst.a());
7531                    let key = self.r(base, inst.b());
7532                    let v = self.r(base, inst.c());
7533                    self.op_newindex(t, key, v)?;
7534                }
7535                Op::SetI => {
7536                    let t = self.r(base, inst.a());
7537                    let v = self.r(base, inst.c());
7538                    self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7539                }
7540                Op::SetField => {
7541                    let t = self.r(base, inst.a());
7542                    let key = cl.proto.consts[inst.b() as usize];
7543                    let v = self.r(base, inst.c());
7544                    self.op_newindex(t, key, v)?;
7545                }
7546                Op::NewTable => {
7547                    let t = self.heap.new_table();
7548                    self.set_r(base, inst.a(), Value::Table(t));
7549                    self.maybe_collect_garbage(base + inst.a() + 1);
7550                }
7551                Op::SetList => {
7552                    let a = inst.a();
7553                    let abs_a = base + a;
7554                    let n = if inst.b() == 0 {
7555                        self.top - (abs_a + 1)
7556                    } else {
7557                        inst.b()
7558                    };
7559                    let offset = if inst.k() {
7560                        let extra = cl.proto.code[self.pc_of_top() as usize];
7561                        self.bump_pc();
7562                        extra.ax() as i64
7563                    } else {
7564                        inst.c() as i64
7565                    };
7566                    let Value::Table(t) = self.r(base, a) else {
7567                        unreachable!("SETLIST on non-table");
7568                    };
7569                    for i in 1..=n {
7570                        let v = self.r(base, a + i);
7571                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7572                        if let Err(TableError::Overflow) =
7573                            unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7574                        {
7575                            return Err(self.rt_err("table overflow"));
7576                        }
7577                    }
7578                    // one barrier_back covers every store this op did — PUC's
7579                    // `luaC_barrierback_` once-per-table optimisation
7580                    self.heap
7581                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7582                    // the element temps above the table are now consumed
7583                    self.maybe_collect_garbage(base + a + 1);
7584                }
7585                Op::SelfOp => {
7586                    let o = self.r(base, inst.b());
7587                    self.set_r(base, inst.a() + 1, o);
7588                    // PUC OP_SELF's C is a constant index when the k-flag is
7589                    // set; otherwise it points to a register that holds the
7590                    // (constant-loaded) key. luna's compiler falls back to the
7591                    // register form when the constant index exceeds OP_SELF's
7592                    // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7593                    // a table with 250+ string keys, where "findfield" lands
7594                    // past const #255). The exec must honour the same split.
7595                    let key = if inst.k() {
7596                        cl.proto.consts[inst.c() as usize]
7597                    } else {
7598                        self.r(base, inst.c())
7599                    };
7600                    self.op_index(o, key, base + inst.a())?;
7601                }
7602                Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7603                Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7604                Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7605                Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7606                Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7607                Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7608                Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7609                Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7610                Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7611                Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7612                Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7613                Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7614                Op::Unm => {
7615                    let v = self.r(base, inst.b());
7616                    match coerce_num(v) {
7617                        Some(Num::Int(i)) => {
7618                            self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7619                        }
7620                        Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7621                        None => {
7622                            let mm = self.get_mm(v, Mm::Unm);
7623                            if mm.is_nil() {
7624                                return Err(self.type_err("perform arithmetic on", v));
7625                            }
7626                            let dst = base + inst.a();
7627                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7628                        }
7629                    }
7630                }
7631                Op::BNot => {
7632                    let v = self.r(base, inst.b());
7633                    match coerce_num(v) {
7634                        Some(n) => {
7635                            let i = self.int_from_num(n)?;
7636                            self.set_r(base, inst.a(), Value::Int(!i));
7637                        }
7638                        None => {
7639                            let mm = self.get_mm(v, Mm::BNot);
7640                            if mm.is_nil() {
7641                                return Err(self.type_err("perform bitwise operation on", v));
7642                            }
7643                            let dst = base + inst.a();
7644                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7645                        }
7646                    }
7647                }
7648                Op::Not => {
7649                    let v = self.r(base, inst.b());
7650                    self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7651                }
7652                Op::Len => {
7653                    let v = self.r(base, inst.b());
7654                    match self.len_step(v)? {
7655                        MmOut::Done(r) => self.set_r(base, inst.a(), r),
7656                        MmOut::Mm { func, recv } => {
7657                            let dst = base + inst.a();
7658                            self.begin_meta_call(
7659                                func,
7660                                &[recv, recv],
7661                                MetaAction::Store { dst },
7662                                "len",
7663                            )?;
7664                        }
7665                        MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7666                    }
7667                }
7668                Op::Concat => {
7669                    // right-associative fold over operands at base+a .. base+a+n,
7670                    // in place on the stack so a yielding __concat can suspend.
7671                    let a = inst.a();
7672                    let n = inst.b();
7673                    self.top = base + a + n;
7674                    self.concat_run(base + a)?;
7675                }
7676                Op::Close => {
7677                    // Yieldable: drive __close handlers through the
7678                    // interpreter loop so a coroutine.yield() inside a
7679                    // handler suspends cleanly (locals.lua block-end yield).
7680                    // `drive_close` parks the handler call at `self.top`, so
7681                    // raise `top` past this frame's full register window
7682                    // first — a goto out of a nested for-loop can fire
7683                    // OP_Close while `self.top` still sits at the inner
7684                    // body's working top, which would let `push_frame`'s
7685                    // wipe clobber the outer tbc slot before it could be
7686                    // closed (locals.lua:1219 nested-for goto regression).
7687                    self.top = self.top.max(base + cl.proto.max_stack as u32);
7688                    let _ =
7689                        self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7690                }
7691                Op::Tbc => {
7692                    self.register_tbc(base + inst.a())?;
7693                }
7694                Op::Jmp => {
7695                    let off = inst.sj();
7696                    // P12-S1.B — trace JIT back-edge counter. A negative
7697                    // jump offset is a loop back-edge (the only canonical
7698                    // backward jumps the compiler emits — `while`, `for`,
7699                    // `repeat`). Tick the per-Proto counter and, once it
7700                    // exceeds the threshold, start recording a trace at the
7701                    // back-edge target (if none is active or cached). The
7702                    // whole block is gated on `self.jit.trace_enabled` so
7703                    // existing benches see one branch-not-taken and no
7704                    // counter writes.
7705                    if self.jit.trace_enabled && off < 0 {
7706                        let proto = cl.proto;
7707                        let c = proto.trace_hot_count.get();
7708                        if c < u32::MAX / 2 {
7709                            proto.trace_hot_count.set(c + 1);
7710                        }
7711                        // P13-S13-H — relaxed back-edge trigger:
7712                        // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7713                        // a missed crossing (active_trace busy with
7714                        // a call-trigger, or the recorder slot
7715                        // happened to be in use) doesn't permanently
7716                        // lock this back-edge target out. The
7717                        // `already_cached` short-circuit prevents
7718                        // duplicate recordings: once a trace is
7719                        // cached for this target, subsequent
7720                        // crossings skip the start. This pairs with
7721                        // S13-H's discard-on-partial-coverage close
7722                        // handling — when a short call-trigger is
7723                        // discarded, the back-edge can still find an
7724                        // open slot at the next iteration.
7725                        let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7726                        // P13-S13-K — gave-up short-circuit. Skip
7727                        // the RefCell borrow + scan when the
7728                        // S13-I cap force-compiled a partial
7729                        // trace on this Proto.
7730                        let back_edge_already_cached = if proto.trace_gave_up.get() {
7731                            true
7732                        } else {
7733                            proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7734                        };
7735                        if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7736                            && self.jit.active_trace.is_none()
7737                            && !back_edge_already_cached
7738                        {
7739                            // Back-edge target = pc after `add_pc(off)`,
7740                            // i.e. current `pc + 1 + off` (the dispatch
7741                            // loop has already advanced f.pc to pc+1).
7742                            let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7743                            // Snapshot per-slot Value tag at trace
7744                            // entry so the lowerer's kind tracker
7745                            // knows which arith path to lower
7746                            // (iadd vs fadd, etc.).
7747                            let max_stack = cl.proto.max_stack as usize;
7748                            let base_us = base as usize;
7749                            let mut entry_tags = Vec::with_capacity(max_stack);
7750                            for i in 0..max_stack {
7751                                let (tag, _) = self.stack[base_us + i].unpack();
7752                                entry_tags.push(tag);
7753                            }
7754                            self.jit.active_trace =
7755                                Some(Box::new(crate::jit::trace::TraceRecord::start(
7756                                    cl.proto, target, entry_tags, false,
7757                                )));
7758                            // P12-S4 — record the frame the trace
7759                            // started in. `self.frames.len() - 1`
7760                            // since we're inside the currently-running
7761                            // Lua frame's dispatch.
7762                            self.jit.recording_frame_base = self.frames.len() - 1;
7763                        }
7764                    }
7765                    self.add_pc(off);
7766                }
7767                Op::Eq => {
7768                    let l = self.r(base, inst.a());
7769                    let r = self.r(base, inst.b());
7770                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7771                        if (a == b) != inst.k() {
7772                            self.bump_pc();
7773                        }
7774                    } else {
7775                        let step = self.eq_step(l, r);
7776                        self.op_compare(step, l, r, inst.k(), "eq")?;
7777                    }
7778                }
7779                Op::EqK => {
7780                    let l = self.r(base, inst.a());
7781                    let r = cl.proto.consts[inst.b() as usize];
7782                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7783                        if (a == b) != inst.k() {
7784                            self.bump_pc();
7785                        }
7786                    } else {
7787                        let step = self.eq_step(l, r);
7788                        self.op_compare(step, l, r, inst.k(), "eq")?;
7789                    }
7790                }
7791                Op::Lt => {
7792                    let l = self.r(base, inst.a());
7793                    let r = self.r(base, inst.b());
7794                    // hot path: Int < Int — drops the MmOut + op_compare match
7795                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7796                        if (a < b) != inst.k() {
7797                            self.bump_pc();
7798                        }
7799                    } else {
7800                        let step = self.less_step(l, r, false)?;
7801                        self.op_compare(step, l, r, inst.k(), "lt")?;
7802                    }
7803                }
7804                Op::Le => {
7805                    let l = self.r(base, inst.a());
7806                    let r = self.r(base, inst.b());
7807                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7808                        if (a <= b) != inst.k() {
7809                            self.bump_pc();
7810                        }
7811                    } else {
7812                        let step = self.less_step(l, r, true)?;
7813                        self.op_compare(step, l, r, inst.k(), "le")?;
7814                    }
7815                }
7816                Op::Test => {
7817                    let cond = self.r(base, inst.a()).truthy();
7818                    self.cond_skip(cond, inst.k());
7819                }
7820                Op::TestSet => {
7821                    let v = self.r(base, inst.b());
7822                    if v.truthy() == inst.k() {
7823                        self.set_r(base, inst.a(), v);
7824                    } else {
7825                        self.bump_pc();
7826                    }
7827                }
7828                Op::Call => {
7829                    let abs = base + inst.a();
7830                    let nargs = if inst.b() == 0 {
7831                        None
7832                    } else {
7833                        Some(inst.b() - 1)
7834                    };
7835                    let wanted = inst.c() as i32 - 1;
7836                    self.begin_call(abs, nargs, wanted, false)?;
7837                }
7838                Op::TailCall => {
7839                    let fr = *self.top_frame();
7840                    let abs = base + inst.a();
7841                    let mut nargs = if inst.b() == 0 {
7842                        self.top - (abs + 1)
7843                    } else {
7844                        inst.b() - 1
7845                    };
7846                    // A tail call pops this frame before begin_call, so a
7847                    // non-callable target would lose its name/position. Report
7848                    // it now (PUC reads funcname from the still-current ci),
7849                    // while the frame is intact, for "(field 'x')"-style info.
7850                    let mut func = self.stack[abs as usize];
7851                    if !matches!(func, Value::Closure(_) | Value::Native(_))
7852                        && self.get_mm(func, Mm::Call).is_nil()
7853                    {
7854                        return Err(self.call_err(func));
7855                    }
7856                    // PUC `luaD_pretailcall` resolves a chain of `__call`
7857                    // metamethods *in place* before deciding whether to
7858                    // collapse this frame. Without that, each __call hop
7859                    // would push a fresh Lua frame and a 10000-deep
7860                    // tail-recursion through a 100-deep __call chain
7861                    // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
7862                    // shift args right, install the handler at `abs`, retry.
7863                    // Chain depth limit matches the call-site `begin_call`
7864                    // version cap (5.5 calls.lua :223 — 15 max, then "too
7865                    // long"; 16th wrap fails the call). An infinite
7866                    // self-referential `__call` would otherwise spin.
7867                    let chain_cap = if self.version >= LuaVersion::Lua55 {
7868                        15
7869                    } else {
7870                        MAX_CCMT
7871                    };
7872                    let mut chain = 0u32;
7873                    while !matches!(func, Value::Closure(_) | Value::Native(_)) {
7874                        let mm = self.get_mm(func, Mm::Call);
7875                        if mm.is_nil() {
7876                            return Err(self.call_err(func));
7877                        }
7878                        chain += 1;
7879                        if chain > chain_cap {
7880                            return Err(self.rt_err("'__call' chain too long"));
7881                        }
7882                        let end = (abs + 1 + nargs) as usize;
7883                        if self.stack.len() < end + 1 {
7884                            self.stack.resize(end + 1, Value::Nil);
7885                        }
7886                        for i in (0..=nargs).rev() {
7887                            self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
7888                        }
7889                        self.stack[abs as usize] = mm;
7890                        nargs += 1;
7891                        self.top = abs + 1 + nargs;
7892                        func = mm;
7893                    }
7894                    // PUC's tail-call collapse is Lua→Lua only. A tail call to
7895                    // a C function runs the C function under the *current* Lua
7896                    // activation (no frame fold — a C frame has nothing to
7897                    // collapse into); after the C function returns, the
7898                    // calling Lua function returns those results normally.
7899                    // Mirror that: keep our Lua frame on the stack, call the
7900                    // target through `begin_call(abs, …)` as a regular call,
7901                    // and let the fallback `Op::Return` that the compiler
7902                    // emits right after `Op::TailCall` forward the results.
7903                    // 5.1 closure.lua :177's `return getfenv()` from inside
7904                    // foo needs level 1 to resolve to foo, not to the
7905                    // thread's globals fallback that happens when no Lua
7906                    // frame is on the stack.
7907                    let lua_target = matches!(func, Value::Closure(_));
7908                    if lua_target {
7909                        self.close_slots(fr.base, None)?;
7910                        for i in 0..=nargs {
7911                            self.stack[(fr.func_slot + i) as usize] =
7912                                self.stack[(abs + i) as usize];
7913                        }
7914                        // v2.5 P1B-2A: clear the slot range that's now
7915                        // stranded by the tail-call collapse. The args
7916                        // were copied to `[fr.func_slot..fr.func_slot+
7917                        // nargs+1)`; the source slots `[abs..abs+
7918                        // nargs+1)` still hold the same `Value::Closure
7919                        // / Value::Str / ...` entries, but they're past
7920                        // the new call's window. Without this clear, a
7921                        // later GC with wider gc_top would mark stale
7922                        // pointers there (same UAF-A family the v2.3
7923                        // finish_results slot-clear closed for the
7924                        // Op::Return path).
7925                        let new_top_lower_bound = fr.func_slot + nargs + 1;
7926                        let prev_top = (self.top as usize).min(self.stack.len());
7927                        if (new_top_lower_bound as usize) < prev_top {
7928                            for slot in &mut self.stack[new_top_lower_bound as usize..prev_top] {
7929                                *slot = Value::Nil;
7930                            }
7931                        }
7932                        // PUC `CIST_TAIL`: the new Lua activation inherits
7933                        // the popped frame's tailcalls count plus one for
7934                        // this collapse. 5.1 db.lua :372 hammers 30000
7935                        // recursive tail calls and expects to see the
7936                        // synthetic tail level for every one of them.
7937                        self.pending_tailcalls = fr.tailcalls.saturating_add(1);
7938                        frames_pop_sync(&mut self.frames, &mut self.frames_top);
7939                        if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
7940                            && self.frames.len() < entry_depth
7941                        {
7942                            // a native completed what was this function's result
7943                            return Ok(self.take_results(fr.func_slot));
7944                        }
7945                    } else {
7946                        // Native target (the `__call` chain is already resolved
7947                        // above): regular call. Results land at `abs..self.top`;
7948                        // the next op (the fallback `Op::Return`) forwards them.
7949                        // `wanted = -1`: the caller multrets them through Return.
7950                        self.begin_call(abs, Some(nargs), -1, false)?;
7951                    }
7952                }
7953                Op::Return | Op::Return0 | Op::Return1 => {
7954                    let (abs_a, nret) = match inst.op() {
7955                        Op::Return0 => (base, 0),
7956                        Op::Return1 => (base + inst.a(), 1),
7957                        _ => {
7958                            let abs_a = base + inst.a();
7959                            let nret = if inst.b() == 0 {
7960                                self.top - abs_a
7961                            } else {
7962                                inst.b() - 1
7963                            };
7964                            (abs_a, nret)
7965                        }
7966                    };
7967                    // close before moving results: __close handlers run above
7968                    // the stack top, so the result region [abs_a..abs_a+nret)
7969                    // stays intact across any yields the close performs.
7970                    // Fixed-count returns may leave `self.top` below the last
7971                    // result slot (the compiler does not always re-bump it);
7972                    // raise it past the result region so `drive_close` parks
7973                    // the handler call *above* — landing at `self.top` would
7974                    // otherwise clobber a result with the handler closure.
7975                    self.top = self.top.max(abs_a + nret);
7976                    if let Some(vals) = self.begin_close(
7977                        base,
7978                        None,
7979                        AfterClose::Return {
7980                            abs_a,
7981                            nret,
7982                            from_native: false,
7983                        },
7984                        entry_depth,
7985                    )? {
7986                        return Ok(vals);
7987                    }
7988                }
7989                Op::ForPrep => self.for_prep(inst, base)?,
7990                Op::ForLoop => {
7991                    // P12 — trace JIT back-edge counter on the
7992                    // numeric-for back-edge. ForLoop is always at
7993                    // a back-edge position (when it continues);
7994                    // for the trace recorder we treat it as the
7995                    // close-detection equivalent of `Op::Jmp` with
7996                    // negative offset. Counter only ticks when the
7997                    // back-edge will actually fire (count > 0 in
7998                    // the 5.4+ Int form, a limit comparison in the ≤5.3
7999                    // Int form and the Float form). The cheap check up front
8000                    // matches the for_loop helper's branch.
8001                    if self.jit.trace_enabled {
8002                        let a = inst.a();
8003                        let pre53 = self.version() <= LuaVersion::Lua53;
8004                        let take_back_edge =
8005                            match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
8006                                (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
8007                                    count > 0
8008                                }
8009                                (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
8010                                    let next = cur.wrapping_add(st);
8011                                    if st > 0 { next <= lim } else { next >= lim }
8012                                }
8013                                (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
8014                                    let next = cur + st;
8015                                    if st > 0.0 { next <= lim } else { next >= lim }
8016                                }
8017                                _ => false,
8018                            };
8019                        if take_back_edge {
8020                            let proto = cl.proto;
8021                            let c = proto.trace_hot_count.get();
8022                            if c < u32::MAX / 2 {
8023                                proto.trace_hot_count.set(c + 1);
8024                            }
8025                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8026                                && self.jit.active_trace.is_none()
8027                            {
8028                                // ForLoop's back-edge target = pc
8029                                // after `add_pc(-bx)` runs from the
8030                                // already-bumped f.pc (= pc + 1).
8031                                // So target = (pc + 1) - bx.
8032                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8033                                let max_stack = cl.proto.max_stack as usize;
8034                                let base_us = base as usize;
8035                                let mut entry_tags = Vec::with_capacity(max_stack);
8036                                for i in 0..max_stack {
8037                                    let (tag, _) = self.stack[base_us + i].unpack();
8038                                    entry_tags.push(tag);
8039                                }
8040                                self.jit.active_trace =
8041                                    Some(Box::new(crate::jit::trace::TraceRecord::start(
8042                                        cl.proto, target, entry_tags, false,
8043                                    )));
8044                                // P12-S4 — record the frame the trace
8045                                // started in. The currently-running
8046                                // Lua frame is at len() - 1.
8047                                self.jit.recording_frame_base = self.frames.len() - 1;
8048                            }
8049                        }
8050                    }
8051                    self.for_loop(inst, base);
8052                }
8053                Op::TForPrep => {
8054                    // the 4th control slot is the iterator's closing value
8055                    self.register_tbc(base + inst.a() + 3)?;
8056                    self.add_pc(inst.bx() as i32);
8057                }
8058                Op::TForCall => {
8059                    let abs = base + inst.a();
8060                    let need = (abs + 7) as usize;
8061                    if self.stack.len() < need {
8062                        self.stack.resize(need, Value::Nil);
8063                    }
8064                    self.stack[(abs + 4) as usize] = self.stack[abs as usize];
8065                    self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
8066                    self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
8067                    let nvars = inst.c() as i32;
8068                    self.begin_call(abs + 4, Some(2), nvars, false)?;
8069                }
8070                Op::TForLoop => {
8071                    let a = inst.a();
8072                    let ctrl = self.r(base, a + 4);
8073                    if !ctrl.is_nil() {
8074                        // P12-S12-B v1 — trace JIT back-edge counter on
8075                        // generic-for back-edge. TForLoop sits at the
8076                        // tail of `for k,v in expr do ... end`; recorder
8077                        // treats it as the close-detection equivalent of
8078                        // a negative Op::Jmp. Gate on `take_back_edge`
8079                        // (= `ctrl != nil`) so empty-iter loops don't
8080                        // pollute hot_count. v1 only adds the trigger;
8081                        // whitelist + helper + emit live in v2.
8082                        if self.jit.trace_enabled {
8083                            let proto = cl.proto;
8084                            let c = proto.trace_hot_count.get();
8085                            if c < u32::MAX / 2 {
8086                                proto.trace_hot_count.set(c + 1);
8087                            }
8088                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8089                                && self.jit.active_trace.is_none()
8090                            {
8091                                // TForLoop back-edge target = pc after
8092                                // `add_pc(-bx)` runs from the already-
8093                                // bumped f.pc (= pc + 1). So target =
8094                                // (pc + 1) - bx, normally landing on
8095                                // body_top (the op right after TForPrep).
8096                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8097                                let max_stack = cl.proto.max_stack as usize;
8098                                let base_us = base as usize;
8099                                let mut entry_tags = Vec::with_capacity(max_stack);
8100                                for i in 0..max_stack {
8101                                    let (tag, _) = self.stack[base_us + i].unpack();
8102                                    entry_tags.push(tag);
8103                                }
8104                                // P12-S12-B-v5 — snapshot the iter
8105                                // fn's address if Native, so the
8106                                // lowerer can specialise ipairs into
8107                                // inline Table aget IR.
8108                                let iter_ptr =
8109                                    if let Value::Native(n) = self.stack[base_us + a as usize] {
8110                                        Some(n.f as usize)
8111                                    } else {
8112                                        None
8113                                    };
8114                                // P12-S12-C v3 — snapshot R[A+5]'s
8115                                // tag (= current iter's val from
8116                                // the just-fired TForCall). The v5
8117                                // inline aget fast_blk emits a
8118                                // runtime guard against this tag;
8119                                // mixed-tag arrays deopt rather
8120                                // than producing garbage pointers
8121                                // through the v2 spill path.
8122                                let val_slot = base_us + (a as usize) + 5;
8123                                let val_tag = if val_slot < self.stack.len() {
8124                                    Some(self.stack[val_slot].unpack().0)
8125                                } else {
8126                                    None
8127                                };
8128                                let mut rec = crate::jit::trace::TraceRecord::start(
8129                                    cl.proto, target, entry_tags, false,
8130                                );
8131                                rec.tfor_iter_ptr = iter_ptr;
8132                                rec.tfor_val_tag = val_tag;
8133                                self.jit.active_trace = Some(Box::new(rec));
8134                                self.jit.recording_frame_base = self.frames.len() - 1;
8135                            }
8136                        }
8137                        self.set_r(base, a + 2, ctrl);
8138                        self.add_pc(-(inst.bx() as i32));
8139                    }
8140                }
8141                Op::Closure => {
8142                    let proto = cl.proto.protos[inst.bx() as usize];
8143                    let n_ups = proto.upvals.len();
8144                    // P11-S5d.M — build upvals on the stack for small
8145                    // closures, skipping the per-call Vec/Box alloc
8146                    // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8147                    // = 2 covers most Lua source (1 captured local, or
8148                    // _ENV + a single capture). Beyond that, fall back
8149                    // to a heap Vec.
8150                    use crate::runtime::function::INLINE_UPVALS_N;
8151                    let mut stack_buf: [std::mem::MaybeUninit<
8152                        Gc<crate::runtime::function::Upvalue>,
8153                    >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8154                    let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8155                    let use_inline = n_ups <= INLINE_UPVALS_N;
8156                    if !use_inline {
8157                        heap_buf.reserve_exact(n_ups);
8158                    }
8159                    for (i, d) in proto.upvals.iter().enumerate() {
8160                        let uv = if d.in_stack {
8161                            self.find_or_create_upval(base + d.index as u32)
8162                        } else {
8163                            cl.upvals()[d.index as usize]
8164                        };
8165                        if use_inline {
8166                            stack_buf[i] = std::mem::MaybeUninit::new(uv);
8167                        } else {
8168                            heap_buf.push(uv);
8169                        }
8170                    }
8171                    // Tiny shim around the two paths so the 5.1 _ENV
8172                    // clone + cache check below see one uniform
8173                    // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8174                    // into the local frame (still valid through the
8175                    // rest of this Op::Closure handler).
8176                    let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8177                        // SAFETY: the first n_ups slots of stack_buf
8178                        // were initialised above; we hand out a slice
8179                        // covering exactly them.
8180                        unsafe {
8181                            std::slice::from_raw_parts_mut(
8182                                stack_buf.as_mut_ptr()
8183                                    as *mut Gc<crate::runtime::function::Upvalue>,
8184                                n_ups,
8185                            )
8186                        }
8187                    } else {
8188                        &mut heap_buf[..]
8189                    };
8190                    // PUC 5.1 had per-function environments: every Lua
8191                    // function carried its own `env` slot, snapshotted from
8192                    // the creating function's env at closure time, so a
8193                    // `setfenv` on one closure never bled into a sibling.
8194                    // luna models that by giving the 5.1 closure a *fresh*
8195                    // closed upvalue for whichever cell holds `_ENV`, seeded
8196                    // from the parent's current env value. Only that cell is
8197                    // cloned — every other upvalue keeps its open/shared
8198                    // identity (so e.g. `local function range(...) ...
8199                    // range(...) ... end` still sees its self-reference). 5.2+
8200                    // keeps the shared-upval model (and the proto cache that
8201                    // depends on it).
8202                    let v51 = self.version() <= LuaVersion::Lua51;
8203                    if v51 && proto.env_upval_idx != u8::MAX {
8204                        let i = proto.env_upval_idx as usize;
8205                        let cur = match ups[i].state() {
8206                            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8207                            UpvalState::Closed(v) => v,
8208                        };
8209                        ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8210                    }
8211                    let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8212                    // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8213                    // and reuses it when every fresh-upvalue binding still
8214                    // points to the same Upvalue object as the cached one.
8215                    // That keeps `function() return outer end` repeated in a
8216                    // loop comparing equal across iterations (the captured
8217                    // outer is a shared open upvalue), while `function()
8218                    // return loop_var end` gets a fresh closure each round
8219                    // because the loop var is re-created per iteration. PUC
8220                    // 5.1 predated the cache, and the per-closure `_ENV`
8221                    // clone above would defeat it anyway, so skip it.
8222                    let nc = if v51 {
8223                        self.heap.new_closure_inline(proto, ups_slice)
8224                    } else {
8225                        let cached = proto.cache.get().filter(|c| {
8226                            c.upvals().len() == ups_slice.len()
8227                                && c.upvals()
8228                                    .iter()
8229                                    .zip(ups_slice.iter())
8230                                    .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8231                        });
8232                        match cached {
8233                            Some(c) => c,
8234                            None => {
8235                                let n = self.heap.new_closure_inline(proto, ups_slice);
8236                                proto.cache.set(Some(n));
8237                                n
8238                            }
8239                        }
8240                    };
8241                    self.set_r(base, inst.a(), Value::Closure(nc));
8242                    self.maybe_collect_garbage(base + inst.a() + 1);
8243                }
8244                Op::Vararg => {
8245                    let abs_a = base + inst.a();
8246                    let wanted = inst.c() as i32 - 1;
8247                    // A materialized named vararg lives in func_slot (its writes
8248                    // must be visible to `...`); otherwise spread the extra args
8249                    // straight off the stack at func_slot+1 .. +n_varargs.
8250                    let vt = match self.stack[func_slot as usize] {
8251                        Value::Table(t) => Some(t),
8252                        _ => None,
8253                    };
8254                    let n = match vt {
8255                        Some(t) => {
8256                            let n_key = Value::Str(self.heap.intern(b"n"));
8257                            // PUC getnumargs: a named vararg `t.n` set out of the
8258                            // integer range [0, INT_MAX/2] is rejected here
8259                            match t.get(n_key) {
8260                                Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8261                                _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8262                            }
8263                        }
8264                        None => n_varargs,
8265                    };
8266                    let count = if wanted < 0 { n } else { wanted as u32 };
8267                    let need = (abs_a + count) as usize;
8268                    if self.stack.len() < need {
8269                        self.stack.resize(need, Value::Nil);
8270                    }
8271                    for i in 0..count {
8272                        let v = if i >= n {
8273                            Value::Nil
8274                        } else if let Some(t) = vt {
8275                            t.get_int(i as i64 + 1)
8276                        } else {
8277                            self.stack[(func_slot + 1 + i) as usize]
8278                        };
8279                        self.stack[(abs_a + i) as usize] = v;
8280                    }
8281                    if wanted < 0 {
8282                        self.top = abs_a + count;
8283                    }
8284                }
8285                Op::GetVarg => {
8286                    // materialize the vararg table (PUC table.pack shape) from the
8287                    // stack varargs — used when the named vararg is written /
8288                    // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8289                    // sees later writes) and in the local register R[A].
8290                    let n = n_varargs;
8291                    let t = self.heap.new_table();
8292                    {
8293                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8294                        let tm = unsafe { t.as_mut() };
8295                        for i in 0..n {
8296                            let _ = tm.set_int(
8297                                &mut self.heap,
8298                                i as i64 + 1,
8299                                self.stack[(func_slot + 1 + i) as usize],
8300                            );
8301                        }
8302                    }
8303                    let n_key = Value::Str(self.heap.intern(b"n"));
8304                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8305                    unsafe { t.as_mut() }
8306                        .set(&mut self.heap, n_key, Value::Int(n as i64))
8307                        .expect("'n' is a valid key");
8308                    // once-per-table barrier (mirror SETLIST): t is born BLACK
8309                    // during Propagate; the bulk inserts above don't barrier.
8310                    self.heap
8311                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8312                    self.stack[func_slot as usize] = Value::Table(t);
8313                    self.set_r(base, inst.a(), Value::Table(t));
8314                }
8315                Op::VargIdx => {
8316                    // R[A] := vararg[R[C]] without allocating: integer key in
8317                    // [1,n] → that vararg, "n" → the count, else nil.
8318                    let key = self.r(base, inst.c());
8319                    let n = n_varargs;
8320                    let v = match key {
8321                        Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8322                            self.stack[(func_slot + k as u32) as usize]
8323                        }
8324                        Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8325                            self.stack[(func_slot + f as u32) as usize]
8326                        }
8327                        Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8328                        _ => Value::Nil,
8329                    };
8330                    self.set_r(base, inst.a(), v);
8331                }
8332                Op::ErrNNil => {
8333                    let v = self.r(base, inst.a());
8334                    if !matches!(v, Value::Nil) {
8335                        let bx = inst.bx();
8336                        let name = if bx == 0 {
8337                            "?".to_string()
8338                        } else {
8339                            match cl.proto.consts[(bx - 1) as usize] {
8340                                Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8341                                _ => "?".to_string(),
8342                            }
8343                        };
8344                        return Err(self.rt_err(&format!("global '{name}' already defined")));
8345                    }
8346                }
8347                Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8348            }
8349        }
8350    }
8351
8352    #[inline(always)]
8353    fn pc_of_top(&self) -> u32 {
8354        self.top_frame().pc
8355    }
8356
8357    #[inline(always)]
8358    fn bump_pc(&mut self) {
8359        // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8360        // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8361        // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8362        // up over `fib_28`'s ~500k jumps.
8363        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8364        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8365            CallFrame::Lua(f) => f.pc += 1,
8366            _ => unreachable!("Cont frame at bump_pc"),
8367        }
8368    }
8369
8370    #[inline(always)]
8371    fn add_pc(&mut self, d: i32) {
8372        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8373        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8374            CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8375            _ => unreachable!("Cont frame at add_pc"),
8376        }
8377    }
8378
8379    /// PUC conditional-skip convention: the JMP that follows is executed when
8380    /// `cond == k`; otherwise it is skipped.
8381    #[inline(always)]
8382    fn cond_skip(&mut self, cond: bool, k: bool) {
8383        if cond != k {
8384            self.bump_pc();
8385        }
8386    }
8387
8388    // ---- indexing (with __index/__newindex chains) ----
8389
8390    /// The `#` length operation: string byte length, `__len` if present, else
8391    /// the raw table border. Returns the raw length value (may be non-integer
8392    /// when `__len` is exotic).
8393    pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8394        match self.len_step(v)? {
8395            MmOut::Done(n) => Ok(n),
8396            // PUC calls unary metamethods with the operand twice
8397            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8398            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8399        }
8400    }
8401
8402    /// Length fast path: a string's byte count or a table's raw border when no
8403    /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8404    /// called with the operand twice. Errors for a non-table with no `__len`.
8405    fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8406        match v {
8407            Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8408            Value::Table(t) => {
8409                let mm = self.get_mm(v, Mm::Len);
8410                if mm.is_nil() {
8411                    Ok(MmOut::Done(Value::Int(t.len())))
8412                } else {
8413                    Ok(MmOut::Mm { func: mm, recv: v })
8414                }
8415            }
8416            _ => {
8417                let mm = self.get_mm(v, Mm::Len);
8418                if mm.is_nil() {
8419                    Err(self.type_err("get length of", v))
8420                } else {
8421                    Ok(MmOut::Mm { func: mm, recv: v })
8422                }
8423            }
8424        }
8425    }
8426
8427    /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8428    /// value with no integer representation.
8429    pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8430        match self.len_value(v)? {
8431            Value::Int(i) => Ok(i),
8432            Value::Float(f) => crate::runtime::value::f2i_exact(f)
8433                .ok_or_else(|| self.rt_err("object length is not an integer")),
8434            _ => Err(self.rt_err("object length is not an integer")),
8435        }
8436    }
8437
8438    pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8439        match self.index_step(t, key)? {
8440            MmOut::Done(v) => Ok(v),
8441            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8442            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8443        }
8444    }
8445
8446    /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8447    /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8448    /// are followed inline (no yield possible); only a function link can yield.
8449    fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8450        let mut cur = t;
8451        for _ in 0..MAX_TAG_LOOP {
8452            let mm = match cur {
8453                Value::Table(tb) => {
8454                    let v = tb.get(key);
8455                    if !v.is_nil() {
8456                        return Ok(MmOut::Done(v));
8457                    }
8458                    let mm = self.get_mm(cur, Mm::Index);
8459                    if mm.is_nil() {
8460                        return Ok(MmOut::Done(Value::Nil));
8461                    }
8462                    mm
8463                }
8464                v => {
8465                    let mm = self.get_mm(v, Mm::Index);
8466                    if mm.is_nil() {
8467                        return Err(self.type_err("index", v));
8468                    }
8469                    mm
8470                }
8471            };
8472            match mm {
8473                Value::Closure(_) | Value::Native(_) => {
8474                    return Ok(MmOut::Mm {
8475                        func: mm,
8476                        recv: cur,
8477                    });
8478                }
8479                next => cur = next,
8480            }
8481        }
8482        Err(self.rt_err("'__index' chain too long; possible loop"))
8483    }
8484
8485    pub(crate) fn newindex_value(
8486        &mut self,
8487        t: Value,
8488        key: Value,
8489        v: Value,
8490    ) -> Result<(), LuaError> {
8491        match self.newindex_step(t, key, v)? {
8492            MmOut::Done(_) => Ok(()),
8493            MmOut::Mm { func, recv } => {
8494                self.call_value(func, &[recv, key, v])?;
8495                Ok(())
8496            }
8497            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8498        }
8499    }
8500
8501    /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
8502    /// performed inline (returning `Done`); only a function metamethod (`Mm`)
8503    /// needs an actual call — which the caller may run yieldably.
8504    fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
8505        let mut cur = t;
8506        for _ in 0..MAX_TAG_LOOP {
8507            let mm = match cur {
8508                Value::Table(tb) => {
8509                    // PI-A3 single-walk collapse — Table::try_set_existing
8510                    // fuses the prior `tb.get(key).is_nil()` gate and
8511                    // `raw_set` walk into one chain traversal when the
8512                    // key is already present with a non-nil value. The
8513                    // __newindex chain semantics are preserved by the
8514                    // identity (slot_nil ⇔ fire_newindex); see
8515                    // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
8516                    //
8517                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
8518                    // heap is single-threaded and the pointer is live as
8519                    // long as it is reachable from active roots (see
8520                    // heap.rs:5-7). Mirrors the raw_set wrapper below.
8521                    if unsafe { tb.as_mut() }.try_set_existing(key, v) {
8522                        self.heap
8523                            .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
8524                        return Ok(MmOut::Done(Value::Nil));
8525                    }
8526                    let mm = self.get_mm(cur, Mm::NewIndex);
8527                    if mm.is_nil() {
8528                        self.raw_set(tb, key, v)?;
8529                        return Ok(MmOut::Done(Value::Nil));
8530                    }
8531                    mm
8532                }
8533                bad => {
8534                    let mm = self.get_mm(bad, Mm::NewIndex);
8535                    if mm.is_nil() {
8536                        return Err(self.type_err("index", bad));
8537                    }
8538                    mm
8539                }
8540            };
8541            match mm {
8542                Value::Closure(_) | Value::Native(_) => {
8543                    return Ok(MmOut::Mm {
8544                        func: mm,
8545                        recv: cur,
8546                    });
8547                }
8548                next => cur = next,
8549            }
8550        }
8551        Err(self.rt_err("'__newindex' chain too long; possible loop"))
8552    }
8553
8554    fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8555        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8556        match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8557            Ok(()) => {
8558                self.heap
8559                    .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8560                Ok(())
8561            }
8562            Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8563            Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8564            Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8565            Err(TableError::InvalidNext) => unreachable!(),
8566        }
8567    }
8568
8569    /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8570    /// the boolean result; `Mm` (when raw equality fails and both are tables
8571    /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8572    fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8573        if l.raw_eq(r) {
8574            return MmOut::Done(Value::Bool(true));
8575        }
8576        if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8577            (l, r)
8578        {
8579            // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8580            // (and earlier) required the two operands' metatables to expose a
8581            // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8582            // metatable falls straight back to raw inequality. events.lua 5.1
8583            // :262 bakes this in.
8584            let mm = if self.version() <= LuaVersion::Lua51 {
8585                self.get_comp_mm(l, r, Mm::Eq)
8586            } else {
8587                let mut m = self.get_mm(l, Mm::Eq);
8588                if m.is_nil() {
8589                    m = self.get_mm(r, Mm::Eq);
8590                }
8591                m
8592            };
8593            if !mm.is_nil() {
8594                return MmOut::Mm { func: mm, recv: l };
8595            }
8596        }
8597        MmOut::Done(Value::Bool(false))
8598    }
8599
8600    // ---- arithmetic ----
8601
8602    #[inline(always)]
8603    fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8604        let l = self.r(base, inst.b());
8605        let r = self.r(base, inst.c());
8606        // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8607        // binary_trees all hammer these. Skipping coerce_num + the big
8608        // arith_fast match shaves several conditional moves per op.
8609        if let (Value::Int(a), Value::Int(b)) = (l, r) {
8610            let fast = match op {
8611                ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8612                ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8613                ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8614                _ => None,
8615            };
8616            if let Some(v) = fast {
8617                self.set_r(base, inst.a(), v);
8618                return Ok(());
8619            }
8620        }
8621        // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8622        // and any numeric workload with non-integer accumulators benefits.
8623        if let (Value::Float(a), Value::Float(b)) = (l, r) {
8624            let fast = match op {
8625                ArithOp::Add => Some(Value::Float(a + b)),
8626                ArithOp::Sub => Some(Value::Float(a - b)),
8627                ArithOp::Mul => Some(Value::Float(a * b)),
8628                ArithOp::Div => Some(Value::Float(a / b)),
8629                _ => None,
8630            };
8631            if let Some(v) = fast {
8632                self.set_r(base, inst.a(), v);
8633                return Ok(());
8634            }
8635        }
8636        match self.arith_fast(op, l, r)? {
8637            Some(v) => self.set_r(base, inst.a(), v),
8638            None => {
8639                let mm = self.arith_mm_func(op, l, r)?;
8640                let dst = base + inst.a();
8641                self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8642            }
8643        }
8644        Ok(())
8645    }
8646
8647    /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8648    /// directly, `Ok(None)` when a metamethod is required (the caller decides
8649    /// whether to call it synchronously or yieldably).
8650    fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8651        use ArithOp::*;
8652        match op {
8653            BAnd | BOr | BXor | Shl | Shr => {
8654                // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8655                match (coerce_num(l), coerce_num(r)) {
8656                    (Some(a), Some(b)) => {
8657                        let to_int = |n: Num| match n {
8658                            Num::Int(i) => Some(i),
8659                            Num::Float(f) => crate::runtime::value::f2i_exact(f),
8660                        };
8661                        let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8662                            // PUC luaG_tointerror: name the offending operand
8663                            return Err(self.no_int_rep_err());
8664                        };
8665                        let v = match op {
8666                            BAnd => a & b,
8667                            BOr => a | b,
8668                            BXor => a ^ b,
8669                            Shl => shift_left(a, b),
8670                            Shr => shift_left(a, b.wrapping_neg()),
8671                            _ => unreachable!(),
8672                        };
8673                        return Ok(Some(Value::Int(v)));
8674                    }
8675                    _ => return Ok(None),
8676                }
8677            }
8678            _ => {}
8679        }
8680        let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8681            (Some(a), Some(b)) => (a, b),
8682            _ => return Ok(None),
8683        };
8684        let v = match (op, ln, rn) {
8685            (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8686            (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8687            (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8688            (IDiv, Num::Int(a), Num::Int(b)) => {
8689                if b == 0 {
8690                    return Err(self.rt_err("attempt to divide by zero"));
8691                }
8692                let mut q = a.wrapping_div(b);
8693                if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8694                    q -= 1;
8695                }
8696                Value::Int(q)
8697            }
8698            (Mod, Num::Int(a), Num::Int(b)) => {
8699                if b == 0 {
8700                    return Err(self.rt_err("attempt to perform 'n%0'"));
8701                }
8702                let mut m = a.wrapping_rem(b);
8703                if m != 0 && (m ^ b) < 0 {
8704                    m += b;
8705                }
8706                Value::Int(m)
8707            }
8708            (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8709            (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8710            (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8711            (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8712            (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8713            (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8714            (Mod, a, b) => {
8715                let (x, y) = (a.as_f64(), b.as_f64());
8716                // PUC luai_nummod: correct fmod's sign without the `m*y`
8717                // product, which underflows to 0 for tiny denormals
8718                let mut m = x % y;
8719                if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8720                    m += y;
8721                }
8722                Value::Float(m)
8723            }
8724            _ => unreachable!(),
8725        };
8726        Ok(Some(v))
8727    }
8728
8729    pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8730        match v {
8731            Value::Int(i) => Ok(i),
8732            Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8733                Some(i) => Ok(i),
8734                None => Err(self.rt_err("number has no integer representation")),
8735            },
8736            v => Err(self.type_err(what, v)),
8737        }
8738    }
8739
8740    fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
8741        match n {
8742            Num::Int(i) => Ok(i),
8743            Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
8744                Some(i) => Ok(i),
8745                None => Err(self.rt_err("number has no integer representation")),
8746            },
8747        }
8748    }
8749
8750    /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
8751    /// PUC type error when neither operand provides one.
8752    fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
8753        use ArithOp::*;
8754        let event = match op {
8755            Add => Mm::Add,
8756            Sub => Mm::Sub,
8757            Mul => Mm::Mul,
8758            Div => Mm::Div,
8759            Mod => Mm::Mod,
8760            Pow => Mm::Pow,
8761            IDiv => Mm::IDiv,
8762            BAnd => Mm::BAnd,
8763            BOr => Mm::BOr,
8764            BXor => Mm::BXor,
8765            Shl => Mm::Shl,
8766            Shr => Mm::Shr,
8767        };
8768        let mut mm = self.get_mm(l, event);
8769        if mm.is_nil() {
8770            mm = self.get_mm(r, event);
8771        }
8772        if mm.is_nil() {
8773            let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
8774                "perform bitwise operation on"
8775            } else {
8776                "perform arithmetic on"
8777            };
8778            let bad = if coerce_num(l).is_none() { l } else { r };
8779            return Err(self.type_err(what, bad));
8780        }
8781        Ok(mm)
8782    }
8783
8784    // ---- comparison ----
8785
8786    pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
8787        match self.less_step(l, r, or_eq)? {
8788            MmOut::Done(v) => Ok(v.truthy()),
8789            MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
8790            MmOut::CompareSynth { func } => {
8791                // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
8792                // by library code (sort comparator etc.) — no yield expected
8793                // here (a yield would have hit `call_noyield`'s C boundary).
8794                Ok(!self.call_mm1(func, &[r, l])?.truthy())
8795            }
8796        }
8797    }
8798
8799    /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
8800    /// carries the boolean result; `Mm` (for non-number/string operands) carries
8801    /// the metamethod — called with `(l, r)`; raises the PUC compare error when
8802    /// neither operand provides one.
8803    fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
8804        let b = match (l, r) {
8805            (Value::Int(a), Value::Int(b)) => {
8806                if or_eq {
8807                    a <= b
8808                } else {
8809                    a < b
8810                }
8811            }
8812            (Value::Float(a), Value::Float(b)) => {
8813                if or_eq {
8814                    a <= b
8815                } else {
8816                    a < b
8817                }
8818            }
8819            (Value::Int(a), Value::Float(b)) => {
8820                if or_eq {
8821                    int_le_float(a, b)
8822                } else {
8823                    int_lt_float(a, b)
8824                }
8825            }
8826            (Value::Float(a), Value::Int(b)) => {
8827                if a.is_nan() {
8828                    false
8829                } else if or_eq {
8830                    !int_lt_float(b, a)
8831                } else {
8832                    !int_le_float(b, a)
8833                }
8834            }
8835            (Value::Str(a), Value::Str(b)) => {
8836                let (a, b) = (a.as_bytes(), b.as_bytes());
8837                if or_eq { a <= b } else { a < b }
8838            }
8839            (l, r) => {
8840                let event = if or_eq { Mm::Le } else { Mm::Lt };
8841                // PUC 5.1's `get_compTM` rule applies to ordered comparisons
8842                // too: both operands' metatables must expose the same
8843                // implementation for `__lt` / `__le` to fire. events.lua 5.1
8844                // :262 expects `c < d` (where `d` has no metatable) to error
8845                // with the default "attempt to compare two table values"
8846                // rather than running c's `__lt` blindly.
8847                let mm = if self.version() <= LuaVersion::Lua51 {
8848                    self.get_comp_mm(l, r, event)
8849                } else {
8850                    let mut m = self.get_mm(l, event);
8851                    if m.is_nil() {
8852                        m = self.get_mm(r, event);
8853                    }
8854                    m
8855                };
8856                // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
8857                // operand carries `__le`. 5.4 dropped the synthesis (now
8858                // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
8859                // on the synthesis — its metatable defines only `__lt`.
8860                // The fallback calls `__lt(r, l)` synchronously (the suite's
8861                // `__lt` doesn't yield) and negates the result; the yieldable
8862                // `__lt` path stays reserved for the explicit `<` operator.
8863                if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
8864                    let lt = Mm::Lt;
8865                    let mut mm_lt = self.get_mm(l, lt);
8866                    if mm_lt.is_nil() {
8867                        mm_lt = self.get_mm(r, lt);
8868                    }
8869                    if !mm_lt.is_nil() {
8870                        return Ok(MmOut::CompareSynth { func: mm_lt });
8871                    }
8872                }
8873                if mm.is_nil() {
8874                    // PUC luaG_ordererror: "two X values" when the operand
8875                    // types match, "X with Y" otherwise (objtypename-aware).
8876                    let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
8877                    return Err(self.rt_err(&if t1 == t2 {
8878                        format!("attempt to compare two {t1} values")
8879                    } else {
8880                        format!("attempt to compare {t1} with {t2}")
8881                    }));
8882                }
8883                return Ok(MmOut::Mm { func: mm, recv: l });
8884            }
8885        };
8886        Ok(MmOut::Done(Value::Bool(b)))
8887    }
8888
8889    // ---- numeric for ----
8890
8891    fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
8892        let a = inst.a();
8893        let init = self.r(base, a);
8894        let limit = self.r(base, a + 1);
8895        let step = self.r(base, a + 2);
8896        let (Some(init_n), Some(limit_n), Some(step_n)) =
8897            (as_num(init), as_num(limit), as_num(step))
8898        else {
8899            // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
8900            // PUC checks limit, then step, then initial value.
8901            let (what, bad) = if as_num(limit).is_none() {
8902                ("limit", limit)
8903            } else if as_num(step).is_none() {
8904                ("step", step)
8905            } else {
8906                ("initial value", init)
8907            };
8908            let tn = self.obj_typename(bad);
8909            return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
8910        };
8911        // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
8912        // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
8913        // first test, so each successful iteration emits a backward `OP_FORLOOP`
8914        // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
8915        // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
8916        // distance in luna's encoding is `loop_pc - prep_pc`; firing
8917        // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
8918        let pre53 = self.version() <= LuaVersion::Lua53;
8919        match (init_n, step_n) {
8920            (Num::Int(i0), Num::Int(st)) => {
8921                if st == 0 {
8922                    return Err(self.rt_err("'for' step is zero"));
8923                }
8924                if pre53 {
8925                    // PUC 5.3 `forlimit`: int limit passes through; float limit
8926                    // gets clamped to MIN/MAX with a `stopnow` flag set only
8927                    // when the clamp is unreachable (positive float with a
8928                    // negative step → limit=MAX, stopnow; negative float with
8929                    // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
8930                    // `init = 0` so OP_FORLOOP's first test against the
8931                    // unreachable clamp fails cleanly. An ordinary in-range
8932                    // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
8933                    // lets OP_FORLOOP's natural test reject the first step.
8934                    let (lim, stopnow) = match limit_n {
8935                        Num::Int(l) => (l, false),
8936                        Num::Float(f) => {
8937                            if f.is_nan() {
8938                                (0, true)
8939                            } else if f >= i64::MAX as f64 + 1.0 {
8940                                // beyond +MAX: unreachable for a decreasing loop
8941                                (i64::MAX, st < 0)
8942                            } else if f <= i64::MIN as f64 {
8943                                // beyond -MIN: unreachable for an increasing loop
8944                                (i64::MIN, st >= 0)
8945                            } else if st > 0 {
8946                                (f.floor() as i64, false)
8947                            } else {
8948                                (f.ceil() as i64, false)
8949                            }
8950                        }
8951                    };
8952                    let initv = if stopnow { 0 } else { i0 };
8953                    let pre = initv.wrapping_sub(st);
8954                    self.set_r(base, a, Value::Int(pre));
8955                    self.set_r(base, a + 1, Value::Int(lim));
8956                    self.set_r(base, a + 2, Value::Int(st));
8957                    self.add_pc(inst.bx() as i32 - 1);
8958                    return Ok(());
8959                }
8960                let (lim, empty) = int_for_limit(limit_n, i0, st);
8961                if empty {
8962                    self.add_pc(inst.bx() as i32);
8963                    return Ok(());
8964                }
8965                let count = if st > 0 {
8966                    (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
8967                } else {
8968                    (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
8969                };
8970                self.set_r(base, a, Value::Int(i0));
8971                self.set_r(base, a + 1, Value::Int(count as i64));
8972                self.set_r(base, a + 2, Value::Int(st));
8973                self.set_r(base, a + 3, Value::Int(i0));
8974            }
8975            _ => {
8976                let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
8977                if st == 0.0 {
8978                    return Err(self.rt_err("'for' step is zero"));
8979                }
8980                if pre53 {
8981                    let pre = x0 - st;
8982                    self.set_r(base, a, Value::Float(pre));
8983                    self.set_r(base, a + 1, Value::Float(lim));
8984                    self.set_r(base, a + 2, Value::Float(st));
8985                    self.add_pc(inst.bx() as i32 - 1);
8986                    return Ok(());
8987                }
8988                let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
8989                if !runs {
8990                    self.add_pc(inst.bx() as i32);
8991                    return Ok(());
8992                }
8993                self.set_r(base, a, Value::Float(x0));
8994                self.set_r(base, a + 1, Value::Float(lim));
8995                self.set_r(base, a + 2, Value::Float(st));
8996                self.set_r(base, a + 3, Value::Float(x0));
8997            }
8998        }
8999        Ok(())
9000    }
9001
9002    #[inline(always)]
9003    fn for_loop(&mut self, inst: Inst, base: u32) {
9004        let a = inst.a();
9005        // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
9006        // directly (R[a+1] holds the limit, *not* a remaining-count) so the
9007        // first iteration's test fires through the same backward-jump path as
9008        // every later iteration. 5.4+ switched to the count-based form luna
9009        // already uses for `Int`; the float branch was already PUC-3.x-style.
9010        let pre53 = self.version() <= LuaVersion::Lua53;
9011        match self.r(base, a) {
9012            Value::Int(cur) if pre53 => {
9013                let Value::Int(lim) = self.r(base, a + 1) else {
9014                    unreachable!()
9015                };
9016                let Value::Int(st) = self.r(base, a + 2) else {
9017                    unreachable!()
9018                };
9019                let next = cur.wrapping_add(st);
9020                let cont = if st > 0 { next <= lim } else { next >= lim };
9021                if cont {
9022                    self.set_r(base, a, Value::Int(next));
9023                    self.set_r(base, a + 3, Value::Int(next));
9024                    self.add_pc(-(inst.bx() as i32));
9025                }
9026            }
9027            Value::Int(cur) => {
9028                let Value::Int(count) = self.r(base, a + 1) else {
9029                    unreachable!()
9030                };
9031                if count > 0 {
9032                    let Value::Int(st) = self.r(base, a + 2) else {
9033                        unreachable!()
9034                    };
9035                    let next = cur.wrapping_add(st);
9036                    self.set_r(base, a, Value::Int(next));
9037                    self.set_r(base, a + 1, Value::Int(count - 1));
9038                    self.set_r(base, a + 3, Value::Int(next));
9039                    self.add_pc(-(inst.bx() as i32));
9040                }
9041            }
9042            Value::Float(cur) => {
9043                let Value::Float(lim) = self.r(base, a + 1) else {
9044                    unreachable!()
9045                };
9046                let Value::Float(st) = self.r(base, a + 2) else {
9047                    unreachable!()
9048                };
9049                let next = cur + st;
9050                let cont = if st > 0.0 { next <= lim } else { next >= lim };
9051                if cont {
9052                    self.set_r(base, a, Value::Float(next));
9053                    self.set_r(base, a + 3, Value::Float(next));
9054                    self.add_pc(-(inst.bx() as i32));
9055                }
9056            }
9057            _ => unreachable!("corrupt for-loop state"),
9058        }
9059    }
9060
9061    // ---- native helpers (used by builtins) ----
9062
9063    /// A native function's own captured upvalue (self lives at func_slot).
9064    ///
9065    /// Public so `native_typed` trampolines and embedders authoring
9066    /// stateful natives via `native_with(...)` can read their upvals.
9067    pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
9068        let Value::Native(nc) = self.stack[func_slot as usize] else {
9069            unreachable!("native frame without native closure");
9070        };
9071        nc.upvals[i]
9072    }
9073
9074    /// Number of upvalues captured by the native at `func_slot` (variadic
9075    /// captures such as the `io.lines` format list).
9076    pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
9077        let Value::Native(nc) = self.stack[func_slot as usize] else {
9078            unreachable!("native frame without native closure");
9079        };
9080        nc.upvals.len()
9081    }
9082
9083    /// Write a native function's own upvalue (stateful iterators).
9084    pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
9085        let Value::Native(nc) = self.stack[func_slot as usize] else {
9086            unreachable!("native frame without native closure");
9087        };
9088        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9089        unsafe { nc.as_mut() }.upvals[i] = v;
9090        // NativeClosure.upvals is traced as part of its Trace; a long-lived
9091        // stateful iterator closure (e.g. string.gmatch) sees many writes —
9092        // barrier_back once-and-done is cheaper than per-child forward.
9093        self.heap
9094            .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9095    }
9096
9097    /// Read the i-th positional argument inside a `NativeFn` body
9098    /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9099    /// matching PUC's "missing arg is nil" contract. Public so embedders
9100    /// can author their own natives.
9101    pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9102        if i < nargs {
9103            self.stack[(func_slot + 1 + i) as usize]
9104        } else {
9105            Value::Nil
9106        }
9107    }
9108
9109    /// Push the return values of a `NativeFn` and return their count
9110    /// (analogous to pushing N values then `return N` from a C function).
9111    /// Public so embedders can author their own natives.
9112    pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9113        let need = func_slot as usize + vals.len();
9114        if self.stack.len() < need {
9115            self.stack.resize(need, Value::Nil);
9116        }
9117        for (i, &v) in vals.iter().enumerate() {
9118            self.stack[func_slot as usize + i] = v;
9119        }
9120        vals.len() as u32
9121    }
9122
9123    /// Fast string concatenation of an adjacent pair, or `None` when a
9124    /// `__concat` metamethod is required.
9125    fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9126        let legacy = self.version <= crate::version::LuaVersion::Lua52;
9127        // Length-check fast paths for both string operands BEFORE the
9128        // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9129        // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9130        // overflow on the first pair that would exceed `INT_MAX` instead
9131        // of allocating multi-GB intermediates first.
9132        let max_str = i32::MAX as usize;
9133        if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9134            let a_len = ls.as_bytes().len();
9135            let b_len = rs.as_bytes().len();
9136            let new_len = a_len.checked_add(b_len);
9137            if new_len.is_none() || new_len.unwrap() > max_str {
9138                return Err(self.rt_err("string length overflow"));
9139            }
9140        }
9141        match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9142            (Some(a), Some(b)) => {
9143                // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9144                // concat past it raises "string length overflow"
9145                // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9146                // exactly this wall).
9147                let new_len = a.len().checked_add(b.len());
9148                if new_len.is_none() || new_len.unwrap() > max_str {
9149                    return Err(self.rt_err("string length overflow"));
9150                }
9151                let mut combined = a;
9152                combined.extend_from_slice(&b);
9153                Ok(Some(Value::Str(self.heap.intern(&combined))))
9154            }
9155            _ => Ok(None),
9156        }
9157    }
9158
9159    /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9160    /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9161    /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9162    /// call — its `Meta` continuation re-enters here on the metamethod's return.
9163    fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9164        // Sum the lengths of all all-Str operands BEFORE starting the
9165        // right-associative fold so a 129-operand `a..a..…` chain
9166        // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9167        // not after dozens of multi-GB intermediate intern+hash rounds.
9168        // A non-Str operand falls through to the per-pair check.
9169        let max_str = i32::MAX as usize;
9170        let mut total: usize = 0;
9171        let mut all_str = true;
9172        for slot in base_a..self.top {
9173            match self.stack[slot as usize] {
9174                Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9175                    Some(t) if t <= max_str => total = t,
9176                    _ => return Err(self.rt_err("string length overflow")),
9177                },
9178                _ => {
9179                    all_str = false;
9180                    break;
9181                }
9182            }
9183        }
9184        let _ = all_str; // discrimination already captured by early returns above
9185        while self.top.saturating_sub(base_a) >= 2 {
9186            let i = self.top - 1; // rightmost operand
9187            let x = self.stack[(i - 1) as usize];
9188            let y = self.stack[i as usize];
9189            match self.concat_pair(x, y)? {
9190                Some(s) => {
9191                    self.stack[(i - 1) as usize] = s;
9192                    self.top = i; // consumed y
9193                }
9194                None => {
9195                    let mut mm = self.get_mm(x, Mm::Concat);
9196                    if mm.is_nil() {
9197                        mm = self.get_mm(y, Mm::Concat);
9198                    }
9199                    if mm.is_nil() {
9200                        let legacy = self.version <= crate::version::LuaVersion::Lua52;
9201                        let bad = if concat_piece(x, legacy).is_none() {
9202                            x
9203                        } else {
9204                            y
9205                        };
9206                        return Err(self.type_err("concatenate", bad));
9207                    }
9208                    // result lands at i-1, dropping y (top→i); resume continues.
9209                    let dst = i - 1;
9210                    self.begin_meta_call(
9211                        mm,
9212                        &[x, y],
9213                        MetaAction::Concat { dst, base_a },
9214                        "concat",
9215                    )?;
9216                    return Ok(());
9217                }
9218            }
9219        }
9220        self.maybe_collect_garbage(base_a + 1);
9221        Ok(())
9222    }
9223
9224    /// tostring with __tostring / __name support.
9225    pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9226        let mm = self.get_mm(v, Mm::ToString);
9227        if !mm.is_nil() {
9228            return match self.call_mm1(mm, &[v])? {
9229                Value::Str(s) => Ok(s.as_bytes().to_vec()),
9230                _ => Err(self.rt_err("'__tostring' must return a string")),
9231            };
9232        }
9233        if let Value::Table(t) = v
9234            && let Value::Str(name) = self.get_mm(v, Mm::Name)
9235        {
9236            let mut out = name.as_bytes().to_vec();
9237            out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9238            return Ok(out);
9239        }
9240        Ok(self.tostring_basic(v))
9241    }
9242
9243    /// Basic tostring (no metamethods).
9244    pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9245        match v {
9246            Value::Nil => b"nil".to_vec(),
9247            Value::Bool(true) => b"true".to_vec(),
9248            Value::Bool(false) => b"false".to_vec(),
9249            Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9250            // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9251            // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9252            // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9253            // concatenating these renderings.
9254            Value::Float(f) => {
9255                let legacy = self.version <= crate::version::LuaVersion::Lua52;
9256                numeric::num_to_string_for(Num::Float(f), legacy).into_bytes()
9257            }
9258            Value::Str(s) => s.as_bytes().to_vec(),
9259            Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9260            Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9261            Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9262            Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9263            // PUC names file handles `file (0x…)`; a bare userdata is
9264            // `userdata: 0x…`. The io library overrides this via __tostring.
9265            Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9266            // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9267            // (the "light" qualifier only appears in `luaL_typeerror`).
9268            Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9269        }
9270    }
9271}
9272
/// Binary arithmetic and bitwise operators that can dispatch to a
/// metamethod.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    Add,
    Sub,
    Mul,
    Mod,
    Pow,
    Div,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
}

impl ArithOp {
    /// The PUC metamethod event name without its `__` prefix (`__add` →
    /// "add" and so on), as reported by `debug.getinfo(level, "n")` inside a
    /// metamethod handler.
    fn mm_name(self) -> &'static str {
        match self {
            // arithmetic
            Self::Add => "add",
            Self::Sub => "sub",
            Self::Mul => "mul",
            Self::Mod => "mod",
            Self::Pow => "pow",
            Self::Div => "div",
            Self::IDiv => "idiv",
            // bitwise
            Self::BAnd => "band",
            Self::BOr => "bor",
            Self::BXor => "bxor",
            Self::Shl => "shl",
            Self::Shr => "shr",
        }
    }
}
9309
9310fn as_num(v: Value) -> Option<Num> {
9311    match v {
9312        Value::Int(i) => Some(Num::Int(i)),
9313        Value::Float(f) => Some(Num::Float(f)),
9314        // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9315        Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9316        _ => None,
9317    }
9318}
9319
9320/// A concatenable operand's byte form (string, or a number coerced to its
9321/// string), or `None` when only a `__concat` metamethod can handle it.
9322/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9323/// suffix on integer-valued floats) — see `num_to_string_for`.
9324fn concat_piece(v: Value, legacy_float: bool) -> Option<Vec<u8>> {
9325    match v {
9326        Value::Str(s) => Some(s.as_bytes().to_vec()),
9327        Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9328        Value::Float(x) => {
9329            Some(numeric::num_to_string_for(Num::Float(x), legacy_float).into_bytes())
9330        }
9331        _ => None,
9332    }
9333}
9334
9335/// Index into the per-basic-type metatable table for a non-table value
9336/// (None for tables, which carry their own metatable).
9337fn type_mt_slot(v: Value) -> Option<usize> {
9338    match v {
9339        Value::Nil => Some(0),
9340        Value::Bool(_) => Some(1),
9341        Value::Int(_) | Value::Float(_) => Some(2),
9342        Value::Str(_) => Some(3),
9343        Value::Closure(_) | Value::Native(_) => Some(4),
9344        // tables and full userdata carry their own metatable; threads and
9345        // light userdata have none (PUC keeps a shared per-type mt slot for
9346        // light, but luna doesn't expose it — no test gates on it yet).
9347        Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9348    }
9349}
9350
9351/// Number, or string coerced to number (5.5 default string-arith coercion).
9352fn coerce_num(v: Value) -> Option<Num> {
9353    match v {
9354        Value::Int(i) => Some(Num::Int(i)),
9355        Value::Float(f) => Some(Num::Float(f)),
9356        Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9357        _ => None,
9358    }
9359}
9360
/// Lua's `a << b`: a logical shift on the 64-bit pattern of `a`. A negative
/// `b` shifts right by `|b|` instead, and any `|b| >= 64` produces 0.
fn shift_left(a: i64, b: i64) -> i64 {
    let distance = b.unsigned_abs();
    if distance >= 64 {
        return 0;
    }
    // `distance` is 0..=63 here, so the narrowing cast is lossless.
    let distance = distance as u32;
    let bits = a as u64;
    let shifted = if b < 0 {
        bits >> distance
    } else {
        bits << distance
    };
    shifted as i64
}
9376
/// Exact `i < f` between an integer and a float (PUC LTintfloat shape):
/// `f` is never rounded into an integer it cannot represent. NaN compares
/// false.
fn int_lt_float(i: i64, f: f64) -> bool {
    // 2^63: the first float above every i64; its negation is i64::MIN.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    if f >= TWO_POW_63 {
        return true;
    }
    let floor = f.floor();
    let bound = floor as i64;
    if floor == f {
        // `f` is integral: plain strict comparison.
        i < bound
    } else {
        // `f` has a fraction: `i < f` exactly when `i <= floor(f)`.
        i <= bound
    }
}
9392
/// Exact `i <= f` between an integer and a float. NaN compares false.
fn int_le_float(i: i64, f: f64) -> bool {
    // 2^63: the first float above every i64; its negation is i64::MIN.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    match f {
        x if x.is_nan() => false,
        x if x >= TWO_POW_63 => true,
        x if x < -TWO_POW_63 => false,
        // In range: `i <= f` exactly when `i <= floor(f)`.
        x => i <= x.floor() as i64,
    }
}
9406
9407/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9408/// (clipped limit, loop-is-empty).
9409fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9410    match limit {
9411        Num::Int(l) => {
9412            let empty = if step > 0 { init > l } else { init < l };
9413            (l, empty)
9414        }
9415        Num::Float(f) => {
9416            if f.is_nan() {
9417                return (0, true);
9418            }
9419            if step > 0 {
9420                if f >= 9_223_372_036_854_775_808.0 {
9421                    (i64::MAX, false)
9422                } else {
9423                    let l = f.floor();
9424                    if l < -9_223_372_036_854_775_808.0 {
9425                        (i64::MIN, true)
9426                    } else {
9427                        let li = l as i64;
9428                        (li, init > li)
9429                    }
9430                }
9431            } else if f <= -9_223_372_036_854_775_808.0 {
9432                (i64::MIN, false)
9433            } else {
9434                let l = f.ceil();
9435                if l >= 9_223_372_036_854_775_808.0 {
9436                    // PUC forlimit: a positive limit beyond the integer range
9437                    // is unreachable for a decreasing loop — empty.
9438                    (i64::MAX, true)
9439                } else {
9440                    let li = l as i64;
9441                    (li, init < li)
9442                }
9443            }
9444        }
9445    }
9446}
9447
9448/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9449/// `@file` / `=name` markers in `source`).
9450fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9451    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9452    let b = unsafe { crate::runtime::string::bytes_of(p) };
9453    match b.first() {
9454        Some(b'@') | Some(b'=') => &b[1..],
9455        _ => b,
9456    }
9457}
9458
9459impl Vm {
9460    /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9461    /// that called the current native. Returns (closure, current line,
9462    /// extra vararg count).
9463    /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9464    /// function running at `level`, recovered from the caller's call
9465    /// instruction (PUC funcnamefromcode). None for the main chunk or a
9466    /// tail/anonymous call with no recoverable name.
9467    /// A debug-level position: either a real Lua frame (by index) or a synthetic
9468    /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9469    /// coroutine body), which `debug.getinfo` and traceback report as "C".
9470    /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9471    /// frame at `level`'s current pc, as (name, value). Locals are visited in
9472    /// registration order (start pc, then register) to match luaF_getlocalname.
9473    pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9474        if n == 0 {
9475            return None;
9476        }
9477        let fi = match self.dbg_frame(level)? {
9478            DbgKind::Lua(fi) => fi,
9479            // Tail-call placeholder has no real frame backing it — no locals
9480            // exist to read or write here. PUC `findlocal` returns NULL on
9481            // a CIST_TAIL activation.
9482            DbgKind::Tail(_) => return None,
9483            // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9484            // for slot `n` inside the argument window (db.lua :408-:413, and
9485            // the call/return hook reads of math.sin / select args via
9486            // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9487            // meaningful for a C frame here.
9488            DbgKind::C(fi) => {
9489                if n < 1 {
9490                    return None;
9491                }
9492                let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9493                if (n as u32) > nargs {
9494                    return None;
9495                }
9496                let slot = (func_slot + n as u32) as usize;
9497                let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9498                return Some((self.temporary_locvar_name().to_string(), val));
9499            }
9500        };
9501        let f = self.frames[fi].lua()?;
9502        // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9503        // is the first extra arg passed to the function (`...[1]`), `-2` the
9504        // second, etc. The 5.5 stack layout parks varargs in
9505        // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9506        if n < 0 {
9507            let i = (-n) as u32;
9508            if i == 0 || i > f.n_varargs {
9509                return None;
9510            }
9511            let val = self
9512                .stack
9513                .get((f.func_slot + i) as usize)
9514                .copied()
9515                .unwrap_or(Value::Nil);
9516            return Some((self.vararg_locvar_name().to_string(), val));
9517        }
9518        let proto = f.closure.proto;
9519        // PUC's parser injects a hidden `(vararg table)` locvar for an
9520        // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9521        // right after the fixed parameters (`numparams + 1`). Main chunks
9522        // and `(...t)` named-vararg funcs do NOT get one — gate on the
9523        // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9524        // their declared registers (no shadow slot allocated), so we expose
9525        // that hidden index purely in this debug view.
9526        let num_params = proto.num_params as i64;
9527        let vararg_slot = if proto.has_vararg_table_pseudo {
9528            Some(num_params + 1)
9529        } else {
9530            None
9531        };
9532        if vararg_slot == Some(n) {
9533            return Some(("(vararg table)".to_string(), Value::Nil));
9534        }
9535        let pc = (f.pc as usize).saturating_sub(1);
9536        let mut active: Vec<&crate::runtime::LocVar> = proto
9537            .locvars
9538            .iter()
9539            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9540            .collect();
9541        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9542        let mut idx: i64 = n - 1;
9543        if let Some(vs) = vararg_slot
9544            && n > vs
9545        {
9546            idx -= 1;
9547        }
9548        let idx = idx as usize;
9549        if let Some(lv) = active.get(idx) {
9550            let val = self
9551                .stack
9552                .get((f.base + lv.reg) as usize)
9553                .copied()
9554                .unwrap_or(Value::Nil);
9555            return Some((lv.name.to_string(), val));
9556        }
9557        // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9558        // still inside the frame's live register window — report a
9559        // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9560        // the next frame's func slot (`ci->next->func.p`) so the
9561        // temporary window stops where the callee's frame begins
9562        // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9563        // an out-of-range slot).
9564        let limit = self
9565            .frames
9566            .get(fi + 1)
9567            .and_then(|cf| cf.lua())
9568            .map(|nf| nf.func_slot)
9569            .unwrap_or_else(|| self.top.max(f.base));
9570        let temp_reg = idx as u32;
9571        if f.base + temp_reg < limit {
9572            let val = self
9573                .stack
9574                .get((f.base + temp_reg) as usize)
9575                .copied()
9576                .unwrap_or(Value::Nil);
9577            return Some((self.lua_temporary_locvar_name().to_string(), val));
9578        }
9579        None
9580    }
9581
9582    /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9583    /// the local / vararg name on success, `None` when the slot does not
9584    /// resolve. Mirrors `local_at`'s indexing exactly.
9585    pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9586        if n == 0 {
9587            return None;
9588        }
9589        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9590            return None;
9591        };
9592        let f = self.frames[fi].lua()?;
9593        if n < 0 {
9594            let i = (-n) as u32;
9595            if i == 0 || i > f.n_varargs {
9596                return None;
9597            }
9598            let slot = (f.func_slot + i) as usize;
9599            if let Some(s) = self.stack.get_mut(slot) {
9600                *s = v;
9601            }
9602            return Some(self.vararg_locvar_name().to_string());
9603        }
9604        let proto = f.closure.proto;
9605        let num_params = proto.num_params as i64;
9606        let vararg_slot = if proto.has_vararg_table_pseudo {
9607            Some(num_params + 1)
9608        } else {
9609            None
9610        };
9611        if vararg_slot == Some(n) {
9612            // hidden (vararg table) slot has no real storage — accept the
9613            // write as a no-op for PUC parity (db.lua doesn't write to it).
9614            return Some("(vararg table)".to_string());
9615        }
9616        let pc = (f.pc as usize).saturating_sub(1);
9617        let mut active: Vec<&crate::runtime::LocVar> = proto
9618            .locvars
9619            .iter()
9620            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9621            .collect();
9622        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9623        let mut idx: i64 = n - 1;
9624        if let Some(vs) = vararg_slot
9625            && n > vs
9626        {
9627            idx -= 1;
9628        }
9629        let idx = idx as usize;
9630        let (name, reg) = if let Some(lv) = active.get(idx) {
9631            (lv.name.to_string(), lv.reg)
9632        } else {
9633            // PUC `luaG_findlocal` fallback into the temporary window —
9634            // bounded by the next frame's func slot (see local_at).
9635            let limit = self
9636                .frames
9637                .get(fi + 1)
9638                .and_then(|cf| cf.lua())
9639                .map(|nf| nf.func_slot)
9640                .unwrap_or_else(|| self.top.max(f.base));
9641            let temp_reg = idx as u32;
9642            if f.base + temp_reg >= limit {
9643                return None;
9644            }
9645            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9646        };
9647        let slot = (f.base + reg) as usize;
9648        if let Some(s) = self.stack.get_mut(slot) {
9649            *s = v;
9650        }
9651        Some(name)
9652    }
9653
9654    /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9655    /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9656    /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9657    /// vararg indexing past `n_varargs`, or for a register past the live
9658    /// window. Naming follows the same priority as `local_at`: named locals,
9659    /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9660    /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9661    /// window.
9662    pub(crate) fn local_at_coro(
9663        &self,
9664        co: Gc<crate::runtime::Coro>,
9665        level: i64,
9666        n: i64,
9667    ) -> Option<(String, Value)> {
9668        if level < 1 || n == 0 {
9669            return None;
9670        }
9671        let frames = &co.frames;
9672        // Logical level: iterate Lua frames from the top.
9673        let lua_indices: Vec<usize> = (0..frames.len())
9674            .rev()
9675            .filter(|&i| frames[i].lua().is_some())
9676            .collect();
9677        let fi = *lua_indices.get((level - 1) as usize)?;
9678        let f = frames[fi].lua()?;
9679        if n < 0 {
9680            let i = (-n) as u32;
9681            if i == 0 || i > f.n_varargs {
9682                return None;
9683            }
9684            let val = co
9685                .stack
9686                .get((f.func_slot + i) as usize)
9687                .copied()
9688                .unwrap_or(Value::Nil);
9689            return Some((self.vararg_locvar_name().to_string(), val));
9690        }
9691        let proto = f.closure.proto;
9692        let num_params = proto.num_params as i64;
9693        let vararg_slot = if proto.has_vararg_table_pseudo {
9694            Some(num_params + 1)
9695        } else {
9696            None
9697        };
9698        if vararg_slot == Some(n) {
9699            return Some(("(vararg table)".to_string(), Value::Nil));
9700        }
9701        let pc = (f.pc as usize).saturating_sub(1);
9702        let mut active: Vec<&crate::runtime::LocVar> = proto
9703            .locvars
9704            .iter()
9705            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9706            .collect();
9707        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9708        let mut idx: i64 = n - 1;
9709        if let Some(vs) = vararg_slot
9710            && n > vs
9711        {
9712            idx -= 1;
9713        }
9714        let idx = idx as usize;
9715        if let Some(lv) = active.get(idx) {
9716            let val = co
9717                .stack
9718                .get((f.base + lv.reg) as usize)
9719                .copied()
9720                .unwrap_or(Value::Nil);
9721            return Some((lv.name.to_string(), val));
9722        }
9723        let limit = frames
9724            .get(fi + 1)
9725            .and_then(|cf| cf.lua())
9726            .map(|nf| nf.func_slot)
9727            .unwrap_or(co.top.max(f.base));
9728        let temp_reg = idx as u32;
9729        if f.base + temp_reg < limit {
9730            let val = co
9731                .stack
9732                .get((f.base + temp_reg) as usize)
9733                .copied()
9734                .unwrap_or(Value::Nil);
9735            return Some((self.lua_temporary_locvar_name().to_string(), val));
9736        }
9737        None
9738    }
9739
9740    /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
9741    /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
9742    pub(crate) fn local_set_coro(
9743        &mut self,
9744        co: Gc<crate::runtime::Coro>,
9745        level: i64,
9746        n: i64,
9747        v: Value,
9748    ) -> Option<String> {
9749        if level < 1 || n == 0 {
9750            return None;
9751        }
9752        let lua_indices: Vec<usize> = (0..co.frames.len())
9753            .rev()
9754            .filter(|&i| co.frames[i].lua().is_some())
9755            .collect();
9756        let fi = *lua_indices.get((level - 1) as usize)?;
9757        let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
9758            let f = co.frames[fi].lua()?;
9759            (
9760                f.func_slot,
9761                f.n_varargs,
9762                f.base,
9763                f.closure.proto,
9764                co.top.max(f.base),
9765                co.frames
9766                    .get(fi + 1)
9767                    .and_then(|cf| cf.lua())
9768                    .map(|nf| nf.func_slot),
9769            )
9770        };
9771        if n < 0 {
9772            let i = (-n) as u32;
9773            if i == 0 || i > n_varargs {
9774                return None;
9775            }
9776            let slot = (func_slot + i) as usize;
9777            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9778            let stack = unsafe { &mut co.as_mut().stack };
9779            if let Some(s) = stack.get_mut(slot) {
9780                *s = v;
9781            }
9782            // co.stack values are traced — once-per-call barrier so propagate
9783            // sees the new value if co was already BLACK this cycle.
9784            self.heap
9785                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9786            return Some(self.vararg_locvar_name().to_string());
9787        }
9788        let num_params = proto.num_params as i64;
9789        let vararg_slot = if proto.has_vararg_table_pseudo {
9790            Some(num_params + 1)
9791        } else {
9792            None
9793        };
9794        if vararg_slot == Some(n) {
9795            return Some("(vararg table)".to_string());
9796        }
9797        let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
9798        let mut active: Vec<&crate::runtime::LocVar> = proto
9799            .locvars
9800            .iter()
9801            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9802            .collect();
9803        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9804        let mut idx: i64 = n - 1;
9805        if let Some(vs) = vararg_slot
9806            && n > vs
9807        {
9808            idx -= 1;
9809        }
9810        let idx = idx as usize;
9811        let (name, reg) = if let Some(lv) = active.get(idx) {
9812            (lv.name.to_string(), lv.reg)
9813        } else {
9814            let limit = next_func_slot.unwrap_or(top_for_temp);
9815            let temp_reg = idx as u32;
9816            if base + temp_reg >= limit {
9817                return None;
9818            }
9819            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9820        };
9821        let slot = (base + reg) as usize;
9822        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9823        let stack = unsafe { &mut co.as_mut().stack };
9824        if let Some(s) = stack.get_mut(slot) {
9825            *s = v;
9826        }
9827        // co.stack values are traced — once-per-call barrier so propagate
9828        // sees the new value if co was already BLACK this cycle.
9829        self.heap
9830            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9831        Some(name)
9832    }
9833
9834    /// Frame info for a level on a suspended coroutine (PUC
9835    /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
9836    /// Returns the closure + currentline + extraargs + istailcall for the
9837    /// level-th Lua activation in `co.frames`. None if level overshoots.
9838    pub(crate) fn coro_frame_info(
9839        &self,
9840        co: Gc<crate::runtime::Coro>,
9841        level: i64,
9842    ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
9843        if level < 1 {
9844            return None;
9845        }
9846        let lua_indices: Vec<usize> = (0..co.frames.len())
9847            .rev()
9848            .filter(|&i| co.frames[i].lua().is_some())
9849            .collect();
9850        let fi = *lua_indices.get((level - 1) as usize)?;
9851        let f = co.frames[fi].lua()?;
9852        let proto = f.closure.proto;
9853        let pc = (f.pc as usize)
9854            .saturating_sub(1)
9855            .min(proto.lines.len().saturating_sub(1));
9856        let line = proto.lines.get(pc).copied().unwrap_or(0);
9857        Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
9858    }
9859
9860    /// Whether `level` resolves to any live activation (PUC lua_getstack).
9861    pub(crate) fn level_in_range(&self, level: i64) -> bool {
9862        self.dbg_frame(level).is_some()
9863    }
9864
9865    /// PUC's debug-API placeholder for an unnamed vararg slot returned by
9866    /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
9867    /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
9868    /// 5.3 :195 / 5.4 :286 baseline on their respective form.
9869    pub(crate) fn vararg_locvar_name(&self) -> &'static str {
9870        if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
9871            "(*vararg)"
9872        } else {
9873            "(vararg)"
9874        }
9875    }
9876
9877    /// PUC's debug-API placeholder for an unnamed temporary on a C
9878    /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
9879    /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
9880    /// their spelling.
9881    pub(crate) fn temporary_locvar_name(&self) -> &'static str {
9882        if matches!(
9883            self.version,
9884            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9885        ) {
9886            // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
9887            // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
9888            // to `(C temporary)`.
9889            "(*temporary)"
9890        } else {
9891            "(C temporary)"
9892        }
9893    }
9894
9895    /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
9896    /// (an arithmetic intermediate sitting past the last named local on a
9897    /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
9898    /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
9899    /// spelling.
9900    pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
9901        if matches!(
9902            self.version,
9903            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9904        ) {
9905            "(*temporary)"
9906        } else {
9907            "(temporary)"
9908        }
9909    }
9910
9911    /// The Lua closure running at `level` on the current thread, or `None`
9912    /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
9913    /// need this to reach the function whose env they read or rewrite.
9914    pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
9915        // `DbgKind::Tail` also falls into the else branch — a tail-call
9916        // placeholder has no closure of its own, so PUC's `lua_getstack` +
9917        // `getfunc` for that level returns no function, and `getfenv(level)`
9918        // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
9919        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9920            return None;
9921        };
9922        Some(self.frames[fi].lua()?.closure)
9923    }
9924
9925    pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
9926        if level < 1 {
9927            return false;
9928        }
9929        let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
9930        (level as usize) <= count
9931    }
9932
9933    pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
9934        if level < 1 {
9935            return None;
9936        }
9937        // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
9938        // activation counts as a level, and each Lua activation's
9939        // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
9940        // dropped the synthetic shape: `istailcall` becomes a flag on the
9941        // real frame and Cont activations no longer count separately.
9942        // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
9943        // `getinfo(2).func == g1` pins the 5.2+ shape.
9944        let v51 = self.version <= LuaVersion::Lua51;
9945        let mut lvl = level;
9946        for fi in (0..self.frames.len()).rev() {
9947            match &self.frames[fi] {
9948                CallFrame::Lua(f) => {
9949                    lvl -= 1;
9950                    if lvl == 0 {
9951                        return Some(DbgKind::Lua(fi));
9952                    }
9953                    if v51 {
9954                        // 5.1 reports one synthetic CIST_TAIL level per
9955                        // collapsed tail call (PUC `lua_getstack` subtracts
9956                        // `ci->u.l.tailcalls` from the remaining level).
9957                        for _ in 0..f.tailcalls {
9958                            lvl -= 1;
9959                            if lvl == 0 {
9960                                return Some(DbgKind::Tail(fi));
9961                            }
9962                        }
9963                    }
9964                    if f.from_c {
9965                        lvl -= 1;
9966                        if lvl == 0 {
9967                            return Some(DbgKind::C(fi));
9968                        }
9969                    }
9970                }
9971                CallFrame::Cont(_) => {
9972                    if !v51 {
9973                        continue;
9974                    }
9975                    lvl -= 1;
9976                    if lvl == 0 {
9977                        let parent = (0..fi)
9978                            .rev()
9979                            .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
9980                        return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
9981                    }
9982                }
9983            }
9984        }
9985        None
9986    }
9987
9988    pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9989        let f = self.frames[fi].lua()?;
9990        // metamethod handler frames carry the event tag (e.g. "close" for
9991        // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
9992        if f.is_hook {
9993            return Some(("hook", "?".to_string()));
9994        }
9995        if let Some(tm) = f.tm {
9996            return Some(("metamethod", tm_debug_name(self.version, tm)));
9997        }
9998        // a frame entered across a C boundary has no naming call instruction
9999        if fi == 0 || f.from_c {
10000            return None;
10001        }
10002        // the caller's call instruction names this frame; a continuation frame
10003        // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
10004        // already short-circuits those.
10005        let caller = self.frames[fi - 1].lua()?;
10006        let caller_proto = caller.closure.proto;
10007        let p: &crate::runtime::Proto = &caller_proto;
10008        let call_pc = (caller.pc as usize).checked_sub(1)?;
10009        let instr = *p.code.get(call_pc)?;
10010        match instr.op() {
10011            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10012            Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10013            _ => None,
10014        }
10015    }
10016
10017    /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
10018    /// (PUC names a C function from the call instruction that invoked it). The
10019    /// native was called by the nearest Lua frame below `fi` (skipping pcall/
10020    /// xpcall continuations); that frame's call instruction names it.
10021    pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
10022        // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
10023        // the synthetic C edge between the __gc finalizer (top Lua frame, has
10024        // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
10025        // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
10026        // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
10027        // sets the tag on the handler frame only, so level 2 there still
10028        // names the calling Lua frame's call instruction (5.5 locals.lua
10029        // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
10030        if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
10031            && fr.tm == Some("gc")
10032        {
10033            let name = tm_debug_name(self.version, "gc");
10034            return Some(("metamethod", name));
10035        }
10036        let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
10037        let caller = self.frames[caller_fi].lua()?;
10038        let p = &caller.closure.proto;
10039        let call_pc = (caller.pc as usize).checked_sub(1)?;
10040        let instr = *p.code.get(call_pc)?;
10041        match instr.op() {
10042            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10043            _ => None,
10044        }
10045    }
10046
10047    /// Native value currently sitting on the synthetic C edge identified by
10048    /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
10049    /// above `fi` (each one corresponds to one native pushing the hook) and
10050    /// indexes into `running_natives` from the top, also skipping the caller
10051    /// of `getinfo` itself (the native that is currently asking).
10052    /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
10053    /// expects the just-entered C function.
10054    pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
10055        let idx = self.c_frame_native_idx(fi)?;
10056        Some(Value::Native(self.running_natives[idx]))
10057    }
10058
10059    /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
10060    /// so `local_at` can index the native's argument window like PUC's
10061    /// `(C temporary)` path. Returns `None` when no matching native exists
10062    /// (e.g. the C edge corresponds to a non-native boundary).
10063    pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
10064        let idx = self.c_frame_native_idx(fi)?;
10065        self.running_native_slots.get(idx).copied()
10066    }
10067
10068    fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
10069        let n_above = self.frames[fi..]
10070            .iter()
10071            .filter_map(CallFrame::lua)
10072            .filter(|f| f.from_c)
10073            .count();
10074        if n_above == 0 {
10075            return None;
10076        }
10077        // running_natives.last() is the native currently executing (the one
10078        // that called getinfo). Pop it conceptually, then take the n_above-th
10079        // entry from the top of what remains.
10080        let nr = self.running_natives.len().checked_sub(1)?;
10081        nr.checked_sub(n_above)
10082    }
10083
10084    /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
10085    /// native whose function pointer matches `target`, and return its qualified
10086    /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
10087    /// `None` if no match is found. Used by `arg_error` when the running native
10088    /// was invoked from another native (PUC `ar.name == NULL` at level 0).
10089    pub(crate) fn pushglobalfuncname(
10090        &mut self,
10091        target: crate::runtime::value::NativeFn,
10092    ) -> Option<String> {
10093        let pkg_k = Value::Str(self.heap.intern(b"package"));
10094        let pkg = match self.globals().get(pkg_k) {
10095            Value::Table(t) => t,
10096            _ => return None,
10097        };
10098        let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10099        let loaded = match pkg.get(loaded_k) {
10100            Value::Table(t) => t,
10101            _ => return None,
10102        };
10103        let matches = |v: Value| -> bool {
10104            matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10105        };
10106        let mut k = Value::Nil;
10107        while let Ok(Some((nk, nv))) = loaded.next(k) {
10108            k = nk;
10109            let Value::Str(outer) = nk else { continue };
10110            let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10111            if matches(nv) {
10112                return Some(if outer == "_G" { String::new() } else { outer });
10113            }
10114            if let Value::Table(inner_t) = nv {
10115                let mut k2 = Value::Nil;
10116                while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10117                    k2 = nk2;
10118                    if matches(nv2)
10119                        && let Value::Str(inner) = nk2
10120                    {
10121                        let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10122                        return Some(if outer == "_G" {
10123                            inner
10124                        } else {
10125                            format!("{outer}.{inner}")
10126                        });
10127                    }
10128                }
10129            }
10130        }
10131        None
10132    }
10133
10134    /// Name and namewhat of the native currently running on behalf of the top
10135    /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10136    /// `luaL_argerror` rewrite a method call's self-argument error.
10137    pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10138        let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10139        let p = &caller.closure.proto;
10140        let call_pc = (caller.pc as usize).checked_sub(1)?;
10141        let instr = *p.code.get(call_pc)?;
10142        match instr.op() {
10143            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10144            _ => None,
10145        }
10146    }
10147
10148    pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10149        let f = self.frames[fi].lua().expect("Lua frame");
10150        let proto = f.closure.proto;
10151        let pc = (f.pc as usize)
10152            .saturating_sub(1)
10153            .min(proto.lines.len().saturating_sub(1));
10154        let line = proto.lines.get(pc).copied().unwrap_or(0);
10155        // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10156        // (independent of any later write to a materialized vararg table's `n`).
10157        // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10158        // any nonzero `tailcalls` count flips it true.
10159        (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10160    }
10161
10162    /// Read an upvalue cell of a closure (debug.getupvalue).
10163    pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10164        match cl.upvals()[idx].state() {
10165            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10166            UpvalState::Closed(v) => v,
10167        }
10168    }
10169
10170    /// Write an upvalue cell of a closure (debug.setupvalue).
10171    pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10172        let uv = cl.upvals()[idx];
10173        match uv.state() {
10174            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10175            UpvalState::Closed(_) => {
10176                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10177                unsafe { uv.as_mut() }.set_closed(v);
10178                self.heap
10179                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10180            }
10181        }
10182    }
10183
10184    /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10185    /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10186    /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10187    /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10188    /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10189    /// else `"function <src:line_defined>"` for an anonymous Lua function.
10190    /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10191    /// Walks the coroutine's saved frames and prepends a synthetic C-level
10192    /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10193    /// (its `resume_at` marker is set). `level` skips entries from the top
10194    /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10195    /// frame; etc.). db.lua :764-:768 sample several levels.
10196    pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10197        use crate::runtime::CoroStatus;
10198        const LEVELS1: usize = 10;
10199        const LEVELS2: usize = 11;
10200        #[derive(Clone, Copy)]
10201        enum VFrame<'a> {
10202            Lua(&'a crate::runtime::function::Frame),
10203            CPcall,
10204            CXpcall,
10205            CYield,
10206            /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10207            /// call collapsed into the next Lua frame down the chain.
10208            Tail,
10209        }
10210        let v51 = self.version <= LuaVersion::Lua51;
10211        let mut visible: Vec<VFrame<'_>> = Vec::new();
10212        // PUC's level 0 entry on a suspended coroutine is the C call where it
10213        // paused — `coroutine.yield` for a yielded thread.
10214        if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10215            visible.push(VFrame::CYield);
10216        }
10217        for cf in co.frames.iter().rev() {
10218            match cf {
10219                CallFrame::Lua(f) => {
10220                    visible.push(VFrame::Lua(f));
10221                    if v51 {
10222                        for _ in 0..f.tailcalls {
10223                            visible.push(VFrame::Tail);
10224                        }
10225                    }
10226                }
10227                CallFrame::Cont(nc) => match nc.kind {
10228                    ContKind::Pcall => visible.push(VFrame::CPcall),
10229                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10230                    _ => {}
10231                },
10232            }
10233        }
10234        if level < 0 {
10235            level = 0;
10236        }
10237        if (level as usize) >= visible.len() {
10238            return Vec::new();
10239        }
10240        let visible = &visible[level as usize..];
10241        let total = visible.len();
10242        let mut out = Vec::new();
10243        // To name a Lua frame, PUC consults the caller's OP_CALL via
10244        // getobjname: find the index `fi` of the current frame in co.frames,
10245        // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10246        let coro_frame_name = |frames: &[CallFrame],
10247                               target: &crate::runtime::function::Frame|
10248         -> Option<(&'static str, String)> {
10249            let fi = frames
10250                .iter()
10251                .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10252            if fi == 0 || target.from_c {
10253                return None;
10254            }
10255            let caller = frames[fi - 1].lua()?;
10256            let p = &caller.closure.proto;
10257            let call_pc = (caller.pc as usize).checked_sub(1)?;
10258            let instr = *p.code.get(call_pc)?;
10259            match instr.op() {
10260                Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10261                Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10262                _ => None,
10263            }
10264        };
10265        let frames = &co.frames;
10266        let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10267            VFrame::Lua(f) => {
10268                let proto = f.closure.proto;
10269                let src = chunk_display_name(proto.source.as_ptr());
10270                let pc = (f.pc as usize)
10271                    .saturating_sub(1)
10272                    .min(proto.lines.len().saturating_sub(1));
10273                let line = proto.lines.get(pc).copied().unwrap_or(0);
10274                out.extend_from_slice(b"\n\t");
10275                out.extend_from_slice(src);
10276                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10277                if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10278                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10279                } else if proto.line_defined == 0 {
10280                    out.extend_from_slice(b"main chunk");
10281                } else {
10282                    out.extend_from_slice(
10283                        format!(
10284                            "function <{}:{}>",
10285                            String::from_utf8_lossy(src),
10286                            proto.line_defined
10287                        )
10288                        .as_bytes(),
10289                    );
10290                }
10291            }
10292            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10293            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10294            VFrame::CYield => {
10295                // PUC `pushglobalfuncname` reports `yield` as
10296                // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10297                // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10298                // `'yield'` (5.5 :841).
10299                let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10300                if qualified {
10301                    out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10302                } else {
10303                    out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10304                }
10305            }
10306            VFrame::Tail => {
10307                // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10308                // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10309                // :403 asserts these appear once per collapsed tail call.
10310                out.extend_from_slice(b"\n\t(...tail calls...)");
10311            }
10312        };
10313        if total <= LEVELS1 + LEVELS2 {
10314            for &v in visible {
10315                emit(&mut out, v);
10316            }
10317        } else {
10318            for &v in &visible[..LEVELS1] {
10319                emit(&mut out, v);
10320            }
10321            let skip = total - LEVELS1 - LEVELS2;
10322            out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10323            for &v in &visible[total - LEVELS2..] {
10324                emit(&mut out, v);
10325            }
10326        }
10327        out
10328    }
10329
10330    pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10331        // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10332        // (11) bottom frames; if there are more, the middle is collapsed into
10333        // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10334        // overflow traceback would balloon to tens of megabytes (errors.lua's
10335        // stack-overflow test ran string.gmatch over the resulting buffer).
10336        const LEVELS1: usize = 10;
10337        const LEVELS2: usize = 11;
10338        // Collect visible frames in top-down order (deepest first). Both Lua
10339        // activations and pcall/xpcall continuations (which stand in for a
10340        // C-level pcall on the stack) are visible; PUC's traceback enumerates
10341        // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10342        #[derive(Clone, Copy)]
10343        enum VFrame {
10344            Lua(usize),
10345            CPcall,
10346            CXpcall,
10347        }
10348        let mut visible: Vec<VFrame> = Vec::new();
10349        for (fi, cf) in self.frames.iter().enumerate().rev() {
10350            match cf {
10351                CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10352                CallFrame::Cont(nc) => match nc.kind {
10353                    ContKind::Pcall => visible.push(VFrame::CPcall),
10354                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10355                    _ => {}
10356                },
10357            }
10358        }
10359        // PUC `luaL_traceback` starts enumerating at the given `level` (in
10360        // terms of L1's CallInfo chain). For the running-thread case the C
10361        // frame for debug.traceback itself is level 0 and luna's `visible`
10362        // doesn't include it — so level=1 (PUC default) means "emit from the
10363        // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10364        // the top. level<=0 emits nothing extra here (d_traceback handles the
10365        // "[C]: in function 'traceback'" prefix for level==0 separately).
10366        let skip = (level - 1).max(0) as usize;
10367        if skip >= visible.len() {
10368            return Vec::new();
10369        }
10370        let visible = &visible[skip..];
10371        let total = visible.len();
10372        let mut out = Vec::new();
10373        let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10374            VFrame::Lua(fi) => {
10375                let f = this.frames[fi].lua().expect("Lua frame");
10376                let proto = f.closure.proto;
10377                let src = chunk_display_name(proto.source.as_ptr());
10378                let pc = (f.pc as usize)
10379                    .saturating_sub(1)
10380                    .min(proto.lines.len().saturating_sub(1));
10381                let line = proto.lines.get(pc).copied().unwrap_or(0);
10382                out.extend_from_slice(b"\n\t");
10383                out.extend_from_slice(src);
10384                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10385                if let Some((namewhat, name)) = this.frame_name(fi) {
10386                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10387                } else if proto.line_defined == 0 {
10388                    out.extend_from_slice(b"main chunk");
10389                } else {
10390                    out.extend_from_slice(
10391                        format!(
10392                            "function <{}:{}>",
10393                            String::from_utf8_lossy(src),
10394                            proto.line_defined
10395                        )
10396                        .as_bytes(),
10397                    );
10398                }
10399            }
10400            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10401            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10402        };
10403        if total <= LEVELS1 + LEVELS2 {
10404            for &v in visible {
10405                emit_frame(&mut out, v, self);
10406            }
10407        } else {
10408            for &v in &visible[..LEVELS1] {
10409                emit_frame(&mut out, v, self);
10410            }
10411            let dropped = total - LEVELS1 - LEVELS2;
10412            out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10413            for &v in &visible[total - LEVELS2..] {
10414                emit_frame(&mut out, v, self);
10415            }
10416        }
10417        out
10418    }
10419}
10420
10421// ────────────────────────────────────────────────────────────────────
10422// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10423//
10424// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10425// trace-meta section after `vm.load`, resolves each entry's
10426// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10427// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10428// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10429// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10430// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10431// mcode without further plumbing — same code path the runtime JIT
10432// uses.
10433//
// Why a separate impl block: keeps the AOT API surface (two fns:
// `install_aot_trace` and `collect_proto_hashes`) easy to locate when
// grep'ing, without having to wade through the 8500-line `impl Vm`
// block above.
10437// ────────────────────────────────────────────────────────────────────
10438
10439impl Vm {
10440    /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10441    /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10442    /// fires it at the trace's `head_pc`. This is the runtime install
10443    /// API the deploy-side `luna-runtime-helpers` resolver calls once
10444    /// per AOT-emitted trace meta entry, after looking up `proto` by
10445    /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10446    ///
10447    /// # What this does
10448    ///
10449    /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10450    /// The trace's `entry` fn ptr must already point at runnable
10451    /// machine code (the AOT linker resolved the symbol at link time;
10452    /// the deploy resolver passes the address verbatim).
10453    ///
10454    /// # What this does NOT do
10455    ///
10456    /// - **No deduplication.** Calling twice with the same `head_pc`
10457    ///   pushes two entries; the dispatcher's `find` will pick the
10458    ///   first match. The deploy resolver is responsible for not
10459    ///   double-installing.
10460    /// - **No invalidation of the runtime JIT cache.** If the runtime
10461    ///   JIT later records + compiles a trace for the same
10462    ///   `(proto, head_pc)`, both coexist on `proto.traces` and the
10463    ///   dispatcher's `find` picks whichever appears first. AOT
10464    ///   traces install before any runtime recording is possible
10465    ///   (resolver runs before `vm.load` returns its first closure),
10466    ///   so AOT traces win the race for the same site.
10467    /// - **No coverage gating.** AOT traces are trusted by
10468    ///   construction — they were validated at compile time. Setting
10469    ///   `dispatchable: false` on the input would silently disable
10470    ///   dispatch; the caller controls that flag.
10471    ///
10472    /// # Safety / soundness
10473    ///
10474    /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10475    /// machine code). Soundness contract:
10476    ///
10477    /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10478    ///   In the AOT-binary deploy shape this is trivially satisfied —
10479    ///   the fn lives in the binary's `.text`.
10480    /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10481    ///   what the trace's IR actually compiled against; the dispatcher
10482    ///   uses them to marshal `reg_state` in and out without further
10483    ///   validation. A mismatch corrupts vm.stack.
10484    ///
10485    /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10486    /// invariants hold; this fn is a plain push — no validation that
10487    /// would slow the dispatcher's hot path either.
10488    pub fn install_aot_trace(
10489        &mut self,
10490        proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10491        trace: crate::jit::trace::CompiledTrace,
10492    ) {
10493        let _ = self; // resolver passes &mut Vm for symmetry with future
10494        // pending-install + hash-walk variants; nothing on `self` to
10495        // mutate today because the install target lives on the Proto.
10496        proto.traces.borrow_mut().push(TArc::new(trace));
10497    }
10498
10499    /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10500    /// reachable from `root` and return `(proto, stable_hash)` pairs
10501    /// for every Proto found. Used by the deploy-side resolver to
10502    /// match AOT-emitted `proto_hash` keys against the freshly
10503    /// `undump`'d chunk's protos.
10504    ///
10505    /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10506    /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10507    /// multiple nested closures (rare; the cache field would catch
10508    /// the closure-side dedup, not the Proto side) is reported once.
10509    ///
10510    /// # Why on `&Vm` and not a free fn
10511    ///
10512    /// Keeps the AOT install API discoverable on the Vm surface —
10513    /// `vm.collect_proto_hashes(root)` reads naturally next to
10514    /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10515    /// any Vm field, so `&self` (read-only) is enough.
10516    pub fn collect_proto_hashes(
10517        &self,
10518        root: crate::runtime::Gc<crate::runtime::function::Proto>,
10519    ) -> Vec<(
10520        crate::runtime::Gc<crate::runtime::function::Proto>,
10521        [u8; 16],
10522    )> {
10523        let _ = self;
10524        let mut out = Vec::new();
10525        let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10526            std::collections::HashSet::new();
10527        let mut queue: std::collections::VecDeque<
10528            crate::runtime::Gc<crate::runtime::function::Proto>,
10529        > = std::collections::VecDeque::new();
10530        queue.push_back(root);
10531        while let Some(p) = queue.pop_front() {
10532            let key = p.as_ptr() as *const _;
10533            if !seen.insert(key) {
10534                continue;
10535            }
10536            out.push((p, p.stable_hash()));
10537            for &child in p.protos.iter() {
10538                queue.push_back(child);
10539            }
10540        }
10541        out
10542    }
10543}