Skip to main content

luna_core/vm/
exec.rs

1//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
2//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
3//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
4//!
5//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
6//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
7//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
8//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16    AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17    MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
/// A Lua virtual machine: one OS thread's worth of Lua state.
///
/// # Threading model
///
/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
/// design. Embedders that want concurrency spawn one `Vm` per OS
/// thread (or per single-thread Tokio worker) and exchange data via
/// channels. See [`docs/threading.md`](../../docs/threading.md) for
/// canonical embedding patterns including Tokio `current_thread`,
/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
///
/// The constraint is enforced at compile time:
///
/// ```compile_fail
/// fn must_be_send<T: Send>() {}
/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
/// ```
///
/// A future `feature = "send"` (post-v1.1 sprint) will gate an
/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
pub struct Vm {
    /// The GC heap owned by this VM. Embedders normally interact via the
    /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
    /// the heap directly.
    pub heap: Heap,
    stack: Vec<Value>,
    frames: Vec<CallFrame>,
    /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
    /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
    /// helpers (debug-asserted on use). NOT consumed by readers yet;
    /// week 1 is pure scaffold. Week 2-N migrations replace readers
    /// one slice at a time, then remove `frames: Vec<CallFrame>` in
    /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
    frames_top: u32,
    /// open upvalues, sorted ascending by stack slot
    open_upvals: Vec<(u32, Gc<Upvalue>)>,
    /// to-be-closed slots, ascending
    tbc: Vec<u32>,
    /// logical stack top for multi-result sequences
    pub(crate) top: u32,
    globals: Gc<Table>,
    /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
    /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
    /// own. The string slot holds the shared metatable for all strings
    /// (populated by the string lib, P04). Settable via debug.setmetatable.
    type_mt: [Option<Gc<Table>>; 5],
    /// pre-interned metamethod event names, indexed by `Mm`
    mm_names: Vec<Gc<crate::runtime::LuaStr>>,
    /// native↔Lua nesting depth (PUC C-stack guard analogue)
    c_depth: u32,
    /// number of live pcall/xpcall continuation frames on the running thread
    /// (PUC counts these against nCcalls). Bounds protected-call recursion the
    /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
    /// with the coroutine context, since continuations survive a yield.
    pcall_depth: u32,
    /// number of non-yieldable C calls in flight on the running thread (PUC's
    /// `L->nny`). A library callback that runs via synchronous Rust recursion
    /// (sort comparator, gsub replacement) cannot be continued across a yield,
    /// so it bumps this for its duration; `coroutine.yield` inside hits the
    /// C-call boundary and errors. Always 0 at a suspend point (a yield can
    /// never cross such a call), so it needs no per-thread save/restore.
    nny: u32,
    /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
    /// stack-overflow that surfaces *inside* the handler is reported as PUC's
    /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
    /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
    /// loop)` then matches. PUC tracks this via the soft-cap window
    /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
    /// scope explicitly.
    msgh_depth: u32,
    /// set by a coroutine closing itself (`coroutine.close()` on the running
    /// thread): the to-be-closed handlers have already run; the thread must now
    /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
    /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
    /// unwind, so a protecting pcall cannot catch it) the termination.
    terminating: Option<Option<Value>>,
    /// xoshiro256** state (math.random)
    rng: [u64; 4],
    /// VM creation time (os.clock)
    started: std::time::Instant,
    version: LuaVersion,
    /// error object being threaded through a chain of __close handlers; a GC
    /// root for the duration (a handler may trigger collection)
    closing_err: Option<Value>,
    /// the coroutine whose context is currently live in the fields above;
    /// `None` while the main thread runs (P05)
    current: Option<Gc<crate::runtime::Coro>>,
    /// the main thread's saved execution context while a coroutine runs
    main_ctx: Option<SavedCtx>,
    /// set by `coroutine.yield` to suspend the running coroutine: the yielded
    /// values plus the slot/result-count needed to finish the yielding call on
    /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
    yielding: Option<(Vec<Value>, u32, i32)>,
    /// results expected by the in-flight native call (so `yield` knows how many
    /// values its call site wants when it suspends)
    native_nresults: i32,
    /// identity object for the main thread, returned by `coroutine.running`
    /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
    main_coro: Option<Gc<Coro>>,
    /// `collectgarbage` mode name ("incremental"/"generational"). The collector
    /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
    /// switches report the previous one, as PUC does.
    gc_mode: &'static str,
    /// the live-register boundary of the running thread for GC rooting (PUC's
    /// `L->top`): set precisely at each GC safe point so freed temporary
    /// registers above it are not rooted. Without this the collector roots the
    /// whole stack window, pinning weak-table values stranded in stale temps
    /// (e.g. closure.lua's `while x[1]` GC-detection loop).
    pub(crate) gc_top: u32,
    /// `collectgarbage("param", name [,value])` pacing parameters. The collector
    /// is still stop-the-world, so these are stored/returned for API fidelity
    /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
    /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
    gc_pause: i64,
    /// pacing parameter "stepmul" (see `gc_pause`)
    gc_stepmul: i64,
    /// pacing parameter "stepsize" (see `gc_pause`)
    gc_stepsize: i64,
    /// true while `__gc` finalizers are being run, so a finalizer that calls
    /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
    /// `collectgarbage` yields fail).
    gc_finalizing: bool,
    /// C ABI scratch (`capi` module): the host-visible value stack that C
    /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
    /// Kept here (instead of in a separate `LuaState` wrapper) so the
    /// trampoline that bridges to a `LuaCFunction` can safely cast the
    /// Vm pointer it already holds to the public `*mut LuaState` type
    /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
    pub capi_stack: Vec<crate::runtime::Value>,
    /// Pinned CString backing the pointer last returned by `lua_tostring`;
    /// valid until the next `lua_tostring` on the same Vm.
    pub capi_cstr_pin: Option<std::ffi::CString>,
    /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
    /// concatenate across continuation calls until a non-`tocont` call
    /// flushes; the default warnf recognises `@on`/`@off` control messages
    /// and starts disabled. luna's `emit_warn` mirrors the default warnf
    /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
    /// keep the older raise semantics).
    pub(crate) warn_state: WarnState,
    /// NOTE(review): presumably the accumulation buffer for a warning that is
    /// still being continued (see `warn_state`) — confirm against `emit_warn`.
    pub(crate) warn_buf: Vec<u8>,
    /// P09 embedding cooperative budget: a per-Vm tick counter that the run
    /// loop decrements once per dispatch turn. When it hits zero the loop
    /// raises a catchable "instruction budget exceeded" error so the embedder
    /// can yield control back to its caller (short-script eval, game
    /// frame budgets). `None` = unbounded; reset on each call via
    /// `set_instr_budget`.
    pub(crate) instr_budget: Option<i64>,
    // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
    // `self.jit` below + `crate::vm::jit_state` for field docs.
    // (Was: jit_enabled here.)
    // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
    // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
    // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
    // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
    // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
    // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
    // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
    // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
    // materialize_emit,closure_emit}_count — all moved to JitState.
    /// Bytecode-loading gate. Default `true`. Sandbox embedders should
    /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
    /// precompiled chunks (which bypass the parser's depth / opcode
    /// limits). When `false`, the loader rejects any source whose first
    /// byte is the bytecode signature `\27` ("`\27Lua`").
    pub(crate) bytecode_loading: bool,
    /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
    /// a strictly larger trust surface than luna's own dump format
    /// (third-party toolchain bugs, malformed chunks, unknown opcode
    /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
    /// through the per-dialect PUC translators in `crate::vm::dump::puc`
    /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
    /// Embedder toggles via `set_puc_bytecode_loading`.
    pub(crate) puc_bytecode_loading: bool,
    /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
    /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
    /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
    /// source exceeds this, the loader returns the PUC-shaped
    /// `not enough memory` error before the host allocator is asked to
    /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
    /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
    /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
    /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
    /// Embedders that genuinely need to load > 256 MiB sources widen the
    /// cap via [`Vm::set_loader_input_budget`].
    pub(crate) loader_input_budget: usize,
    /// In-process log of fully-emitted warnings (each entry = one flushed
    /// message, sans the "Lua warning: " prefix and trailing newline). Lets
    /// tests assert what was warned without scraping stderr.
    pub(crate) warn_log: Vec<Vec<u8>>,
    /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
    /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
    /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
    /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
    /// shape stub for db.lua :328; if other registry-keyed APIs land later
    /// they can share this table.
    pub(crate) registry: Option<Gc<Table>>,
    /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
    /// registry entry); attached to every file userdata the io library makes
    pub(crate) file_mt: Option<Gc<Table>>,
    /// io library default input/output streams (PUC registry IO_INPUT/IO_OUTPUT)
    pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
    /// default output stream (PUC registry IO_OUTPUT; see `io_input`)
    pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
    /// the running thread's debug hook state (`debug.sethook`); per-thread,
    /// swapped with the execution context on a coroutine resume/yield
    pub(crate) hook: HookState,
    /// true while the hook itself runs, so its own execution fires no events
    /// (PUC clears the mask for the duration)
    pub(crate) in_hook: bool,
    /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
    /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
    /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
    /// accumulate the count instead of capping at 1.
    pub(crate) pending_tailcalls: u32,
    /// Name of the C native that just propagated an error (captured before
    /// the native is popped from `running_natives`). Lets a dying coroutine
    /// preserve `[C]: in function '<name>'` at the top of its traceback
    /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
    /// luna's native frames are off-stack so we stash the name explicitly.
    pub(crate) errored_native: Option<String>,
    /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
    /// (relative to the activation's func slot) and the number transferred.
    /// Set just before firing a call/return hook, read by `getinfo("r")`.
    pub(crate) hook_ftransfer: u16,
    /// number of values transferred (the second half of the pair described
    /// on `hook_ftransfer`)
    pub(crate) hook_ntransfer: u16,
    /// metamethod event tag (e.g. "close") to attach to the next Lua frame
    /// pushed by `push_frame`; `close_slots` sets this before calling a
    /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
    /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
    pending_tm: Option<&'static str>,
    /// `true` when the next `push_frame` is the user hook function itself,
    /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
    /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
    pending_is_hook: bool,
    /// traceback snapshot taken at the error point (the first `unwind` entry
    /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
    /// the failed frames are popped — can still see the error point's stack
    /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
    /// stack intact; we approximate by snapshotting the string and letting
    /// `d_traceback` consume it. Cleared on Cont catch and at host-level
    /// `call_value` entry (`public_call_depth == 0`).
    pub(crate) error_traceback: Option<Vec<u8>>,
    /// nesting depth of public `call_value` entries (host vs. internal). The
    /// outermost entry (depth 0) resets per-error state (`error_traceback`);
    /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
    public_call_depth: u32,
    /// stack of native (`Value::Native`) closures currently running on the
    /// Rust call stack. `begin_call` pushes the closure before invoking
    /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
    /// native call (PUC `ar.name == NULL` at level 0 because the level-0
    /// caller is C, not Lua) and qualify the running function's name via
    /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
    pub(crate) running_natives: Vec<Gc<NativeClosure>>,
    /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
    /// the native's argument-window head and width, so `debug.getlocal`
    /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
    pub(crate) running_native_slots: Vec<(u32, u32)>,
    // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
    // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
    // trace_compiler — all moved to JitState. See `jit` below.
    /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
    /// when `chunk_compiler` / `trace_compiler` are
    /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
    ///
    /// `#[doc(hidden)] pub` so the `luna` crate's
    /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
    /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
    /// field). Not part of the embedder-facing API surface.
    #[doc(hidden)]
    pub jit: crate::vm::jit_state::JitState,

    /// B12 host roots — append-only `Vec<Value>` traced as an extra
    /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
    /// `LuaRoot`) hold indices into this vector so the underlying
    /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
    ///
    /// v1.1 strategy: append-only with explicit `unpin_all` / new Vm.
    /// Slot recycling lands in Phase 3 alongside B8 LuaUserdata, when
    /// the trade-offs between `Drop` plumbing and append-only memory
    /// growth have a richer ergonomics envelope to live in.
    ///
    /// NOTE(review): the element type is `HostRootSlot` (not a bare
    /// `Value`), and `host_roots_free` below describes a v1.3 recycled-slot
    /// pool, so the "append-only" wording above looks stale — confirm and
    /// refresh against `pin_host`.
    pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
    /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
    /// back if non-empty, else extends `host_roots`. Generation
    /// overflow at `u32::MAX` retires the slot (NOT pushed here).
    pub(crate) host_roots_free: Vec<u32>,

    /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
    /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
    /// callback). Each entry is one in-flight working buffer; `gc_roots`
    /// extends with every contained `Value` so a `collectgarbage()`
    /// inside the comparator cannot free strings/tables snapshotted
    /// here. Nested sorts push a new buffer on entry, pop on exit
    /// (sort.lua's `load(..)(); collectgarbage()` compare callback
    /// regression).
    pub(crate) sort_scratch: Vec<Vec<Value>>,

    /// v1.3 Phase ML — MacroLua compile-time macro registry.
    /// Pre-populated with built-in macros (`@quote` / `@unquote` /
    /// `@if` / `@gensym`) at construction time when `version ==
    /// LuaVersion::MacroLua`; embedders register custom macros via
    /// [`Vm::define_macro`]. The expander runs once per `load()` call
    /// between lexing and parsing (only when `is_macro_lua()`).
    pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,

    /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
    /// by `TypeId::of::<T>()` for embedder types implementing
    /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
    /// [`Vm::register_userdata`]; metatables are pinned via
    /// [`Vm::pin_host`] at registration time so the entry's
    /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
    pub(crate) userdata_metatables:
        std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,

    /// B6 — classification of the most recent error raised on this Vm.
    /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
    /// at well-known sites (syntax errors, instr-budget trips, native
    /// callback errors, type errors).
    pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,

    /// B6 — `(source_name, line)` of the most recent error. Set by the
    /// dispatcher / lexer / parser; cleared when a new call_value
    /// enters cleanly.
    pub(crate) last_error_source: Option<(String, u32)>,

    /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
    /// the dispatcher hot loop yields cooperatively (sets
    /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
    /// to `EvalFuture::poll`) instead of returning a real
    /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
    /// for the duration of the future; restored to `false` on
    /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
    /// leave it `false` so v1.0 behavior is preserved exactly.
    pub(crate) async_mode: bool,

    /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
    /// before driving a slice. The dispatcher itself does not call it
    /// (the future's poll loop does `wake_by_ref` after observing
    /// `BudgetExhausted`), but storing the waker keeps the door open
    /// for Stage 2 async natives to wake the host directly from a
    /// helper future.
    pub(crate) async_waker: Option<std::task::Waker>,

    /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
    /// `instr_budget` at the start of each `EvalFuture::poll` slice.
    /// Default 10_000 (RFC §D5). Tunable via
    /// [`Vm::set_async_slice`].
    pub(crate) async_slice_size: i64,

    /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
    /// budget exhaustion fires; checked by `exec_with` (so the
    /// sentinel propagates without `unwind` running, mirroring
    /// `yielding.is_some()`) and by `call_value_impl` (so the call
    /// frames survive for the next poll). Cleared by `drive_one`
    /// after translating it to `DispatchOutcome::BudgetExhausted`.
    pub(crate) host_yield_pending: bool,

    /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
    /// when an async-marked [`NativeClosure`] is invoked under
    /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
    /// mechanism as `host_yield_pending` — see `exec_with` +
    /// `call_value_impl`), stashes the in-flight future +
    /// post-completion context here, and surfaces them to
    /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
    /// once the future is moved out into a
    /// `DispatchOutcome::AsyncNativeAwaiting`.
    pub(crate) pending_async_native_fut:
        Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,

    /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
    /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
    /// commit the future's eventual `Ok(nret)` back into the calling
    /// frame's expected result slots. Recorded by the dispatcher;
    /// consumed by [`Vm::commit_async_native_result`] after the
    /// future resolves.
    pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
}
400
/// v1.1 B10 Stage 2 — call-site context an in-flight async native
/// needs preserved across the cooperative-yield boundary.
///
/// The dispatcher records this when it routes a `NativeClosure` with
/// `is_async == true` through the cooperative path; `EvalFuture::poll`
/// hands it back to [`Vm::commit_async_native_result`] once the
/// awaited future resolves so `finish_results` (and the post-call GC
/// checkpoint) can run as if the native had completed synchronously.
///
/// Stored in `Vm::pending_async_native_ctx` while the future is pending.
#[derive(Clone, Copy)]
pub(crate) struct AsyncNativeCallCtx {
    /// Stack slot of the called native. NOTE(review): presumably the
    /// head of its argument window, as in `Vm::running_native_slots`
    /// — confirm against `commit_async_native_result`.
    pub func_slot: u32,
    /// Recorded for parity with the sync native-call path's
    /// `native_nresults`/`gc_top` bookkeeping; reserved for Stage 3+
    /// hook firing + traceback shaping. Not yet read in Stage 2.
    #[allow(dead_code)]
    pub nargs: u32,
    /// Result count for the call. NOTE(review): presumably the number
    /// of results the call site expects, same role as
    /// `Vm::native_nresults` — confirm.
    pub nresults: i32,
    /// Recorded for Stage 3+ traceback + GC-root-window auditing.
    /// Stage 2 reads `Vm.gc_top` directly post-resume, so this is
    /// unread today; carried so a Stage 3 audit can confirm the
    /// pre-suspend root window matches the post-resume one.
    #[allow(dead_code)]
    pub gc_top: u32,
}
425
/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
/// hookcount). `func` is the Lua hook; the booleans are the PUC mask bits.
///
/// The struct is `Copy` and is stored by value both in `Vm::hook` (the
/// running thread) and in `SavedCtx::hook` (a swapped-out thread).
/// `Default` yields no hook functions, every mask bit `false` and both
/// counters zero.
#[derive(Clone, Copy, Default)]
pub struct HookState {
    /// the hook function (`None` when no hook is installed)
    pub func: Option<Value>,
    /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
    /// (Rust first); both can be installed simultaneously, but most
    /// embedders pick one.
    pub rust_func: Option<RustDebugHook>,
    /// LUA_MASKCALL — fire on function entry
    pub call: bool,
    /// LUA_MASKRET — fire on function return
    pub ret: bool,
    /// LUA_MASKLINE — fire on source-line change
    pub line: bool,
    /// LUA_MASKCOUNT — fire every `count_base` instructions
    pub count: bool,
    /// instruction count between count events (PUC basehookcount)
    pub count_base: i64,
    /// instructions left until the next count event (PUC hookcount)
    pub count_left: i64,
}
449
/// Rust-side debug hook callback (B11). Receives the `Vm` plus a
/// classified event. The callback runs synchronously in the
/// dispatcher; the hook flag (`in_hook`) is set for its duration so
/// hook recursion is suppressed.
///
/// This is a plain `fn` pointer, not a boxed closure: only free functions
/// and non-capturing closures coerce to it, so a hook cannot carry
/// captured state.
pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
/// Classified debug event delivered to a [`RustDebugHook`].
///
/// The enum is `Copy` and is passed to the hook by value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RustHookEvent {
    /// Function entry (`hook_call` analogue).
    Call,
    /// Function return (`hook_return` analogue).
    Return,
    /// Tail call entry (PUC 5.2+ separates this from a plain Call).
    /// NOTE(review): no dedicated `HOOK_MASK_*` constant exists for tail
    /// calls; presumably delivered under `HOOK_MASK_CALL` — confirm.
    TailCall,
    /// Source-line change (the `u32` is the 1-based line number).
    Line(u32),
    /// Instruction count event (fires every `count_base` instructions).
    Count,
}
470
// Mask flags for [`Vm::set_rust_debug_hook`]. Each constant occupies its
// own bit, so OR them together to subscribe to several event categories
// with a single hook installation.

/// Subscribe to function-call events.
pub const HOOK_MASK_CALL: u32 = 1 << 0;
/// Subscribe to function-return events.
pub const HOOK_MASK_RETURN: u32 = 1 << 1;
/// Subscribe to line-change events.
pub const HOOK_MASK_LINE: u32 = 1 << 2;
/// Subscribe to instruction-count events.
pub const HOOK_MASK_COUNT: u32 = 1 << 3;
480
/// A thread's swapped-out execution context (PUC per-thread stack state).
///
/// Every field has a same-named counterpart on `Vm`; `Vm::main_ctx` holds
/// one of these for the main thread while a coroutine runs.
struct SavedCtx {
    /// value stack (counterpart of `Vm::stack`)
    stack: Vec<Value>,
    /// call frames (counterpart of `Vm::frames`)
    frames: Vec<CallFrame>,
    /// open upvalues (counterpart of `Vm::open_upvals`)
    open_upvals: Vec<(u32, Gc<Upvalue>)>,
    /// to-be-closed slots (counterpart of `Vm::tbc`)
    tbc: Vec<u32>,
    /// logical stack top (counterpart of `Vm::top`)
    top: u32,
    /// live pcall/xpcall continuation count (counterpart of `Vm::pcall_depth`)
    pcall_depth: u32,
    /// debug hook state (counterpart of `Vm::hook`)
    hook: HookState,
    /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
    /// the rest of the suspended state so each thread can keep its own
    /// `setfenv(0, env)` rewire without the swap leaking into another
    /// thread (5.1 closure.lua :177).
    globals: Gc<Table>,
}
496
/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
///
/// The two `Caught*` variants mean a pcall/xpcall continuation absorbed the
/// error; `Propagated` hands the error back to the caller.
enum Unwound {
    /// caught by a pcall/xpcall continuation; resume running its caller
    Caught,
    /// caught by a continuation that was the entry-level activation; these are
    /// the call's (wrapped) results
    CaughtReturn(Vec<Value>),
    /// no protecting continuation up to `entry_depth`; propagate the error
    /// (the payload is the error being propagated)
    Propagated(LuaError),
}
507
/// A resolved debug stack level: a real Lua frame (by index into `frames`) or a
/// synthetic C frame for a call_value boundary.
pub(crate) enum DbgKind {
    /// a real Lua frame; the payload is its index into `frames`
    Lua(usize),
    /// a synthetic C level; the index is the `from_c` Lua frame it sits below,
    /// used to name the native via its invoking call instruction.
    C(usize),
    /// PUC `CIST_TAIL` placeholder — a Lua-to-Lua tail call collapsed the
    /// caller's activation, so `debug.getinfo(level)` at this slot returns
    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
    /// `func = nil` and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
    /// both shapes). The index points at the *tail-called* frame whose
    /// `is_tail` flag induced this synthetic level.
    ///
    /// The payload carries `#[allow(dead_code)]`, so it is presumably not
    /// read anywhere yet.
    Tail(#[allow(dead_code)] usize),
}
523
/// Outcome of an index/newindex/comparison fast path: either a directly
/// computed result, or a metamethod (with the receiver it resolved against) the
/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
///
/// The fast path itself never calls a metamethod; `Mm` and `CompareSynth`
/// only hand the function back to the caller.
enum MmOut {
    /// index → the looked-up value; newindex → done (raw set performed);
    /// comparison → the boolean result already known
    Done(Value),
    /// a metamethod to call; `recv` is the chain element it was found on (the
    /// extra args — key / value — are supplied by the caller)
    Mm { func: Value, recv: Value },
    /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
    /// carries `__le` — `op_compare` swaps the args and negates the result.
    /// Lives separate from `Mm` so the synth path can stay yieldable without
    /// every other Mm caller learning a swap flag they would never set.
    CompareSynth { func: Value },
}
540
/// Metamethod events; discriminants index `Vm::mm_names`.
///
/// `#[repr(usize)]` with implicit discriminants numbers the variants
/// 0, 1, 2, … in declaration order, so `variant as usize` is the table
/// index. The order below matches the entries of `MM_NAMES` one-for-one
/// (28 each); keep the two lists in step when adding an event.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
pub(crate) enum Mm {
    Index,
    NewIndex,
    Call,
    ToString,
    Metatable,
    Name,
    Eq,
    Lt,
    Le,
    Concat,
    Len,
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Pow,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
    Unm,
    BNot,
    Close,
    Gc,
    Pairs,
}
574
/// Metamethod event names with their `__` prefix, listed in the same order
/// as the `Mm` variants (entry `i` names the variant whose discriminant is
/// `i`). NOTE(review): presumably the source from which `Vm::mm_names` is
/// interned at VM construction — confirm against the constructor.
const MM_NAMES: [&str; 28] = [
    "__index",
    "__newindex",
    "__call",
    "__tostring",
    "__metatable",
    "__name",
    "__eq",
    "__lt",
    "__le",
    "__concat",
    "__len",
    "__add",
    "__sub",
    "__mul",
    "__div",
    "__mod",
    "__pow",
    "__idiv",
    "__band",
    "__bor",
    "__bxor",
    "__shl",
    "__shr",
    "__unm",
    "__bnot",
    "__close",
    "__gc",
    "__pairs",
];
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614    if version <= LuaVersion::Lua53 {
615        format!("__{tm}")
616    } else if tm == "gc" {
617        "__gc".to_string()
618    } else {
619        tm.to_string()
620    }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626    use crate::vm::isa::Op;
627    Some(match op {
628        Op::Add => "add",
629        Op::Sub => "sub",
630        Op::Mul => "mul",
631        Op::Div => "div",
632        Op::Mod => "mod",
633        Op::Pow => "pow",
634        Op::IDiv => "idiv",
635        Op::BAnd => "band",
636        Op::BOr => "bor",
637        Op::BXor => "bxor",
638        Op::Shl => "shl",
639        Op::Shr => "shr",
640        Op::Unm => "unm",
641        Op::BNot => "bnot",
642        Op::Concat => "concat",
643        Op::Len => "len",
644        Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645        Op::SetField | Op::SetTable | Op::SetI => "newindex",
646        Op::Eq | Op::EqK => "eq",
647        Op::Lt => "lt",
648        Op::Le => "le",
649        _ => return None,
650    })
651}
652
/// Upper bound on the length of an `__index` / `__newindex` chain
/// (PUC `MAXTAGLOOP`).
const MAX_TAG_LOOP: u32 = 2_000;
/// Upper bound on a chain of `__call` metamethods (PUC `MAXCCMT`, lvm.c).
/// 200 matches PUC 5.4/5.5 and is more than any reasonable program needs;
/// the previous value of `15` was tight enough to fire on calls.lua :194
/// (N=20).
const MAX_CCMT: u32 = 200;
/// Native↔Lua nesting bound — the analogue of PUC `LUAI_MAXCCALLS`.
const MAX_C_DEPTH: u32 = 200;
/// luna's own engine-level cap on the VM stack, used by call-site overflow
/// checks. At 2^20 slots it sits slightly above PUC's `LUAI_MAXSTACK`, which
/// leaves engine internals a little headroom over any single library push.
const MAX_LUA_STACK: u32 = 1 << 20;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults via
/// `lua_checkstack` before a multi-value push (`table.unpack` returning N
/// values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins it at
/// one million — `for j in {lim-10, …}` expects every j ≥ lim-10 to fail,
/// because the few slots the coroutine has already consumed push the
/// effective cap below lim-10.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// PUC 5.4+ warnf state: whether `warn` output is currently delivered.
///
/// The base library's `warn` function flips between `Off` and `On` via the
/// `@on` / `@off` control messages; any other `@<word>` control is silently
/// ignored, mirroring `lauxlib.c::checkcontrol`. A freshly constructed Vm
/// starts in `Off`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum WarnState {
    /// `warn` calls are silently dropped. This is the initial state, and the
    /// state entered by `warn("@off")`.
    Off,
    /// `warn` calls are delivered to stderr (entered by `warn("@on")`).
    On,
}
683
/// Best-effort extraction of a textual message from a `catch_unwind` payload.
///
/// A panic raised with a formatted message carries a `String`; one raised
/// with a plain string literal carries a `&'static str`. Both are returned as
/// an owned `String`. Any other payload type degrades to the fixed text
/// `"<non-string panic>"`.
///
/// Used by the native-call `catch_unwind` to fold the panic into a Lua error.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    match payload.downcast_ref::<String>() {
        Some(owned) => owned.clone(),
        None => payload.downcast_ref::<&'static str>().map_or_else(
            || "<non-string panic>".to_string(),
            |literal| (*literal).to_string(),
        ),
    }
}
697
/// Combined error type returned by [`Vm::eval`] and friends — either the
/// chunk failed to parse / compile, or it raised at runtime.
///
/// Both payload types convert into `Error` through `From`, so `?` can be
/// applied directly to a `Result` carrying either a `SyntaxError` or a
/// `LuaError`.
#[derive(Debug)]
pub enum Error {
    /// Parse or compile failure.
    Syntax(SyntaxError),
    /// Runtime error raised during execution.
    Runtime(LuaError),
}
707
708impl From<SyntaxError> for Error {
709    fn from(e: SyntaxError) -> Error {
710        Error::Syntax(e)
711    }
712}
713
714impl From<LuaError> for Error {
715    fn from(e: LuaError) -> Error {
716        Error::Runtime(e)
717    }
718}
719
/// State close: dropping a `Vm` queues every registered finalizer on the heap
/// and then runs them, before the struct's fields (including the heap) are
/// dropped.
impl Drop for Vm {
    fn drop(&mut self) {
        // state close: run `__gc` for every still-registered finalizable before
        // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
        // single pass — objects created by a closing finalizer are not
        // re-finalized (they go to the heap's free list directly).
        // Order matters: the queue must be filled before it is drained.
        self.heap.queue_all_finalizers();
        self.run_finalizers();
    }
}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742    frames.push(cf);
743    // Shadow maintenance is debug-only: release builds skip the
744    // increment + assertion entirely. The shadow's purpose in Week 1
745    // is to VERIFY the assumed invariant (frames_top == frames.len())
746    // across all push/pop sites; once Week 2+ migrates readers to
747    // consume the shadow, release will run the increment unconditionally.
748    #[cfg(debug_assertions)]
749    {
750        *frames_top += 1;
751        debug_assert_eq!(
752            *frames_top as usize,
753            frames.len(),
754            "P17-D frames_top out of sync after push",
755        );
756    }
757    #[cfg(not(debug_assertions))]
758    let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763    let r = frames.pop();
764    #[cfg(debug_assertions)]
765    {
766        if r.is_some() {
767            *frames_top = frames_top.saturating_sub(1);
768        }
769        debug_assert_eq!(
770            *frames_top as usize,
771            frames.len(),
772            "P17-D frames_top out of sync after pop",
773        );
774    }
775    #[cfg(not(debug_assertions))]
776    let _ = frames_top;
777    r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — report whether the `LUNA_AOT_PROBE`
/// diagnostic switch is on.
///
/// The environment is consulted once per process: the first call reads the
/// variable and stores the answer in a `OnceLock`, so every later call costs
/// a single atomic load and never observes subsequent changes to the
/// environment.
///
/// Returns `true` iff the variable is set to a non-empty value. The value's
/// contents are otherwise ignored, so it does not have to be valid Unicode.
/// Unset (the default) and empty both mean "off" — production deploys don't
/// bleed diagnostic prints.
fn jit_probe_enabled() -> bool {
    static PROBE_ON: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        // `var_os`, not `var`: `var` fails on values that are not valid
        // Unicode, which would make a set, non-empty variable read as "off".
        std::env::var_os("LUNA_AOT_PROBE").is_some_and(|value| !value.is_empty())
    })
}
794
795impl Vm {
796    /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797    /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798    /// the Vec replacement to keep the shadow valid.
799    #[inline(always)]
800    fn frames_resync(&mut self) {
801        // Debug-only Week 1 — see `frames_push_sync` comment.
802        #[cfg(debug_assertions)]
803        {
804            self.frames_top = self.frames.len() as u32;
805        }
806    }
807
808    // ====================================================================
809    // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810    //
811    // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812    // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813    // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814    // sites to consume them. Phase 4 removes Vec<CallFrame>.
815    //
816    // Preconditions (debug-asserted):
817    // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818    // - self.stack.len() > base + max_stack (caller has grown stack)
819    // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820    // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821    //
822    // No release-build cost when unused (LTO strips dead methods).
823    // ====================================================================
824
825    /// Write a Lua frame's closure pointer into `stack[base-2]`.
826    /// The caller must ensure `base >= 2` and the slot is within the
827    /// stack's allocated range.
828    #[inline]
829    #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830    fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831        debug_assert!(
832            base >= 2,
833            "frame closure slot needs base >= 2; got {}",
834            base
835        );
836        let idx = (base - 2) as usize;
837        debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838        self.stack[idx] = Value::Closure(cl);
839    }
840
841    /// Read a Lua frame's closure pointer from `stack[base-2]`.
842    /// Returns `None` if the slot doesn't hold a closure (caller is
843    /// expected to treat that as a corrupt frame).
844    ///
845    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846    /// to avoid the enum-match cost on the hot path. Tag check via
847    /// 1-byte load + branch + `as_closure_unchecked` payload load.
848    #[inline]
849    #[allow(dead_code)]
850    fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851        debug_assert!(base >= 2);
852        let v = self.stack.get((base - 2) as usize)?;
853        if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854            // SAFETY: tag byte just verified == CLOSURE.
855            Some(unsafe { v.as_closure_unchecked() })
856        } else {
857            None
858        }
859    }
860
861    /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862    /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863    /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864    /// cleanly through the value stack without losing bits.
865    #[inline]
866    #[allow(dead_code)]
867    fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868        debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869        let idx = (base - 1) as usize;
870        debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871        self.stack[idx] = Value::Int(marker.to_raw());
872    }
873
874    /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875    /// `None` if the slot isn't a `Value::Int` (caller treats as a
876    /// corrupt frame); the kind tag itself may still be invalid, in
877    /// which case [`FrameMarker::kind`] returns `None` on the result.
878    ///
879    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880    /// for the tag check + `as_int_unchecked` for the payload load.
881    #[inline]
882    #[allow(dead_code)]
883    fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884        debug_assert!(base >= 1);
885        let v = self.stack.get((base - 1) as usize)?;
886        if v.tag_byte() == crate::runtime::value::tag::INT {
887            // SAFETY: tag byte just verified == INT.
888            Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889                unsafe { v.as_int_unchecked() },
890            ))
891        } else {
892            None
893        }
894    }
895
    /// Build the raw `Vm` struct without main coroutine / RNG seed / library
    /// setup. Private helper called by `Vm::new_minimal` (and so, indirectly,
    /// by `Vm::new`); the caller is responsible for the rest of the bring-up.
    ///
    /// `version` selects the dialect-dependent defaults set here: the two
    /// heap flags, the reported GC mode, and whether the macro registry is
    /// pre-populated. Every other field starts at a fixed default. In
    /// particular `main_coro` is `None` and `rng` is all zeroes on return.
    fn new_inner(version: LuaVersion) -> Vm {
        let mut heap = Heap::new();
        // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
        // values strongly. gc.lua's "weak tables" section relies on that.
        heap.no_ephemeron = version <= LuaVersion::Lua51;
        // PUC 5.3 needs two GC cycles to finalize a table caught in a
        // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
        // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
        heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
        let globals = heap.new_table();
        // Intern the metamethod names in `MM_NAMES` order, so the resulting
        // list can be indexed by an `Mm` discriminant.
        let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();

        Vm {
            heap,
            stack: Vec::new(),
            frames: Vec::new(),
            // Shadow of `frames.len()`; the frame list starts empty.
            frames_top: 0,
            open_upvals: Vec::new(),
            tbc: Vec::new(),
            top: 0,
            globals,
            type_mt: [None; 5],
            mm_names,
            c_depth: 0,
            pcall_depth: 0,
            nny: 0,
            msgh_depth: 0,
            terminating: None,
            // Unseeded here; `new_minimal` seeds it after construction.
            rng: [0; 4],
            started: std::time::Instant::now(),
            version,
            closing_err: None,
            current: None,
            main_ctx: None,
            yielding: None,
            native_nresults: -1,
            // Filled in by `new_minimal`.
            main_coro: None,
            // PUC 5.4+ boots in GENERATIONAL mode (the first
            // `collectgarbage("generational")` reports "generational"
            // as the previous mode — v2.14 dialect fixture 5.4/549;
            // 5.5 behaves the same, probed against lua5.5). luna's
            // collector is a single incremental engine either way;
            // this field is the MODE REPORT the stdlib exposes.
            gc_mode: if version >= crate::version::LuaVersion::Lua54 {
                "generational"
            } else {
                "incremental"
            },
            gc_top: 0,
            // NOTE(review): 200 / 100 / 13 look like PUC's default pause,
            // step multiplier and step size — confirm against the
            // `collectgarbage` implementation.
            gc_pause: 200,
            gc_stepmul: 100,
            gc_stepsize: 13,
            gc_finalizing: false,
            capi_stack: Vec::new(),
            capi_cstr_pin: None,
            // Warnings start switched off.
            warn_state: WarnState::Off,
            warn_buf: Vec::new(),
            warn_log: Vec::new(),
            instr_budget: None,
            // `load` rejects binary chunks when `bytecode_loading` is false,
            // and passes `puc_bytecode_loading` on to `undump`.
            bytecode_loading: true,
            puc_bytecode_loading: false,
            loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
            registry: None,
            file_mt: None,
            io_input: None,
            io_output: None,
            hook: HookState::default(),
            in_hook: false,
            pending_tailcalls: 0,
            errored_native: None,
            hook_ftransfer: 0,
            hook_ntransfer: 0,
            pending_tm: None,
            pending_is_hook: false,
            error_traceback: None,
            public_call_depth: 0,
            running_natives: Vec::new(),
            running_native_slots: Vec::new(),
            // v1.1 A2 — JIT-specific state factored into `JitState`
            // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
            // `install_jit_backend` / `luaL_newstate` swap in
            // `CraneliftBackend` for callers that want JIT acceleration.
            jit: crate::vm::jit_state::JitState::with_null_backend(),
            // v1.1 B12 — host roots ticket pool for the `Lua` facade.
            host_roots: Vec::new(),
            // v1.3 Phase ML — MacroLua registry. Pre-populated with
            // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
            // when this Vm is constructed under `LuaVersion::MacroLua`.
            macro_registry: if version == LuaVersion::MacroLua {
                crate::frontend::macro_expander::MacroRegistry::with_builtins()
            } else {
                crate::frontend::macro_expander::MacroRegistry::new()
            },
            // NOTE(review): presumably the free-slot list paired with
            // `host_roots` above — confirm.
            host_roots_free: Vec::new(),
            sort_scratch: Vec::new(),
            // v1.2 Track B — LuaUserdata trait sugar's per-Vm
            // metatable cache. Populated lazily by register_userdata.
            userdata_metatables: std::collections::HashMap::new(),
            // v1.1 B6 — error classification metadata. Defaults to
            // Runtime; set at known sites (syntax / budget trip /
            // native error / type error).
            last_error_kind: crate::vm::error::LuaErrorKind::default(),
            last_error_source: None,
            // v1.1 B10 Stage 1 — async embedder fields. Defaults
            // preserve sync behavior bit-for-bit (`async_mode = false`
            // means the budget hot loop errors out exactly as v1.0).
            async_mode: false,
            async_waker: None,
            async_slice_size: 10_000,
            host_yield_pending: false,
            // v1.1 B10 Stage 2 — pending async-native state. Empty by
            // default; populated only by the dispatcher when an
            // async-marked NativeClosure is invoked under async_mode.
            pending_async_native_fut: None,
            pending_async_native_ctx: None,
        }
    }
1016
1017    /// Build a fully-loaded Vm — the default for embedders that want PUC's
1018    /// standard library surface. Equivalent to `Vm::new_minimal(version)`
1019    /// followed by `vm.open_all_libs()`.
1020    pub fn new(version: LuaVersion) -> Vm {
1021        let mut vm = Vm::new_minimal(version);
1022        vm.open_all_libs();
1023        vm
1024    }
1025
1026    /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1027    /// that want a sandbox (Redis-style scripts, in-game scripting with
1028    /// a curated API) call this and then `open_base` / `open_math` / etc.
1029    /// selectively. The Vm is otherwise fully initialized (main coroutine,
1030    /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1031    pub fn new_minimal(version: LuaVersion) -> Vm {
1032        let mut vm = Vm::new_inner(version);
1033        let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1034        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1035        unsafe { mc.as_mut() }.status = CoroStatus::Running;
1036        vm.main_coro = Some(mc);
1037        let (a, b) = vm.rng_auto_seed();
1038        vm.rng_seed(a as u64, b as u64);
1039        vm
1040    }
1041
    /// v1.1 A1 Session C — install a caller-supplied JIT backend, replacing
    /// both the chunk compiler and the trace compiler held in `self.jit`.
    ///
    /// The `luna` crate uses this to swap in its `CraneliftBackend`; tests
    /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
    /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
    /// closures already populated `Proto.jit: JitProtoState::Compiled`
    /// does NOT evict those cached entries — call right after
    /// construction for a clean swap.
    ///
    /// `chunk` and `trace` are each boxed and stored; the previously
    /// installed compilers are dropped by the assignment.
    ///
    /// Naming: `install_jit_backend` (not `install_default_jit`)
    /// because the "default" in luna-core is `NullJitBackend`; the
    /// "default JIT" lives in the `luna` crate.
    pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
    where
        C: crate::jit::IntChunkCompiler + 'static,
        T: crate::jit::TraceCompiler + 'static,
    {
        self.jit.chunk_compiler = Box::new(chunk);
        self.jit.trace_compiler = Box::new(trace);
    }
1061
    /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
    /// storage holder, replacing the one held in `self.jit.storage`.
    ///
    /// Default is [`crate::jit::NullJitStorage`];
    /// the `luna_jit` crate's `install_default_jit` pairs this with
    /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
    /// also install a fresh `CraneliftJitStorage`. Storage holds
    /// the per-`Vm` JIT cache + handle collections that used to be
    /// `thread_local!`s in `luna_jit::jit_backend`.
    ///
    /// Idempotency: re-installing storage on a Vm that already
    /// holds compiled-trace pointers WILL evict their owners (the
    /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
    /// pages). Call right after construction for a clean swap.
    pub fn install_jit_storage<S>(&mut self, storage: S)
    where
        S: crate::jit::JitStorage + 'static,
    {
        // The assignment drops the previously installed storage.
        self.jit.storage = Box::new(storage);
    }
1080
1081    /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
1082    /// reports "skipped" so every closure stays on the interpreter
1083    /// path, and the trace recorder's compile attempt always returns
1084    /// `None`. Intended for tests that want to verify the trait
1085    /// boundary works in a JIT-free configuration, and for the future
1086    /// `luna-core` build path that ships without Cranelift.
1087    ///
1088    /// Calling this on a Vm whose closures already populated
1089    /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
1090    /// cached entries — the dispatcher will still call into them. For
1091    /// a truly JIT-free run, call this immediately after construction.
1092    pub fn install_null_jit(&mut self) {
1093        self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
1094        self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
1095    }
1096
1097    /// Open the entire 5.5 standard library on a `new_minimal`-built Vm.
1098    /// `Vm::new` calls this; sandboxed embedders open libraries one at a
1099    /// time instead (`open_base`, `open_math`, `open_table`, …).
1100    pub fn open_all_libs(&mut self) {
1101        self.open_base();
1102        self.open_math();
1103        self.open_table();
1104        self.open_string();
1105        self.open_utf8();
1106        self.open_os_io();
1107        self.open_debug();
1108        self.open_coroutine();
1109        self.open_package();
1110        // PUC 5.2 introduced `bit32`; 5.3 retired it in the manual BUT
1111        // the stock 5.3 build ships -DLUA_COMPAT_5_2, which keeps the
1112        // library loaded. The diff ground truth is the default build
1113        // (v2.14 dialect fixture 5.3/535), so expose it under 5.2 AND
1114        // 5.3; 5.4 dropped the compat default for real.
1115        if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
1116            self.open_bit32();
1117        }
1118    }
1119
    /// Install the base library (`print`, `type`, `pairs`, `tostring`,
    /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
    /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
    /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
    /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
    /// `newproxy`, `gcinfo` when version == 5.1).
    ///
    /// Thin wrapper over `crate::vm::builtins::open_base`. Call it at most
    /// once per Vm.
    pub fn open_base(&mut self) {
        crate::vm::builtins::open_base(self);
    }
    /// Install the `math` standard library.
    ///
    /// Thin wrapper over `crate::vm::lib_math::open_math`.
    pub fn open_math(&mut self) {
        crate::vm::lib_math::open_math(self);
    }
    /// Install the `table` standard library.
    ///
    /// Thin wrapper over `crate::vm::lib_table::open_table`.
    pub fn open_table(&mut self) {
        crate::vm::lib_table::open_table(self);
    }
    /// Install the `string` standard library (and the shared string metatable).
    ///
    /// Thin wrapper over `crate::vm::lib_string::open_string`.
    pub fn open_string(&mut self) {
        crate::vm::lib_string::open_string(self);
    }
    /// Install the `utf8` standard library (5.3+).
    ///
    /// Thin wrapper over `crate::vm::lib_utf8::open_utf8`.
    // NOTE(review): `open_all_libs` calls this for every dialect, so any
    // "5.3+" gating would have to live inside `lib_utf8::open_utf8` — confirm.
    pub fn open_utf8(&mut self) {
        crate::vm::lib_utf8::open_utf8(self);
    }
    /// Install the `os` and `io` standard libraries together.
    ///
    /// `os` and `io` are merged because file userdata shares state with both
    /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
    /// wraps `os.execute`'s shell).
    ///
    /// Thin wrapper over `crate::vm::lib_os_io::open_os_io`.
    pub fn open_os_io(&mut self) {
        crate::vm::lib_os_io::open_os_io(self);
    }
    /// Install the `debug` standard library (introspection / hooks). Off by
    /// default for sandbox embedders: a `new_minimal`-built Vm does not have
    /// it until this is called.
    ///
    /// Thin wrapper over `crate::vm::lib_debug::open_debug`.
    pub fn open_debug(&mut self) {
        crate::vm::lib_debug::open_debug(self);
    }
    /// Install the `coroutine` standard library.
    ///
    /// Thin wrapper over `crate::vm::lib_coroutine::open_coroutine`.
    pub fn open_coroutine(&mut self) {
        crate::vm::lib_coroutine::open_coroutine(self);
    }
    /// Install the `package` library, plus the 5.1-only `module` and
    /// `package.seeall` aliases.
    ///
    /// Thin wrapper over `crate::vm::lib_os_io::open_package` (the
    /// implementation lives in the `lib_os_io` module).
    pub fn open_package(&mut self) {
        crate::vm::lib_os_io::open_package(self);
    }
    /// Install the `bit32` library.
    ///
    /// `bit32` was introduced in 5.2 and retired from the 5.3 manual in
    /// favour of native bitwise ops on 64-bit integers. `open_all_libs`
    /// nevertheless opens it under both 5.2 and 5.3, and under no other
    /// dialect. This method itself performs no version check.
    ///
    /// Thin wrapper over `crate::vm::lib_bit32::open_bit32`.
    pub fn open_bit32(&mut self) {
        crate::vm::lib_bit32::open_bit32(self);
    }
1170
1171    /// xoshiro256** next.
1172    pub(crate) fn rng_next(&mut self) -> u64 {
1173        let s = &mut self.rng;
1174        let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1175        let t = s[1] << 17;
1176        s[2] ^= s[0];
1177        s[3] ^= s[1];
1178        s[1] ^= s[2];
1179        s[0] ^= s[3];
1180        s[2] ^= t;
1181        s[3] = s[3].rotate_left(45);
1182        result
1183    }
1184
1185    /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1186    pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1187        // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1188        // state), then 16 discards to spread the seed. Matches PUC's exact
1189        // sequence so the low-level conformance test passes.
1190        self.rng = [a, 0xff, b, 0];
1191        for _ in 0..16 {
1192            self.rng_next();
1193        }
1194    }
1195
    /// Time elapsed since this Vm was constructed (os.clock approximation).
    ///
    /// Measured from the `Instant` captured in `started` when the struct was
    /// built, so this is elapsed real time on the monotonic clock rather
    /// than CPU time.
    pub(crate) fn uptime(&self) -> std::time::Duration {
        self.started.elapsed()
    }
1200
1201    /// Entropy for math.randomseed() with no arguments.
1202    pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1203        let t = std::time::SystemTime::now()
1204            .duration_since(std::time::UNIX_EPOCH)
1205            .map(|d| d.as_nanos() as u64)
1206            .unwrap_or(0);
1207        let addr = &self.rng as *const _ as u64;
1208        (t as i64, addr as i64)
1209    }
1210
1211    /// Allocate a native function object (no upvalues): builtin registration.
1212    pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1213        Value::Native(self.heap.new_native(f, Box::new([])))
1214    }
1215
1216    /// Allocate a native function object with captured upvalues.
1217    pub fn native_with(
1218        &mut self,
1219        f: crate::runtime::value::NativeFn,
1220        upvals: Box<[Value]>,
1221    ) -> Value {
1222        Value::Native(self.heap.new_native(f, upvals))
1223    }
1224
    /// Install the shared string metatable (string library, P04).
    ///
    /// Stores `mt` in slot 3 of `type_mt`; passing `None` clears the slot.
    // NOTE(review): slot 3 is presumably the string entry of the per-type
    // metatable array — confirm against the code that reads `type_mt`.
    pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
        self.type_mt[3] = mt;
    }
1229
    /// The current globals table (`_G` / `_ENV` source for new chunks).
    ///
    /// Returns a copy of the handle stored in the Vm, not a copy of the
    /// table: the returned `Gc<Table>` refers to the same heap object.
    pub fn globals(&self) -> Gc<Table> {
        self.globals
    }
1234
1235    /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1236    /// Library code that pushes a known number of fresh slots — e.g.
1237    /// `table.unpack` returning N values — consults this to refuse when
1238    /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1239    /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1240    /// coroutine's already-built table eats a few slots, so an unpack of
1241    /// ~lim values can't fit.
1242    pub(crate) fn stack_room(&self) -> i64 {
1243        PUC_MAXSTACK - (self.stack.len() as i64)
1244    }
1245
    /// Repoint the thread's "global table" used by *future* `Vm::load` calls
    /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
    /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
    /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
    ///
    /// Only the handle stored in the Vm is replaced; the previous globals
    /// table itself is not modified.
    pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
        self.globals = env;
    }
1253
    /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
    /// 5.5, or the MacroLua dialect `LuaVersion::MacroLua`). Determines
    /// numeric semantics, available standard libraries, and metamethod
    /// behavior.
    ///
    /// The value is fixed when the Vm is constructed.
    pub fn version(&self) -> LuaVersion {
        self.version
    }
1260
1261    /// Set a global by name. `v` may be any `IntoValue`: a primitive
1262    /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1263    /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1264    /// `Gc<NativeClosure>` handle.
1265    ///
1266    /// Returns `Err(LuaError)` only if the globals table overflows
1267    /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1268    /// String interning + key construction cannot fail.
1269    ///
1270    /// ```
1271    /// # use luna_core::vm::Vm;
1272    /// # use luna_core::version::LuaVersion;
1273    /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1274    /// vm.set_global("answer", 42).unwrap();
1275    /// vm.set_global("ratio", 0.5_f64).unwrap();
1276    /// vm.set_global("hello", "world").unwrap();
1277    /// let r = vm.eval("return answer, ratio, hello").unwrap();
1278    /// assert_eq!(r.len(), 3);
1279    /// ```
1280    pub fn set_global<V: crate::vm::IntoValue>(
1281        &mut self,
1282        name: &str,
1283        v: V,
1284    ) -> Result<(), LuaError> {
1285        let v = v.into_value(self);
1286        let k = Value::Str(self.heap.intern(name.as_bytes()));
1287        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1288        unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1289        self.heap
1290            .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1291        Ok(())
1292    }
1293
1294    /// Backward write barrier shorthand for native lib code: demote `t` from
1295    /// BLACK back to gray so the next propagate step re-traces its fields.
1296    /// No-op outside Propagate (parent is never BLACK at mutation time).
1297    pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1298        self.heap
1299            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1300    }
1301
1302    /// Forward write barrier shorthand: a closed upvalue is a single-slot
1303    /// container — `barrier_forward` is cheaper than `barrier_back` here.
1304    /// No-op outside Propagate.
1305    pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1306        self.heap
1307            .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1308    }
1309
    /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
    /// under non-MacroLua dialects (the macro is stored but the load
    /// path only consults the registry when
    /// `self.version == LuaVersion::MacroLua`).
    ///
    /// `name` is stored without the leading `@` — source code writes
    /// `@double(x)` to invoke a macro registered as `"double"`.
    ///
    /// Forwards to `MacroRegistry::register` on this Vm's registry.
    // NOTE(review): what happens when `name` is already registered is decided
    // by `MacroRegistry::register` — presumably the new macro replaces the
    // old one; confirm.
    pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
        self.macro_registry.register(name, m);
    }
1320
    /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom).
    /// Mostly useful for tests / dogfood resets.
    ///
    /// Forwards to the per-Vm macro registry's `clear`; anything needed
    /// afterwards has to be registered again through `define_macro`.
    pub fn clear_macros(&mut self) {
        self.macro_registry.clear();
    }
1326
1327    /// Parse + compile a chunk and close it over the globals table.
1328    pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1329        // Reject oversize input *before* handing the parser/lexer a
1330        // potentially multi-GB slice. The PUC-shaped `not enough memory`
1331        // message keeps `heavy.lua::loadrep` compatibility: that test
1332        // accepts either `string length overflow` or `not enough memory`
1333        // as the failure mode for a feeder loop that outruns the host
1334        // allocator. See `set_loader_input_budget`.
1335        if src.len() > self.loader_input_budget {
1336            return Err(SyntaxError {
1337                line: 0,
1338                msg: b"not enough memory".to_vec(),
1339            });
1340        }
1341        // a precompiled (binary) chunk is undumped; source is parsed + compiled
1342        let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1343        if is_bytecode && !self.bytecode_loading {
1344            return Err(SyntaxError {
1345                line: 0,
1346                msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1347            });
1348        }
1349        let proto = if is_bytecode {
1350            let allow_puc = self.puc_bytecode_loading;
1351            crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1352                |msg| SyntaxError {
1353                    line: 0,
1354                    msg: msg.into_bytes(),
1355                },
1356            )?
1357        } else if self.version.is_macro_lua() {
1358            // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1359            // token vec, run the macro expander pre-pass against the
1360            // per-Vm registry, then hand the rewritten stream to
1361            // `parse_tokens`. The AST + compiler are dialect-agnostic
1362            // because by this point all `@`/quote tokens are gone.
1363            let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1364            let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1365            loop {
1366                let t = lexer.next_token()?;
1367                let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1368                raw.push(t);
1369                if eof {
1370                    break;
1371                }
1372            }
1373            // Drop the trailing Eof — expander operates on the body and
1374            // `parse_tokens` reinserts Eof when it runs out of tokens.
1375            raw.pop();
1376            let expanded = self.macro_registry.expand(raw)?;
1377            let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1378            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1379        } else {
1380            let ast = parse(src, self.version)?;
1381            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1382        };
1383        // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1384        // upvalue with the globals table when the closure has *exactly* one
1385        // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1386        // function with two-or-more upvalues keeps every cell at nil; the
1387        // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1388        // :293's `assert(x() == nil)` pins this contract.
1389        let n = proto.upvals.len();
1390        let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1391        if n == 0 {
1392            // synthetic main chunk has no declared upvalues, but the engine
1393            // still expects at least one cell so the host can probe via
1394            // `debug.upvalueid` etc. Match the historical luna shape.
1395            ups.push(
1396                self.heap
1397                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1398            );
1399        } else if n == 1 {
1400            ups.push(
1401                self.heap
1402                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1403            );
1404        } else {
1405            for _ in 0..n {
1406                ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1407            }
1408        }
1409        Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1410    }
1411
    /// Compile and run `src` as an anonymous chunk; return its results.
    /// Source name in the traceback is `"=eval"`. Syntax errors are
    /// surfaced as `LuaError` carrying the formatted PUC-style message
    /// (interned through the heap so the error value composes with
    /// `pcall` / `error_text` like any runtime error).
    ///
    /// Thin wrapper: delegates to `eval_chunk` with the fixed chunk name
    /// `"=eval"`.
    pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
        self.eval_chunk(src, "=eval")
    }
1420
1421    /// Render an error value for messages/tests. Non-string errors —
1422    /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1423    /// (`"(error object is a table value)"`); embedders that need
1424    /// structured payloads should inspect `e.0` directly. Errors whose
1425    /// text starts with `"native panic:"` indicate a Rust panic
1426    /// crossed `catch_unwind` — the Vm may be inconsistent and should
1427    /// be dropped (do not reuse).
1428    pub fn error_text(&self, e: &LuaError) -> String {
1429        match e.0 {
1430            Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1431            v => format!("(error object is a {} value)", v.type_name()),
1432        }
1433    }
1434
1435    /// Render an error value the way PUC's standalone `msghandler`
1436    /// does (lua.c): strings pass through, numbers stringify, and any
1437    /// other object is given a chance at its `__tostring` metamethod
1438    /// (the result must be a string) before collapsing to the
1439    /// `"(error object is a … value)"` tag. Needs `&mut self` because
1440    /// `__tostring` runs arbitrary Lua — `error_text` remains the
1441    /// non-executing variant (v2.14 CV.2, fixture 5.5/321).
1442    pub fn error_display(&mut self, e: &LuaError) -> String {
1443        match e.0 {
1444            Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1445            v @ (Value::Int(_) | Value::Float(_)) => {
1446                String::from_utf8_lossy(&self.tostring_basic(v)).into_owned()
1447            }
1448            v => {
1449                let mm = self.get_mm(v, Mm::ToString);
1450                if !mm.is_nil()
1451                    && let Ok(r) = self.call_value(mm, &[v])
1452                    && let Some(Value::Str(s)) = r.first()
1453                {
1454                    return String::from_utf8_lossy(s.as_bytes()).into_owned();
1455                }
1456                format!("(error object is a {} value)", v.type_name())
1457            }
1458        }
1459    }
1460
1461    /// Call any callable value from the host (or from natives like pcall).
1462    pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1463        // host-level entry (no enclosing exec): drop any error state from a
1464        // prior call that propagated uncaught (`error_traceback` would
1465        // otherwise leak into the next debug.traceback call).
1466        if self.public_call_depth == 0 {
1467            self.error_traceback = None;
1468        }
1469        self.public_call_depth += 1;
1470        // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1471        // chunk whose body fits the S1 int-arith whitelist short-circuits
1472        // the whole interpreter dispatch and runs straight through the
1473        // mmap'd native code. The lookup is one Cell::get + one match —
1474        // the slow path (compile attempt on first reach) is paid once per
1475        // Proto.
1476        if args.is_empty()
1477            && let Value::Closure(cl) = f
1478            && let Some(vs) = self.try_jit_call(cl)
1479        {
1480            self.public_call_depth -= 1;
1481            return Ok(vs);
1482        }
1483        let r = self.call_value_impl(f, args, true);
1484        self.public_call_depth -= 1;
1485        r
1486    }
1487
1488    /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1489    /// `Some(values)` when the cached native fn is callable for a
1490    /// zero-arg call. (Non-zero-arg dispatch is handled by
1491    /// `try_jit_call_op` from inside `begin_call`.)
1492    fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1493        use crate::runtime::function::JitProtoState;
1494        if !self.jit.enabled {
1495            return None;
1496        }
1497        let proto = cl.proto;
1498        if let JitProtoState::Untried = proto.jit.get() {
1499            self.populate_jit_cache(proto);
1500        }
1501        match proto.jit.get() {
1502            JitProtoState::Compiled {
1503                entry,
1504                num_args: 0,
1505                returns_one,
1506                arg_float_mask: _,
1507                arg_table_mask: _,
1508                ret_is_float,
1509                ret_is_table,
1510            } => {
1511                // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1512                let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1513                // P11-S5c / S5d.J — install the active Vm + closure
1514                // for any Rust helper the JIT'd code may call (e.g.
1515                // `luna_jit_new_table`, `luna_jit_upval_get`) via
1516                // cranelift `Linkage::Import`. RAII clear on return.
1517                // Chunks with no upvalue reads don't touch the closure
1518                // slot, paying nothing.
1519                // v1.1 A1 Session A — route through chunk_compiler so
1520                // the NullJitBackend path stays inert. Raw-ptr arg
1521                // avoids the &mut self borrow conflict against the
1522                // shared self.jit.chunk_compiler read.
1523                let vm_ptr: *mut Vm = self;
1524                let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1525                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1526                let r = unsafe { f() };
1527                drop(_jit_vm_guard);
1528                // P11-S5d.E' — a JIT helper may have detected a metatable
1529                // on a table operand and parked a deopt request here.
1530                // Discard the sentinel value and return None so the caller
1531                // re-runs the call through the interpreter, which honours
1532                // __index/__newindex.
1533                if self.jit.pending_err.take().is_some() {
1534                    return None;
1535                }
1536                Some(if returns_one {
1537                    let v = if ret_is_float {
1538                        Value::Float(f64::from_bits(r as u64))
1539                    } else if ret_is_table {
1540                        Value::Table(crate::runtime::Gc::from_ptr(
1541                            r as *mut crate::runtime::Table,
1542                        ))
1543                    } else {
1544                        Value::Int(r)
1545                    };
1546                    vec![v]
1547                } else {
1548                    Vec::new()
1549                })
1550            }
1551            // Non-zero-arg Compiled state: call_value's empty-args
1552            // fast path can't drive it. Op::Call handles those.
1553            JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1554        }
1555    }
1556
1557    /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1558    /// `Compiled { … }` or `Failed`; idempotent on already-populated
1559    /// states (call sites guard with a get before invoking).
1560    ///
1561    /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1562    /// `proto.code`. Compiled artefacts live in the thread-local
1563    /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1564    /// `Vm`s loading the same source skip the cranelift compile step
1565    /// entirely.
1566    fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1567        use crate::runtime::function::JitProtoState;
1568        let version = self.version();
1569        let pre53 = version <= crate::version::LuaVersion::Lua53;
1570        // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1571        // are Float). The JIT's `GetUpval` ValueRead path uses this
1572        // to default-pin upvalue reads to Float without a tag check.
1573        let float_only = version <= crate::version::LuaVersion::Lua52;
1574        // v2.0 Track J sub-step J-B — split-borrow JitState so the
1575        // trait method can take `&mut dyn JitStorage` without
1576        // double-borrowing self.jit.
1577        let jit = &mut self.jit;
1578        let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1579        match jit
1580            .chunk_compiler
1581            .try_compile(storage, proto, pre53, float_only)
1582        {
1583            crate::jit::CompileResult::Compiled {
1584                entry,
1585                num_args,
1586                returns_one,
1587                arg_float_mask,
1588                arg_table_mask,
1589                ret_is_float,
1590                ret_is_table,
1591            } => {
1592                proto.jit.set(JitProtoState::Compiled {
1593                    entry,
1594                    num_args,
1595                    returns_one,
1596                    arg_float_mask,
1597                    arg_table_mask,
1598                    ret_is_float,
1599                    ret_is_table,
1600                });
1601            }
1602            crate::jit::CompileResult::Skipped => {
1603                proto.jit.set(JitProtoState::Failed);
1604            }
1605        }
1606    }
1607
    /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
    /// before `push_frame`. Returns `true` when the call was handled
    /// in-place (no new Lua frame); `false` means the caller must run the
    /// call through the interpreter.
    ///
    /// Constraints: the cached arity must match the call site's `nargs`,
    /// and every arg slot must fit the kind the compiled code expects for
    /// it (Int, Float — an Int is promoted — or Table, per the cached
    /// masks). The call site's `wanted` result count is honoured by
    /// `finish_results`. Also bails when a debug hook is armed —
    /// JIT'd code does not fire line / call / return hooks, so any
    /// active hook makes the interpreter the source of truth.
    ///
    /// `func_slot` is the stack index of the callee; its arguments are read
    /// from the slots directly after it, and on success the result (if any)
    /// overwrites `func_slot`.
    fn try_jit_call_op(
        &mut self,
        cl: Gc<LuaClosure>,
        func_slot: u32,
        nargs: u32,
        wanted: i32,
    ) -> bool {
        use crate::runtime::function::JitProtoState;
        if !self.jit.enabled {
            return false;
        }
        // Any active debug hook means the interpreter has to run the
        // call so the hook gets the expected events.
        if self.hook.func.is_some() || self.hook.rust_func.is_some() {
            return false;
        }
        let proto = cl.proto;
        // First reach for this proto: attempt the compile once.
        if let JitProtoState::Untried = proto.jit.get() {
            self.populate_jit_cache(proto);
        }
        // Anything but a compiled entry (Failed / still Untried) bails.
        let JitProtoState::Compiled {
            entry,
            num_args,
            returns_one,
            arg_float_mask,
            arg_table_mask,
            ret_is_float,
            ret_is_table,
        } = proto.jit.get()
        else {
            return false;
        };
        if num_args as u32 != nargs {
            return false;
        }
        // Pack args into i64 bit-patterns per the per-slot expected
        // kind. A Float-typed slot accepts Value::Float verbatim and
        // promotes Value::Int(x) via i64 → f64; a Table-typed slot
        // accepts only Value::Table and passes the raw Gc ptr; an
        // Int-typed slot accepts only Value::Int. Any other shape
        // bails to the interpreter so the call's actual dynamics
        // (metamethod dispatch / type-coerce) take over.
        let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
            [0; crate::jit::MAX_JIT_ARITY as usize];
        for i in 0..num_args as usize {
            let v = self.stack[(func_slot + 1) as usize + i];
            // Bit `i` of each mask describes argument `i`.
            let want_float = (arg_float_mask >> i) & 1 == 1;
            let want_table = (arg_table_mask >> i) & 1 == 1;
            args[i] = match (want_table, want_float, v) {
                (true, _, Value::Table(t)) => t.as_ptr() as i64,
                (false, false, Value::Int(x)) => x,
                (false, true, Value::Float(f)) => f.to_bits() as i64,
                (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
                _ => return false,
            };
        }
        // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
        // matching guard in `try_jit_call`.
        // v1.1 A1 Session A — route through chunk_compiler.
        let vm_ptr: *mut Vm = self;
        let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
        // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
        let r = unsafe {
            // The fn-pointer type is picked by arity; unused `args` entries
            // stay zero and are never passed.
            match num_args {
                0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
                1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
                2 => {
                    (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
                }
                3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
                    args[0], args[1], args[2],
                ),
                4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
                    args[0], args[1], args[2], args[3],
                ),
                _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
            }
        };
        drop(_jit_vm_guard);
        // P11-S5d.E' — see matching path in `try_jit_call`. A helper
        // flagged a metatable on a table operand; bail to the interpreter
        // so `push_frame` runs the call from scratch.
        if self.jit.pending_err.take().is_some() {
            return false;
        }
        // Write result at func_slot, replacing the closure value, then
        // hand to finish_results to pad/truncate per the call site's
        // `wanted` count.
        if returns_one {
            // Decode the raw i64 per the compiled return kind.
            let v = if ret_is_float {
                Value::Float(f64::from_bits(r as u64))
            } else if ret_is_table {
                Value::Table(crate::runtime::Gc::from_ptr(
                    r as *mut crate::runtime::Table,
                ))
            } else {
                Value::Int(r)
            };
            self.stack[func_slot as usize] = v;
            self.finish_results(func_slot, 1, wanted);
        } else {
            self.finish_results(func_slot, 0, wanted);
        }
        true
    }
1721
1722    /// `call_value` with control over the `from_c` debug boundary. A `__close`
1723    /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1724    /// invokes it inside that ci), so it is called with `from_c = false`: its
1725    /// debug parent is the closing function, not a synthetic C level.
1726    fn call_value_impl(
1727        &mut self,
1728        f: Value,
1729        args: &[Value],
1730        from_c: bool,
1731    ) -> Result<Vec<Value>, LuaError> {
1732        if self.c_depth >= MAX_C_DEPTH {
1733            return Err(self.rt_err("stack overflow"));
1734        }
1735        self.c_depth += 1;
1736        let func_slot = self.stack.len() as u32;
1737        self.stack.push(f);
1738        self.stack.extend_from_slice(args);
1739        self.top = self.stack.len() as u32;
1740        let r = self.call_at(func_slot, args.len() as u32, from_c);
1741        self.c_depth -= 1;
1742        if r.is_err()
1743            && self.yielding.is_none()
1744            && self.terminating.is_none()
1745            && !self.host_yield_pending
1746            && self.pending_async_native_fut.is_none()
1747        {
1748            // A `coroutine.yield` in flight raises a sentinel error to unwind the
1749            // Rust stack, but the suspended coroutine's frames/registers (which
1750            // sit at/above `func_slot`) must survive for the next resume — so we
1751            // only truncate on a real error. A self-close termination is in the
1752            // same boat: the dying thread's state is discarded wholesale.
1753            // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1754            // the same boat as `yielding`: the next `EvalFuture::poll`
1755            // resumes the same call, so the in-flight frames must
1756            // survive.
1757            self.stack.truncate(func_slot as usize);
1758            self.top = func_slot;
1759        }
1760        r
1761    }
1762
1763    /// Invoke `f` with the running thread marked non-yieldable for the duration
1764    /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1765    /// boundary and errors instead of suspending. Used by library callbacks
1766    /// (sort comparator, gsub replacement) that run via synchronous Rust
1767    /// recursion and so could not be re-entered after a yield.
1768    pub(crate) fn call_noyield(
1769        &mut self,
1770        f: Value,
1771        args: &[Value],
1772    ) -> Result<Vec<Value>, LuaError> {
1773        self.nny += 1;
1774        let r = self.call_value(f, args);
1775        self.nny -= 1;
1776        r
1777    }
1778
1779    // ---- coroutines (P05) ----
1780
1781    pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1782        // The new coroutine inherits the creating thread's current globals
1783        // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1784        // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1785        // it here picks the creator regardless of which coro is running.
1786        self.heap.new_coro(body, self.globals)
1787    }
1788
1789    /// Is `t` the thread whose context is currently live in the VM?
1790    pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1791        match (self.current, t) {
1792            (None, None) => true,
1793            (Some(a), Some(b)) => a.ptr_eq(b),
1794            _ => false,
1795        }
1796    }
1797
1798    /// Read an open-upvalue slot from its owning thread's stack (the live VM
1799    /// stack if that thread is current, else its saved context).
1800    #[doc(hidden)]
1801    pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1802        let s = slot as usize;
1803        if self.is_current_thread(thread) {
1804            self.stack[s]
1805        } else {
1806            match thread {
1807                Some(co) => co.stack[s],
1808                None => self.main_ctx.as_ref().expect("main context").stack[s],
1809            }
1810        }
1811    }
1812
1813    fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1814        let s = slot as usize;
1815        if self.is_current_thread(thread) {
1816            self.stack[s] = v;
1817        } else {
1818            match thread {
1819                Some(co) => {
1820                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1821                    unsafe { co.as_mut() }.stack[s] = v;
1822                    // co.stack is traced by Coro::trace; demote co back to
1823                    // gray so propagate re-traces this slot if it was
1824                    // already black.
1825                    self.heap
1826                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1827                }
1828                None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1829            }
1830        }
1831    }
1832
1833    /// Whether `co` is the main thread's identity object.
1834    pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1835        self.main_coro.is_some_and(|m| m.ptr_eq(co))
1836    }
1837
1838    /// The status of `co` from the caller's view. The main thread's identity
1839    /// object has no stored status — it is "running" when nothing else runs,
1840    /// else "normal" (it resumed the active coroutine).
1841    pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1842        if self.is_main_coro(co) {
1843            if self.current.is_none() {
1844                CoroStatus::Running
1845            } else {
1846                CoroStatus::Normal
1847            }
1848        } else {
1849            co.status
1850        }
1851    }
1852
    /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
    /// pending to-be-closed `__close` handlers, then mark it dead and drop its
    /// context. Handlers see the coroutine's death error (if it died by error)
    /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
    /// with error `e` and no handler overrode it; `Err` means a handler raised.
    ///
    /// Two early exits leave `co` untouched: closing the coroutine that is
    /// currently live returns `Ok(None)`, and a full C-call depth returns
    /// `Err("C stack overflow")`.
    pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
        // re-entrant close: a __close handler closed its own coroutine while the
        // outer close is mid-flight (its context is live). Report success and let
        // the outer close finish — re-entering the swap would corrupt the stack.
        if self.current.is_some_and(|c| c.ptr_eq(co)) {
            return Ok(None);
        }
        // A chain of coroutines whose `__close` handlers each close the previous
        // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
        // The calling handler's `call_value` has already pushed `c_depth` to the
        // cap, so here it reads as full first — report PUC's "C stack overflow"
        // before the next handler call would surface the plainer "stack overflow".
        if self.c_depth >= MAX_C_DEPTH {
            return Err(self.rt_err("C stack overflow"));
        }
        // Take the stored death error out of the coroutine; it is handed to
        // the handlers below and returned to the caller on success.
        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
        let death_err = unsafe { co.as_mut() }.error_value.take();
        // swap the caller's live context out (into a GC-rooted home) and the
        // coroutine's in, mirroring resume_coro, so the __close handlers run on
        // the coroutine's stack while everything stays rooted.
        let resumer = self.current;
        let rctx = self.take_ctx();
        match resumer {
            // The caller is itself a coroutine: park its context in its
            // own Coro object.
            Some(r) => {
                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
                let m = unsafe { r.as_mut() };
                m.stack = rctx.stack;
                m.frames = rctx.frames;
                m.open_upvals = rctx.open_upvals;
                m.tbc = rctx.tbc;
                m.top = rctx.top;
                m.pcall_depth = rctx.pcall_depth;
            }
            // The caller is the main thread: park its context in `main_ctx`.
            None => self.main_ctx = Some(rctx),
        }
        self.load_coro_ctx(co);
        self.current = Some(co);
        // Run the pending handlers from slot 0 up, passing the death error.
        let result = self.close_slots(0, death_err);
        // discard the (now-closed) coroutine context and restore the caller
        let _ = self.take_ctx();
        match resumer {
            Some(r) => {
                self.load_coro_ctx(r);
                self.current = Some(r);
            }
            None => {
                let m = self.main_ctx.take().expect("main context saved");
                self.put_ctx(m);
                self.current = None;
            }
        }
        {
            // Mark the coroutine dead and reset its per-thread state to empty.
            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
            let m = unsafe { co.as_mut() };
            m.status = CoroStatus::Dead;
            m.stack = Vec::new();
            m.frames = Vec::new();
            m.open_upvals = Vec::new();
            m.tbc = Vec::new();
            m.top = 0;
            m.pcall_depth = 0;
            m.resume_at = None;
            m.error_value = None;
        }
        // Handlers succeeded: report the original death error (if any);
        // otherwise propagate the error a handler raised.
        result.map(|()| death_err)
    }
1924
1925    /// `coroutine.running`: the running thread plus whether it is the main one.
1926    pub(crate) fn running_thread(&self) -> (Value, bool) {
1927        match self.current {
1928            Some(co) => (Value::Coro(co), false),
1929            None => (Value::Coro(self.main_coro.expect("main coro")), true),
1930        }
1931    }
1932
1933    /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1934    /// thread) can yield. The main thread never can; any other coroutine can
1935    /// unless it is dead.
1936    pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1937        match co {
1938            Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1939            // the running thread can yield only outside any non-yieldable C call
1940            None => self.current.is_some() && self.nny == 0,
1941        }
1942    }
1943
1944    /// Why `coroutine.yield` may not suspend the running thread right now, as a
1945    /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1946    /// from "inside an unyieldable C call" (sort/gsub callback).
1947    pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1948        if self.current.is_none() {
1949            Some("attempt to yield from outside a coroutine")
1950        } else if self.nny > 0 {
1951            Some("attempt to yield across a C-call boundary")
1952        } else {
1953            None
1954        }
1955    }
1956
    /// The coroutine whose context is currently live (`None` on the main thread).
    ///
    /// Plain accessor: returns a copy of the `current` field.
    pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
        self.current
    }
1961
1962    /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1963    /// its pending `__close` handlers, then signal termination. The handlers run
1964    /// here, in place, with the thread still non-yieldable (a yield in one hits
1965    /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1966    /// way a yield does — `exec_with` propagates it past any protecting pcall
1967    /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1968    /// clean death (or, if a handler raised, the coroutine's error).
1969    pub(crate) fn close_running(&mut self) -> LuaError {
1970        let death = match self.close_slots(0, None) {
1971            Ok(()) => None,
1972            Err(e) => Some(e.0),
1973        };
1974        self.terminating = Some(death);
1975        LuaError(Value::Nil)
1976    }
1977
1978    /// `coroutine.status` as seen by the caller.
1979    pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1980        match self.effective_coro_status(co) {
1981            CoroStatus::Suspended => "suspended",
1982            CoroStatus::Running => "running",
1983            CoroStatus::Normal => "normal",
1984            CoroStatus::Dead => "dead",
1985        }
1986    }
1987
1988    fn take_ctx(&mut self) -> SavedCtx {
1989        let saved = SavedCtx {
1990            stack: std::mem::take(&mut self.stack),
1991            frames: std::mem::take(&mut self.frames),
1992            open_upvals: std::mem::take(&mut self.open_upvals),
1993            tbc: std::mem::take(&mut self.tbc),
1994            top: self.top,
1995            pcall_depth: self.pcall_depth,
1996            hook: self.hook,
1997            globals: self.globals,
1998        };
1999        self.frames_resync(); // P17-D Week 1 — frames now empty.
2000        saved
2001    }
2002
2003    fn put_ctx(&mut self, c: SavedCtx) {
2004        self.stack = c.stack;
2005        self.frames = c.frames;
2006        self.open_upvals = c.open_upvals;
2007        self.tbc = c.tbc;
2008        self.top = c.top;
2009        self.pcall_depth = c.pcall_depth;
2010        self.hook = c.hook;
2011        self.globals = c.globals;
2012        self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
2013    }
2014
2015    /// Move a coroutine's saved context into the live VM fields.
2016    fn load_coro_ctx(&mut self, co: Gc<Coro>) {
2017        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2018        let m = unsafe { co.as_mut() };
2019        self.stack = std::mem::take(&mut m.stack);
2020        self.frames = std::mem::take(&mut m.frames);
2021        self.open_upvals = std::mem::take(&mut m.open_upvals);
2022        self.tbc = std::mem::take(&mut m.tbc);
2023        self.top = m.top;
2024        self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
2025        self.pcall_depth = m.pcall_depth;
2026        self.hook = m.hook;
2027        self.globals = m.globals;
2028    }
2029
2030    /// Save the live VM context back into a coroutine object.
2031    fn store_coro_ctx(&mut self, co: Gc<Coro>) {
2032        let c = self.take_ctx();
2033        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2034        let m = unsafe { co.as_mut() };
2035        m.stack = c.stack;
2036        m.frames = c.frames;
2037        m.open_upvals = c.open_upvals;
2038        m.tbc = c.tbc;
2039        m.top = c.top;
2040        m.pcall_depth = c.pcall_depth;
2041        m.hook = c.hook;
2042        m.globals = c.globals;
2043        // bulk-overwrite of every collectable field traced by Coro::trace:
2044        // demote the coro back to gray so propagate re-traces its new state.
2045        self.heap
2046            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2047    }
2048
2049    /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
2050    /// or errors. Ok(values) carries yielded or returned values; Err carries an
2051    /// error raised inside the coroutine (the coroutine becomes dead).
2052    pub(crate) fn resume_coro(
2053        &mut self,
2054        co: Gc<Coro>,
2055        args: Vec<Value>,
2056    ) -> Result<Vec<Value>, LuaError> {
2057        match co.status {
2058            CoroStatus::Suspended => {}
2059            CoroStatus::Dead => return Err(self.plain_err("cannot resume dead coroutine")),
2060            _ => return Err(self.plain_err("cannot resume non-suspended coroutine")),
2061        }
2062        if self.c_depth >= MAX_C_DEPTH {
2063            return Err(self.plain_err("C stack overflow"));
2064        }
2065        self.c_depth += 1;
2066        let resumer = self.current;
2067        // save the resumer's live context away
2068        let rctx = self.take_ctx();
2069        match resumer {
2070            Some(r) => {
2071                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2072                let m = unsafe { r.as_mut() };
2073                m.stack = rctx.stack;
2074                m.frames = rctx.frames;
2075                m.open_upvals = rctx.open_upvals;
2076                m.tbc = rctx.tbc;
2077                m.top = rctx.top;
2078                m.pcall_depth = rctx.pcall_depth;
2079                m.globals = rctx.globals;
2080                m.status = CoroStatus::Normal;
2081                // bulk overwrite of every traced field on r — mirror
2082                // store_coro_ctx's barrier_back so propagate re-traces r.
2083                self.heap
2084                    .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
2085            }
2086            None => self.main_ctx = Some(rctx),
2087        }
2088        // swap the coroutine in
2089        self.load_coro_ctx(co);
2090        {
2091            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2092            let m = unsafe { co.as_mut() };
2093            m.status = CoroStatus::Running;
2094            m.resumer = resumer;
2095        }
2096        // co.resumer is a traced Gc field; barrier_back covers the new
2097        // resumer reference and any future field writes during this call.
2098        self.heap
2099            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2100        self.current = Some(co);
2101
2102        // drive it
2103        let drive = if co.started {
2104            self.coro_continue(&args)
2105        } else {
2106            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2107            unsafe { co.as_mut() }.started = true;
2108            self.coro_first(co.body, &args)
2109        };
2110
2111        // classify: a self-close termination or a pending yield each win over
2112        // the (sentinel) error they raised to unwind the Rust stack.
2113        let (outcome, status) = if let Some(death) = self.terminating.take() {
2114            // the coroutine closed itself: it dies now, cleanly or with the
2115            // error a `__close` handler raised.
2116            match death {
2117                Some(e) => {
2118                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2119                    unsafe { co.as_mut() }.error_value = Some(e);
2120                    self.heap
2121                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2122                    (Err(LuaError(e)), CoroStatus::Dead)
2123                }
2124                None => (Ok(Vec::new()), CoroStatus::Dead),
2125            }
2126        } else {
2127            match self.yielding.take() {
2128                Some((vals, fslot, nres)) => {
2129                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2130                    unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2131                    (Ok(vals), CoroStatus::Suspended)
2132                }
2133                None => {
2134                    // died: a return is clean, an error is remembered so a later
2135                    // `coroutine.close` can report it (PUC lua_closethread).
2136                    // Capture the error-point traceback (set by `unwind` before
2137                    // popping the failing frames) and prepend a synthetic
2138                    // top entry for the C native that initiated the error
2139                    // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2140                    // on the dead coroutine still shows the error site
2141                    // (db.lua :848 family).
2142                    if drive.is_err() {
2143                        let mut tb = self.error_traceback.take().unwrap_or_default();
2144                        if let Some(nm) = self.errored_native.take() {
2145                            let mut prefixed: Vec<u8> = Vec::new();
2146                            prefixed.extend_from_slice(
2147                                format!("\n\t[C]: in function '{nm}'").as_bytes(),
2148                            );
2149                            prefixed.extend(tb);
2150                            tb = prefixed;
2151                        }
2152                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2153                        unsafe { co.as_mut() }.error_traceback = Some(tb);
2154                    }
2155                    if let Err(e) = drive {
2156                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2157                        unsafe { co.as_mut() }.error_value = Some(e.0);
2158                        self.heap
2159                            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2160                    }
2161                    (drive, CoroStatus::Dead)
2162                }
2163            }
2164        };
2165
2166        // save the coroutine's context back and restore the resumer
2167        self.store_coro_ctx(co);
2168        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2169        unsafe { co.as_mut() }.status = status;
2170        match resumer {
2171            Some(r) => {
2172                self.load_coro_ctx(r);
2173                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2174                unsafe { r.as_mut() }.status = CoroStatus::Running;
2175                self.current = Some(r);
2176            }
2177            None => {
2178                let m = self.main_ctx.take().expect("main context saved");
2179                self.put_ctx(m);
2180                self.current = None;
2181            }
2182        }
2183        self.c_depth -= 1;
2184        outcome
2185    }
2186
2187    /// First resume: install the body function at slot 0 and run.
2188    fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2189        self.stack.clear();
2190        self.stack.push(body);
2191        self.stack.extend_from_slice(args);
2192        self.top = self.stack.len() as u32;
2193        match self.begin_call(0, Some(args.len() as u32), -1, true) {
2194            Ok(true) => self.exec_with(1),
2195            Ok(false) => Ok(self.take_results(0)),
2196            Err(e) => Err(e),
2197        }
2198    }
2199
2200    /// Resume after a yield: deliver `args` as the results of the call that
2201    /// yielded, then continue the suspended thread.
2202    fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2203        let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2204        let n = args.len() as u32;
2205        // Restore the full register window of the suspended top frame: a yield
2206        // that unwound through a native (call_value) may have left the stack
2207        // shorter than the frame needs. `base + max_stack` is what push_frame
2208        // allocates; `fslot + n` covers the delivered yield results.
2209        let frame_need = self
2210            .frames
2211            .last()
2212            .and_then(CallFrame::lua)
2213            .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2214            .unwrap_or(0);
2215        let need = frame_need.max((fslot + n) as usize);
2216        if self.stack.len() < need {
2217            self.stack.resize(need, Value::Nil);
2218        }
2219        for (i, &v) in args.iter().enumerate() {
2220            self.stack[fslot as usize + i] = v;
2221        }
2222        self.finish_results(fslot, n, nres);
2223        // the suspended `coroutine.yield` (a C call) now returns its resume
2224        // values: fire the matching "return" hook PUC defers until the resume.
2225        self.hook_return(true, 1, n)?;
2226        self.exec_with(1)
2227    }
2228
2229    /// `coroutine.yield`: suspend the running coroutine, recording where to
2230    /// resume. Errors if called outside a coroutine. Returns a sentinel error
2231    /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2232    pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2233        let nres = self.native_nresults;
2234        self.yielding = Some((vals, func_slot, nres));
2235        // value is irrelevant: resume_coro consults `self.yielding`, not this
2236        LuaError(Value::Nil)
2237    }
2238
2239    /// Install or clear the debug hook on the running thread (`debug.sethook`
2240    /// without a thread argument). Arms the calling frame's `oldpc` to the
2241    /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2242    /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2243    /// native return: the very next traceexec compares against the sethook
2244    /// CALL's line. When the install statement and the following statement are
2245    /// on different source lines (db.lua :322), `changedline` fires for that
2246    /// first statement; when they share a line (db.lua :25 wrapper), they do
2247    /// not, so the wrapper line is not re-fired.
2248    pub(crate) fn install_hook(&mut self, hook: HookState) {
2249        self.hook = hook;
2250        if self.hook.line
2251            && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2252        {
2253            f.hook_oldpc = f.pc.saturating_sub(1);
2254        }
2255    }
2256
2257    /// Install a hook on `target` (`None`/current thread → the live VM fields;
2258    /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2259    /// with an optional thread argument.
2260    ///
2261    /// `target == None` means "no explicit thread argument" — PUC binds that
2262    /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2263    /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2264    /// of whether that's the main thread or a currently-resumed coroutine
2265    /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2266    /// `store_coro_ctx`). So a `None` target should always route to
2267    /// `install_hook` on the live fields. The pre-fix predicate gate
2268    /// `is_current_thread(target)` returned `false` when running inside a
2269    /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2270    /// and silently dropped the hook on the floor — the install happened on
2271    /// no thread at all.
2272    pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2273        if target.is_none() || self.is_current_thread(target) {
2274            self.install_hook(state);
2275        } else if let Some(co) = target {
2276            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2277            let m = unsafe { co.as_mut() };
2278            m.hook = state;
2279            if state.line
2280                && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2281            {
2282                f.hook_oldpc = u32::MAX;
2283            }
2284            // co.hook.func is a traced Value (Coro::trace covers it); demote
2285            // co back to gray so propagate sees the new hook function.
2286            self.heap
2287                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2288        }
2289    }
2290
2291    /// The hook state of `target` (`None`/current → the live VM state).
2292    pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2293        match target {
2294            t if self.is_current_thread(t) => self.hook,
2295            Some(co) => co.hook,
2296            None => self.hook,
2297        }
2298    }
2299
    /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
    /// hooks disabled (PUC clears the mask) and its results/stack growth are
    /// discarded so the interrupted frame's register window is untouched.
    /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
    ///
    /// * `event` — the event name as bytes (`b"call"`, `b"return"`,
    ///   `b"tail call"`, `b"tail return"`, `b"line"`, `b"count"`); any other
    ///   name is not delivered to the Rust hook.
    /// * `from_native` — forwarded to `call_value_impl` for the Lua hook call.
    ///
    /// # Errors
    /// Propagates an error raised by the Lua hook function. The Rust hook has
    /// no error channel here.
    fn run_hook(
        &mut self,
        event: &[u8],
        line: Option<i64>,
        from_native: bool,
    ) -> Result<(), LuaError> {
        // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
        // synchronous fn pointer call). Both Rust and Lua hooks may be
        // installed; both observe each event.
        if let Some(rh) = self.hook.rust_func {
            let evt = match event {
                b"call" => Some(RustHookEvent::Call),
                b"return" => Some(RustHookEvent::Return),
                b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
                // a missing or negative line is reported as 0
                b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
                b"count" => Some(RustHookEvent::Count),
                _ => None,
            };
            if let Some(evt) = evt {
                // mark "inside a hook" for the duration of the Rust callback,
                // then put back whatever the flag was before
                let was_in_hook = self.in_hook;
                self.in_hook = true;
                rh(self, evt);
                self.in_hook = was_in_hook;
            }
        }
        // no Lua hook function installed: nothing more to do
        let Some(hook) = self.hook.func else {
            return Ok(());
        };
        // remember the stack extent so everything the hook pushes can be dropped
        let saved_top = self.top;
        let saved_len = self.stack.len();
        let name = Value::Str(self.heap.intern(event));
        let lv = line.map_or(Value::Nil, Value::Int);
        self.in_hook = true;
        // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
        // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
        // ci sat below `hookf` (the function being hooked). When that hooked
        // function is native, no Lua frame for it exists in luna's `frames`;
        // model it as a synthetic C level by pushing the hook with
        // `from_c = true` (then `c_frame_name` reads the caller's call
        // instruction → e.g. `name = "sethook"`). When the hooked function is
        // Lua (its frame is still on the stack), push with `from_c = false`
        // so the level descent lands on it directly. The hook's own frame
        // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
        // (PUC `CIST_HOOKED`).
        self.pending_is_hook = true;
        let r = self.call_value_impl(hook, &[name, lv], from_native);
        // flags and stack are reset whether or not the hook call succeeded
        self.pending_is_hook = false;
        self.in_hook = false;
        self.stack.truncate(saved_len);
        self.top = saved_top;
        // the hook's return values are ignored; only its error (if any) matters
        r.map(|_| ())
    }
2356
2357    /// Fire the "call" hook on entry to a function, if armed and not already in
2358    /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2359    /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2360    /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2361    /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2362    fn hook_call_with(
2363        &mut self,
2364        from_native: bool,
2365        nargs: u32,
2366        is_tail: bool,
2367    ) -> Result<(), LuaError> {
2368        if self.hook.call
2369            && !self.in_hook
2370            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2371        {
2372            self.hook_ftransfer = 1;
2373            self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2374            // PUC 5.1 didn't distinguish tail-call events — every call,
2375            // including tail-calls, fired plain `"call"`. 5.2 introduced
2376            // the separate `"tail call"` event (mask `"c"` covers both).
2377            // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2378            // "return","tail return","return","tail return"}`.
2379            let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2380                b"tail call"
2381            } else {
2382                b"call"
2383            };
2384            self.run_hook(event, None, from_native)?;
2385        }
2386        Ok(())
2387    }
2388
2389    pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2390        self.hook_call_with(from_native, nargs, false)
2391    }
2392
2393    /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2394    /// the first result slot relative to the activation's func slot, ntransfer
2395    /// the number of results.
2396    pub(crate) fn hook_return(
2397        &mut self,
2398        from_native: bool,
2399        ftransfer: u32,
2400        nresults: u32,
2401    ) -> Result<(), LuaError> {
2402        if self.hook.ret
2403            && !self.in_hook
2404            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2405        {
2406            self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2407            self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2408            self.run_hook(b"return", None, from_native)?;
2409        }
2410        Ok(())
2411    }
2412
2413    /// PUC "tail return" event — fires once per tail call that collapsed
2414    /// into the activation now returning, *after* its own "return" event.
2415    /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2416    fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2417        if self.hook.ret
2418            && !self.in_hook
2419            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2420        {
2421            self.run_hook(b"tail return", None, false)?;
2422        }
2423        Ok(())
2424    }
2425
2426    /// Call a metamethod with a single expected result.
2427    fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2428        let mut r = self.call_value(f, args)?;
2429        Ok(if r.is_empty() {
2430            Value::Nil
2431        } else {
2432            r.swap_remove(0)
2433        })
2434    }
2435
2436    /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2437    /// driven through the interpreter loop with a `Meta` continuation, so a
2438    /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2439    /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2440    /// Returns to the caller with the call set up — the opcode arm must do no
2441    /// further work on the running frame and let the loop iterate. `tm` is
2442    /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2443    /// born from this call inherits it via `pending_tm`, so
2444    /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2445    /// (db.lua :878).
2446    fn begin_meta_call(
2447        &mut self,
2448        func: Value,
2449        args: &[Value],
2450        action: MetaAction,
2451        tm: &'static str,
2452    ) -> Result<(), LuaError> {
2453        let saved_top = self.top;
2454        let cont_slot = self.stack.len() as u32;
2455        self.stack.push(func);
2456        self.stack.extend_from_slice(args);
2457        self.top = self.stack.len() as u32;
2458        frames_push_sync(
2459            &mut self.frames,
2460            &mut self.frames_top,
2461            CallFrame::Cont(NativeCont {
2462                kind: ContKind::Meta(MetaCont { action, saved_top }),
2463                func_slot: cont_slot,
2464                nresults: 1,
2465            }),
2466        );
2467        let saved_tm = self.pending_tm.replace(tm);
2468        // begin_call drives a Lua metamethod through the loop (returns true) or
2469        // runs a native one inline (returns false, leaving results at cont_slot
2470        // for the loop head to pick up); either way the Meta cont resolves there.
2471        let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2472        // Native callees never consumed pending_tm (push_frame is only hit on
2473        // a Lua callee); restore so it doesn't leak to a later push_frame.
2474        self.pending_tm = saved_tm;
2475        r?;
2476        Ok(())
2477    }
2478
2479    /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2480    fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2481        // v2.13 WUC read-time probe: a collectable key must be live at
2482        // the moment it is used. O(1) membership test against the
2483        // freed-pointer log — gc-verify diagnostic builds only; exact
2484        // under quarantining allocators (ASAN).
2485        #[cfg(feature = "gc-verify")]
2486        if matches!(key, Value::Str(_)) {
2487            let h = match key {
2488                Value::Str(s) => s.as_ptr() as usize,
2489                _ => unreachable!(),
2490            };
2491            if self.heap.recently_freed.contains(&h) {
2492                let (pc, reg_info) = match self.frames.last() {
2493                    Some(CallFrame::Lua(f)) => {
2494                        let pc = f.pc as usize;
2495                        let inst = f.closure.proto.code.get(pc.wrapping_sub(1));
2496                        (
2497                            pc,
2498                            inst.map(|i| {
2499                                format!(
2500                                    "op[pc-1]={:?} a={} b={} c={} base={}",
2501                                    i.op(),
2502                                    i.a(),
2503                                    i.b(),
2504                                    i.c(),
2505                                    f.base
2506                                )
2507                            })
2508                            .unwrap_or_default(),
2509                        )
2510                    }
2511                    _ => (0, String::new()),
2512                };
2513                panic!(
2514                    "[gc-verify] op_index READ of dead string key {h:#x} \
2515                     (gc_top {}, top {}, pc {pc}, {reg_info})",
2516                    self.gc_top, self.top,
2517                );
2518            }
2519        }
2520        match self.index_step(t, key)? {
2521            MmOut::Done(v) => self.stack[dst as usize] = v,
2522            MmOut::Mm { func, recv } => {
2523                self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2524            }
2525            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2526        }
2527        Ok(())
2528    }
2529
2530    /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2531    fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2532        match self.newindex_step(t, key, v)? {
2533            MmOut::Done(_) => {}
2534            MmOut::Mm { func, recv } => {
2535                self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2536            }
2537            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2538        }
2539        Ok(())
2540    }
2541
2542    /// Apply a comparison opcode's outcome: a known boolean drives the
2543    /// conditional skip directly; a metamethod is called yieldably, its
2544    /// truthiness driving the skip on return.
2545    fn op_compare(
2546        &mut self,
2547        step: MmOut,
2548        l: Value,
2549        r: Value,
2550        k: bool,
2551        tm: &'static str,
2552    ) -> Result<(), LuaError> {
2553        match step {
2554            MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2555            MmOut::Mm { func, .. } => {
2556                self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2557            }
2558            MmOut::CompareSynth { func } => {
2559                // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2560                // negation are driven through `MetaAction::Compare` so the
2561                // metamethod call can yield like any other compare.
2562                self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2563            }
2564        }
2565        Ok(())
2566    }
2567
2568    /// Complete a VM instruction whose metamethod just returned `result` (PUC
2569    /// `luaV_finishOp`). The running frame is already back on top.
2570    fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2571        match action {
2572            MetaAction::Store { dst } => self.stack[dst as usize] = result,
2573            MetaAction::Discard => {}
2574            MetaAction::Compare { k, negate } => {
2575                let t = if negate {
2576                    !result.truthy()
2577                } else {
2578                    result.truthy()
2579                };
2580                self.cond_skip(t, k);
2581            }
2582            MetaAction::Concat { dst, base_a } => {
2583                self.stack[dst as usize] = result;
2584                self.top = dst + 1;
2585                self.concat_run(base_a)?;
2586            }
2587        }
2588        Ok(())
2589    }
2590
2591    // ---- metatables ----
2592
2593    pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2594        match v {
2595            Value::Table(t) => t.metatable(),
2596            Value::Userdata(u) => u.metatable(),
2597            v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2598        }
2599    }
2600
2601    /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2602    /// non-table). No-op for tables (they carry their own).
2603    pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2604        if let Some(i) = type_mt_slot(v) {
2605            self.type_mt[i] = mt;
2606        }
2607    }
2608
2609    /// The metamethod of `v` for `mm`, or nil.
2610    pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2611        match self.metatable_of(v) {
2612            Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2613            None => Value::Nil,
2614        }
2615    }
2616
2617    /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2618    /// only fires when both operands carry a metatable that exposes the same
2619    /// implementation. Returns the metamethod to call, or `Nil` when no
2620    /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2621    /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2622    pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2623        let mt1 = self.metatable_of(l);
2624        let Some(mt1) = mt1 else { return Value::Nil };
2625        let key = Value::Str(self.mm_names[mm as usize]);
2626        let tm1 = mt1.get(key);
2627        if tm1.is_nil() {
2628            return Value::Nil;
2629        }
2630        let mt2 = self.metatable_of(r);
2631        let Some(mt2) = mt2 else { return Value::Nil };
2632        if mt1.as_ptr() == mt2.as_ptr() {
2633            return tm1;
2634        }
2635        let tm2 = mt2.get(key);
2636        if tm2.is_nil() {
2637            return Value::Nil;
2638        }
2639        if tm1.raw_eq(tm2) {
2640            return tm1;
2641        }
2642        Value::Nil
2643    }
2644
2645    /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2646    /// or full userdata whose metatable carries a string `__name` reports that
2647    /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2648    pub(crate) fn obj_typename(&self, v: Value) -> String {
2649        if matches!(v, Value::Table(_) | Value::Userdata(_))
2650            && let Value::Str(s) = self.get_mm(v, Mm::Name)
2651        {
2652            return String::from_utf8_lossy(s.as_bytes()).into_owned();
2653        }
2654        v.type_name().to_string()
2655    }
2656
2657    fn call_at(
2658        &mut self,
2659        func_slot: u32,
2660        nargs: u32,
2661        from_c: bool,
2662    ) -> Result<Vec<Value>, LuaError> {
2663        if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2664            self.exec()
2665        } else {
2666            // native completed inline; results at func_slot..top
2667            Ok(self.take_results(func_slot))
2668        }
2669    }
2670
2671    /// Switch the `collectgarbage` mode, returning the previous mode name.
2672    pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2673        std::mem::replace(&mut self.gc_mode, new)
2674    }
2675
2676    /// Whether the current `collectgarbage` mode is "generational" (where a
2677    /// "step" is a minor collection — a full atomic pass — rather than a paced
2678    /// incremental sweep).
2679    pub(crate) fn gc_mode_is_generational(&self) -> bool {
2680        self.gc_mode == "generational"
2681    }
2682
2683    /// Current `stepsize` pacing parameter (PUC: 0 means an unbounded step that
2684    /// completes a whole cycle at once).
2685    pub(crate) fn gc_stepsize(&self) -> i64 {
2686        self.gc_stepsize
2687    }
2688
2689    /// `collectgarbage("param", name [,value])`: read (or set, returning the
2690    /// previous value of) a pacing parameter. Returns `None` for an unknown
2691    /// name so the caller can raise PUC's `invalid parameter` error. The
2692    /// collector is stop-the-world, so these only round-trip for API fidelity.
2693    pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2694        let slot = match name {
2695            b"pause" => &mut self.gc_pause,
2696            b"stepmul" => &mut self.gc_stepmul,
2697            b"stepsize" => &mut self.gc_stepsize,
2698            _ => return None,
2699        };
2700        let prev = *slot;
2701        if let Some(v) = set {
2702            *slot = v;
2703        }
2704        Some(prev)
2705    }
2706
2707    /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2708    /// paced sweep via `Vm::gc_step`.
2709    ///
2710    /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2711    /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2712    /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2713    /// suspected UAF is structurally closed — born objects no longer become
2714    /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2715    ///
2716    /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2717    /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2718    /// O(SWEEP_DIVISOR) safe-points regardless of size.
2719    #[inline(always)]
2720    pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2721        if self.gc_finalizing {
2722            return;
2723        }
2724        if !self.heap.gc_due() {
2725            return;
2726        }
2727        // v2.5 P1B-2E: tighten to bare `live_top`. The v2.2.0
2728        // `live_top.max(self.top)` workaround is now obsoleted by
2729        // v2.3's `finish_results` slot-clear + v2.5 P1B-2A
2730        // (Op::TailCall collapse slot-clear) + v2.5 P1B-2B
2731        // (pcall unwind slot-clear). PUC L->top discipline is now
2732        // mirrored at every frame-pop site.
2733        self.gc_top = live_top;
2734        // PUC stepmul: % of allocation rate. Higher = more GC work per
2735        // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2736        // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2737        const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2738        const MIN_BUDGET: usize = 64_000;
2739        let stepmul = self.gc_stepmul.max(1) as usize;
2740        let divisor = (SWEEP_BASE / stepmul).max(1);
2741        let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2742        if self.gc_step(budget) {
2743            self.heap.rearm_gc_pause(self.gc_pause);
2744        }
2745    }
2746
2747    /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2748    /// roots (open upvalues, which are not first-class Values). Shared by the
2749    /// full collector and the incremental-sweep driver so both snapshot the
2750    /// exact same live set.
2751    fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2752        let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2753        roots.push(Value::Table(self.globals));
2754        for mt in self.type_mt.into_iter().flatten() {
2755            roots.push(Value::Table(mt));
2756        }
2757        for &n in &self.mm_names {
2758            roots.push(Value::Str(n));
2759        }
2760        // Root the running thread's live registers (PUC marks [stack, top)).
2761        // `gc_top` is the instruction-level cursor of the last GC
2762        // safe-point: allocation safe-points set it via
2763        // `maybe_collect_garbage(live_top)`, and `begin_call` raises it
2764        // to the callee's argument top when entering a native — PUC's
2765        // `L->top = func + 1 + nargs` C-call discipline. Without that
2766        // raise, an explicit `collectgarbage()` collected with a STALE
2767        // cursor from some earlier (lower) safe-point and freed its own
2768        // caller's register-held strings — UAF-C
2769        // (STATUS_ACCESS_VIOLATION on Windows / ASAN heap-use-after-free
2770        // on Linux; the v2.13 WUC gc-verify frame audit pinpointed the
2771        // under-rooted slots). Values stranded above the cursor stay
2772        // excluded so weak-table entries are not spuriously pinned
2773        // (gc.lua:544 suspended-coroutine collection).
2774        let live = (self.gc_top as usize).min(self.stack.len());
2775        roots.extend_from_slice(&self.stack[..live]);
2776        for cf in &self.frames {
2777            match cf {
2778                CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2779                CallFrame::Cont(NativeCont {
2780                    kind: ContKind::Xpcall { handler },
2781                    ..
2782                }) => roots.push(*handler),
2783                CallFrame::Cont(NativeCont {
2784                    kind: ContKind::Close(cc),
2785                    ..
2786                }) => {
2787                    // Root the error threaded through this close chain so a
2788                    // `collectgarbage()` inside a sibling `__close` handler
2789                    // does not free it before the next handler is invoked
2790                    // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2791                    if let Some(e) = cc.pending {
2792                        roots.push(e);
2793                    }
2794                    if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2795                        roots.push(err);
2796                    }
2797                }
2798                CallFrame::Cont(_) => {}
2799            }
2800        }
2801        if let Some(e) = self.closing_err {
2802            roots.push(e);
2803        }
2804        // B12 host roots — Lua-facade handles keep their referenced
2805        // values alive across calls/yields. Trace the whole vector;
2806        // unused slots (post-`unpin_all`) carry Value::Nil which the
2807        // GC ignores.
2808        for slot in &self.host_roots {
2809            // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2810            roots.push(slot.value);
2811        }
2812        // v2.1 — `table.sort` and similar builtins stash their working
2813        // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2814        // comparator callback doesn't free strings/tables snapshotted
2815        // off the live table (sort.lua's `load(..)(); collectgarbage()`
2816        // compare regression).
2817        for buf in &self.sort_scratch {
2818            roots.extend_from_slice(buf);
2819        }
2820        // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2821        // mid-execution. Without rooting them here, a `collectgarbage()`
2822        // invoked inside the running native (sort.lua AA `load(..)();
2823        // collectgarbage()` compare callback regression) sweeps the
2824        // closure that's actively executing, leaving `nc.upvals`
2825        // dangling and the Rust local `nc` pointing at recycled memory
2826        // — the SIGSEGV pops on the very next field access or pop.
2827        for &nc in &self.running_natives {
2828            roots.push(Value::Native(nc));
2829        }
2830        // the running thread's debug hook (suspended threads root theirs via
2831        // Coro::trace / the main_ctx sweep below)
2832        if let Some(h) = self.hook.func {
2833            roots.push(h);
2834        }
2835        // the running coroutine (its saved-context fields live in the VM, but
2836        // the object itself + its resumer chain must stay reachable)
2837        if let Some(co) = self.current {
2838            roots.push(Value::Coro(co));
2839        }
2840        if let Some(mc) = self.main_coro {
2841            roots.push(Value::Coro(mc));
2842        }
2843        // debug.getregistry() and io library state
2844        if let Some(r) = self.registry {
2845            roots.push(Value::Table(r));
2846        }
2847        if let Some(mt) = self.file_mt {
2848            roots.push(Value::Table(mt));
2849        }
2850        if let Some(f) = self.io_input {
2851            roots.push(Value::Userdata(f));
2852        }
2853        if let Some(f) = self.io_output {
2854            roots.push(Value::Userdata(f));
2855        }
2856        // the main thread's saved context while a coroutine runs
2857        if let Some(m) = &self.main_ctx {
2858            roots.extend_from_slice(&m.stack);
2859            if let Some(h) = m.hook.func {
2860                roots.push(h);
2861            }
2862            for cf in &m.frames {
2863                match cf {
2864                    CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2865                    CallFrame::Cont(NativeCont {
2866                        kind: ContKind::Xpcall { handler },
2867                        ..
2868                    }) => roots.push(*handler),
2869                    CallFrame::Cont(_) => {}
2870                }
2871            }
2872        }
2873        let mut extra: Vec<*mut GcHeader> = self
2874            .open_upvals
2875            .iter()
2876            .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2877            .collect();
2878        if let Some(m) = &self.main_ctx {
2879            extra.extend(
2880                m.open_upvals
2881                    .iter()
2882                    .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2883            );
2884        }
2885        (roots, extra)
2886    }
2887
2888    /// Run a full collection with the VM's roots, then run any `__gc`
2889    /// finalizers the collection scheduled. A no-op (returns 0) when already
2890    /// inside a finalizer — the collector is not reentrant (PUC).
2891    pub fn collect_garbage(&mut self) -> usize {
2892        if self.gc_finalizing {
2893            return 0;
2894        }
2895        let (roots, extra) = self.gc_roots();
2896        let freed = self.heap.collect_ex(&roots, &extra);
2897        #[cfg(feature = "gc-verify")]
2898        self.verify_frame_regs_live("collect_garbage");
2899        self.run_finalizers();
2900        freed
2901    }
2902
2903    /// v2.13 WUC `gc-verify` — after a collect, every register slot the
2904    /// collector just rooted (`[0, max(gc_top, top))` — the same bound
2905    /// `gc_roots` uses) must hold a live value. A dead value inside the
2906    /// rooted range means the root snapshot and the sweep disagreed —
2907    /// the bug class behind UAF-C. (Slots ABOVE the bound may hold
2908    /// stale dead values legitimately; the interpreter's contract is
2909    /// that it writes them before reading.)
2910    #[cfg(feature = "gc-verify")]
2911    pub(crate) fn verify_frame_regs_live(&self, ctx: &str) {
2912        let live = self.heap.debug_live_set();
2913        let header = |v: Value| -> Option<usize> {
2914            match v {
2915                Value::Str(s) => Some(s.as_ptr() as usize),
2916                Value::Table(t) => Some(t.as_ptr() as usize),
2917                Value::Closure(c) => Some(c.as_ptr() as usize),
2918                Value::Native(n) => Some(n.as_ptr() as usize),
2919                Value::Coro(c) => Some(c.as_ptr() as usize),
2920                Value::Userdata(u) => Some(u.as_ptr() as usize),
2921                _ => None,
2922            }
2923        };
2924        let bound = (self.gc_top as usize).min(self.stack.len());
2925        for i in 0..bound {
2926            if let Some(h) = header(self.stack[i]) {
2927                if !live.contains(&h) {
2928                    panic!(
2929                        "[gc-verify] {ctx}: rooted stack slot {i} (gc_top {}, top {}) \
2930                         holds a dead value {h:#x} after collect",
2931                        self.gc_top, self.top,
2932                    );
2933                }
2934            }
2935        }
2936        // Diagnostic tier: a dead value ABOVE the cursor is only a bug if
2937        // that register is a named local still in scope (the interpreter
2938        // WILL read it). Cross-check against the proto's LocVar table.
2939        for (fi, cf) in self.frames.iter().enumerate() {
2940            if let CallFrame::Lua(f) = cf {
2941                let base = f.base as usize;
2942                let maxs = f.closure.proto.max_stack as usize;
2943                let hi = (base + maxs).min(self.stack.len());
2944                let pc = f.pc;
2945                for i in bound.max(base)..hi {
2946                    if let Some(h) = header(self.stack[i]) {
2947                        if !live.contains(&h) {
2948                            let reg = (i - base) as u32;
2949                            if let Some(lv) = f
2950                                .closure
2951                                .proto
2952                                .locvars
2953                                .iter()
2954                                .find(|lv| lv.reg == reg && lv.start_pc <= pc && pc < lv.end_pc)
2955                            {
2956                                panic!(
2957                                    "[gc-verify] {ctx}: frame {fi} IN-SCOPE LOCAL '{}' \
2958                                     (reg {reg}, abs {i}, pc {pc}, gc_top {}) holds a \
2959                                     dead value {h:#x} — live_top cursor excluded a \
2960                                     live named local",
2961                                    lv.name, self.gc_top,
2962                                );
2963                            }
2964                        }
2965                    }
2966                }
2967            }
2968        }
2969    }
2970
2971    /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2972    /// threw; gc.lua's "errors during collection" probe relies on it. This
2973    /// variant runs the same cycle but propagates the captured finalizer
2974    /// error to the explicit caller.
2975    pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2976        if self.gc_finalizing {
2977            return Ok(0);
2978        }
2979        let (roots, extra) = self.gc_roots();
2980        let freed = self.heap.collect_ex(&roots, &extra);
2981        #[cfg(feature = "gc-verify")]
2982        self.verify_frame_regs_live("collect_garbage_propagating");
2983        self.run_finalizers_or_err()?;
2984        Ok(freed)
2985    }
2986
2987    /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
2988    /// should report fail rather than collect).
2989    pub(crate) fn gc_is_finalizing(&self) -> bool {
2990        self.gc_finalizing
2991    }
2992
2993    /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2994    /// = true indicates more pieces follow (concatenated until the first
2995    /// `to_cont = false` call flushes the whole line). Mirrors
2996    /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2997    ///   * If the buffer is fresh, `to_cont` is false, and the message is
2998    ///     `@<word>`, treat as a control message — only `@on` / `@off` are
2999    ///     recognised; any other `@…` is silently ignored.
3000    ///   * Otherwise, while the state is `Off`, drop the piece; while `On`,
3001    ///     accumulate, and flush to stderr + `warn_log` on the
3002    ///     non-continuation call.
3003    pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
3004        if self.warn_buf.is_empty()
3005            && !to_cont
3006            && let Some(b'@') = msg.first().copied()
3007        {
3008            match &msg[1..] {
3009                b"on" => self.warn_state = WarnState::On,
3010                b"off" => self.warn_state = WarnState::Off,
3011                _ => {} // unknown control — silently ignored (PUC checkcontrol)
3012            }
3013            return;
3014        }
3015        if self.warn_state == WarnState::Off {
3016            // drop continuation pieces too — PUC `warnfoff` is the trampoline
3017            return;
3018        }
3019        self.warn_buf.extend_from_slice(msg);
3020        if !to_cont {
3021            let line = std::mem::take(&mut self.warn_buf);
3022            eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
3023            self.warn_log.push(line);
3024        }
3025    }
3026
3027    /// Drain the in-process warning log (one entry per emitted message, sans
3028    /// `"Lua warning: "` prefix and newline). For test harnesses that want to
3029    /// assert on warn output without scraping stderr.
3030    pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
3031        std::mem::take(&mut self.warn_log)
3032    }
3033
3034    /// Arm the cooperative instruction budget (P09 embedding). The run loop
3035    /// decrements this once per dispatch turn; on zero it raises a catchable
3036    /// `"instruction budget exceeded"` error and disarms itself so the host
3037    /// can resume with a fresh budget on the next call. `None` removes the
3038    /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
3039    /// short-script semantics.
3040    pub fn set_instr_budget(&mut self, budget: Option<i64>) {
3041        self.instr_budget = budget;
3042    }
3043
3044    /// Remaining instruction budget (None when unbounded).
3045    pub fn instr_budget_remaining(&self) -> Option<i64> {
3046        self.instr_budget
3047    }
3048
3049    /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
3050    /// **must** disable JIT when relying on `instr_budget` — see the
3051    /// `jit_enabled` field doc for the rationale.
3052    pub fn set_jit_enabled(&mut self, enabled: bool) {
3053        self.jit.enabled = enabled;
3054    }
3055
3056    /// Current JIT enable state.
3057    pub fn jit_enabled(&self) -> bool {
3058        self.jit.enabled
3059    }
3060
3061    /// Toggle the trace JIT (P12). Off by default while the sprint
3062    /// develops. When enabled, hot back-edges are counted on
3063    /// `Proto.trace_hot_count`; once the counter passes
3064    /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
3065    /// mode at the back-edge target. Stays a no-op until S2's
3066    /// trace lowerer and S3's dispatcher land.
3067    pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
3068        self.jit.trace_enabled = enabled;
3069    }
3070
3071    /// P16-A — opt-in flag for the self-link cycle catch. See field
3072    /// docs for the correctness blocker. Default `false`.
3073    pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
3074        self.jit.p16_self_link_enabled = enabled;
3075    }
3076
3077    /// Current state of the P16-A self-link cycle catch.
3078    pub fn p16_self_link_enabled(&self) -> bool {
3079        self.jit.p16_self_link_enabled
3080    }
3081
3082    /// Current trace-JIT enable state.
3083    pub fn trace_jit_enabled(&self) -> bool {
3084        self.jit.trace_enabled
3085    }
3086
3087    /// Number of traces that have closed cleanly (looped back to the
3088    /// head PC) since this Vm was constructed. Cumulative; used by
3089    /// tests + tuning. Will become the dominant signal once S2's
3090    /// compile + cache lands.
3091    pub fn trace_closed_count(&self) -> u64 {
3092        self.jit.counters.closed
3093    }
3094
3095    /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
3096    /// hit an un-recordable op — the latter lands at S2).
3097    pub fn trace_aborted_count(&self) -> u64 {
3098        self.jit.counters.aborted
3099    }
3100
3101    /// P13-S13-G v2 — number of compiled traces whose close shape
3102    /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
3103    /// pin `dispatchable=false` because the dispatcher can't
3104    /// resume at a depth>0 PC without the matching CallFrames.
3105    /// S4-step4b's frame-mat helper could synthesise those, but
3106    /// the InlineAbort emit path isn't wired up yet — fresh
3107    /// pickup work for S13-G v2-full.
3108    pub fn trace_inline_abort_count(&self) -> u64 {
3109        self.jit.counters.inline_abort
3110    }
3111
3112    /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
3113    pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
3114        &self.jit.counters.dispatch_off_reasons
3115    }
3116
3117    /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
3118    pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
3119        &self.jit.counters.compile_failed_reasons
3120    }
3121
3122    /// P13-S13-H — see `JitCounters::closed_lens`. Returns
3123    /// `(is_call_triggered, ops_len)` for every trace that closed.
3124    pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
3125        &self.jit.counters.closed_lens
3126    }
3127
3128    /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
3129    /// Per-reason close-cause counts (recorder-side abort/discard +
3130    /// lowerer-side dispatch_off labels) keyed by `&'static str`.
3131    pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
3132        &self.jit.counters.close_cause_counts
3133    }
3134
3135    /// v2.0 Track-R R3b — number of compiled traces whose
3136    /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
3137    /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
3138    /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
3139    /// hot loop with p16-on. R3b keeps `dispatchable = false` even
3140    /// when this count bumps; R3d will lift it.
3141    pub fn trace_downrec_link_compiled_count(&self) -> u64 {
3142        self.jit.counters.downrec_link_compiled
3143    }
3144
3145    /// v2.0 Track-R R3c — see
3146    /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
3147    /// of times the dispatcher's `is_downrec_sentinel` arm fired and
3148    /// classified the return as a caller-pc-guard HIT.
3149    pub fn trace_downrec_dispatched_count(&self) -> u64 {
3150        self.jit.counters.downrec_dispatched
3151    }
3152
3153    /// v2.0 Track-R R3c — see
3154    /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
3155    /// times the dispatcher entered a `downrec_link`-bearing trace and
3156    /// the trace returned via the lowerer's deopt block (caller-pc
3157    /// guard MISS), or the dispatcher itself force-deopted via the
3158    /// stitch-cycle checkpoint.
3159    pub fn trace_downrec_deopt_count(&self) -> u64 {
3160        self.jit.counters.downrec_deopt
3161    }
3162
3163    /// v2.0 Track-R R3d — see
3164    /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3165    /// Number of compiled traces whose lowerer emitted a multi-way
3166    /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3167    /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3168    pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3169        self.jit.counters.multi_way_guard_emitted
3170    }
3171
3172    /// P12-S2.C — number of closed traces the lowerer compiled and
3173    /// parked on `Proto.traces`. Re-records of the same head_pc are
3174    /// deduped (the second close finds the head_pc already cached
3175    /// and skips compile), so this never exceeds `trace_closed_count`.
3176    pub fn trace_compiled_count(&self) -> u64 {
3177        self.jit.counters.compiled
3178    }
3179
3180    /// v2.1 Phase 1I.B — number of times the recorder captured a
3181    /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3182    /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3183    /// by the Phase 1I.B opt-in fire test to verify the env gate
3184    /// wiring round-trips end-to-end (env -> recorder -> snapshot
3185    /// -> counter -> getter -> assertion).
3186    pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3187        self.jit.counters.field_ic_snapshot_captured
3188    }
3189
3190    /// P12-S2.C — number of closed traces the lowerer rejected
3191    /// (any of the bail conditions in
3192    /// `crate::jit::trace::try_compile_trace`).
3193    pub fn trace_compile_failed_count(&self) -> u64 {
3194        self.jit.counters.compile_failed
3195    }
3196
3197    /// P12-S3 — number of times the dispatcher jumped into a
3198    /// compiled trace. Bumps on every entry; `trace_deopt_count`
3199    /// counts the subset where the trace returned with a parked
3200    /// `jit_pending_err`.
3201    pub fn trace_dispatched_count(&self) -> u64 {
3202        self.jit.counters.dispatched
3203    }
3204
3205    /// P12-S3 — number of trace entries that came back with
3206    /// `jit_pending_err` set (typically a metatable shadowed an
3207    /// index inside a helper, forcing the dispatcher to fall back
3208    /// to the interpreter without committing the trace's result).
3209    pub fn trace_deopt_count(&self) -> u64 {
3210        self.jit.counters.deopt
3211    }
3212
3213    /// P15-A v1 — number of times the dispatcher started a side
3214    /// trace recording (an `exit_hit_counts` slot crossed
3215    /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3216    /// was None and trace JIT was enabled). Each unit is exactly one
3217    /// `start_side_trace` call; the actual compile success counts
3218    /// under [`Self::trace_compiled_count`] like any other trace.
3219    /// Probe use: distinguishes the "side-trace pipeline fired"
3220    /// signal from the "primary back-edge / call-trigger fired"
3221    /// signal so v0-v3 architectural progress is visible without
3222    /// reading per-counter histograms.
3223    pub fn trace_side_trace_started_count(&self) -> u64 {
3224        self.jit.counters.side_trace_started
3225    }
3226
3227    /// P15-A v2-A — number of side-trace recordings that closed,
3228    /// compiled successfully, AND patched their parent's
3229    /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3230    /// dispatch through these ptrs yet (v2-B/C job), but the
3231    /// counter + ptr write proves the compile + link pipeline is
3232    /// complete end-to-end.
3233    pub fn trace_side_trace_compiled_count(&self) -> u64 {
3234        self.jit.counters.side_trace_compiled
3235    }
3236
3237    /// P15-A v2-C-A5-C — number of side traces that compiled
3238    /// successfully but were SHEDDED by the close-handler shape-
3239    /// match gate (`exit_tags_match_entry_tags`). High ratios
3240    /// vs. `trace_side_trace_compiled_count` indicate the
3241    /// architecture is shedding lots of would-be side traces;
3242    /// useful as a tuning probe for future relaxation of the
3243    /// gate or for child-IR re-specialisation against parent's
3244    /// exit shape.
3245    pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3246        self.jit.counters.side_trace_shape_mismatch
3247    }
3248
3249    /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3250    /// classified as `crate::jit::trace::EscapeState::Sinkable`
3251    /// across every successfully compiled trace on this Vm. The
3252    /// count is post-demotion: sites pre-emit drops back to Escaped
3253    /// for not meeting v1 sunk-emit criteria are NOT counted.
3254    /// `trace_sunk_alloc_count` matches one-for-one today (every
3255    /// surviving Sinkable site goes through sunk emit).
3256    pub fn trace_sinkable_seen_count(&self) -> u64 {
3257        self.jit.counters.sinkable_seen
3258    }
3259
3260    /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3261    pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3262        self.jit.counters.accum_bufferable_seen
3263    }
3264
3265    /// P15-prep — total dispatch hits across all known traces,
3266    /// broken into hot-exit telemetry (max single-exit count,
3267    /// total dispatches, exit count). Used by probes to identify
3268    /// hot side-exits as side-trace candidates.
3269    ///
3270    /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3271    /// recursively, so inner functions' traces are reported.
3272    pub fn trace_exit_hit_summary(
3273        &self,
3274        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3275    ) -> Vec<(u32, Vec<u32>)> {
3276        fn walk(
3277            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3278            out: &mut Vec<(u32, Vec<u32>)>,
3279        ) {
3280            for ct in proto.traces.borrow().iter() {
3281                let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3282                out.push((ct.head_pc, counts));
3283            }
3284            for inner in proto.protos.iter() {
3285                walk(*inner, out);
3286            }
3287        }
3288        let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3289        walk(cl.proto, &mut out);
3290        out
3291    }
3292
3293    /// P15-A v0 — surface every side-exit slot whose hit count is
3294    /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3295    /// `cl.proto` (recursively walking `proto.protos`). Returned
3296    /// entries are side-trace candidates: each carries the parent
3297    /// trace's `(head_proto, head_pc)`, the exit's index in the
3298    /// parent's `exit_hit_counts`, and the side trace's natural
3299    /// entry shape (`cont_pc` + `exit_tags`).
3300    ///
3301    /// Layout of `exit_hit_counts` (mirrored by the iter):
3302    /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3303    ///   window-sized exit_tags).
3304    /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3305    ///   → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3306    /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3307    ///   exit_tags = `ct.exit_tags`).
3308    pub fn hot_exit_iter(
3309        &self,
3310        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3311    ) -> Vec<crate::jit::trace::HotExitInfo> {
3312        use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3313        fn walk(
3314            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3315            out: &mut Vec<HotExitInfo>,
3316        ) {
3317            for ct in proto.traces.borrow().iter() {
3318                let inline_n = ct.per_exit_inline.len();
3319                let tags_n = ct.per_exit_tags.len();
3320                debug_assert_eq!(
3321                    ct.exit_hit_counts.len(),
3322                    inline_n + tags_n + 1,
3323                    "exit_hit_counts layout invariant violated"
3324                );
3325                for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3326                    let hits = cell.get();
3327                    if hits < HOTEXIT_THRESHOLD {
3328                        continue;
3329                    }
3330                    let (cont_pc, exit_tags) = if idx < inline_n {
3331                        let ent = &ct.per_exit_inline[idx];
3332                        (ent.cont_pc, ent.exit_tags.clone())
3333                    } else if idx < inline_n + tags_n {
3334                        let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3335                        (*pc, tags.clone())
3336                    } else {
3337                        (ct.head_pc, ct.exit_tags.clone())
3338                    };
3339                    out.push(HotExitInfo {
3340                        head_proto: proto,
3341                        head_pc: ct.head_pc,
3342                        exit_idx: idx,
3343                        hits,
3344                        cont_pc,
3345                        exit_tags,
3346                    });
3347                }
3348            }
3349            for inner in proto.protos.iter() {
3350                walk(*inner, out);
3351            }
3352        }
3353        let mut out: Vec<HotExitInfo> = Vec::new();
3354        walk(cl.proto, &mut out);
3355        out
3356    }
3357
3358    /// P12-S5-B — sum of NewTable sites that actually took the
3359    /// sunk-emit path across every successfully compiled trace on
3360    /// this Vm. Each counted site skips its heap `Gc<Table>`
3361    /// allocation per dispatch; the array part lives as Cranelift
3362    /// `Variable`s for the duration of the trace.
3363    pub fn trace_sunk_alloc_count(&self) -> u64 {
3364        self.jit.counters.sunk_alloc
3365    }
3366
3367    /// P12-S5-C — sum of materialise-helper emit sites across every
3368    /// successfully compiled trace on this Vm. Each unit is a
3369    /// (site × cmp side-exit) pair whose IR reconstructs a heap
3370    /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3371    /// emit is wiring materialise into the right side-exits.
3372    pub fn trace_materialize_emit_count(&self) -> u64 {
3373        self.jit.counters.materialize_emit
3374    }
3375
3376    /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3377    /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3378    /// replaces a `Heap::new_closure_inline` call on the dispatch
3379    /// path; the count is static (one per matching op per compiled
3380    /// trace), summed at compile success.
3381    pub fn trace_closure_emit_count(&self) -> u64 {
3382        self.jit.counters.closure_emit
3383    }
3384
3385    /// v2.0 Stage 7 polish 6 fire experiment — see
3386    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3387    /// Number of compiled traces whose `per_exit_inline.len() > 0`
3388    /// (depth>0 inlined cmp side-exits emitted).
3389    pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3390        self.jit.counters.per_exit_inline_compiled
3391    }
3392
3393    /// v2.0 Stage 7 polish 6 fire experiment — see
3394    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3395    /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3396    /// `dispatchable == true` — i.e. the count of compiled traces
3397    /// that would actually exercise the AOT polish 6 chain-reloc +
3398    /// deploy-resolver path.
3399    pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3400        self.jit.counters.per_exit_inline_dispatchable
3401    }
3402
3403    /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3404    /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3405    /// whether a self-recursive function actually walked the depth
3406    /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3407    /// across traces and Vm activations; reset only on `Vm::new`.
3408    pub fn trace_max_depth_seen(&self) -> u8 {
3409        self.jit.max_depth_seen
3410    }
3411
3412    /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3413    /// dispatch time). The frame-materialization helper reads `.base`
3414    /// to compute offsets for each inlined frame's window.
3415    #[doc(hidden)]
3416    pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3417        match self.frames.last() {
3418            Some(CallFrame::Lua(f)) => Some(*f),
3419            _ => None,
3420        }
3421    }
3422
3423    /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3424    /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3425    /// by `luna-tools` (`luna-profile`'s sampler walks this from
3426    /// inside a `Count` hook). Sibling-module scope: not part of the
3427    /// public embedder surface, but `inspect::frames_for_profile` is.
3428    #[doc(hidden)]
3429    pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3430        &self.frames
3431    }
3432
3433    /// P12-S4-step4b — ensure the value stack covers indices
3434    /// `[0..need)`. Extends with Nil if shorter. Called by the
3435    /// frame-materialization helper before pushing an inlined frame
3436    /// whose register window may exceed the current stack length.
3437    #[doc(hidden)]
3438    pub fn jit_ensure_stack(&mut self, need: usize) {
3439        if self.stack.len() < need {
3440            self.stack.resize(need, Value::Nil);
3441        }
3442    }
3443
3444    /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3445    /// `__close` handlers would run (any active tbc slot ≥ from
3446    /// holding a non-nil/false Value); if so, parks a deopt sentinel
3447    /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3448    /// IR branches to the deopt block. Otherwise performs the safe
3449    /// part of close — `close_from(from)` to close open upvals +
3450    /// drop any drained tbc entries ≥ from — and returns 0.
3451    ///
3452    /// Returns are i64-shaped so the cranelift import sig stays
3453    /// trivial (i64 → i64 mapping).
3454    #[doc(hidden)]
3455    pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3456        if self.jit.pending_err.is_some() {
3457            return 1;
3458        }
3459        let Some(f) = self.jit_last_lua_frame() else {
3460            self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3461            return 1;
3462        };
3463        let from = f.base + start_offset;
3464        let has_handler = self.tbc.iter().any(|&s| {
3465            s >= from && {
3466                let v = self.stack[s as usize];
3467                !matches!(v, Value::Nil | Value::Bool(false))
3468            }
3469        });
3470        if has_handler {
3471            self.jit.pending_err =
3472                Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3473            return 1;
3474        }
3475        self.close_from(from);
3476        // Drain any tbc entries ≥ from (they're nil/false stubs the
3477        // interpreter's drive_close would have skipped silently).
3478        while let Some(&s) = self.tbc.last() {
3479            if s < from {
3480                break;
3481            }
3482            self.tbc.pop();
3483        }
3484        0
3485    }
3486
3487    /// P12-S7-B — spill the trace's current value for a register to
3488    /// the underlying `vm.stack[base + slot_offset]`. Required before
3489    /// an `Op::Closure` whose inner proto has an `in_stack: true`
3490    /// upval at `slot_offset` — the helper's `find_or_create_upval`
3491    /// captures a live pointer to `vm.stack[base + slot_offset]`,
3492    /// which must hold the right value at call time (trace IR's
3493    /// Variable hasn't yet been written back).
3494    ///
3495    /// Parameters arrive as i64 from the IR: `slot_offset` is the
3496    /// caller-frame register index (`u32` in practice, depth=0
3497    /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3498    /// `crate::runtime::value::raw` byte for the slot's RegKind;
3499    /// `raw_bits` is the trace Variable's `use_var` payload
3500    /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3501    /// raw `Gc::as_ptr` cast).
3502    #[doc(hidden)]
3503    pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3504        let Some(f) = self.jit_last_lua_frame() else {
3505            self.jit.pending_err =
3506                Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3507            return;
3508        };
3509        let idx = (f.base as usize) + (slot_offset as usize);
3510        if self.stack.len() <= idx {
3511            self.stack.resize(idx + 1, Value::Nil);
3512        }
3513        // SAFETY: caller (trace JIT IR emit) provides matching
3514        // `(tag, raw_bits)` — same shape produced by Value::unpack.
3515        let v = unsafe {
3516            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3517        };
3518        self.stack[idx] = v;
3519    }
3520
3521    /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3522    /// Mirrors the interp arm (this file ~L5316): copies the
3523    /// generator/state/control triple from `R[A..=A+2]` to
3524    /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3525    /// the iterator function via `begin_call`. v2 only handles
3526    /// `Value::Native` iterators (the canonical `ipairs_iter` /
3527    /// `next` builtins) — a Lua-closure iterator would push a Lua
3528    /// frame mid-trace, breaking `recording_frame_base`, so we
3529    /// deopt by parking a `pending_err` and returning `-1`.
3530    ///
3531    /// `slot_offset` is the caller-frame register index (=
3532    /// `inst.a()` decoded from a u32-wide field). `nvars` is
3533    /// `inst.c() as i32` — the caller's expected return count.
3534    /// P12-S12-C v1 — refresh only the raw payload of
3535    /// `vm.stack[base + slot_offset]`, preserving its existing
3536    /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3537    /// uses this when the slot's `RegKind` is `Unset` (no compile-
3538    /// time tag info; commonly `Str` slots which the trace doesn't
3539    /// model). The interp's previous execution of the same op
3540    /// already populated the slot with the right tag — the trace
3541    /// only needs to swap in its current raw value.
3542    #[doc(hidden)]
3543    pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3544        let Some(f) = self.jit_last_lua_frame() else {
3545            return;
3546        };
3547        let idx = (f.base as usize) + (slot_offset as usize);
3548        if idx >= self.stack.len() {
3549            return;
3550        }
3551        let (tag, _) = self.stack[idx].unpack();
3552        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3553        self.stack[idx] = unsafe {
3554            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3555        };
3556    }
3557
3558    /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3559    ///
3560    /// Mirrors the interp arm (this file ~L5112): `self.top =
3561    /// base + a + n; concat_run(base + a)`. Result lands at
3562    /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3563    /// deopt (any error from `concat_run` OR detection that the
3564    /// metamethod path was taken — `concat_run` returns `Ok(())`
3565    /// after `begin_meta_call` which has pushed a Lua frame the
3566    /// trace can't safely continue past).
3567    ///
3568    /// The frame-push detection uses `pre/post frames.len()` and
3569    /// unwinds any pushed frames before deopting, so the
3570    /// dispatcher's existing deopt path sees a clean stack.
3571    #[doc(hidden)]
3572    pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3573        if self.jit.pending_err.is_some() {
3574            return -1;
3575        }
3576        let Some(f) = self.jit_last_lua_frame() else {
3577            self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3578            return -1;
3579        };
3580        let abs_a = f.base + slot_offset;
3581        self.top = abs_a + n as u32;
3582        let pre_frames = self.frames.len();
3583        let result = self.concat_run(abs_a);
3584        let post_frames = self.frames.len();
3585        // Frame-push = metamethod path taken (begin_meta_call pushed
3586        // a Lua frame). The trace can't continue past it; unwind +
3587        // deopt so interp redoes Op::Concat in the slow path.
3588        while self.frames.len() > pre_frames {
3589            frames_pop_sync(&mut self.frames, &mut self.frames_top);
3590        }
3591        if let Err(e) = result {
3592            self.jit.pending_err = Some(e);
3593            return -1;
3594        }
3595        if post_frames > pre_frames {
3596            self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3597            return -1;
3598        }
3599        0
3600    }
3601
3602    /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3603    /// accumulator buffer pool, returning a raw pointer. The trace
3604    /// fn's IR holds this pointer in a stack slot through the loop
3605    /// and calls `jit_str_buf_extend` per iter. If the pool is
3606    /// empty, allocate fresh.
3607    ///
3608    /// Safety: the returned pointer is valid until
3609    /// `jit_str_buf_release` is called or the Vm is dropped. The
3610    /// caller MUST not retain it across `enter_jit` boundaries.
3611    #[doc(hidden)]
3612    pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3613        let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3614        // Move into a Box so the pointer is stable until release.
3615        Box::into_raw(Box::new(buf))
3616    }
3617
3618    /// P14-S14-B v2 — return a previously-acquired buffer to the
3619    /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3620    /// buffer is `clear`ed (capacity retained) so the next acquire
3621    /// gets a ready-to-extend Vec.
3622    ///
3623    /// Safety: `buf` must have been returned by a prior
3624    /// `jit_str_buf_acquire` on the same Vm.
3625    #[doc(hidden)]
3626    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3627    pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3628        if buf.is_null() {
3629            return;
3630        }
3631        // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3632        let mut owned = unsafe { Box::from_raw(buf) };
3633        owned.clear();
3634        if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3635            self.jit.str_buf_pool.push(*owned);
3636        }
3637        // Else: drop the buffer.
3638    }
3639
3640    /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3641    /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3642    /// the piece slot) and passes it through; we treat it as a
3643    /// `*mut LuaStr` and append its bytes.
3644    ///
3645    /// Returns 0 on success, -1 if the piece isn't a Str (would
3646    /// trip __concat metamethod path → deopt to interp).
3647    ///
3648    /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3649    /// trace's piece slot raw bits.
3650    #[doc(hidden)]
3651    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3652    pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3653        if buf.is_null() || str_ptr == 0 {
3654            return -1;
3655        }
3656        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3657        let buf = unsafe { &mut *buf };
3658        let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3659        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3660        let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3661        buf.extend_from_slice(bytes);
3662        0
3663    }
3664
3665    /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3666    /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3667    /// the trace to write into the accumulator slot.
3668    ///
3669    /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3670    /// (the v2 hard cap; the trace deopts).
3671    ///
3672    /// Safety: `buf` from prior `acquire`. The buffer is left
3673    /// CLEAR (drained) ready for `release`.
3674    #[doc(hidden)]
3675    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3676    pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3677        if buf.is_null() {
3678            return 0;
3679        }
3680        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3681        let buf = unsafe { &mut *buf };
3682        let bytes = std::mem::take(buf);
3683        // v2 hard cap at 256KB per RFC Q3.
3684        if bytes.len() > 256 * 1024 {
3685            return 0;
3686        }
3687        let gc = self.heap.intern(&bytes);
3688        gc.as_ptr() as i64
3689    }
3690
3691    /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
3692    ///
3693    /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
3694    /// v3: ipairs `inext` fast path at the top — skip begin_call
3695    ///     when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
3696    ///     R[A+2]=Int.
3697    /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
3698    ///     caller-provided buffers + return R[A+4]'s tag byte. Lets
3699    ///     emit skip 3 separate `luna_jit_stack_load` calls and 1
3700    ///     `luna_jit_stack_tag` call by reading the buffer via
3701    ///     cranelift `stack_load` IR instead. Returns -1 on deopt.
3702    #[doc(hidden)]
3703    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
3704    pub fn jit_op_tforcall(
3705        &mut self,
3706        slot_offset: u32,
3707        nvars: i32,
3708        ctrl_out: *mut i64,
3709        key_out: *mut i64,
3710        val_out: *mut i64,
3711    ) -> i64 {
3712        if self.jit.pending_err.is_some() {
3713            return -1;
3714        }
3715        let Some(f) = self.jit_last_lua_frame() else {
3716            self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
3717            return -1;
3718        };
3719        let abs = f.base + slot_offset;
3720        let need = (abs + 7) as usize;
3721        if self.stack.len() < need {
3722            self.stack.resize(need, Value::Nil);
3723        }
3724        // v3 fast path.
3725        let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
3726            && std::ptr::fn_addr_eq(
3727                n.f,
3728                crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
3729            )
3730            && let Value::Table(t) = self.stack[(abs + 1) as usize]
3731            && t.metatable().is_none()
3732            && let Value::Int(i) = self.stack[(abs + 2) as usize]
3733        {
3734            let next_i = i.wrapping_add(1);
3735            let v = t.get_int(next_i);
3736            if v.is_nil() {
3737                self.stack[(abs + 4) as usize] = Value::Nil;
3738            } else {
3739                self.stack[(abs + 4) as usize] = Value::Int(next_i);
3740                if (nvars as usize) >= 2 {
3741                    self.stack[(abs + 5) as usize] = v;
3742                }
3743                for j in 2..nvars as usize {
3744                    let slot = abs + 4 + j as u32;
3745                    if (slot as usize) < self.stack.len() {
3746                        self.stack[slot as usize] = Value::Nil;
3747                    }
3748                }
3749            }
3750            true
3751        } else {
3752            false
3753        };
3754        if !took_fast_path {
3755            // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
3756            // route through begin_call. Lua-closure iters would push
3757            // a Lua frame mid-trace → deopt.
3758            self.stack[(abs + 4) as usize] = self.stack[abs as usize];
3759            self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
3760            self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
3761            if !matches!(self.stack[abs as usize], Value::Native(_)) {
3762                self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
3763                return -1;
3764            }
3765            if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
3766                self.jit.pending_err = Some(e);
3767                return -1;
3768            }
3769        }
3770        // v4 batched writeback — fill the caller's buffers with the
3771        // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
3772        // reload via cranelift `stack_load` instead of separate
3773        // `luna_jit_stack_load` helper calls.
3774        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3775        let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
3776        let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
3777        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3778        let key_raw = unsafe { key_rv.zero };
3779        let val_raw = if (nvars as usize) >= 2 {
3780            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3781            unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
3782        } else {
3783            0u64
3784        };
3785        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3786        unsafe {
3787            ctrl_out.write(ctrl_raw as i64);
3788            key_out.write(key_raw as i64);
3789            val_out.write(val_raw as i64);
3790        }
3791        key_tag as i64
3792    }
3793
3794    /// P12-S12-B-v2 — load the raw `i64` payload of
3795    /// `vm.stack[base + slot_offset]` for the active trace's head
3796    /// Lua frame. Used to reload trace IR `Variable`s after a
3797    /// helper has written to `vm.stack` directly (e.g. TForCall's
3798    /// iter results land at `R[A+4..A+4+nvars]`).
3799    #[doc(hidden)]
3800    pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3801        let Some(f) = self.jit_last_lua_frame() else {
3802            return 0;
3803        };
3804        let idx = (f.base as usize) + (slot_offset as usize);
3805        if idx >= self.stack.len() {
3806            return 0;
3807        }
3808        let v = self.stack[idx];
3809        let (_, raw) = v.unpack();
3810        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3811        unsafe { raw.zero as i64 }
3812    }
3813
3814    /// P12-S12-B-v2 — read the tag byte of
3815    /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3816    /// to dispatch on the iterator's return-key tag at runtime
3817    /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3818    /// deopt for v2).
3819    #[doc(hidden)]
3820    pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3821        let Some(f) = self.jit_last_lua_frame() else {
3822            return crate::runtime::value::raw::NIL;
3823        };
3824        let idx = (f.base as usize) + (slot_offset as usize);
3825        if idx >= self.stack.len() {
3826            return crate::runtime::value::raw::NIL;
3827        }
3828        self.stack[idx].unpack().0
3829    }
3830
3831    /// P12-S4-step4b — push a Lua frame onto the call stack with
3832    /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3833    /// at trace side-exits to recreate the inlined call activations
3834    /// the lowerer compiled past. The contract (enforced by the
3835    /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3836    /// `nresults` is the caller's expected count (today always 1
3837    /// because the lowerer bails Op::Call C != 2), and the caller
3838    /// has already called `jit_ensure_stack` to cover
3839    /// `[0..base + cl.proto.max_stack)`.
3840    #[doc(hidden)]
3841    pub fn jit_push_inlined_frame(
3842        &mut self,
3843        cl: Gc<LuaClosure>,
3844        base: u32,
3845        pc: u32,
3846        nresults: i32,
3847    ) {
3848        frames_push_sync(
3849            &mut self.frames,
3850            &mut self.frames_top,
3851            CallFrame::Lua(Frame {
3852                closure: cl,
3853                base,
3854                pc,
3855                // Lua call ABI: callee R[0] sits at caller R[A+1], so
3856                // callee.base = caller.base + A + 1; func_slot is
3857                // caller.base + A = callee.base - 1.
3858                func_slot: base - 1,
3859                n_varargs: 0,
3860                nresults,
3861                hook_oldpc: u32::MAX,
3862                from_c: false,
3863                tm: None,
3864                is_hook: false,
3865                tailcalls: 0,
3866            }),
3867        );
3868    }
3869
3870    /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
3871    /// should set to `false` so `load`/`loadstring` reject bytecode input
3872    /// (which bypasses parser limits and could exploit verifier gaps).
3873    pub fn set_bytecode_loading(&mut self, enabled: bool) {
3874        self.bytecode_loading = enabled;
3875    }
3876
3877    /// Current bytecode-loading gate state.
3878    pub fn bytecode_loading(&self) -> bool {
3879        self.bytecode_loading
3880    }
3881
3882    /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
3883    /// bytecode is a strictly larger trust surface than luna's own dump
3884    /// format (third-party toolchain bugs, malformed chunks, unknown
3885    /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
3886    /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
3887    pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
3888        self.puc_bytecode_loading = enabled;
3889    }
3890
3891    /// Current PUC bytecode-loading gate state.
3892    pub fn puc_bytecode_loading(&self) -> bool {
3893        self.puc_bytecode_loading
3894    }
3895
3896    /// Default loader input budget — 256 MiB.
3897    ///
3898    /// `Vm::load` and the Lua-level `load(reader, ...)` both refuse
3899    /// sources whose byte length crosses this cap, returning the
3900    /// PUC-shaped `not enough memory` error rather than letting the
3901    /// host allocator try (and crash) to hold the next chunk.
3902    pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 * 1024 * 1024;
3903
3904    /// Set the loader input byte budget (see
3905    /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
3906    /// effectively disable. Smaller caps are honored verbatim — a 0
3907    /// cap rejects every non-empty source.
3908    pub fn set_loader_input_budget(&mut self, bytes: usize) {
3909        self.loader_input_budget = bytes;
3910    }
3911
3912    /// Current loader input byte budget.
3913    pub fn loader_input_budget(&self) -> usize {
3914        self.loader_input_budget
3915    }
3916
3917    /// Take the error traceback captured at the latest error point and
3918    /// reset it. Embedders should call this immediately after a failed
3919    /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3920    /// entry clears it. Returns `None` if no error was in flight.
3921    pub fn take_error_traceback(&mut self) -> Option<String> {
3922        self.error_traceback
3923            .take()
3924            .map(|b| String::from_utf8_lossy(&b).into_owned())
3925    }
3926
3927    /// Arm the soft memory cap (P09 embedding). The run loop checks the
3928    /// heap's tracked byte usage between dispatch turns; on overshoot it
3929    /// first runs a full collect, and if `bytes` still exceeds the cap it
3930    /// raises a catchable `"memory cap exceeded"` Lua error and disarms
3931    /// itself (fire-once: re-arm before the next `call_value` if reusing
3932    /// the Vm across requests). `None` removes the cap. The accounting is
3933    /// approximate — internal Vec/Box capacity overhead is not tracked,
3934    /// so embedders should size the cap with ~2× margin over the desired
3935    /// hard limit and additionally bound the Vm's lifetime (drop after
3936    /// each request).
3937    pub fn set_memory_cap(&mut self, cap: Option<usize>) {
3938        self.heap.mem_cap = cap;
3939    }
3940
3941    /// Approximate bytes the heap is currently holding. Object shells plus
3942    /// every table's internal array/hash boxes (tracked via
3943    /// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
3944    /// bytecode and closure upvalue slices still go uncounted — this is a
3945    /// lower bound, not a precise `malloc_stats`-style total.
3946    pub fn memory_used(&self) -> usize {
3947        self.heap.bytes()
3948    }
3949
3950    /// Read upvalue slot `i` of the native function currently on top of the
3951    /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3952    /// when no native is running. Public so the C ABI trampoline can fetch
3953    /// the host C function pointer it stashed there at registration time.
3954    pub fn running_native_upvalue(&self, i: usize) -> Value {
3955        match self.running_natives.last() {
3956            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3957            Some(nc) => unsafe {
3958                let upvals = &(*nc.as_ptr()).upvals;
3959                upvals.get(i).copied().unwrap_or(Value::Nil)
3960            },
3961            None => Value::Nil,
3962        }
3963    }
3964
3965    /// Register a table for finalization if its (just-set) metatable carries a
3966    /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3967    /// `__gc` to the metatable afterwards does not retroactively register).
3968    pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3969        if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3970            self.heap.register_finalizable(t);
3971        }
3972    }
3973
3974    /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3975    /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3976    /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3977    /// metatable that `newproxy` returned, which then needs to flow through.
3978    /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3979    /// would re-check at metatable-set time); luna's only userdata
3980    /// finalizables today come via `newproxy`, which registers itself.
3981    #[allow(dead_code)]
3982    pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3983        if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3984            self.heap.register_finalizable_userdata(u);
3985        }
3986    }
3987
3988    /// Run pending `__gc` finalizers (objects the collector resurrected for
3989    /// finalization). Finalizer errors are swallowed — PUC turns them into a
3990    /// warning; they must never propagate to the mutator. Reentrancy-guarded.
3991    fn run_finalizers(&mut self) {
3992        let _ = self.run_finalizers_or_err();
3993    }
3994
3995    fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
3996        if self.gc_finalizing {
3997            return Ok(());
3998        }
3999        let pending = self.heap.take_tobefnz();
4000        if pending.is_empty() {
4001            return Ok(());
4002        }
4003        self.gc_finalizing = true;
4004        let mut first_err: Option<LuaError> = None;
4005        for obj in pending {
4006            let gc = self.get_mm(obj, Mm::Gc);
4007            // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
4008            // schedule the object for finalization (`__gc = true` is the
4009            // canonical placeholder); only call it at finalize time when it
4010            // is actually a function. gc.lua 5.2 :412 wires up exactly this
4011            // sentinel and then expects no call.
4012            let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
4013            if callable {
4014                // PUC `GCTM` sets `CIST_FIN` on the new ci so
4015                // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
4016                // `name = "__gc"`. luna threads the same outcome through the
4017                // generic `pending_tm` slot: the Lua frame born from this
4018                // call consumes it in `push_frame`. Saved/restored around the
4019                // call in case the handler is a native (which never pops it).
4020                // Bare event name; `frame_name` / `c_frame_name` add the
4021                // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
4022                // the convention used by `__close`, `__index`, …
4023                let saved_tm = self.pending_tm.replace("gc");
4024                // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
4025                // pcall, so `getinfo(2).namewhat` inside the finalizer reads
4026                // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
4027                // luna mirrors by temporarily tagging the current top Lua
4028                // frame's `tm` to "__gc" for the duration of the call.
4029                let caller_tm_idx = self
4030                    .frames
4031                    .iter()
4032                    .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
4033                let saved_caller_tm = caller_tm_idx.and_then(|i| {
4034                    if let CallFrame::Lua(fr) = &mut self.frames[i] {
4035                        let prev = fr.tm;
4036                        fr.tm = Some("gc");
4037                        Some(prev)
4038                    } else {
4039                        None
4040                    }
4041                });
4042                if let Err(e) = self.call_value(gc, &[obj]) {
4043                    // PUC 5.1 GCTM raised the finalizer's error to the
4044                    // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
4045                    // baselines on `not pcall(collectgarbage)`). 5.2/5.3
4046                    // wrapped it in `error in __gc metamethod (msg)` first
4047                    // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
4048                    // introduced the warning system and switched to "warn
4049                    // then continue" — never re-raise, just route the
4050                    // wrapped message through `warn`. gc.lua 5.5 :378 wires
4051                    // up `_WARN` capture under the `if T then …` block to
4052                    // baseline on the same wrapped string.
4053                    if self.version >= LuaVersion::Lua54 {
4054                        let inner = self.error_text(&e);
4055                        let msg = format!("error in __gc metamethod ({inner})");
4056                        self.emit_warn(msg.as_bytes(), false);
4057                    } else if first_err.is_none() {
4058                        let wrapped = if self.version >= LuaVersion::Lua52 {
4059                            let inner = self.error_text(&e);
4060                            let msg = format!("error in __gc metamethod ({inner})");
4061                            let s = Value::Str(self.heap.intern(msg.as_bytes()));
4062                            LuaError(s)
4063                        } else {
4064                            e
4065                        };
4066                        first_err = Some(wrapped);
4067                    }
4068                }
4069                self.pending_tm = saved_tm;
4070                if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
4071                    && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
4072                {
4073                    fr.tm = prev; // prev is Option<&'static str>; restore exactly
4074                }
4075            }
4076        }
4077        self.gc_finalizing = false;
4078        match first_err {
4079            Some(e) => Err(e),
4080            None => Ok(()),
4081        }
4082    }
4083
4084    /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
4085    /// Crosses up to three phases per call:
4086    ///   1. Pause      → seed Propagate (`gc_start_propagate`)
4087    ///   2. Propagate  → drain gray up to `budget`; on exhaustion run atomic
4088    ///                   (`gc_finish_atomic` → tobefnz populated; finalizers
4089    ///                   run via `run_finalizers`) and enter Sweep
4090    ///   3. Sweep      → `gc_sweep_step` up to (residual) `budget`
4091    /// Returns true when this call completed the cycle's sweep (back to
4092    /// Pause). The budget is spent generously across phases — a large `n`
4093    /// can finish a whole cycle in one call (PUC stop-the-world step).
4094    pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
4095        // Re-entry guard: never recurse — `run_finalizers` calls Lua code
4096        // that may hit a safe point and try to step again. Re-entry was OK
4097        // under STW (collect_garbage had its own guard) but here the
4098        // intermediate phase state would corrupt.
4099        if self.gc_finalizing {
4100            return false;
4101        }
4102        if self.heap.gc_phase_is_pause() {
4103            let (roots, extra) = self.gc_roots();
4104            self.heap.gc_start_propagate(&roots, &extra);
4105        }
4106        if self.heap.gc_phase_is_propagate() {
4107            if !self.heap.gc_step_propagate(budget) {
4108                return false;
4109            }
4110            self.heap.gc_finish_atomic();
4111            // any __gc scheduled by atomic — run before sweep so a finalizer
4112            // re-registering `self` re-enters the next cycle, not this sweep
4113            self.run_finalizers();
4114        }
4115        // either we just transitioned, or we entered already in Sweep, or
4116        // a finalizer started a new cycle (gc_sweep_step is a no-op then)
4117        self.heap.gc_sweep_step(budget)
4118    }
4119
4120    // ---- frames & calls ----
4121
    /// Begin calling the value in `stack[func_slot]`; its arguments sit in
    /// the slots directly above it.
    ///
    /// * `func_slot` — stack index of the callee.
    /// * `nargs` — number of arguments; `None` means "everything up to
    ///   `self.top`".
    /// * `nresults` — result-count expectation; forwarded to the JIT fast
    ///   path, `push_frame` and `finish_results`, and stored in
    ///   `native_nresults` for native callees.
    /// * `from_c` — forwarded unchanged to `push_frame`; not otherwise read
    ///   in this function.
    ///
    /// Returns `Ok(true)` if a Lua frame was pushed (the dispatch loop
    /// continues there) and `Ok(false)` if the call completed inline (JIT
    /// fast path, or a plain native). For `pcall`, `xpcall`, and `pairs` on
    /// a value with `__pairs`, the result of `begin_pcall` / `begin_xpcall`
    /// / `begin_pairs` is returned as-is.
    ///
    /// # Errors
    ///
    /// * the callee is not a function and has no `__call` metamethod, or the
    ///   `__call` chain exceeds the version-dependent cap;
    /// * an async-marked native is called while `async_mode` is off;
    /// * `push_frame`, the call hook, the return hook, or the native itself
    ///   fails (a Rust panic inside a native is trapped and reported as a
    ///   `native panic: …` error);
    /// * an async native was started: a sentinel `LuaError(Value::Nil)` is
    ///   returned after the future and its call context have been stashed.
    fn begin_call(
        &mut self,
        func_slot: u32,
        nargs: Option<u32>,
        nresults: i32,
        from_c: bool,
    ) -> Result<bool, LuaError> {
        let mut nargs = match nargs {
            Some(n) => n,
            None => self.top - (func_slot + 1),
        };
        // Consume `pending_tailcalls` at the boundary: a tail-call op sets it
        // only for the immediately-following Lua activation. Native dispatch
        // (or `__call` resolution) below must not let it leak to the next
        // begin_call's frame; restore it just before push_frame for the Lua
        // arm so its meaning is preserved across __call chaining.
        let tailcalls = std::mem::take(&mut self.pending_tailcalls);
        // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
        // is inserted before the value so it becomes the first argument, and a
        // chain of `__call` tables resolves down to a real function.
        let mut chain = 0u32;
        loop {
            match self.stack[func_slot as usize] {
                Value::Closure(cl) => {
                    // P11-S2c.B JIT fast path: if the Proto's body fits
                    // the int-arith whitelist, every arg is `Value::Int`,
                    // and the cached arity matches, skip frame setup and
                    // run the cached native fn in-place.
                    // Note: the taken value is written back to
                    // `pending_tailcalls` on this path too, even though no
                    // Lua frame is pushed.
                    if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
                        self.pending_tailcalls = tailcalls;
                        return Ok(false);
                    }
                    self.pending_tailcalls = tailcalls;
                    self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
                    // P12-S4-step0 — trace-on-call trigger. The frame
                    // we just pushed is the callee whose body the
                    // recorder will trace. Bump the per-Proto call
                    // counter; once it crosses `CALL_HOT_THRESHOLD`
                    // and no other trace is in flight, snapshot the
                    // callee's register window (R[0..max_stack]) and
                    // begin recording at `pc=0`. This is what unlocks
                    // tracing for functions whose body has no negative
                    // `Op::Jmp` back-edge (`fib`, recursive helpers).
                    //
                    // Gated on `trace_jit_enabled`, so the default
                    // dispatch pays a single not-taken branch.
                    if self.jit.trace_enabled {
                        let proto = cl.proto;
                        // `c` is the counter value BEFORE this call's bump;
                        // the threshold test below compares that old value.
                        // The counter stops incrementing at `u32::MAX / 2`.
                        let c = proto.call_hot_count.get();
                        if c < u32::MAX / 2 {
                            proto.call_hot_count.set(c + 1);
                        }
                        // P13-S13-H — relaxed call-trigger:
                        // `c >= THRESHOLD` (was `c == THRESHOLD`) +
                        // `!already_cached` short-circuit. Lets a
                        // discarded short call-trigger close retry
                        // on the next call (fib(10/15/20/25)
                        // pathology — first capture is base-case
                        // [Lt,Jmp,Return1]; coverage-heuristic
                        // discards; next call gets to record at a
                        // potentially deeper recursion point).
                        // Without `already_cached`, the relaxed
                        // condition would re-record over a cached
                        // trace every call.
                        //
                        // P13-S13-K — additionally short-circuit on
                        // `proto.trace_gave_up`. The S13-I discard
                        // cap force-compiles a partial trace and
                        // flips this flag; subsequent calls into
                        // this Proto skip the RefCell borrow + Vec
                        // scan entirely.
                        if proto.trace_gave_up.get() {
                            return Ok(true);
                        }
                        let call_already_cached =
                            proto.traces.borrow().iter().any(|t| t.head_pc == 0);
                        if c >= crate::jit::trace::CALL_HOT_THRESHOLD
                            && self.jit.active_trace.is_none()
                            && !call_already_cached
                        {
                            // The new frame is on top: index in
                            // `self.frames` is `len() - 1`.
                            let frame_idx = self.frames.len() - 1;
                            // Snapshot R[0..max_stack] at the callee's
                            // base. `push_frame` resized `self.stack`
                            // to `base + max_stack`, so this window is
                            // guaranteed in-bounds.
                            let f = match &self.frames[frame_idx] {
                                CallFrame::Lua(f) => f,
                                _ => unreachable!("push_frame just pushed a Lua frame"),
                            };
                            let max_stack = cl.proto.max_stack as usize;
                            let base_us = f.base as usize;
                            // Only the tag half of each unpacked register
                            // is recorded; the payload half is discarded.
                            let mut entry_tags = Vec::with_capacity(max_stack);
                            for i in 0..max_stack {
                                let (tag, _) = self.stack[base_us + i].unpack();
                                entry_tags.push(tag);
                            }
                            self.jit.active_trace =
                                Some(Box::new(crate::jit::trace::TraceRecord::start(
                                    cl.proto, 0, entry_tags, true,
                                )));
                            self.jit.recording_frame_base = frame_idx;
                        }
                    }
                    return Ok(true);
                }
                Value::Native(nc) => {
                    // v1.1 B10 Stage 2 — async-marked NativeClosure.
                    // Route through the cooperative-yield mechanism
                    // when async_mode is on; reject when called from
                    // a sync `eval`/`call_value` path (would have no
                    // executor to drive the returned future).
                    if nc.is_async {
                        if !self.async_mode {
                            let s = Value::Str(
                                self.heap.intern(b"async native called in sync context"),
                            );
                            self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
                            return Err(LuaError(s));
                        }
                        // Same root-up bookkeeping as the sync path:
                        // pin args + result-count expectation so a
                        // collection across the suspend boundary
                        // keeps the arg window live.
                        self.native_nresults = nresults;
                        self.gc_top = func_slot + nargs + 1;
                        // v1.3 Phase AS — fire the "call" hook BEFORE
                        // building the future. Mirrors the sync native
                        // path's `hook_call(true, nargs)` site
                        // (`exec.rs` further down) so embedders with a
                        // Rust debug hook installed see a Call event
                        // for async natives identical to the sync
                        // path. The matching "return" hook fires from
                        // `commit_async_native_result` in
                        // `async_drive.rs` after the future resolves.
                        // Placement follows audit §"Open questions"
                        // Q6: after the `native_nresults` / `gc_top`
                        // pin, before the future is constructed, so a
                        // hook body that triggers GC observes the
                        // correct pinned window. On hook error the
                        // sentinel never returns and
                        // `pending_async_native_*` remain `None` —
                        // the executor sees `DispatchOutcome::Error`
                        // (audit §A.1 edge cases).
                        self.hook_call(true, nargs)?;
                        // Transmute the stored NativeFn back to its
                        // real AsyncNativeFn shape. Sound because
                        // `set_async_native` / `create_async_native`
                        // installed an AsyncNativeFn through the
                        // identically-sized fn-pointer slot, and the
                        // `is_async` marker bit is what records that
                        // fact.
                        let async_fn: crate::vm::async_drive::AsyncNativeFn =
                            // SAFETY: same-size fn pointers; provenance
                            // preserved through `mem::transmute`. The
                            // `is_async` marker is the only safe-to-call
                            // gate, set exclusively by
                            // `Vm::create_async_native`.
                            unsafe { std::mem::transmute(nc.f) };
                        let vm_ptr: *mut Vm = self;
                        let fut = async_fn(vm_ptr, func_slot, nargs);
                        // Stash the future + post-call context for
                        // `drive_one` to surface to `EvalFuture::poll`.
                        self.pending_async_native_fut = Some(fut);
                        self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
                            func_slot,
                            nargs,
                            nresults,
                            gc_top: self.gc_top,
                        });
                        // Sentinel Err walked up to `drive_one` (same
                        // shape as `host_yield_pending`'s budget yield).
                        // Value::Nil — never seen by user code.
                        return Err(LuaError(Value::Nil));
                    }
                    // pcall/xpcall are yieldable: rather than calling the
                    // protected function through the Rust stack (which cannot be
                    // suspended), push a continuation frame and drive the call
                    // through the interpreter loop (PUC lua_pcallk). A yield
                    // inside it is preserved with the thread's saved frames.
                    // These natives are recognised by comparing function
                    // pointer addresses.
                    use crate::runtime::value::NativeFn;
                    if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
                        return self.begin_pcall(func_slot, nargs, nresults);
                    }
                    if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
                        return self.begin_xpcall(func_slot, nargs, nresults);
                    }
                    // pairs(t) with a __pairs metamethod calls it yieldably (PUC
                    // luaB_pairs); without one, fall through to the plain native.
                    if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
                        let arg = self.stack[(func_slot + 1) as usize];
                        if !self.get_mm(arg, Mm::Pairs).is_nil() {
                            return self.begin_pairs(func_slot, nresults);
                        }
                    }
                    // a native that collects (e.g. `collectgarbage`) roots up to
                    // its own arguments — the caller's live registers all sit
                    // below `func_slot` and stay rooted.
                    self.native_nresults = nresults;
                    self.gc_top = func_slot + nargs + 1;
                    // Push the native onto the running-natives chain BEFORE
                    // firing the call hook so that `debug.getinfo(level)` and
                    // `arg_error` from inside the hook see this native as the
                    // currently-running C function (db.lua :344 reads
                    // `getinfo(2, "f").func` for the just-entered callee).
                    // Popped after the matching return hook fires — even on
                    // error, the pop must happen, so every exit path below
                    // pops both vectors explicitly.
                    self.running_natives.push(nc);
                    self.running_native_slots.push((func_slot, nargs));
                    // PUC C-call discipline: entering a C function sets
                    // L->top to func + 1 + nargs, so a collect triggered
                    // INSIDE the native (explicit `collectgarbage()`, or
                    // an allocation crossing the GC threshold) roots the
                    // whole caller window up to and including the
                    // arguments. Without this raise the cursor is stale —
                    // parked at some earlier, possibly much lower
                    // safe-point — and the collect frees register-held
                    // values of the native's own caller (UAF-C, v2.13
                    // Track WUC). Never lower it: a re-entrant chain
                    // (native → Lua → native) must keep the outermost
                    // window rooted.
                    //
                    // NOTE(review): `gc_top` was unconditionally assigned
                    // `func_slot + nargs + 1` a few lines above and only the
                    // two `push` calls sit in between, so as written this
                    // `max` cannot change the value. If the never-lower
                    // behaviour described here is the intent, the earlier
                    // plain assignment is what defeats it — confirm which
                    // of the two is wanted.
                    self.gc_top = self.gc_top.max(func_slot + 1 + nargs);
                    // PUC luaD_precall fires the "call" hook for C functions too.
                    // A yield inside the native (coroutine.yield) propagates an
                    // Err and the matching "return" hook fires on resume instead.
                    if let Err(e) = self.hook_call(true, nargs) {
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        return Err(e);
                    }
                    // P09: trap a Rust panic in the native and surface it as
                    // a Lua error rather than letting it unwind through the
                    // VM into the embedder. The VM's internal state may still
                    // be inconsistent after a panic (half-pushed args,
                    // dangling GC references), so embedders that catch this
                    // class of error should drop and re-create the Vm — but
                    // it's still better than tearing the host process down.
                    // `AssertUnwindSafe` is sound because the caller is the
                    // dispatch loop and any half-done state is fenced behind
                    // the immediate Err return below.
                    use std::panic::{AssertUnwindSafe, catch_unwind};
                    let result =
                        match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
                            Ok(r) => r,
                            Err(payload) => {
                                let msg = panic_payload_str(&payload);
                                let s = Value::Str(
                                    self.heap.intern(format!("native panic: {msg}").as_bytes()),
                                );
                                Err(LuaError(s))
                            }
                        };
                    let nret = match result {
                        Ok(n) => n,
                        Err(e) => {
                            // Stash the offending native's name BEFORE the
                            // pop so a dying coroutine's traceback snapshot
                            // can prepend `[C]: in function '<name>'`. Use
                            // pushglobalfuncname (PUC walks package.loaded
                            // to qualify); fall back to "?".
                            self.errored_native =
                                Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
                            self.running_natives.pop();
                            self.running_native_slots.pop();
                            return Err(e);
                        }
                    };
                    // PUC `luaD_poscall` fires the return hook BEFORE moving
                    // results into the function's slot — at that point args
                    // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
                    // results above them at `[func_slot + 1 + nargs, …)`.
                    // luna's `nat_return` has already written the results
                    // into `[func_slot, func_slot + nret)`, so we replay PUC's
                    // layout by copying the results up past the preserved
                    // args, firing the hook (with ftransfer = nargs + 1, so
                    // `getlocal(2, ftransfer..)` reads results), and then
                    // copying back for `finish_results`. db.lua :541 reads
                    // `getinfo("r").ftransfer` + `getlocal` to inspect a
                    // returning native's results this way.
                    if self.hook.ret
                        && !self.in_hook
                        && (self.hook.func.is_some() || self.hook.rust_func.is_some())
                    {
                        let res_dst = func_slot + nargs + 1;
                        let need = (res_dst + nret) as usize;
                        if self.stack.len() < need {
                            self.stack.resize(need, Value::Nil);
                        }
                        // Source and destination ranges can overlap and the
                        // destination is higher, so copy from the last
                        // element down to avoid clobbering unread results.
                        for i in (0..nret).rev() {
                            self.stack[(res_dst + i) as usize] =
                                self.stack[(func_slot + i) as usize];
                        }
                        // widen the C-frame's argument window for getlocal
                        if let Some(slot) = self.running_native_slots.last_mut() {
                            slot.1 = nargs + nret;
                        }
                        // The hook result is held until the window, the
                        // result slots and the running-native chain have all
                        // been restored; only then is an error propagated.
                        let hr = self.hook_return(true, nargs + 1, nret);
                        if let Some(slot) = self.running_native_slots.last_mut() {
                            slot.1 = nargs;
                        }
                        // restore results into the slot finish_results expects
                        // (destination is lower here, so ascending order is safe)
                        for i in 0..nret {
                            self.stack[(func_slot + i) as usize] =
                                self.stack[(res_dst + i) as usize];
                        }
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        hr?;
                    } else {
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                    }
                    self.finish_results(func_slot, nret, nresults);
                    // the native may have allocated; collect with the results as
                    // the live boundary (PUC checks GC after a call returns).
                    self.maybe_collect_garbage(self.top);
                    return Ok(false);
                }
                v => {
                    // Not a function: look for a `__call` metamethod and, if
                    // present, splice it in as the new callee and loop.
                    let mm = self.get_mm(v, Mm::Call);
                    if mm.is_nil() {
                        return Err(self.call_err(v));
                    }
                    chain += 1;
                    // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
                    // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
                    // and the 5.5 test exercises the new tight bound directly
                    // (calls.lua :225 builds a 16-deep chain and expects the
                    // 16th to error). 5.4 calls.lua :194 instead builds a 20-
                    // deep chain and expects it to succeed.
                    let cap = if self.version >= crate::version::LuaVersion::Lua55 {
                        15
                    } else {
                        MAX_CCMT
                    };
                    if chain > cap {
                        return Err(self.rt_err("'__call' chain too long"));
                    }
                    // slots above shift by one; at a call site those are dead
                    // temps of the current frame
                    self.stack.insert(func_slot as usize, mm);
                    if self.top > func_slot {
                        self.top += 1;
                    }
                    // the original callee is now the handler's first argument
                    nargs += 1;
                }
            }
        }
    }
4476
    /// Enter the Lua closure `cl`: the callee value sits at `func_slot` and its
    /// `nargs` arguments occupy the slots directly above it.
    ///
    /// In order, this
    /// 1. refuses the call when `func_slot + 256` exceeds `MAX_LUA_STACK`;
    /// 2. rotates surplus (vararg) arguments below the new `base`;
    /// 3. grows the stack to `base + proto.max_stack` and nil-fills every
    ///    register past the parameters that were actually supplied;
    /// 4. pushes the `CallFrame::Lua` record, consuming the one-shot
    ///    `pending_tm` / `pending_is_hook` / `pending_tailcalls` state;
    /// 5. builds the compat `arg` table when the prototype requests it;
    /// 6. fires the call hook with the new frame already current.
    ///
    /// `nresults` and `from_c` are stored on the frame unchanged.
    ///
    /// # Errors
    /// Returns a runtime error ("stack overflow", or "error in error handling"
    /// while `msgh_depth > 0`) when the stack limit is hit, and propagates any
    /// error from `hook_call_with` — at which point the frame has already been
    /// pushed.
    fn push_frame(
        &mut self,
        cl: Gc<LuaClosure>,
        func_slot: u32,
        nargs: u32,
        nresults: i32,
        from_c: bool,
    ) -> Result<(), LuaError> {
        if func_slot + 256 > MAX_LUA_STACK {
            // PUC `stackerror`: a stack overflow that surfaces while the
            // current activation is inside an xpcall message handler is
            // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
            // error handling". errors.lua :606 expects the inner pcall(loop)
            // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
            // message matching "error handling".
            let msg = if self.msgh_depth > 0 {
                "error in error handling"
            } else {
                "stack overflow"
            };
            return Err(self.rt_err(msg));
        }
        let proto = cl.proto;
        let nparams = proto.num_params as u32;
        // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
        // stack just below the new `base`, so a named vararg can be indexed
        // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
        // `[e1..em][p1..pn]` so the fixed params land at the new base.
        let n_varargs = if proto.is_vararg {
            nargs.saturating_sub(nparams)
        } else {
            0
        };
        if n_varargs > 0 {
            let s = (func_slot + 1) as usize;
            self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
        }
        // Register 0 of the callee: one past the callee slot, plus the extras
        // that were rotated underneath.
        let base = func_slot + 1 + n_varargs;
        let need = (base + proto.max_stack as u32) as usize;
        if self.stack.len() < need {
            self.stack.resize(need, Value::Nil);
        }
        // wipe the register window beyond the kept parameters (stale values —
        // required for GC-safety and codegen). The varargs below `base` survive.
        // `kept` is the number of fixed parameters the caller really supplied
        // (never more than `nparams`).
        let kept = nargs.saturating_sub(n_varargs).min(nparams);
        // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
        // need` since `base + nparams <= base + max_stack = need` and `kept <=
        // nparams`. `slice::fill` lowers to a single memset on Copy types.
        unsafe {
            self.stack
                .get_unchecked_mut((base + kept) as usize..need)
                .fill(Value::Nil);
        }
        frames_push_sync(
            &mut self.frames,
            &mut self.frames_top,
            CallFrame::Lua(Frame {
                closure: cl,
                base,
                pc: 0,
                func_slot,
                nresults,
                hook_oldpc: u32::MAX,
                from_c,
                n_varargs,
                // single-shot consume: `close_slots` sets pending_tm before each
                // handler call; the next Lua frame born is that handler's.
                tm: self.pending_tm.take(),
                // `run_hook` sets `pending_is_hook` before dispatching the user
                // hook so its frame reports `namewhat = "hook"` via getinfo.
                is_hook: std::mem::take(&mut self.pending_is_hook),
                tailcalls: std::mem::take(&mut self.pending_tailcalls),
            }),
        );
        // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
        // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
        // the slot at `base + nparams`; the extras sit just below `base` from
        // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
        // hook; vararg.lua's contradictory expectations were already going to
        // fail either way (some asserts want `arg == nil`).
        if proto.has_compat_vararg_arg {
            let arg_slot = (base + nparams) as usize;
            let t = self.heap.new_table();
            {
                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
                let tm = unsafe { t.as_mut() };
                for i in 0..n_varargs {
                    // the i-th extra argument, read from just below `base`
                    let v = self.stack[(base - n_varargs + i) as usize];
                    // bounded by `n_varargs` (≤ MAXUPVAL territory), well
                    // below `MAX_ASIZE`
                    // (the `set_int` result is deliberately discarded)
                    let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
                }
                let nk = Value::Str(self.heap.intern(b"n"));
                tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
                    .expect("'n' key");
            }
            // once-per-table barrier mirrors SETLIST: t is born BLACK during
            // Propagate and the bulk `set_int`/`set` calls above don't barrier
            self.heap
                .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
            self.stack[arg_slot] = Value::Table(t);
        }
        // PUC luaD_precall fires the "call" hook with the new frame current, so
        // a hook calling debug.getinfo(2) sees the entered function. For a Lua
        // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
        // fixed params count — extras already live below `base`).
        // A frame born via OP_TailCall fires "tail call" instead (PUC
        // luaD_pretailcall) and skips the matching "return" hook on exit.
        // The frame inspected here is the one pushed above, so `tailcalls` is
        // the value just taken from `pending_tailcalls`.
        let is_tail = self
            .frames
            .last()
            .and_then(|f| f.lua())
            .is_some_and(|f| f.tailcalls > 0);
        self.hook_call_with(false, nparams, is_tail)?;
        Ok(())
    }
4593
4594    /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4595    /// the protected call `f` through the interpreter loop. The protected
4596    /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4597    /// at `func_slot+1` lets its results land one slot above the continuation —
4598    /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4599    /// Always returns `Ok(true)`: a continuation is now on the stack to be
4600    /// resolved by the loop (even when `f` is a native that already ran inline).
4601    fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4602        if nargs == 0 {
4603            return Err(crate::vm::builtins::raise_str(
4604                self,
4605                "bad argument #1 to 'pcall' (value expected)",
4606            ));
4607        }
4608        if self.pcall_depth >= MAX_C_DEPTH {
4609            return Err(self.rt_err("C stack overflow"));
4610        }
4611        self.pcall_depth += 1;
4612        frames_push_sync(
4613            &mut self.frames,
4614            &mut self.frames_top,
4615            CallFrame::Cont(NativeCont {
4616                kind: ContKind::Pcall,
4617                func_slot,
4618                nresults,
4619            }),
4620        );
4621        // call f (slot func_slot+1) with the remaining args, asking for all
4622        // results; a yield or error inside propagates with the continuation kept
4623        // on the stack (caught by `unwind` / preserved across a yield).
4624        self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4625        Ok(true)
4626    }
4627
4628    /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4629    /// message handler is stashed in the continuation and the arguments are
4630    /// shifted down over the handler's slot so `f`'s args are contiguous.
4631    fn begin_xpcall(
4632        &mut self,
4633        func_slot: u32,
4634        nargs: u32,
4635        nresults: i32,
4636    ) -> Result<bool, LuaError> {
4637        if nargs < 2 {
4638            return Err(crate::vm::builtins::raise_str(
4639                self,
4640                "bad argument #2 to 'xpcall' (value expected)",
4641            ));
4642        }
4643        if self.pcall_depth >= MAX_C_DEPTH {
4644            return Err(self.rt_err("C stack overflow"));
4645        }
4646        self.pcall_depth += 1;
4647        // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4648        // close its gap so f's args become [f@+1, a1@+2, ...].
4649        let handler = self.stack[(func_slot + 2) as usize];
4650        // 5.1: `xpcall (f, err)` takes exactly two parameters — extra
4651        // arguments are NOT forwarded to `f` (5.2 added forwarding;
4652        // 5.1 calls f with zero args). v2.14 dialect fixture 5.1/519.
4653        let nfargs = if self.version <= crate::version::LuaVersion::Lua51 {
4654            0
4655        } else {
4656            nargs - 2
4657        };
4658        for i in 0..nfargs {
4659            self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4660        }
4661        self.top = func_slot + 2 + nfargs;
4662        frames_push_sync(
4663            &mut self.frames,
4664            &mut self.frames_top,
4665            CallFrame::Cont(NativeCont {
4666                kind: ContKind::Xpcall { handler },
4667                func_slot,
4668                nresults,
4669            }),
4670        );
4671        self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4672        Ok(true)
4673    }
4674
4675    /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4676    /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4677    /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4678    /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4679    /// results land exactly where `pairs`'s results belong.
4680    fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4681        let arg = self.stack[(func_slot + 1) as usize];
4682        let mm = self.get_mm(arg, Mm::Pairs);
4683        // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4684        self.stack[func_slot as usize] = mm;
4685        self.top = func_slot + 2;
4686        frames_push_sync(
4687            &mut self.frames,
4688            &mut self.frames_top,
4689            CallFrame::Cont(NativeCont {
4690                kind: ContKind::Pairs,
4691                func_slot,
4692                nresults,
4693            }),
4694        );
4695        self.begin_call(func_slot, Some(1), 4, true)?;
4696        Ok(true)
4697    }
4698
4699    /// The running (top) Lua frame. The interpreter only reads this while a Lua
4700    /// frame is on top — a continuation frame is never the running frame (it is
4701    /// consumed the instant the call it protects unwinds onto it).
4702    #[inline]
4703    fn top_frame(&self) -> &Frame {
4704        self.frames
4705            .last()
4706            .and_then(CallFrame::lua)
4707            .expect("running Lua frame")
4708    }
4709
4710    #[inline]
4711    fn top_frame_mut(&mut self) -> &mut Frame {
4712        self.frames
4713            .last_mut()
4714            .and_then(CallFrame::lua_mut)
4715            .expect("running Lua frame")
4716    }
4717
4718    /// Pad/announce results sitting at func_slot.
4719    pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4720        // v2.3 P1B-A: capture the call's high-water-mark before
4721        // setting the new top so we can Nil-clear slots that the
4722        // call temporarily wrote but no longer holds — matching
4723        // PUC's `L->top` discipline (slots past L->top are "free"
4724        // and the next push overwrites them). Without this clear,
4725        // a stale `Value::Closure` (e.g. the called function
4726        // itself, when wanted = 0) sits at `func_slot` and a
4727        // later GC with wider `gc_top` traces it after the
4728        // closure has been freed by a previous narrow safe-point
4729        // GC → heap-buffer-overflow in `Marker::header` (UAF-A
4730        // sort.lua AA case).
4731        let prev_top = self.top as usize;
4732        if wanted < 0 {
4733            self.top = func_slot + nret;
4734        } else {
4735            let wanted = wanted as u32;
4736            let need = (func_slot + wanted) as usize;
4737            if self.stack.len() < need {
4738                self.stack.resize(need, Value::Nil);
4739            }
4740            for i in nret..wanted {
4741                self.stack[(func_slot + i) as usize] = Value::Nil;
4742            }
4743            self.top = func_slot + wanted;
4744        }
4745        let new_top = self.top as usize;
4746        let clear_end = prev_top.min(self.stack.len());
4747        if new_top < clear_end {
4748            for slot in &mut self.stack[new_top..clear_end] {
4749                *slot = Value::Nil;
4750            }
4751        }
4752    }
4753
    /// v1.1 B10 Stage 1 — current call-frame depth (read-only).
    /// Used by `EvalFuture` on the bootstrap poll to compute the
    /// `entry_depth` it will pass to subsequent resume slices.
    ///
    /// This is the length of `self.frames`, so continuation frames
    /// (`CallFrame::Cont`) are counted alongside Lua frames.
    pub(crate) fn frame_count(&self) -> usize {
        self.frames.len()
    }
4760
4761    fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4762        let nret = self.top - func_slot;
4763        let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4764        self.stack.truncate(func_slot as usize);
4765        self.top = func_slot;
4766        out
4767    }
4768
4769    // ---- open upvalues ----
4770
4771    #[doc(hidden)]
4772    pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4773        match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4774            Ok(i) => self.open_upvals[i].1,
4775            Err(i) => {
4776                let uv = self.heap.new_upvalue(UpvalState::Open {
4777                    slot,
4778                    thread: self.current,
4779                });
4780                self.open_upvals.insert(i, (slot, uv));
4781                uv
4782            }
4783        }
4784    }
4785
4786    pub(crate) fn close_from(&mut self, slot: u32) {
4787        while let Some(&(s, uv)) = self.open_upvals.last() {
4788            if s < slot {
4789                break;
4790            }
4791            let v = self.stack[s as usize];
4792            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4793            unsafe { uv.as_mut() }.set_closed(v);
4794            self.heap
4795                .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4796            self.open_upvals.pop();
4797        }
4798    }
4799
4800    /// Register a to-be-closed slot (TBC op / generic-for closing value).
4801    fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4802        let v = self.stack[slot as usize];
4803        if matches!(v, Value::Nil | Value::Bool(false)) {
4804            return Ok(()); // nil and false are silently ignored
4805        }
4806        if self.get_mm(v, Mm::Close).is_nil() {
4807            // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4808            // (a <type> value)"; the local's name comes from the running
4809            // frame's locvars at this pc.
4810            let tn = v.type_name();
4811            let f = self.top_frame();
4812            let reg = slot - f.base;
4813            let pc = (f.pc as usize).saturating_sub(1);
4814            let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4815                Some(n) => format!("variable '{n}'"),
4816                None => "to-be-closed slot".to_string(),
4817            };
4818            return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4819        }
4820        debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4821        self.tbc.push(slot);
4822        Ok(())
4823    }
4824
    /// Close upvalues and run `__close` handlers for slots ≥ `from`
    /// (handlers in reverse registration order; PUC luaF_close).
    ///
    /// `err` is the error object currently in flight, if any. It is handed to
    /// each handler as its second argument, and its presence also decides the
    /// `error_close` flag forwarded to `call_value_impl` for every handler.
    ///
    /// `closing_err` and `pending_tm` are saved on entry / before each call and
    /// restored afterwards, so nested closes do not clobber each other.
    ///
    /// # Errors
    /// Returns the last error raised while closing — by a handler, or
    /// synthesised here when a value's `__close` metamethod has disappeared
    /// since registration. Remaining slots are still closed after a failure.
    /// An incoming `err` alone does not make this return `Err`.
    fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
        self.close_from(from);
        // PUC: handlers run in reverse declaration order; an error raised by a
        // handler becomes the error object passed to the remaining ones, and
        // the rest are still closed. The last raised error propagates.
        let mut pending = err;
        let mut result = Ok(());
        let saved_err = self.closing_err;
        // On a normal close the handler runs within the closing function's
        // activation (debug parent = that function); during error unwinding the
        // function's frame is already gone, so the handler sits at the C
        // boundary instead (PUC: luaF_close runs after the ci is restored).
        let error_close = err.is_some();
        while let Some(&s) = self.tbc.last() {
            if s < from {
                break;
            }
            self.tbc.pop();
            let v = self.stack[s as usize];
            // a slot whose value has become nil/false has nothing to close
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                // PUC `prepclosingmethod`: the __close metamethod was present
                // at OP_TBC (else we would have errored there) but has since
                // been removed/replaced. Treat as a non-callable target.
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                pending = Some(e.0);
                result = Err(e);
                continue;
            }
            // root the pending error: a handler may trigger a collection
            self.closing_err = pending;
            // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
            // getinfo report the handler as "in metamethod 'close'". Saved/
            // restored around the call to cover the path where `mm` is a
            // native (`push_frame` never consumes it) or it raises before
            // reaching push_frame.
            let saved_tm = self.pending_tm.replace("close");
            // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
            // is nil on a normal close (5.4 locals.lua :875's
            // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
            // through the yield). PUC 5.5 dropped the trailing nil: a clean
            // close passes only `obj`, the error case still passes both
            // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
            // normal-close arms, n=2 for the error arm).
            let call = match pending {
                Some(e) => self.call_value_impl(mm, &[v, e], error_close),
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        self.call_value_impl(mm, &[v], error_close)
                    } else {
                        self.call_value_impl(mm, &[v, Value::Nil], error_close)
                    }
                }
            };
            self.pending_tm = saved_tm;
            // a failing handler replaces the error seen by the remaining ones
            if let Err(e) = call {
                pending = Some(e.0);
                result = Err(e);
            }
        }
        self.closing_err = saved_err;
        result
    }
4896
    /// Yieldable variant of `close_slots`: drive the chain of `__close`
    /// handlers for slots ≥ `from` through the interpreter loop with a
    /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
    /// suspends cleanly (the close iteration's state rides on the thread's
    /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
    /// applied to `luaF_close`. `after` runs when every slot is closed; if
    /// `after` is `Return` and we've returned past `entry_depth`,
    /// `Ok(Some(vals))` carries the result up to the host caller.
    ///
    /// `err` is the error object in flight (if any) and seeds the error
    /// threaded through the handlers. Open upvalues at or above `from` are
    /// closed up front; the handler chain itself is delegated to
    /// `drive_close`, whose result is returned unchanged.
    fn begin_close(
        &mut self,
        from: u32,
        err: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        self.close_from(from);
        self.drive_close(from, err, after, entry_depth)
    }
4915
    /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
    /// non-callable-mm error for an `__close` that was reset to a bad value
    /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
    /// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
    /// the interpreter then drives the handler and re-enters this driver via
    /// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
    /// the threaded error (if any) propagates or `after` fires.
    ///
    /// `pending` is the error currently threaded through the chain; `after`
    /// and `entry_depth` are passed on to `finish_close_after` once no slot
    /// ≥ `from` remains.
    ///
    /// # Errors
    /// Propagates a failure from `begin_call` (after `pending_tm` and
    /// `closing_err` have been restored) and whatever `finish_close_after`
    /// returns when the chain is drained.
    fn drive_close(
        &mut self,
        from: u32,
        mut pending: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        loop {
            // nothing left to close at or above `from`?
            let drained = match self.tbc.last() {
                None => true,
                Some(&s) => s < from,
            };
            if drained {
                return self.finish_close_after(after, pending, entry_depth);
            }
            let s = self.tbc.pop().expect("tbc non-empty");
            let v = self.stack[s as usize];
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                // the metamethod vanished: record the error and keep draining
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                pending = Some(e.0);
                continue;
            }
            // A real handler: stage [mm, v, (err?)] above the current top,
            // record the close iteration state in a Cont::Close, and let the
            // interpreter dispatch the handler. On return the run() head
            // re-enters this driver via the Cont::Close consumer.
            let func_slot = self.top;
            let error_close = pending.is_some();
            // room for the handler plus at most two arguments
            let need = (func_slot + 3) as usize;
            if self.stack.len() < need {
                self.stack.resize(need, Value::Nil);
            }
            self.stack[func_slot as usize] = mm;
            self.stack[func_slot as usize + 1] = v;
            // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
            // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
            let nargs = match pending {
                Some(e) => {
                    self.stack[func_slot as usize + 2] = e;
                    2u32
                }
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        1u32
                    } else {
                        self.stack[func_slot as usize + 2] = Value::Nil;
                        2u32
                    }
                }
            };
            self.top = func_slot + 1 + nargs;
            // Root the pending error during the call (a handler may collect).
            let saved_err = self.closing_err;
            self.closing_err = pending;
            // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
            // for traceback / getinfo.
            let saved_tm = self.pending_tm.replace("close");
            frames_push_sync(
                &mut self.frames,
                &mut self.frames_top,
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Close(CloseCont {
                        from,
                        pending,
                        after,
                    }),
                    func_slot,
                    nresults: 0,
                }),
            );
            // PUC luaF_close runs a normal close *within* the closing
            // function's activation (debug parent = that function); during an
            // error unwind the function's frame is already gone and the
            // handler sits at the C boundary instead.
            let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
            // restore the saved state before looking at the outcome, so an
            // early error return cannot leak the temporary values
            self.pending_tm = saved_tm;
            self.closing_err = saved_err;
            r?;
            return Ok(None);
        }
    }
5011
5012    /// Fire `after` once every `__close` handler has run. `Block` propagates
5013    /// any remaining error or simply continues; `Return` performs OP_Return's
5014    /// tail (hook + frame pop + result delivery) and may surface results to
5015    /// the host when the function whose return triggered the close was the
5016    /// entry activation, but only on a clean drain — a pending error skips
5017    /// the return tail and propagates instead. `ResumeUnwind` pops the
5018    /// deferred Lua frame and re-raises, letting a handler's own error win
5019    /// over the original propagating one (PUC luaF_close).
5020    fn finish_close_after(
5021        &mut self,
5022        after: AfterClose,
5023        pending: Option<Value>,
5024        entry_depth: usize,
5025    ) -> Result<Option<Vec<Value>>, LuaError> {
5026        match after {
5027            AfterClose::Block => match pending {
5028                Some(e) => Err(LuaError(e)),
5029                None => Ok(None),
5030            },
5031            AfterClose::Return {
5032                abs_a,
5033                nret,
5034                from_native,
5035            } => match pending {
5036                Some(e) => Err(LuaError(e)),
5037                None => self.complete_return(abs_a, nret, from_native, entry_depth),
5038            },
5039            AfterClose::ResumeUnwind { func_slot, err } => {
5040                // The aborting Lua frame was popped before `begin_close`;
5041                // restore the catcher's stack window down to `func_slot` and
5042                // re-raise — preferring a handler-raised error over the
5043                // original (PUC luaF_close).
5044                self.stack.truncate(func_slot as usize);
5045                self.top = func_slot;
5046                self.tbc.retain(|&s| s < func_slot);
5047                Err(LuaError(pending.unwrap_or(err)))
5048            }
5049        }
5050    }
5051
    /// OP_Return's post-close tail: fire the "return" hook (frame still
    /// current), pop the Lua frame, slide results into `func_slot`, then
    /// either hand them to the host (`Ok(Some(vals))` when we've returned
    /// past `entry_depth`), leave them contiguous for an exposed
    /// pcall/xpcall continuation, or finish into the caller's expected
    /// result slot. Mirrors the synchronous OP_Return tail so both paths
    /// share semantics — the `from_native` flag selects the right "return"
    /// hook context for `hook_return`.
    ///
    /// `abs_a` is the absolute stack index of the first result and `nret` the
    /// number of results.
    ///
    /// # Errors
    /// Propagates an error from `hook_return` / `hook_tail_return`; these run
    /// before the frame is popped, so the frame is still on the stack then.
    ///
    /// # Panics
    /// Panics when the frame stack is empty or its top entry is not a Lua
    /// frame.
    fn complete_return(
        &mut self,
        abs_a: u32,
        nret: u32,
        from_native: bool,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        // ftransfer is the local index (1-based) of the first result, as
        // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
        // exposes locals starting at `frame.base` (= func_slot + 1 +
        // n_varargs for a vararg call), so the conversion is the absolute
        // result slot minus base, plus one to make it 1-based. db.lua 5.4
        // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
        // shape end-to-end.
        let ftransfer = self
            .frames
            .last()
            .and_then(CallFrame::lua)
            .map(|fr| {
                let raw = abs_a.saturating_sub(fr.base) + 1;
                // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
                // local injected at index `numparams + 1`, so getlocal
                // numbering shifts results past it (5.5 db.lua :539
                // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
                if fr.closure.proto.has_vararg_table_pseudo {
                    raw + 1
                } else {
                    raw
                }
            })
            // no Lua frame on top: fall back to the first local
            .unwrap_or(1);
        // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
        // per tail call that collapsed into this activation, *after* its
        // own "return". `tailcalls` tracks that count exactly (PUC
        // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
        // "return" hook fires once even when the activation absorbed
        // multiple tail calls — only `istailcall` on getinfo surfaces the
        // collapse. 5.1 db.lua :366 pins the event ordering.
        let tailcalls = if self.version <= LuaVersion::Lua51 {
            self.frames
                .last()
                .and_then(|f| f.lua())
                .map(|f| f.tailcalls)
                .unwrap_or(0)
        } else {
            0
        };
        self.hook_return(from_native, ftransfer, nret)?;
        for _ in 0..tailcalls {
            self.hook_tail_return()?;
        }
        let CallFrame::Lua(fr) =
            frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
        else {
            unreachable!("returning from a non-Lua frame")
        };
        // slide the results down so the first one replaces the callee
        for i in 0..nret {
            self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
        }
        if self.frames.len() < entry_depth {
            // returned past the entry activation: hand the values to the host
            self.top = fr.func_slot + nret;
            return Ok(Some(self.take_results(fr.func_slot)));
        } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
            // a continuation is exposed: keep every result, unpadded
            self.top = fr.func_slot + nret;
        } else {
            self.finish_results(fr.func_slot, nret, fr.nresults);
        }
        Ok(None)
    }
5129
5130    #[doc(hidden)]
5131    pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
5132        match cl.upvals()[idx as usize].state() {
5133            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
5134            UpvalState::Closed(v) => v,
5135        }
5136    }
5137
5138    fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
5139        let uv = cl.upvals()[idx as usize];
5140        match uv.state() {
5141            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
5142            UpvalState::Closed(_) => {
5143                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5144                unsafe { uv.as_mut() }.set_closed(v);
5145                // forward barrier: a closed upvalue is single-slot, so the
5146                // forward variant is cheaper than barrier_back (PUC uses
5147                // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
5148                // tables / threads).
5149                self.heap
5150                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
5151            }
5152        }
5153    }
5154
5155    // ---- register / error helpers ----
5156
5157    #[inline(always)]
5158    fn r(&self, base: u32, i: u32) -> Value {
5159        // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
5160        // at frame entry (`push_frame` sizes the stack up to base + max_stack),
5161        // and every bytecode-generated reference falls within `[0, max_stack)`.
5162        // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
5163        // reason. The bounds check would re-validate this invariant on every
5164        // op — the dispatch hot path can't afford it.
5165        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5166        unsafe { *self.stack.get_unchecked((base + i) as usize) }
5167    }
5168
5169    #[inline(always)]
5170    fn set_r(&mut self, base: u32, i: u32, v: Value) {
5171        // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
5172        // frame-entry contract.
5173        unsafe {
5174            *self.stack.get_unchecked_mut((base + i) as usize) = v;
5175        }
5176    }
5177
5178    #[doc(hidden)]
5179    pub fn rt_err(&mut self, msg: &str) -> LuaError {
5180        let text = match self.position_prefix() {
5181            Some(p) => format!("{p}{msg}"),
5182            None => msg.to_string(),
5183        };
5184        LuaError(Value::Str(self.heap.intern(text.as_bytes())))
5185    }
5186
5187    /// Error without the `chunk:line:` position prefix. PUC's
5188    /// `resume_error` (ldo.c) pushes its message as a bare literal,
5189    /// so `cannot resume dead coroutine` etc. must not be prefixed.
5190    pub(crate) fn plain_err(&mut self, msg: &str) -> LuaError {
5191        LuaError(Value::Str(self.heap.intern(msg.as_bytes())))
5192    }
5193
5194    pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
5195        let extra = self.subject_varinfo(v);
5196        let tn = self.obj_typename(v);
5197        self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
5198    }
5199
5200    /// Name the offending operand of the current instruction (PUC varinfo) for
5201    /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
5202    /// to the instruction's subject register(s); a native-raised error whose
5203    /// current instruction doesn't hold `bad` simply yields "".
5204    fn subject_varinfo(&self, bad: Value) -> String {
5205        use crate::vm::isa::Op;
5206        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5207            return String::new();
5208        };
5209        let proto = f.closure.proto;
5210        let p: &crate::runtime::Proto = &proto;
5211        let pc = f.pc as usize;
5212        if pc == 0 || pc > p.code.len() {
5213            return String::new();
5214        }
5215        let instr = p.code[pc - 1];
5216        let mut cands: Vec<u32> = Vec::new();
5217        match instr.op() {
5218            // indexed reads / length / method: the table/object is in B
5219            Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5220                cands.push(instr.b());
5221            }
5222            // indexed writes / calls: the table/function is in A
5223            Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5224                cands.push(instr.a());
5225            }
5226            // arithmetic/bitwise: a register operand (B, and C unless constant)
5227            Op::Add
5228            | Op::Sub
5229            | Op::Mul
5230            | Op::Div
5231            | Op::Mod
5232            | Op::Pow
5233            | Op::IDiv
5234            | Op::BAnd
5235            | Op::BOr
5236            | Op::BXor
5237            | Op::Shl
5238            | Op::Shr => {
5239                cands.push(instr.b());
5240                if !instr.k() {
5241                    cands.push(instr.c());
5242                }
5243            }
5244            Op::Unm | Op::BNot => cands.push(instr.b()),
5245            Op::Concat => {
5246                let a = instr.a();
5247                for r in a..a + instr.b() {
5248                    cands.push(r);
5249                }
5250            }
5251            _ => {}
5252        }
5253        for reg in cands {
5254            if self.r(f.base, reg).raw_eq(bad) {
5255                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5256                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5257                    None => String::new(),
5258                };
5259            }
5260        }
5261        String::new()
5262    }
5263
5264    /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5265    /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5266    /// 'add')" when the call is a metamethod dispatched by the current opcode.
5267    fn call_err(&mut self, v: Value) -> LuaError {
5268        let extra = self.call_target_varinfo(v);
5269        let tn = self.obj_typename(v);
5270        self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5271    }
5272
5273    /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5274    /// frame before the call, so the opcode that triggered it lives in the
5275    /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5276    /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5277    fn call_target_varinfo(&self, bad: Value) -> String {
5278        use crate::vm::isa::Op;
5279        let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5280            return String::new();
5281        };
5282        let proto = f.closure.proto;
5283        let p: &crate::runtime::Proto = &proto;
5284        let pc = f.pc as usize;
5285        if pc == 0 || pc > p.code.len() {
5286            return String::new();
5287        }
5288        let instr = p.code[pc - 1];
5289        match instr.op() {
5290            Op::Call | Op::TailCall => {
5291                let reg = instr.a();
5292                if self.r(f.base, reg).raw_eq(bad) {
5293                    match crate::vm::objname::getobjname(p, pc - 1, reg) {
5294                        Some((kind, name)) => format!(" ({kind} '{name}')"),
5295                        None => String::new(),
5296                    }
5297                } else {
5298                    String::new()
5299                }
5300            }
5301            op => match mm_event_name(op) {
5302                Some(ev) => format!(" (metamethod '{ev}')"),
5303                None => String::new(),
5304            },
5305        }
5306    }
5307
5308    /// "number has no integer representation", enriched (PUC luaG_tointerror)
5309    /// with a "(field 'x')"-style suffix naming the offending operand of the
5310    /// current arithmetic instruction when it can be recovered from bytecode.
5311    fn no_int_rep_err(&mut self) -> LuaError {
5312        let extra = self.bad_operand_varinfo();
5313        self.rt_err(&format!("number{extra} has no integer representation"))
5314    }
5315
5316    /// Inspect the current frame's faulting instruction: find the register
5317    /// operand holding a float with no integer representation and name it.
5318    fn bad_operand_varinfo(&self) -> String {
5319        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5320            return String::new();
5321        };
5322        let proto = f.closure.proto;
5323        let p: &crate::runtime::Proto = &proto;
5324        let pc = f.pc as usize;
5325        if pc == 0 || pc > p.code.len() {
5326            return String::new();
5327        }
5328        let instr = p.code[pc - 1];
5329        let mut regs = vec![instr.b()];
5330        if !instr.k() {
5331            regs.push(instr.c());
5332        }
5333        for reg in regs {
5334            let v = self.r(f.base, reg);
5335            if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5336                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5337                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5338                    None => String::new(),
5339                };
5340            }
5341        }
5342        String::new()
5343    }
5344
5345    /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5346    /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5347    /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5348    /// native call), PUC pushes no prefix — match that by looking only at the
5349    /// topmost frame directly and bailing if it is anything but a Lua frame.
5350    pub(crate) fn position_prefix(&self) -> Option<String> {
5351        let f = self.frames.last().and_then(CallFrame::lua)?;
5352        let proto = f.closure.proto;
5353        if proto.source.as_bytes().is_empty() {
5354            return Some(self.stripped_prefix());
5355        }
5356        if proto.lines.is_empty() {
5357            return None;
5358        }
5359        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5360        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5361        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5362        let display = crate::vm::lib_debug::chunk_id(raw);
5363        let src = String::from_utf8_lossy(&display).into_owned();
5364        Some(format!("{src}:{line}: "))
5365    }
5366
5367    /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5368    /// for the source and renders the line as "?" (so the prefix reads
5369    /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5370    /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5371    /// matches `^%?:%-1:`).
5372    fn stripped_prefix(&self) -> String {
5373        if self.version >= crate::version::LuaVersion::Lua55 {
5374            "?:?: ".to_string()
5375        } else {
5376            "?:-1: ".to_string()
5377        }
5378    }
5379
5380    /// Position prefix of the Lua frame `level` steps up from the running C
5381    /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5382    /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5383    /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5384    /// caller's frame is reported even across pcall/xpcall continuations.
5385    /// `luaL_where(level)` for `error()`: unlike `dbg_frame` (whose 5.2+
5386    /// level numbering skips Cont activations to match db.lua's getinfo
5387    /// shape), PUC counts EVERY CallInfo — a C caller occupies a level of
5388    /// its own. `pcall(pcall, error, "msg")` must therefore resolve
5389    /// level 1 to the inner pcall (a C activation, no line info → no
5390    /// prefix), not tunnel through to the Lua frame below (v2.13
5391    /// CORPUS-IV fixture 239).
5392    pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5393        if level < 1 {
5394            return None;
5395        }
5396        let v51 = self.version <= LuaVersion::Lua51;
5397        let mut lvl = level;
5398        let mut found: Option<usize> = None;
5399        'walk: for fi in (0..self.frames.len()).rev() {
5400            match &self.frames[fi] {
5401                CallFrame::Lua(f) => {
5402                    lvl -= 1;
5403                    if lvl == 0 {
5404                        found = Some(fi);
5405                        break 'walk;
5406                    }
5407                    if v51 {
5408                        for _ in 0..f.tailcalls {
5409                            lvl -= 1;
5410                            if lvl == 0 {
5411                                return None; // synthetic tail level: no line info
5412                            }
5413                        }
5414                    }
5415                    if f.from_c {
5416                        lvl -= 1;
5417                        if lvl == 0 {
5418                            return None; // C activation: no line info
5419                        }
5420                    }
5421                }
5422                CallFrame::Cont(_) => {
5423                    // A continuation-driven native (pcall/xpcall/close)
5424                    // is a C activation — it takes a level and has no
5425                    // line info.
5426                    lvl -= 1;
5427                    if lvl == 0 {
5428                        return None;
5429                    }
5430                }
5431            }
5432        }
5433        let fi = found?;
5434        let f = self.frames[fi].lua()?;
5435        let proto = f.closure.proto;
5436        // PUC luaG_addinfo: a stripped chunk has no source — see
5437        // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5438        if proto.source.as_bytes().is_empty() {
5439            return Some(self.stripped_prefix());
5440        }
5441        // a stripped chunk carries no per-instruction line info
5442        if proto.lines.is_empty() {
5443            return None;
5444        }
5445        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5446        // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5447        // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5448        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5449        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5450        let display = crate::vm::lib_debug::chunk_id(raw);
5451        let src = String::from_utf8_lossy(&display).into_owned();
5452        Some(format!("{src}:{line}: "))
5453    }
5454
5455    // ---- the interpreter ----
5456
5457    fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5458        let entry_depth = self.frames.len();
5459        self.exec_with(entry_depth)
5460    }
5461
    /// Crate-visible entry point that forwards directly to `exec_with`.
    ///
    /// `exec_with` runs from the current top frame down to (but not past)
    /// `entry_depth` frames. Coroutine driving passes `entry_depth = 1` so the
    /// whole thread runs to completion or a yield.
    ///
    /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
    /// `entry_depth` (captured pre-yield by `drive_one`). Called by
    /// `EvalFuture::poll` on every poll after the first to walk the
    /// existing call frames until the next `BudgetExhausted` or
    /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
    /// embedder reaches it through `Vm::eval_async`.
    pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
        self.exec_with(entry_depth)
    }
5474
5475    fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5476        loop {
5477            let r = self.run(entry_depth);
5478            if r.is_err()
5479                && (self.yielding.is_some()
5480                    || self.terminating.is_some()
5481                    || self.host_yield_pending
5482                    || self.pending_async_native_fut.is_some())
5483            {
5484                // a `coroutine.yield` is in flight: keep the frames intact (they
5485                // are the suspended coroutine's saved state) and propagate to
5486                // resume. A self-close termination propagates the same way, so a
5487                // protecting pcall on the way out cannot catch (unwind) it.
5488                // v1.1 B10 — `host_yield_pending` is the async-mode
5489                // analogue: the sentinel must reach `drive_one` without
5490                // a protecting `pcall` swallowing it.
5491                return r;
5492            }
5493            match r {
5494                Ok(vals) => return Ok(vals),
5495                // unwind toward `entry_depth`. A protecting pcall/xpcall
5496                // continuation caught along the way turns the error into
5497                // `false, msg` and the loop resumes running its caller; an
5498                // uncaught error propagates out.
5499                Err(e) => match self.unwind(e.0, entry_depth) {
5500                    Unwound::Caught => continue,
5501                    Unwound::CaughtReturn(vals) => return Ok(vals),
5502                    Unwound::Propagated(err) => return Err(err),
5503                },
5504            }
5505        }
5506    }
5507
5508    /// Unwind the call stack from the error point toward `entry_depth`, running
5509    /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
5510    /// continuation frame at/above `entry_depth` (the error is *caught*: its
5511    /// slot receives `false, msg`); if none is reached, the error propagates.
5512    fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
5513        // The protected call runs in-place among the caller frames' registers,
5514        // so truncating the failed frames here cuts into caller windows below
5515        // the catcher. Snapshot the live length: at the error point the stack
5516        // already spans every surviving frame's window, so restoring it after a
5517        // catch reinstates them all (the reclaimed slots above are dead temps).
5518        // PUC handles overflow recovery via a separate EXTRA_STACK reserve;
5519        // we instead clamp the restore to the catcher's caller window when the
5520        // error point was at the stack limit (cause: the next `call_value_impl`
5521        // picks `func_slot = stack.len()` which would otherwise re-overflow).
5522        let saved_len = self.stack.len();
5523        // Snapshot the traceback at the error point — before any frame is
5524        // popped — so an `xpcall` msgh (which runs after the failed frames are
5525        // gone) can still describe the error site. The handler frame about to
5526        // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
5527        // visible here; once popped, `debug.traceback` would miss it.
5528        // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
5529        // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
5530        // recovery — locals.lua:659) re-overflows. Capture-once propagates
5531        // through nested unwinds (inner→outer) without re-running msgh.
5532        if self.error_traceback.is_none() {
5533            self.error_traceback = Some(self.traceback_bytes(1));
5534        }
5535        while self.frames.len() >= entry_depth {
5536            match *self.frames.last().expect("frame") {
5537                // a yieldable-metamethod continuation does not catch: discard the
5538                // abandoned instruction and keep unwinding (PUC drops the partial
5539                // op on error).
5540                CallFrame::Cont(NativeCont {
5541                    kind: ContKind::Meta(mc),
5542                    func_slot,
5543                    ..
5544                }) => {
5545                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5546                    self.stack.truncate(func_slot as usize);
5547                    self.top = mc.saved_top.min(func_slot);
5548                    self.tbc.retain(|&s| s < func_slot);
5549                }
5550                // a __pairs continuation does not catch either: an error inside
5551                // the metamethod propagates past `pairs`.
5552                CallFrame::Cont(NativeCont {
5553                    kind: ContKind::Pairs,
5554                    func_slot,
5555                    ..
5556                }) => {
5557                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5558                    self.stack.truncate(func_slot as usize);
5559                    self.top = func_slot;
5560                    self.tbc.retain(|&s| s < func_slot);
5561                }
5562                // a __close continuation does not catch: drop the half-run
5563                // handler's window, then continue the close yieldably with
5564                // the new error threaded as `pending`. Preserve `cc.after`
5565                // verbatim — `Return`/`Block` originating from an aborting
5566                // OP_Return/OP_Close will be short-circuited by
5567                // `finish_close_after` (pending propagates as Err); a
5568                // `ResumeUnwind` originated by our own Lua-frame handler
5569                // must keep its deferred frame-pop semantics so that frame
5570                // is not orphaned. If a fresh handler yields, `drive_close`
5571                // pushes another `Cont::Close` and we return `Caught` so
5572                // `exec_with` re-enters the run loop.
5573                CallFrame::Cont(NativeCont {
5574                    kind: ContKind::Close(cc),
5575                    func_slot,
5576                    ..
5577                }) => {
5578                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5579                    self.stack.truncate(func_slot as usize);
5580                    self.top = func_slot;
5581                    self.tbc.retain(|&s| s < func_slot);
5582                    match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
5583                        Ok(Some(_)) => {
5584                            unreachable!(
5585                                "Block / Return / ResumeUnwind never return host values mid-unwind"
5586                            )
5587                        }
5588                        Ok(None) => return Unwound::Caught,
5589                        Err(e) => {
5590                            err = e.0;
5591                            continue;
5592                        }
5593                    }
5594                }
5595                CallFrame::Cont(nc) => {
5596                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5597                    self.pcall_depth -= 1;
5598                    let result = match nc.kind {
5599                        ContKind::Pcall => err,
5600                        ContKind::Xpcall { handler } => {
5601                            // PUC keeps `L->errfunc` set across the handler's
5602                            // call: `luaG_errormsg` re-fires the handler when
5603                            // it raises (so `xpcall(error, err, 170)` lets the
5604                            // chain bottom out at err(0) → "END"). luna mirrors
5605                            // that by looping until the handler returns or
5606                            // luna's `iters` cap forces termination.
5607                            //
5608                            // The cap models PUC's nCcalls soft window
5609                            // (MAXCCALLS/10*11): once tripped, `stackerror`
5610                            // raises "C stack overflow" via `luaG_runerror`
5611                            // which itself re-enters `luaG_errormsg`, so the
5612                            // handler runs once more with that string and
5613                            // naturally returns it (errors.lua :637 at N=300).
5614                            // We count iterations per Cont::Xpcall rather than
5615                            // a global counter — nested xpcalls each get their
5616                            // own budget, matching the way PUC's stack frames
5617                            // accumulate per dispatch path.
5618                            const MSGH_CAP: u32 = MAX_C_DEPTH;
5619                            let mut cur_err = err;
5620                            let mut iters: u32 = 0;
5621                            let mut capped = false;
5622                            loop {
5623                                if iters >= MSGH_CAP && !capped {
5624                                    cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
5625                                    capped = true;
5626                                }
5627                                iters += 1;
5628                                self.msgh_depth += 1;
5629                                let r = self.call_value(handler, &[cur_err]);
5630                                self.msgh_depth -= 1;
5631                                match r {
5632                                    Ok(hr) => {
5633                                        break hr.first().copied().unwrap_or(Value::Nil);
5634                                    }
5635                                    Err(_) if capped => {
5636                                        // the handler still errored on the
5637                                        // synthesized "C stack overflow"; fall
5638                                        // back to PUC's LUA_ERRERR string.
5639                                        break Value::Str(
5640                                            self.heap.intern(b"error in error handling"),
5641                                        );
5642                                    }
5643                                    Err(e) => {
5644                                        cur_err = e.0;
5645                                    }
5646                                }
5647                            }
5648                        }
5649                        ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
5650                            unreachable!("Meta/Pairs/Close cont handled above")
5651                        }
5652                    };
5653                    // PUC 5.5 `luaG_errormsg` substitutes "<no error object>"
5654                    // for nil AFTER the message handler ran (ldebug.c:849) —
5655                    // so it applies to the pcall-caught object and to an
5656                    // xpcall HANDLER'S return value, while the handler itself
5657                    // (and a top-level propagation into the host, whose
5658                    // `error_display` plays msghandler) still sees the raw
5659                    // nil. 5.4- keep nil everywhere (errors.lua :49 asserts
5660                    // `doit("error()") == nil`). v2.14 fixture 5.5/334.
5661                    let result = if matches!(result, Value::Nil)
5662                        && self.version >= crate::version::LuaVersion::Lua55
5663                    {
5664                        Value::Str(self.heap.intern(b"<no error object>"))
5665                    } else {
5666                        result
5667                    };
5668                    // the error has been caught (pcall/xpcall): the captured
5669                    // traceback was for that error and is no longer in flight.
5670                    self.error_traceback = None;
5671                    let fs = nc.func_slot as usize;
5672                    if self.stack.len() < fs + 2 {
5673                        self.stack.resize(fs + 2, Value::Nil);
5674                    }
5675                    self.stack[fs] = Value::Bool(false);
5676                    self.stack[fs + 1] = result;
5677                    self.top = nc.func_slot + 2;
5678                    self.tbc.retain(|&s| s < nc.func_slot);
5679                    if self.frames.len() < entry_depth {
5680                        return Unwound::CaughtReturn(self.take_results(nc.func_slot));
5681                    }
5682                    self.finish_results(nc.func_slot, 2, nc.nresults);
5683                    // reinstate the caller windows the unwind truncated into,
5684                    // clamped to the catcher's caller window + a `MIN_STACK`
5685                    // reserve. The clamp is a no-op for normal pcall catches
5686                    // (saved_len lies within the caller's max_stack window),
5687                    // and prevents the stack from staying near `MAX_LUA_STACK`
5688                    // after an overflow-recovery catch — which would make the
5689                    // next `call_value_impl` (e.g. a `__close` in the catcher's
5690                    // errorh, locals.lua:659) pick `func_slot = stack.len()`
5691                    // above the limit and re-overflow.
5692                    // Restore the caller's full register window: opcodes
5693                    // index it directly. The cap covers caller's base +
5694                    // `max_stack` + a small reserve. We always resize to
5695                    // exactly this window — previously this clamped
5696                    // `saved_len` from above to prevent staying near
5697                    // `MAX_LUA_STACK` after an overflow-recovery catch, and
5698                    // a yieldable-unwind re-entry adds the dual case where
5699                    // `saved_len` is *below* the window (a prior
5700                    // `ResumeUnwind` truncated). Using the window directly
5701                    // covers both.
5702                    let restore = self
5703                        .frames
5704                        .iter()
5705                        .rev()
5706                        .find_map(CallFrame::lua)
5707                        .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
5708                        .unwrap_or(saved_len);
5709                    if self.stack.len() < restore {
5710                        self.stack.resize(restore, Value::Nil);
5711                    } else if self.stack.len() > restore {
5712                        self.stack.truncate(restore);
5713                    }
5714                    // v2.5 P1B-2B: clear slots vacated by the popped
5715                    // frames the unwind walked over. finish_results
5716                    // above clears `[nc.func_slot + nresults ..
5717                    // nc.func_slot + 2)`, which only covers the
5718                    // pcall's own result region — the unwind-popped
5719                    // frames' locals in `[nc.func_slot + 2 .. restore)`
5720                    // are still in place with whatever Gc-bearing
5721                    // Values they last held. Without this clear, a
5722                    // later GC marks the stale pointers (UAF-A family
5723                    // analog of the v2.3 Op::Return finish_results
5724                    // path). PUC's `luaD_pcall` similarly truncates
5725                    // L->top to the catcher's level — luna's
5726                    // truncate above resizes the Vec but doesn't
5727                    // touch slots [func_slot+2..restore) that were
5728                    // already present.
5729                    let clear_lo = (nc.func_slot as usize + 2).min(self.stack.len());
5730                    let clear_hi = restore.min(self.stack.len());
5731                    if clear_lo < clear_hi {
5732                        for slot in &mut self.stack[clear_lo..clear_hi] {
5733                            *slot = Value::Nil;
5734                        }
5735                    }
5736                    return Unwound::Caught;
5737                }
5738                CallFrame::Lua(f) => {
5739                    // Yieldable error-unwind close, PUC luaG_errormsg shape:
5740                    // (1) pop the Lua frame immediately so each `__close`
5741                    // handler runs at the C boundary above — `debug.getinfo`
5742                    // sees the next outer Lua frame's call site (typically
5743                    // `pcall`), not this aborting function (locals.lua:480).
5744                    // (2) drive the close yieldably with
5745                    // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
5746                    // it truncates to `func_slot` and re-raises (letting a
5747                    // handler-raised error win over `err`). If a handler
5748                    // yields, `drive_close` pushes `Cont::Close` and we
5749                    // return `Caught` so `exec_with` re-enters the run loop;
5750                    // a synchronous drain returns Err exactly as the old
5751                    // path did.
5752                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5753                    let after = AfterClose::ResumeUnwind {
5754                        func_slot: f.func_slot,
5755                        err,
5756                    };
5757                    match self.begin_close(f.base, Some(err), after, entry_depth) {
5758                        Ok(Some(_)) => {
5759                            unreachable!("ResumeUnwind never returns host values")
5760                        }
5761                        Ok(None) => return Unwound::Caught,
5762                        Err(e) => {
5763                            err = e.0;
5764                            continue;
5765                        }
5766                    }
5767                }
5768            }
5769        }
5770        Unwound::Propagated(LuaError(err))
5771    }
5772
5773    fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5774        loop {
5775            // Fast-path slow-check gate: most embedders run with both
5776            // `instr_budget` and `mem_cap` as None, so a single combined
5777            // is_some test lets the hot loop skip both branches with one
5778            // load + branch instead of two.
5779            if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5780                if let Some(b) = self.instr_budget.as_mut() {
5781                    *b -= 1;
5782                    if *b <= 0 {
5783                        self.instr_budget = None;
5784                        // v1.1 B10 Stage 1 — async-mode cooperative
5785                        // yield. Set a sentinel flag so `exec_with`
5786                        // propagates the Err without `unwind` running
5787                        // (mirroring the `yielding.is_some()` path),
5788                        // and `call_value_impl` preserves the call
5789                        // frames for the next `poll`. Translation back
5790                        // to `DispatchOutcome::BudgetExhausted` happens
5791                        // in `drive_one`. The Err value itself is
5792                        // `Value::Nil` — a pure sentinel, never seen by
5793                        // user code.
5794                        if self.async_mode {
5795                            self.host_yield_pending = true;
5796                            return Err(LuaError(Value::Nil));
5797                        }
5798                        // B6: classify the trip so embedders can
5799                        // distinguish budget exhaustion from a
5800                        // generic Runtime error and retry / give up
5801                        // accordingly.
5802                        self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5803                        let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5804                        return Err(LuaError(s));
5805                    }
5806                }
5807                if let Some(cap) = self.heap.mem_cap
5808                    && self.heap.bytes() > cap
5809                {
5810                    // First try a full collect — embedders set tight caps
5811                    // and the overshoot may be reclaimable (closures kept
5812                    // by short-lived frames, intermediate strings). Only
5813                    // disarm + raise if the cap is still breached after
5814                    // collection. PUC's `LUA_GCEMERGENCY` path matches.
5815                    //
5816                    // v2.6 A.2: tighten mem-cap-fire over-root from
5817                    // entire `self.stack.len()` (whole heap) to the
5818                    // deepest Lua frame's `base + max_stack` window
5819                    // (covers register operands the current opcode
5820                    // might reference). The cap fires during table
5821                    // mutation in a tight `a[i] = i` loop where `a`
5822                    // lives at a frame-register slot past `self.top`
5823                    // (OP_NEWINDEX doesn't advance top); the deepest
5824                    // frame's max_stack window provably covers it
5825                    // since `a` is a register of the executing proto.
5826                    //
5827                    // Still over-roots caller frames' dead regs
5828                    // (slots between caller.base and the callee
5829                    // func_slot are live; slots past callee
5830                    // func_slot in caller's frame are dead until
5831                    // caller resumes). For fire-once cap path this
5832                    // residual over-root is acceptable; full
5833                    // per-frame walk was canceled per
5834                    // `.dev/rfcs/v2.6-plan-state.md` amendments log
5835                    // (charter §2.1's strong/weak pass split is
5836                    // semantically impossible — weak pass depends on
5837                    // strong-pass marks).
5838                    let cap_root_top = self
5839                        .frames
5840                        .iter()
5841                        .rev()
5842                        .find_map(CallFrame::lua)
5843                        .map(|f| f.base + f.closure.proto.max_stack as u32)
5844                        .unwrap_or(self.top);
5845                    self.gc_top = cap_root_top.max(self.top);
5846                    self.collect_garbage();
5847                    if self.heap.bytes() > cap {
5848                        self.heap.mem_cap = None;
5849                        let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5850                        return Err(LuaError(s));
5851                    }
5852                }
5853            }
5854            // Single combined frame fetch: continuation arm OR Lua arm. Saves
5855            // a second `self.frames.last()` slice access vs the prior split
5856            // form (LLVM doesn't always CSE these across the cont branch).
5857            // A continuation frame on top means the call it protected just
5858            // delivered its results — wrap as `true, results…` and hand to
5859            // the pcall/xpcall caller. The error path is handled by `unwind`;
5860            // this branch is only reached on success/resume completion.
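5861            // SAFETY: `unwrap_unchecked` is sound only if `self.frames` is non-empty at the loop head. NOTE(review): that invariant is assumed to be upheld by the callers of `run` and by the pop paths below returning once the entry frame is gone — confirm. (Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots, see heap.rs:5-7.)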
5862            let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5863            if let &CallFrame::Cont(nc) = frame_peek {
5864                // a yieldable metamethod returned: complete the interrupted
5865                // instruction (PUC luaV_finishOp) and resume the running frame.
5866                if let ContKind::Meta(mc) = nc.kind {
5867                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5868                    let result = if self.top > nc.func_slot {
5869                        self.stack[nc.func_slot as usize]
5870                    } else {
5871                        Value::Nil
5872                    };
5873                    self.stack.truncate(nc.func_slot as usize);
5874                    self.top = mc.saved_top;
5875                    self.finish_meta(mc.action, result)?;
5876                    continue;
5877                }
5878                // a __close handler returned successfully: discard its
5879                // results, restore `top` to the slot the handler was called
5880                // at (the surrounding frame's register window above this slot
5881                // must stay alloc'd — never truncate the underlying stack),
5882                // then continue the close chain (next slot, or fire
5883                // AfterClose). When the close ends an entry activation,
5884                // drive_close hands the results up to exec_with directly.
5885                if let ContKind::Close(cc) = nc.kind {
5886                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5887                    self.top = nc.func_slot;
5888                    if let Some(vals) =
5889                        self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5890                    {
5891                        return Ok(vals);
5892                    }
5893                    continue;
5894                }
5895                // __pairs returned: normalize its results to exactly four
5896                // (iterator, state, control, closing) at pairs's slot, where
5897                // the metamethod was called, and hand them to pairs's caller.
5898                if let ContKind::Pairs = nc.kind {
5899                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5900                    let total = 4u32;
5901                    let need = (nc.func_slot + total) as usize;
5902                    if self.stack.len() < need {
5903                        self.stack.resize(need, Value::Nil);
5904                    }
5905                    for s in self.top..(nc.func_slot + total) {
5906                        self.stack[s as usize] = Value::Nil;
5907                    }
5908                    self.top = nc.func_slot + total;
5909                    if self.frames.len() < entry_depth {
5910                        return Ok(self.take_results(nc.func_slot));
5911                    }
5912                    self.finish_results(nc.func_slot, total, nc.nresults);
5913                    continue;
5914                }
5915                frames_pop_sync(&mut self.frames, &mut self.frames_top);
5916                self.pcall_depth -= 1;
5917                // f's results sit at nc.func_slot+1.. (f was called one slot
5918                // above the continuation), so writing `true` at the slot makes
5919                // `true, results…` already contiguous.
5920                let nret = self.top - (nc.func_slot + 1);
5921                self.stack[nc.func_slot as usize] = Value::Bool(true);
5922                let total = 1 + nret;
5923                self.top = nc.func_slot + total;
5924                if self.frames.len() < entry_depth {
5925                    return Ok(self.take_results(nc.func_slot));
5926                }
5927                self.finish_results(nc.func_slot, total, nc.nresults);
5928                continue;
5929            }
5930            // GC runs only at the allocation safe points below (PUC's
5931            // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5932            // no longer collects, so a stale full-window `gc_top` cannot leak in.
5933            //
5934            // Hot-path frame fetch: the Cont arm above continues the loop,
5935            // so reaching here means `frame_peek` is the Lua frame. Reuse it
5936            // rather than re-fetching `self.frames.last()`.
5937            let f = match frame_peek {
5938                CallFrame::Lua(f) => f,
5939                _ => unreachable!("Cont frame survived the dispatch loop head"),
5940            };
5941            let cl = f.closure;
5942            let base = f.base;
5943            let func_slot = f.func_slot;
5944            let n_varargs = f.n_varargs;
5945            let pc = f.pc;
5946            let oldpc = f.hook_oldpc;
5947
5948            // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5949            // — every branch / call op only sets `pc` to a valid index, and
5950            // function entry initialises pc=0 with a non-empty body. PUC's
5951            // `vmfetch` uses the equivalent unchecked load.
5952            let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5953
5954            // P12-S1.C/D — trace recording append + close detection.
5955            // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5956            // so default dispatch keeps a single not-taken branch.
5957            //
5958            // - At the head PC with a non-empty record, the trace has
5959            //   looped back to its start: mark `closed = true` and
5960            //   take the record (S2 will compile + cache).
5961            // - Otherwise, capture the op. If the record overflows
5962            //   MAX_TRACE_LEN, abort by dropping it.
5963            if self.jit.trace_enabled
5964                && let Some(_rec) = self.jit.active_trace.as_mut()
5965            {
5966                // P12-S4 — depth tracking. The trace head's frame is
5967                // at index `recording_frame_base`; every Op::Call that
5968                // pushes a new frame bumps the live depth, every
5969                // Op::Return that pops one decrements it.
5970                //
5971                // **Three clean-close conditions** (P12-S4-step4a):
5972                // - `at_head`: cur_depth == 0 AND about-to-execute the
5973                //   trace's head_pc on its head_proto (loop closed back
5974                //   to start). Same for loop-triggered and call-triggered
5975                //   traces — step4a unified the gating so call-triggered
5976                //   no longer closes on the first re-entry (that left
5977                //   fib's body at 7 depth=0 ops; step4a lets it inline
5978                //   up to MAX_INLINE_DEPTH levels before any close).
5979                // - `returned_past_head`: trace head's frame is gone
5980                //   (callee returned past it, or the call-trigger
5981                //   started a recording inside a callee that has now
5982                //   returned). Whatever ops were recorded form the
5983                //   trace body; the lowerer treats the partial trace
5984                //   the same as InlineAbort (dispatchable=false until
5985                //   step4b's frame materialization lands).
5986                // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5987                //   Recording any deeper would just bloat the IR; close
5988                //   with the body we have. Lowerer's existing length
5989                //   gate + InlineAbort path handles short bodies.
5990                let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5991                let cur_depth = if returned_past_head {
5992                    0
5993                } else {
5994                    self.frames.len() - 1 - self.jit.recording_frame_base
5995                };
5996                let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5997                let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5998                let at_head_loop = cur_depth == 0
5999                    && !rec.ops.is_empty()
6000                    && !returned_past_head
6001                    && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
6002                    && pc == rec.head_pc;
6003                // P16-A — self-link cycle catch (mirrors LuaJIT's
6004                // `check_call_unroll` at `lj_record.c:1869`). Trips when:
6005                //   1. We're about to execute the head_pc on head_proto
6006                //      at depth > 0 (we're re-entering the trace head
6007                //      from inside an inlined recursion level — UpRec).
6008                //   2. The count of ancestor frames in the recording
6009                //      window that share `head_proto` exceeds
6010                //      [`RECUNROLL_THRESHOLD`] (default 2).
6011                // For fib(N): head_pc=0, head_proto=fib. After 2 inline
6012                // recursion levels are captured, the recorder enters
6013                // the 3rd nested fib frame, sees cur_depth=3 > 2, and
6014                // trips this catch — closing with `SelfRecKind::UpRec`.
6015                // The lowerer's `TraceEnd::SelfLink` tail emits the
6016                // bump-base + branch-to-self loop body.
6017                //
6018                // TailRec vs UpRec: LJ distinguishes via
6019                // `framedepth + retdepth == 0`. luna doesn't track
6020                // retdepth separately; cur_depth == 0 with a non-empty
6021                // call chain in tail position is rare (would require
6022                // explicit Lua TCO). We use cur_depth > 0 as the UpRec
6023                // condition (fib's case); cur_depth == 0 with positive
6024                // ancestor count would route to TailRec, but luna's
6025                // recorder doesn't currently produce that shape because
6026                // tail-call elision pops the caller frame and we'd
6027                // hit `at_head_loop` instead.
6028                let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
6029                    if self.jit.p16_self_link_enabled
6030                        && !returned_past_head
6031                        && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
6032                        && pc == rec.head_pc
6033                        && cur_depth > 0
6034                    {
6035                        // Count ancestor frames sharing head_proto.
6036                        // self.frames[recording_frame_base..] currently
6037                        // includes the just-pushed frame at the top
6038                        // (the one about to execute head_pc). Ancestors
6039                        // = the slice excluding the top frame.
6040                        let head_proto_ptr = rec.head_proto.as_ptr();
6041                        let last_idx = self.frames.len() - 1;
6042                        let mut count = 0usize;
6043                        for i in self.jit.recording_frame_base..last_idx {
6044                            if let CallFrame::Lua(f) = &self.frames[i]
6045                                && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
6046                            {
6047                                count += 1;
6048                            }
6049                        }
6050                        if count > crate::jit::trace::RECUNROLL_THRESHOLD {
6051                            // cur_depth > 0 → UpRec (fib pattern).
6052                            // cur_depth == 0 wouldn't reach this arm.
6053                            Some(crate::jit::trace::SelfRecKind::UpRec)
6054                        } else {
6055                            None
6056                        }
6057                    } else {
6058                        None
6059                    }
6060                };
6061                if let Some(kind) = self_link_trip {
6062                    // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
6063                    // self-recursive patterns at frame depth >= 2.
6064                    //
6065                    // Pre sub-0: a SelfLink trip at the head_pc re-entry
6066                    // unconditionally stamped `self_link_kind`. The
6067                    // R3a `downrec_close` marker can only fire from the
6068                    // depth>0 Op::Return path (`rec.retfs` chain),
6069                    // which never reaches the recorder for fib(28)-like
6070                    // shapes that hit the SelfLink cycle catch BEFORE
6071                    // any base-case Return — leaving `downrec_close`
6072                    // None and routing the trace through R1's safe
6073                    // `dispatchable=false` `"self-link-retf-r1"` path
6074                    // (audit measured `trace_dispatched = 0`).
6075                    //
6076                    // Sub-0 lift: when the SelfLink trip fires AND
6077                    // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
6078                    // gate already requires this — kept explicit as a
6079                    // safety floor), route the close through `downrec_
6080                    // close` INSTEAD of `self_link_kind`. The recorder
6081                    // synthesises the close marker from the most
6082                    // recent Op::Call at depth `cur_depth - 1`:
6083                    //   - `return_pc` = `call.pc + 1` (caller's resume
6084                    //     PC after the recursive call returns; mirror
6085                    //     of R3a's `caller_pc` derivation at the
6086                    //     depth>0 Op::Return capture path below).
6087                    //   - `target_proto` = `call.proto` (caller's
6088                    //     proto; equals `rec.head_proto` for self-
6089                    //     recursion).
6090                    //   - `depth_delta` = `1` (today's recorder always
6091                    //     unrolls one level; R3a uses the same
6092                    //     constant).
6093                    //
6094                    // The lowerer's `end_idx` picker (`trace.rs:3729`)
6095                    // routes through `TraceEnd::DownRec` ahead of the
6096                    // `self_link_kind` arm; the R3b/R3d lowerer arm
6097                    // emits the stitch-sentinel + caller-pc-guard
6098                    // scaffold. Single-candidate guard chain (sub-0's
6099                    // recorder produces 1 caller_pc candidate because
6100                    // `rec.retfs` is empty) keeps `dispatchable=false`
6101                    // + `"downrec-stitch-pending"` label (per R3d's
6102                    // `multi_way_candidate_count >= 2` gate at
6103                    // `trace.rs:7385`). Net behaviour: trace compiles
6104                    // under DownRec routing; interp runs the
6105                    // recursion naturally → result 317811.
6106                    //
6107                    // The `cur_depth >= 2` gate is automatically
6108                    // satisfied by the count > RECUNROLL_THRESHOLD=2
6109                    // trip condition (3 ancestor frames sharing
6110                    // head_proto implies cur_depth >= 3), kept
6111                    // explicit so a future RECUNROLL_THRESHOLD tweak
6112                    // doesn't silently flip shallow-recursion
6113                    // shapes (cur_depth == 1) onto the DownRec arm.
6114                    //
6115                    // R3.3+ sub-1/2/3/4 will replace the depth-baked
6116                    // op_offsets[] addressing with runtime base_var
6117                    // threading so the trace's recorded body is
6118                    // depth-relative and the DownRec dispatch
6119                    // becomes wall-clock-positive. Sub-0 is the
6120                    // routing scaffold; it does not aim for gain.
6121                    let _ = kind;
6122                    let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
6123                        let caller_depth_u8 = (cur_depth - 1) as u8;
6124                        if let Some(call_op) = rec.ops.iter().rev().find(|r| {
6125                            r.inline_depth == caller_depth_u8
6126                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6127                        }) {
6128                            rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6129                                return_pc: call_op.pc + 1,
6130                                target_proto: call_op.proto,
6131                                depth_delta: 1,
6132                            });
6133                            true
6134                        } else {
6135                            false
6136                        }
6137                    };
6138                    if relaxed_to_downrec {
6139                        // R2 close-cause taxonomy: tag the lift so
6140                        // probes can tally the fire rate. Mirrors
6141                        // R3a's `"downrec-restart"` bump for the
6142                        // depth>0 Op::Return path (different trip
6143                        // origin, same downstream routing). The
6144                        // existing `"self-link-retf-r1"` label still
6145                        // fires for trips that DON'T relax (no
6146                        // candidate Op::Call ancestor in rec.ops, or
6147                        // cur_depth < 2) via the lowerer's
6148                        // dispatch_off_reason mirror at the close
6149                        // handler — kept as a regression safety net.
6150                        self.jit
6151                            .counters
6152                            .bump_close_cause("selflink-yields-to-downrec");
6153                    } else {
6154                        rec.self_link_kind = Some(kind);
6155                    }
6156                }
6157                let should_close =
6158                    at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
6159                if should_close {
6160                    // P13-S13-H — long-trace bias: a call-triggered
6161                    // recording that closed with a very short body
6162                    // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
6163                    // binary_trees `make(0)`: 4 ops) is pathological.
6164                    // Compiling + caching it pins `Proto.traces` to a
6165                    // trace that the length gate will refuse to
6166                    // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
6167                    // = 40`), AND blocks the back-edge / longer-call
6168                    // path from re-recording the same head_pc (the
6169                    // dedup `already_cached` check below short-
6170                    // circuits). The fix: discard the short call-
6171                    // triggered recording WITHOUT caching, and bias
6172                    // the proto's `call_hot_count` back to
6173                    // `THRESHOLD - HOT_RETRY_WINDOW` so the next
6174                    // sequence of calls retries the trigger at a
6175                    // different (hopefully deeper) recursion point.
6176                    //
6177                    // Back-edge triggered traces are exempt — a
6178                    // tight numeric-for loop's body is legitimately
6179                    // 3 ops (`Add`, ForLoop) and DOES dispatch
6180                    // usefully when re-entered many times.
6181                    // P13-S13-H — coverage heuristic to detect
6182                    // pathologically partial call-triggered traces:
6183                    // for self-recursive / branchy protos like
6184                    // `fib` (~17 bytecode ops) or
6185                    // `binary_trees.make` (~26 ops), the recorder
6186                    // can fire at a BASE-case entry (`fib(0)` or
6187                    // `make(0)`) producing a 3–4 op trace that
6188                    // covers a tiny fraction of the proto's code.
6189                    // That trace is doomed by the length gate
6190                    // post-compile AND blocks any longer follow-up
6191                    // (the dedup `already_cached` check below). The
6192                    // fix: discard call-triggered closes where
6193                    // `rec.ops.len() * 2 < head_proto.code.len()`
6194                    // (less than half the proto's bytecode), so the
6195                    // back-edge / longer call path can take over.
6196                    //
6197                    // Why coverage > raw length: protos with
6198                    // intrinsically short bodies (closure
6199                    // factories: `Closure + Return1` = 2 ops,
6200                    // simple wrappers: `LoadI + Return1` = 2 ops)
6201                    // record 100% coverage even at length 2 — those
6202                    // ARE legitimately short and the closure /
6203                    // sunk-emit lowering paths (S7-A / S9-C) make
6204                    // them worth compiling. The heuristic admits
6205                    // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
6206                    // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
6207                    // ~26) get discarded.
6208                    //
6209                    // Back-edge triggered traces are unaffected —
6210                    // a tight numeric-for body legitimately covers
6211                    // 3 of ~3 proto ops it can dispatch from
6212                    // (`Add + ForLoop`) and the recorder fires on
6213                    // the back-edge, not call entry.
6214                    //
6215                    // `call_hot_count` is intentionally NOT reset
6216                    // (an earlier draft tried `THRESHOLD - 32` but
6217                    // caused active_trace contention with the
6218                    // outer back-edge trigger — see
6219                    // setlist_b_zero_with_call_c_zero_sunk_emits).
6220                    // We give up on dispatching the pathological
6221                    // shape on the same proto; the back-edge or a
6222                    // longer call path on a deeper recursion point
6223                    // can still record + cache a real trace.
6224                    let proto_code_len = rec.head_proto.code.len();
6225                    let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
6226                    // P13-S13-I — per-Proto discard cap. The S13-H
6227                    // relaxed trigger condition (`c >= THRESHOLD &&
6228                    // !already_cached`) means a Proto whose every
6229                    // recording is partial-coverage will re-fire the
6230                    // trigger every call indefinitely (1500+ in
6231                    // `binary_trees`-pattern test). The cap stops
6232                    // discarding after `MAX_DISCARDS_PER_PROTO` —
6233                    // the next close falls through to compile (even
6234                    // if partial), caches the trace, and the
6235                    // `already_cached` short-circuit kills the
6236                    // storm. Dispatch may still be refused
6237                    // post-compile (length gate), but the recorder
6238                    // stops churning.
6239                    const MAX_DISCARDS_PER_PROTO: u32 = 5;
6240                    let prior_discards = rec.head_proto.trace_discard_count.get();
6241                    let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
6242                    // P13-S13-K — flip the `gave_up` flag the
6243                    // moment cap is reached (BEFORE the close-
6244                    // dispatching branch below). The trigger gates
6245                    // short-circuit on this flag, skipping the
6246                    // RefCell + linear `already_cached` scan on
6247                    // every subsequent call to this Proto. Useful
6248                    // for `binary_trees_pattern`-class loads where
6249                    // a single Proto sees ~20k calls post-cap.
6250                    if cap_reached
6251                        && rec.is_call_triggered
6252                        && is_partial_coverage
6253                        && !rec.head_proto.trace_gave_up.get()
6254                    {
6255                        rec.head_proto.trace_gave_up.set(true);
6256                    }
6257                    if rec.is_call_triggered && is_partial_coverage && !cap_reached {
6258                        // Tally as closed (for visibility) but DROP
6259                        // without compile/cache. Use the existing
6260                        // closed-lens accumulator so probes can
6261                        // observe the discarded shape.
6262                        // P13-S13-I — bump discard count BEFORE
6263                        // dropping the recording so the next
6264                        // close sees the updated counter.
6265                        rec.head_proto.trace_discard_count.set(prior_discards + 1);
6266                        self.jit.counters.closed += 1;
6267                        self.jit
6268                            .counters
6269                            .closed_lens
6270                            .push((rec.is_call_triggered, rec.ops.len()));
6271                        // v2.0 Track-R R2 — partial-coverage discard
6272                        // close path. Pre-R2 this site bumped `closed`
6273                        // + `closed_lens` (visibility) but no per-
6274                        // reason label, so probes couldn't separate a
6275                        // real successful close from a discard tally.
6276                        // Tag explicitly to make the recorder-side
6277                        // close-cause taxonomy single-source.
6278                        self.jit
6279                            .counters
6280                            .bump_close_cause("partial-coverage-discard");
6281                        self.jit.active_trace = None;
6282                        // Continue with interp loop — don't
6283                        // fall through to compile path.
6284                        // The op at `pc` hasn't dispatched yet;
6285                        // the outer loop iteration handles it.
6286                    } else {
6287                        rec.closed = true;
6288                        // P12-S2.C — detach the closed record, then try
6289                        // to compile it. Dedup by `head_pc`: a Proto
6290                        // already carrying a CompiledTrace for this PC
6291                        // skips recompile (the hot counter caps
6292                        // re-recording at `u32::MAX / 2` anyway, but
6293                        // explicit dedup keeps `Proto.traces` short
6294                        // for the S3 dispatcher's linear scan).
6295                        //
6296                        // No `Vm::run` change for failure: we just bump
6297                        // the failed counter and drop the record. S3
6298                        // will read `Proto.traces` to decide whether to
6299                        // dispatch — until then, this is bookkeeping.
6300                        let head_pc_val = rec.head_pc;
6301                        let closed_record = self
6302                            .jit
6303                            .active_trace
6304                            .take()
6305                            .expect("active_trace was Some this branch");
6306                        self.jit.counters.closed += 1;
6307                        self.jit
6308                            .counters
6309                            .closed_lens
6310                            .push((closed_record.is_call_triggered, closed_record.ops.len()));
6311                        // P12-S5-B fix: cache the trace on the
6312                        // recorder's *head proto*, not the current
6313                        // closure's proto. For non-recursive
6314                        // call-triggered traces, close fires after
6315                        // `Return1` pops the callee frame — `cl` at
6316                        // that point is the CALLER's closure, while
6317                        // `closed_record.head_proto` is the CALLEE's
6318                        // proto (the one we actually want the trace
6319                        // to be discoverable from on the next call).
6320                        // Self-recursive fib closed via depth-cap
6321                        // mid-recursion so `cl.proto == head_proto`
6322                        // happened to coincide — this fix makes that
6323                        // accidental coincidence intentional.
6324                        let head_proto = closed_record.head_proto;
6325                        let already_cached = head_proto
6326                            .traces
6327                            .borrow()
6328                            .iter()
6329                            .any(|t| t.head_pc == head_pc_val);
6330                        if !already_cached {
6331                            // Internal-loop = true: the trace runs in
6332                            // a native loop until a cmp side-exits, so
6333                            // the dispatcher's per-entry marshal cost
6334                            // amortizes across the whole run of
6335                            // iterations the loop's recorded direction
6336                            // stays valid. The lowerer auto-downgrades
6337                            // to one-shot for cmp-less or Call-truncating
6338                            // traces.
6339                            // P15-A v2-C-A6-5 — side traces MUST NOT
6340                            // internal-loop. The parent's recorded prefix
6341                            // (ops at PCs < side trace's head_pc) defines
6342                            // values for registers the child's body reads
6343                            // without re-writing each iter — e.g. for
6344                            // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6345                            // + R[11]` sets R[12], and the child trace
6346                            // (head_pc=24) re-runs `pc=20 Move R[1] =
6347                            // R[12]` each iter via its outer ForLoop
6348                            // internal-loop, ALWAYS reading the stale
6349                            // entry-time R[12]. The parent's Add never
6350                            // re-runs during child's loop, so R[1] gets
6351                            // pinned to one stale value. Force one-shot
6352                            // for side traces: each parent-exit round-
6353                            // trips through dispatcher → parent's Add
6354                            // runs → side trace runs ONE iter → return.
6355                            let opts = crate::jit::trace::CompileOptions {
6356                                internal_loop: closed_record.side_trace_parent.is_none(),
6357                                pre53: self.version() <= LuaVersion::Lua53,
6358                                aot: false,
6359                            };
6360                            // v1.1 A1 Session A — route through trace_compiler.
6361                            // v2.0 Track J sub-step J-B — split-borrow JitState
6362                            // so the trait method can take `&mut dyn JitStorage`.
6363                            let result = {
6364                                let jit = &mut self.jit;
6365                                let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6366                                jit.trace_compiler
6367                                    .try_compile_trace(storage, &closed_record, opts)
6368                            };
6369                            match result {
6370                                Some(mut ct) => {
6371                                    // P12-S5-A/B/C — tally Sinkable sites
6372                                    // + actually-sunk-emit sites + materialise
6373                                    // emit sites before moving `ct` into
6374                                    // Proto.traces.
6375                                    self.jit.counters.sinkable_seen +=
6376                                        ct.sinkable_sites_seen as u64;
6377                                    self.jit.counters.accum_bufferable_seen +=
6378                                        ct.accum_bufferable_seen as u64;
6379                                    self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6380                                    self.jit.counters.materialize_emit +=
6381                                        ct.materialize_emit_count as u64;
6382                                    self.jit.counters.closure_emit += ct.closure_seen as u64;
6383                                    if ct.is_inline_abort_close {
6384                                        self.jit.counters.inline_abort += 1;
6385                                    }
6386                                    // v2.0 Stage 7 polish 6 fire
6387                                    // experiment — split tally so a
6388                                    // probe can answer the AOT
6389                                    // `accepted_with_per_exit_inline`
6390                                    // gate's question at the JIT
6391                                    // surface too: how many compiled
6392                                    // traces emitted depth>0 cmp
6393                                    // side-exits, and how many of
6394                                    // those survived all the
6395                                    // `dispatchable = false` pins
6396                                    // (`InlineAbort-gate`,
6397                                    // `self-link-retf-r1`,
6398                                    // `downrec-stitch-pending`, etc.).
6399                                    if !ct.per_exit_inline.is_empty() {
6400                                        self.jit.counters.per_exit_inline_compiled += 1;
6401                                        if ct.dispatchable {
6402                                            self.jit.counters.per_exit_inline_dispatchable += 1;
6403                                        }
6404                                    }
6405                                    if let Some(reason) = ct.dispatch_off_reason {
6406                                        self.jit.counters.dispatch_off_reasons.push(reason);
6407                                        // v2.0 Track-R R2 — mirror
6408                                        // the ordered Vec push into
6409                                        // the per-reason HashMap so
6410                                        // probes can answer "how many
6411                                        // of each dispatch_off label
6412                                        // fired" in O(1) without
6413                                        // walking the Vec. Same
6414                                        // bucket as the recorder-side
6415                                        // abort/discard tags above.
6416                                        self.jit.counters.bump_close_cause(reason);
6417                                    }
6418                                    // v2.0 Track-R R3b — count
6419                                    // compiled traces that carry a
6420                                    // down-recursion stitch link.
6421                                    // Bumped here (not at the lowerer
6422                                    // emit site) because the Vm's
6423                                    // JitCounters live on the Vm,
6424                                    // and the lowerer doesn't have a
6425                                    // Vm handle. R3b's regression
6426                                    // pin reads this via
6427                                    // `Vm::trace_downrec_link_compiled_count`.
6428                                    if ct.downrec_link.is_some() {
6429                                        self.jit.counters.downrec_link_compiled += 1;
6430                                    }
6431                                    // v2.0 Track-R R3d — multi-way
6432                                    // guard emit counter. Bumped when
6433                                    // the lowerer's R3d arm collected
6434                                    // >= 2 distinct caller_pc candidates
6435                                    // and lifted `dispatchable=true`.
6436                                    // R3c's single-CMP shape stores
6437                                    // `1` here without bumping; non-
6438                                    // DownRec closes store `0`.
6439                                    if ct.downrec_multi_way_count >= 2 {
6440                                        self.jit.counters.multi_way_guard_emitted += 1;
6441                                    }
6442                                    // P15-A v2-A — side-trace finalisation.
6443                                    // Pin `dispatchable=false` so the
6444                                    // primary lookup `traces.find(|t|
6445                                    // t.head_pc == pc && t.dispatchable)`
6446                                    // never matches this entry — the
6447                                    // side trace is meant to be entered
6448                                    // ONLY through the parent's exit
6449                                    // indirection (v2-B/C IR), not the
6450                                    // back-edge / call-trigger paths.
6451                                    // Then write the entry fn ptr into
6452                                    // the parent's `exit_side_trace_ptrs`
6453                                    // slot so v2-B/C IR can read it.
6454                                    if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6455                                        closed_record.side_trace_parent
6456                                    {
6457                                        ct.dispatchable = false;
6458                                        let entry_ptr = ct.entry as *const () as *const u8;
6459                                        let _side_trace_head_pc = closed_record.head_pc;
6460                                        let parent_traces = parent_proto.traces.borrow();
6461                                        if let Some(parent_ct) = parent_traces
6462                                            .iter()
6463                                            .find(|t| t.head_pc == parent_head_pc)
6464                                        {
6465                                            // P15-A v2-C-A5-C — shape-match
6466                                            // gate. Find the parent's per-exit
6467                                            // tag snapshot at the wired exit
6468                                            // (inline / tag / global) and
6469                                            // check the child's entry_tags
6470                                            // match. If not, leave the cell
6471                                            // null + skip cache populate so
6472                                            // the future v2-C-A2 IR's
6473                                            // `call_indirect` stays inert at
6474                                            // this exit (the child's
6475                                            // shape-specialised IR would
6476                                            // mis-interpret raw bits the
6477                                            // parent writes to reg_state).
6478                                            let inline_n = parent_ct.per_exit_inline.len();
6479                                            let tags_n = parent_ct.per_exit_tags.len();
6480                                            let parent_exit_tags_slice: &[
6481                                            crate::jit::trace::ExitTag
6482                                        ] = if parent_exit_idx < inline_n {
6483                                            &parent_ct.per_exit_inline
6484                                                [parent_exit_idx]
6485                                                .exit_tags
6486                                        } else if parent_exit_idx
6487                                            < inline_n + tags_n
6488                                        {
6489                                            &parent_ct.per_exit_tags
6490                                                [parent_exit_idx - inline_n]
6491                                                .1
6492                                        } else {
6493                                            &parent_ct.exit_tags
6494                                        };
6495                                            let shape_ok =
6496                                                crate::jit::trace::exit_tags_match_entry_tags(
6497                                                    &ct.entry_tags,
6498                                                    parent_exit_tags_slice,
6499                                                    &parent_ct.entry_tags,
6500                                                );
6501                                            if !shape_ok {
6502                                                self.jit.counters.side_trace_shape_mismatch += 1;
6503                                            }
6504                                            // P15-A v2-C-A4 — write the child's
6505                                            // entry fn ptr to BOTH the legacy
6506                                            // v2-A `exit_side_trace_ptrs[idx]`
6507                                            // cell (kept so v2-A's
6508                                            // walk_any_side_ptr_non_null tests
6509                                            // stay green) AND the per-kind cell
6510                                            // whose heap address the parent's
6511                                            // IR baked (v2-C-A2). The IR-baked
6512                                            // cell is what the call_indirect
6513                                            // gate actually reads. Only write
6514                                            // when A5-C shape gate passes.
6515                                            if shape_ok {
6516                                                if let Some(cell) = parent_ct
6517                                                    .exit_side_trace_ptrs
6518                                                    .get(parent_exit_idx)
6519                                                {
6520                                                    cell.set(entry_ptr);
6521                                                }
6522                                                // Compute (kind, local) for the
6523                                                // IR-baked cell. Layout follows
6524                                                // exit_hit_counts: inline first,
6525                                                // then per_exit_tags, then the
6526                                                // global tail slot.
6527                                                let (sent_kind, sent_local) = if parent_exit_idx
6528                                                    < inline_n
6529                                                {
6530                                                    parent_ct.per_exit_inline[parent_exit_idx]
6531                                                        .side_trace_ptr
6532                                                        .set(entry_ptr);
6533                                                    (
6534                                                        crate::jit::trace::SIDE_SENT_KIND_INLINE,
6535                                                        parent_exit_idx as u32,
6536                                                    )
6537                                                } else if parent_exit_idx < inline_n + tags_n {
6538                                                    let local = parent_exit_idx - inline_n;
6539                                                    if let Some(b) =
6540                                                        parent_ct.tags_side_trace_ptrs.get(local)
6541                                                    {
6542                                                        b.set(entry_ptr);
6543                                                    }
6544                                                    (
6545                                                        crate::jit::trace::SIDE_SENT_KIND_TAG,
6546                                                        local as u32,
6547                                                    )
6548                                                } else {
6549                                                    parent_ct.global_side_trace_ptr.set(entry_ptr);
6550                                                    (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6551                                                };
6552                                                self.jit.counters.side_trace_compiled += 1;
6553                                                // P15-A v2-D-A8 — flip the
6554                                                // parent's fast-path hint so
6555                                                // the dispatcher knows to do
6556                                                // the tentative decode + cell
6557                                                // check on subsequent
6558                                                // dispatches. Set once and
6559                                                // stays true (we never unwire
6560                                                // a side trace today).
6561                                                parent_ct.has_any_side_wired.set(true);
6562
6563                                                // P15-A v2-C-A1/A4 — populate
6564                                                // the O(1) lookup cache the
6565                                                // dispatcher consults on
6566                                                // sentinel-bit-set returns.
6567                                                // Key is the encoded sentinel
6568                                                // (same encoding the IR ORs
6569                                                // into bits 56..=62 of the
6570                                                // child's i64 return).
6571                                                let sentinel =
6572                                                    crate::jit::trace::encode_side_sentinel(
6573                                                        sent_kind, sent_local,
6574                                                    );
6575                                                let predicted_idx = if std::ptr::eq(
6576                                                    parent_proto.as_ptr(),
6577                                                    head_proto.as_ptr(),
6578                                                ) {
6579                                                    parent_traces.len() as u32
6580                                                } else {
6581                                                    head_proto.traces.borrow().len() as u32
6582                                                };
6583                                                parent_ct
6584                                                    .side_trace_cache
6585                                                    .borrow_mut()
6586                                                    .insert(sentinel, predicted_idx);
6587                                            }
6588                                        }
6589                                        drop(parent_traces);
6590                                    }
6591                                    head_proto.traces.borrow_mut().push(TArc::new(ct));
6592                                    self.jit.counters.compiled += 1;
6593                                }
6594                                None => {
6595                                    self.jit.counters.compile_failed += 1;
6596                                    self.jit
6597                                        .counters
6598                                        .compile_failed_reasons
6599                                        .push(self.jit.trace_compiler.last_compile_checkpoint());
6600                                }
6601                            }
6602                        }
6603                    } // P13-S13-H — close the long-trace-bias else branch
6604                } else {
6605                    // P12-S4-step1 + step4a — depth-aware push at the
6606                    // current `cur_depth`. The `depth_cap_hit` /
6607                    // `returned_past_head` early-exit is handled by
6608                    // the `should_close` branch above; reaching here
6609                    // means `cur_depth <= MAX_INLINE_DEPTH` and the
6610                    // trace head's frame is still live.
6611                    let depth_u8 = cur_depth as u8;
6612                    if depth_u8 > self.jit.max_depth_seen {
6613                        self.jit.max_depth_seen = depth_u8;
6614                    }
6615                    // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6616                    // return / variable return count). Recorder pushed
6617                    // it with var_count=None before the call dispatched;
6618                    // now that the call has returned and we're about to
6619                    // push the next op, top reflects the actual return
6620                    // count. Snapshot top - (caller.base + call.a).
6621                    if let Some(last) = rec.ops.last_mut()
6622                        && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6623                        && last.inst.c() == 0
6624                        && last.var_count.is_none()
6625                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6626                    {
6627                        let from = f.base + last.inst.a();
6628                        if self.top >= from {
6629                            last.var_count = Some(self.top - from);
6630                        }
6631                    }
6632                    // P12-S9-A/C — for SetList B=0, snapshot the source
6633                    // count = top - A - 1 (mirrors Lua's `n = top - ra
6634                    // - 1` from lvm.c OP_SETLIST). Sources are
6635                    // R[A+1..top), exclusive top. For Call C=0's
6636                    // var_count (the return count = top - A inclusive),
6637                    // see the prior-op fix-up above; here we
6638                    // initialise the current Call op to None and let
6639                    // the fix-up on the next op's push populate it.
6640                    let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6641                        && inst.b() == 0
6642                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6643                    {
6644                        let from = f.base + inst.a();
6645                        if self.top > from {
6646                            Some(self.top - from - 1)
6647                        } else {
6648                            None
6649                        }
6650                    } else {
6651                        None
6652                    };
6653                    let op = crate::jit::trace::RecordedOp {
6654                        proto: cl.proto,
6655                        pc,
6656                        inst,
6657                        inline_depth: depth_u8,
6658                        var_count,
6659                    };
6660                    // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6661                    // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6662                    // Captured as a side-channel `RetfRecord` parallel to
6663                    // `ops` when `p16_self_link_enabled` is on. R3's
6664                    // down-rec stitch consumes these to guard side-trace
6665                    // inlined-frame topology against the recorded shape.
6666                    // Gated on the same flag as the cycle catch so the
6667                    // ship-default path (p16 off) sees zero behavior
6668                    // change. `caller_pc` is the recorded enclosing Call's
6669                    // pc + 1 — interp's resume point after the inlined
6670                    // frame pops.
6671                    if self.jit.p16_self_link_enabled
6672                        && depth_u8 > 0
6673                        && matches!(
6674                            inst.op(),
6675                            crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6676                        )
6677                    {
6678                        let results: u8 = match inst.op() {
6679                            crate::vm::isa::Op::Return0 => 0,
6680                            crate::vm::isa::Op::Return1 => 1,
6681                            _ => 0,
6682                        };
6683                        // Most recent Op::Call recorded at the caller's
6684                        // depth (`depth_u8 - 1`) is the frame this Return
6685                        // is unwinding from. Reverse scan stops at the
6686                        // first match.
6687                        let caller_depth = depth_u8 - 1;
6688                        let caller_call = rec.ops.iter().rev().find(|r| {
6689                            r.inline_depth == caller_depth
6690                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6691                        });
6692                        let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6693                        // v2.0 Track-R R3a — capture the caller's proto
6694                        // for the RetfRecord. LuaJIT `IR_RETF.op1`
6695                        // equivalent. For fib(28) the caller's proto
6696                        // equals the trace head; for future mutual
6697                        // recursion the recorded Op::Call's proto is the
6698                        // right target. Fallback to head_proto when no
6699                        // enclosing Call op was captured (mirrors
6700                        // `caller_pc`'s fallback to the Return's own pc).
6701                        let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6702                        rec.retfs.push(crate::jit::trace::RetfRecord {
6703                            from_depth: depth_u8,
6704                            to_depth: caller_depth,
6705                            results,
6706                            caller_pc,
6707                            proto: caller_proto,
6708                        });
6709                        // v2.0 Track-R R3a — DownRec close trigger:
6710                        // count RetfRecords on this recording whose
6711                        // `proto` matches `caller_proto` (LuaJIT
6712                        // `check_downrec_unroll` chain filter
6713                        // `op1 == ptref`). Threshold mirrors
6714                        // RECUNROLL_THRESHOLD; first trip stamps the
6715                        // `downrec_close` marker, subsequent retfs
6716                        // keep the marker without overwrite. The
6717                        // lowerer's end_idx picker routes through
6718                        // TraceEnd::DownRec when the marker is set;
6719                        // R3a's tail emit still falls through to R1's
6720                        // safe deopt path so fib(28) result stays
6721                        // 317_811. R3b lifts.
6722                        if rec.downrec_close.is_none() {
6723                            let caller_proto_ptr = caller_proto.as_ptr();
6724                            let prior_match_count = rec
6725                                .retfs
6726                                .iter()
6727                                .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6728                                .count();
6729                            // Strictly-greater-than threshold matches
6730                            // LuaJIT `count + J->tailcalled > recunroll`.
6731                            // The newly-pushed retf is already counted.
6732                            if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6733                                rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6734                                    return_pc: caller_pc,
6735                                    target_proto: caller_proto,
6736                                    depth_delta: 1,
6737                                });
6738                                // R2 close-cause taxonomy: tag the
6739                                // restart with `"downrec-restart"`. R3b
6740                                // adds `"downrec-stitch-failed"` when
6741                                // the lifted back-edge falls back to
6742                                // deopt.
6743                                self.jit.counters.bump_close_cause("downrec-restart");
6744                            }
6745                        }
6746                    }
6747                    // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6748                    // FIRST eligible Op::GetField site under env-gate
6749                    // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6750                    //   - R[B] is Value::Table with metatable.is_none()
6751                    //   - K[C] is Value::Str
6752                    //   - The string key actually occupies a hash slot
6753                    //     (so the IC's slot_idx is a real index, not
6754                    //     a probe sentinel).
6755                    // Once captured, subsequent GetFields skip this
6756                    // logic (rec.field_ic_snapshot.is_some() short-
6757                    // circuits). Env-OFF short-circuits on the cached
6758                    // atomic check inside field_ic_enabled().
6759                    if rec.field_ic_snapshot.is_none()
6760                        && matches!(inst.op(), crate::vm::isa::Op::GetField)
6761                        && crate::jit::trace_types::field_ic_enabled()
6762                    {
6763                        let b = inst.b();
6764                        let c_idx = inst.c() as usize;
6765                        let r_b = self.stack[(base + b) as usize];
6766                        if let Value::Table(g) = r_b
6767                            && g.metatable().is_none()
6768                            && c_idx < cl.proto.consts.len()
6769                            && let Value::Str(s) = cl.proto.consts[c_idx]
6770                        {
6771                            let key = Value::Str(s);
6772                            let tbl_ref = &*g;
6773                            if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6774                                && let Some(val) = tbl_ref.node_val_at(slot_idx)
6775                            {
6776                                let op_idx = rec.ops.len() as u32;
6777                                rec.field_ic_snapshot =
6778                                    Some(crate::jit::trace_types::FieldIcSnapshot {
6779                                        op_idx,
6780                                        nodes_len: tbl_ref.nodes_capacity() as u64,
6781                                        slot_idx: slot_idx as u64,
6782                                        key_ptr_bits: s.as_ptr() as u64,
6783                                        cached_val_tag: val.tag_byte(),
6784                                    });
6785                                self.jit.counters.field_ic_snapshot_captured += 1;
6786                            }
6787                        }
6788                    }
6789                    if !rec.push(op) {
6790                        // v2.0 Track-R R2 — recorder overflow
6791                        // (MAX_TRACE_LEN). Pre-R2 this site bumped
6792                        // `aborted` with no reason label, leaving the
6793                        // overflow indistinguishable from any other
6794                        // abort cause that might be added later.
6795                        // Tag it explicitly under the close-cause
6796                        // bucket so probes can tally overflow vs
6797                        // other abort causes in O(1).
6798                        self.jit.active_trace = None;
6799                        self.jit.counters.aborted += 1;
6800                        self.jit.counters.bump_close_cause("trace-overflow");
6801                    }
6802                }
6803            }
6804
6805            // P12-S3 — trace JIT dispatcher.
6806            //
6807            // When the dispatch loop is about to execute the op at
6808            // `pc` and there's a `numeric_only` CompiledTrace cached
6809            // for that `head_pc`, marshal the live regs into an
6810            // i64 buffer, jump into the trace, and resume the
6811            // interpreter at the returned continuation PC.
6812            //
6813            // Skipped (zero overhead) when `trace_jit_enabled` is
6814            // false; the lookup is a borrow + scan over
6815            // `cl.proto.traces`, which is a `Vec` whose size is at
6816            // most one entry per back-edge per Proto in practice.
6817            //
6818            // Marshalling contract — the reg_state ABI is `*mut i64`
6819            // with no tag info, so each slot in [0..max_stack) is
6820            // passed as its raw 8-byte payload. Int / Float / Table /
6821            // Closure / Native / Str / Nil slots marshal; any other
6822            // tag, or a tag that differs from the trace's compile-time
6823            // entry tag, forces a skip. Interp then takes over and a
6824            // later back-edge brings us back to try again.
6825            //
6826            // A trace that comes back with `vm.jit.pending_err`
6827            // parked is treated as a deopt: clear the err, leave
6828            // the stack as the trace wrote it, and let the
6829            // interpreter run from the same `pc`. The trace itself
6830            // is left cached — a future entry might find no
6831            // metatable in the way and succeed.
6832            // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6833            // of 6 per-field Rc clones. proto.traces is now
6834            // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6835            // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6836            // operations per dispatch × 434k = ~2.2M Rc refcount ops
6837            // (~1-2% gain measured separately).
6838            // v2.0 Track-R R3c — one-shot consume of the
6839            // `suppress_downrec_admit_once` flag. Set by the R3c
6840            // downrec post-invoke arm below when it force-deopts the
6841            // trace (caller-pc guard miss OR cycle-budget exhausted)
6842            // so the NEXT interpreter loop iteration skips the
6843            // downrec admit, lets interp run the op at `head_pc`,
6844            // advances `pc` past `head_pc`, and breaks the otherwise-
6845            // infinite admit loop. Reading + clearing here means a
6846            // single dispatch tick consumes the suppression — the
6847            // following tick re-admits naturally (with the budget
6848            // also reset by the deopt site).
6849            let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6850            self.jit.suppress_downrec_admit_once = false;
6851            if self.jit.trace_enabled
6852                && let Some(ct) = {
6853                    let traces = cl.proto.traces.borrow();
6854                    traces
6855                        .iter()
6856                        .find(|t| {
6857                            if t.head_pc != pc {
6858                                return false;
6859                            }
6860                            let is_downrec = t.downrec_link.is_some();
6861                            // v2.0 Track-R R3c — the one-shot suppress
6862                            // flag blocks any admit (primary or fallback)
6863                            // for `downrec_link`-bearing traces so the
6864                            // next interp iter can run the natural op
6865                            // at `head_pc` and advance past it. R3d's
6866                            // `dispatchable=true` lift means the suppress
6867                            // must also cover the primary `t.dispatchable`
6868                            // arm — otherwise the lifted lookup would
6869                            // immediately re-admit after a force-deopt
6870                            // and the infinite loop returns.
6871                            if is_downrec && downrec_admit_blocked {
6872                                return false;
6873                            }
6874                            // Primary arm: `dispatchable=true` traces
6875                            // (R3d-lifted DownRec or normal traces).
6876                            // Fallback arm: R3c-shape `dispatchable=false`
6877                            // DownRec traces (single-CMP guard kept
6878                            // pinned because the 90% miss-rate would
6879                            // make blind admit perf-negative).
6880                            t.dispatchable || is_downrec
6881                        })
6882                        .cloned()
6883                }
6884            {
6885                // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6886                // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6887                // across the entire dispatch block so the fields outlive
6888                // all consumers. Saves 5 Rc::clone per dispatch.
6889                let entry_fn = ct.entry;
6890                let head_pc_val = ct.head_pc;
6891                let window_size = ct.window_size;
6892                let exit_tags = &ct.exit_tags;
6893                let per_exit_tags = &ct.per_exit_tags;
6894                let per_exit_inline = &ct.per_exit_inline;
6895                let compile_entry_tags = &ct.entry_tags;
6896                let global_tag_res_kind = ct.global_tag_res_kind;
6897                let exit_hit_counts = &ct.exit_hit_counts;
6898                let max_stack = cl.proto.max_stack as usize;
6899                let window_size_us = window_size as usize;
6900                let base_us = base as usize;
6901                // P12-S4-step3a — `reg_state` sized to the trace's
6902                // `window_size`, which today equals max_stack but
6903                // S4-step3b will expand for inlined frames.
6904                // Marshal-in still only writes [0..max_stack); slots
6905                // [max_stack..window_size) are zero-initialised and
6906                // filled by the trace's own GetUpval / arith.
6907                // P13-S13-D — reuse the Vm's amortised buffers
6908                // instead of allocating fresh Vecs each dispatch.
6909                // mem::take leaves an empty placeholder we restore
6910                // at the end of the dispatch block (success +
6911                // deopt paths both fall through to the restore).
6912                let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6913                entry_tags.clear();
6914                entry_tags.reserve(max_stack);
6915                // v2.0 Track-R R3c — this trace was admitted via the
6916                // `downrec_link.is_some()` arm rather than the normal
6917                // `dispatchable=true` arm. The pre-invoke path
6918                // populates a reserved saved-PC slot just past the
6919                // normal register window so R3b's lowerer guard load
6920                // (`reg_state[window_size]`) compares the runtime
6921                // saved caller PC against the recorded `dr_return_pc`.
6922                //
6923                // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6924                // gate. After R3d lifts `dispatchable = true` for
6925                // multi-way guards, the trace's body still emits the
6926                // R3b/R3d sentinel shape on return — the saved-PC slot
6927                // and post-invoke classifier must keep firing.
6928                // `downrec_link.is_some()` is the unique structural
6929                // signal that the trace closes via DownRec.
6930                let is_downrec_entry = ct.downrec_link.is_some();
6931                let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6932                reg_state.clear();
6933                // v2.0 Track-R R3c — when admitting a downrec trace,
6934                // size the buffer to `window_size + 1` so the lowerer
6935                // can `load(I64, ..., reg_state, window_size * 8)`
6936                // for the saved caller PC guard input. The extra slot
6937                // is the LAST element so cranelift's existing
6938                // `0..window_size` accesses are unaffected.
6939                let reg_state_len = if is_downrec_entry {
6940                    window_size_us + 1
6941                } else {
6942                    window_size_us
6943                };
6944                reg_state.resize(reg_state_len, 0i64);
6945                let mut dispatch_ok = true;
6946                for i in 0..max_stack {
6947                    let v = self.stack[base_us + i];
6948                    let (tag, raw) = v.unpack();
6949                    entry_tags.push(tag);
6950                    // P12-S12-C v3 — entry tag guard. The trace's IR
6951                    // is specialised to the compile-time entry tags
6952                    // (via current_kinds propagation from
6953                    // from_entry_tag). A runtime tag mismatch means
6954                    // body ops would mis-interpret raw bits (e.g.
6955                    // treat a Str pointer as Int payload → garbage).
6956                    // Skip dispatch on mismatch so interp handles
6957                    // this entry shape; the trace stays cached for
6958                    // future entries that match.
6959                    if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6960                        dispatch_ok = false;
6961                        break;
6962                    }
6963                    match tag {
6964                        // Int / Float / Table / Closure / Nil all
6965                        // marshal to raw payload cleanly; the trace's
6966                        // IR treats the 8-byte slot as an i64 (with
6967                        // f64 ops bitcasting around the boundary).
6968                        crate::runtime::value::raw::INT
6969                        | crate::runtime::value::raw::FLOAT
6970                        | crate::runtime::value::raw::TABLE
6971                        | crate::runtime::value::raw::CLOSURE
6972                        // P12-S12-B-v2 — Native iter slots (e.g.
6973                        // R[A] = ipairs_iter) are present in
6974                        // generic-for traces; the raw bits are a
6975                        // valid `*mut NativeClosure` and round-trip
6976                        // cleanly.
6977                        | crate::runtime::value::raw::NATIVE
6978                        // P12-S12-C v1 — Str slots show up in
6979                        // string-concat traces; raw bits = `*mut
6980                        // LuaStr` (interned, GC-managed). Round-
6981                        // trips cleanly as a heap pointer.
6982                        | crate::runtime::value::raw::STR
6983                        | crate::runtime::value::raw::NIL => {
6984                            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6985                            reg_state[i] = unsafe { raw.zero as i64 };
6986                        }
6987                        _ => {
6988                            dispatch_ok = false;
6989                            break;
6990                        }
6991                    }
6992                }
6993
6994                if dispatch_ok {
6995                    debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6996                    self.jit.pending_err = None;
6997                    // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6998                    // count. A cmp@d>0 side-exit calls the materialize
6999                    // helper which pushes inlined frames onto
7000                    // `vm.frames`; on deopt those frames must be popped
7001                    // before falling through to the interpreter, else
7002                    // the stack grows unboundedly per deopted dispatch.
7003                    let pre_frames = self.frames.len();
7004                    // v2.0 Track-R R3c — saved-PC slot population. The
7005                    // recorded `dr_return_pc` on the closing trace is
7006                    // the caller's resume PC captured at a depth>0
7007                    // Return push (recorder push site, see R3a verdict
7008                    // §3). The natural runtime analogue for self-
7009                    // stitch is the dispatching frame's PARENT frame's
7010                    // PC: the trace's head_pc sits inside a Lua frame,
7011                    // and the parent (caller) frame's `pc` is what
7012                    // luna would observe as `[base-8]` in the LJ
7013                    // `asm_retf` shape (`lj_asm_arm64.h:565`). When
7014                    // the parent isn't a Lua frame (top-level dispatch
7015                    // — first invocation through `call_value`), no
7016                    // saved PC exists; we write 0, which always
7017                    // mismatches the recorded `dr_return_pc != 0`
7018                    // invariant pinned by R3b
7019                    // (`crates/luna-jit/src/jit_backend/trace.rs:7206
7020                    // debug_assert!(dr_return_pc != 0, ...)`).
7021                    if is_downrec_entry {
7022                        let saved_pc: i64 = if pre_frames >= 2 {
7023                            match &self.frames[pre_frames - 2] {
7024                                CallFrame::Lua(parent) => parent.pc as i64,
7025                                CallFrame::Cont(_) => 0,
7026                            }
7027                        } else {
7028                            0
7029                        };
7030                        reg_state[window_size_us] = saved_pc;
7031                    }
7032                    // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
7033                    // diagnostic hook. The probe fires once per trace dispatch
7034                    // (regardless of JIT vs AOT origin — both go through this
7035                    // arm), letting the AOT smoke test verify mcode actually
7036                    // executed. Guarded behind `OnceLock` so the env read is
7037                    // a one-time cost per process; not gated on a particular
7038                    // counter so the smoke test gets a deterministic single-
7039                    // line `aot_trace_fired pc=N` per first dispatch.
7040                    if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
7041                        eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
7042                    }
7043                    let continuation_pc = {
7044                        // v1.1 A1 Session A — chunk_compiler.enter
7045                        // (CraneliftBackend delegates to enter_jit;
7046                        // NullJitBackend returns an inert guard).
7047                        let vm_ptr: *mut Vm = self;
7048                        let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
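7049                        // SAFETY: `entry_fn` is the cached trace's entry pointer (`ct.entry`) and `ct` is held for the whole dispatch block; `reg_state` was resized above to `reg_state_len` slots (`window_size`, plus one saved-PC slot for downrec entries), so the pointer covers the window the trace is expected to access. NOTE(review): confirm the compiled code never indexes past `reg_state_len`.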
7050                        unsafe { entry_fn(reg_state.as_mut_ptr()) }
7051                    };
7052                    self.jit.counters.dispatched += 1;
7053
7054                    if self.jit.pending_err.is_some() {
7055                        self.jit.pending_err = None;
7056                        self.jit.counters.deopt += 1;
7057                        // P12-S4-step4b-C-2 — unwind any helper-pushed
7058                        // inlined frames before the interpreter resumes.
7059                        // Don't restore reg_state — the trace's partial
7060                        // writes are discarded; interp re-executes from
7061                        // the original `pc`.
7062                        while self.frames.len() > pre_frames {
7063                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
7064                        }
7065                        if is_downrec_entry {
7066                            // v2.0 Track-R R3c — pending_err observed
7067                            // mid-trace inside a downrec admit. Treat
7068                            // it as a guard miss: bump `downrec_deopt`
7069                            // and suppress the next downrec admit so
7070                            // interp can advance past `head_pc` and
7071                            // the same trace doesn't immediately re-
7072                            // fire on the next loop iteration.
7073                            self.jit.counters.downrec_deopt += 1;
7074                            self.jit.suppress_downrec_admit_once = true;
7075                        }
7076                    } else if is_downrec_entry && {
7077                        // v2.0 Track-R R3d — only enter the R3c/R3d
7078                        // downrec classifier for returns whose shape
7079                        // matches the lowerer's `downrec_idx_opt` tail
7080                        // emit: either the stitch_blk DOWNREC sentinel
7081                        // (HIT) or the deopt_blk GLOBAL-sentinel-with-
7082                        // body==head_pc (MISS via guard fail). Any
7083                        // other return from a downrec trace (intermediate
7084                        // body cmp side-exit, GetField inference fail,
7085                        // etc.) carries a different sentinel/body shape
7086                        // and means the body exited BEFORE reaching the
7087                        // downrec close — classify those through the
7088                        // normal decode path (else branch below) so
7089                        // reg_state restores + pc advances correctly.
7090                        // The pre-R3d behavior (R3c) classified them all
7091                        // as MISS and skipped the normal restore, which
7092                        // inflated `downrec_deopt` with non-downrec
7093                        // events and lost the trace's mid-flight writes.
7094                        let raw_ret = continuation_pc as u64;
7095                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
7096                        let sentinel_code = if from_side_trace {
7097                            ((raw_ret >> 56) & 0x7F) as u32
7098                        } else {
7099                            0
7100                        };
7101                        let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
7102                        let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
7103                            crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
7104                            0,
7105                        );
7106                        from_side_trace
7107                            && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
7108                                || (sentinel_code == global_deopt_code
7109                                    && raw_body == head_pc_val as u64))
7110                    } {
7111                        // R3d downrec event classifier.
7112                        let raw_ret = continuation_pc as u64;
7113                        let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
7114                        if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
7115                            // Guard HIT — saved_pc matched one of the
7116                            // baked candidates and the trace's
7117                            // `stitch_blk` arm returned the DOWNREC
7118                            // sentinel. Cycle-safety checkpoint:
7119                            // decrement budget; on underflow,
7120                            // reclassify as deopt + reset budget.
7121                            // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
7122                            // ~all natural HITs in a hot loop fire
7123                            // before reset pressure.
7124                            if self.jit.stitch_depth_remaining > 0 {
7125                                self.jit.stitch_depth_remaining -= 1;
7126                                self.jit.counters.downrec_dispatched += 1;
7127                            } else {
7128                                self.jit.counters.downrec_deopt += 1;
7129                                self.jit.stitch_depth_remaining =
7130                                    crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
7131                            }
7132                        } else {
7133                            // Guard MISS via the lowerer's deopt_blk
7134                            // arm (GLOBAL sentinel + body == head_pc).
7135                            // The deopt_blk emit performs the
7136                            // store-back via `emit_store_back_and_return_pc`,
7137                            // so the live stack already reflects the
7138                            // body's writes; no extra restore needed
7139                            // from the dispatcher side.
7140                            self.jit.counters.downrec_deopt += 1;
7141                        }
7142                        self.jit.suppress_downrec_admit_once = true;
7143                        // Pop helper-pushed inlined frames (defensive —
7144                        // R3d's emit shape doesn't push frames in the
7145                        // tail, but a body side-exit before reaching
7146                        // the tail may have via the materialize helper).
7147                        while self.frames.len() > pre_frames {
7148                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
7149                        }
7150                        self.jit.reg_state_buf = reg_state;
7151                        self.jit.entry_tags_buf = entry_tags;
7152                        continue;
7153                    } else {
7154                        // Restore each slot using the trace's
7155                        // exit-tag analysis (see ExitTag docs).
7156                        // P12-S4-step4b-C-2 — decode the IR's
7157                        // side-exit shape. Upper 32 bits = (site_idx
7158                        // + 1) for inline cmp side-exits, 0 for
7159                        // legacy clean-tail / non-inline exits.
7160                        // P15-A v2-C-A0 — decode lives in
7161                        // `crate::jit::trace::decode_exit_shape` so
7162                        // v2-C-A3 can reuse it with the SIDE TRACE's
7163                        // shape inputs when the sentinel bit
7164                        // (v2-C-A2) is set on `raw_ret`.
7165                        let raw_ret = continuation_pc as u64;
7166                        // P15-A v2-C-A3 — side-trace return decode.
7167                        // Bit 63 of `raw_ret` is the side-trace
7168                        // marker the parent's IR OR'd in when it
7169                        // tail-called into a wired child trace.
7170                        // Bits 56..=62 carry the sentinel code (the
7171                        // cache key into the parent's
7172                        // `side_trace_cache`); bits 0..=55 are the
7173                        // child's own return value (encoded site or
7174                        // plain cont_pc) which we MUST decode using
7175                        // the CHILD's per_exit_inline / per_exit_tags
7176                        // / exit_tags / exit_hit_counts — not the
7177                        // parent's. The dispatcher snapshot read
7178                        // above holds the parent's shapes; when bit
7179                        // 63 is set we re-fetch the child's via the
7180                        // sentinel-keyed cache.
7181                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
7182                        let (
7183                            decode_inline,
7184                            decode_tags,
7185                            decode_exit_tags,
7186                            decode_hit_counts,
7187                            decode_body,
7188                        ) = if from_side_trace {
7189                            let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
7190                            let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
7191                            let traces = cl.proto.traces.borrow();
7192                            let child_idx = traces
7193                                .iter()
7194                                .find(|t| t.head_pc == head_pc_val)
7195                                .and_then(|pct| {
7196                                    pct.side_trace_cache.borrow().get(&sentinel_code).copied()
7197                                });
7198                            if let Some(idx) = child_idx
7199                                && let Some(child) = traces.get(idx as usize)
7200                            {
7201                                if crate::jit::trace::v2c_probe_enabled() {
7202                                    eprintln!(
7203                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
7204                                        sentinel_code,
7205                                        body,
7206                                        idx,
7207                                        child.n_ops,
7208                                        child.head_pc,
7209                                        child.window_size,
7210                                        pc,
7211                                        window_size,
7212                                        child.dispatchable,
7213                                        child.is_inline_abort_close,
7214                                    );
7215                                }
7216                                (
7217                                    child.per_exit_inline.clone(),
7218                                    child.per_exit_tags.clone(),
7219                                    child.exit_tags.clone(),
7220                                    child.exit_hit_counts.clone(),
7221                                    body,
7222                                )
7223                            } else {
7224                                if crate::jit::trace::v2c_probe_enabled() {
7225                                    eprintln!(
7226                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
7227                                        sentinel_code, body,
7228                                    );
7229                                }
7230                                // Cache miss — fall back to parent
7231                                // shapes with the body bits. Best-
7232                                // effort; the trace_side_trace_
7233                                // shape_mismatch_count records this
7234                                // path indirectly (close-handler
7235                                // skips wiring on mismatch so we
7236                                // shouldn't reach here when shape
7237                                // gate held).
7238                                (
7239                                    per_exit_inline.clone(),
7240                                    per_exit_tags.clone(),
7241                                    exit_tags.clone(),
7242                                    exit_hit_counts.clone(),
7243                                    body,
7244                                )
7245                            }
7246                        } else {
7247                            // P15-A v2-D — dispatcher-level side-trace
7248                            // invocation. Replaces v2-C's universal IR
7249                            // gate (`load + icmp + brif` at every
7250                            // emit_store_back callsite, which A6/A7
7251                            // measured as a net perf regression).
7252                            // A8 fast-path (REVERTED — not in the code
7253                            // below): it skipped the tentative decode +
7254                            // child lookup entirely when
7255                            // `has_any_side_wired == false` (the common
7256                            // case until the first side trace compiles
7257                            // for this parent). For fib_10_x10k and other
7258                            // tight short-trace workloads where most
7259                            // parent traces never get a wired child, it
7260                            // was meant to collapse the v2-D overhead to
7261                            // a single `Cell::get()` on the cold path.
7262                            // A8-revert: A8 had `parent_has_side` short-
7263                            // circuit + snapshot hoist; mini N=3 showed
7264                            // A8 lost the btrees_d8 1.02× win (dropped
7265                            // to 0.95×) WITHOUT helping fib_10 (same
7266                            // 0.86×). Drop A8 — accept the always-run
7267                            // v2-D path; the tentative decode + cell
7268                            {
7269                                let tentative = crate::jit::trace::decode_exit_shape(
7270                                    raw_ret,
7271                                    per_exit_inline,
7272                                    per_exit_tags,
7273                                    exit_tags,
7274                                );
7275                                let tentative_exit_idx = tentative.exit_hit_idx;
7276                                let child_invoke = {
7277                                    let traces = cl.proto.traces.borrow();
7278                                    traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
7279                                        |pct| {
7280                                            let cell =
7281                                                pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
7282                                            let fn_ptr = cell.get();
7283                                            if fn_ptr.is_null() {
7284                                                return None;
7285                                            }
7286                                            traces
7287                                                .iter()
7288                                                .find(|t| {
7289                                                    t.entry as *const () as *const u8 == fn_ptr
7290                                                })
7291                                                .map(|child| {
7292                                                    (
7293                                                        child.entry,
7294                                                        child.per_exit_inline.clone(),
7295                                                        child.per_exit_tags.clone(),
7296                                                        child.exit_tags.clone(),
7297                                                        child.exit_hit_counts.clone(),
7298                                                    )
7299                                                })
7300                                        },
7301                                    )
7302                                };
7303                                if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
7304                                    let child_raw_ret = {
7305                                        // v1.1 A1 Session A — chunk_compiler.enter
7306                                        // (side-trace entry).
7307                                        let vm_ptr: *mut Vm = self;
7308                                        let _guard =
7309                                            self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
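7310                                        // SAFETY: `cent` is the `entry` fn of a trace found in `cl.proto.traces` above (matched against a non-null `exit_side_trace_ptrs` cell), and `reg_state` is the live dispatch buffer. NOTE(review): assumes the compiled child stays valid after the `traces` borrow is released and that its register window fits `reg_state` — confirm. Gc payloads in `reg_state` are live while reachable from active roots (see heap.rs:5-7).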
7311                                        unsafe { cent(reg_state.as_mut_ptr()) }
7312                                    };
7313                                    (cpi, cpt, cet, chc, child_raw_ret as u64)
7314                                } else {
7315                                    (
7316                                        per_exit_inline.clone(),
7317                                        per_exit_tags.clone(),
7318                                        exit_tags.clone(),
7319                                        exit_hit_counts.clone(),
7320                                        raw_ret,
7321                                    )
7322                                }
7323                            }
7324                        };
7325                        let decoded = crate::jit::trace::decode_exit_shape(
7326                            decode_body,
7327                            &decode_inline,
7328                            &decode_tags,
7329                            &decode_exit_tags,
7330                        );
7331                        let site_id = decoded.site_id;
7332                        let cont_pc = decoded.cont_pc;
7333                        let exit_hit_idx = decoded.exit_hit_idx;
7334                        let exit_tags_for_pc = decoded.exit_tags_for_pc;
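7335                        // P15-A v2-C-A3 — for sentinel (bit-63) side-trace
7336                        // returns force using_global_exit_tags=false so the
7337                        // restore loop always takes the per-tag slow path (the
7338                        // child's global_tag_res_kind classification isn't
7339                        // plumbed through yet — TODO for a future polish step).
7340                        // NOTE(review): a v2-D dispatcher-invoked child also decodes with child shapes but keeps from_side_trace == false — confirm intended.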
7341                        let using_global_exit_tags = if from_side_trace {
7342                            false
7343                        } else {
7344                            decoded.using_global_exit_tags
7345                        };
7346                        // P15-prep — increment the counter (saturate
7347                        // at u32::MAX to avoid wrap on long runs).
7348                        // P15-A v1 — track whether this increment is
7349                        // the one that crossed `HOTEXIT_THRESHOLD`
7350                        // (transition: previous v < threshold, new v
7351                        // == threshold). The side-trace start is
7352                        // deferred to just before `continue;` so
7353                        // vm.stack and frame.pc are fully restored
7354                        // (the snapshot reads post-restore values).
7355                        let mut side_trace_should_start = false;
7356                        // P15-A v2-C-A3 — for side-trace returns the
7357                        // counter to bump is the CHILD's (decoded
7358                        // shape lookup) — `exit_hit_idx` is into the
7359                        // decoded layout, so use the matching
7360                        // `decode_hit_counts`. For parent decode
7361                        // they're aliased (clone of the parent's
7362                        // own Rc).
7363                        if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7364                            let v = c.get();
7365                            if v < u32::MAX {
7366                                c.set(v + 1);
7367                            }
7368                            if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7369                                && self.jit.active_trace.is_none()
7370                                && self.jit.trace_enabled
7371                            {
7372                                side_trace_should_start = true;
7373                            }
7374                        }
7375                        // P12-S4-step4b-C-2 — at an inline cmp@d>0
7376                        // side-exit, the helper has pushed N frames on
7377                        // top of the trace head's frame and
7378                        // `exit_tags_for_pc.len()` covers the full
7379                        // window (caller + each inlined frame's
7380                        // window). Slots beyond `max_stack` belong to
7381                        // an inlined frame: their `Untouched` entries
7382                        // default to Nil (no entry-tag fallback —
7383                        // marshal-in only captured caller slots) and
7384                        // we write to interp stack at `base + i` which
7385                        // mirrors `op_offsets`-derived layout.
7386                        let slot_count = exit_tags_for_pc.len();
7387                        // P12-S4-step4b-C-2 — the helper only extends
7388                        // vm.stack up to the deepest pushed frame's
7389                        // window, but the exit_tags snapshot covers
7390                        // the trace's full `window_size` (which
7391                        // includes depth-N+1 scratch slots that the
7392                        // trace's IR may have written without a
7393                        // matching pushed frame). Extend with Nil so
7394                        // the write at the tail doesn't panic; these
7395                        // slots get overwritten by the writeback loop
7396                        // and won't leak meaningful data past the
7397                        // pushed frames' R[0..max_stack) windows.
7398                        if self.stack.len() < base_us + slot_count {
7399                            self.stack
7400                                .resize(base_us + slot_count, crate::runtime::Value::Nil);
7401                        }
7402                        // P13-S13-E — fast-path restore loop. When
7403                        // we landed on the global `exit_tags`,
7404                        // dispatch on the compile-time
7405                        // classification: skip the loop entirely
7406                        // for `AllUntouched`, do a tag-free
7407                        // `Value::Int(...)` write per slot for
7408                        // `AllInt`, otherwise fall through to the
7409                        // general match-arm loop. site_id > 0
7410                        // (inline frame mat) and per_exit_tags
7411                        // hits always take the general path —
7412                        // their per-side-exit shapes aren't
7413                        // pre-classified yet.
7414                        let fast_path_taken = if using_global_exit_tags {
7415                            match global_tag_res_kind {
7416                                crate::jit::trace::TagResKind::AllUntouched => {
7417                                    // No-op: vm.stack already
7418                                    // matches the trace's post-
7419                                    // entry state for these
7420                                    // slots (entry values not
7421                                    // overridden, or already
7422                                    // spilled by helpers).
7423                                    true
7424                                }
7425                                crate::jit::trace::TagResKind::AllInt => {
7426                                    for i in 0..slot_count {
7427                                        self.stack[base_us + i] =
7428                                            crate::runtime::Value::Int(reg_state[i]);
7429                                    }
7430                                    true
7431                                }
7432                                crate::jit::trace::TagResKind::Mixed => false,
7433                            }
7434                        } else {
7435                            false
7436                        };
7437                        if !fast_path_taken {
7438                            for i in 0..slot_count {
7439                                let tag = match exit_tags_for_pc[i] {
7440                                    crate::jit::trace::ExitTag::Untouched => {
7441                                        if i < max_stack {
7442                                            entry_tags[i]
7443                                        } else {
7444                                            crate::runtime::value::raw::NIL
7445                                        }
7446                                    }
7447                                    crate::jit::trace::ExitTag::Int => {
7448                                        crate::runtime::value::raw::INT
7449                                    }
7450                                    crate::jit::trace::ExitTag::Float => {
7451                                        crate::runtime::value::raw::FLOAT
7452                                    }
7453                                    crate::jit::trace::ExitTag::Table => {
7454                                        crate::runtime::value::raw::TABLE
7455                                    }
7456                                    crate::jit::trace::ExitTag::Closure => {
7457                                        crate::runtime::value::raw::CLOSURE
7458                                    }
7459                                    // P12-S6-A1 — trace actively wrote Nil
7460                                    // to this slot (e.g. via Op::LoadNil).
7461                                    // Restore as Nil regardless of the entry
7462                                    // tag, since the i64 payload is 0 and
7463                                    // packing as the entry tag (e.g. INT)
7464                                    // would mis-type the slot.
7465                                    crate::jit::trace::ExitTag::Nil => {
7466                                        crate::runtime::value::raw::NIL
7467                                    }
7468                                    // P12-S12-C v2 — trace wrote a Str ptr
7469                                    // to this slot (LoadK Str / Move from
7470                                    // Str / Concat result). Restore as
7471                                    // Value::Str with raw bits round-
7472                                    // tripped.
7473                                    crate::jit::trace::ExitTag::Str => {
7474                                        crate::runtime::value::raw::STR
7475                                    }
7476                                };
7477                                // SAFETY: `tag` is either the slot's entry tag
7478                                // (entry validated above) or the raw tag that
7479                                // matches the ExitTag picked by the match above
7480                                // (INT/FLOAT/TABLE/CLOSURE/NIL/STR). The raw
7481                                // payload sits in reg_state[i]. Stack was
7482                                // extended by the materialize helper for inline
7483                                // frames and resized above to base_us + slot_count. Gc payloads are live while reachable from active roots (see heap.rs:5-7).
7484                                self.stack[base_us + i] = unsafe {
7485                                    Value::pack(
7486                                        tag,
7487                                        crate::runtime::value::RawVal {
7488                                            zero: reg_state[i] as u64,
7489                                        },
7490                                    )
7491                                };
7492                            }
7493                        }
7494                        // P12-S4-step4b-C-2 — for non-inline exits the
7495                        // helper was never called (no metas chain for
7496                        // this cont_pc), so `frames.last()` is the
7497                        // trace head's frame and we set its pc to
7498                        // cont_pc as before. For inline exits the
7499                        // helper baked the side-exit PC into the
7500                        // innermost frame's `pc` at push time
7501                        // (chain.last().pc was overridden at emit),
7502                        // so this assignment to `frames.last_mut().pc
7503                        // = cont_pc` is a redundant-but-correct
7504                        // confirmation.
7505                        let _ = &per_exit_inline; // hold the Rc alive across dispatch
7506                        // P12-S4-step4b-C-2 — for inline side-exits the
7507                        // helper has pushed N frames on top. The trace
7508                        // head frame is at `pre_frames - 1`; set its
7509                        // pc to `head_resume_pc` so when the chain
7510                        // eventually pops back to it, interp resumes
7511                        // PAST the trace's depth-0 Op::Call instead of
7512                        // restarting from `head_pc` and re-triggering
7513                        // dispatch (infinite loop). The innermost
7514                        // (helper-pushed) frame already has its pc
7515                        // baked in at compile time, but we still
7516                        // assign `cont_pc` below for parity with the
7517                        // non-inline path (no-op).
7518                        if site_id > 0 {
7519                            let idx = (site_id - 1) as usize;
7520                            let head_resume_pc = decode_inline[idx].head_resume_pc;
7521                            if pre_frames > 0 {
7522                                if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7523                                    f.pc = head_resume_pc;
7524                                }
7525                            }
7526                        }
7527                        let frames_len_now = self.frames.len();
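7528                        // SAFETY: `unwrap_unchecked` needs `self.frames` to be non-empty. Presumably holds because this trace was dispatched from the running Lua frame and, per the comments above, inline exits only push frames on top of it — TODO confirm nothing on this exit path pops frames.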
7529                        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7530                            CallFrame::Lua(fmut) => {
7531                                if crate::jit::trace::v2c_probe_enabled() {
7532                                    eprintln!(
7533                                        "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
7534                                        from_side_trace,
7535                                        raw_ret,
7536                                        fmut.pc,
7537                                        cont_pc,
7538                                        site_id,
7539                                        frames_len_now,
7540                                        pre_frames,
7541                                        max_stack,
7542                                    );
7543                                }
7544                                fmut.pc = cont_pc;
7545                            }
7546                            _ => unreachable!("Cont frame at trace dispatch"),
7547                        }
7548                        // P15-A v1 — deferred side-trace start. The
7549                        // increment block above flagged this exit's
7550                        // hit count crossing HOTEXIT_THRESHOLD; now
7551                        // that vm.stack is restored and frame.pc is
7552                        // settled, snapshot entry_tags from the
7553                        // resume frame's window and create the
7554                        // recorder. The recorder's first push fires
7555                        // on the next interp iteration at cont_pc.
7556                        //
7557                        // `head_proto` for the side trace = cl.proto
7558                        // (trace JIT only inlines self-recursive
7559                        // calls today, so cont_pc always lands in
7560                        // the same proto as the parent). Frame base
7561                        // is the resume frame (top of `self.frames`
7562                        // — inline-pushed frames moved this).
7563                        if side_trace_should_start {
7564                            let (resume_base, resume_proto) = match self.frames.last() {
7565                                Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7566                                _ => (base_us, cl.proto),
7567                            };
7568                            let resume_max_stack = resume_proto.max_stack as usize;
7569                            let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7570                            // Extend stack if cont_pc's frame window
7571                            // overhangs the current stack len (rare,
7572                            // but inline-pushed frame stack writes
7573                            // only covered the trace's writeback).
7574                            if self.stack.len() < resume_base + resume_max_stack {
7575                                self.stack.resize(
7576                                    resume_base + resume_max_stack,
7577                                    crate::runtime::Value::Nil,
7578                                );
7579                            }
7580                            for i in 0..resume_max_stack {
7581                                let (tag, _) = self.stack[resume_base + i].unpack();
7582                                side_entry_tags.push(tag);
7583                            }
7584                            self.jit.active_trace =
7585                                Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7586                                    resume_proto,
7587                                    cont_pc,
7588                                    side_entry_tags,
7589                                    cl.proto,
7590                                    head_pc_val,
7591                                    exit_hit_idx,
7592                                )));
7593                            self.jit.recording_frame_base = self.frames.len() - 1;
7594                            self.jit.counters.side_trace_started += 1;
7595                        }
7596                        // P13-S13-D — put the dispatch buffers back
7597                        // before the `continue;` so the next
7598                        // dispatch picks up the same allocation.
7599                        self.jit.reg_state_buf = reg_state;
7600                        self.jit.entry_tags_buf = entry_tags;
7601                        continue;
7602                    }
7603                }
7604                // P13-S13-D — !dispatch_ok / deopt path / non-cont
7605                // exit also restore the buffers before falling
7606                // through to the interp.
7607                self.jit.reg_state_buf = reg_state;
7608                self.jit.entry_tags_buf = entry_tags;
7609            }
7610
7611            // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
7612            // hook code that consults `currentpc = savedpc - 1` lands on the
7613            // instruction now executing. luna mirrors that by advancing
7614            // `f.pc` to `pc + 1` before the hook block — local_at /
7615            // getinfo / line attribution all read f.pc, and the existing
7616            // `pc - 1` convention in those helpers then yields the current
7617            // instruction's pc (db.lua :696: local `A` visible at the
7618            // chunk's return line once OP_CLOSURE has advanced pc).
7619            //
7620            // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
7621            // (cont frames drained above) so the and_then/Option layers are
7622            // dead weight.
7623            // SAFETY: `unwrap_unchecked` needs `self.frames` to be non-empty; per the note above, the top frame here is the running Lua frame (cont frames drained above), so `last_mut()` is `Some`.
7624            match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7625                CallFrame::Lua(fmut) => fmut.pc = pc + 1,
7626                _ => unreachable!("Cont frame at pc bump"),
7627            }
7628
7629            // count + line hooks (PUC traceexec): before executing the
7630            // instruction. Skipped while the hook itself runs.
7631            // (Parens here are load-bearing — without them `&&` binds tighter
7632            // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7633            // letting a Lua line hook recurse into itself → stack overflow
7634            // on db.lua line-hook assertions. Matches the `hook_call_with` /
7635            // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7636            if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7637                let lines = &cl.proto.lines;
7638                let cur_line = if lines.is_empty() {
7639                    None
7640                } else {
7641                    Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7642                };
7643                // count hook: fire every `count_base` instructions
7644                if self.hook.count {
7645                    self.hook.count_left -= 1;
7646                    if self.hook.count_left <= 0 {
7647                        self.hook.count_left = self.hook.count_base;
7648                        // hooked function is the running Lua frame: its frame
7649                        // is on the stack, so no synthetic C level is needed.
7650                        self.run_hook(b"count", cur_line, false)?;
7651                    }
7652                }
7653                // line hook: fire on a fresh frame, a backward jump (loop), or a
7654                // change of source line.
7655                if self.hook.line {
7656                    if lines.is_empty() {
7657                        // PUC: a stripped chunk has no line info, so
7658                        // `getfuncline` returns -1. The line hook still fires
7659                        // on the first instruction of the new frame (where
7660                        // `npci <= oldpc` holds at oldpc=0), with the line
7661                        // pushed as `nil` instead of an integer (db.lua :1030
7662                        // "hook called without debug info for 1st instruction").
7663                        if oldpc == u32::MAX {
7664                            self.run_hook(b"line", None, false)?;
7665                            self.top_frame_mut().hook_oldpc = pc;
7666                        }
7667                    } else {
7668                        let newline = lines[(pc as usize).min(lines.len() - 1)];
7669                        // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7670                        // on a backward jump (`pc < oldpc` — strict; an equal pc
7671                        // would re-fire the install-site after `oldpc = pc`),
7672                        // or when the source line changes.
7673                        let fire = oldpc == u32::MAX
7674                            || pc < oldpc
7675                            || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7676                        if fire {
7677                            self.run_hook(b"line", Some(newline as i64), false)?;
7678                        }
7679                        self.top_frame_mut().hook_oldpc = pc;
7680                    }
7681                }
7682            }
7683
7684            match inst.op() {
7685                Op::Move => {
7686                    let v = self.r(base, inst.b());
7687                    self.set_r(base, inst.a(), v);
7688                }
7689                Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7690                Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7691                Op::LoadK => {
7692                    let v = cl.proto.consts[inst.bx() as usize];
7693                    self.set_r(base, inst.a(), v);
7694                }
7695                Op::LoadKx => {
7696                    let extra = cl.proto.code[self.pc_of_top() as usize];
7697                    self.bump_pc();
7698                    let v = cl.proto.consts[extra.ax() as usize];
7699                    self.set_r(base, inst.a(), v);
7700                }
7701                Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7702                Op::LFalseSkip => {
7703                    self.set_r(base, inst.a(), Value::Bool(false));
7704                    self.bump_pc();
7705                }
7706                Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7707                Op::LoadNil => {
7708                    let a = inst.a();
7709                    for i in 0..=inst.b() {
7710                        self.set_r(base, a + i, Value::Nil);
7711                    }
7712                }
7713                Op::GetUpval => {
7714                    let v = self.upval_get(cl, inst.b());
7715                    self.set_r(base, inst.a(), v);
7716                }
7717                Op::SetUpval => {
7718                    let v = self.r(base, inst.a());
7719                    self.upval_set(cl, inst.b(), v);
7720                }
7721                Op::GetTabUp => {
7722                    let t = self.upval_get(cl, inst.b());
7723                    let key = cl.proto.consts[inst.c() as usize];
7724                    self.op_index(t, key, base + inst.a())?;
7725                }
7726                Op::GetTable => {
7727                    let t = self.r(base, inst.b());
7728                    let key = self.r(base, inst.c());
7729                    self.op_index(t, key, base + inst.a())?;
7730                }
7731                Op::GetI => {
7732                    let t = self.r(base, inst.b());
7733                    self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7734                }
7735                Op::GetField => {
7736                    let t = self.r(base, inst.b());
7737                    let key = cl.proto.consts[inst.c() as usize];
7738                    // v1.2 D4 A1 — fast path: known-Str const key + no
7739                    // metatable on the table → skip `op_index` /
7740                    // `index_step`'s MAX_TAG_LOOP setup and the outer
7741                    // `Value` match. Falls through to the slow path
7742                    // unchanged when either invariant breaks (so
7743                    // `__index` metamethods, non-Table receivers, and
7744                    // non-Str keys behave exactly as before).
7745                    if let Value::Table(tb) = t
7746                        && tb.metatable().is_none()
7747                        && let Value::Str(s) = key
7748                    {
7749                        let v = tb.get_str(s);
7750                        self.stack[(base + inst.a()) as usize] = v;
7751                    } else {
7752                        self.op_index(t, key, base + inst.a())?;
7753                    }
7754                }
7755                Op::SetTabUp => {
7756                    let t = self.upval_get(cl, inst.a());
7757                    let key = cl.proto.consts[inst.b() as usize];
7758                    let v = self.r(base, inst.c());
7759                    self.op_newindex(t, key, v)?;
7760                }
7761                Op::SetTable => {
7762                    let t = self.r(base, inst.a());
7763                    let key = self.r(base, inst.b());
7764                    let v = self.r(base, inst.c());
7765                    self.op_newindex(t, key, v)?;
7766                }
7767                Op::SetI => {
7768                    let t = self.r(base, inst.a());
7769                    let v = self.r(base, inst.c());
7770                    self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7771                }
7772                Op::SetField => {
7773                    let t = self.r(base, inst.a());
7774                    let key = cl.proto.consts[inst.b() as usize];
7775                    let v = self.r(base, inst.c());
7776                    self.op_newindex(t, key, v)?;
7777                }
7778                Op::NewTable => {
7779                    let t = self.heap.new_table();
7780                    self.set_r(base, inst.a(), Value::Table(t));
7781                    self.maybe_collect_garbage(base + inst.a() + 1);
7782                }
7783                Op::SetList => {
7784                    let a = inst.a();
7785                    let abs_a = base + a;
7786                    let n = if inst.b() == 0 {
7787                        self.top - (abs_a + 1)
7788                    } else {
7789                        inst.b()
7790                    };
7791                    let offset = if inst.k() {
7792                        let extra = cl.proto.code[self.pc_of_top() as usize];
7793                        self.bump_pc();
7794                        extra.ax() as i64
7795                    } else {
7796                        inst.c() as i64
7797                    };
7798                    let Value::Table(t) = self.r(base, a) else {
7799                        unreachable!("SETLIST on non-table");
7800                    };
7801                    for i in 1..=n {
7802                        let v = self.r(base, a + i);
7803                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7804                        if let Err(TableError::Overflow) =
7805                            unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7806                        {
7807                            return Err(self.rt_err("table overflow"));
7808                        }
7809                    }
7810                    // one barrier_back covers every store this op did — PUC's
7811                    // `luaC_barrierback_` once-per-table optimisation
7812                    self.heap
7813                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7814                    // the element temps above the table are now consumed
7815                    self.maybe_collect_garbage(base + a + 1);
7816                }
7817                Op::SelfOp => {
7818                    let o = self.r(base, inst.b());
7819                    self.set_r(base, inst.a() + 1, o);
7820                    // PUC OP_SELF's C is a constant index when the k-flag is
7821                    // set; otherwise it points to a register that holds the
7822                    // (constant-loaded) key. luna's compiler falls back to the
7823                    // register form when the constant index exceeds OP_SELF's
7824                    // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7825                    // a table with 250+ string keys, where "findfield" lands
7826                    // past const #255). The exec must honour the same split.
7827                    let key = if inst.k() {
7828                        cl.proto.consts[inst.c() as usize]
7829                    } else {
7830                        self.r(base, inst.c())
7831                    };
7832                    self.op_index(o, key, base + inst.a())?;
7833                }
7834                Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7835                Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7836                Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7837                Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7838                Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7839                Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7840                Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7841                Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7842                Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7843                Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7844                Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7845                Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7846                Op::Unm => {
7847                    let v = self.r(base, inst.b());
7848                    match coerce_num(v) {
7849                        Some(Num::Int(i)) => {
7850                            self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7851                        }
7852                        Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7853                        None => {
7854                            let mm = self.get_mm(v, Mm::Unm);
7855                            if mm.is_nil() {
7856                                return Err(self.type_err("perform arithmetic on", v));
7857                            }
7858                            let dst = base + inst.a();
7859                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7860                        }
7861                    }
7862                }
7863                Op::BNot => {
7864                    let v = self.r(base, inst.b());
7865                    match coerce_num(v) {
7866                        Some(n) => {
7867                            let i = self.int_from_num(n)?;
7868                            self.set_r(base, inst.a(), Value::Int(!i));
7869                        }
7870                        None => {
7871                            let mm = self.get_mm(v, Mm::BNot);
7872                            if mm.is_nil() {
7873                                return Err(self.type_err("perform bitwise operation on", v));
7874                            }
7875                            let dst = base + inst.a();
7876                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7877                        }
7878                    }
7879                }
7880                Op::Not => {
7881                    let v = self.r(base, inst.b());
7882                    self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7883                }
7884                Op::Len => {
7885                    let v = self.r(base, inst.b());
7886                    match self.len_step(v)? {
7887                        MmOut::Done(r) => self.set_r(base, inst.a(), r),
7888                        MmOut::Mm { func, recv } => {
7889                            let dst = base + inst.a();
7890                            self.begin_meta_call(
7891                                func,
7892                                &[recv, recv],
7893                                MetaAction::Store { dst },
7894                                "len",
7895                            )?;
7896                        }
7897                        MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7898                    }
7899                }
7900                Op::Concat => {
7901                    // right-associative fold over operands at base+a .. base+a+n,
7902                    // in place on the stack so a yielding __concat can suspend.
7903                    let a = inst.a();
7904                    let n = inst.b();
7905                    self.top = base + a + n;
7906                    self.concat_run(base + a)?;
7907                }
7908                Op::Close => {
7909                    // Yieldable: drive __close handlers through the
7910                    // interpreter loop so a coroutine.yield() inside a
7911                    // handler suspends cleanly (locals.lua block-end yield).
7912                    // `drive_close` parks the handler call at `self.top`, so
7913                    // raise `top` past this frame's full register window
7914                    // first — a goto out of a nested for-loop can fire
7915                    // OP_Close while `self.top` still sits at the inner
7916                    // body's working top, which would let `push_frame`'s
7917                    // wipe clobber the outer tbc slot before it could be
7918                    // closed (locals.lua:1219 nested-for goto regression).
7919                    self.top = self.top.max(base + cl.proto.max_stack as u32);
7920                    let _ =
7921                        self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7922                }
7923                Op::Tbc => {
7924                    self.register_tbc(base + inst.a())?;
7925                }
7926                Op::Jmp => {
7927                    let off = inst.sj();
7928                    // P12-S1.B — trace JIT back-edge counter. A negative
7929                    // jump offset is a loop back-edge (the only canonical
7930                    // backward jumps the compiler emits — `while`, `for`,
7931                    // `repeat`). Tick the per-Proto counter and, once it
7932                    // exceeds the threshold, log a stub promotion that
7933                    // S1.C will turn into actual trace recording. The
7934                    // whole block is gated on `trace_jit_enabled` so
7935                    // existing benches see one branch-not-taken and no
7936                    // counter writes.
7937                    if self.jit.trace_enabled && off < 0 {
7938                        let proto = cl.proto;
7939                        let c = proto.trace_hot_count.get();
7940                        if c < u32::MAX / 2 {
7941                            proto.trace_hot_count.set(c + 1);
7942                        }
7943                        // P13-S13-H — relaxed back-edge trigger:
7944                        // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7945                        // a missed crossing (active_trace busy with
7946                        // a call-trigger, or the recorder slot
7947                        // happened to be in use) doesn't permanently
7948                        // lock this back-edge target out. The
7949                        // `already_cached` short-circuit prevents
7950                        // duplicate recordings: once a trace is
7951                        // cached for this target, subsequent
7952                        // crossings skip the start. This pairs with
7953                        // S13-H's discard-on-partial-coverage close
7954                        // handling — when a short call-trigger is
7955                        // discarded, the back-edge can still find an
7956                        // open slot at the next iteration.
7957                        let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7958                        // P13-S13-K — gave-up short-circuit. Skip
7959                        // the RefCell borrow + scan when the
7960                        // S13-I cap force-compiled a partial
7961                        // trace on this Proto.
7962                        let back_edge_already_cached = if proto.trace_gave_up.get() {
7963                            true
7964                        } else {
7965                            proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7966                        };
7967                        if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7968                            && self.jit.active_trace.is_none()
7969                            && !back_edge_already_cached
7970                        {
7971                            // Back-edge target = pc after `add_pc(off)`,
7972                            // i.e. current `pc + 1 + off` (the dispatch
7973                            // loop has already advanced f.pc to pc+1).
7974                            let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7975                            // Snapshot per-slot Value tag at trace
7976                            // entry so the lowerer's kind tracker
7977                            // knows which arith path to lower
7978                            // (iadd vs fadd, etc.).
7979                            let max_stack = cl.proto.max_stack as usize;
7980                            let base_us = base as usize;
7981                            let mut entry_tags = Vec::with_capacity(max_stack);
7982                            for i in 0..max_stack {
7983                                let (tag, _) = self.stack[base_us + i].unpack();
7984                                entry_tags.push(tag);
7985                            }
7986                            self.jit.active_trace =
7987                                Some(Box::new(crate::jit::trace::TraceRecord::start(
7988                                    cl.proto, target, entry_tags, false,
7989                                )));
7990                            // P12-S4 — record the frame the trace
7991                            // started in. `self.frames.len() - 1`
7992                            // since we're inside the currently-running
7993                            // Lua frame's dispatch.
7994                            self.jit.recording_frame_base = self.frames.len() - 1;
7995                        }
7996                    }
7997                    self.add_pc(off);
7998                }
7999                Op::Eq => {
8000                    let l = self.r(base, inst.a());
8001                    let r = self.r(base, inst.b());
8002                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
8003                        if (a == b) != inst.k() {
8004                            self.bump_pc();
8005                        }
8006                    } else {
8007                        let step = self.eq_step(l, r);
8008                        self.op_compare(step, l, r, inst.k(), "eq")?;
8009                    }
8010                }
8011                Op::EqK => {
8012                    let l = self.r(base, inst.a());
8013                    let r = cl.proto.consts[inst.b() as usize];
8014                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
8015                        if (a == b) != inst.k() {
8016                            self.bump_pc();
8017                        }
8018                    } else {
8019                        let step = self.eq_step(l, r);
8020                        self.op_compare(step, l, r, inst.k(), "eq")?;
8021                    }
8022                }
8023                Op::Lt => {
8024                    let l = self.r(base, inst.a());
8025                    let r = self.r(base, inst.b());
8026                    // hot path: Int < Int — drops the MmOut + op_compare match
8027                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
8028                        if (a < b) != inst.k() {
8029                            self.bump_pc();
8030                        }
8031                    } else {
8032                        let step = self.less_step(l, r, false)?;
8033                        self.op_compare(step, l, r, inst.k(), "lt")?;
8034                    }
8035                }
8036                Op::Le => {
8037                    let l = self.r(base, inst.a());
8038                    let r = self.r(base, inst.b());
8039                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
8040                        if (a <= b) != inst.k() {
8041                            self.bump_pc();
8042                        }
8043                    } else {
8044                        let step = self.less_step(l, r, true)?;
8045                        self.op_compare(step, l, r, inst.k(), "le")?;
8046                    }
8047                }
8048                Op::Test => {
8049                    let cond = self.r(base, inst.a()).truthy();
8050                    self.cond_skip(cond, inst.k());
8051                }
8052                Op::TestSet => {
8053                    let v = self.r(base, inst.b());
8054                    if v.truthy() == inst.k() {
8055                        self.set_r(base, inst.a(), v);
8056                    } else {
8057                        self.bump_pc();
8058                    }
8059                }
8060                Op::Call => {
8061                    let abs = base + inst.a();
8062                    let nargs = if inst.b() == 0 {
8063                        None
8064                    } else {
8065                        Some(inst.b() - 1)
8066                    };
8067                    let wanted = inst.c() as i32 - 1;
8068                    self.begin_call(abs, nargs, wanted, false)?;
8069                }
8070                Op::TailCall => {
8071                    let fr = *self.top_frame();
8072                    let abs = base + inst.a();
8073                    let mut nargs = if inst.b() == 0 {
8074                        self.top - (abs + 1)
8075                    } else {
8076                        inst.b() - 1
8077                    };
8078                    // A tail call pops this frame before begin_call, so a
8079                    // non-callable target would lose its name/position. Report
8080                    // it now (PUC reads funcname from the still-current ci),
8081                    // while the frame is intact, for "(field 'x')"-style info.
8082                    let mut func = self.stack[abs as usize];
8083                    if !matches!(func, Value::Closure(_) | Value::Native(_))
8084                        && self.get_mm(func, Mm::Call).is_nil()
8085                    {
8086                        return Err(self.call_err(func));
8087                    }
8088                    // PUC `luaD_pretailcall` resolves a chain of `__call`
8089                    // metamethods *in place* before deciding whether to
8090                    // collapse this frame. Without that, each __call hop
8091                    // would push a fresh Lua frame and a 10000-deep
8092                    // tail-recursion through a 100-deep __call chain
8093                    // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
8094                    // shift args right, install the handler at `abs`, retry.
8095                    // Chain depth limit matches the call-site `begin_call`
8096                    // version cap (5.5 calls.lua :223 — 15 max, then "too
8097                    // long"; 16th wrap fails the call). An infinite
8098                    // self-referential `__call` would otherwise spin.
8099                    let chain_cap = if self.version >= LuaVersion::Lua55 {
8100                        15
8101                    } else {
8102                        MAX_CCMT
8103                    };
8104                    let mut chain = 0u32;
8105                    while !matches!(func, Value::Closure(_) | Value::Native(_)) {
8106                        let mm = self.get_mm(func, Mm::Call);
8107                        if mm.is_nil() {
8108                            return Err(self.call_err(func));
8109                        }
8110                        chain += 1;
8111                        if chain > chain_cap {
8112                            return Err(self.rt_err("'__call' chain too long"));
8113                        }
8114                        let end = (abs + 1 + nargs) as usize;
8115                        if self.stack.len() < end + 1 {
8116                            self.stack.resize(end + 1, Value::Nil);
8117                        }
8118                        for i in (0..=nargs).rev() {
8119                            self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
8120                        }
8121                        self.stack[abs as usize] = mm;
8122                        nargs += 1;
8123                        self.top = abs + 1 + nargs;
8124                        func = mm;
8125                    }
8126                    // PUC's tail-call collapse is Lua→Lua only. A tail call to
8127                    // a C function runs the C function under the *current* Lua
8128                    // activation (no frame fold — a C frame has nothing to
8129                    // collapse into); after the C function returns, the
8130                    // calling Lua function returns those results normally.
8131                    // Mirror that: keep our Lua frame on the stack, call the
8132                    // target through `begin_call(abs, …)` as a regular call,
8133                    // and let the fallback `Op::Return` that the compiler
8134                    // emits right after `Op::TailCall` forward the results.
8135                    // 5.1 closure.lua :177's `return getfenv()` from inside
8136                    // foo needs level 1 to resolve to foo, not to the
8137                    // thread's globals fallback that happens when no Lua
8138                    // frame is on the stack.
8139                    let lua_target = matches!(func, Value::Closure(_));
8140                    if lua_target {
8141                        self.close_slots(fr.base, None)?;
8142                        for i in 0..=nargs {
8143                            self.stack[(fr.func_slot + i) as usize] =
8144                                self.stack[(abs + i) as usize];
8145                        }
8146                        // v2.5 P1B-2A: clear the slot range that's now
8147                        // stranded by the tail-call collapse. The args
8148                        // were copied to `[fr.func_slot..fr.func_slot+
8149                        // nargs+1)`; the source slots `[abs..abs+
8150                        // nargs+1)` still hold the same `Value::Closure
8151                        // / Value::Str / ...` entries, but they're past
8152                        // the new call's window. Without this clear, a
8153                        // later GC with wider gc_top would mark stale
8154                        // pointers there (same UAF-A family the v2.3
8155                        // finish_results slot-clear closed for the
8156                        // Op::Return path).
8157                        let new_top_lower_bound = fr.func_slot + nargs + 1;
8158                        let prev_top = (self.top as usize).min(self.stack.len());
8159                        if (new_top_lower_bound as usize) < prev_top {
8160                            for slot in &mut self.stack[new_top_lower_bound as usize..prev_top] {
8161                                *slot = Value::Nil;
8162                            }
8163                        }
8164                        // PUC `CIST_TAIL`: the new Lua activation inherits
8165                        // the popped frame's tailcalls count plus one for
8166                        // this collapse. 5.1 db.lua :372 hammers 30000
8167                        // recursive tail calls and expects to see the
8168                        // synthetic tail level for every one of them.
8169                        self.pending_tailcalls = fr.tailcalls.saturating_add(1);
8170                        frames_pop_sync(&mut self.frames, &mut self.frames_top);
8171                        if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
8172                            && self.frames.len() < entry_depth
8173                        {
8174                            // a native completed what was this function's result
8175                            return Ok(self.take_results(fr.func_slot));
8176                        }
8177                    } else {
8178                        // Native (or __call-bearing) target: regular call. The
8179                        // results land at `abs..self.top` and the next op (the
8180                        // fallback `Op::Return`) forwards them. `wanted = -1`
8181                        // because the caller will multret them through Return.
8182                        self.begin_call(abs, Some(nargs), -1, false)?;
8183                    }
8184                }
8185                Op::Return | Op::Return0 | Op::Return1 => {
8186                    let (abs_a, nret) = match inst.op() {
8187                        Op::Return0 => (base, 0),
8188                        Op::Return1 => (base + inst.a(), 1),
8189                        _ => {
8190                            let abs_a = base + inst.a();
8191                            let nret = if inst.b() == 0 {
8192                                self.top - abs_a
8193                            } else {
8194                                inst.b() - 1
8195                            };
8196                            (abs_a, nret)
8197                        }
8198                    };
8199                    // close before moving results: __close handlers run above
8200                    // the stack top, so the result region [abs_a..abs_a+nret)
8201                    // stays intact across any yields the close performs.
8202                    // Fixed-count returns may leave `self.top` below the last
8203                    // result slot (the compiler does not always re-bump it);
8204                    // raise it past the result region so `drive_close` parks
8205                    // the handler call *above* — landing at `self.top` would
8206                    // otherwise clobber a result with the handler closure.
8207                    self.top = self.top.max(abs_a + nret);
8208                    if let Some(vals) = self.begin_close(
8209                        base,
8210                        None,
8211                        AfterClose::Return {
8212                            abs_a,
8213                            nret,
8214                            from_native: false,
8215                        },
8216                        entry_depth,
8217                    )? {
8218                        return Ok(vals);
8219                    }
8220                }
8221                Op::ForPrep => self.for_prep(inst, base)?,
8222                Op::ForLoop => {
8223                    // P12 — trace JIT back-edge counter on the
8224                    // numeric-for back-edge. ForLoop is always at
8225                    // a back-edge position (when it continues);
8226                    // for the trace recorder we treat it as the
8227                    // close-detection equivalent of `Op::Jmp` with
8228                    // negative offset. Counter only ticks when the
8229                    // back-edge will actually fire (count > 0 in
8230                    // the 5.4+ Int form, comparable predicates in
8231                    // pre-5.3 / Float). The cheap check up front
8232                    // matches the for_loop helper's branch.
8233                    if self.jit.trace_enabled {
8234                        let a = inst.a();
8235                        let pre53 = self.version() <= LuaVersion::Lua53;
8236                        let take_back_edge =
8237                            match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
8238                                (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
8239                                    count > 0
8240                                }
8241                                (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
8242                                    let next = cur.wrapping_add(st);
8243                                    if st > 0 { next <= lim } else { next >= lim }
8244                                }
8245                                (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
8246                                    let next = cur + st;
8247                                    if st > 0.0 { next <= lim } else { next >= lim }
8248                                }
8249                                _ => false,
8250                            };
8251                        if take_back_edge {
8252                            let proto = cl.proto;
8253                            let c = proto.trace_hot_count.get();
8254                            if c < u32::MAX / 2 {
8255                                proto.trace_hot_count.set(c + 1);
8256                            }
8257                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8258                                && self.jit.active_trace.is_none()
8259                            {
8260                                // ForLoop's back-edge target = pc
8261                                // after `add_pc(-bx)` runs from the
8262                                // already-bumped f.pc (= pc + 1).
8263                                // So target = (pc + 1) - bx.
8264                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8265                                let max_stack = cl.proto.max_stack as usize;
8266                                let base_us = base as usize;
8267                                let mut entry_tags = Vec::with_capacity(max_stack);
8268                                for i in 0..max_stack {
8269                                    let (tag, _) = self.stack[base_us + i].unpack();
8270                                    entry_tags.push(tag);
8271                                }
8272                                self.jit.active_trace =
8273                                    Some(Box::new(crate::jit::trace::TraceRecord::start(
8274                                        cl.proto, target, entry_tags, false,
8275                                    )));
8276                                // P12-S4 — record the frame the trace
8277                                // started in. The currently-running
8278                                // Lua frame is at len() - 1.
8279                                self.jit.recording_frame_base = self.frames.len() - 1;
8280                            }
8281                        }
8282                    }
8283                    self.for_loop(inst, base);
8284                }
8285                Op::TForPrep => {
8286                    // the 4th control slot is the iterator's closing value
8287                    self.register_tbc(base + inst.a() + 3)?;
8288                    self.add_pc(inst.bx() as i32);
8289                }
8290                Op::TForCall => {
8291                    let abs = base + inst.a();
8292                    let need = (abs + 7) as usize;
8293                    if self.stack.len() < need {
8294                        self.stack.resize(need, Value::Nil);
8295                    }
8296                    self.stack[(abs + 4) as usize] = self.stack[abs as usize];
8297                    self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
8298                    self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
8299                    let nvars = inst.c() as i32;
8300                    self.begin_call(abs + 4, Some(2), nvars, false)?;
8301                }
8302                Op::TForLoop => {
8303                    let a = inst.a();
8304                    let ctrl = self.r(base, a + 4);
8305                    if !ctrl.is_nil() {
8306                        // P12-S12-B v1 — trace JIT back-edge counter on
8307                        // generic-for back-edge. TForLoop sits at the
8308                        // tail of `for k,v in expr do ... end`; recorder
8309                        // treats it as the close-detection equivalent of
8310                        // a negative Op::Jmp. Gate on `take_back_edge`
8311                        // (= `ctrl != nil`) so empty-iter loops don't
8312                        // pollute hot_count. v1 only adds the trigger;
8313                        // whitelist + helper + emit live in v2.
8314                        if self.jit.trace_enabled {
8315                            let proto = cl.proto;
8316                            let c = proto.trace_hot_count.get();
8317                            if c < u32::MAX / 2 {
8318                                proto.trace_hot_count.set(c + 1);
8319                            }
8320                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8321                                && self.jit.active_trace.is_none()
8322                            {
8323                                // TForLoop back-edge target = pc after
8324                                // `add_pc(-bx)` runs from the already-
8325                                // bumped f.pc (= pc + 1). So target =
8326                                // (pc + 1) - bx, normally landing on
8327                                // body_top (the op right after TForPrep).
8328                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8329                                let max_stack = cl.proto.max_stack as usize;
8330                                let base_us = base as usize;
8331                                let mut entry_tags = Vec::with_capacity(max_stack);
8332                                for i in 0..max_stack {
8333                                    let (tag, _) = self.stack[base_us + i].unpack();
8334                                    entry_tags.push(tag);
8335                                }
8336                                // P12-S12-B-v5 — snapshot the iter
8337                                // fn's address if Native, so the
8338                                // lowerer can specialise ipairs into
8339                                // inline Table aget IR.
8340                                let iter_ptr =
8341                                    if let Value::Native(n) = self.stack[base_us + a as usize] {
8342                                        Some(n.f as usize)
8343                                    } else {
8344                                        None
8345                                    };
8346                                // P12-S12-C v3 — snapshot R[A+5]'s
8347                                // tag (= current iter's val from
8348                                // the just-fired TForCall). The v5
8349                                // inline aget fast_blk emits a
8350                                // runtime guard against this tag;
8351                                // mixed-tag arrays deopt rather
8352                                // than producing garbage pointers
8353                                // through the v2 spill path.
8354                                let val_slot = base_us + (a as usize) + 5;
8355                                let val_tag = if val_slot < self.stack.len() {
8356                                    Some(self.stack[val_slot].unpack().0)
8357                                } else {
8358                                    None
8359                                };
8360                                let mut rec = crate::jit::trace::TraceRecord::start(
8361                                    cl.proto, target, entry_tags, false,
8362                                );
8363                                rec.tfor_iter_ptr = iter_ptr;
8364                                rec.tfor_val_tag = val_tag;
8365                                self.jit.active_trace = Some(Box::new(rec));
8366                                self.jit.recording_frame_base = self.frames.len() - 1;
8367                            }
8368                        }
8369                        self.set_r(base, a + 2, ctrl);
8370                        self.add_pc(-(inst.bx() as i32));
8371                    }
8372                }
8373                Op::Closure => {
8374                    let proto = cl.proto.protos[inst.bx() as usize];
8375                    let n_ups = proto.upvals.len();
8376                    // P11-S5d.M — build upvals on the stack for small
8377                    // closures, skipping the per-call Vec/Box alloc
8378                    // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8379                    // = 2 covers most Lua source (1 captured local, or
8380                    // _ENV + a single capture). Beyond that, fall back
8381                    // to a heap Vec.
8382                    use crate::runtime::function::INLINE_UPVALS_N;
8383                    let mut stack_buf: [std::mem::MaybeUninit<
8384                        Gc<crate::runtime::function::Upvalue>,
8385                    >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8386                    let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8387                    let use_inline = n_ups <= INLINE_UPVALS_N;
8388                    if !use_inline {
8389                        heap_buf.reserve_exact(n_ups);
8390                    }
8391                    for (i, d) in proto.upvals.iter().enumerate() {
8392                        let uv = if d.in_stack {
8393                            self.find_or_create_upval(base + d.index as u32)
8394                        } else {
8395                            cl.upvals()[d.index as usize]
8396                        };
8397                        if use_inline {
8398                            stack_buf[i] = std::mem::MaybeUninit::new(uv);
8399                        } else {
8400                            heap_buf.push(uv);
8401                        }
8402                    }
8403                    // Tiny shim around the two paths so the 5.1 _ENV
8404                    // clone + cache check below see one uniform
8405                    // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8406                    // into the local frame (still valid through the
8407                    // rest of this Op::Closure handler).
8408                    let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8409                        // SAFETY: the first n_ups slots of stack_buf
8410                        // were initialised above; we hand out a slice
8411                        // covering exactly them.
8412                        unsafe {
8413                            std::slice::from_raw_parts_mut(
8414                                stack_buf.as_mut_ptr()
8415                                    as *mut Gc<crate::runtime::function::Upvalue>,
8416                                n_ups,
8417                            )
8418                        }
8419                    } else {
8420                        &mut heap_buf[..]
8421                    };
8422                    // PUC 5.1 had per-function environments: every Lua
8423                    // function carried its own `env` slot, snapshotted from
8424                    // the creating function's env at closure time, so a
8425                    // `setfenv` on one closure never bled into a sibling.
8426                    // luna models that by giving the 5.1 closure a *fresh*
8427                    // closed upvalue for whichever cell holds `_ENV`, seeded
8428                    // from the parent's current env value. Only that cell is
8429                    // cloned — every other upvalue keeps its open/shared
8430                    // identity (so e.g. `local function range(...) ...
8431                    // range(...) ... end` still sees its self-reference). 5.2+
8432                    // keeps the shared-upval model (and the proto cache that
8433                    // depends on it).
8434                    let v51 = self.version() <= LuaVersion::Lua51;
8435                    if v51 && proto.env_upval_idx != u8::MAX {
8436                        let i = proto.env_upval_idx as usize;
8437                        let cur = match ups[i].state() {
8438                            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8439                            UpvalState::Closed(v) => v,
8440                        };
8441                        ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8442                    }
8443                    let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8444                    // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8445                    // and reuses it when every fresh-upvalue binding still
8446                    // points to the same Upvalue object as the cached one.
8447                    // That keeps `function() return outer end` repeated in a
8448                    // loop comparing equal across iterations (the captured
8449                    // outer is a shared open upvalue), while `function()
8450                    // return loop_var end` gets a fresh closure each round
8451                    // because the loop var is re-created per iteration. PUC
8452                    // 5.1 predated the cache, and the per-closure `_ENV`
8453                    // clone above would defeat it anyway, so skip it.
8454                    let nc = if v51 {
8455                        self.heap.new_closure_inline(proto, ups_slice)
8456                    } else {
8457                        let cached = proto.cache.get().filter(|c| {
8458                            c.upvals().len() == ups_slice.len()
8459                                && c.upvals()
8460                                    .iter()
8461                                    .zip(ups_slice.iter())
8462                                    .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8463                        });
8464                        match cached {
8465                            Some(c) => c,
8466                            None => {
8467                                let n = self.heap.new_closure_inline(proto, ups_slice);
8468                                proto.cache.set(Some(n));
8469                                n
8470                            }
8471                        }
8472                    };
8473                    self.set_r(base, inst.a(), Value::Closure(nc));
8474                    self.maybe_collect_garbage(base + inst.a() + 1);
8475                }
8476                Op::Vararg => {
8477                    let abs_a = base + inst.a();
8478                    let wanted = inst.c() as i32 - 1;
8479                    // A materialized named vararg lives in func_slot (its writes
8480                    // must be visible to `...`); otherwise spread the extra args
8481                    // straight off the stack at func_slot+1 .. +n_varargs.
8482                    let vt = match self.stack[func_slot as usize] {
8483                        Value::Table(t) => Some(t),
8484                        _ => None,
8485                    };
8486                    let n = match vt {
8487                        Some(t) => {
8488                            let n_key = Value::Str(self.heap.intern(b"n"));
8489                            // PUC getnumargs: a named vararg `t.n` set out of the
8490                            // integer range [0, INT_MAX/2] is rejected here
8491                            match t.get(n_key) {
8492                                Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8493                                _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8494                            }
8495                        }
8496                        None => n_varargs,
8497                    };
8498                    let count = if wanted < 0 { n } else { wanted as u32 };
8499                    let need = (abs_a + count) as usize;
8500                    if self.stack.len() < need {
8501                        self.stack.resize(need, Value::Nil);
8502                    }
8503                    for i in 0..count {
8504                        let v = if i >= n {
8505                            Value::Nil
8506                        } else if let Some(t) = vt {
8507                            t.get_int(i as i64 + 1)
8508                        } else {
8509                            self.stack[(func_slot + 1 + i) as usize]
8510                        };
8511                        self.stack[(abs_a + i) as usize] = v;
8512                    }
8513                    if wanted < 0 {
8514                        self.top = abs_a + count;
8515                    }
8516                }
8517                Op::GetVarg => {
8518                    // materialize the vararg table (PUC table.pack shape) from the
8519                    // stack varargs — used when the named vararg is written /
8520                    // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8521                    // sees later writes) and in the local register R[A].
8522                    let n = n_varargs;
8523                    let t = self.heap.new_table();
8524                    {
8525                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8526                        let tm = unsafe { t.as_mut() };
8527                        for i in 0..n {
8528                            let _ = tm.set_int(
8529                                &mut self.heap,
8530                                i as i64 + 1,
8531                                self.stack[(func_slot + 1 + i) as usize],
8532                            );
8533                        }
8534                    }
8535                    let n_key = Value::Str(self.heap.intern(b"n"));
8536                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8537                    unsafe { t.as_mut() }
8538                        .set(&mut self.heap, n_key, Value::Int(n as i64))
8539                        .expect("'n' is a valid key");
8540                    // once-per-table barrier (mirror SETLIST): t is born BLACK
8541                    // during Propagate; the bulk inserts above don't barrier.
8542                    self.heap
8543                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8544                    self.stack[func_slot as usize] = Value::Table(t);
8545                    self.set_r(base, inst.a(), Value::Table(t));
8546                }
8547                Op::VargIdx => {
8548                    // R[A] := vararg[R[C]] without allocating: integer key in
8549                    // [1,n] → that vararg, "n" → the count, else nil.
8550                    let key = self.r(base, inst.c());
8551                    let n = n_varargs;
8552                    let v = match key {
8553                        Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8554                            self.stack[(func_slot + k as u32) as usize]
8555                        }
8556                        Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8557                            self.stack[(func_slot + f as u32) as usize]
8558                        }
8559                        Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8560                        _ => Value::Nil,
8561                    };
8562                    self.set_r(base, inst.a(), v);
8563                }
8564                Op::ErrNNil => {
8565                    let v = self.r(base, inst.a());
8566                    if !matches!(v, Value::Nil) {
8567                        let bx = inst.bx();
8568                        let name = if bx == 0 {
8569                            "?".to_string()
8570                        } else {
8571                            match cl.proto.consts[(bx - 1) as usize] {
8572                                Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8573                                _ => "?".to_string(),
8574                            }
8575                        };
8576                        return Err(self.rt_err(&format!("global '{name}' already defined")));
8577                    }
8578                }
8579                Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8580            }
8581        }
8582    }
8583
8584    #[inline(always)]
8585    fn pc_of_top(&self) -> u32 {
8586        self.top_frame().pc
8587    }
8588
8589    #[inline(always)]
8590    fn bump_pc(&mut self) {
8591        // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8592        // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8593        // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8594        // up over `fib_28`'s ~500k jumps.
8595        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8596        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8597            CallFrame::Lua(f) => f.pc += 1,
8598            _ => unreachable!("Cont frame at bump_pc"),
8599        }
8600    }
8601
8602    #[inline(always)]
8603    fn add_pc(&mut self, d: i32) {
8604        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8605        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8606            CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8607            _ => unreachable!("Cont frame at add_pc"),
8608        }
8609    }
8610
8611    /// PUC conditional-skip convention: the JMP that follows is executed when
8612    /// `cond == k`; otherwise it is skipped.
8613    #[inline(always)]
8614    fn cond_skip(&mut self, cond: bool, k: bool) {
8615        if cond != k {
8616            self.bump_pc();
8617        }
8618    }
8619
8620    // ---- indexing (with __index/__newindex chains) ----
8621
8622    /// The `#` length operation: string byte length, `__len` if present, else
8623    /// the raw table border. Returns the raw length value (may be non-integer
8624    /// when `__len` is exotic).
8625    pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8626        match self.len_step(v)? {
8627            MmOut::Done(n) => Ok(n),
8628            // PUC calls unary metamethods with the operand twice
8629            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8630            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8631        }
8632    }
8633
8634    /// Length fast path: a string's byte count or a table's raw border when no
8635    /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8636    /// called with the operand twice. Errors for a non-table with no `__len`.
8637    fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8638        match v {
8639            Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8640            Value::Table(t) => {
8641                let mm = self.get_mm(v, Mm::Len);
8642                if mm.is_nil() {
8643                    Ok(MmOut::Done(Value::Int(t.len())))
8644                } else {
8645                    Ok(MmOut::Mm { func: mm, recv: v })
8646                }
8647            }
8648            _ => {
8649                let mm = self.get_mm(v, Mm::Len);
8650                if mm.is_nil() {
8651                    Err(self.type_err("get length of", v))
8652                } else {
8653                    Ok(MmOut::Mm { func: mm, recv: v })
8654                }
8655            }
8656        }
8657    }
8658
8659    /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8660    /// value with no integer representation.
8661    pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8662        match self.len_value(v)? {
8663            Value::Int(i) => Ok(i),
8664            Value::Float(f) => crate::runtime::value::f2i_exact(f)
8665                .ok_or_else(|| self.rt_err("object length is not an integer")),
8666            _ => Err(self.rt_err("object length is not an integer")),
8667        }
8668    }
8669
8670    pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8671        match self.index_step(t, key)? {
8672            MmOut::Done(v) => Ok(v),
8673            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8674            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8675        }
8676    }
8677
8678    /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8679    /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8680    /// are followed inline (no yield possible); only a function link can yield.
8681    fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8682        let mut cur = t;
8683        for _ in 0..MAX_TAG_LOOP {
8684            let mm = match cur {
8685                Value::Table(tb) => {
8686                    let v = tb.get(key);
8687                    if !v.is_nil() {
8688                        return Ok(MmOut::Done(v));
8689                    }
8690                    let mm = self.get_mm(cur, Mm::Index);
8691                    if mm.is_nil() {
8692                        return Ok(MmOut::Done(Value::Nil));
8693                    }
8694                    mm
8695                }
8696                v => {
8697                    let mm = self.get_mm(v, Mm::Index);
8698                    if mm.is_nil() {
8699                        return Err(self.type_err("index", v));
8700                    }
8701                    mm
8702                }
8703            };
8704            match mm {
8705                Value::Closure(_) | Value::Native(_) => {
8706                    return Ok(MmOut::Mm {
8707                        func: mm,
8708                        recv: cur,
8709                    });
8710                }
8711                next => cur = next,
8712            }
8713        }
8714        Err(self.rt_err("'__index' chain too long; possible loop"))
8715    }
8716
8717    pub(crate) fn newindex_value(
8718        &mut self,
8719        t: Value,
8720        key: Value,
8721        v: Value,
8722    ) -> Result<(), LuaError> {
8723        match self.newindex_step(t, key, v)? {
8724            MmOut::Done(_) => Ok(()),
8725            MmOut::Mm { func, recv } => {
8726                self.call_value(func, &[recv, key, v])?;
8727                Ok(())
8728            }
8729            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8730        }
8731    }
8732
8733    /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
8734    /// performed inline (returning `Done`); only a function metamethod (`Mm`)
8735    /// needs an actual call — which the caller may run yieldably.
8736    fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
8737        // v2.13 WUC read-time probe (gc-verify): a dead query key at a
8738        // WRITE site, attributed to the instruction that produced it.
8739        #[cfg(feature = "gc-verify")]
8740        if let Some(p) = (match key {
8741            Value::Str(s) => Some(s.as_ptr() as usize),
8742            Value::Table(t2) => Some(t2.as_ptr() as usize),
8743            _ => None,
8744        }) {
8745            if crate::runtime::gc_verify_probe::is_freed(p) {
8746                let detail = match self.frames.last() {
8747                    Some(CallFrame::Lua(f)) => {
8748                        let pc = f.pc as usize;
8749                        let mut w = String::new();
8750                        for q in pc.saturating_sub(6)..(pc + 2) {
8751                            if let Some(inst) = f.closure.proto.code.get(q) {
8752                                w.push_str(&format!(
8753                                    "\n  [{q}] {:?} a={} b={} c={} k={}",
8754                                    inst.op(),
8755                                    inst.a(),
8756                                    inst.b(),
8757                                    inst.c(),
8758                                    inst.k()
8759                                ));
8760                            }
8761                        }
8762                        format!("pc={pc} base={} gc_top={} window:{w}", f.base, self.gc_top)
8763                    }
8764                    _ => "non-Lua frame".into(),
8765                };
8766                panic!("[gc-verify] newindex_step QUERY key {p:#x} freed. {detail}");
8767            }
8768        }
8769        let mut cur = t;
8770        for _ in 0..MAX_TAG_LOOP {
8771            let mm = match cur {
8772                Value::Table(tb) => {
8773                    // PI-A3 single-walk collapse — Table::try_set_existing
8774                    // fuses the prior `tb.get(key).is_nil()` gate and
8775                    // `raw_set` walk into one chain traversal when the
8776                    // key is already present with a non-nil value. The
8777                    // __newindex chain semantics are preserved by the
8778                    // identity (slot_nil ⇔ fire_newindex); see
8779                    // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
8780                    //
8781                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
8782                    // heap is single-threaded and the pointer is live as
8783                    // long as it is reachable from active roots (see
8784                    // heap.rs:5-7). Mirrors the raw_set wrapper below.
8785                    if unsafe { tb.as_mut() }.try_set_existing(key, v) {
8786                        self.heap
8787                            .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
8788                        return Ok(MmOut::Done(Value::Nil));
8789                    }
8790                    let mm = self.get_mm(cur, Mm::NewIndex);
8791                    if mm.is_nil() {
8792                        self.raw_set(tb, key, v)?;
8793                        return Ok(MmOut::Done(Value::Nil));
8794                    }
8795                    mm
8796                }
8797                bad => {
8798                    let mm = self.get_mm(bad, Mm::NewIndex);
8799                    if mm.is_nil() {
8800                        return Err(self.type_err("index", bad));
8801                    }
8802                    mm
8803                }
8804            };
8805            match mm {
8806                Value::Closure(_) | Value::Native(_) => {
8807                    return Ok(MmOut::Mm {
8808                        func: mm,
8809                        recv: cur,
8810                    });
8811                }
8812                next => cur = next,
8813            }
8814        }
8815        Err(self.rt_err("'__newindex' chain too long; possible loop"))
8816    }
8817
8818    fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8819        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8820        match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8821            Ok(()) => {
8822                self.heap
8823                    .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8824                Ok(())
8825            }
8826            Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8827            Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8828            Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8829            Err(TableError::InvalidNext) => unreachable!(),
8830        }
8831    }
8832
8833    /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8834    /// the boolean result; `Mm` (when raw equality fails and both are tables
8835    /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8836    fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8837        if l.raw_eq(r) {
8838            return MmOut::Done(Value::Bool(true));
8839        }
8840        if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8841            (l, r)
8842        {
8843            // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8844            // (and earlier) required the two operands' metatables to expose a
8845            // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8846            // metatable falls straight back to raw inequality. events.lua 5.1
8847            // :262 bakes this in.
8848            let mm = if self.version() <= LuaVersion::Lua51 {
8849                self.get_comp_mm(l, r, Mm::Eq)
8850            } else {
8851                let mut m = self.get_mm(l, Mm::Eq);
8852                if m.is_nil() {
8853                    m = self.get_mm(r, Mm::Eq);
8854                }
8855                m
8856            };
8857            if !mm.is_nil() {
8858                return MmOut::Mm { func: mm, recv: l };
8859            }
8860        }
8861        MmOut::Done(Value::Bool(false))
8862    }
8863
8864    // ---- arithmetic ----
8865
8866    #[inline(always)]
8867    fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8868        let l = self.r(base, inst.b());
8869        let r = self.r(base, inst.c());
8870        // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8871        // binary_trees all hammer these. Skipping coerce_num + the big
8872        // arith_fast match shaves several conditional moves per op.
8873        if let (Value::Int(a), Value::Int(b)) = (l, r) {
8874            let fast = match op {
8875                ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8876                ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8877                ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8878                _ => None,
8879            };
8880            if let Some(v) = fast {
8881                self.set_r(base, inst.a(), v);
8882                return Ok(());
8883            }
8884        }
8885        // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8886        // and any numeric workload with non-integer accumulators benefits.
8887        if let (Value::Float(a), Value::Float(b)) = (l, r) {
8888            let fast = match op {
8889                ArithOp::Add => Some(Value::Float(a + b)),
8890                ArithOp::Sub => Some(Value::Float(a - b)),
8891                ArithOp::Mul => Some(Value::Float(a * b)),
8892                ArithOp::Div => Some(Value::Float(a / b)),
8893                _ => None,
8894            };
8895            if let Some(v) = fast {
8896                self.set_r(base, inst.a(), v);
8897                return Ok(());
8898            }
8899        }
8900        match self.arith_fast(op, l, r)? {
8901            Some(v) => self.set_r(base, inst.a(), v),
8902            None => {
8903                let mm = self.arith_mm_func(op, l, r)?;
8904                let dst = base + inst.a();
8905                self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8906            }
8907        }
8908        Ok(())
8909    }
8910
8911    /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8912    /// directly, `Ok(None)` when a metamethod is required (the caller decides
8913    /// whether to call it synchronously or yieldably).
8914    fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8915        use ArithOp::*;
8916        match op {
8917            BAnd | BOr | BXor | Shl | Shr => {
8918                // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8919                match (coerce_num(l), coerce_num(r)) {
8920                    (Some(a), Some(b)) => {
8921                        let to_int = |n: Num| match n {
8922                            Num::Int(i) => Some(i),
8923                            Num::Float(f) => crate::runtime::value::f2i_exact(f),
8924                        };
8925                        let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8926                            // PUC luaG_tointerror: name the offending operand
8927                            return Err(self.no_int_rep_err());
8928                        };
8929                        let v = match op {
8930                            BAnd => a & b,
8931                            BOr => a | b,
8932                            BXor => a ^ b,
8933                            Shl => shift_left(a, b),
8934                            Shr => shift_left(a, b.wrapping_neg()),
8935                            _ => unreachable!(),
8936                        };
8937                        return Ok(Some(Value::Int(v)));
8938                    }
8939                    _ => return Ok(None),
8940                }
8941            }
8942            _ => {}
8943        }
8944        let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8945            (Some(a), Some(b)) => (a, b),
8946            _ => return Ok(None),
8947        };
8948        let v = match (op, ln, rn) {
8949            (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8950            (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8951            (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8952            (IDiv, Num::Int(a), Num::Int(b)) => {
8953                if b == 0 {
8954                    return Err(self.rt_err("attempt to divide by zero"));
8955                }
8956                let mut q = a.wrapping_div(b);
8957                if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8958                    q -= 1;
8959                }
8960                Value::Int(q)
8961            }
8962            (Mod, Num::Int(a), Num::Int(b)) => {
8963                if b == 0 {
8964                    return Err(self.rt_err("attempt to perform 'n%0'"));
8965                }
8966                let mut m = a.wrapping_rem(b);
8967                if m != 0 && (m ^ b) < 0 {
8968                    m += b;
8969                }
8970                Value::Int(m)
8971            }
8972            (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8973            (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8974            (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8975            (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8976            (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8977            (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8978            (Mod, a, b) => {
8979                let (x, y) = (a.as_f64(), b.as_f64());
8980                // PUC luai_nummod: correct fmod's sign without the `m*y`
8981                // product, which underflows to 0 for tiny denormals
8982                let mut m = x % y;
8983                if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8984                    m += y;
8985                }
8986                Value::Float(m)
8987            }
8988            _ => unreachable!(),
8989        };
8990        Ok(Some(v))
8991    }
8992
8993    pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8994        match v {
8995            Value::Int(i) => Ok(i),
8996            Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8997                Some(i) => Ok(i),
8998                None => Err(self.rt_err("number has no integer representation")),
8999            },
9000            v => Err(self.type_err(what, v)),
9001        }
9002    }
9003
9004    fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
9005        match n {
9006            Num::Int(i) => Ok(i),
9007            Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
9008                Some(i) => Ok(i),
9009                None => Err(self.rt_err("number has no integer representation")),
9010            },
9011        }
9012    }
9013
9014    /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
9015    /// PUC type error when neither operand provides one.
9016    fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
9017        use ArithOp::*;
9018        let event = match op {
9019            Add => Mm::Add,
9020            Sub => Mm::Sub,
9021            Mul => Mm::Mul,
9022            Div => Mm::Div,
9023            Mod => Mm::Mod,
9024            Pow => Mm::Pow,
9025            IDiv => Mm::IDiv,
9026            BAnd => Mm::BAnd,
9027            BOr => Mm::BOr,
9028            BXor => Mm::BXor,
9029            Shl => Mm::Shl,
9030            Shr => Mm::Shr,
9031        };
9032        let mut mm = self.get_mm(l, event);
9033        if mm.is_nil() {
9034            mm = self.get_mm(r, event);
9035        }
9036        if mm.is_nil() {
9037            let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
9038                "perform bitwise operation on"
9039            } else {
9040                // 5.4+ report string-involved arithmetic faults through
9041                // lstrlib's string-metatable arithmetic handlers, which
9042                // emit the per-op wording `attempt to add a 'string'
9043                // with a 'number'` (operands in syntactic order, quoted
9044                // type names, no varinfo). Non-string faults (nil+1,
9045                // {}+{}) keep the classic VM wording on every dialect —
9046                // v2.14 HC.4, probed against stock 5.1.5-5.5.0.
9047                if self.version >= crate::version::LuaVersion::Lua54
9048                    && (matches!(l, Value::Str(_)) || matches!(r, Value::Str(_)))
9049                {
9050                    let verb = match op {
9051                        Add => "add",
9052                        Sub => "sub",
9053                        Mul => "mul",
9054                        Div => "div",
9055                        Mod => "mod",
9056                        Pow => "pow",
9057                        IDiv => "idiv",
9058                        BAnd | BOr | BXor | Shl | Shr => unreachable!(),
9059                    };
9060                    let t1 = self.obj_typename(l);
9061                    let t2 = self.obj_typename(r);
9062                    return Err(self.rt_err(&format!("attempt to {verb} a '{t1}' with a '{t2}'")));
9063                }
9064                "perform arithmetic on"
9065            };
9066            let bad = if coerce_num(l).is_none() { l } else { r };
9067            return Err(self.type_err(what, bad));
9068        }
9069        Ok(mm)
9070    }
9071
9072    // ---- comparison ----
9073
9074    pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
9075        match self.less_step(l, r, or_eq)? {
9076            MmOut::Done(v) => Ok(v.truthy()),
9077            MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
9078            MmOut::CompareSynth { func } => {
9079                // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
9080                // by library code (sort comparator etc.) — no yield expected
9081                // here (a yield would have hit `call_noyield`'s C boundary).
9082                Ok(!self.call_mm1(func, &[r, l])?.truthy())
9083            }
9084        }
9085    }
9086
9087    /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
9088    /// carries the boolean result; `Mm` (for non-number/string operands) carries
9089    /// the metamethod — called with `(l, r)`; raises the PUC compare error when
9090    /// neither operand provides one.
9091    fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
9092        let b = match (l, r) {
9093            (Value::Int(a), Value::Int(b)) => {
9094                if or_eq {
9095                    a <= b
9096                } else {
9097                    a < b
9098                }
9099            }
9100            (Value::Float(a), Value::Float(b)) => {
9101                if or_eq {
9102                    a <= b
9103                } else {
9104                    a < b
9105                }
9106            }
9107            (Value::Int(a), Value::Float(b)) => {
9108                if or_eq {
9109                    int_le_float(a, b)
9110                } else {
9111                    int_lt_float(a, b)
9112                }
9113            }
9114            (Value::Float(a), Value::Int(b)) => {
9115                if a.is_nan() {
9116                    false
9117                } else if or_eq {
9118                    !int_lt_float(b, a)
9119                } else {
9120                    !int_le_float(b, a)
9121                }
9122            }
9123            (Value::Str(a), Value::Str(b)) => {
9124                let (a, b) = (a.as_bytes(), b.as_bytes());
9125                if or_eq { a <= b } else { a < b }
9126            }
9127            (l, r) => {
9128                let event = if or_eq { Mm::Le } else { Mm::Lt };
9129                // PUC 5.1's `get_compTM` rule applies to ordered comparisons
9130                // too: both operands' metatables must expose the same
9131                // implementation for `__lt` / `__le` to fire. events.lua 5.1
9132                // :262 expects `c < d` (where `d` has no metatable) to error
9133                // with the default "attempt to compare two table values"
9134                // rather than running c's `__lt` blindly.
9135                let mm = if self.version() <= LuaVersion::Lua51 {
9136                    self.get_comp_mm(l, r, event)
9137                } else {
9138                    let mut m = self.get_mm(l, event);
9139                    if m.is_nil() {
9140                        m = self.get_mm(r, event);
9141                    }
9142                    m
9143                };
9144                // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
9145                // operand carries `__le`. 5.4 dropped the synthesis (now
9146                // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
9147                // on the synthesis — its metatable defines only `__lt`.
9148                // The fallback calls `__lt(r, l)` synchronously (the suite's
9149                // `__lt` doesn't yield) and negates the result; the yieldable
9150                // `__lt` path stays reserved for the explicit `<` operator.
9151                if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
9152                    let lt = Mm::Lt;
9153                    let mut mm_lt = self.get_mm(l, lt);
9154                    if mm_lt.is_nil() {
9155                        mm_lt = self.get_mm(r, lt);
9156                    }
9157                    if !mm_lt.is_nil() {
9158                        return Ok(MmOut::CompareSynth { func: mm_lt });
9159                    }
9160                }
9161                if mm.is_nil() {
9162                    // PUC luaG_ordererror: "two X values" when the operand
9163                    // types match, "X with Y" otherwise (objtypename-aware).
9164                    let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
9165                    return Err(self.rt_err(&if t1 == t2 {
9166                        format!("attempt to compare two {t1} values")
9167                    } else {
9168                        format!("attempt to compare {t1} with {t2}")
9169                    }));
9170                }
9171                return Ok(MmOut::Mm { func: mm, recv: l });
9172            }
9173        };
9174        Ok(MmOut::Done(Value::Bool(b)))
9175    }
9176
9177    // ---- numeric for ----
9178
9179    fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
9180        let a = inst.a();
9181        let init = self.r(base, a);
9182        let limit = self.r(base, a + 1);
9183        let step = self.r(base, a + 2);
9184        let (Some(init_n), Some(limit_n), Some(step_n)) =
9185            (as_num(init), as_num(limit), as_num(step))
9186        else {
9187            // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
9188            // PUC checks limit, then step, then initial value.
9189            let (what, bad) = if as_num(limit).is_none() {
9190                ("limit", limit)
9191            } else if as_num(step).is_none() {
9192                ("step", step)
9193            } else {
9194                ("initial value", init)
9195            };
9196            let tn = self.obj_typename(bad);
9197            return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
9198        };
9199        // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
9200        // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
9201        // first test, so each successful iteration emits a backward `OP_FORLOOP`
9202        // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
9203        // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
9204        // distance in luna's encoding is `loop_pc - prep_pc`; firing
9205        // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
9206        let pre53 = self.version() <= LuaVersion::Lua53;
9207        match (init_n, step_n) {
9208            (Num::Int(i0), Num::Int(st)) => {
9209                if st == 0 {
9210                    return Err(self.rt_err("'for' step is zero"));
9211                }
9212                if pre53 {
9213                    // PUC 5.3 `forlimit`: int limit passes through; float limit
9214                    // gets clamped to MIN/MAX with a `stopnow` flag set only
9215                    // when the clamp is unreachable (positive float with a
9216                    // negative step → limit=MAX, stopnow; negative float with
9217                    // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
9218                    // `init = 0` so OP_FORLOOP's first test against the
9219                    // unreachable clamp fails cleanly. An ordinary in-range
9220                    // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
9221                    // lets OP_FORLOOP's natural test reject the first step.
9222                    let (lim, stopnow) = match limit_n {
9223                        Num::Int(l) => (l, false),
9224                        Num::Float(f) => {
9225                            if f.is_nan() {
9226                                (0, true)
9227                            } else if f >= i64::MAX as f64 + 1.0 {
9228                                // beyond +MAX: unreachable for a decreasing loop
9229                                (i64::MAX, st < 0)
9230                            } else if f <= i64::MIN as f64 {
9231                                // beyond -MIN: unreachable for an increasing loop
9232                                (i64::MIN, st >= 0)
9233                            } else if st > 0 {
9234                                (f.floor() as i64, false)
9235                            } else {
9236                                (f.ceil() as i64, false)
9237                            }
9238                        }
9239                    };
9240                    let initv = if stopnow { 0 } else { i0 };
9241                    let pre = initv.wrapping_sub(st);
9242                    self.set_r(base, a, Value::Int(pre));
9243                    self.set_r(base, a + 1, Value::Int(lim));
9244                    self.set_r(base, a + 2, Value::Int(st));
9245                    self.add_pc(inst.bx() as i32 - 1);
9246                    return Ok(());
9247                }
9248                let (lim, empty) = int_for_limit(limit_n, i0, st);
9249                if empty {
9250                    self.add_pc(inst.bx() as i32);
9251                    return Ok(());
9252                }
9253                let count = if st > 0 {
9254                    (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
9255                } else {
9256                    (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
9257                };
9258                self.set_r(base, a, Value::Int(i0));
9259                self.set_r(base, a + 1, Value::Int(count as i64));
9260                self.set_r(base, a + 2, Value::Int(st));
9261                self.set_r(base, a + 3, Value::Int(i0));
9262            }
9263            _ => {
9264                let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
9265                if st == 0.0 {
9266                    return Err(self.rt_err("'for' step is zero"));
9267                }
9268                if pre53 {
9269                    let pre = x0 - st;
9270                    self.set_r(base, a, Value::Float(pre));
9271                    self.set_r(base, a + 1, Value::Float(lim));
9272                    self.set_r(base, a + 2, Value::Float(st));
9273                    self.add_pc(inst.bx() as i32 - 1);
9274                    return Ok(());
9275                }
9276                let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
9277                if !runs {
9278                    self.add_pc(inst.bx() as i32);
9279                    return Ok(());
9280                }
9281                self.set_r(base, a, Value::Float(x0));
9282                self.set_r(base, a + 1, Value::Float(lim));
9283                self.set_r(base, a + 2, Value::Float(st));
9284                self.set_r(base, a + 3, Value::Float(x0));
9285            }
9286        }
9287        Ok(())
9288    }
9289
9290    #[inline(always)]
9291    fn for_loop(&mut self, inst: Inst, base: u32) {
9292        let a = inst.a();
9293        // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
9294        // directly (R[a+1] holds the limit, *not* a remaining-count) so the
9295        // first iteration's test fires through the same backward-jump path as
9296        // every later iteration. 5.4+ switched to the count-based form luna
9297        // already uses for `Int`; the float branch was already PUC-3.x-style.
9298        let pre53 = self.version() <= LuaVersion::Lua53;
9299        match self.r(base, a) {
9300            Value::Int(cur) if pre53 => {
9301                let Value::Int(lim) = self.r(base, a + 1) else {
9302                    unreachable!()
9303                };
9304                let Value::Int(st) = self.r(base, a + 2) else {
9305                    unreachable!()
9306                };
9307                let next = cur.wrapping_add(st);
9308                let cont = if st > 0 { next <= lim } else { next >= lim };
9309                if cont {
9310                    self.set_r(base, a, Value::Int(next));
9311                    self.set_r(base, a + 3, Value::Int(next));
9312                    self.add_pc(-(inst.bx() as i32));
9313                }
9314            }
9315            Value::Int(cur) => {
9316                let Value::Int(count) = self.r(base, a + 1) else {
9317                    unreachable!()
9318                };
9319                if count > 0 {
9320                    let Value::Int(st) = self.r(base, a + 2) else {
9321                        unreachable!()
9322                    };
9323                    let next = cur.wrapping_add(st);
9324                    self.set_r(base, a, Value::Int(next));
9325                    self.set_r(base, a + 1, Value::Int(count - 1));
9326                    self.set_r(base, a + 3, Value::Int(next));
9327                    self.add_pc(-(inst.bx() as i32));
9328                }
9329            }
9330            Value::Float(cur) => {
9331                let Value::Float(lim) = self.r(base, a + 1) else {
9332                    unreachable!()
9333                };
9334                let Value::Float(st) = self.r(base, a + 2) else {
9335                    unreachable!()
9336                };
9337                let next = cur + st;
9338                let cont = if st > 0.0 { next <= lim } else { next >= lim };
9339                if cont {
9340                    self.set_r(base, a, Value::Float(next));
9341                    self.set_r(base, a + 3, Value::Float(next));
9342                    self.add_pc(-(inst.bx() as i32));
9343                }
9344            }
9345            _ => unreachable!("corrupt for-loop state"),
9346        }
9347    }
9348
9349    // ---- native helpers (used by builtins) ----
9350
9351    /// A native function's own captured upvalue (self lives at func_slot).
9352    ///
9353    /// Public so `native_typed` trampolines and embedders authoring
9354    /// stateful natives via `native_with(...)` can read their upvals.
9355    pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
9356        let Value::Native(nc) = self.stack[func_slot as usize] else {
9357            unreachable!("native frame without native closure");
9358        };
9359        nc.upvals[i]
9360    }
9361
9362    /// Number of upvalues captured by the native at `func_slot` (variadic
9363    /// captures such as the `io.lines` format list).
9364    pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
9365        let Value::Native(nc) = self.stack[func_slot as usize] else {
9366            unreachable!("native frame without native closure");
9367        };
9368        nc.upvals.len()
9369    }
9370
9371    /// Write a native function's own upvalue (stateful iterators).
9372    pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
9373        let Value::Native(nc) = self.stack[func_slot as usize] else {
9374            unreachable!("native frame without native closure");
9375        };
9376        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9377        unsafe { nc.as_mut() }.upvals[i] = v;
9378        // NativeClosure.upvals is traced as part of its Trace; a long-lived
9379        // stateful iterator closure (e.g. string.gmatch) sees many writes —
9380        // barrier_back once-and-done is cheaper than per-child forward.
9381        self.heap
9382            .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9383    }
9384
9385    /// Read the i-th positional argument inside a `NativeFn` body
9386    /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9387    /// matching PUC's "missing arg is nil" contract. Public so embedders
9388    /// can author their own natives.
9389    pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9390        if i < nargs {
9391            self.stack[(func_slot + 1 + i) as usize]
9392        } else {
9393            Value::Nil
9394        }
9395    }
9396
9397    /// Push the return values of a `NativeFn` and return their count
9398    /// (analogous to pushing N values then `return N` from a C function).
9399    /// Public so embedders can author their own natives.
9400    pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9401        let need = func_slot as usize + vals.len();
9402        if self.stack.len() < need {
9403            self.stack.resize(need, Value::Nil);
9404        }
9405        for (i, &v) in vals.iter().enumerate() {
9406            self.stack[func_slot as usize + i] = v;
9407        }
9408        vals.len() as u32
9409    }
9410
9411    /// Fast string concatenation of an adjacent pair, or `None` when a
9412    /// `__concat` metamethod is required.
9413    fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9414        let legacy = self.float_fmt();
9415        // Length-check fast paths for both string operands BEFORE the
9416        // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9417        // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9418        // overflow on the first pair that would exceed `INT_MAX` instead
9419        // of allocating multi-GB intermediates first.
9420        let max_str = i32::MAX as usize;
9421        if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9422            let a_len = ls.as_bytes().len();
9423            let b_len = rs.as_bytes().len();
9424            let new_len = a_len.checked_add(b_len);
9425            if new_len.is_none() || new_len.unwrap() > max_str {
9426                return Err(self.rt_err("string length overflow"));
9427            }
9428        }
9429        match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9430            (Some(a), Some(b)) => {
9431                // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9432                // concat past it raises "string length overflow"
9433                // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9434                // exactly this wall).
9435                let new_len = a.len().checked_add(b.len());
9436                if new_len.is_none() || new_len.unwrap() > max_str {
9437                    return Err(self.rt_err("string length overflow"));
9438                }
9439                let mut combined = a;
9440                combined.extend_from_slice(&b);
9441                Ok(Some(Value::Str(self.heap.intern(&combined))))
9442            }
9443            _ => Ok(None),
9444        }
9445    }
9446
9447    /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9448    /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9449    /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9450    /// call — its `Meta` continuation re-enters here on the metamethod's return.
9451    fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9452        // Sum the lengths of all all-Str operands BEFORE starting the
9453        // right-associative fold so a 129-operand `a..a..…` chain
9454        // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9455        // not after dozens of multi-GB intermediate intern+hash rounds.
9456        // A non-Str operand falls through to the per-pair check.
9457        let max_str = i32::MAX as usize;
9458        let mut total: usize = 0;
9459        let mut all_str = true;
9460        for slot in base_a..self.top {
9461            match self.stack[slot as usize] {
9462                Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9463                    Some(t) if t <= max_str => total = t,
9464                    _ => return Err(self.rt_err("string length overflow")),
9465                },
9466                _ => {
9467                    all_str = false;
9468                    break;
9469                }
9470            }
9471        }
9472        let _ = all_str; // discrimination already captured by early returns above
9473        while self.top.saturating_sub(base_a) >= 2 {
9474            let i = self.top - 1; // rightmost operand
9475            let x = self.stack[(i - 1) as usize];
9476            let y = self.stack[i as usize];
9477            match self.concat_pair(x, y)? {
9478                Some(s) => {
9479                    self.stack[(i - 1) as usize] = s;
9480                    self.top = i; // consumed y
9481                }
9482                None => {
9483                    let mut mm = self.get_mm(x, Mm::Concat);
9484                    if mm.is_nil() {
9485                        mm = self.get_mm(y, Mm::Concat);
9486                    }
9487                    if mm.is_nil() {
9488                        let legacy = self.float_fmt();
9489                        let bad = if concat_piece(x, legacy).is_none() {
9490                            x
9491                        } else {
9492                            y
9493                        };
9494                        return Err(self.type_err("concatenate", bad));
9495                    }
9496                    // result lands at i-1, dropping y (top→i); resume continues.
9497                    let dst = i - 1;
9498                    self.begin_meta_call(
9499                        mm,
9500                        &[x, y],
9501                        MetaAction::Concat { dst, base_a },
9502                        "concat",
9503                    )?;
9504                    return Ok(());
9505                }
9506            }
9507        }
9508        self.maybe_collect_garbage(base_a + 1);
9509        Ok(())
9510    }
9511
9512    /// tostring with __tostring / __name support.
9513    pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9514        let mm = self.get_mm(v, Mm::ToString);
9515        if !mm.is_nil() {
9516            return match self.call_mm1(mm, &[v])? {
9517                Value::Str(s) => Ok(s.as_bytes().to_vec()),
9518                _ => Err(self.rt_err("'__tostring' must return a string")),
9519            };
9520        }
9521        if let Value::Table(t) = v
9522            && let Value::Str(name) = self.get_mm(v, Mm::Name)
9523        {
9524            let mut out = name.as_bytes().to_vec();
9525            out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9526            return Ok(out);
9527        }
9528        Ok(self.tostring_basic(v))
9529    }
9530
9531    /// The dialect's float-rendering flavor (v2.14 HD): ≤5.2 %.14g
9532    /// bare, 5.3/5.4 %.14g + ".0", 5.5 two-stage %.15g/%.17g + ".0".
9533    pub(crate) fn float_fmt(&self) -> numeric::FloatFmt {
9534        use crate::version::LuaVersion::*;
9535        match self.version {
9536            Lua51 | Lua52 => numeric::FloatFmt::Legacy14,
9537            Lua53 | Lua54 => numeric::FloatFmt::G14,
9538            _ => numeric::FloatFmt::TwoStage55,
9539        }
9540    }
9541
9542    /// Basic tostring (no metamethods).
9543    pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9544        match v {
9545            Value::Nil => b"nil".to_vec(),
9546            Value::Bool(true) => b"true".to_vec(),
9547            Value::Bool(false) => b"false".to_vec(),
9548            Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9549            // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9550            // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9551            // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9552            // concatenating these renderings.
9553            Value::Float(f) => {
9554                numeric::num_to_string_for(Num::Float(f), self.float_fmt()).into_bytes()
9555            }
9556            Value::Str(s) => s.as_bytes().to_vec(),
9557            Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9558            Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9559            Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9560            Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9561            // PUC names file handles `file (0x…)`; a bare userdata is
9562            // `userdata: 0x…`. The io library overrides this via __tostring.
9563            Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9564            // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9565            // (the "light" qualifier only appears in `luaL_typeerror`).
9566            Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9567        }
9568    }
9569}
9570
/// Binary arithmetic / bitwise operator selector. Each variant maps to one
/// metamethod event name via [`ArithOp::mm_name`].
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    Add,
    Sub,
    Mul,
    Mod,
    Pow,
    Div,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
}

impl ArithOp {
    /// Metamethod event name in PUC spelling, without the leading `__`
    /// (`__add` → `"add"`, …). `debug.getinfo(level, "n")` reports this name
    /// from inside a metamethod handler.
    fn mm_name(self) -> &'static str {
        match self {
            Self::Add => "add",
            Self::Sub => "sub",
            Self::Mul => "mul",
            Self::Mod => "mod",
            Self::Pow => "pow",
            Self::Div => "div",
            Self::IDiv => "idiv",
            Self::BAnd => "band",
            Self::BOr => "bor",
            Self::BXor => "bxor",
            Self::Shl => "shl",
            Self::Shr => "shr",
        }
    }
}
9607
9608fn as_num(v: Value) -> Option<Num> {
9609    match v {
9610        Value::Int(i) => Some(Num::Int(i)),
9611        Value::Float(f) => Some(Num::Float(f)),
9612        // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9613        Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9614        _ => None,
9615    }
9616}
9617
9618/// A concatenable operand's byte form (string, or a number coerced to its
9619/// string), or `None` when only a `__concat` metamethod can handle it.
9620/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9621/// suffix on integer-valued floats) — see `num_to_string_for`.
9622fn concat_piece(v: Value, float_fmt: numeric::FloatFmt) -> Option<Vec<u8>> {
9623    match v {
9624        Value::Str(s) => Some(s.as_bytes().to_vec()),
9625        Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9626        Value::Float(x) => Some(numeric::num_to_string_for(Num::Float(x), float_fmt).into_bytes()),
9627        _ => None,
9628    }
9629}
9630
9631/// Index into the per-basic-type metatable table for a non-table value
9632/// (None for tables, which carry their own metatable).
9633fn type_mt_slot(v: Value) -> Option<usize> {
9634    match v {
9635        Value::Nil => Some(0),
9636        Value::Bool(_) => Some(1),
9637        Value::Int(_) | Value::Float(_) => Some(2),
9638        Value::Str(_) => Some(3),
9639        Value::Closure(_) | Value::Native(_) => Some(4),
9640        // tables and full userdata carry their own metatable; threads and
9641        // light userdata have none (PUC keeps a shared per-type mt slot for
9642        // light, but luna doesn't expose it — no test gates on it yet).
9643        Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9644    }
9645}
9646
9647/// Number, or string coerced to number (5.5 default string-arith coercion).
9648fn coerce_num(v: Value) -> Option<Num> {
9649    match v {
9650        Value::Int(i) => Some(Num::Int(i)),
9651        Value::Float(f) => Some(Num::Float(f)),
9652        Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9653        _ => None,
9654    }
9655}
9656
/// Lua left shift of `a` by `b` bits: logical on the 64-bit pattern. A
/// negative `b` shifts right instead, and any |b| ≥ 64 yields 0.
fn shift_left(a: i64, b: i64) -> i64 {
    let bits = a as u64;
    match b {
        0..=63 => (bits << (b as u32)) as i64,
        -63..=-1 => (bits >> ((-b) as u32)) as i64,
        // everything at or beyond ±64 shifts all bits out
        _ => 0,
    }
}
9672
/// Exact `i < f` across the integer/float divide (PUC LTintfloat shape).
/// NaN compares false; floats outside the i64 range are decided by sign.
fn int_lt_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    if f >= TWO_POW_63 {
        return true;
    }
    let floored = f.floor();
    let whole = floored as i64;
    if floored == f {
        // f is integral: plain integer comparison.
        i < whole
    } else {
        // f has a fraction: i < f exactly when i ≤ ⌊f⌋.
        i <= whole
    }
}
9688
/// Exact `i <= f` across the integer/float divide. NaN compares false;
/// floats outside the i64 range are decided by sign.
fn int_le_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    if f.is_nan() || f < -TWO_POW_63 {
        false
    } else if f >= TWO_POW_63 {
        true
    } else {
        // i ≤ f exactly when i ≤ ⌊f⌋, and ⌊f⌋ fits in i64 here.
        let whole = f.floor() as i64;
        i <= whole
    }
}
9702
9703/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9704/// (clipped limit, loop-is-empty).
9705fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9706    match limit {
9707        Num::Int(l) => {
9708            let empty = if step > 0 { init > l } else { init < l };
9709            (l, empty)
9710        }
9711        Num::Float(f) => {
9712            if f.is_nan() {
9713                return (0, true);
9714            }
9715            if step > 0 {
9716                if f >= 9_223_372_036_854_775_808.0 {
9717                    (i64::MAX, false)
9718                } else {
9719                    let l = f.floor();
9720                    if l < -9_223_372_036_854_775_808.0 {
9721                        (i64::MIN, true)
9722                    } else {
9723                        let li = l as i64;
9724                        (li, init > li)
9725                    }
9726                }
9727            } else if f <= -9_223_372_036_854_775_808.0 {
9728                (i64::MIN, false)
9729            } else {
9730                let l = f.ceil();
9731                if l >= 9_223_372_036_854_775_808.0 {
9732                    // PUC forlimit: a positive limit beyond the integer range
9733                    // is unreachable for a decreasing loop — empty.
9734                    (i64::MAX, true)
9735                } else {
9736                    let li = l as i64;
9737                    (li, init < li)
9738                }
9739            }
9740        }
9741    }
9742}
9743
9744/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9745/// `@file` / `=name` markers in `source`).
9746fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9747    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9748    let b = unsafe { crate::runtime::string::bytes_of(p) };
9749    match b.first() {
9750        Some(b'@') | Some(b'=') => &b[1..],
9751        _ => b,
9752    }
9753}
9754
9755impl Vm {
9756    /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9757    /// that called the current native. Returns (closure, current line,
9758    /// extra vararg count).
9759    /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9760    /// function running at `level`, recovered from the caller's call
9761    /// instruction (PUC funcnamefromcode). None for the main chunk or a
9762    /// tail/anonymous call with no recoverable name.
9763    /// A debug-level position: either a real Lua frame (by index) or a synthetic
9764    /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9765    /// coroutine body), which `debug.getinfo` and traceback report as "C".
9766    /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9767    /// frame at `level`'s current pc, as (name, value). Locals are visited in
9768    /// registration order (start pc, then register) to match luaF_getlocalname.
9769    pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9770        if n == 0 {
9771            return None;
9772        }
9773        let fi = match self.dbg_frame(level)? {
9774            DbgKind::Lua(fi) => fi,
9775            // Tail-call placeholder has no real frame backing it — no locals
9776            // exist to read or write here. PUC `findlocal` returns NULL on
9777            // a CIST_TAIL activation.
9778            DbgKind::Tail(_) => return None,
9779            // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9780            // for slot `n` inside the argument window (db.lua :408-:413, and
9781            // the call/return hook reads of math.sin / select args via
9782            // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9783            // meaningful for a C frame here.
9784            DbgKind::C(fi) => {
9785                if n < 1 {
9786                    return None;
9787                }
9788                let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9789                if (n as u32) > nargs {
9790                    return None;
9791                }
9792                let slot = (func_slot + n as u32) as usize;
9793                let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9794                return Some((self.temporary_locvar_name().to_string(), val));
9795            }
9796        };
9797        let f = self.frames[fi].lua()?;
9798        // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9799        // is the first extra arg passed to the function (`...[1]`), `-2` the
9800        // second, etc. The 5.5 stack layout parks varargs in
9801        // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9802        if n < 0 {
9803            let i = (-n) as u32;
9804            if i == 0 || i > f.n_varargs {
9805                return None;
9806            }
9807            let val = self
9808                .stack
9809                .get((f.func_slot + i) as usize)
9810                .copied()
9811                .unwrap_or(Value::Nil);
9812            return Some((self.vararg_locvar_name().to_string(), val));
9813        }
9814        let proto = f.closure.proto;
9815        // PUC's parser injects a hidden `(vararg table)` locvar for an
9816        // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9817        // right after the fixed parameters (`numparams + 1`). Main chunks
9818        // and `(...t)` named-vararg funcs do NOT get one — gate on the
9819        // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9820        // their declared registers (no shadow slot allocated), so we expose
9821        // that hidden index purely in this debug view.
9822        let num_params = proto.num_params as i64;
9823        let vararg_slot = if proto.has_vararg_table_pseudo {
9824            Some(num_params + 1)
9825        } else {
9826            None
9827        };
9828        if vararg_slot == Some(n) {
9829            return Some(("(vararg table)".to_string(), Value::Nil));
9830        }
9831        let pc = (f.pc as usize).saturating_sub(1);
9832        let mut active: Vec<&crate::runtime::LocVar> = proto
9833            .locvars
9834            .iter()
9835            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9836            .collect();
9837        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9838        let mut idx: i64 = n - 1;
9839        if let Some(vs) = vararg_slot
9840            && n > vs
9841        {
9842            idx -= 1;
9843        }
9844        let idx = idx as usize;
9845        if let Some(lv) = active.get(idx) {
9846            let val = self
9847                .stack
9848                .get((f.base + lv.reg) as usize)
9849                .copied()
9850                .unwrap_or(Value::Nil);
9851            return Some((lv.name.to_string(), val));
9852        }
9853        // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9854        // still inside the frame's live register window — report a
9855        // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9856        // the next frame's func slot (`ci->next->func.p`) so the
9857        // temporary window stops where the callee's frame begins
9858        // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9859        // an out-of-range slot).
9860        let limit = self
9861            .frames
9862            .get(fi + 1)
9863            .and_then(|cf| cf.lua())
9864            .map(|nf| nf.func_slot)
9865            .unwrap_or_else(|| self.top.max(f.base));
9866        let temp_reg = idx as u32;
9867        if f.base + temp_reg < limit {
9868            let val = self
9869                .stack
9870                .get((f.base + temp_reg) as usize)
9871                .copied()
9872                .unwrap_or(Value::Nil);
9873            return Some((self.lua_temporary_locvar_name().to_string(), val));
9874        }
9875        None
9876    }
9877
9878    /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9879    /// the local / vararg name on success, `None` when the slot does not
9880    /// resolve. Mirrors `local_at`'s indexing exactly.
9881    pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9882        if n == 0 {
9883            return None;
9884        }
9885        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9886            return None;
9887        };
9888        let f = self.frames[fi].lua()?;
9889        if n < 0 {
9890            let i = (-n) as u32;
9891            if i == 0 || i > f.n_varargs {
9892                return None;
9893            }
9894            let slot = (f.func_slot + i) as usize;
9895            if let Some(s) = self.stack.get_mut(slot) {
9896                *s = v;
9897            }
9898            return Some(self.vararg_locvar_name().to_string());
9899        }
9900        let proto = f.closure.proto;
9901        let num_params = proto.num_params as i64;
9902        let vararg_slot = if proto.has_vararg_table_pseudo {
9903            Some(num_params + 1)
9904        } else {
9905            None
9906        };
9907        if vararg_slot == Some(n) {
9908            // hidden (vararg table) slot has no real storage — accept the
9909            // write as a no-op for PUC parity (db.lua doesn't write to it).
9910            return Some("(vararg table)".to_string());
9911        }
9912        let pc = (f.pc as usize).saturating_sub(1);
9913        let mut active: Vec<&crate::runtime::LocVar> = proto
9914            .locvars
9915            .iter()
9916            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9917            .collect();
9918        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9919        let mut idx: i64 = n - 1;
9920        if let Some(vs) = vararg_slot
9921            && n > vs
9922        {
9923            idx -= 1;
9924        }
9925        let idx = idx as usize;
9926        let (name, reg) = if let Some(lv) = active.get(idx) {
9927            (lv.name.to_string(), lv.reg)
9928        } else {
9929            // PUC `luaG_findlocal` fallback into the temporary window —
9930            // bounded by the next frame's func slot (see local_at).
9931            let limit = self
9932                .frames
9933                .get(fi + 1)
9934                .and_then(|cf| cf.lua())
9935                .map(|nf| nf.func_slot)
9936                .unwrap_or_else(|| self.top.max(f.base));
9937            let temp_reg = idx as u32;
9938            if f.base + temp_reg >= limit {
9939                return None;
9940            }
9941            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9942        };
9943        let slot = (f.base + reg) as usize;
9944        if let Some(s) = self.stack.get_mut(slot) {
9945            *s = v;
9946        }
9947        Some(name)
9948    }
9949
9950    /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9951    /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9952    /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9953    /// vararg indexing past `n_varargs`, or for a register past the live
9954    /// window. Naming follows the same priority as `local_at`: named locals,
9955    /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9956    /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9957    /// window.
9958    pub(crate) fn local_at_coro(
9959        &self,
9960        co: Gc<crate::runtime::Coro>,
9961        level: i64,
9962        n: i64,
9963    ) -> Option<(String, Value)> {
9964        if level < 1 || n == 0 {
9965            return None;
9966        }
9967        let frames = &co.frames;
9968        // Logical level: iterate Lua frames from the top.
9969        let lua_indices: Vec<usize> = (0..frames.len())
9970            .rev()
9971            .filter(|&i| frames[i].lua().is_some())
9972            .collect();
9973        let fi = *lua_indices.get((level - 1) as usize)?;
9974        let f = frames[fi].lua()?;
9975        if n < 0 {
9976            let i = (-n) as u32;
9977            if i == 0 || i > f.n_varargs {
9978                return None;
9979            }
9980            let val = co
9981                .stack
9982                .get((f.func_slot + i) as usize)
9983                .copied()
9984                .unwrap_or(Value::Nil);
9985            return Some((self.vararg_locvar_name().to_string(), val));
9986        }
9987        let proto = f.closure.proto;
9988        let num_params = proto.num_params as i64;
9989        let vararg_slot = if proto.has_vararg_table_pseudo {
9990            Some(num_params + 1)
9991        } else {
9992            None
9993        };
9994        if vararg_slot == Some(n) {
9995            return Some(("(vararg table)".to_string(), Value::Nil));
9996        }
9997        let pc = (f.pc as usize).saturating_sub(1);
9998        let mut active: Vec<&crate::runtime::LocVar> = proto
9999            .locvars
10000            .iter()
10001            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
10002            .collect();
10003        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
10004        let mut idx: i64 = n - 1;
10005        if let Some(vs) = vararg_slot
10006            && n > vs
10007        {
10008            idx -= 1;
10009        }
10010        let idx = idx as usize;
10011        if let Some(lv) = active.get(idx) {
10012            let val = co
10013                .stack
10014                .get((f.base + lv.reg) as usize)
10015                .copied()
10016                .unwrap_or(Value::Nil);
10017            return Some((lv.name.to_string(), val));
10018        }
10019        let limit = frames
10020            .get(fi + 1)
10021            .and_then(|cf| cf.lua())
10022            .map(|nf| nf.func_slot)
10023            .unwrap_or(co.top.max(f.base));
10024        let temp_reg = idx as u32;
10025        if f.base + temp_reg < limit {
10026            let val = co
10027                .stack
10028                .get((f.base + temp_reg) as usize)
10029                .copied()
10030                .unwrap_or(Value::Nil);
10031            return Some((self.lua_temporary_locvar_name().to_string(), val));
10032        }
10033        None
10034    }
10035
10036    /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
10037    /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
10038    pub(crate) fn local_set_coro(
10039        &mut self,
10040        co: Gc<crate::runtime::Coro>,
10041        level: i64,
10042        n: i64,
10043        v: Value,
10044    ) -> Option<String> {
10045        if level < 1 || n == 0 {
10046            return None;
10047        }
10048        let lua_indices: Vec<usize> = (0..co.frames.len())
10049            .rev()
10050            .filter(|&i| co.frames[i].lua().is_some())
10051            .collect();
10052        let fi = *lua_indices.get((level - 1) as usize)?;
10053        let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
10054            let f = co.frames[fi].lua()?;
10055            (
10056                f.func_slot,
10057                f.n_varargs,
10058                f.base,
10059                f.closure.proto,
10060                co.top.max(f.base),
10061                co.frames
10062                    .get(fi + 1)
10063                    .and_then(|cf| cf.lua())
10064                    .map(|nf| nf.func_slot),
10065            )
10066        };
10067        if n < 0 {
10068            let i = (-n) as u32;
10069            if i == 0 || i > n_varargs {
10070                return None;
10071            }
10072            let slot = (func_slot + i) as usize;
10073            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10074            let stack = unsafe { &mut co.as_mut().stack };
10075            if let Some(s) = stack.get_mut(slot) {
10076                *s = v;
10077            }
10078            // co.stack values are traced — once-per-call barrier so propagate
10079            // sees the new value if co was already BLACK this cycle.
10080            self.heap
10081                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
10082            return Some(self.vararg_locvar_name().to_string());
10083        }
10084        let num_params = proto.num_params as i64;
10085        let vararg_slot = if proto.has_vararg_table_pseudo {
10086            Some(num_params + 1)
10087        } else {
10088            None
10089        };
10090        if vararg_slot == Some(n) {
10091            return Some("(vararg table)".to_string());
10092        }
10093        let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
10094        let mut active: Vec<&crate::runtime::LocVar> = proto
10095            .locvars
10096            .iter()
10097            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
10098            .collect();
10099        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
10100        let mut idx: i64 = n - 1;
10101        if let Some(vs) = vararg_slot
10102            && n > vs
10103        {
10104            idx -= 1;
10105        }
10106        let idx = idx as usize;
10107        let (name, reg) = if let Some(lv) = active.get(idx) {
10108            (lv.name.to_string(), lv.reg)
10109        } else {
10110            let limit = next_func_slot.unwrap_or(top_for_temp);
10111            let temp_reg = idx as u32;
10112            if base + temp_reg >= limit {
10113                return None;
10114            }
10115            (self.lua_temporary_locvar_name().to_string(), temp_reg)
10116        };
10117        let slot = (base + reg) as usize;
10118        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10119        let stack = unsafe { &mut co.as_mut().stack };
10120        if let Some(s) = stack.get_mut(slot) {
10121            *s = v;
10122        }
10123        // co.stack values are traced — once-per-call barrier so propagate
10124        // sees the new value if co was already BLACK this cycle.
10125        self.heap
10126            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
10127        Some(name)
10128    }
10129
10130    /// Frame info for a level on a suspended coroutine (PUC
10131    /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
10132    /// Returns the closure + currentline + extraargs + istailcall for the
10133    /// level-th Lua activation in `co.frames`. None if level overshoots.
10134    pub(crate) fn coro_frame_info(
10135        &self,
10136        co: Gc<crate::runtime::Coro>,
10137        level: i64,
10138    ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
10139        if level < 1 {
10140            return None;
10141        }
10142        let lua_indices: Vec<usize> = (0..co.frames.len())
10143            .rev()
10144            .filter(|&i| co.frames[i].lua().is_some())
10145            .collect();
10146        let fi = *lua_indices.get((level - 1) as usize)?;
10147        let f = co.frames[fi].lua()?;
10148        let proto = f.closure.proto;
10149        let pc = (f.pc as usize)
10150            .saturating_sub(1)
10151            .min(proto.lines.len().saturating_sub(1));
10152        let line = proto.lines.get(pc).copied().unwrap_or(0);
10153        Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
10154    }
10155
10156    /// Whether `level` resolves to any live activation (PUC lua_getstack).
10157    pub(crate) fn level_in_range(&self, level: i64) -> bool {
10158        self.dbg_frame(level).is_some()
10159    }
10160
10161    /// PUC's debug-API placeholder for an unnamed vararg slot returned by
10162    /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
10163    /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
10164    /// 5.3 :195 / 5.4 :286 baseline on their respective form.
10165    pub(crate) fn vararg_locvar_name(&self) -> &'static str {
10166        if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
10167            "(*vararg)"
10168        } else {
10169            "(vararg)"
10170        }
10171    }
10172
10173    /// PUC's debug-API placeholder for an unnamed temporary on a C
10174    /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
10175    /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
10176    /// their spelling.
10177    pub(crate) fn temporary_locvar_name(&self) -> &'static str {
10178        if matches!(
10179            self.version,
10180            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
10181        ) {
10182            // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
10183            // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
10184            // to `(C temporary)`.
10185            "(*temporary)"
10186        } else {
10187            "(C temporary)"
10188        }
10189    }
10190
10191    /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
10192    /// (an arithmetic intermediate sitting past the last named local on a
10193    /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
10194    /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
10195    /// spelling.
10196    pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
10197        if matches!(
10198            self.version,
10199            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
10200        ) {
10201            "(*temporary)"
10202        } else {
10203            "(temporary)"
10204        }
10205    }
10206
10207    /// The Lua closure running at `level` on the current thread, or `None`
10208    /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
10209    /// need this to reach the function whose env they read or rewrite.
10210    pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
10211        // `DbgKind::Tail` also falls into the else branch — a tail-call
10212        // placeholder has no closure of its own, so PUC's `lua_getstack` +
10213        // `getfunc` for that level returns no function, and `getfenv(level)`
10214        // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
10215        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
10216            return None;
10217        };
10218        Some(self.frames[fi].lua()?.closure)
10219    }
10220
10221    pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
10222        if level < 1 {
10223            return false;
10224        }
10225        let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
10226        (level as usize) <= count
10227    }
10228
10229    pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
10230        if level < 1 {
10231            return None;
10232        }
10233        // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
10234        // activation counts as a level, and each Lua activation's
10235        // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
10236        // dropped the synthetic shape: `istailcall` becomes a flag on the
10237        // real frame and Cont activations no longer count separately.
10238        // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
10239        // `getinfo(2).func == g1` pins the 5.2+ shape.
10240        let v51 = self.version <= LuaVersion::Lua51;
10241        let mut lvl = level;
10242        for fi in (0..self.frames.len()).rev() {
10243            match &self.frames[fi] {
10244                CallFrame::Lua(f) => {
10245                    lvl -= 1;
10246                    if lvl == 0 {
10247                        return Some(DbgKind::Lua(fi));
10248                    }
10249                    if v51 {
10250                        // 5.1 reports one synthetic CIST_TAIL level per
10251                        // collapsed tail call (PUC `lua_getstack` subtracts
10252                        // `ci->u.l.tailcalls` from the remaining level).
10253                        for _ in 0..f.tailcalls {
10254                            lvl -= 1;
10255                            if lvl == 0 {
10256                                return Some(DbgKind::Tail(fi));
10257                            }
10258                        }
10259                    }
10260                    if f.from_c {
10261                        lvl -= 1;
10262                        if lvl == 0 {
10263                            return Some(DbgKind::C(fi));
10264                        }
10265                    }
10266                }
10267                CallFrame::Cont(_) => {
10268                    if !v51 {
10269                        continue;
10270                    }
10271                    lvl -= 1;
10272                    if lvl == 0 {
10273                        let parent = (0..fi)
10274                            .rev()
10275                            .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
10276                        return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
10277                    }
10278                }
10279            }
10280        }
10281        None
10282    }
10283
10284    pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
10285        let f = self.frames[fi].lua()?;
10286        // metamethod handler frames carry the event tag (e.g. "close" for
10287        // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
10288        if f.is_hook {
10289            return Some(("hook", "?".to_string()));
10290        }
10291        if let Some(tm) = f.tm {
10292            return Some(("metamethod", tm_debug_name(self.version, tm)));
10293        }
10294        // a frame entered across a C boundary has no naming call instruction
10295        if fi == 0 || f.from_c {
10296            return None;
10297        }
10298        // the caller's call instruction names this frame; a continuation frame
10299        // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
10300        // already short-circuits those.
10301        let caller = self.frames[fi - 1].lua()?;
10302        let caller_proto = caller.closure.proto;
10303        let p: &crate::runtime::Proto = &caller_proto;
10304        let call_pc = (caller.pc as usize).checked_sub(1)?;
10305        let instr = *p.code.get(call_pc)?;
10306        match instr.op() {
10307            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10308            Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10309            _ => None,
10310        }
10311    }
10312
10313    /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
10314    /// (PUC names a C function from the call instruction that invoked it). The
10315    /// native was called by the nearest Lua frame below `fi` (skipping pcall/
10316    /// xpcall continuations); that frame's call instruction names it.
10317    pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
10318        // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
10319        // the synthetic C edge between the __gc finalizer (top Lua frame, has
10320        // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
10321        // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
10322        // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
10323        // sets the tag on the handler frame only, so level 2 there still
10324        // names the calling Lua frame's call instruction (5.5 locals.lua
10325        // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
10326        if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
10327            && fr.tm == Some("gc")
10328        {
10329            let name = tm_debug_name(self.version, "gc");
10330            return Some(("metamethod", name));
10331        }
10332        let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
10333        let caller = self.frames[caller_fi].lua()?;
10334        let p = &caller.closure.proto;
10335        let call_pc = (caller.pc as usize).checked_sub(1)?;
10336        let instr = *p.code.get(call_pc)?;
10337        match instr.op() {
10338            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10339            _ => None,
10340        }
10341    }
10342
10343    /// Native value currently sitting on the synthetic C edge identified by
10344    /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
10345    /// above `fi` (each one corresponds to one native pushing the hook) and
10346    /// indexes into `running_natives` from the top, also skipping the caller
10347    /// of `getinfo` itself (the native that is currently asking).
10348    /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
10349    /// expects the just-entered C function.
10350    pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
10351        let idx = self.c_frame_native_idx(fi)?;
10352        Some(Value::Native(self.running_natives[idx]))
10353    }
10354
10355    /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
10356    /// so `local_at` can index the native's argument window like PUC's
10357    /// `(C temporary)` path. Returns `None` when no matching native exists
10358    /// (e.g. the C edge corresponds to a non-native boundary).
10359    pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
10360        let idx = self.c_frame_native_idx(fi)?;
10361        self.running_native_slots.get(idx).copied()
10362    }
10363
10364    fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
10365        let n_above = self.frames[fi..]
10366            .iter()
10367            .filter_map(CallFrame::lua)
10368            .filter(|f| f.from_c)
10369            .count();
10370        if n_above == 0 {
10371            return None;
10372        }
10373        // running_natives.last() is the native currently executing (the one
10374        // that called getinfo). Pop it conceptually, then take the n_above-th
10375        // entry from the top of what remains.
10376        let nr = self.running_natives.len().checked_sub(1)?;
10377        nr.checked_sub(n_above)
10378    }
10379
10380    /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
10381    /// native whose function pointer matches `target`, and return its qualified
10382    /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
10383    /// `None` if no match is found. Used by `arg_error` when the running native
10384    /// was invoked from another native (PUC `ar.name == NULL` at level 0).
10385    /// True when the innermost call frame is a pcall/xpcall
10386    /// continuation — i.e. the currently-running native was invoked
10387    /// DIRECTLY by pcall/xpcall rather than by Lua code. PUC's
10388    /// luaL_argerror sees ar.name == NULL there (the caller is C)
10389    /// and qualifies the name via pushglobalfuncname — so
10390    /// `pcall(coroutine.resume, 42)` blames 'coroutine.resume'
10391    /// (v2.14 fixture 5.5/365).
10392    pub(crate) fn caller_is_protected_cont(&self) -> bool {
10393        matches!(
10394            self.frames.last(),
10395            Some(CallFrame::Cont(nc))
10396                if matches!(nc.kind, ContKind::Pcall | ContKind::Xpcall { .. })
10397        )
10398    }
10399
10400    pub(crate) fn pushglobalfuncname(
10401        &mut self,
10402        target: crate::runtime::value::NativeFn,
10403    ) -> Option<String> {
10404        let pkg_k = Value::Str(self.heap.intern(b"package"));
10405        let pkg = match self.globals().get(pkg_k) {
10406            Value::Table(t) => t,
10407            _ => return None,
10408        };
10409        let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10410        let loaded = match pkg.get(loaded_k) {
10411            Value::Table(t) => t,
10412            _ => return None,
10413        };
10414        let matches = |v: Value| -> bool {
10415            matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10416        };
10417        let mut k = Value::Nil;
10418        while let Ok(Some((nk, nv))) = loaded.next(k) {
10419            k = nk;
10420            let Value::Str(outer) = nk else { continue };
10421            let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10422            if matches(nv) {
10423                return Some(if outer == "_G" { String::new() } else { outer });
10424            }
10425            if let Value::Table(inner_t) = nv {
10426                let mut k2 = Value::Nil;
10427                while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10428                    k2 = nk2;
10429                    if matches(nv2)
10430                        && let Value::Str(inner) = nk2
10431                    {
10432                        let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10433                        return Some(if outer == "_G" {
10434                            inner
10435                        } else {
10436                            format!("{outer}.{inner}")
10437                        });
10438                    }
10439                }
10440            }
10441        }
10442        None
10443    }
10444
10445    /// Name and namewhat of the native currently running on behalf of the top
10446    /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10447    /// `luaL_argerror` rewrite a method call's self-argument error.
10448    pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10449        let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10450        let p = &caller.closure.proto;
10451        let call_pc = (caller.pc as usize).checked_sub(1)?;
10452        let instr = *p.code.get(call_pc)?;
10453        match instr.op() {
10454            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10455            _ => None,
10456        }
10457    }
10458
10459    pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10460        let f = self.frames[fi].lua().expect("Lua frame");
10461        let proto = f.closure.proto;
10462        let pc = (f.pc as usize)
10463            .saturating_sub(1)
10464            .min(proto.lines.len().saturating_sub(1));
10465        let line = proto.lines.get(pc).copied().unwrap_or(0);
10466        // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10467        // (independent of any later write to a materialized vararg table's `n`).
10468        // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10469        // any nonzero `tailcalls` count flips it true.
10470        (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10471    }
10472
10473    /// Read an upvalue cell of a closure (debug.getupvalue).
10474    pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10475        match cl.upvals()[idx].state() {
10476            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10477            UpvalState::Closed(v) => v,
10478        }
10479    }
10480
10481    /// Write an upvalue cell of a closure (debug.setupvalue).
10482    pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10483        let uv = cl.upvals()[idx];
10484        match uv.state() {
10485            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10486            UpvalState::Closed(_) => {
10487                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10488                unsafe { uv.as_mut() }.set_closed(v);
10489                self.heap
10490                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10491            }
10492        }
10493    }
10494
10495    /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10496    /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10497    /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10498    /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10499    /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10500    /// else `"function <src:line_defined>"` for an anonymous Lua function.
10501    /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10502    /// Walks the coroutine's saved frames and prepends a synthetic C-level
10503    /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10504    /// (its `resume_at` marker is set). `level` skips entries from the top
10505    /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10506    /// frame; etc.). db.lua :764-:768 sample several levels.
10507    pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10508        use crate::runtime::CoroStatus;
10509        const LEVELS1: usize = 10;
10510        const LEVELS2: usize = 11;
10511        #[derive(Clone, Copy)]
10512        enum VFrame<'a> {
10513            Lua(&'a crate::runtime::function::Frame),
10514            CPcall,
10515            CXpcall,
10516            CYield,
10517            /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10518            /// call collapsed into the next Lua frame down the chain.
10519            Tail,
10520        }
10521        let v51 = self.version <= LuaVersion::Lua51;
10522        let mut visible: Vec<VFrame<'_>> = Vec::new();
10523        // PUC's level 0 entry on a suspended coroutine is the C call where it
10524        // paused — `coroutine.yield` for a yielded thread.
10525        if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10526            visible.push(VFrame::CYield);
10527        }
10528        for cf in co.frames.iter().rev() {
10529            match cf {
10530                CallFrame::Lua(f) => {
10531                    visible.push(VFrame::Lua(f));
10532                    if v51 {
10533                        for _ in 0..f.tailcalls {
10534                            visible.push(VFrame::Tail);
10535                        }
10536                    }
10537                }
10538                CallFrame::Cont(nc) => match nc.kind {
10539                    ContKind::Pcall => visible.push(VFrame::CPcall),
10540                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10541                    _ => {}
10542                },
10543            }
10544        }
10545        if level < 0 {
10546            level = 0;
10547        }
10548        if (level as usize) >= visible.len() {
10549            return Vec::new();
10550        }
10551        let visible = &visible[level as usize..];
10552        let total = visible.len();
10553        let mut out = Vec::new();
10554        // To name a Lua frame, PUC consults the caller's OP_CALL via
10555        // getobjname: find the index `fi` of the current frame in co.frames,
10556        // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10557        let coro_frame_name = |frames: &[CallFrame],
10558                               target: &crate::runtime::function::Frame|
10559         -> Option<(&'static str, String)> {
10560            let fi = frames
10561                .iter()
10562                .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10563            if fi == 0 || target.from_c {
10564                return None;
10565            }
10566            let caller = frames[fi - 1].lua()?;
10567            let p = &caller.closure.proto;
10568            let call_pc = (caller.pc as usize).checked_sub(1)?;
10569            let instr = *p.code.get(call_pc)?;
10570            match instr.op() {
10571                Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10572                Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10573                _ => None,
10574            }
10575        };
10576        let frames = &co.frames;
10577        let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10578            VFrame::Lua(f) => {
10579                let proto = f.closure.proto;
10580                let src = chunk_display_name(proto.source.as_ptr());
10581                let pc = (f.pc as usize)
10582                    .saturating_sub(1)
10583                    .min(proto.lines.len().saturating_sub(1));
10584                let line = proto.lines.get(pc).copied().unwrap_or(0);
10585                out.extend_from_slice(b"\n\t");
10586                out.extend_from_slice(src);
10587                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10588                if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10589                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10590                } else if proto.line_defined == 0 {
10591                    out.extend_from_slice(b"main chunk");
10592                } else {
10593                    out.extend_from_slice(
10594                        format!(
10595                            "function <{}:{}>",
10596                            String::from_utf8_lossy(src),
10597                            proto.line_defined
10598                        )
10599                        .as_bytes(),
10600                    );
10601                }
10602            }
10603            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10604            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10605            VFrame::CYield => {
10606                // PUC `pushglobalfuncname` reports `yield` as
10607                // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10608                // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10609                // `'yield'` (5.5 :841).
10610                let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10611                if qualified {
10612                    out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10613                } else {
10614                    out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10615                }
10616            }
10617            VFrame::Tail => {
10618                // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10619                // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10620                // :403 asserts these appear once per collapsed tail call.
10621                out.extend_from_slice(b"\n\t(...tail calls...)");
10622            }
10623        };
10624        if total <= LEVELS1 + LEVELS2 {
10625            for &v in visible {
10626                emit(&mut out, v);
10627            }
10628        } else {
10629            for &v in &visible[..LEVELS1] {
10630                emit(&mut out, v);
10631            }
10632            let skip = total - LEVELS1 - LEVELS2;
10633            out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10634            for &v in &visible[total - LEVELS2..] {
10635                emit(&mut out, v);
10636            }
10637        }
10638        out
10639    }
10640
10641    pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10642        // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10643        // (11) bottom frames; if there are more, the middle is collapsed into
10644        // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10645        // overflow traceback would balloon to tens of megabytes (errors.lua's
10646        // stack-overflow test ran string.gmatch over the resulting buffer).
10647        const LEVELS1: usize = 10;
10648        const LEVELS2: usize = 11;
10649        // Collect visible frames in top-down order (deepest first). Both Lua
10650        // activations and pcall/xpcall continuations (which stand in for a
10651        // C-level pcall on the stack) are visible; PUC's traceback enumerates
10652        // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10653        #[derive(Clone, Copy)]
10654        enum VFrame {
10655            Lua(usize),
10656            CPcall,
10657            CXpcall,
10658        }
10659        let mut visible: Vec<VFrame> = Vec::new();
10660        for (fi, cf) in self.frames.iter().enumerate().rev() {
10661            match cf {
10662                CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10663                CallFrame::Cont(nc) => match nc.kind {
10664                    ContKind::Pcall => visible.push(VFrame::CPcall),
10665                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10666                    _ => {}
10667                },
10668            }
10669        }
10670        // PUC `luaL_traceback` starts enumerating at the given `level` (in
10671        // terms of L1's CallInfo chain). For the running-thread case the C
10672        // frame for debug.traceback itself is level 0 and luna's `visible`
10673        // doesn't include it — so level=1 (PUC default) means "emit from the
10674        // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10675        // the top. level<=0 emits nothing extra here (d_traceback handles the
10676        // "[C]: in function 'traceback'" prefix for level==0 separately).
10677        let skip = (level - 1).max(0) as usize;
10678        if skip >= visible.len() {
10679            return Vec::new();
10680        }
10681        let visible = &visible[skip..];
10682        let total = visible.len();
10683        let mut out = Vec::new();
10684        let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10685            VFrame::Lua(fi) => {
10686                let f = this.frames[fi].lua().expect("Lua frame");
10687                let proto = f.closure.proto;
10688                let src = chunk_display_name(proto.source.as_ptr());
10689                let pc = (f.pc as usize)
10690                    .saturating_sub(1)
10691                    .min(proto.lines.len().saturating_sub(1));
10692                let line = proto.lines.get(pc).copied().unwrap_or(0);
10693                out.extend_from_slice(b"\n\t");
10694                out.extend_from_slice(src);
10695                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10696                if let Some((namewhat, name)) = this.frame_name(fi) {
10697                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10698                } else if proto.line_defined == 0 {
10699                    out.extend_from_slice(b"main chunk");
10700                } else {
10701                    out.extend_from_slice(
10702                        format!(
10703                            "function <{}:{}>",
10704                            String::from_utf8_lossy(src),
10705                            proto.line_defined
10706                        )
10707                        .as_bytes(),
10708                    );
10709                }
10710            }
10711            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10712            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10713        };
10714        if total <= LEVELS1 + LEVELS2 {
10715            for &v in visible {
10716                emit_frame(&mut out, v, self);
10717            }
10718        } else {
10719            for &v in &visible[..LEVELS1] {
10720                emit_frame(&mut out, v, self);
10721            }
10722            let dropped = total - LEVELS1 - LEVELS2;
10723            out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10724            for &v in &visible[total - LEVELS2..] {
10725                emit_frame(&mut out, v, self);
10726            }
10727        }
10728        out
10729    }
10730}
10731
10732// ────────────────────────────────────────────────────────────────────
10733// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10734//
10735// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10736// trace-meta section after `vm.load`, resolves each entry's
10737// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10738// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10739// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10740// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10741// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10742// mcode without further plumbing — same code path the runtime JIT
10743// uses.
10744//
10745// Why a separate impl block: keeps the AOT API surface (one fn) easy
10746// to locate when grep'ing for `install_aot_trace`, without dragging
10747// the 8500-line `impl Vm` block above.
10748// ────────────────────────────────────────────────────────────────────
10749
10750impl Vm {
10751    /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10752    /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10753    /// fires it at the trace's `head_pc`. This is the runtime install
10754    /// API the deploy-side `luna-runtime-helpers` resolver calls once
10755    /// per AOT-emitted trace meta entry, after looking up `proto` by
10756    /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10757    ///
10758    /// # What this does
10759    ///
10760    /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10761    /// The trace's `entry` fn ptr must already point at runnable
10762    /// machine code (the AOT linker resolved the symbol at link time;
10763    /// the deploy resolver passes the address verbatim).
10764    ///
10765    /// # What this does NOT do
10766    ///
10767    /// - **No deduplication.** Calling twice with the same `head_pc`
10768    ///   pushes two entries; the dispatcher's `find` will pick the
10769    ///   first match. The deploy resolver is responsible for not
10770    ///   double-installing.
10771    /// - **No invalidation of the runtime JIT cache.** If the runtime
10772    ///   JIT later records + compiles a trace for the same
10773    ///   `(proto, head_pc)`, both coexist on `proto.traces` and the
10774    ///   dispatcher's `find` picks whichever appears first. AOT
10775    ///   traces install before any runtime recording is possible
10776    ///   (resolver runs before `vm.load` returns its first closure),
10777    ///   so AOT traces win the race for the same site.
10778    /// - **No coverage gating.** AOT traces are trusted by
10779    ///   construction — they were validated at compile time. Setting
10780    ///   `dispatchable: false` on the input would silently disable
10781    ///   dispatch; the caller controls that flag.
10782    ///
10783    /// # Safety / soundness
10784    ///
10785    /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10786    /// machine code). Soundness contract:
10787    ///
10788    /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10789    ///   In the AOT-binary deploy shape this is trivially satisfied —
10790    ///   the fn lives in the binary's `.text`.
10791    /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10792    ///   what the trace's IR actually compiled against; the dispatcher
10793    ///   uses them to marshal `reg_state` in and out without further
10794    ///   validation. A mismatch corrupts vm.stack.
10795    ///
10796    /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10797    /// invariants hold; this fn is a plain push — no validation that
10798    /// would slow the dispatcher's hot path either.
10799    pub fn install_aot_trace(
10800        &mut self,
10801        proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10802        trace: crate::jit::trace::CompiledTrace,
10803    ) {
10804        let _ = self; // resolver passes &mut Vm for symmetry with future
10805        // pending-install + hash-walk variants; nothing on `self` to
10806        // mutate today because the install target lives on the Proto.
10807        proto.traces.borrow_mut().push(TArc::new(trace));
10808    }
10809
10810    /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10811    /// reachable from `root` and return `(proto, stable_hash)` pairs
10812    /// for every Proto found. Used by the deploy-side resolver to
10813    /// match AOT-emitted `proto_hash` keys against the freshly
10814    /// `undump`'d chunk's protos.
10815    ///
10816    /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10817    /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10818    /// multiple nested closures (rare; the cache field would catch
10819    /// the closure-side dedup, not the Proto side) is reported once.
10820    ///
10821    /// # Why on `&Vm` and not a free fn
10822    ///
10823    /// Keeps the AOT install API discoverable on the Vm surface —
10824    /// `vm.collect_proto_hashes(root)` reads naturally next to
10825    /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10826    /// any Vm field, so `&self` (read-only) is enough.
10827    pub fn collect_proto_hashes(
10828        &self,
10829        root: crate::runtime::Gc<crate::runtime::function::Proto>,
10830    ) -> Vec<(
10831        crate::runtime::Gc<crate::runtime::function::Proto>,
10832        [u8; 16],
10833    )> {
10834        let _ = self;
10835        let mut out = Vec::new();
10836        let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10837            std::collections::HashSet::new();
10838        let mut queue: std::collections::VecDeque<
10839            crate::runtime::Gc<crate::runtime::function::Proto>,
10840        > = std::collections::VecDeque::new();
10841        queue.push_back(root);
10842        while let Some(p) = queue.pop_front() {
10843            let key = p.as_ptr() as *const _;
10844            if !seen.insert(key) {
10845                continue;
10846            }
10847            out.push((p, p.stable_hash()));
10848            for &child in p.protos.iter() {
10849                queue.push_back(child);
10850            }
10851        }
10852        out
10853    }
10854}