Skip to main content

luna_core/vm/
exec.rs

1//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
2//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
3//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
4//!
5//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
6//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
7//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
8//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16    AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17    MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
/// A Lua virtual machine: one OS thread's worth of Lua state.
///
/// # Threading model
///
/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
/// design. Embedders that want concurrency spawn one `Vm` per OS
/// thread (or per single-thread Tokio worker) and exchange data via
/// channels. See [`docs/threading.md`](../../docs/threading.md) for
/// canonical embedding patterns including Tokio `current_thread`,
/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
///
/// The constraint is enforced at compile time:
///
/// ```compile_fail
/// fn must_be_send<T: Send>() {}
/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
/// ```
///
/// A future `feature = "send"` (post-v1.1 sprint) will gate an
/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
///
/// # Per-thread context
///
/// The stack/frame fields at the top of the struct hold the context of
/// whichever Lua thread is currently live (see `current` / `main_ctx`);
/// `SavedCtx` lists the subset that is kept per thread.
pub struct Vm {
    /// The GC heap owned by this VM. Embedders normally interact via the
    /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
    /// the heap directly.
    pub heap: Heap,
    /// value stack of the live thread (part of the per-thread context
    /// mirrored by `SavedCtx::stack`)
    stack: Vec<Value>,
    /// call frames of the live thread (mirrored by `SavedCtx::frames`)
    frames: Vec<CallFrame>,
    /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
    /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
    /// helpers (debug-asserted on use). NOT consumed by readers yet;
    /// week 1 is pure scaffold. Week 2-N migrations replace readers
    /// one slice at a time, then remove `frames: Vec<CallFrame>` in
    /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
    frames_top: u32,
    /// open upvalues, sorted ascending by stack slot
    open_upvals: Vec<(u32, Gc<Upvalue>)>,
    /// to-be-closed slots, ascending
    tbc: Vec<u32>,
    /// logical stack top for multi-result sequences
    pub(crate) top: u32,
    /// globals table of the live thread (PUC `L->l_gt`); carried per thread
    /// in `SavedCtx::globals`
    globals: Gc<Table>,
    /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
    /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
    /// own. The string slot is the shared metatable for all strings
    /// (populated by the string lib, P04). Settable via debug.setmetatable.
    type_mt: [Option<Gc<Table>>; 5],
    /// pre-interned metamethod event names, indexed by `Mm`
    mm_names: Vec<Gc<crate::runtime::LuaStr>>,
    /// native↔Lua nesting depth (PUC C-stack guard analogue)
    c_depth: u32,
    /// number of live pcall/xpcall continuation frames on the running thread
    /// (PUC counts these against nCcalls). Bounds protected-call recursion the
    /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
    /// with the coroutine context, since continuations survive a yield.
    pcall_depth: u32,
    /// number of non-yieldable C calls in flight on the running thread (PUC's
    /// `L->nny`). A library callback that runs via synchronous Rust recursion
    /// (sort comparator, gsub replacement) cannot be continued across a yield,
    /// so it bumps this for its duration; `coroutine.yield` inside hits the
    /// C-call boundary and errors. Always 0 at a suspend point (a yield can
    /// never cross such a call), so it needs no per-thread save/restore.
    nny: u32,
    /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
    /// stack-overflow that surfaces *inside* the handler is reported as PUC's
    /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
    /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
    /// loop)` then matches. PUC tracks this via the soft-cap window
    /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
    /// scope explicitly.
    msgh_depth: u32,
    /// set by a coroutine closing itself (`coroutine.close()` on the running
    /// thread): the to-be-closed handlers have already run; the thread must now
    /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
    /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
    /// unwind, so a protecting pcall cannot catch it) the termination.
    terminating: Option<Option<Value>>,
    /// xoshiro256** state (math.random)
    rng: [u64; 4],
    /// VM creation time (os.clock)
    started: std::time::Instant,
    /// Lua language version / dialect selected for this VM
    version: LuaVersion,
    /// error object being threaded through a chain of __close handlers; a GC
    /// root for the duration (a handler may trigger collection)
    closing_err: Option<Value>,
    /// the coroutine whose context is currently live in the fields above;
    /// `None` while the main thread runs (P05)
    current: Option<Gc<crate::runtime::Coro>>,
    /// the main thread's saved execution context while a coroutine runs
    main_ctx: Option<SavedCtx>,
    /// set by `coroutine.yield` to suspend the running coroutine: the yielded
    /// values plus the slot/result-count needed to finish the yielding call on
    /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
    yielding: Option<(Vec<Value>, u32, i32)>,
    /// results expected by the in-flight native call (so `yield` knows how many
    /// values its call site wants when it suspends)
    native_nresults: i32,
    /// identity object for the main thread, returned by `coroutine.running`
    /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
    main_coro: Option<Gc<Coro>>,
    /// `collectgarbage` mode name ("incremental"/"generational"). The collector
    /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
    /// switches report the previous one, as PUC does.
    gc_mode: &'static str,
    /// the live-register boundary of the running thread for GC rooting (PUC's
    /// `L->top`): set precisely at each GC safe point so freed temporary
    /// registers above it are not rooted. Without this the collector roots the
    /// whole stack window, pinning weak-table values stranded in stale temps
    /// (e.g. closure.lua's `while x[1]` GC-detection loop).
    pub(crate) gc_top: u32,
    /// `collectgarbage("param", name [,value])` pacing parameters. The collector
    /// is still stop-the-world, so these are stored/returned for API fidelity
    /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
    /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
    gc_pause: i64,
    /// pacing parameter "stepmul" (see `gc_pause`)
    gc_stepmul: i64,
    /// pacing parameter "stepsize" (see `gc_pause`)
    gc_stepsize: i64,
    /// true while `__gc` finalizers are being run, so a finalizer that calls
    /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
    /// `collectgarbage` yields fail).
    gc_finalizing: bool,
    /// C ABI scratch (`capi` module): the host-visible value stack that C
    /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
    /// Kept here (instead of in a separate `LuaState` wrapper) so the
    /// trampoline that bridges to a `LuaCFunction` can safely cast the
    /// Vm pointer it already holds to the public `*mut LuaState` type
    /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
    pub capi_stack: Vec<crate::runtime::Value>,
    /// Pinned CString backing the pointer last returned by `lua_tostring`;
    /// valid until the next `lua_tostring` on the same Vm.
    pub capi_cstr_pin: Option<std::ffi::CString>,
    /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
    /// concatenate across continuation calls until a non-`tocont` call
    /// flushes; the default warnf recognises `@on`/`@off` control messages
    /// and starts disabled. luna's `emit_warn` mirrors the default warnf
    /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
    /// keep the older raise semantics).
    pub(crate) warn_state: WarnState,
    /// raw bytes of the warning message in progress — presumably the
    /// accumulator for `tocont` pieces until the flush described on
    /// `warn_state`; NOTE(review): confirm against `emit_warn`
    pub(crate) warn_buf: Vec<u8>,
    /// P09 embedding cooperative budget: a per-Vm tick counter that the run
    /// loop decrements once per dispatch turn. When it hits zero the loop
    /// raises a catchable "instruction budget exceeded" error so the embedder
    /// can yield control back to its caller (short-script eval, game
    /// frame budgets). `None` = unbounded; reset on each call via
    /// `set_instr_budget`.
    pub(crate) instr_budget: Option<i64>,
    // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
    // `self.jit` below + `crate::vm::jit_state` for field docs.
    // (Was: jit_enabled here.)
    // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
    // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
    // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
    // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
    // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
    // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
    // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
    // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
    // materialize_emit,closure_emit}_count — all moved to JitState.
    /// Bytecode-loading gate. Default `true`. Sandbox embedders should
    /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
    /// precompiled chunks (which bypass the parser's depth / opcode
    /// limits). When `false`, the loader rejects any source whose first
    /// byte is the bytecode signature `\27` ("`\27Lua`").
    pub(crate) bytecode_loading: bool,
    /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
    /// a strictly larger trust surface than luna's own dump format
    /// (third-party toolchain bugs, malformed chunks, unknown opcode
    /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
    /// through the per-dialect PUC translators in `crate::vm::dump::puc`
    /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
    /// Embedder toggles via `set_puc_bytecode_loading`.
    pub(crate) puc_bytecode_loading: bool,
    /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
    /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
    /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
    /// source exceeds this, the loader returns the PUC-shaped
    /// `not enough memory` error before the host allocator is asked to
    /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
    /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
    /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
    /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
    /// Embedders that genuinely need to load > 256 MiB sources widen the
    /// cap via [`Vm::set_loader_input_budget`].
    pub(crate) loader_input_budget: usize,
    /// In-process log of fully-emitted warnings (each entry = one flushed
    /// message, sans the "Lua warning: " prefix and trailing newline). Lets
    /// tests assert what was warned without scraping stderr.
    pub(crate) warn_log: Vec<Vec<u8>>,
    /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
    /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
    /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
    /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
    /// shape stub for db.lua :328; if other registry-keyed APIs land later
    /// they can share this table.
    pub(crate) registry: Option<Gc<Table>>,
    /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
    /// registry entry); attached to every file userdata the io library makes
    pub(crate) file_mt: Option<Gc<Table>>,
    /// io library default input stream (PUC registry IO_INPUT)
    pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
    /// io library default output stream (PUC registry IO_OUTPUT)
    pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
    /// the running thread's debug hook state (`debug.sethook`); per-thread,
    /// swapped with the execution context on a coroutine resume/yield
    pub(crate) hook: HookState,
    /// true while the hook itself runs, so its own execution fires no events
    /// (PUC clears the mask for the duration)
    pub(crate) in_hook: bool,
    /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
    /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
    /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
    /// accumulate the count instead of capping at 1.
    pub(crate) pending_tailcalls: u32,
    /// Name of the C native that just propagated an error (captured before
    /// the native is popped from `running_natives`). Lets a dying coroutine
    /// preserve `[C]: in function '<name>'` at the top of its traceback
    /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
    /// luna's native frames are off-stack so we stash the name explicitly.
    pub(crate) errored_native: Option<String>,
    /// PUC `CallInfo.u2.transferinfo`, first half: index of the first
    /// transferred value (relative to the activation's func slot).
    /// Set just before firing a call/return hook, read by `getinfo("r")`.
    pub(crate) hook_ftransfer: u16,
    /// PUC `CallInfo.u2.transferinfo`, second half: the number of values
    /// transferred. Set and read together with `hook_ftransfer`.
    pub(crate) hook_ntransfer: u16,
    /// metamethod event tag (e.g. "close") to attach to the next Lua frame
    /// pushed by `push_frame`; `close_slots` sets this before calling a
    /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
    /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
    pending_tm: Option<&'static str>,
    /// `true` when the next `push_frame` is the user hook function itself,
    /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
    /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
    pending_is_hook: bool,
    /// traceback snapshot taken at the error point (the first `unwind` entry
    /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
    /// the failed frames are popped — can still see the error point's stack
    /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
    /// stack intact; we approximate by snapshotting the string and letting
    /// `d_traceback` consume it. Cleared on Cont catch and at host-level
    /// `call_value` entry (`public_call_depth == 0`).
    pub(crate) error_traceback: Option<Vec<u8>>,
    /// nesting depth of public `call_value` entries (host vs. internal). The
    /// outermost entry (depth 0) resets per-error state (`error_traceback`);
    /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
    public_call_depth: u32,
    /// stack of native (`Value::Native`) closures currently running on the
    /// Rust call stack. `begin_call` pushes the closure before invoking
    /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
    /// native call (PUC `ar.name == NULL` at level 0 because the level-0
    /// caller is C, not Lua) and qualify the running function's name via
    /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
    pub(crate) running_natives: Vec<Gc<NativeClosure>>,
    /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
    /// the native's argument-window head and width, so `debug.getlocal`
    /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
    pub(crate) running_native_slots: Vec<(u32, u32)>,
    // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
    // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
    // trace_compiler — all moved to JitState. See `jit` below.
    /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
    /// when `chunk_compiler` / `trace_compiler` are
    /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
    ///
    /// `#[doc(hidden)] pub` so the `luna` crate's
    /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
    /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
    /// field). Not part of the embedder-facing API surface.
    #[doc(hidden)]
    pub jit: crate::vm::jit_state::JitState,

    /// B12 host roots — append-only `Vec<Value>` traced as an extra
    /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
    /// `LuaRoot`) hold indices into this vector so the underlying
    /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
    ///
    /// v1.1 strategy: append-only with explicit `unpin_all` / new Vm.
    /// Slot recycling lands in Phase 3 alongside B8 LuaUserdata, when
    /// the trade-offs between `Drop` plumbing and append-only memory
    /// growth have a richer ergonomics envelope to live in.
    ///
    /// NOTE(review): the element type is `HostRootSlot`, not a bare
    /// `Value`, and `host_roots_free` below is a recycled-slot pool, so
    /// the "append-only" wording above appears to predate v1.3 Phase SR —
    /// confirm and refresh.
    pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
    /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
    /// back if non-empty, else extends `host_roots`. Generation
    /// overflow at `u32::MAX` retires the slot (NOT pushed here).
    pub(crate) host_roots_free: Vec<u32>,

    /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
    /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
    /// callback). Each entry is one in-flight working buffer; `gc_roots`
    /// extends with every contained `Value` so a `collectgarbage()`
    /// inside the comparator cannot free strings/tables snapshotted
    /// here. Nested sorts push a new buffer on entry, pop on exit
    /// (sort.lua's `load(..)(); collectgarbage()` compare callback
    /// regression).
    pub(crate) sort_scratch: Vec<Vec<Value>>,

    /// v1.3 Phase ML — MacroLua compile-time macro registry.
    /// Pre-populated with built-in macros (`@quote` / `@unquote` /
    /// `@if` / `@gensym`) at construction time when `version ==
    /// LuaVersion::MacroLua`; embedders register custom macros via
    /// [`Vm::define_macro`]. The expander runs once per `load()` call
    /// between lexing and parsing (only when `is_macro_lua()`).
    pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,

    /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
    /// by `TypeId::of::<T>()` for embedder types implementing
    /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
    /// [`Vm::register_userdata`]; metatables are pinned via
    /// [`Vm::pin_host`] at registration time so the entry's
    /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
    pub(crate) userdata_metatables:
        std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,

    /// B6 — classification of the most recent error raised on this Vm.
    /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
    /// at well-known sites (syntax errors, instr-budget trips, native
    /// callback errors, type errors).
    pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,

    /// B6 — `(source_name, line)` of the most recent error. Set by the
    /// dispatcher / lexer / parser; cleared when a new call_value
    /// enters cleanly.
    pub(crate) last_error_source: Option<(String, u32)>,

    /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
    /// the dispatcher hot loop yields cooperatively (sets
    /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
    /// to `EvalFuture::poll`) instead of returning a real
    /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
    /// for the duration of the future; restored to `false` on
    /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
    /// leave it `false` so v1.0 behavior is preserved exactly.
    pub(crate) async_mode: bool,

    /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
    /// before driving a slice. The dispatcher itself does not call it
    /// (the future's poll loop does `wake_by_ref` after observing
    /// `BudgetExhausted`), but storing the waker keeps the door open
    /// for Stage 2 async natives to wake the host directly from a
    /// helper future.
    pub(crate) async_waker: Option<std::task::Waker>,

    /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
    /// `instr_budget` at the start of each `EvalFuture::poll` slice.
    /// Default 10_000 (RFC §D5). Tunable via
    /// [`Vm::set_async_slice`].
    pub(crate) async_slice_size: i64,

    /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
    /// budget exhaustion fires; checked by `exec_with` (so the
    /// sentinel propagates without `unwind` running, mirroring
    /// `yielding.is_some()`) and by `call_value_impl` (so the call
    /// frames survive for the next poll). Cleared by `drive_one`
    /// after translating it to `DispatchOutcome::BudgetExhausted`.
    pub(crate) host_yield_pending: bool,

    /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
    /// when an async-marked [`NativeClosure`] is invoked under
    /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
    /// mechanism as `host_yield_pending` — see `exec_with` +
    /// `call_value_impl`), stashes the in-flight future +
    /// post-completion context here, and surfaces them to
    /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
    /// once the future is moved out into a
    /// `DispatchOutcome::AsyncNativeAwaiting`.
    pub(crate) pending_async_native_fut:
        Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,

    /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
    /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
    /// commit the future's eventual `Ok(nret)` back into the calling
    /// frame's expected result slots. Recorded by the dispatcher;
    /// consumed by [`Vm::commit_async_native_result`] after the
    /// future resolves.
    pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
}
400
/// v1.1 B10 Stage 2 — call-site context an in-flight async native
/// needs preserved across the cooperative-yield boundary.
///
/// The dispatcher records this when it routes a `NativeClosure` with
/// `is_async == true` through the cooperative path; `EvalFuture::poll`
/// hands it back to [`Vm::commit_async_native_result`] once the
/// awaited future resolves so `finish_results` (and the post-call GC
/// checkpoint) can run as if the native had completed synchronously.
///
/// `Copy`, so the context can be stashed in the Vm and handed back by value.
#[derive(Clone, Copy)]
pub(crate) struct AsyncNativeCallCtx {
    /// Stack slot of the called native's function value.
    /// NOTE(review): presumably also the base where results are
    /// committed — confirm against `commit_async_native_result`.
    pub func_slot: u32,
    /// Recorded for parity with the sync native-call path's
    /// `native_nresults`/`gc_top` bookkeeping; reserved for Stage 3+
    /// hook firing + traceback shaping. Not yet read in Stage 2.
    #[allow(dead_code)]
    pub nargs: u32,
    /// Number of results the call site expects from the native.
    pub nresults: i32,
    /// Recorded for Stage 3+ traceback + GC-root-window auditing.
    /// Stage 2 reads `Vm.gc_top` directly post-resume, so this is
    /// unread today; carried so a Stage 3 audit can confirm the
    /// pre-suspend root window matches the post-resume one.
    #[allow(dead_code)]
    pub gc_top: u32,
}
425
/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
/// hookcount). `func` is the Lua hook; the booleans are the PUC mask bits.
///
/// The derived `Default` is the "no hook installed" state: both hook slots
/// `None`, every mask bit `false`, both counters zero.
#[derive(Clone, Copy, Default)]
pub struct HookState {
    /// the hook function (`None` when no hook is installed)
    pub func: Option<Value>,
    /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
    /// (Rust first); both can be installed simultaneously, but most
    /// embedders pick one.
    pub rust_func: Option<RustDebugHook>,
    /// LUA_MASKCALL — fire on function entry
    pub call: bool,
    /// LUA_MASKRET — fire on function return
    pub ret: bool,
    /// LUA_MASKLINE — fire on source-line change
    pub line: bool,
    /// LUA_MASKCOUNT — fire every `count_base` instructions
    pub count: bool,
    /// instruction count between count events (PUC basehookcount)
    pub count_base: i64,
    /// instructions left until the next count event (PUC hookcount)
    pub count_left: i64,
}
449
/// Rust-side debug hook callback (B11). Receives the `Vm` plus a
/// classified event. The callback runs synchronously in the
/// dispatcher; the hook flag (`in_hook`) is set for its duration so
/// hook recursion is suppressed.
///
/// A plain `fn` pointer (not a closure), so it carries no captured state
/// and keeps [`HookState`] `Copy`.
pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
/// Classified debug event delivered to a [`RustDebugHook`].
///
/// Only `Line` carries a payload; the other events are bare tags.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RustHookEvent {
    /// Function entry (`hook_call` analogue).
    Call,
    /// Function return (`hook_return` analogue).
    Return,
    /// Tail call entry (PUC 5.2+ separates this from a plain Call).
    TailCall,
    /// Source-line change (the `u32` is the 1-based line number).
    Line(u32),
    /// Instruction count event (fires every `count_base` instructions).
    Count,
}
470
/// Subscribe to function-call events.
///
/// The `HOOK_MASK_*` constants are the mask flags for
/// [`Vm::set_rust_debug_hook`]; each occupies its own bit, so OR them
/// together to subscribe to multiple event categories with a single hook
/// installation.
pub const HOOK_MASK_CALL: u32 = 1 << 0;
/// Subscribe to function-return events.
pub const HOOK_MASK_RETURN: u32 = 1 << 1;
/// Subscribe to line-change events.
pub const HOOK_MASK_LINE: u32 = 1 << 2;
/// Subscribe to instruction-count events.
pub const HOOK_MASK_COUNT: u32 = 1 << 3;
480
/// A thread's swapped-out execution context (PUC per-thread stack state).
///
/// Each field mirrors the same-named field on `Vm`, which holds the
/// context of whichever thread is currently live.
struct SavedCtx {
    /// the thread's value stack
    stack: Vec<Value>,
    /// the thread's call frames
    frames: Vec<CallFrame>,
    /// the thread's open upvalues (`Vm::open_upvals` keeps these sorted
    /// ascending by stack slot)
    open_upvals: Vec<(u32, Gc<Upvalue>)>,
    /// the thread's to-be-closed slots
    tbc: Vec<u32>,
    /// the thread's logical stack top
    top: u32,
    /// the thread's count of live pcall/xpcall continuation frames
    pcall_depth: u32,
    /// the thread's debug hook state
    hook: HookState,
    /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
    /// the rest of the suspended state so each thread can keep its own
    /// `setfenv(0, env)` rewire without the swap leaking into another
    /// thread (5.1 closure.lua :177).
    globals: Gc<Table>,
}
496
/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
///
/// The first two variants mean a protecting pcall/xpcall continuation
/// absorbed the error; the last means it escaped.
enum Unwound {
    /// caught by a pcall/xpcall continuation; resume running its caller
    Caught,
    /// caught by a continuation that was the entry-level activation; these are
    /// the call's (wrapped) results
    CaughtReturn(Vec<Value>),
    /// no protecting continuation up to `entry_depth`; propagate the error
    Propagated(LuaError),
}
507
/// A resolved debug stack level: a real Lua frame (by index into `frames`) or a
/// synthetic C frame for a call_value boundary.
pub(crate) enum DbgKind {
    /// a real Lua frame; the payload is its index into `frames`
    Lua(usize),
    /// a synthetic C level; the index is the `from_c` Lua frame it sits below,
    /// used to name the native via its invoking call instruction.
    C(usize),
    /// PUC `CIST_TAIL` placeholder — a Lua-to-Lua tail call collapsed the
    /// caller's activation, so `debug.getinfo(level)` at this slot returns
    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
    /// `func = nil` and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
    /// both shapes). The index points at the *tail-called* frame whose
    /// `is_tail` flag induced this synthetic level.
    // The payload is marked `allow(dead_code)`: it is stored but, per the
    // attribute, not read anywhere yet.
    Tail(#[allow(dead_code)] usize),
}
523
/// Outcome of an index/newindex/comparison fast path: either a directly
/// computed result, or a metamethod (with the receiver it resolved against) the
/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
///
/// The fast path itself never calls the metamethod; it only reports which
/// one applies, leaving the invocation strategy to the caller.
enum MmOut {
    /// index → the looked-up value; newindex → done (raw set performed);
    /// comparison → the boolean result already known
    Done(Value),
    /// a metamethod to call; `recv` is the chain element it was found on (the
    /// extra args — key / value — are supplied by the caller)
    Mm { func: Value, recv: Value },
    /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
    /// carries `__le` — `op_compare` swaps the args and negates the result.
    /// Lives separate from `Mm` so the synth path can stay yieldable without
    /// every other Mm caller learning a swap flag they would never set.
    CompareSynth { func: Value },
}
540
/// Metamethod events; discriminants index `Vm::mm_names`.
///
/// Variant order is load-bearing: with `#[repr(usize)]` and no explicit
/// discriminants, each variant's value is its position in this list, and
/// that position lines up entry-for-entry with the `MM_NAMES` table.
/// Insert new events in both places at the same index.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
pub(crate) enum Mm {
    /// `__index`
    Index,
    /// `__newindex`
    NewIndex,
    /// `__call`
    Call,
    /// `__tostring`
    ToString,
    /// `__metatable`
    Metatable,
    /// `__name`
    Name,
    /// `__eq`
    Eq,
    /// `__lt`
    Lt,
    /// `__le`
    Le,
    /// `__concat`
    Concat,
    /// `__len`
    Len,
    /// `__add`
    Add,
    /// `__sub`
    Sub,
    /// `__mul`
    Mul,
    /// `__div`
    Div,
    /// `__mod`
    Mod,
    /// `__pow`
    Pow,
    /// `__idiv`
    IDiv,
    /// `__band`
    BAnd,
    /// `__bor`
    BOr,
    /// `__bxor`
    BXor,
    /// `__shl`
    Shl,
    /// `__shr`
    Shr,
    /// `__unm`
    Unm,
    /// `__bnot`
    BNot,
    /// `__close`
    Close,
    /// `__gc`
    Gc,
    /// `__pairs`
    Pairs,
}
574
/// Metamethod event names, `__`-prefixed, in the same order as the `Mm`
/// variants: entry `i` is the name for the variant whose discriminant is `i`.
/// Keep both lists in step when adding an event.
///
/// NOTE(review): presumably the source strings interned into
/// `Vm::mm_names` at construction — confirm at the constructor.
const MM_NAMES: [&str; 28] = [
    // table access, call, and introspection events
    "__index",
    "__newindex",
    "__call",
    "__tostring",
    "__metatable",
    "__name",
    // comparison events
    "__eq",
    "__lt",
    "__le",
    // concatenation and length
    "__concat",
    "__len",
    // binary arithmetic events
    "__add",
    "__sub",
    "__mul",
    "__div",
    "__mod",
    "__pow",
    "__idiv",
    // binary bitwise events
    "__band",
    "__bor",
    "__bxor",
    "__shl",
    "__shr",
    // unary events
    "__unm",
    "__bnot",
    // lifecycle and iteration events
    "__close",
    "__gc",
    "__pairs",
];
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614    if version <= LuaVersion::Lua53 {
615        format!("__{tm}")
616    } else if tm == "gc" {
617        "__gc".to_string()
618    } else {
619        tm.to_string()
620    }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626    use crate::vm::isa::Op;
627    Some(match op {
628        Op::Add => "add",
629        Op::Sub => "sub",
630        Op::Mul => "mul",
631        Op::Div => "div",
632        Op::Mod => "mod",
633        Op::Pow => "pow",
634        Op::IDiv => "idiv",
635        Op::BAnd => "band",
636        Op::BOr => "bor",
637        Op::BXor => "bxor",
638        Op::Shl => "shl",
639        Op::Shr => "shr",
640        Op::Unm => "unm",
641        Op::BNot => "bnot",
642        Op::Concat => "concat",
643        Op::Len => "len",
644        Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645        Op::SetField | Op::SetTable | Op::SetI => "newindex",
646        Op::Eq | Op::EqK => "eq",
647        Op::Lt => "lt",
648        Op::Le => "le",
649        _ => return None,
650    })
651}
652
/// PUC MAXTAGLOOP: bound on `__index`/`__newindex` chains.
const MAX_TAG_LOOP: u32 = 2000;
/// PUC `MAXCCMT`: bound on a `__call` metamethod chain (lvm.c). A limit of
/// 200 is more than any reasonable program needs and matches PUC 5.4/5.5;
/// the earlier value of `15` was tight enough to fire on calls.lua :194
/// (N=20).
const MAX_CCMT: u32 = 200;
/// PUC LUAI_MAXCCALLS analogue: native↔Lua nesting bound.
const MAX_C_DEPTH: u32 = 200;
/// luna's engine-level VM stack cap (used by call-site overflow checks).
/// `1 << 20` = 1_048_576 slots, slightly larger than PUC's `LUAI_MAXSTACK`
/// (see `PUC_MAXSTACK`) so engine internals have a little headroom above
/// any single library push.
const MAX_LUA_STACK: u32 = 1 << 20;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults via
/// `lua_checkstack` to refuse multi-value pushes (`table.unpack` returning
/// N values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins
/// this at one million — `for j in {lim-10, …}` expects every j ≥ lim-10
/// to fail because the few slots already consumed in the coroutine push
/// the effective cap below lim-10.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// PUC 5.4+ default warnf state. The base library's `warn` function flips
/// between `Off` and `On` via the `@on` / `@off` control messages; any other
/// `@<word>` control is silently ignored, mirroring `lauxlib.c::checkcontrol`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum WarnState {
    /// `warn` calls are silently dropped. This is the state a fresh `Vm`
    /// starts in (`new_inner` sets `warn_state: WarnState::Off`) and the
    /// state entered by `warn("@off")`.
    Off,
    /// `warn` calls are delivered to stderr (after `warn("@on")`).
    On,
}
683
/// Pull a human-readable message out of a `catch_unwind` payload, best effort.
///
/// A formatted `panic!("{x}")` carries a `String`; a literal `panic!("msg")`
/// carries a `&'static str`. Any other payload type is reported as
/// `"<non-string panic>"`. The native-call `catch_unwind` uses this to fold
/// a Rust panic into a Lua error.
///
/// The parameter stays `&Box<dyn Any + Send>` so callers can pass the payload
/// exactly as `catch_unwind` hands it back.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|text| (*text).to_string())
        })
        .unwrap_or_else(|| "<non-string panic>".to_string())
}
697
/// Combined error type returned by [`Vm::eval`] and friends — either the
/// chunk failed to parse / compile, or it raised at runtime.
///
/// `From<SyntaxError>` and `From<LuaError>` are implemented for this type,
/// so `?` lifts either underlying error into the matching variant.
#[derive(Debug)]
pub enum Error {
    /// Parse or compile failure.
    Syntax(SyntaxError),
    /// Runtime error raised during execution.
    Runtime(LuaError),
}
707
708impl From<SyntaxError> for Error {
709    fn from(e: SyntaxError) -> Error {
710        Error::Syntax(e)
711    }
712}
713
714impl From<LuaError> for Error {
715    fn from(e: LuaError) -> Error {
716        Error::Runtime(e)
717    }
718}
719
impl Drop for Vm {
    /// State close: queue every remaining finalizer on the heap, then run
    /// them, before the `Vm`'s fields (including the heap) are dropped.
    fn drop(&mut self) {
        // state close: run `__gc` for every still-registered finalizable before
        // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
        // single pass — objects created by a closing finalizer are not
        // re-finalized (they go to the heap's free list directly).
        // Order matters: the finalizers must be queued before they can be run.
        self.heap.queue_all_finalizers();
        self.run_finalizers();
    }
}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742    frames.push(cf);
743    // Shadow maintenance is debug-only: release builds skip the
744    // increment + assertion entirely. The shadow's purpose in Week 1
745    // is to VERIFY the assumed invariant (frames_top == frames.len())
746    // across all push/pop sites; once Week 2+ migrates readers to
747    // consume the shadow, release will run the increment unconditionally.
748    #[cfg(debug_assertions)]
749    {
750        *frames_top += 1;
751        debug_assert_eq!(
752            *frames_top as usize,
753            frames.len(),
754            "P17-D frames_top out of sync after push",
755        );
756    }
757    #[cfg(not(debug_assertions))]
758    let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763    let r = frames.pop();
764    #[cfg(debug_assertions)]
765    {
766        if r.is_some() {
767            *frames_top = frames_top.saturating_sub(1);
768        }
769        debug_assert_eq!(
770            *frames_top as usize,
771            frames.len(),
772            "P17-D frames_top out of sync after pop",
773        );
774    }
775    #[cfg(not(debug_assertions))]
776    let _ = frames_top;
777    r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — report whether the `LUNA_AOT_PROBE`
/// diagnostic switch is on.
///
/// Returns `true` iff the environment variable is set to a non-empty value
/// that `std::env::var` can read. The answer is computed on the first call
/// and cached in a `OnceLock`, so later changes to the environment are not
/// observed and the dispatcher's hot path only pays for the cached read.
/// Off by default — production deploys don't bleed diagnostic prints.
fn jit_probe_enabled() -> bool {
    use std::sync::OnceLock;

    static PROBE_ON: OnceLock<bool> = OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        matches!(std::env::var("LUNA_AOT_PROBE"), Ok(value) if !value.is_empty())
    })
}
794
795impl Vm {
796    /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797    /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798    /// the Vec replacement to keep the shadow valid.
799    #[inline(always)]
800    fn frames_resync(&mut self) {
801        // Debug-only Week 1 — see `frames_push_sync` comment.
802        #[cfg(debug_assertions)]
803        {
804            self.frames_top = self.frames.len() as u32;
805        }
806    }
807
808    // ====================================================================
809    // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810    //
811    // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812    // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813    // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814    // sites to consume them. Phase 4 removes Vec<CallFrame>.
815    //
816    // Preconditions (debug-asserted):
817    // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818    // - self.stack.len() > base + max_stack (caller has grown stack)
819    // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820    // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821    //
822    // No release-build cost when unused (LTO strips dead methods).
823    // ====================================================================
824
825    /// Write a Lua frame's closure pointer into `stack[base-2]`.
826    /// The caller must ensure `base >= 2` and the slot is within the
827    /// stack's allocated range.
828    #[inline]
829    #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830    fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831        debug_assert!(
832            base >= 2,
833            "frame closure slot needs base >= 2; got {}",
834            base
835        );
836        let idx = (base - 2) as usize;
837        debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838        self.stack[idx] = Value::Closure(cl);
839    }
840
841    /// Read a Lua frame's closure pointer from `stack[base-2]`.
842    /// Returns `None` if the slot doesn't hold a closure (caller is
843    /// expected to treat that as a corrupt frame).
844    ///
845    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846    /// to avoid the enum-match cost on the hot path. Tag check via
847    /// 1-byte load + branch + `as_closure_unchecked` payload load.
848    #[inline]
849    #[allow(dead_code)]
850    fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851        debug_assert!(base >= 2);
852        let v = self.stack.get((base - 2) as usize)?;
853        if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854            // SAFETY: tag byte just verified == CLOSURE.
855            Some(unsafe { v.as_closure_unchecked() })
856        } else {
857            None
858        }
859    }
860
861    /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862    /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863    /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864    /// cleanly through the value stack without losing bits.
865    #[inline]
866    #[allow(dead_code)]
867    fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868        debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869        let idx = (base - 1) as usize;
870        debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871        self.stack[idx] = Value::Int(marker.to_raw());
872    }
873
874    /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875    /// `None` if the slot isn't a `Value::Int` (caller treats as a
876    /// corrupt frame); the kind tag itself may still be invalid, in
877    /// which case [`FrameMarker::kind`] returns `None` on the result.
878    ///
879    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880    /// for the tag check + `as_int_unchecked` for the payload load.
881    #[inline]
882    #[allow(dead_code)]
883    fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884        debug_assert!(base >= 1);
885        let v = self.stack.get((base - 1) as usize)?;
886        if v.tag_byte() == crate::runtime::value::tag::INT {
887            // SAFETY: tag byte just verified == INT.
888            Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889                unsafe { v.as_int_unchecked() },
890            ))
891        } else {
892            None
893        }
894    }
895
    /// Build the raw `Vm` struct without main coroutine / RNG seed / library
    /// setup. Private helper called by `Vm::new_minimal` (and therefore
    /// indirectly by `Vm::new`); the caller is responsible for the rest of
    /// the bring-up.
    ///
    /// Creates the heap, applies the version-dependent GC flags, allocates
    /// the globals table and interns the metamethod names; every other field
    /// starts at its empty / zero / default value shown below.
    fn new_inner(version: LuaVersion) -> Vm {
        let mut heap = Heap::new();
        // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
        // values strongly. gc.lua's "weak tables" section relies on that.
        heap.no_ephemeron = version <= LuaVersion::Lua51;
        // PUC 5.3 needs two GC cycles to finalize a table caught in a
        // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
        // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
        heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
        let globals = heap.new_table();
        // Interned in `MM_NAMES` order so that `Mm` discriminants index the
        // resulting collection.
        let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();

        Vm {
            heap,
            stack: Vec::new(),
            frames: Vec::new(),
            frames_top: 0,
            open_upvals: Vec::new(),
            tbc: Vec::new(),
            top: 0,
            globals,
            type_mt: [None; 5],
            mm_names,
            c_depth: 0,
            pcall_depth: 0,
            nny: 0,
            msgh_depth: 0,
            terminating: None,
            // All-zero placeholder; `new_minimal` seeds the RNG afterwards.
            rng: [0; 4],
            started: std::time::Instant::now(),
            version,
            closing_err: None,
            current: None,
            main_ctx: None,
            yielding: None,
            native_nresults: -1,
            // Left unset here; `new_minimal` creates the main coroutine.
            main_coro: None,
            gc_mode: "incremental",
            gc_top: 0,
            gc_pause: 200,
            gc_stepmul: 100,
            gc_stepsize: 13,
            gc_finalizing: false,
            capi_stack: Vec::new(),
            capi_cstr_pin: None,
            warn_state: WarnState::Off,
            warn_buf: Vec::new(),
            warn_log: Vec::new(),
            instr_budget: None,
            bytecode_loading: true,
            puc_bytecode_loading: false,
            loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
            registry: None,
            file_mt: None,
            io_input: None,
            io_output: None,
            hook: HookState::default(),
            in_hook: false,
            pending_tailcalls: 0,
            errored_native: None,
            hook_ftransfer: 0,
            hook_ntransfer: 0,
            pending_tm: None,
            pending_is_hook: false,
            error_traceback: None,
            public_call_depth: 0,
            running_natives: Vec::new(),
            running_native_slots: Vec::new(),
            // v1.1 A2 — JIT-specific state factored into `JitState`
            // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
            // `install_jit_backend` / `luaL_newstate` swap in
            // `CraneliftBackend` for callers that want JIT acceleration.
            jit: crate::vm::jit_state::JitState::with_null_backend(),
            // v1.1 B12 — host roots ticket pool for the `Lua` facade.
            host_roots: Vec::new(),
            // v1.3 Phase ML — MacroLua registry. Pre-populated with
            // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
            // when this Vm is constructed under `LuaVersion::MacroLua`.
            macro_registry: if version == LuaVersion::MacroLua {
                crate::frontend::macro_expander::MacroRegistry::with_builtins()
            } else {
                crate::frontend::macro_expander::MacroRegistry::new()
            },
            host_roots_free: Vec::new(),
            sort_scratch: Vec::new(),
            // v1.2 Track B — LuaUserdata trait sugar's per-Vm
            // metatable cache. Populated lazily by register_userdata.
            userdata_metatables: std::collections::HashMap::new(),
            // v1.1 B6 — error classification metadata. Defaults to
            // Runtime; set at known sites (syntax / budget trip /
            // native error / type error).
            last_error_kind: crate::vm::error::LuaErrorKind::default(),
            last_error_source: None,
            // v1.1 B10 Stage 1 — async embedder fields. Defaults
            // preserve sync behavior bit-for-bit (`async_mode = false`
            // means the budget hot loop errors out exactly as v1.0).
            async_mode: false,
            async_waker: None,
            async_slice_size: 10_000,
            host_yield_pending: false,
            // v1.1 B10 Stage 2 — pending async-native state. Empty by
            // default; populated only by the dispatcher when an
            // async-marked NativeClosure is invoked under async_mode.
            pending_async_native_fut: None,
            pending_async_native_ctx: None,
        }
    }
1006
1007    /// Build a fully-loaded Vm — the default for embedders that want PUC's
1008    /// standard library surface. Equivalent to `Vm::new_minimal(version)`
1009    /// followed by `vm.open_all_libs()`.
1010    pub fn new(version: LuaVersion) -> Vm {
1011        let mut vm = Vm::new_minimal(version);
1012        vm.open_all_libs();
1013        vm
1014    }
1015
1016    /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1017    /// that want a sandbox (Redis-style scripts, in-game scripting with
1018    /// a curated API) call this and then `open_base` / `open_math` / etc.
1019    /// selectively. The Vm is otherwise fully initialized (main coroutine,
1020    /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1021    pub fn new_minimal(version: LuaVersion) -> Vm {
1022        let mut vm = Vm::new_inner(version);
1023        let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1024        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1025        unsafe { mc.as_mut() }.status = CoroStatus::Running;
1026        vm.main_coro = Some(mc);
1027        let (a, b) = vm.rng_auto_seed();
1028        vm.rng_seed(a as u64, b as u64);
1029        vm
1030    }
1031
1032    /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
1033    /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
1034    /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
1035    /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
1036    /// closures already populated `Proto.jit: JitProtoState::Compiled`
1037    /// does NOT evict those cached entries — call right after
1038    /// construction for a clean swap.
1039    ///
1040    /// Naming: `install_jit_backend` (not `install_default_jit`)
1041    /// because the "default" in luna-core is `NullJitBackend`; the
1042    /// "default JIT" lives in the `luna` crate.
1043    pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
1044    where
1045        C: crate::jit::IntChunkCompiler + 'static,
1046        T: crate::jit::TraceCompiler + 'static,
1047    {
1048        self.jit.chunk_compiler = Box::new(chunk);
1049        self.jit.trace_compiler = Box::new(trace);
1050    }
1051
    /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
    /// storage holder. Default is [`crate::jit::NullJitStorage`];
    /// the `luna_jit` crate's `install_default_jit` pairs this with
    /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
    /// also install a fresh `CraneliftJitStorage`. Storage holds
    /// the per-`Vm` JIT cache + handle collections that used to be
    /// `thread_local!`s in `luna_jit::jit_backend`.
    ///
    /// Re-installation warning: replacing storage on a Vm that already
    /// holds compiled-trace pointers WILL evict their owners (the
    /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
    /// pages). Call right after construction for a clean swap.
    pub fn install_jit_storage<S>(&mut self, storage: S)
    where
        S: crate::jit::JitStorage + 'static,
    {
        // Assigning the new box drops the previously installed storage.
        self.jit.storage = Box::new(storage);
    }
1070
1071    /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
1072    /// reports "skipped" so every closure stays on the interpreter
1073    /// path, and the trace recorder's compile attempt always returns
1074    /// `None`. Intended for tests that want to verify the trait
1075    /// boundary works in a JIT-free configuration, and for the future
1076    /// `luna-core` build path that ships without Cranelift.
1077    ///
1078    /// Calling this on a Vm whose closures already populated
1079    /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
1080    /// cached entries — the dispatcher will still call into them. For
1081    /// a truly JIT-free run, call this immediately after construction.
1082    pub fn install_null_jit(&mut self) {
1083        self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
1084        self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
1085    }
1086
    /// Open the full standard library set on a `new_minimal`-built Vm:
    /// base, math, table, string, utf8, os/io, debug, coroutine and package,
    /// in that order, plus `bit32` when the Vm's version is 5.2.
    /// `Vm::new` calls this; sandboxed embedders open libraries one at a
    /// time instead (`open_base`, `open_math`, `open_table`, …).
    pub fn open_all_libs(&mut self) {
        self.open_base();
        self.open_math();
        self.open_table();
        self.open_string();
        self.open_utf8();
        self.open_os_io();
        self.open_debug();
        self.open_coroutine();
        self.open_package();
        // PUC 5.2 introduced `bit32` and 5.3 retired it (the native bitwise
        // operators replace it on 64-bit integers). Only expose it under 5.2
        // so bitwise.lua's first line (`bit32.band(...)`) resolves without
        // leaking the global into newer dialects.
        if self.version == LuaVersion::Lua52 {
            self.open_bit32();
        }
    }
1108
    /// Install the base library (`print`, `type`, `pairs`, `tostring`,
    /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
    /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
    /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
    /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
    /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
    /// once per Vm.
    ///
    /// Thin wrapper over `crate::vm::builtins::open_base`.
    pub fn open_base(&mut self) {
        crate::vm::builtins::open_base(self);
    }
    /// Install the `math` standard library.
    /// Thin wrapper over `crate::vm::lib_math::open_math`.
    pub fn open_math(&mut self) {
        crate::vm::lib_math::open_math(self);
    }
    /// Install the `table` standard library.
    /// Thin wrapper over `crate::vm::lib_table::open_table`.
    pub fn open_table(&mut self) {
        crate::vm::lib_table::open_table(self);
    }
    /// Install the `string` standard library (and the shared string metatable).
    /// Thin wrapper over `crate::vm::lib_string::open_string`.
    pub fn open_string(&mut self) {
        crate::vm::lib_string::open_string(self);
    }
    /// Install the `utf8` standard library (5.3+).
    /// Thin wrapper over `crate::vm::lib_utf8::open_utf8`; this method itself
    /// performs no version check.
    pub fn open_utf8(&mut self) {
        crate::vm::lib_utf8::open_utf8(self);
    }
    /// Install the `os` and `io` standard libraries together.
    ///
    /// `os` and `io` are merged because file userdata shares state with both
    /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
    /// wraps `os.execute`'s shell).
    /// Thin wrapper over `crate::vm::lib_os_io::open_os_io`.
    pub fn open_os_io(&mut self) {
        crate::vm::lib_os_io::open_os_io(self);
    }
    /// Install the `debug` standard library (introspection / hooks).
    /// Sandbox embedders that start from `new_minimal` do not get it unless
    /// they call this explicitly.
    /// Thin wrapper over `crate::vm::lib_debug::open_debug`.
    pub fn open_debug(&mut self) {
        crate::vm::lib_debug::open_debug(self);
    }
    /// Install the `coroutine` standard library.
    /// Thin wrapper over `crate::vm::lib_coroutine::open_coroutine`.
    pub fn open_coroutine(&mut self) {
        crate::vm::lib_coroutine::open_coroutine(self);
    }
    /// Install `package` plus the 5.1-only `module` and `package.seeall`
    /// aliases.
    /// Thin wrapper over `crate::vm::lib_os_io::open_package` (the
    /// implementation lives in the `lib_os_io` module).
    pub fn open_package(&mut self) {
        crate::vm::lib_os_io::open_package(self);
    }
    /// Install the 5.2-only `bit32` library (5.3+ retired it in favour of
    /// native bitwise ops on 64-bit integers).
    /// Thin wrapper over `crate::vm::lib_bit32::open_bit32`; this method
    /// itself performs no version check — `open_all_libs` is what restricts
    /// it to 5.2.
    pub fn open_bit32(&mut self) {
        crate::vm::lib_bit32::open_bit32(self);
    }
1159
1160    /// xoshiro256** next.
1161    pub(crate) fn rng_next(&mut self) -> u64 {
1162        let s = &mut self.rng;
1163        let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1164        let t = s[1] << 17;
1165        s[2] ^= s[0];
1166        s[3] ^= s[1];
1167        s[1] ^= s[2];
1168        s[0] ^= s[3];
1169        s[2] ^= t;
1170        s[3] = s[3].rotate_left(45);
1171        result
1172    }
1173
1174    /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1175    pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1176        // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1177        // state), then 16 discards to spread the seed. Matches PUC's exact
1178        // sequence so the low-level conformance test passes.
1179        self.rng = [a, 0xff, b, 0];
1180        for _ in 0..16 {
1181            self.rng_next();
1182        }
1183    }
1184
    /// Time elapsed since VM creation (os.clock approximation). Measured
    /// from the `std::time::Instant` that `new_inner` stores in `started`.
    pub(crate) fn uptime(&self) -> std::time::Duration {
        self.started.elapsed()
    }
1189
1190    /// Entropy for math.randomseed() with no arguments.
1191    pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1192        let t = std::time::SystemTime::now()
1193            .duration_since(std::time::UNIX_EPOCH)
1194            .map(|d| d.as_nanos() as u64)
1195            .unwrap_or(0);
1196        let addr = &self.rng as *const _ as u64;
1197        (t as i64, addr as i64)
1198    }
1199
    /// Allocate a native function object (no upvalues): builtin registration.
    /// Allocates on the Vm's heap with an empty upvalue slice and returns the
    /// handle wrapped as `Value::Native`.
    pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
        Value::Native(self.heap.new_native(f, Box::new([])))
    }
1204
    /// Allocate a native function object with captured upvalues.
    /// `upvals` is handed to the heap allocation as-is; the result is the
    /// new handle wrapped as `Value::Native`.
    pub fn native_with(
        &mut self,
        f: crate::runtime::value::NativeFn,
        upvals: Box<[Value]>,
    ) -> Value {
        Value::Native(self.heap.new_native(f, upvals))
    }
1213
    /// Install the shared string metatable (string library, P04).
    /// Stores `mt` in slot 3 of `type_mt`; passing `None` clears it.
    pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
        self.type_mt[3] = mt;
    }
1218
    /// The current globals table (`_G` / `_ENV` source for new chunks).
    /// Returns a copy of the `Gc<Table>` handle, not a copy of the table.
    pub fn globals(&self) -> Gc<Table> {
        self.globals
    }
1223
1224    /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1225    /// Library code that pushes a known number of fresh slots — e.g.
1226    /// `table.unpack` returning N values — consults this to refuse when
1227    /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1228    /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1229    /// coroutine's already-built table eats a few slots, so an unpack of
1230    /// ~lim values can't fit.
1231    pub(crate) fn stack_room(&self) -> i64 {
1232        PUC_MAXSTACK - (self.stack.len() as i64)
1233    }
1234
    /// Repoint the thread's "global table" used by *future* `Vm::load` calls
    /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
    /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
    /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
    ///
    /// Only the `globals` handle is replaced; the previous table is not
    /// modified by this call.
    pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
        self.globals = env;
    }
1242
    /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
    /// 5.5). Determines numeric semantics, available standard libraries, and
    /// metamethod behavior. Returns the value passed to the constructor and
    /// stored in `version`.
    pub fn version(&self) -> LuaVersion {
        self.version
    }
1249
1250    /// Set a global by name. `v` may be any `IntoValue`: a primitive
1251    /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1252    /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1253    /// `Gc<NativeClosure>` handle.
1254    ///
1255    /// Returns `Err(LuaError)` only if the globals table overflows
1256    /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1257    /// String interning + key construction cannot fail.
1258    ///
1259    /// ```
1260    /// # use luna_core::vm::Vm;
1261    /// # use luna_core::version::LuaVersion;
1262    /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1263    /// vm.set_global("answer", 42).unwrap();
1264    /// vm.set_global("ratio", 0.5_f64).unwrap();
1265    /// vm.set_global("hello", "world").unwrap();
1266    /// let r = vm.eval("return answer, ratio, hello").unwrap();
1267    /// assert_eq!(r.len(), 3);
1268    /// ```
1269    pub fn set_global<V: crate::vm::IntoValue>(
1270        &mut self,
1271        name: &str,
1272        v: V,
1273    ) -> Result<(), LuaError> {
1274        let v = v.into_value(self);
1275        let k = Value::Str(self.heap.intern(name.as_bytes()));
1276        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1277        unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1278        self.heap
1279            .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1280        Ok(())
1281    }
1282
1283    /// Backward write barrier shorthand for native lib code: demote `t` from
1284    /// BLACK back to gray so the next propagate step re-traces its fields.
1285    /// No-op outside Propagate (parent is never BLACK at mutation time).
1286    pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1287        self.heap
1288            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1289    }
1290
1291    /// Forward write barrier shorthand: a closed upvalue is a single-slot
1292    /// container — `barrier_forward` is cheaper than `barrier_back` here.
1293    /// No-op outside Propagate.
1294    pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1295        self.heap
1296            .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1297    }
1298
    /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
    /// under non-MacroLua dialects (the macro is stored but the load
    /// path only consults the registry when
    /// `self.version == LuaVersion::MacroLua`).
    ///
    /// `name` is stored without the leading `@` — source code writes
    /// `@double(x)` to invoke a macro registered as `"double"`.
    ///
    /// Forwards `name` and `m` to `register` on this Vm's `macro_registry`.
    pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
        self.macro_registry.register(name, m);
    }
1309
    /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom).
    /// Mostly useful for tests / dogfood resets.
    ///
    /// Calls `clear` on this Vm's `macro_registry`; nothing is re-registered
    /// afterwards, so built-ins stay gone until registered again.
    pub fn clear_macros(&mut self) {
        self.macro_registry.clear();
    }
1315
1316    /// Parse + compile a chunk and close it over the globals table.
1317    pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1318        // Reject oversize input *before* handing the parser/lexer a
1319        // potentially multi-GB slice. The PUC-shaped `not enough memory`
1320        // message keeps `heavy.lua::loadrep` compatibility: that test
1321        // accepts either `string length overflow` or `not enough memory`
1322        // as the failure mode for a feeder loop that outruns the host
1323        // allocator. See `set_loader_input_budget`.
1324        if src.len() > self.loader_input_budget {
1325            return Err(SyntaxError {
1326                line: 0,
1327                msg: b"not enough memory".to_vec(),
1328            });
1329        }
1330        // a precompiled (binary) chunk is undumped; source is parsed + compiled
1331        let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1332        if is_bytecode && !self.bytecode_loading {
1333            return Err(SyntaxError {
1334                line: 0,
1335                msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1336            });
1337        }
1338        let proto = if is_bytecode {
1339            let allow_puc = self.puc_bytecode_loading;
1340            crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1341                |msg| SyntaxError {
1342                    line: 0,
1343                    msg: msg.into_bytes(),
1344                },
1345            )?
1346        } else if self.version.is_macro_lua() {
1347            // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1348            // token vec, run the macro expander pre-pass against the
1349            // per-Vm registry, then hand the rewritten stream to
1350            // `parse_tokens`. The AST + compiler are dialect-agnostic
1351            // because by this point all `@`/quote tokens are gone.
1352            let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1353            let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1354            loop {
1355                let t = lexer.next_token()?;
1356                let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1357                raw.push(t);
1358                if eof {
1359                    break;
1360                }
1361            }
1362            // Drop the trailing Eof — expander operates on the body and
1363            // `parse_tokens` reinserts Eof when it runs out of tokens.
1364            raw.pop();
1365            let expanded = self.macro_registry.expand(raw)?;
1366            let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1367            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1368        } else {
1369            let ast = parse(src, self.version)?;
1370            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1371        };
1372        // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1373        // upvalue with the globals table when the closure has *exactly* one
1374        // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1375        // function with two-or-more upvalues keeps every cell at nil; the
1376        // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1377        // :293's `assert(x() == nil)` pins this contract.
1378        let n = proto.upvals.len();
1379        let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1380        if n == 0 {
1381            // synthetic main chunk has no declared upvalues, but the engine
1382            // still expects at least one cell so the host can probe via
1383            // `debug.upvalueid` etc. Match the historical luna shape.
1384            ups.push(
1385                self.heap
1386                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1387            );
1388        } else if n == 1 {
1389            ups.push(
1390                self.heap
1391                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1392            );
1393        } else {
1394            for _ in 0..n {
1395                ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1396            }
1397        }
1398        Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1399    }
1400
    /// Compile and run `src` as an anonymous chunk; return its results.
    /// Thin wrapper over `eval_chunk` with the fixed source name `"=eval"`
    /// (the name shown in tracebacks). Syntax errors are
    /// surfaced as `LuaError` carrying the formatted PUC-style message
    /// (interned through the heap so the error value composes with
    /// `pcall` / `error_text` like any runtime error).
    pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
        self.eval_chunk(src, "=eval")
    }
1409
1410    /// Render an error value for messages/tests. Non-string errors —
1411    /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1412    /// (`"(error object is a table value)"`); embedders that need
1413    /// structured payloads should inspect `e.0` directly. Errors whose
1414    /// text starts with `"native panic:"` indicate a Rust panic
1415    /// crossed `catch_unwind` — the Vm may be inconsistent and should
1416    /// be dropped (do not reuse).
1417    pub fn error_text(&self, e: &LuaError) -> String {
1418        match e.0 {
1419            Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1420            v => format!("(error object is a {} value)", v.type_name()),
1421        }
1422    }
1423
1424    /// Call any callable value from the host (or from natives like pcall).
1425    pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1426        // host-level entry (no enclosing exec): drop any error state from a
1427        // prior call that propagated uncaught (`error_traceback` would
1428        // otherwise leak into the next debug.traceback call).
1429        if self.public_call_depth == 0 {
1430            self.error_traceback = None;
1431        }
1432        self.public_call_depth += 1;
1433        // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1434        // chunk whose body fits the S1 int-arith whitelist short-circuits
1435        // the whole interpreter dispatch and runs straight through the
1436        // mmap'd native code. The lookup is one Cell::get + one match —
1437        // the slow path (compile attempt on first reach) is paid once per
1438        // Proto.
1439        if args.is_empty()
1440            && let Value::Closure(cl) = f
1441            && let Some(vs) = self.try_jit_call(cl)
1442        {
1443            self.public_call_depth -= 1;
1444            return Ok(vs);
1445        }
1446        let r = self.call_value_impl(f, args, true);
1447        self.public_call_depth -= 1;
1448        r
1449    }
1450
1451    /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1452    /// `Some(values)` when the cached native fn is callable for a
1453    /// zero-arg call. (Non-zero-arg dispatch is handled by
1454    /// `try_jit_call_op` from inside `begin_call`.)
1455    fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1456        use crate::runtime::function::JitProtoState;
1457        if !self.jit.enabled {
1458            return None;
1459        }
1460        let proto = cl.proto;
1461        if let JitProtoState::Untried = proto.jit.get() {
1462            self.populate_jit_cache(proto);
1463        }
1464        match proto.jit.get() {
1465            JitProtoState::Compiled {
1466                entry,
1467                num_args: 0,
1468                returns_one,
1469                arg_float_mask: _,
1470                arg_table_mask: _,
1471                ret_is_float,
1472                ret_is_table,
1473            } => {
1474                // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1475                let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1476                // P11-S5c / S5d.J — install the active Vm + closure
1477                // for any Rust helper the JIT'd code may call (e.g.
1478                // `luna_jit_new_table`, `luna_jit_upval_get`) via
1479                // cranelift `Linkage::Import`. RAII clear on return.
1480                // Chunks with no upvalue reads don't touch the closure
1481                // slot, paying nothing.
1482                // v1.1 A1 Session A — route through chunk_compiler so
1483                // the NullJitBackend path stays inert. Raw-ptr arg
1484                // avoids the &mut self borrow conflict against the
1485                // shared self.jit.chunk_compiler read.
1486                let vm_ptr: *mut Vm = self;
1487                let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1488                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1489                let r = unsafe { f() };
1490                drop(_jit_vm_guard);
1491                // P11-S5d.E' — a JIT helper may have detected a metatable
1492                // on a table operand and parked a deopt request here.
1493                // Discard the sentinel value and return None so the caller
1494                // re-runs the call through the interpreter, which honours
1495                // __index/__newindex.
1496                if self.jit.pending_err.take().is_some() {
1497                    return None;
1498                }
1499                Some(if returns_one {
1500                    let v = if ret_is_float {
1501                        Value::Float(f64::from_bits(r as u64))
1502                    } else if ret_is_table {
1503                        Value::Table(crate::runtime::Gc::from_ptr(
1504                            r as *mut crate::runtime::Table,
1505                        ))
1506                    } else {
1507                        Value::Int(r)
1508                    };
1509                    vec![v]
1510                } else {
1511                    Vec::new()
1512                })
1513            }
1514            // Non-zero-arg Compiled state: call_value's empty-args
1515            // fast path can't drive it. Op::Call handles those.
1516            JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1517        }
1518    }
1519
1520    /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1521    /// `Compiled { … }` or `Failed`; idempotent on already-populated
1522    /// states (call sites guard with a get before invoking).
1523    ///
1524    /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1525    /// `proto.code`. Compiled artefacts live in the thread-local
1526    /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1527    /// `Vm`s loading the same source skip the cranelift compile step
1528    /// entirely.
1529    fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1530        use crate::runtime::function::JitProtoState;
1531        let version = self.version();
1532        let pre53 = version <= crate::version::LuaVersion::Lua53;
1533        // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1534        // are Float). The JIT's `GetUpval` ValueRead path uses this
1535        // to default-pin upvalue reads to Float without a tag check.
1536        let float_only = version <= crate::version::LuaVersion::Lua52;
1537        // v2.0 Track J sub-step J-B — split-borrow JitState so the
1538        // trait method can take `&mut dyn JitStorage` without
1539        // double-borrowing self.jit.
1540        let jit = &mut self.jit;
1541        let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1542        match jit
1543            .chunk_compiler
1544            .try_compile(storage, proto, pre53, float_only)
1545        {
1546            crate::jit::CompileResult::Compiled {
1547                entry,
1548                num_args,
1549                returns_one,
1550                arg_float_mask,
1551                arg_table_mask,
1552                ret_is_float,
1553                ret_is_table,
1554            } => {
1555                proto.jit.set(JitProtoState::Compiled {
1556                    entry,
1557                    num_args,
1558                    returns_one,
1559                    arg_float_mask,
1560                    arg_table_mask,
1561                    ret_is_float,
1562                    ret_is_table,
1563                });
1564            }
1565            crate::jit::CompileResult::Skipped => {
1566                proto.jit.set(JitProtoState::Failed);
1567            }
1568        }
1569    }
1570
    /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
    /// before `push_frame`. Returns `true` when the call was handled
    /// in-place (no new Lua frame) and `false` when the caller must run the
    /// call through the interpreter.
    ///
    /// Constraints: the cached arity must match the call site's `nargs`,
    /// and every argument must fit the kind recorded for its slot (Int,
    /// Float — which also accepts an Int by promotion — or Table; see the
    /// packing loop). The call site's wanted-count `wanted` is honoured by
    /// `finish_results`. Also bails when a debug hook is armed —
    /// JIT'd code does not fire line / call / return hooks, so any
    /// active hook makes the interpreter the source of truth.
    ///
    /// Arguments are read from the stack slots following `func_slot`; the
    /// result (if any) is written to `func_slot` itself.
    fn try_jit_call_op(
        &mut self,
        cl: Gc<LuaClosure>,
        func_slot: u32,
        nargs: u32,
        wanted: i32,
    ) -> bool {
        use crate::runtime::function::JitProtoState;
        if !self.jit.enabled {
            return false;
        }
        // Any active debug hook means the interpreter has to run the
        // call so the hook gets the expected events.
        if self.hook.func.is_some() || self.hook.rust_func.is_some() {
            return false;
        }
        let proto = cl.proto;
        // First reach for this proto: attempt the compile once.
        if let JitProtoState::Untried = proto.jit.get() {
            self.populate_jit_cache(proto);
        }
        // Anything other than a compiled entry (Failed, or still Untried)
        // falls back to the interpreter.
        let JitProtoState::Compiled {
            entry,
            num_args,
            returns_one,
            arg_float_mask,
            arg_table_mask,
            ret_is_float,
            ret_is_table,
        } = proto.jit.get()
        else {
            return false;
        };
        if num_args as u32 != nargs {
            return false;
        }
        // Pack args into i64 bit-patterns per the per-slot expected
        // kind. A Float-typed slot accepts Value::Float verbatim and
        // promotes Value::Int(x) via i64 → f64; a Table-typed slot
        // accepts only Value::Table and passes the raw Gc ptr; an
        // Int-typed slot accepts only Value::Int. Any other shape
        // bails to the interpreter so the call's actual dynamics
        // (metamethod dispatch / type-coerce) take over.
        let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
            [0; crate::jit::MAX_JIT_ARITY as usize];
        for i in 0..num_args as usize {
            // Argument i lives in the slot right after the callee + i.
            let v = self.stack[(func_slot + 1) as usize + i];
            let want_float = (arg_float_mask >> i) & 1 == 1;
            let want_table = (arg_table_mask >> i) & 1 == 1;
            args[i] = match (want_table, want_float, v) {
                (true, _, Value::Table(t)) => t.as_ptr() as i64,
                (false, false, Value::Int(x)) => x,
                (false, true, Value::Float(f)) => f.to_bits() as i64,
                (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
                _ => return false,
            };
        }
        // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
        // matching guard in `try_jit_call`.
        // v1.1 A1 Session A — route through chunk_compiler.
        let vm_ptr: *mut Vm = self;
        let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
        // SAFETY: `entry` is the JIT-compiled function entry pointer cached
        // for this proto, and each arm transmutes it to the `fn`-pointer
        // signature matching the cached `num_args` (IntChunkFn / IntFnN);
        // the guard above keeps the Vm + closure pin live across the call.
        let r = unsafe {
            match num_args {
                0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
                1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
                2 => {
                    (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
                }
                3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
                    args[0], args[1], args[2],
                ),
                4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
                    args[0], args[1], args[2], args[3],
                ),
                _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
            }
        };
        drop(_jit_vm_guard);
        // P11-S5d.E' — see matching path in `try_jit_call`. A helper
        // flagged a metatable on a table operand; bail to the interpreter
        // so `push_frame` runs the call from scratch.
        if self.jit.pending_err.take().is_some() {
            return false;
        }
        // Write result at func_slot, replacing the closure value, then
        // hand to finish_results to pad/truncate per the call site's
        // `wanted` count. The raw i64 is decoded per the cached return
        // kind: float bit pattern, table pointer, or plain integer.
        if returns_one {
            let v = if ret_is_float {
                Value::Float(f64::from_bits(r as u64))
            } else if ret_is_table {
                Value::Table(crate::runtime::Gc::from_ptr(
                    r as *mut crate::runtime::Table,
                ))
            } else {
                Value::Int(r)
            };
            self.stack[func_slot as usize] = v;
            self.finish_results(func_slot, 1, wanted);
        } else {
            self.finish_results(func_slot, 0, wanted);
        }
        true
    }
1684
1685    /// `call_value` with control over the `from_c` debug boundary. A `__close`
1686    /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1687    /// invokes it inside that ci), so it is called with `from_c = false`: its
1688    /// debug parent is the closing function, not a synthetic C level.
1689    fn call_value_impl(
1690        &mut self,
1691        f: Value,
1692        args: &[Value],
1693        from_c: bool,
1694    ) -> Result<Vec<Value>, LuaError> {
1695        if self.c_depth >= MAX_C_DEPTH {
1696            return Err(self.rt_err("stack overflow"));
1697        }
1698        self.c_depth += 1;
1699        let func_slot = self.stack.len() as u32;
1700        self.stack.push(f);
1701        self.stack.extend_from_slice(args);
1702        self.top = self.stack.len() as u32;
1703        let r = self.call_at(func_slot, args.len() as u32, from_c);
1704        self.c_depth -= 1;
1705        if r.is_err()
1706            && self.yielding.is_none()
1707            && self.terminating.is_none()
1708            && !self.host_yield_pending
1709            && self.pending_async_native_fut.is_none()
1710        {
1711            // A `coroutine.yield` in flight raises a sentinel error to unwind the
1712            // Rust stack, but the suspended coroutine's frames/registers (which
1713            // sit at/above `func_slot`) must survive for the next resume — so we
1714            // only truncate on a real error. A self-close termination is in the
1715            // same boat: the dying thread's state is discarded wholesale.
1716            // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1717            // the same boat as `yielding`: the next `EvalFuture::poll`
1718            // resumes the same call, so the in-flight frames must
1719            // survive.
1720            self.stack.truncate(func_slot as usize);
1721            self.top = func_slot;
1722        }
1723        r
1724    }
1725
    /// Invoke `f` with the running thread marked non-yieldable for the duration
    /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
    /// boundary and errors instead of suspending. Used by library callbacks
    /// (sort comparator, gsub replacement) that run via synchronous Rust
    /// recursion and so could not be re-entered after a yield.
    ///
    /// Returns whatever `call_value` returns for `f(args…)`.
    pub(crate) fn call_noyield(
        &mut self,
        f: Value,
        args: &[Value],
    ) -> Result<Vec<Value>, LuaError> {
        // `nny` counts nested non-yieldable calls; `is_yieldable` and
        // `yield_barrier` refuse to yield while it is non-zero.
        self.nny += 1;
        let r = self.call_value(f, args);
        // Lowered on both the Ok and the Err path.
        self.nny -= 1;
        r
    }
1741
1742    // ---- coroutines (P05) ----
1743
    /// Allocate a new coroutine object on the heap for `body`, handing it
    /// the Vm's current globals table.
    pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
        // The new coroutine inherits the creating thread's current globals
        // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
        // `l_gt`). `Vm.globals` always reflects the live thread, so reading
        // it here picks the creator regardless of which coro is running.
        self.heap.new_coro(body, self.globals)
    }
1751
1752    /// Is `t` the thread whose context is currently live in the VM?
1753    pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1754        match (self.current, t) {
1755            (None, None) => true,
1756            (Some(a), Some(b)) => a.ptr_eq(b),
1757            _ => false,
1758        }
1759    }
1760
1761    /// Read an open-upvalue slot from its owning thread's stack (the live VM
1762    /// stack if that thread is current, else its saved context).
1763    #[doc(hidden)]
1764    pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1765        let s = slot as usize;
1766        if self.is_current_thread(thread) {
1767            self.stack[s]
1768        } else {
1769            match thread {
1770                Some(co) => co.stack[s],
1771                None => self.main_ctx.as_ref().expect("main context").stack[s],
1772            }
1773        }
1774    }
1775
1776    fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1777        let s = slot as usize;
1778        if self.is_current_thread(thread) {
1779            self.stack[s] = v;
1780        } else {
1781            match thread {
1782                Some(co) => {
1783                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1784                    unsafe { co.as_mut() }.stack[s] = v;
1785                    // co.stack is traced by Coro::trace; demote co back to
1786                    // gray so propagate re-traces this slot if it was
1787                    // already black.
1788                    self.heap
1789                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1790                }
1791                None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1792            }
1793        }
1794    }
1795
1796    /// Whether `co` is the main thread's identity object.
1797    pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1798        self.main_coro.is_some_and(|m| m.ptr_eq(co))
1799    }
1800
1801    /// The status of `co` from the caller's view. The main thread's identity
1802    /// object has no stored status — it is "running" when nothing else runs,
1803    /// else "normal" (it resumed the active coroutine).
1804    pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1805        if self.is_main_coro(co) {
1806            if self.current.is_none() {
1807                CoroStatus::Running
1808            } else {
1809                CoroStatus::Normal
1810            }
1811        } else {
1812            co.status
1813        }
1814    }
1815
1816    /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1817    /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1818    /// context. Handlers see the coroutine's death error (if it died by error)
1819    /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1820    /// with error `e` and no handler overrode it; `Err` means a handler raised.
1821    pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1822        // re-entrant close: a __close handler closed its own coroutine while the
1823        // outer close is mid-flight (its context is live). Report success and let
1824        // the outer close finish — re-entering the swap would corrupt the stack.
1825        if self.current.is_some_and(|c| c.ptr_eq(co)) {
1826            return Ok(None);
1827        }
1828        // A chain of coroutines whose `__close` handlers each close the previous
1829        // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1830        // The calling handler's `call_value` has already pushed `c_depth` to the
1831        // cap, so here it reads as full first — report PUC's "C stack overflow"
1832        // before the next handler call would surface the plainer "stack overflow".
1833        if self.c_depth >= MAX_C_DEPTH {
1834            return Err(self.rt_err("C stack overflow"));
1835        }
1836        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1837        let death_err = unsafe { co.as_mut() }.error_value.take();
1838        // swap the caller's live context out (into a GC-rooted home) and the
1839        // coroutine's in, mirroring resume_coro, so the __close handlers run on
1840        // the coroutine's stack while everything stays rooted.
1841        let resumer = self.current;
1842        let rctx = self.take_ctx();
1843        match resumer {
1844            Some(r) => {
1845                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1846                let m = unsafe { r.as_mut() };
1847                m.stack = rctx.stack;
1848                m.frames = rctx.frames;
1849                m.open_upvals = rctx.open_upvals;
1850                m.tbc = rctx.tbc;
1851                m.top = rctx.top;
1852                m.pcall_depth = rctx.pcall_depth;
1853            }
1854            None => self.main_ctx = Some(rctx),
1855        }
1856        self.load_coro_ctx(co);
1857        self.current = Some(co);
1858        let result = self.close_slots(0, death_err);
1859        // discard the (now-closed) coroutine context and restore the caller
1860        let _ = self.take_ctx();
1861        match resumer {
1862            Some(r) => {
1863                self.load_coro_ctx(r);
1864                self.current = Some(r);
1865            }
1866            None => {
1867                let m = self.main_ctx.take().expect("main context saved");
1868                self.put_ctx(m);
1869                self.current = None;
1870            }
1871        }
1872        {
1873            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1874            let m = unsafe { co.as_mut() };
1875            m.status = CoroStatus::Dead;
1876            m.stack = Vec::new();
1877            m.frames = Vec::new();
1878            m.open_upvals = Vec::new();
1879            m.tbc = Vec::new();
1880            m.top = 0;
1881            m.pcall_depth = 0;
1882            m.resume_at = None;
1883            m.error_value = None;
1884        }
1885        result.map(|()| death_err)
1886    }
1887
1888    /// `coroutine.running`: the running thread plus whether it is the main one.
1889    pub(crate) fn running_thread(&self) -> (Value, bool) {
1890        match self.current {
1891            Some(co) => (Value::Coro(co), false),
1892            None => (Value::Coro(self.main_coro.expect("main coro")), true),
1893        }
1894    }
1895
1896    /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1897    /// thread) can yield. The main thread never can; any other coroutine can
1898    /// unless it is dead.
1899    pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1900        match co {
1901            Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1902            // the running thread can yield only outside any non-yieldable C call
1903            None => self.current.is_some() && self.nny == 0,
1904        }
1905    }
1906
1907    /// Why `coroutine.yield` may not suspend the running thread right now, as a
1908    /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1909    /// from "inside an unyieldable C call" (sort/gsub callback).
1910    pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1911        if self.current.is_none() {
1912            Some("attempt to yield from outside a coroutine")
1913        } else if self.nny > 0 {
1914            Some("attempt to yield across a C-call boundary")
1915        } else {
1916            None
1917        }
1918    }
1919
    /// The coroutine whose context is currently live (`None` on the main thread).
    /// Returns a copy of the `current` field; nothing is modified.
    pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
        self.current
    }
1924
1925    /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1926    /// its pending `__close` handlers, then signal termination. The handlers run
1927    /// here, in place, with the thread still non-yieldable (a yield in one hits
1928    /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1929    /// way a yield does — `exec_with` propagates it past any protecting pcall
1930    /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1931    /// clean death (or, if a handler raised, the coroutine's error).
1932    pub(crate) fn close_running(&mut self) -> LuaError {
1933        let death = match self.close_slots(0, None) {
1934            Ok(()) => None,
1935            Err(e) => Some(e.0),
1936        };
1937        self.terminating = Some(death);
1938        LuaError(Value::Nil)
1939    }
1940
1941    /// `coroutine.status` as seen by the caller.
1942    pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1943        match self.effective_coro_status(co) {
1944            CoroStatus::Suspended => "suspended",
1945            CoroStatus::Running => "running",
1946            CoroStatus::Normal => "normal",
1947            CoroStatus::Dead => "dead",
1948        }
1949    }
1950
1951    fn take_ctx(&mut self) -> SavedCtx {
1952        let saved = SavedCtx {
1953            stack: std::mem::take(&mut self.stack),
1954            frames: std::mem::take(&mut self.frames),
1955            open_upvals: std::mem::take(&mut self.open_upvals),
1956            tbc: std::mem::take(&mut self.tbc),
1957            top: self.top,
1958            pcall_depth: self.pcall_depth,
1959            hook: self.hook,
1960            globals: self.globals,
1961        };
1962        self.frames_resync(); // P17-D Week 1 — frames now empty.
1963        saved
1964    }
1965
1966    fn put_ctx(&mut self, c: SavedCtx) {
1967        self.stack = c.stack;
1968        self.frames = c.frames;
1969        self.open_upvals = c.open_upvals;
1970        self.tbc = c.tbc;
1971        self.top = c.top;
1972        self.pcall_depth = c.pcall_depth;
1973        self.hook = c.hook;
1974        self.globals = c.globals;
1975        self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
1976    }
1977
1978    /// Move a coroutine's saved context into the live VM fields.
1979    fn load_coro_ctx(&mut self, co: Gc<Coro>) {
1980        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1981        let m = unsafe { co.as_mut() };
1982        self.stack = std::mem::take(&mut m.stack);
1983        self.frames = std::mem::take(&mut m.frames);
1984        self.open_upvals = std::mem::take(&mut m.open_upvals);
1985        self.tbc = std::mem::take(&mut m.tbc);
1986        self.top = m.top;
1987        self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
1988        self.pcall_depth = m.pcall_depth;
1989        self.hook = m.hook;
1990        self.globals = m.globals;
1991    }
1992
1993    /// Save the live VM context back into a coroutine object.
1994    fn store_coro_ctx(&mut self, co: Gc<Coro>) {
1995        let c = self.take_ctx();
1996        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1997        let m = unsafe { co.as_mut() };
1998        m.stack = c.stack;
1999        m.frames = c.frames;
2000        m.open_upvals = c.open_upvals;
2001        m.tbc = c.tbc;
2002        m.top = c.top;
2003        m.pcall_depth = c.pcall_depth;
2004        m.hook = c.hook;
2005        m.globals = c.globals;
2006        // bulk-overwrite of every collectable field traced by Coro::trace:
2007        // demote the coro back to gray so propagate re-traces its new state.
2008        self.heap
2009            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2010    }
2011
2012    /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
2013    /// or errors. Ok(values) carries yielded or returned values; Err carries an
2014    /// error raised inside the coroutine (the coroutine becomes dead).
2015    pub(crate) fn resume_coro(
2016        &mut self,
2017        co: Gc<Coro>,
2018        args: Vec<Value>,
2019    ) -> Result<Vec<Value>, LuaError> {
2020        match co.status {
2021            CoroStatus::Suspended => {}
2022            CoroStatus::Dead => return Err(self.rt_err("cannot resume dead coroutine")),
2023            _ => return Err(self.rt_err("cannot resume non-suspended coroutine")),
2024        }
2025        if self.c_depth >= MAX_C_DEPTH {
2026            return Err(self.rt_err("C stack overflow"));
2027        }
2028        self.c_depth += 1;
2029        let resumer = self.current;
2030        // save the resumer's live context away
2031        let rctx = self.take_ctx();
2032        match resumer {
2033            Some(r) => {
2034                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2035                let m = unsafe { r.as_mut() };
2036                m.stack = rctx.stack;
2037                m.frames = rctx.frames;
2038                m.open_upvals = rctx.open_upvals;
2039                m.tbc = rctx.tbc;
2040                m.top = rctx.top;
2041                m.pcall_depth = rctx.pcall_depth;
2042                m.globals = rctx.globals;
2043                m.status = CoroStatus::Normal;
2044                // bulk overwrite of every traced field on r — mirror
2045                // store_coro_ctx's barrier_back so propagate re-traces r.
2046                self.heap
2047                    .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
2048            }
2049            None => self.main_ctx = Some(rctx),
2050        }
2051        // swap the coroutine in
2052        self.load_coro_ctx(co);
2053        {
2054            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2055            let m = unsafe { co.as_mut() };
2056            m.status = CoroStatus::Running;
2057            m.resumer = resumer;
2058        }
2059        // co.resumer is a traced Gc field; barrier_back covers the new
2060        // resumer reference and any future field writes during this call.
2061        self.heap
2062            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2063        self.current = Some(co);
2064
2065        // drive it
2066        let drive = if co.started {
2067            self.coro_continue(&args)
2068        } else {
2069            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2070            unsafe { co.as_mut() }.started = true;
2071            self.coro_first(co.body, &args)
2072        };
2073
2074        // classify: a self-close termination or a pending yield each win over
2075        // the (sentinel) error they raised to unwind the Rust stack.
2076        let (outcome, status) = if let Some(death) = self.terminating.take() {
2077            // the coroutine closed itself: it dies now, cleanly or with the
2078            // error a `__close` handler raised.
2079            match death {
2080                Some(e) => {
2081                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2082                    unsafe { co.as_mut() }.error_value = Some(e);
2083                    self.heap
2084                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2085                    (Err(LuaError(e)), CoroStatus::Dead)
2086                }
2087                None => (Ok(Vec::new()), CoroStatus::Dead),
2088            }
2089        } else {
2090            match self.yielding.take() {
2091                Some((vals, fslot, nres)) => {
2092                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2093                    unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2094                    (Ok(vals), CoroStatus::Suspended)
2095                }
2096                None => {
2097                    // died: a return is clean, an error is remembered so a later
2098                    // `coroutine.close` can report it (PUC lua_closethread).
2099                    // Capture the error-point traceback (set by `unwind` before
2100                    // popping the failing frames) and prepend a synthetic
2101                    // top entry for the C native that initiated the error
2102                    // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2103                    // on the dead coroutine still shows the error site
2104                    // (db.lua :848 family).
2105                    if drive.is_err() {
2106                        let mut tb = self.error_traceback.take().unwrap_or_default();
2107                        if let Some(nm) = self.errored_native.take() {
2108                            let mut prefixed: Vec<u8> = Vec::new();
2109                            prefixed.extend_from_slice(
2110                                format!("\n\t[C]: in function '{nm}'").as_bytes(),
2111                            );
2112                            prefixed.extend(tb);
2113                            tb = prefixed;
2114                        }
2115                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2116                        unsafe { co.as_mut() }.error_traceback = Some(tb);
2117                    }
2118                    if let Err(e) = drive {
2119                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2120                        unsafe { co.as_mut() }.error_value = Some(e.0);
2121                        self.heap
2122                            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2123                    }
2124                    (drive, CoroStatus::Dead)
2125                }
2126            }
2127        };
2128
2129        // save the coroutine's context back and restore the resumer
2130        self.store_coro_ctx(co);
2131        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2132        unsafe { co.as_mut() }.status = status;
2133        match resumer {
2134            Some(r) => {
2135                self.load_coro_ctx(r);
2136                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2137                unsafe { r.as_mut() }.status = CoroStatus::Running;
2138                self.current = Some(r);
2139            }
2140            None => {
2141                let m = self.main_ctx.take().expect("main context saved");
2142                self.put_ctx(m);
2143                self.current = None;
2144            }
2145        }
2146        self.c_depth -= 1;
2147        outcome
2148    }
2149
2150    /// First resume: install the body function at slot 0 and run.
2151    fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2152        self.stack.clear();
2153        self.stack.push(body);
2154        self.stack.extend_from_slice(args);
2155        self.top = self.stack.len() as u32;
2156        match self.begin_call(0, Some(args.len() as u32), -1, true) {
2157            Ok(true) => self.exec_with(1),
2158            Ok(false) => Ok(self.take_results(0)),
2159            Err(e) => Err(e),
2160        }
2161    }
2162
2163    /// Resume after a yield: deliver `args` as the results of the call that
2164    /// yielded, then continue the suspended thread.
2165    fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2166        let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2167        let n = args.len() as u32;
2168        // Restore the full register window of the suspended top frame: a yield
2169        // that unwound through a native (call_value) may have left the stack
2170        // shorter than the frame needs. `base + max_stack` is what push_frame
2171        // allocates; `fslot + n` covers the delivered yield results.
2172        let frame_need = self
2173            .frames
2174            .last()
2175            .and_then(CallFrame::lua)
2176            .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2177            .unwrap_or(0);
2178        let need = frame_need.max((fslot + n) as usize);
2179        if self.stack.len() < need {
2180            self.stack.resize(need, Value::Nil);
2181        }
2182        for (i, &v) in args.iter().enumerate() {
2183            self.stack[fslot as usize + i] = v;
2184        }
2185        self.finish_results(fslot, n, nres);
2186        // the suspended `coroutine.yield` (a C call) now returns its resume
2187        // values: fire the matching "return" hook PUC defers until the resume.
2188        self.hook_return(true, 1, n)?;
2189        self.exec_with(1)
2190    }
2191
2192    /// `coroutine.yield`: suspend the running coroutine, recording where to
2193    /// resume. Errors if called outside a coroutine. Returns a sentinel error
2194    /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2195    pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2196        let nres = self.native_nresults;
2197        self.yielding = Some((vals, func_slot, nres));
2198        // value is irrelevant: resume_coro consults `self.yielding`, not this
2199        LuaError(Value::Nil)
2200    }
2201
2202    /// Install or clear the debug hook on the running thread (`debug.sethook`
2203    /// without a thread argument). Arms the calling frame's `oldpc` to the
2204    /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2205    /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2206    /// native return: the very next traceexec compares against the sethook
2207    /// CALL's line. When the install statement and the following statement are
2208    /// on different source lines (db.lua :322), `changedline` fires for that
2209    /// first statement; when they share a line (db.lua :25 wrapper), they do
2210    /// not, so the wrapper line is not re-fired.
2211    pub(crate) fn install_hook(&mut self, hook: HookState) {
2212        self.hook = hook;
2213        if self.hook.line
2214            && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2215        {
2216            f.hook_oldpc = f.pc.saturating_sub(1);
2217        }
2218    }
2219
2220    /// Install a hook on `target` (`None`/current thread → the live VM fields;
2221    /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2222    /// with an optional thread argument.
2223    ///
2224    /// `target == None` means "no explicit thread argument" — PUC binds that
2225    /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2226    /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2227    /// of whether that's the main thread or a currently-resumed coroutine
2228    /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2229    /// `store_coro_ctx`). So a `None` target should always route to
2230    /// `install_hook` on the live fields. The pre-fix predicate gate
2231    /// `is_current_thread(target)` returned `false` when running inside a
2232    /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2233    /// and silently dropped the hook on the floor — the install happened on
2234    /// no thread at all.
2235    pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2236        if target.is_none() || self.is_current_thread(target) {
2237            self.install_hook(state);
2238        } else if let Some(co) = target {
2239            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2240            let m = unsafe { co.as_mut() };
2241            m.hook = state;
2242            if state.line
2243                && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2244            {
2245                f.hook_oldpc = u32::MAX;
2246            }
2247            // co.hook.func is a traced Value (Coro::trace covers it); demote
2248            // co back to gray so propagate sees the new hook function.
2249            self.heap
2250                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2251        }
2252    }
2253
2254    /// The hook state of `target` (`None`/current → the live VM state).
2255    pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2256        match target {
2257            t if self.is_current_thread(t) => self.hook,
2258            Some(co) => co.hook,
2259            None => self.hook,
2260        }
2261    }
2262
2263    /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
2264    /// hooks disabled (PUC clears the mask) and its results/stack growth are
2265    /// discarded so the interrupted frame's register window is untouched.
2266    /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
2267    fn run_hook(
2268        &mut self,
2269        event: &[u8],
2270        line: Option<i64>,
2271        from_native: bool,
2272    ) -> Result<(), LuaError> {
2273        // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
2274        // synchronous fn pointer call). Both Rust and Lua hooks may be
2275        // installed; both observe each event.
2276        if let Some(rh) = self.hook.rust_func {
2277            let evt = match event {
2278                b"call" => Some(RustHookEvent::Call),
2279                b"return" => Some(RustHookEvent::Return),
2280                b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
2281                b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
2282                b"count" => Some(RustHookEvent::Count),
2283                _ => None,
2284            };
2285            if let Some(evt) = evt {
2286                let was_in_hook = self.in_hook;
2287                self.in_hook = true;
2288                rh(self, evt);
2289                self.in_hook = was_in_hook;
2290            }
2291        }
2292        let Some(hook) = self.hook.func else {
2293            return Ok(());
2294        };
2295        let saved_top = self.top;
2296        let saved_len = self.stack.len();
2297        let name = Value::Str(self.heap.intern(event));
2298        let lv = line.map_or(Value::Nil, Value::Int);
2299        self.in_hook = true;
2300        // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
2301        // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
2302        // ci sat below `hookf` (the function being hooked). When that hooked
2303        // function is native, no Lua frame for it exists in luna's `frames`;
2304        // model it as a synthetic C level by pushing the hook with
2305        // `from_c = true` (then `c_frame_name` reads the caller's call
2306        // instruction → e.g. `name = "sethook"`). When the hooked function is
2307        // Lua (its frame is still on the stack), push with `from_c = false`
2308        // so the level descent lands on it directly. The hook's own frame
2309        // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
2310        // (PUC `CIST_HOOKED`).
2311        self.pending_is_hook = true;
2312        let r = self.call_value_impl(hook, &[name, lv], from_native);
2313        self.pending_is_hook = false;
2314        self.in_hook = false;
2315        self.stack.truncate(saved_len);
2316        self.top = saved_top;
2317        r.map(|_| ())
2318    }
2319
2320    /// Fire the "call" hook on entry to a function, if armed and not already in
2321    /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2322    /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2323    /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2324    /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2325    fn hook_call_with(
2326        &mut self,
2327        from_native: bool,
2328        nargs: u32,
2329        is_tail: bool,
2330    ) -> Result<(), LuaError> {
2331        if self.hook.call
2332            && !self.in_hook
2333            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2334        {
2335            self.hook_ftransfer = 1;
2336            self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2337            // PUC 5.1 didn't distinguish tail-call events — every call,
2338            // including tail-calls, fired plain `"call"`. 5.2 introduced
2339            // the separate `"tail call"` event (mask `"c"` covers both).
2340            // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2341            // "return","tail return","return","tail return"}`.
2342            let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2343                b"tail call"
2344            } else {
2345                b"call"
2346            };
2347            self.run_hook(event, None, from_native)?;
2348        }
2349        Ok(())
2350    }
2351
2352    pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2353        self.hook_call_with(from_native, nargs, false)
2354    }
2355
2356    /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2357    /// the first result slot relative to the activation's func slot, ntransfer
2358    /// the number of results.
2359    pub(crate) fn hook_return(
2360        &mut self,
2361        from_native: bool,
2362        ftransfer: u32,
2363        nresults: u32,
2364    ) -> Result<(), LuaError> {
2365        if self.hook.ret
2366            && !self.in_hook
2367            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2368        {
2369            self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2370            self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2371            self.run_hook(b"return", None, from_native)?;
2372        }
2373        Ok(())
2374    }
2375
2376    /// PUC "tail return" event — fires once per tail call that collapsed
2377    /// into the activation now returning, *after* its own "return" event.
2378    /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2379    fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2380        if self.hook.ret
2381            && !self.in_hook
2382            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2383        {
2384            self.run_hook(b"tail return", None, false)?;
2385        }
2386        Ok(())
2387    }
2388
2389    /// Call a metamethod with a single expected result.
2390    fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2391        let mut r = self.call_value(f, args)?;
2392        Ok(if r.is_empty() {
2393            Value::Nil
2394        } else {
2395            r.swap_remove(0)
2396        })
2397    }
2398
2399    /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2400    /// driven through the interpreter loop with a `Meta` continuation, so a
2401    /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2402    /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2403    /// Returns to the caller with the call set up — the opcode arm must do no
2404    /// further work on the running frame and let the loop iterate. `tm` is
2405    /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2406    /// born from this call inherits it via `pending_tm`, so
2407    /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2408    /// (db.lua :878).
2409    fn begin_meta_call(
2410        &mut self,
2411        func: Value,
2412        args: &[Value],
2413        action: MetaAction,
2414        tm: &'static str,
2415    ) -> Result<(), LuaError> {
2416        let saved_top = self.top;
2417        let cont_slot = self.stack.len() as u32;
2418        self.stack.push(func);
2419        self.stack.extend_from_slice(args);
2420        self.top = self.stack.len() as u32;
2421        frames_push_sync(
2422            &mut self.frames,
2423            &mut self.frames_top,
2424            CallFrame::Cont(NativeCont {
2425                kind: ContKind::Meta(MetaCont { action, saved_top }),
2426                func_slot: cont_slot,
2427                nresults: 1,
2428            }),
2429        );
2430        let saved_tm = self.pending_tm.replace(tm);
2431        // begin_call drives a Lua metamethod through the loop (returns true) or
2432        // runs a native one inline (returns false, leaving results at cont_slot
2433        // for the loop head to pick up); either way the Meta cont resolves there.
2434        let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2435        // Native callees never consumed pending_tm (push_frame is only hit on
2436        // a Lua callee); restore so it doesn't leak to a later push_frame.
2437        self.pending_tm = saved_tm;
2438        r?;
2439        Ok(())
2440    }
2441
2442    /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2443    fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2444        match self.index_step(t, key)? {
2445            MmOut::Done(v) => self.stack[dst as usize] = v,
2446            MmOut::Mm { func, recv } => {
2447                self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2448            }
2449            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2450        }
2451        Ok(())
2452    }
2453
2454    /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2455    fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2456        match self.newindex_step(t, key, v)? {
2457            MmOut::Done(_) => {}
2458            MmOut::Mm { func, recv } => {
2459                self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2460            }
2461            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2462        }
2463        Ok(())
2464    }
2465
2466    /// Apply a comparison opcode's outcome: a known boolean drives the
2467    /// conditional skip directly; a metamethod is called yieldably, its
2468    /// truthiness driving the skip on return.
2469    fn op_compare(
2470        &mut self,
2471        step: MmOut,
2472        l: Value,
2473        r: Value,
2474        k: bool,
2475        tm: &'static str,
2476    ) -> Result<(), LuaError> {
2477        match step {
2478            MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2479            MmOut::Mm { func, .. } => {
2480                self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2481            }
2482            MmOut::CompareSynth { func } => {
2483                // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2484                // negation are driven through `MetaAction::Compare` so the
2485                // metamethod call can yield like any other compare.
2486                self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2487            }
2488        }
2489        Ok(())
2490    }
2491
2492    /// Complete a VM instruction whose metamethod just returned `result` (PUC
2493    /// `luaV_finishOp`). The running frame is already back on top.
2494    fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2495        match action {
2496            MetaAction::Store { dst } => self.stack[dst as usize] = result,
2497            MetaAction::Discard => {}
2498            MetaAction::Compare { k, negate } => {
2499                let t = if negate {
2500                    !result.truthy()
2501                } else {
2502                    result.truthy()
2503                };
2504                self.cond_skip(t, k);
2505            }
2506            MetaAction::Concat { dst, base_a } => {
2507                self.stack[dst as usize] = result;
2508                self.top = dst + 1;
2509                self.concat_run(base_a)?;
2510            }
2511        }
2512        Ok(())
2513    }
2514
2515    // ---- metatables ----
2516
2517    pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2518        match v {
2519            Value::Table(t) => t.metatable(),
2520            Value::Userdata(u) => u.metatable(),
2521            v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2522        }
2523    }
2524
2525    /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2526    /// non-table). No-op for tables (they carry their own).
2527    pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2528        if let Some(i) = type_mt_slot(v) {
2529            self.type_mt[i] = mt;
2530        }
2531    }
2532
2533    /// The metamethod of `v` for `mm`, or nil.
2534    pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2535        match self.metatable_of(v) {
2536            Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2537            None => Value::Nil,
2538        }
2539    }
2540
2541    /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2542    /// only fires when both operands carry a metatable that exposes the same
2543    /// implementation. Returns the metamethod to call, or `Nil` when no
2544    /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2545    /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2546    pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2547        let mt1 = self.metatable_of(l);
2548        let Some(mt1) = mt1 else { return Value::Nil };
2549        let key = Value::Str(self.mm_names[mm as usize]);
2550        let tm1 = mt1.get(key);
2551        if tm1.is_nil() {
2552            return Value::Nil;
2553        }
2554        let mt2 = self.metatable_of(r);
2555        let Some(mt2) = mt2 else { return Value::Nil };
2556        if mt1.as_ptr() == mt2.as_ptr() {
2557            return tm1;
2558        }
2559        let tm2 = mt2.get(key);
2560        if tm2.is_nil() {
2561            return Value::Nil;
2562        }
2563        if tm1.raw_eq(tm2) {
2564            return tm1;
2565        }
2566        Value::Nil
2567    }
2568
2569    /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2570    /// or full userdata whose metatable carries a string `__name` reports that
2571    /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2572    pub(crate) fn obj_typename(&self, v: Value) -> String {
2573        if matches!(v, Value::Table(_) | Value::Userdata(_))
2574            && let Value::Str(s) = self.get_mm(v, Mm::Name)
2575        {
2576            return String::from_utf8_lossy(s.as_bytes()).into_owned();
2577        }
2578        v.type_name().to_string()
2579    }
2580
2581    fn call_at(
2582        &mut self,
2583        func_slot: u32,
2584        nargs: u32,
2585        from_c: bool,
2586    ) -> Result<Vec<Value>, LuaError> {
2587        if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2588            self.exec()
2589        } else {
2590            // native completed inline; results at func_slot..top
2591            Ok(self.take_results(func_slot))
2592        }
2593    }
2594
2595    /// Switch the `collectgarbage` mode, returning the previous mode name.
2596    pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2597        std::mem::replace(&mut self.gc_mode, new)
2598    }
2599
2600    /// Whether the current `collectgarbage` mode is "generational" (where a
2601    /// "step" is a minor collection — a full atomic pass — rather than a paced
2602    /// incremental sweep).
2603    pub(crate) fn gc_mode_is_generational(&self) -> bool {
2604        self.gc_mode == "generational"
2605    }
2606
2607    /// Current `stepsize` pacing parameter (PUC: 0 means an unbounded step that
2608    /// completes a whole cycle at once).
2609    pub(crate) fn gc_stepsize(&self) -> i64 {
2610        self.gc_stepsize
2611    }
2612
2613    /// `collectgarbage("param", name [,value])`: read (or set, returning the
2614    /// previous value of) a pacing parameter. Returns `None` for an unknown
2615    /// name so the caller can raise PUC's `invalid parameter` error. The
2616    /// collector is stop-the-world, so these only round-trip for API fidelity.
2617    pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2618        let slot = match name {
2619            b"pause" => &mut self.gc_pause,
2620            b"stepmul" => &mut self.gc_stepmul,
2621            b"stepsize" => &mut self.gc_stepsize,
2622            _ => return None,
2623        };
2624        let prev = *slot;
2625        if let Some(v) = set {
2626            *slot = v;
2627        }
2628        Some(prev)
2629    }
2630
2631    /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2632    /// paced sweep via `Vm::gc_step`.
2633    ///
2634    /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2635    /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2636    /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2637    /// suspected UAF is structurally closed — born objects no longer become
2638    /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2639    ///
2640    /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2641    /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2642    /// O(SWEEP_DIVISOR) safe-points regardless of size.
2643    #[inline(always)]
2644    pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2645        if self.gc_finalizing {
2646            return;
2647        }
2648        if !self.heap.gc_due() {
2649            return;
2650        }
2651        // v2.2 UAF-A fix: the historical `gc_top = live_top` narrowed
2652        // past slots that prior bytecode left holding Gc-bearing
2653        // Values (slots are never auto-cleared on frame pop, only
2654        // overwritten). The narrow GC swept the closure, the slot
2655        // kept the stale `Value::Closure`, and a later wider GC
2656        // OOB'd in `Marker::header`. Use `max(live_top, self.top)`
2657        // — `self.top` is the multi-result top maintained across
2658        // calls/returns, so it leads the live frontier closely
2659        // enough to cover stale closure refs without over-rooting
2660        // the whole `Vec` (which broke gc.lua / db.lua weak-table
2661        // semantics).
2662        self.gc_top = live_top.max(self.top);
2663        // PUC stepmul: % of allocation rate. Higher = more GC work per
2664        // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2665        // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2666        const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2667        const MIN_BUDGET: usize = 64_000;
2668        let stepmul = self.gc_stepmul.max(1) as usize;
2669        let divisor = (SWEEP_BASE / stepmul).max(1);
2670        let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2671        if self.gc_step(budget) {
2672            self.heap.rearm_gc_pause(self.gc_pause);
2673        }
2674    }
2675
2676    /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2677    /// roots (open upvalues, which are not first-class Values). Shared by the
2678    /// full collector and the incremental-sweep driver so both snapshot the
2679    /// exact same live set.
2680    fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2681        let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2682        roots.push(Value::Table(self.globals));
2683        for mt in self.type_mt.into_iter().flatten() {
2684            roots.push(Value::Table(mt));
2685        }
2686        for &n in &self.mm_names {
2687            roots.push(Value::Str(n));
2688        }
2689        // root only the running thread's live registers (PUC marks [stack, top)):
2690        // freed temporaries above `gc_top` are excluded so weak values stranded
2691        // there are not pinned. Suspended threads (main_ctx, other coroutines)
2692        // stay whole-rooted below — safe over-rooting, and they are not the
2693        // thread whose weak-table loop is under test.
2694        let live = (self.gc_top as usize).min(self.stack.len());
2695        roots.extend_from_slice(&self.stack[..live]);
2696        for cf in &self.frames {
2697            match cf {
2698                CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2699                CallFrame::Cont(NativeCont {
2700                    kind: ContKind::Xpcall { handler },
2701                    ..
2702                }) => roots.push(*handler),
2703                CallFrame::Cont(NativeCont {
2704                    kind: ContKind::Close(cc),
2705                    ..
2706                }) => {
2707                    // Root the error threaded through this close chain so a
2708                    // `collectgarbage()` inside a sibling `__close` handler
2709                    // does not free it before the next handler is invoked
2710                    // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2711                    if let Some(e) = cc.pending {
2712                        roots.push(e);
2713                    }
2714                    if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2715                        roots.push(err);
2716                    }
2717                }
2718                CallFrame::Cont(_) => {}
2719            }
2720        }
2721        if let Some(e) = self.closing_err {
2722            roots.push(e);
2723        }
2724        // B12 host roots — Lua-facade handles keep their referenced
2725        // values alive across calls/yields. Trace the whole vector;
2726        // unused slots (post-`unpin_all`) carry Value::Nil which the
2727        // GC ignores.
2728        for slot in &self.host_roots {
2729            // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2730            roots.push(slot.value);
2731        }
2732        // v2.1 — `table.sort` and similar builtins stash their working
2733        // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2734        // comparator callback doesn't free strings/tables snapshotted
2735        // off the live table (sort.lua's `load(..)(); collectgarbage()`
2736        // compare regression).
2737        for buf in &self.sort_scratch {
2738            roots.extend_from_slice(buf);
2739        }
2740        // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2741        // mid-execution. Without rooting them here, a `collectgarbage()`
2742        // invoked inside the running native (sort.lua AA `load(..)();
2743        // collectgarbage()` compare callback regression) sweeps the
2744        // closure that's actively executing, leaving `nc.upvals`
2745        // dangling and the Rust local `nc` pointing at recycled memory
2746        // — the SIGSEGV pops on the very next field access or pop.
2747        for &nc in &self.running_natives {
2748            roots.push(Value::Native(nc));
2749        }
2750        // the running thread's debug hook (suspended threads root theirs via
2751        // Coro::trace / the main_ctx sweep below)
2752        if let Some(h) = self.hook.func {
2753            roots.push(h);
2754        }
2755        // the running coroutine (its saved-context fields live in the VM, but
2756        // the object itself + its resumer chain must stay reachable)
2757        if let Some(co) = self.current {
2758            roots.push(Value::Coro(co));
2759        }
2760        if let Some(mc) = self.main_coro {
2761            roots.push(Value::Coro(mc));
2762        }
2763        // debug.getregistry() and io library state
2764        if let Some(r) = self.registry {
2765            roots.push(Value::Table(r));
2766        }
2767        if let Some(mt) = self.file_mt {
2768            roots.push(Value::Table(mt));
2769        }
2770        if let Some(f) = self.io_input {
2771            roots.push(Value::Userdata(f));
2772        }
2773        if let Some(f) = self.io_output {
2774            roots.push(Value::Userdata(f));
2775        }
2776        // the main thread's saved context while a coroutine runs
2777        if let Some(m) = &self.main_ctx {
2778            roots.extend_from_slice(&m.stack);
2779            if let Some(h) = m.hook.func {
2780                roots.push(h);
2781            }
2782            for cf in &m.frames {
2783                match cf {
2784                    CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2785                    CallFrame::Cont(NativeCont {
2786                        kind: ContKind::Xpcall { handler },
2787                        ..
2788                    }) => roots.push(*handler),
2789                    CallFrame::Cont(_) => {}
2790                }
2791            }
2792        }
2793        let mut extra: Vec<*mut GcHeader> = self
2794            .open_upvals
2795            .iter()
2796            .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2797            .collect();
2798        if let Some(m) = &self.main_ctx {
2799            extra.extend(
2800                m.open_upvals
2801                    .iter()
2802                    .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2803            );
2804        }
2805        (roots, extra)
2806    }
2807
2808    /// Run a full collection with the VM's roots, then run any `__gc`
2809    /// finalizers the collection scheduled. A no-op (returns 0) when already
2810    /// inside a finalizer — the collector is not reentrant (PUC).
2811    pub fn collect_garbage(&mut self) -> usize {
2812        if self.gc_finalizing {
2813            return 0;
2814        }
2815        let (roots, extra) = self.gc_roots();
2816        let freed = self.heap.collect_ex(&roots, &extra);
2817        self.run_finalizers();
2818        freed
2819    }
2820
2821    /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2822    /// threw; gc.lua's "errors during collection" probe relies on it. This
2823    /// variant runs the same cycle but propagates the captured finalizer
2824    /// error to the explicit caller.
2825    pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2826        if self.gc_finalizing {
2827            return Ok(0);
2828        }
2829        let (roots, extra) = self.gc_roots();
2830        let freed = self.heap.collect_ex(&roots, &extra);
2831        self.run_finalizers_or_err()?;
2832        Ok(freed)
2833    }
2834
2835    /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
2836    /// should report fail rather than collect).
2837    pub(crate) fn gc_is_finalizing(&self) -> bool {
2838        self.gc_finalizing
2839    }
2840
2841    /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2842    /// = true indicates more pieces follow (concatenated until the first
2843    /// `to_cont = false` call flushes the whole line). Mirrors
2844    /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2845    ///   * If the buffer is fresh, `to_cont` is false, and the message is
2846    ///     `@<word>`, treat as a control message — only `@on` / `@off` are
2847    ///     recognised; any other `@…` is silently ignored.
2848    ///   * Otherwise, while the state is `Off`, drop the piece; while `On`,
2849    ///     accumulate, and flush to stderr + `warn_log` on the
2850    ///     non-continuation call.
2851    pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
2852        if self.warn_buf.is_empty()
2853            && !to_cont
2854            && let Some(b'@') = msg.first().copied()
2855        {
2856            match &msg[1..] {
2857                b"on" => self.warn_state = WarnState::On,
2858                b"off" => self.warn_state = WarnState::Off,
2859                _ => {} // unknown control — silently ignored (PUC checkcontrol)
2860            }
2861            return;
2862        }
2863        if self.warn_state == WarnState::Off {
2864            // drop continuation pieces too — PUC `warnfoff` is the trampoline
2865            return;
2866        }
2867        self.warn_buf.extend_from_slice(msg);
2868        if !to_cont {
2869            let line = std::mem::take(&mut self.warn_buf);
2870            eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
2871            self.warn_log.push(line);
2872        }
2873    }
2874
2875    /// Drain the in-process warning log (one entry per emitted message, sans
2876    /// `"Lua warning: "` prefix and newline). For test harnesses that want to
2877    /// assert on warn output without scraping stderr.
2878    pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
2879        std::mem::take(&mut self.warn_log)
2880    }
2881
2882    /// Arm the cooperative instruction budget (P09 embedding). The run loop
2883    /// decrements this once per dispatch turn; on zero it raises a catchable
2884    /// `"instruction budget exceeded"` error and disarms itself so the host
2885    /// can resume with a fresh budget on the next call. `None` removes the
2886    /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
2887    /// short-script semantics.
2888    pub fn set_instr_budget(&mut self, budget: Option<i64>) {
2889        self.instr_budget = budget;
2890    }
2891
2892    /// Remaining instruction budget (None when unbounded).
2893    pub fn instr_budget_remaining(&self) -> Option<i64> {
2894        self.instr_budget
2895    }
2896
2897    /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
2898    /// **must** disable JIT when relying on `instr_budget` — see the
2899    /// `jit_enabled` field doc for the rationale.
2900    pub fn set_jit_enabled(&mut self, enabled: bool) {
2901        self.jit.enabled = enabled;
2902    }
2903
2904    /// Current JIT enable state.
2905    pub fn jit_enabled(&self) -> bool {
2906        self.jit.enabled
2907    }
2908
2909    /// Toggle the trace JIT (P12). Off by default while the sprint
2910    /// develops. When enabled, hot back-edges are counted on
2911    /// `Proto.trace_hot_count`; once the counter passes
2912    /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
2913    /// mode at the back-edge target. Stays a no-op until S2's
2914    /// trace lowerer and S3's dispatcher land.
2915    pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
2916        self.jit.trace_enabled = enabled;
2917    }
2918
2919    /// P16-A — opt-in flag for the self-link cycle catch. See field
2920    /// docs for the correctness blocker. Default `false`.
2921    pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
2922        self.jit.p16_self_link_enabled = enabled;
2923    }
2924
2925    /// Current state of the P16-A self-link cycle catch.
2926    pub fn p16_self_link_enabled(&self) -> bool {
2927        self.jit.p16_self_link_enabled
2928    }
2929
2930    /// Current trace-JIT enable state.
2931    pub fn trace_jit_enabled(&self) -> bool {
2932        self.jit.trace_enabled
2933    }
2934
2935    /// Number of traces that have closed cleanly (looped back to the
2936    /// head PC) since this Vm was constructed. Cumulative; used by
2937    /// tests + tuning. Will become the dominant signal once S2's
2938    /// compile + cache lands.
2939    pub fn trace_closed_count(&self) -> u64 {
2940        self.jit.counters.closed
2941    }
2942
2943    /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
2944    /// hit an un-recordable op — the latter lands at S2).
2945    pub fn trace_aborted_count(&self) -> u64 {
2946        self.jit.counters.aborted
2947    }
2948
2949    /// P13-S13-G v2 — number of compiled traces whose close shape
2950    /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
2951    /// pin `dispatchable=false` because the dispatcher can't
2952    /// resume at a depth>0 PC without the matching CallFrames.
2953    /// S4-step4b's frame-mat helper could synthesise those, but
2954    /// the InlineAbort emit path isn't wired up yet — fresh
2955    /// pickup work for S13-G v2-full.
2956    pub fn trace_inline_abort_count(&self) -> u64 {
2957        self.jit.counters.inline_abort
2958    }
2959
2960    /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
2961    pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
2962        &self.jit.counters.dispatch_off_reasons
2963    }
2964
2965    /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
2966    pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
2967        &self.jit.counters.compile_failed_reasons
2968    }
2969
2970    /// P13-S13-H — see `JitCounters::closed_lens`. Returns
2971    /// `(is_call_triggered, ops_len)` for every trace that closed.
2972    pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
2973        &self.jit.counters.closed_lens
2974    }
2975
2976    /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
2977    /// Per-reason close-cause counts (recorder-side abort/discard +
2978    /// lowerer-side dispatch_off labels) keyed by `&'static str`.
2979    pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
2980        &self.jit.counters.close_cause_counts
2981    }
2982
2983    /// v2.0 Track-R R3b — number of compiled traces whose
2984    /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
2985    /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
2986    /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
2987    /// hot loop with p16-on. R3b keeps `dispatchable = false` even
2988    /// when this count bumps; R3d will lift it.
2989    pub fn trace_downrec_link_compiled_count(&self) -> u64 {
2990        self.jit.counters.downrec_link_compiled
2991    }
2992
2993    /// v2.0 Track-R R3c — see
2994    /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
2995    /// of times the dispatcher's `is_downrec_sentinel` arm fired and
2996    /// classified the return as a caller-pc-guard HIT.
2997    pub fn trace_downrec_dispatched_count(&self) -> u64 {
2998        self.jit.counters.downrec_dispatched
2999    }
3000
3001    /// v2.0 Track-R R3c — see
3002    /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
3003    /// times the dispatcher entered a `downrec_link`-bearing trace and
3004    /// the trace returned via the lowerer's deopt block (caller-pc
3005    /// guard MISS), or the dispatcher itself force-deopted via the
3006    /// stitch-cycle checkpoint.
3007    pub fn trace_downrec_deopt_count(&self) -> u64 {
3008        self.jit.counters.downrec_deopt
3009    }
3010
3011    /// v2.0 Track-R R3d — see
3012    /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3013    /// Number of compiled traces whose lowerer emitted a multi-way
3014    /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3015    /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3016    pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3017        self.jit.counters.multi_way_guard_emitted
3018    }
3019
3020    /// P12-S2.C — number of closed traces the lowerer compiled and
3021    /// parked on `Proto.traces`. Re-records of the same head_pc are
3022    /// deduped (the second close finds the head_pc already cached
3023    /// and skips compile), so this never exceeds `trace_closed_count`.
3024    pub fn trace_compiled_count(&self) -> u64 {
3025        self.jit.counters.compiled
3026    }
3027
3028    /// v2.1 Phase 1I.B — number of times the recorder captured a
3029    /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3030    /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3031    /// by the Phase 1I.B opt-in fire test to verify the env gate
3032    /// wiring round-trips end-to-end (env -> recorder -> snapshot
3033    /// -> counter -> getter -> assertion).
3034    pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3035        self.jit.counters.field_ic_snapshot_captured
3036    }
3037
3038    /// P12-S2.C — number of closed traces the lowerer rejected
3039    /// (any of the bail conditions in
3040    /// `crate::jit::trace::try_compile_trace`).
3041    pub fn trace_compile_failed_count(&self) -> u64 {
3042        self.jit.counters.compile_failed
3043    }
3044
3045    /// P12-S3 — number of times the dispatcher jumped into a
3046    /// compiled trace. Bumps on every entry; `trace_deopt_count`
3047    /// counts the subset where the trace returned with a parked
3048    /// `jit_pending_err`.
3049    pub fn trace_dispatched_count(&self) -> u64 {
3050        self.jit.counters.dispatched
3051    }
3052
3053    /// P12-S3 — number of trace entries that came back with
3054    /// `jit_pending_err` set (typically a metatable shadowed an
3055    /// index inside a helper, forcing the dispatcher to fall back
3056    /// to the interpreter without committing the trace's result).
3057    pub fn trace_deopt_count(&self) -> u64 {
3058        self.jit.counters.deopt
3059    }
3060
3061    /// P15-A v1 — number of times the dispatcher started a side
3062    /// trace recording (an `exit_hit_counts` slot crossed
3063    /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3064    /// was None and trace JIT was enabled). Each unit is exactly one
3065    /// `start_side_trace` call; the actual compile success counts
3066    /// under [`Self::trace_compiled_count`] like any other trace.
3067    /// Probe use: distinguishes the "side-trace pipeline fired"
3068    /// signal from the "primary back-edge / call-trigger fired"
3069    /// signal so v0-v3 architectural progress is visible without
3070    /// reading per-counter histograms.
3071    pub fn trace_side_trace_started_count(&self) -> u64 {
3072        self.jit.counters.side_trace_started
3073    }
3074
3075    /// P15-A v2-A — number of side-trace recordings that closed,
3076    /// compiled successfully, AND patched their parent's
3077    /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3078    /// dispatch through these ptrs yet (v2-B/C job), but the
3079    /// counter + ptr write proves the compile + link pipeline is
3080    /// complete end-to-end.
3081    pub fn trace_side_trace_compiled_count(&self) -> u64 {
3082        self.jit.counters.side_trace_compiled
3083    }
3084
3085    /// P15-A v2-C-A5-C — number of side traces that compiled
3086    /// successfully but were SHEDDED by the close-handler shape-
3087    /// match gate (`exit_tags_match_entry_tags`). High ratios
3088    /// vs. `trace_side_trace_compiled_count` indicate the
3089    /// architecture is shedding lots of would-be side traces;
3090    /// useful as a tuning probe for future relaxation of the
3091    /// gate or for child-IR re-specialisation against parent's
3092    /// exit shape.
3093    pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3094        self.jit.counters.side_trace_shape_mismatch
3095    }
3096
3097    /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3098    /// classified as `crate::jit::trace::EscapeState::Sinkable`
3099    /// across every successfully compiled trace on this Vm. The
3100    /// count is post-demotion: sites pre-emit drops back to Escaped
3101    /// for not meeting v1 sunk-emit criteria are NOT counted.
3102    /// `trace_sunk_alloc_count` matches one-for-one today (every
3103    /// surviving Sinkable site goes through sunk emit).
3104    pub fn trace_sinkable_seen_count(&self) -> u64 {
3105        self.jit.counters.sinkable_seen
3106    }
3107
3108    /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3109    pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3110        self.jit.counters.accum_bufferable_seen
3111    }
3112
3113    /// P15-prep — total dispatch hits across all known traces,
3114    /// broken into hot-exit telemetry (max single-exit count,
3115    /// total dispatches, exit count). Used by probes to identify
3116    /// hot side-exits as side-trace candidates.
3117    ///
3118    /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3119    /// recursively, so inner functions' traces are reported.
3120    pub fn trace_exit_hit_summary(
3121        &self,
3122        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3123    ) -> Vec<(u32, Vec<u32>)> {
3124        fn walk(
3125            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3126            out: &mut Vec<(u32, Vec<u32>)>,
3127        ) {
3128            for ct in proto.traces.borrow().iter() {
3129                let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3130                out.push((ct.head_pc, counts));
3131            }
3132            for inner in proto.protos.iter() {
3133                walk(*inner, out);
3134            }
3135        }
3136        let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3137        walk(cl.proto, &mut out);
3138        out
3139    }
3140
3141    /// P15-A v0 — surface every side-exit slot whose hit count is
3142    /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3143    /// `cl.proto` (recursively walking `proto.protos`). Returned
3144    /// entries are side-trace candidates: each carries the parent
3145    /// trace's `(head_proto, head_pc)`, the exit's index in the
3146    /// parent's `exit_hit_counts`, and the side trace's natural
3147    /// entry shape (`cont_pc` + `exit_tags`).
3148    ///
3149    /// Layout of `exit_hit_counts` (mirrored by the iter):
3150    /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3151    ///   window-sized exit_tags).
3152    /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3153    ///   → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3154    /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3155    ///   exit_tags = `ct.exit_tags`).
3156    pub fn hot_exit_iter(
3157        &self,
3158        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3159    ) -> Vec<crate::jit::trace::HotExitInfo> {
3160        use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3161        fn walk(
3162            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3163            out: &mut Vec<HotExitInfo>,
3164        ) {
3165            for ct in proto.traces.borrow().iter() {
3166                let inline_n = ct.per_exit_inline.len();
3167                let tags_n = ct.per_exit_tags.len();
3168                debug_assert_eq!(
3169                    ct.exit_hit_counts.len(),
3170                    inline_n + tags_n + 1,
3171                    "exit_hit_counts layout invariant violated"
3172                );
3173                for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3174                    let hits = cell.get();
3175                    if hits < HOTEXIT_THRESHOLD {
3176                        continue;
3177                    }
3178                    let (cont_pc, exit_tags) = if idx < inline_n {
3179                        let ent = &ct.per_exit_inline[idx];
3180                        (ent.cont_pc, ent.exit_tags.clone())
3181                    } else if idx < inline_n + tags_n {
3182                        let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3183                        (*pc, tags.clone())
3184                    } else {
3185                        (ct.head_pc, ct.exit_tags.clone())
3186                    };
3187                    out.push(HotExitInfo {
3188                        head_proto: proto,
3189                        head_pc: ct.head_pc,
3190                        exit_idx: idx,
3191                        hits,
3192                        cont_pc,
3193                        exit_tags,
3194                    });
3195                }
3196            }
3197            for inner in proto.protos.iter() {
3198                walk(*inner, out);
3199            }
3200        }
3201        let mut out: Vec<HotExitInfo> = Vec::new();
3202        walk(cl.proto, &mut out);
3203        out
3204    }
3205
3206    /// P12-S5-B — sum of NewTable sites that actually took the
3207    /// sunk-emit path across every successfully compiled trace on
3208    /// this Vm. Each counted site skips its heap `Gc<Table>`
3209    /// allocation per dispatch; the array part lives as Cranelift
3210    /// `Variable`s for the duration of the trace.
3211    pub fn trace_sunk_alloc_count(&self) -> u64 {
3212        self.jit.counters.sunk_alloc
3213    }
3214
3215    /// P12-S5-C — sum of materialise-helper emit sites across every
3216    /// successfully compiled trace on this Vm. Each unit is a
3217    /// (site × cmp side-exit) pair whose IR reconstructs a heap
3218    /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3219    /// emit is wiring materialise into the right side-exits.
3220    pub fn trace_materialize_emit_count(&self) -> u64 {
3221        self.jit.counters.materialize_emit
3222    }
3223
3224    /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3225    /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3226    /// replaces a `Heap::new_closure_inline` call on the dispatch
3227    /// path; the count is static (one per matching op per compiled
3228    /// trace), summed at compile success.
3229    pub fn trace_closure_emit_count(&self) -> u64 {
3230        self.jit.counters.closure_emit
3231    }
3232
3233    /// v2.0 Stage 7 polish 6 fire experiment — see
3234    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3235    /// Number of compiled traces whose `per_exit_inline.len() > 0`
3236    /// (depth>0 inlined cmp side-exits emitted).
3237    pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3238        self.jit.counters.per_exit_inline_compiled
3239    }
3240
3241    /// v2.0 Stage 7 polish 6 fire experiment — see
3242    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3243    /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3244    /// `dispatchable == true` — i.e. the count of compiled traces
3245    /// that would actually exercise the AOT polish 6 chain-reloc +
3246    /// deploy-resolver path.
3247    pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3248        self.jit.counters.per_exit_inline_dispatchable
3249    }
3250
3251    /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3252    /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3253    /// whether a self-recursive function actually walked the depth
3254    /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3255    /// across traces and Vm activations; reset only on `Vm::new`.
3256    pub fn trace_max_depth_seen(&self) -> u8 {
3257        self.jit.max_depth_seen
3258    }
3259
3260    /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3261    /// dispatch time). The frame-materialization helper reads `.base`
3262    /// to compute offsets for each inlined frame's window.
3263    #[doc(hidden)]
3264    pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3265        match self.frames.last() {
3266            Some(CallFrame::Lua(f)) => Some(*f),
3267            _ => None,
3268        }
3269    }
3270
3271    /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3272    /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3273    /// by `luna-tools` (`luna-profile`'s sampler walks this from
3274    /// inside a `Count` hook). Sibling-module scope: not part of the
3275    /// public embedder surface, but `inspect::frames_for_profile` is.
3276    #[doc(hidden)]
3277    pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3278        &self.frames
3279    }
3280
3281    /// P12-S4-step4b — ensure the value stack covers indices
3282    /// `[0..need)`. Extends with Nil if shorter. Called by the
3283    /// frame-materialization helper before pushing an inlined frame
3284    /// whose register window may exceed the current stack length.
3285    #[doc(hidden)]
3286    pub fn jit_ensure_stack(&mut self, need: usize) {
3287        if self.stack.len() < need {
3288            self.stack.resize(need, Value::Nil);
3289        }
3290    }
3291
3292    /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3293    /// `__close` handlers would run (any active tbc slot ≥ from
3294    /// holding a non-nil/false Value); if so, parks a deopt sentinel
3295    /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3296    /// IR branches to the deopt block. Otherwise performs the safe
3297    /// part of close — `close_from(from)` to close open upvals +
3298    /// drop any drained tbc entries ≥ from — and returns 0.
3299    ///
3300    /// Returns are i64-shaped so the cranelift import sig stays
3301    /// trivial (i64 → i64 mapping).
3302    #[doc(hidden)]
3303    pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3304        if self.jit.pending_err.is_some() {
3305            return 1;
3306        }
3307        let Some(f) = self.jit_last_lua_frame() else {
3308            self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3309            return 1;
3310        };
3311        let from = f.base + start_offset;
3312        let has_handler = self.tbc.iter().any(|&s| {
3313            s >= from && {
3314                let v = self.stack[s as usize];
3315                !matches!(v, Value::Nil | Value::Bool(false))
3316            }
3317        });
3318        if has_handler {
3319            self.jit.pending_err =
3320                Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3321            return 1;
3322        }
3323        self.close_from(from);
3324        // Drain any tbc entries ≥ from (they're nil/false stubs the
3325        // interpreter's drive_close would have skipped silently).
3326        while let Some(&s) = self.tbc.last() {
3327            if s < from {
3328                break;
3329            }
3330            self.tbc.pop();
3331        }
3332        0
3333    }
3334
3335    /// P12-S7-B — spill the trace's current value for a register to
3336    /// the underlying `vm.stack[base + slot_offset]`. Required before
3337    /// an `Op::Closure` whose inner proto has an `in_stack: true`
3338    /// upval at `slot_offset` — the helper's `find_or_create_upval`
3339    /// captures a live pointer to `vm.stack[base + slot_offset]`,
3340    /// which must hold the right value at call time (trace IR's
3341    /// Variable hasn't yet been written back).
3342    ///
3343    /// Parameters arrive as i64 from the IR: `slot_offset` is the
3344    /// caller-frame register index (`u32` in practice, depth=0
3345    /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3346    /// `crate::runtime::value::raw` byte for the slot's RegKind;
3347    /// `raw_bits` is the trace Variable's `use_var` payload
3348    /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3349    /// raw `Gc::as_ptr` cast).
3350    #[doc(hidden)]
3351    pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3352        let Some(f) = self.jit_last_lua_frame() else {
3353            self.jit.pending_err =
3354                Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3355            return;
3356        };
3357        let idx = (f.base as usize) + (slot_offset as usize);
3358        if self.stack.len() <= idx {
3359            self.stack.resize(idx + 1, Value::Nil);
3360        }
3361        // SAFETY: caller (trace JIT IR emit) provides matching
3362        // `(tag, raw_bits)` — same shape produced by Value::unpack.
3363        let v = unsafe {
3364            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3365        };
3366        self.stack[idx] = v;
3367    }
3368
3369    /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3370    /// Mirrors the interp arm (this file ~L5316): copies the
3371    /// generator/state/control triple from `R[A..=A+2]` to
3372    /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3373    /// the iterator function via `begin_call`. v2 only handles
3374    /// `Value::Native` iterators (the canonical `ipairs_iter` /
3375    /// `next` builtins) — a Lua-closure iterator would push a Lua
3376    /// frame mid-trace, breaking `recording_frame_base`, so we
3377    /// deopt by parking a `pending_err` and returning `-1`.
3378    ///
3379    /// `slot_offset` is the caller-frame register index (=
3380    /// `inst.a()` decoded from a u32-wide field). `nvars` is
3381    /// `inst.c() as i32` — the caller's expected return count.
3382    /// P12-S12-C v1 — refresh only the raw payload of
3383    /// `vm.stack[base + slot_offset]`, preserving its existing
3384    /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3385    /// uses this when the slot's `RegKind` is `Unset` (no compile-
3386    /// time tag info; commonly `Str` slots which the trace doesn't
3387    /// model). The interp's previous execution of the same op
3388    /// already populated the slot with the right tag — the trace
3389    /// only needs to swap in its current raw value.
3390    #[doc(hidden)]
3391    pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3392        let Some(f) = self.jit_last_lua_frame() else {
3393            return;
3394        };
3395        let idx = (f.base as usize) + (slot_offset as usize);
3396        if idx >= self.stack.len() {
3397            return;
3398        }
3399        let (tag, _) = self.stack[idx].unpack();
3400        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3401        self.stack[idx] = unsafe {
3402            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3403        };
3404    }
3405
3406    /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3407    ///
3408    /// Mirrors the interp arm (this file ~L5112): `self.top =
3409    /// base + a + n; concat_run(base + a)`. Result lands at
3410    /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3411    /// deopt (any error from `concat_run` OR detection that the
3412    /// metamethod path was taken — `concat_run` returns `Ok(())`
3413    /// after `begin_meta_call` which has pushed a Lua frame the
3414    /// trace can't safely continue past).
3415    ///
3416    /// The frame-push detection uses `pre/post frames.len()` and
3417    /// unwinds any pushed frames before deopting, so the
3418    /// dispatcher's existing deopt path sees a clean stack.
3419    #[doc(hidden)]
3420    pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3421        if self.jit.pending_err.is_some() {
3422            return -1;
3423        }
3424        let Some(f) = self.jit_last_lua_frame() else {
3425            self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3426            return -1;
3427        };
3428        let abs_a = f.base + slot_offset;
3429        self.top = abs_a + n as u32;
3430        let pre_frames = self.frames.len();
3431        let result = self.concat_run(abs_a);
3432        let post_frames = self.frames.len();
3433        // Frame-push = metamethod path taken (begin_meta_call pushed
3434        // a Lua frame). The trace can't continue past it; unwind +
3435        // deopt so interp redoes Op::Concat in the slow path.
3436        while self.frames.len() > pre_frames {
3437            frames_pop_sync(&mut self.frames, &mut self.frames_top);
3438        }
3439        if let Err(e) = result {
3440            self.jit.pending_err = Some(e);
3441            return -1;
3442        }
3443        if post_frames > pre_frames {
3444            self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3445            return -1;
3446        }
3447        0
3448    }
3449
3450    /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3451    /// accumulator buffer pool, returning a raw pointer. The trace
3452    /// fn's IR holds this pointer in a stack slot through the loop
3453    /// and calls `jit_str_buf_extend` per iter. If the pool is
3454    /// empty, allocate fresh.
3455    ///
3456    /// Safety: the returned pointer is valid until
3457    /// `jit_str_buf_release` is called or the Vm is dropped. The
3458    /// caller MUST not retain it across `enter_jit` boundaries.
3459    #[doc(hidden)]
3460    pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3461        let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3462        // Move into a Box so the pointer is stable until release.
3463        Box::into_raw(Box::new(buf))
3464    }
3465
3466    /// P14-S14-B v2 — return a previously-acquired buffer to the
3467    /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3468    /// buffer is `clear`ed (capacity retained) so the next acquire
3469    /// gets a ready-to-extend Vec.
3470    ///
3471    /// Safety: `buf` must have been returned by a prior
3472    /// `jit_str_buf_acquire` on the same Vm.
3473    #[doc(hidden)]
3474    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3475    pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3476        if buf.is_null() {
3477            return;
3478        }
3479        // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3480        let mut owned = unsafe { Box::from_raw(buf) };
3481        owned.clear();
3482        if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3483            self.jit.str_buf_pool.push(*owned);
3484        }
3485        // Else: drop the buffer.
3486    }
3487
3488    /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3489    /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3490    /// the piece slot) and passes it through; we treat it as a
3491    /// `*mut LuaStr` and append its bytes.
3492    ///
3493    /// Returns 0 on success, -1 if the piece isn't a Str (would
3494    /// trip __concat metamethod path → deopt to interp).
3495    ///
3496    /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3497    /// trace's piece slot raw bits.
3498    #[doc(hidden)]
3499    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3500    pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3501        if buf.is_null() || str_ptr == 0 {
3502            return -1;
3503        }
3504        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3505        let buf = unsafe { &mut *buf };
3506        let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3507        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3508        let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3509        buf.extend_from_slice(bytes);
3510        0
3511    }
3512
3513    /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3514    /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3515    /// the trace to write into the accumulator slot.
3516    ///
3517    /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3518    /// (the v2 hard cap; the trace deopts).
3519    ///
3520    /// Safety: `buf` from prior `acquire`. The buffer is left
3521    /// CLEAR (drained) ready for `release`.
3522    #[doc(hidden)]
3523    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3524    pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3525        if buf.is_null() {
3526            return 0;
3527        }
3528        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3529        let buf = unsafe { &mut *buf };
3530        let bytes = std::mem::take(buf);
3531        // v2 hard cap at 256KB per RFC Q3.
3532        if bytes.len() > 256 * 1024 {
3533            return 0;
3534        }
3535        let gc = self.heap.intern(&bytes);
3536        gc.as_ptr() as i64
3537    }
3538
    /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
    ///
    /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
    /// v3: ipairs `inext` fast path at the top — skip begin_call
    ///     when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
    ///     R[A+2]=Int.
    /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
    ///     caller-provided buffers + return R[A+4]'s tag byte. Lets
    ///     emit skip 3 separate `luna_jit_stack_load` calls and 1
    ///     `luna_jit_stack_tag` call by reading the buffer via
    ///     cranelift `stack_load` IR instead. Returns -1 on deopt.
    ///
    /// Parameters:
    /// * `slot_offset` — register `A`, relative to the base of the
    ///   current top Lua frame.
    /// * `nvars` — number of loop variables the caller expects; passed
    ///   to `begin_call` on the slow path and used to decide how many
    ///   result slots the fast path fills.
    /// * `ctrl_out` / `key_out` / `val_out` — receive the raw payloads
    ///   of R[A+2], R[A+4] and R[A+5] (`val_out` gets 0 when
    ///   `nvars < 2`). They are written without a null check, and only
    ///   on the non-deopt path.
    ///
    /// Deopt (`-1`, with an error parked in `jit.pending_err` unless
    /// one was already pending) happens when an error is already
    /// pending, when no Lua frame is on top, when the iterator in R[A]
    /// is not a `Value::Native`, or when `begin_call` fails. On the
    /// non-Native deopt, R[A+4..=A+6] have already been overwritten
    /// with the copied triple.
    #[doc(hidden)]
    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
    pub fn jit_op_tforcall(
        &mut self,
        slot_offset: u32,
        nvars: i32,
        ctrl_out: *mut i64,
        key_out: *mut i64,
        val_out: *mut i64,
    ) -> i64 {
        if self.jit.pending_err.is_some() {
            return -1;
        }
        let Some(f) = self.jit_last_lua_frame() else {
            self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
            return -1;
        };
        let abs = f.base + slot_offset;
        // Make sure R[A..=A+6] exist so the unchecked indexing below
        // cannot go out of bounds.
        let need = (abs + 7) as usize;
        if self.stack.len() < need {
            self.stack.resize(need, Value::Nil);
        }
        // v3 fast path.
        let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
            && std::ptr::fn_addr_eq(
                n.f,
                crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
            )
            && let Value::Table(t) = self.stack[(abs + 1) as usize]
            && t.metatable().is_none()
            && let Value::Int(i) = self.stack[(abs + 2) as usize]
        {
            let next_i = i.wrapping_add(1);
            let v = t.get_int(next_i);
            if v.is_nil() {
                // End of iteration: only the key slot is set (to nil).
                self.stack[(abs + 4) as usize] = Value::Nil;
            } else {
                self.stack[(abs + 4) as usize] = Value::Int(next_i);
                if (nvars as usize) >= 2 {
                    self.stack[(abs + 5) as usize] = v;
                }
                // Loop variables past key/value are set to nil. Only
                // slots up to R[A+6] were guaranteed above, hence the
                // per-slot length check.
                for j in 2..nvars as usize {
                    let slot = abs + 4 + j as u32;
                    if (slot as usize) < self.stack.len() {
                        self.stack[slot as usize] = Value::Nil;
                    }
                }
            }
            true
        } else {
            false
        };
        if !took_fast_path {
            // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
            // route through begin_call. Lua-closure iters would push
            // a Lua frame mid-trace → deopt.
            self.stack[(abs + 4) as usize] = self.stack[abs as usize];
            self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
            self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
            if !matches!(self.stack[abs as usize], Value::Native(_)) {
                self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
                return -1;
            }
            if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
                self.jit.pending_err = Some(e);
                return -1;
            }
        }
        // v4 batched writeback — fill the caller's buffers with the
        // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
        // reload via cranelift `stack_load` instead of separate
        // `luna_jit_stack_load` helper calls.
        // SAFETY: reads the `zero` field of the unpacked payload to get
        // its raw bits. NOTE(review): assumes every payload produced by
        // `unpack` is valid to view through `zero` — confirm against
        // the `RawVal` definition.
        let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
        let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
        // SAFETY: same raw-bits read of `zero` as for `ctrl_raw` above.
        let key_raw = unsafe { key_rv.zero };
        let val_raw = if (nvars as usize) >= 2 {
            // SAFETY: same raw-bits read of `zero` as for `ctrl_raw` above.
            unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
        } else {
            0u64
        };
        // SAFETY: relies on the caller passing valid, writable, aligned
        // `*mut i64` out-pointers (per the `#[allow]` note on this
        // function: caller-stack buffers from the Cranelift-emitted
        // prologue). No null check is performed here.
        unsafe {
            ctrl_out.write(ctrl_raw as i64);
            key_out.write(key_raw as i64);
            val_out.write(val_raw as i64);
        }
        key_tag as i64
    }
3641
3642    /// P12-S12-B-v2 — load the raw `i64` payload of
3643    /// `vm.stack[base + slot_offset]` for the active trace's head
3644    /// Lua frame. Used to reload trace IR `Variable`s after a
3645    /// helper has written to `vm.stack` directly (e.g. TForCall's
3646    /// iter results land at `R[A+4..A+4+nvars]`).
3647    #[doc(hidden)]
3648    pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3649        let Some(f) = self.jit_last_lua_frame() else {
3650            return 0;
3651        };
3652        let idx = (f.base as usize) + (slot_offset as usize);
3653        if idx >= self.stack.len() {
3654            return 0;
3655        }
3656        let v = self.stack[idx];
3657        let (_, raw) = v.unpack();
3658        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3659        unsafe { raw.zero as i64 }
3660    }
3661
3662    /// P12-S12-B-v2 — read the tag byte of
3663    /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3664    /// to dispatch on the iterator's return-key tag at runtime
3665    /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3666    /// deopt for v2).
3667    #[doc(hidden)]
3668    pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3669        let Some(f) = self.jit_last_lua_frame() else {
3670            return crate::runtime::value::raw::NIL;
3671        };
3672        let idx = (f.base as usize) + (slot_offset as usize);
3673        if idx >= self.stack.len() {
3674            return crate::runtime::value::raw::NIL;
3675        }
3676        self.stack[idx].unpack().0
3677    }
3678
3679    /// P12-S4-step4b — push a Lua frame onto the call stack with
3680    /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3681    /// at trace side-exits to recreate the inlined call activations
3682    /// the lowerer compiled past. The contract (enforced by the
3683    /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3684    /// `nresults` is the caller's expected count (today always 1
3685    /// because the lowerer bails Op::Call C != 2), and the caller
3686    /// has already called `jit_ensure_stack` to cover
3687    /// `[0..base + cl.proto.max_stack)`.
3688    #[doc(hidden)]
3689    pub fn jit_push_inlined_frame(
3690        &mut self,
3691        cl: Gc<LuaClosure>,
3692        base: u32,
3693        pc: u32,
3694        nresults: i32,
3695    ) {
3696        frames_push_sync(
3697            &mut self.frames,
3698            &mut self.frames_top,
3699            CallFrame::Lua(Frame {
3700                closure: cl,
3701                base,
3702                pc,
3703                // Lua call ABI: callee R[0] sits at caller R[A+1], so
3704                // callee.base = caller.base + A + 1; func_slot is
3705                // caller.base + A = callee.base - 1.
3706                func_slot: base - 1,
3707                n_varargs: 0,
3708                nresults,
3709                hook_oldpc: u32::MAX,
3710                from_c: false,
3711                tm: None,
3712                is_hook: false,
3713                tailcalls: 0,
3714            }),
3715        );
3716    }
3717
    /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
    /// should set to `false` so `load`/`loadstring` reject bytecode input
    /// (which bypasses parser limits and could exploit verifier gaps).
    ///
    /// This only stores `enabled` in the gate flag; the current value can
    /// be read back with `bytecode_loading()`.
    pub fn set_bytecode_loading(&mut self, enabled: bool) {
        self.bytecode_loading = enabled;
    }
3724
    /// Current bytecode-loading gate state: `true` when precompiled-chunk
    /// loading is enabled. Reflects the flag that
    /// `set_bytecode_loading` writes.
    pub fn bytecode_loading(&self) -> bool {
        self.bytecode_loading
    }
3729
    /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
    /// bytecode is a strictly larger trust surface than luna's own dump
    /// format (third-party toolchain bugs, malformed chunks, unknown
    /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
    /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
    ///
    /// This only stores `enabled` in the gate flag; the current value can
    /// be read back with `puc_bytecode_loading()`.
    pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
        self.puc_bytecode_loading = enabled;
    }
3738
    /// Current PUC bytecode-loading gate state: `true` when PUC `.luac`
    /// loading is enabled. Reflects the flag that
    /// `set_puc_bytecode_loading` writes.
    pub fn puc_bytecode_loading(&self) -> bool {
        self.puc_bytecode_loading
    }
3743
    /// Default loader input budget — 256 MiB (268,435,456 bytes).
    ///
    /// `Vm::load` and the Lua-level `load(reader, ...)` both refuse
    /// sources whose byte length crosses this cap, returning the
    /// PUC-shaped `not enough memory` error rather than letting the
    /// host allocator try (and crash) to hold the next chunk.
    pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 * 1024 * 1024;
3751
    /// Set the loader input byte budget (see
    /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
    /// effectively disable. Smaller caps are honored verbatim — a 0
    /// cap rejects every non-empty source.
    ///
    /// This only stores `bytes`; no validation or clamping is applied
    /// here.
    pub fn set_loader_input_budget(&mut self, bytes: usize) {
        self.loader_input_budget = bytes;
    }
3759
    /// Current loader input byte budget, in bytes. Reflects the value
    /// that `set_loader_input_budget` writes.
    pub fn loader_input_budget(&self) -> usize {
        self.loader_input_budget
    }
3764
3765    /// Take the error traceback captured at the latest error point and
3766    /// reset it. Embedders should call this immediately after a failed
3767    /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3768    /// entry clears it. Returns `None` if no error was in flight.
3769    pub fn take_error_traceback(&mut self) -> Option<String> {
3770        self.error_traceback
3771            .take()
3772            .map(|b| String::from_utf8_lossy(&b).into_owned())
3773    }
3774
    /// Arm the soft memory cap (P09 embedding). The run loop checks the
    /// heap's tracked byte usage between dispatch turns; on overshoot it
    /// first runs a full collect, and if `bytes` still exceeds the cap it
    /// raises a catchable `"memory cap exceeded"` Lua error and disarms
    /// itself (fire-once: re-arm before the next `call_value` if reusing
    /// the Vm across requests). `None` removes the cap. The accounting is
    /// approximate — internal Vec/Box capacity overhead is not tracked,
    /// so embedders should size the cap with ~2× margin over the desired
    /// hard limit and additionally bound the Vm's lifetime (drop after
    /// each request).
    ///
    /// This method itself only stores `cap` on the heap (`heap.mem_cap`);
    /// the enforcement described above happens elsewhere.
    pub fn set_memory_cap(&mut self, cap: Option<usize>) {
        self.heap.mem_cap = cap;
    }
3788
    /// Approximate bytes the heap is currently holding. Object shells plus
    /// every table's internal array/hash boxes (tracked via
    /// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
    /// bytecode and closure upvalue slices still go uncounted — this is a
    /// lower bound, not a precise `malloc_stats`-style total.
    ///
    /// Simply forwards to `Heap::bytes`.
    pub fn memory_used(&self) -> usize {
        self.heap.bytes()
    }
3797
3798    /// Read upvalue slot `i` of the native function currently on top of the
3799    /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3800    /// when no native is running. Public so the C ABI trampoline can fetch
3801    /// the host C function pointer it stashed there at registration time.
3802    pub fn running_native_upvalue(&self, i: usize) -> Value {
3803        match self.running_natives.last() {
3804            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3805            Some(nc) => unsafe {
3806                let upvals = &(*nc.as_ptr()).upvals;
3807                upvals.get(i).copied().unwrap_or(Value::Nil)
3808            },
3809            None => Value::Nil,
3810        }
3811    }
3812
3813    /// Register a table for finalization if its (just-set) metatable carries a
3814    /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3815    /// `__gc` to the metatable afterwards does not retroactively register).
3816    pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3817        if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3818            self.heap.register_finalizable(t);
3819        }
3820    }
3821
3822    /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3823    /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3824    /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3825    /// metatable that `newproxy` returned, which then needs to flow through.
3826    /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3827    /// would re-check at metatable-set time); luna's only userdata
3828    /// finalizables today come via `newproxy`, which registers itself.
3829    #[allow(dead_code)]
3830    pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3831        if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3832            self.heap.register_finalizable_userdata(u);
3833        }
3834    }
3835
3836    /// Run pending `__gc` finalizers (objects the collector resurrected for
3837    /// finalization). Finalizer errors are swallowed — PUC turns them into a
3838    /// warning; they must never propagate to the mutator. Reentrancy-guarded.
3839    fn run_finalizers(&mut self) {
3840        let _ = self.run_finalizers_or_err();
3841    }
3842
    /// Run every pending `__gc` finalizer and report the first failure.
    ///
    /// Takes the heap's to-be-finalized list and, for each object whose
    /// `__gc` metamethod is a closure or native, calls it with the object
    /// as its only argument. Every object in the list is visited even
    /// after a finalizer has failed.
    ///
    /// Error handling depends on `self.version`:
    /// * 5.4 and later — the error is routed through `emit_warn` as
    ///   `error in __gc metamethod (<msg>)`; this function returns `Ok`.
    /// * 5.2 / 5.3 — the first error is wrapped in the same message and
    ///   returned as `Err`; later errors are dropped.
    /// * earlier — the first error is returned unchanged; later errors
    ///   are dropped.
    ///
    /// Returns `Ok(())` immediately, without running anything, when a
    /// finalizer pass is already in progress (`gc_finalizing`) or when
    /// nothing is pending.
    fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
        // Reentrancy guard: a finalizer that triggers another pass gets
        // a no-op.
        if self.gc_finalizing {
            return Ok(());
        }
        let pending = self.heap.take_tobefnz();
        if pending.is_empty() {
            return Ok(());
        }
        self.gc_finalizing = true;
        // Only the first error is kept (pre-5.4 versions only).
        let mut first_err: Option<LuaError> = None;
        for obj in pending {
            let gc = self.get_mm(obj, Mm::Gc);
            // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
            // schedule the object for finalization (`__gc = true` is the
            // canonical placeholder); only call it at finalize time when it
            // is actually a function. gc.lua 5.2 :412 wires up exactly this
            // sentinel and then expects no call.
            let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
            if callable {
                // PUC `GCTM` sets `CIST_FIN` on the new ci so
                // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
                // `name = "__gc"`. luna threads the same outcome through the
                // generic `pending_tm` slot: the Lua frame born from this
                // call consumes it in `push_frame`. Saved/restored around the
                // call in case the handler is a native (which never pops it).
                // Bare event name; `frame_name` / `c_frame_name` add the
                // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
                // the convention used by `__close`, `__index`, …
                let saved_tm = self.pending_tm.replace("gc");
                // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
                // pcall, so `getinfo(2).namewhat` inside the finalizer reads
                // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
                // luna mirrors by temporarily tagging the current top Lua
                // frame's `tm` with the bare event name "gc" for the
                // duration of the call.
                let caller_tm_idx = self
                    .frames
                    .iter()
                    .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
                // `Some(prev)` when a Lua frame was found and tagged;
                // `prev` itself is that frame's previous (optional) tag.
                let saved_caller_tm = caller_tm_idx.and_then(|i| {
                    if let CallFrame::Lua(fr) = &mut self.frames[i] {
                        let prev = fr.tm;
                        fr.tm = Some("gc");
                        Some(prev)
                    } else {
                        None
                    }
                });
                if let Err(e) = self.call_value(gc, &[obj]) {
                    // PUC 5.1 GCTM raised the finalizer's error to the
                    // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
                    // baselines on `not pcall(collectgarbage)`). 5.2/5.3
                    // wrapped it in `error in __gc metamethod (msg)` first
                    // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
                    // introduced the warning system and switched to "warn
                    // then continue" — never re-raise, just route the
                    // wrapped message through `warn`. gc.lua 5.5 :378 wires
                    // up `_WARN` capture under the `if T then …` block to
                    // baseline on the same wrapped string.
                    if self.version >= LuaVersion::Lua54 {
                        let inner = self.error_text(&e);
                        let msg = format!("error in __gc metamethod ({inner})");
                        self.emit_warn(msg.as_bytes(), false);
                    } else if first_err.is_none() {
                        let wrapped = if self.version >= LuaVersion::Lua52 {
                            let inner = self.error_text(&e);
                            let msg = format!("error in __gc metamethod ({inner})");
                            let s = Value::Str(self.heap.intern(msg.as_bytes()));
                            LuaError(s)
                        } else {
                            e
                        };
                        first_err = Some(wrapped);
                    }
                }
                // Undo both temporary tags, whether or not the call failed.
                self.pending_tm = saved_tm;
                if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
                    && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
                {
                    fr.tm = prev; // prev is Option<&'static str>; restore exactly
                }
            }
        }
        self.gc_finalizing = false;
        match first_err {
            Some(e) => Err(e),
            None => Ok(()),
        }
    }
3931
3932    /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
3933    /// Crosses up to three phases per call:
3934    ///   1. Pause      → seed Propagate (`gc_start_propagate`)
3935    ///   2. Propagate  → drain gray up to `budget`; on exhaustion run atomic
3936    ///                   (`gc_finish_atomic` → tobefnz populated; finalizers
3937    ///                   run via `run_finalizers`) and enter Sweep
3938    ///   3. Sweep      → `gc_sweep_step` up to (residual) `budget`
3939    /// Returns true when this call completed the cycle's sweep (back to
3940    /// Pause). The budget is spent generously across phases — a large `n`
3941    /// can finish a whole cycle in one call (PUC stop-the-world step).
3942    pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
3943        // Re-entry guard: never recurse — `run_finalizers` calls Lua code
3944        // that may hit a safe point and try to step again. Re-entry was OK
3945        // under STW (collect_garbage had its own guard) but here the
3946        // intermediate phase state would corrupt.
3947        if self.gc_finalizing {
3948            return false;
3949        }
3950        if self.heap.gc_phase_is_pause() {
3951            let (roots, extra) = self.gc_roots();
3952            self.heap.gc_start_propagate(&roots, &extra);
3953        }
3954        if self.heap.gc_phase_is_propagate() {
3955            if !self.heap.gc_step_propagate(budget) {
3956                return false;
3957            }
3958            self.heap.gc_finish_atomic();
3959            // any __gc scheduled by atomic — run before sweep so a finalizer
3960            // re-registering `self` re-enters the next cycle, not this sweep
3961            self.run_finalizers();
3962        }
3963        // either we just transitioned, or we entered already in Sweep, or
3964        // a finalizer started a new cycle (gc_sweep_step is a no-op then)
3965        self.heap.gc_sweep_step(budget)
3966    }
3967
3968    // ---- frames & calls ----
3969
    /// Begin calling `stack[func_slot]` with `nargs` arguments (`None`: every
    /// slot from `func_slot + 1` up to `self.top`).
    ///
    /// Dispatch depends on the callee value:
    /// - `Value::Closure`: first offered to `try_jit_call_op`; if that
    ///   declines, a Lua frame is pushed with `push_frame` and, when
    ///   `self.jit.trace_enabled` is set, a call-triggered trace recording
    ///   may be started.
    /// - `Value::Native`: async-marked natives stash their future and return
    ///   a sentinel `Err(LuaError(Value::Nil))`; `pcall`, `xpcall`, and
    ///   `pairs` on a value with `__pairs` are handed to `begin_pcall` /
    ///   `begin_xpcall` / `begin_pairs`; every other native runs inline
    ///   (panics are trapped and surfaced as a Lua error).
    /// - anything else: the `__call` metamethod is inserted at `func_slot`
    ///   (the original value becomes the first argument) and dispatch loops.
    ///
    /// `nresults` is forwarded to the pushed frame / `finish_results`
    /// (callers in this file pass `-1` to ask for all results). `from_c` is
    /// forwarded to `push_frame` and recorded on the new frame — presumably
    /// marking a call that originates from native code; confirm against
    /// `Frame::from_c` users.
    ///
    /// Returns `Ok(true)` if a Lua frame was pushed (the dispatch loop
    /// continues there) or a `begin_pcall`/`begin_xpcall`/`begin_pairs`
    /// continuation was set up, `Ok(false)` if a native (or the JIT fast
    /// path) completed inline.
    ///
    /// Errors: a non-callable value without `__call`, an over-long `__call`
    /// chain, an async native called while `async_mode` is off, a failing
    /// call/return hook, an error or panic raised by the native, or any error
    /// from `push_frame`.
    fn begin_call(
        &mut self,
        func_slot: u32,
        nargs: Option<u32>,
        nresults: i32,
        from_c: bool,
    ) -> Result<bool, LuaError> {
        let mut nargs = match nargs {
            Some(n) => n,
            None => self.top - (func_slot + 1),
        };
        // Consume `pending_tailcalls` at the boundary: a tail-call op sets it
        // only for the immediately-following Lua activation. Native dispatch
        // (or `__call` resolution) below must not let it leak to the next
        // begin_call's frame; restore it just before push_frame for the Lua
        // arm so its meaning is preserved across __call chaining.
        let tailcalls = std::mem::take(&mut self.pending_tailcalls);
        // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
        // is inserted before the value so it becomes the first argument, and a
        // chain of `__call` tables resolves down to a real function.
        let mut chain = 0u32;
        loop {
            match self.stack[func_slot as usize] {
                Value::Closure(cl) => {
                    // P11-S2c.B JIT fast path: if the Proto's body fits
                    // the int-arith whitelist, every arg is `Value::Int`,
                    // and the cached arity matches, skip frame setup and
                    // run the cached native fn in-place.
                    if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
                        // NOTE(review): the taken count is written back here
                        // although no Lua frame was pushed to consume it —
                        // confirm the caller (or the next `begin_call`) is
                        // expected to see it.
                        self.pending_tailcalls = tailcalls;
                        return Ok(false);
                    }
                    // Hand the count back so `push_frame` can move it into
                    // the new frame's `tailcalls` field.
                    self.pending_tailcalls = tailcalls;
                    self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
                    // P12-S4-step0 — trace-on-call trigger. The frame
                    // we just pushed is the callee whose body the
                    // recorder will trace. Bump the per-Proto call
                    // counter; once it crosses `CALL_HOT_THRESHOLD`
                    // and no other trace is in flight, snapshot the
                    // callee's register window (R[0..max_stack]) and
                    // begin recording at `pc=0`. This is what unlocks
                    // tracing for functions whose body has no negative
                    // `Op::Jmp` back-edge (`fib`, recursive helpers).
                    //
                    // Gated on `trace_jit_enabled`, so the default
                    // dispatch pays a single not-taken branch.
                    if self.jit.trace_enabled {
                        let proto = cl.proto;
                        // `c` is the count BEFORE this call's increment; the
                        // counter stops growing at `u32::MAX / 2`.
                        let c = proto.call_hot_count.get();
                        if c < u32::MAX / 2 {
                            proto.call_hot_count.set(c + 1);
                        }
                        // P13-S13-H — relaxed call-trigger:
                        // `c >= THRESHOLD` (was `c == THRESHOLD`) +
                        // `!already_cached` short-circuit. Lets a
                        // discarded short call-trigger close retry
                        // on the next call (fib(10/15/20/25)
                        // pathology — first capture is base-case
                        // [Lt,Jmp,Return1]; coverage-heuristic
                        // discards; next call gets to record at a
                        // potentially deeper recursion point).
                        // Without `already_cached`, the relaxed
                        // condition would re-record over a cached
                        // trace every call.
                        //
                        // P13-S13-K — additionally short-circuit on
                        // `proto.trace_gave_up`. The S13-I discard
                        // cap force-compiles a partial trace and
                        // flips this flag; subsequent calls into
                        // this Proto skip the RefCell borrow + Vec
                        // scan entirely.
                        if proto.trace_gave_up.get() {
                            // The frame is already pushed; only the
                            // recording attempt is skipped.
                            return Ok(true);
                        }
                        // A trace whose head is pc 0 is a call-triggered
                        // trace for this Proto.
                        let call_already_cached =
                            proto.traces.borrow().iter().any(|t| t.head_pc == 0);
                        if c >= crate::jit::trace::CALL_HOT_THRESHOLD
                            && self.jit.active_trace.is_none()
                            && !call_already_cached
                        {
                            // The new frame is on top: index in
                            // `self.frames` is `len() - 1`.
                            let frame_idx = self.frames.len() - 1;
                            // Snapshot R[0..max_stack] at the callee's
                            // base. `push_frame` resized `self.stack`
                            // to `base + max_stack`, so this window is
                            // guaranteed in-bounds.
                            let f = match &self.frames[frame_idx] {
                                CallFrame::Lua(f) => f,
                                _ => unreachable!("push_frame just pushed a Lua frame"),
                            };
                            let max_stack = cl.proto.max_stack as usize;
                            let base_us = f.base as usize;
                            // Only the type tag of each register is kept;
                            // the payload half of `unpack()` is discarded.
                            let mut entry_tags = Vec::with_capacity(max_stack);
                            for i in 0..max_stack {
                                let (tag, _) = self.stack[base_us + i].unpack();
                                entry_tags.push(tag);
                            }
                            self.jit.active_trace =
                                Some(Box::new(crate::jit::trace::TraceRecord::start(
                                    cl.proto, 0, entry_tags, true,
                                )));
                            self.jit.recording_frame_base = frame_idx;
                        }
                    }
                    return Ok(true);
                }
                Value::Native(nc) => {
                    // v1.1 B10 Stage 2 — async-marked NativeClosure.
                    // Route through the cooperative-yield mechanism
                    // when async_mode is on; reject when called from
                    // a sync `eval`/`call_value` path (would have no
                    // executor to drive the returned future).
                    if nc.is_async {
                        if !self.async_mode {
                            let s = Value::Str(
                                self.heap.intern(b"async native called in sync context"),
                            );
                            self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
                            return Err(LuaError(s));
                        }
                        // Same root-up bookkeeping as the sync path:
                        // pin args + result-count expectation so a
                        // collection across the suspend boundary
                        // keeps the arg window live.
                        self.native_nresults = nresults;
                        self.gc_top = func_slot + nargs + 1;
                        // v1.3 Phase AS — fire the "call" hook BEFORE
                        // building the future. Mirrors the sync native
                        // path's `hook_call(true, nargs)` site
                        // (`exec.rs` further down) so embedders with a
                        // Rust debug hook installed see a Call event
                        // for async natives identical to the sync
                        // path. The matching "return" hook fires from
                        // `commit_async_native_result` in
                        // `async_drive.rs` after the future resolves.
                        // Placement follows audit §"Open questions"
                        // Q6: after the `native_nresults` / `gc_top`
                        // pin, before the future is constructed, so a
                        // hook body that triggers GC observes the
                        // correct pinned window. On hook error the
                        // sentinel never returns and
                        // `pending_async_native_*` remain `None` —
                        // the executor sees `DispatchOutcome::Error`
                        // (audit §A.1 edge cases).
                        self.hook_call(true, nargs)?;
                        // Transmute the stored NativeFn back to its
                        // real AsyncNativeFn shape. Sound because
                        // `set_async_native` / `create_async_native`
                        // installed an AsyncNativeFn through the
                        // identically-sized fn-pointer slot, and the
                        // `is_async` marker bit is what records that
                        // fact.
                        let async_fn: crate::vm::async_drive::AsyncNativeFn =
                            // SAFETY: same-size fn pointers; provenance
                            // preserved through `mem::transmute`. The
                            // `is_async` marker is the only safe-to-call
                            // gate, set exclusively by
                            // `Vm::create_async_native`.
                            unsafe { std::mem::transmute(nc.f) };
                        let vm_ptr: *mut Vm = self;
                        let fut = async_fn(vm_ptr, func_slot, nargs);
                        // Stash the future + post-call context for
                        // `drive_one` to surface to `EvalFuture::poll`.
                        self.pending_async_native_fut = Some(fut);
                        self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
                            func_slot,
                            nargs,
                            nresults,
                            gc_top: self.gc_top,
                        });
                        // Sentinel Err walked up to `drive_one` (same
                        // shape as `host_yield_pending`'s budget yield).
                        // Value::Nil — never seen by user code.
                        return Err(LuaError(Value::Nil));
                    }
                    // pcall/xpcall are yieldable: rather than calling the
                    // protected function through the Rust stack (which cannot be
                    // suspended), push a continuation frame and drive the call
                    // through the interpreter loop (PUC lua_pcallk). A yield
                    // inside it is preserved with the thread's saved frames.
                    // The builtins are recognized by comparing function
                    // pointer addresses.
                    use crate::runtime::value::NativeFn;
                    if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
                        return self.begin_pcall(func_slot, nargs, nresults);
                    }
                    if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
                        return self.begin_xpcall(func_slot, nargs, nresults);
                    }
                    // pairs(t) with a __pairs metamethod calls it yieldably (PUC
                    // luaB_pairs); without one, fall through to the plain native.
                    if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
                        let arg = self.stack[(func_slot + 1) as usize];
                        if !self.get_mm(arg, Mm::Pairs).is_nil() {
                            return self.begin_pairs(func_slot, nresults);
                        }
                    }
                    // a native that collects (e.g. `collectgarbage`) roots up to
                    // its own arguments — the caller's live registers all sit
                    // below `func_slot` and stay rooted.
                    self.native_nresults = nresults;
                    self.gc_top = func_slot + nargs + 1;
                    // Push the native onto the running-natives chain BEFORE
                    // firing the call hook so that `debug.getinfo(level)` and
                    // `arg_error` from inside the hook see this native as the
                    // currently-running C function (db.lua :344 reads
                    // `getinfo(2, "f").func` for the just-entered callee).
                    // Popped after the matching return hook fires — even on
                    // error, the pop must happen, so every exit path below
                    // pops both stacks explicitly.
                    self.running_natives.push(nc);
                    self.running_native_slots.push((func_slot, nargs));
                    // PUC luaD_precall fires the "call" hook for C functions too.
                    // A yield inside the native (coroutine.yield) propagates an
                    // Err and the matching "return" hook fires on resume instead.
                    if let Err(e) = self.hook_call(true, nargs) {
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        return Err(e);
                    }
                    // P09: trap a Rust panic in the native and surface it as
                    // a Lua error rather than letting it unwind through the
                    // VM into the embedder. The VM's internal state may still
                    // be inconsistent after a panic (half-pushed args,
                    // dangling GC references), so embedders that catch this
                    // class of error should drop and re-create the Vm — but
                    // it's still better than tearing the host process down.
                    // `AssertUnwindSafe` is sound because the caller is the
                    // dispatch loop and any half-done state is fenced behind
                    // the immediate Err return below.
                    use std::panic::{AssertUnwindSafe, catch_unwind};
                    let result =
                        match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
                            Ok(r) => r,
                            Err(payload) => {
                                let msg = panic_payload_str(&payload);
                                let s = Value::Str(
                                    self.heap.intern(format!("native panic: {msg}").as_bytes()),
                                );
                                Err(LuaError(s))
                            }
                        };
                    // `nret` is the number of results the native produced.
                    let nret = match result {
                        Ok(n) => n,
                        Err(e) => {
                            // Stash the offending native's name BEFORE the
                            // pop so a dying coroutine's traceback snapshot
                            // can prepend `[C]: in function '<name>'`. Use
                            // pushglobalfuncname (PUC walks package.loaded
                            // to qualify); fall back to "?".
                            self.errored_native =
                                Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
                            self.running_natives.pop();
                            self.running_native_slots.pop();
                            return Err(e);
                        }
                    };
                    // PUC `luaD_poscall` fires the return hook BEFORE moving
                    // results into the function's slot — at that point args
                    // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
                    // results above them at `[func_slot + 1 + nargs, …)`.
                    // luna's `nat_return` has already written the results
                    // into `[func_slot, func_slot + nret)`, so we replay PUC's
                    // layout by copying the results up past the preserved
                    // args, firing the hook (with ftransfer = nargs + 1, so
                    // `getlocal(2, ftransfer..)` reads results), and then
                    // copying back for `finish_results`. db.lua :541 reads
                    // `getinfo("r").ftransfer` + `getlocal` to inspect a
                    // returning native's results this way.
                    if self.hook.ret
                        && !self.in_hook
                        && (self.hook.func.is_some() || self.hook.rust_func.is_some())
                    {
                        let res_dst = func_slot + nargs + 1;
                        let need = (res_dst + nret) as usize;
                        if self.stack.len() < need {
                            self.stack.resize(need, Value::Nil);
                        }
                        // Copy highest index first: source and destination
                        // ranges may overlap and the destination is higher.
                        for i in (0..nret).rev() {
                            self.stack[(res_dst + i) as usize] =
                                self.stack[(func_slot + i) as usize];
                        }
                        // widen the C-frame's argument window for getlocal
                        if let Some(slot) = self.running_native_slots.last_mut() {
                            slot.1 = nargs + nret;
                        }
                        // The hook result is held until the bookkeeping below
                        // is undone, then propagated with `hr?`.
                        let hr = self.hook_return(true, nargs + 1, nret);
                        if let Some(slot) = self.running_native_slots.last_mut() {
                            slot.1 = nargs;
                        }
                        // restore results into the slot finish_results expects
                        for i in 0..nret {
                            self.stack[(func_slot + i) as usize] =
                                self.stack[(res_dst + i) as usize];
                        }
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        hr?;
                    } else {
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                    }
                    self.finish_results(func_slot, nret, nresults);
                    // the native may have allocated; collect with the results as
                    // the live boundary (PUC checks GC after a call returns).
                    self.maybe_collect_garbage(self.top);
                    return Ok(false);
                }
                v => {
                    // Not directly callable: look for a `__call` metamethod.
                    let mm = self.get_mm(v, Mm::Call);
                    if mm.is_nil() {
                        return Err(self.call_err(v));
                    }
                    chain += 1;
                    // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
                    // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
                    // and the 5.5 test exercises the new tight bound directly
                    // (calls.lua :225 builds a 16-deep chain and expects the
                    // 16th to error). 5.4 calls.lua :194 instead builds a 20-
                    // deep chain and expects it to succeed.
                    let cap = if self.version >= crate::version::LuaVersion::Lua55 {
                        15
                    } else {
                        MAX_CCMT
                    };
                    if chain > cap {
                        return Err(self.rt_err("'__call' chain too long"));
                    }
                    // slots above shift by one; at a call site those are dead
                    // temps of the current frame
                    self.stack.insert(func_slot as usize, mm);
                    if self.top > func_slot {
                        self.top += 1;
                    }
                    // the original callee is now the handler's first argument
                    nargs += 1;
                }
            }
        }
    }
4311
4312    fn push_frame(
4313        &mut self,
4314        cl: Gc<LuaClosure>,
4315        func_slot: u32,
4316        nargs: u32,
4317        nresults: i32,
4318        from_c: bool,
4319    ) -> Result<(), LuaError> {
4320        if func_slot + 256 > MAX_LUA_STACK {
4321            // PUC `stackerror`: a stack overflow that surfaces while the
4322            // current activation is inside an xpcall message handler is
4323            // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
4324            // error handling". errors.lua :606 expects the inner pcall(loop)
4325            // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
4326            // message matching "error handling".
4327            let msg = if self.msgh_depth > 0 {
4328                "error in error handling"
4329            } else {
4330                "stack overflow"
4331            };
4332            return Err(self.rt_err(msg));
4333        }
4334        let proto = cl.proto;
4335        let nparams = proto.num_params as u32;
4336        // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
4337        // stack just below the new `base`, so a named vararg can be indexed
4338        // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
4339        // `[e1..em][p1..pn]` so the fixed params land at the new base.
4340        let n_varargs = if proto.is_vararg {
4341            nargs.saturating_sub(nparams)
4342        } else {
4343            0
4344        };
4345        if n_varargs > 0 {
4346            let s = (func_slot + 1) as usize;
4347            self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
4348        }
4349        let base = func_slot + 1 + n_varargs;
4350        let need = (base + proto.max_stack as u32) as usize;
4351        if self.stack.len() < need {
4352            self.stack.resize(need, Value::Nil);
4353        }
4354        // wipe the register window beyond the kept parameters (stale values —
4355        // required for GC-safety and codegen). The varargs below `base` survive.
4356        let kept = nargs.saturating_sub(n_varargs).min(nparams);
4357        // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
4358        // need` since `base + nparams <= base + max_stack = need` and `kept <=
4359        // nparams`. `slice::fill` lowers to a single memset on Copy types.
4360        unsafe {
4361            self.stack
4362                .get_unchecked_mut((base + kept) as usize..need)
4363                .fill(Value::Nil);
4364        }
4365        frames_push_sync(
4366            &mut self.frames,
4367            &mut self.frames_top,
4368            CallFrame::Lua(Frame {
4369                closure: cl,
4370                base,
4371                pc: 0,
4372                func_slot,
4373                nresults,
4374                hook_oldpc: u32::MAX,
4375                from_c,
4376                n_varargs,
4377                // single-shot consume: `close_slots` sets pending_tm before each
4378                // handler call; the next Lua frame born is that handler's.
4379                tm: self.pending_tm.take(),
4380                // `run_hook` sets `pending_is_hook` before dispatching the user
4381                // hook so its frame reports `namewhat = "hook"` via getinfo.
4382                is_hook: std::mem::take(&mut self.pending_is_hook),
4383                tailcalls: std::mem::take(&mut self.pending_tailcalls),
4384            }),
4385        );
4386        // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
4387        // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
4388        // the slot at `base + nparams`; the extras sit just below `base` from
4389        // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
4390        // hook; vararg.lua's contradictory expectations were already going to
4391        // fail either way (some asserts want `arg == nil`).
4392        if proto.has_compat_vararg_arg {
4393            let arg_slot = (base + nparams) as usize;
4394            let t = self.heap.new_table();
4395            {
4396                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4397                let tm = unsafe { t.as_mut() };
4398                for i in 0..n_varargs {
4399                    let v = self.stack[(base - n_varargs + i) as usize];
4400                    // bounded by `n_varargs` (≤ MAXUPVAL territory), well
4401                    // below `MAX_ASIZE`
4402                    let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
4403                }
4404                let nk = Value::Str(self.heap.intern(b"n"));
4405                tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
4406                    .expect("'n' key");
4407            }
4408            // once-per-table barrier mirrors SETLIST: t is born BLACK during
4409            // Propagate and the bulk `set_int`/`set` calls above don't barrier
4410            self.heap
4411                .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
4412            self.stack[arg_slot] = Value::Table(t);
4413        }
4414        // PUC luaD_precall fires the "call" hook with the new frame current, so
4415        // a hook calling debug.getinfo(2) sees the entered function. For a Lua
4416        // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
4417        // fixed params count — extras already live below `base`).
4418        // A frame born via OP_TailCall fires "tail call" instead (PUC
4419        // luaD_pretailcall) and skips the matching "return" hook on exit.
4420        let is_tail = self
4421            .frames
4422            .last()
4423            .and_then(|f| f.lua())
4424            .is_some_and(|f| f.tailcalls > 0);
4425        self.hook_call_with(false, nparams, is_tail)?;
4426        Ok(())
4427    }
4428
4429    /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4430    /// the protected call `f` through the interpreter loop. The protected
4431    /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4432    /// at `func_slot+1` lets its results land one slot above the continuation —
4433    /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4434    /// Always returns `Ok(true)`: a continuation is now on the stack to be
4435    /// resolved by the loop (even when `f` is a native that already ran inline).
4436    fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4437        if nargs == 0 {
4438            return Err(crate::vm::builtins::raise_str(
4439                self,
4440                "bad argument #1 to 'pcall' (value expected)",
4441            ));
4442        }
4443        if self.pcall_depth >= MAX_C_DEPTH {
4444            return Err(self.rt_err("C stack overflow"));
4445        }
4446        self.pcall_depth += 1;
4447        frames_push_sync(
4448            &mut self.frames,
4449            &mut self.frames_top,
4450            CallFrame::Cont(NativeCont {
4451                kind: ContKind::Pcall,
4452                func_slot,
4453                nresults,
4454            }),
4455        );
4456        // call f (slot func_slot+1) with the remaining args, asking for all
4457        // results; a yield or error inside propagates with the continuation kept
4458        // on the stack (caught by `unwind` / preserved across a yield).
4459        self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4460        Ok(true)
4461    }
4462
4463    /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4464    /// message handler is stashed in the continuation and the arguments are
4465    /// shifted down over the handler's slot so `f`'s args are contiguous.
4466    fn begin_xpcall(
4467        &mut self,
4468        func_slot: u32,
4469        nargs: u32,
4470        nresults: i32,
4471    ) -> Result<bool, LuaError> {
4472        if nargs < 2 {
4473            return Err(crate::vm::builtins::raise_str(
4474                self,
4475                "bad argument #2 to 'xpcall' (value expected)",
4476            ));
4477        }
4478        if self.pcall_depth >= MAX_C_DEPTH {
4479            return Err(self.rt_err("C stack overflow"));
4480        }
4481        self.pcall_depth += 1;
4482        // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4483        // close its gap so f's args become [f@+1, a1@+2, ...].
4484        let handler = self.stack[(func_slot + 2) as usize];
4485        let nfargs = nargs - 2;
4486        for i in 0..nfargs {
4487            self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4488        }
4489        self.top = func_slot + 2 + nfargs;
4490        frames_push_sync(
4491            &mut self.frames,
4492            &mut self.frames_top,
4493            CallFrame::Cont(NativeCont {
4494                kind: ContKind::Xpcall { handler },
4495                func_slot,
4496                nresults,
4497            }),
4498        );
4499        self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4500        Ok(true)
4501    }
4502
4503    /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4504    /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4505    /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4506    /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4507    /// results land exactly where `pairs`'s results belong.
4508    fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4509        let arg = self.stack[(func_slot + 1) as usize];
4510        let mm = self.get_mm(arg, Mm::Pairs);
4511        // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4512        self.stack[func_slot as usize] = mm;
4513        self.top = func_slot + 2;
4514        frames_push_sync(
4515            &mut self.frames,
4516            &mut self.frames_top,
4517            CallFrame::Cont(NativeCont {
4518                kind: ContKind::Pairs,
4519                func_slot,
4520                nresults,
4521            }),
4522        );
4523        self.begin_call(func_slot, Some(1), 4, true)?;
4524        Ok(true)
4525    }
4526
4527    /// The running (top) Lua frame. The interpreter only reads this while a Lua
4528    /// frame is on top — a continuation frame is never the running frame (it is
4529    /// consumed the instant the call it protects unwinds onto it).
4530    #[inline]
4531    fn top_frame(&self) -> &Frame {
4532        self.frames
4533            .last()
4534            .and_then(CallFrame::lua)
4535            .expect("running Lua frame")
4536    }
4537
4538    #[inline]
4539    fn top_frame_mut(&mut self) -> &mut Frame {
4540        self.frames
4541            .last_mut()
4542            .and_then(CallFrame::lua_mut)
4543            .expect("running Lua frame")
4544    }
4545
4546    /// Pad/announce results sitting at func_slot.
4547    pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4548        if wanted < 0 {
4549            self.top = func_slot + nret;
4550        } else {
4551            let wanted = wanted as u32;
4552            let need = (func_slot + wanted) as usize;
4553            if self.stack.len() < need {
4554                self.stack.resize(need, Value::Nil);
4555            }
4556            for i in nret..wanted {
4557                self.stack[(func_slot + i) as usize] = Value::Nil;
4558            }
4559            self.top = func_slot + wanted;
4560        }
4561    }
4562
4563    /// v1.1 B10 Stage 1 — current Lua call-frame depth (read-only).
4564    /// Used by `EvalFuture` on the bootstrap poll to compute the
4565    /// `entry_depth` it will pass to subsequent resume slices.
4566    pub(crate) fn frame_count(&self) -> usize {
4567        self.frames.len()
4568    }
4569
4570    fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4571        let nret = self.top - func_slot;
4572        let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4573        self.stack.truncate(func_slot as usize);
4574        self.top = func_slot;
4575        out
4576    }
4577
4578    // ---- open upvalues ----
4579
4580    #[doc(hidden)]
4581    pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4582        match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4583            Ok(i) => self.open_upvals[i].1,
4584            Err(i) => {
4585                let uv = self.heap.new_upvalue(UpvalState::Open {
4586                    slot,
4587                    thread: self.current,
4588                });
4589                self.open_upvals.insert(i, (slot, uv));
4590                uv
4591            }
4592        }
4593    }
4594
4595    pub(crate) fn close_from(&mut self, slot: u32) {
4596        while let Some(&(s, uv)) = self.open_upvals.last() {
4597            if s < slot {
4598                break;
4599            }
4600            let v = self.stack[s as usize];
4601            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4602            unsafe { uv.as_mut() }.set_closed(v);
4603            self.heap
4604                .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4605            self.open_upvals.pop();
4606        }
4607    }
4608
4609    /// Register a to-be-closed slot (TBC op / generic-for closing value).
4610    fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4611        let v = self.stack[slot as usize];
4612        if matches!(v, Value::Nil | Value::Bool(false)) {
4613            return Ok(()); // nil and false are silently ignored
4614        }
4615        if self.get_mm(v, Mm::Close).is_nil() {
4616            // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4617            // (a <type> value)"; the local's name comes from the running
4618            // frame's locvars at this pc.
4619            let tn = v.type_name();
4620            let f = self.top_frame();
4621            let reg = slot - f.base;
4622            let pc = (f.pc as usize).saturating_sub(1);
4623            let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4624                Some(n) => format!("variable '{n}'"),
4625                None => "to-be-closed slot".to_string(),
4626            };
4627            return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4628        }
4629        debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4630        self.tbc.push(slot);
4631        Ok(())
4632    }
4633
    /// Close upvalues and run `__close` handlers for slots ≥ `from`
    /// (handlers in reverse registration order; PUC luaF_close).
    ///
    /// `err` is the error object being propagated, if this close happens
    /// during error unwinding; it is passed to the handlers as their second
    /// argument until a handler raises, at which point that newer error takes
    /// its place.
    ///
    /// Returns `Err` with the last error raised by a handler (or synthesised
    /// for a vanished `__close`), and `Ok(())` if no handler failed — the
    /// incoming `err` itself is never returned from here.
    fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
        self.close_from(from);
        // PUC: handlers run in reverse declaration order; an error raised by a
        // handler becomes the error object passed to the remaining ones, and
        // the rest are still closed. The last raised error propagates.
        let mut pending = err;
        let mut result = Ok(());
        // `closing_err` is overwritten per handler below; remember the
        // caller's value so it can be put back once the chain is done.
        let saved_err = self.closing_err;
        // On a normal close the handler runs within the closing function's
        // activation (debug parent = that function); during error unwinding the
        // function's frame is already gone, so the handler sits at the C
        // boundary instead (PUC: luaF_close runs after the ci is restored).
        let error_close = err.is_some();
        while let Some(&s) = self.tbc.last() {
            if s < from {
                break;
            }
            self.tbc.pop();
            let v = self.stack[s as usize];
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                // PUC `prepclosingmethod`: the __close metamethod was present
                // at OP_TBC (else we would have errored there) but has since
                // been removed/replaced. Treat as a non-callable target.
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                pending = Some(e.0);
                result = Err(e);
                continue;
            }
            // root the pending error: a handler may trigger a collection
            self.closing_err = pending;
            // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
            // getinfo report the handler as "in metamethod 'close'". Saved/
            // restored around the call to cover the path where `mm` is a
            // native (`push_frame` never consumes it) or it raises before
            // reaching push_frame.
            let saved_tm = self.pending_tm.replace("close");
            // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
            // is nil on a normal close (5.4 locals.lua :875's
            // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
            // through the yield). PUC 5.5 dropped the trailing nil: a clean
            // close passes only `obj`, the error case still passes both
            // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
            // normal-close arms, n=2 for the error arm).
            let call = match pending {
                Some(e) => self.call_value_impl(mm, &[v, e], error_close),
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        self.call_value_impl(mm, &[v], error_close)
                    } else {
                        self.call_value_impl(mm, &[v, Value::Nil], error_close)
                    }
                }
            };
            self.pending_tm = saved_tm;
            // A failing handler does not stop the chain: its error replaces
            // the pending one and the remaining slots are still closed.
            if let Err(e) = call {
                pending = Some(e.0);
                result = Err(e);
            }
        }
        self.closing_err = saved_err;
        result
    }
4705
4706    /// Yieldable variant of `close_slots`: drive the chain of `__close`
4707    /// handlers for slots ≥ `from` through the interpreter loop with a
4708    /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
4709    /// suspends cleanly (the close iteration's state rides on the thread's
4710    /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
4711    /// applied to `luaF_close`. `after` runs when every slot is closed; if
4712    /// `after` is `Return` and we've returned past `entry_depth`,
4713    /// `Ok(Some(vals))` carries the result up to the host caller.
4714    fn begin_close(
4715        &mut self,
4716        from: u32,
4717        err: Option<Value>,
4718        after: AfterClose,
4719        entry_depth: usize,
4720    ) -> Result<Option<Vec<Value>>, LuaError> {
4721        self.close_from(from);
4722        self.drive_close(from, err, after, entry_depth)
4723    }
4724
    /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
    /// non-callable-mm error for an `__close` that was reset to a bad value
    /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
    /// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
    /// the interpreter then drives the handler and re-enters this driver via
    /// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
    /// the threaded error (if any) propagates or `after` fires.
    ///
    /// `pending` is the error object threaded through the chain so far;
    /// `after` and `entry_depth` are forwarded untouched to
    /// `finish_close_after` once no slot ≥ `from` remains. An error returned
    /// by `begin_call` while starting a handler is propagated as `Err`.
    fn drive_close(
        &mut self,
        from: u32,
        mut pending: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        loop {
            // Done when the tbc list is empty or its top entry lies below
            // the range being closed.
            let drained = match self.tbc.last() {
                None => true,
                Some(&s) => s < from,
            };
            if drained {
                return self.finish_close_after(after, pending, entry_depth);
            }
            let s = self.tbc.pop().expect("tbc non-empty");
            let v = self.stack[s as usize];
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                // `__close` vanished since registration: record the error as
                // the new pending one and keep closing the remaining slots.
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                pending = Some(e.0);
                continue;
            }
            // A real handler: stage [mm, v, (err?)] above the current top,
            // record the close iteration state in a Cont::Close, and let the
            // interpreter dispatch the handler. On return the run() head
            // re-enters this driver via the Cont::Close consumer.
            let func_slot = self.top;
            let error_close = pending.is_some();
            // Room for the handler plus up to two arguments.
            let need = (func_slot + 3) as usize;
            if self.stack.len() < need {
                self.stack.resize(need, Value::Nil);
            }
            self.stack[func_slot as usize] = mm;
            self.stack[func_slot as usize + 1] = v;
            // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
            // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
            let nargs = match pending {
                Some(e) => {
                    self.stack[func_slot as usize + 2] = e;
                    2u32
                }
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        1u32
                    } else {
                        self.stack[func_slot as usize + 2] = Value::Nil;
                        2u32
                    }
                }
            };
            self.top = func_slot + 1 + nargs;
            // Root the pending error during the call (a handler may collect).
            let saved_err = self.closing_err;
            self.closing_err = pending;
            // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
            // for traceback / getinfo.
            let saved_tm = self.pending_tm.replace("close");
            frames_push_sync(
                &mut self.frames,
                &mut self.frames_top,
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Close(CloseCont {
                        from,
                        pending,
                        after,
                    }),
                    func_slot,
                    nresults: 0,
                }),
            );
            // PUC luaF_close runs a normal close *within* the closing
            // function's activation (debug parent = that function); during an
            // error unwind the function's frame is already gone and the
            // handler sits at the C boundary instead.
            let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
            // Restore both saved fields before looking at the outcome, so
            // they are put back on the error path as well.
            self.pending_tm = saved_tm;
            self.closing_err = saved_err;
            r?;
            return Ok(None);
        }
    }
4820
4821    /// Fire `after` once every `__close` handler has run. `Block` propagates
4822    /// any remaining error or simply continues; `Return` performs OP_Return's
4823    /// tail (hook + frame pop + result delivery) and may surface results to
4824    /// the host when the function whose return triggered the close was the
4825    /// entry activation, but only on a clean drain — a pending error skips
4826    /// the return tail and propagates instead. `ResumeUnwind` pops the
4827    /// deferred Lua frame and re-raises, letting a handler's own error win
4828    /// over the original propagating one (PUC luaF_close).
4829    fn finish_close_after(
4830        &mut self,
4831        after: AfterClose,
4832        pending: Option<Value>,
4833        entry_depth: usize,
4834    ) -> Result<Option<Vec<Value>>, LuaError> {
4835        match after {
4836            AfterClose::Block => match pending {
4837                Some(e) => Err(LuaError(e)),
4838                None => Ok(None),
4839            },
4840            AfterClose::Return {
4841                abs_a,
4842                nret,
4843                from_native,
4844            } => match pending {
4845                Some(e) => Err(LuaError(e)),
4846                None => self.complete_return(abs_a, nret, from_native, entry_depth),
4847            },
4848            AfterClose::ResumeUnwind { func_slot, err } => {
4849                // The aborting Lua frame was popped before `begin_close`;
4850                // restore the catcher's stack window down to `func_slot` and
4851                // re-raise — preferring a handler-raised error over the
4852                // original (PUC luaF_close).
4853                self.stack.truncate(func_slot as usize);
4854                self.top = func_slot;
4855                self.tbc.retain(|&s| s < func_slot);
4856                Err(LuaError(pending.unwrap_or(err)))
4857            }
4858        }
4859    }
4860
    /// OP_Return's post-close tail: fire the "return" hook (frame still
    /// current), pop the Lua frame, slide results into `func_slot`, then
    /// either hand them to the host (`Ok(Some(vals))` when we've returned
    /// past `entry_depth`), leave them contiguous for an exposed
    /// pcall/xpcall continuation, or finish into the caller's expected
    /// result slot. Mirrors the synchronous OP_Return tail so both paths
    /// share semantics — the `from_native` flag selects the right "return"
    /// hook context for `hook_return`.
    ///
    /// * `abs_a` — absolute stack slot of the first result.
    /// * `nret` — number of results starting at `abs_a`.
    /// * `from_native` — forwarded to `hook_return`.
    /// * `entry_depth` — frame depth of the host entry; results are returned
    ///   to the host once the frame count drops below it.
    ///
    /// Errors raised by the return hooks are propagated before the frame is
    /// popped.
    ///
    /// # Panics
    ///
    /// Panics if the frame stack is empty or its top entry is not a Lua frame.
    fn complete_return(
        &mut self,
        abs_a: u32,
        nret: u32,
        from_native: bool,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        // ftransfer is the local index (1-based) of the first result, as
        // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
        // exposes locals starting at `frame.base` (= func_slot + 1 +
        // n_varargs for a vararg call), so the conversion is the absolute
        // result slot minus base, plus one to make it 1-based. db.lua 5.4
        // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
        // shape end-to-end.
        let ftransfer = self
            .frames
            .last()
            .and_then(CallFrame::lua)
            .map(|fr| {
                let raw = abs_a.saturating_sub(fr.base) + 1;
                // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
                // local injected at index `numparams + 1`, so getlocal
                // numbering shifts results past it (5.5 db.lua :539
                // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
                if fr.closure.proto.has_vararg_table_pseudo {
                    raw + 1
                } else {
                    raw
                }
            })
            .unwrap_or(1);
        // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
        // per tail call that collapsed into this activation, *after* its
        // own "return". `tailcalls` tracks that count exactly (PUC
        // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
        // "return" hook fires once even when the activation absorbed
        // multiple tail calls — only `istailcall` on getinfo surfaces the
        // collapse. 5.1 db.lua :366 pins the event ordering.
        let tailcalls = if self.version <= LuaVersion::Lua51 {
            self.frames
                .last()
                .and_then(|f| f.lua())
                .map(|f| f.tailcalls)
                .unwrap_or(0)
        } else {
            0
        };
        self.hook_return(from_native, ftransfer, nret)?;
        for _ in 0..tailcalls {
            self.hook_tail_return()?;
        }
        // Hooks are done; the returning activation can now leave the stack.
        let CallFrame::Lua(fr) =
            frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
        else {
            unreachable!("returning from a non-Lua frame")
        };
        // Slide the results down so they start at the callee's function slot.
        for i in 0..nret {
            self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
        }
        if self.frames.len() < entry_depth {
            // Returned past the host entry: hand the values out.
            self.top = fr.func_slot + nret;
            return Ok(Some(self.take_results(fr.func_slot)));
        } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
            // A continuation frame is now exposed: leave all results
            // contiguous, unpadded, for it to consume.
            self.top = fr.func_slot + nret;
        } else {
            // Ordinary Lua caller: pad/trim to the count it asked for.
            self.finish_results(fr.func_slot, nret, fr.nresults);
        }
        Ok(None)
    }
4938
4939    #[doc(hidden)]
4940    pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
4941        match cl.upvals()[idx as usize].state() {
4942            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
4943            UpvalState::Closed(v) => v,
4944        }
4945    }
4946
4947    fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
4948        let uv = cl.upvals()[idx as usize];
4949        match uv.state() {
4950            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
4951            UpvalState::Closed(_) => {
4952                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4953                unsafe { uv.as_mut() }.set_closed(v);
4954                // forward barrier: a closed upvalue is single-slot, so the
4955                // forward variant is cheaper than barrier_back (PUC uses
4956                // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
4957                // tables / threads).
4958                self.heap
4959                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4960            }
4961        }
4962    }
4963
4964    // ---- register / error helpers ----
4965
4966    #[inline(always)]
4967    fn r(&self, base: u32, i: u32) -> Value {
4968        // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
4969        // at frame entry (`push_frame` sizes the stack up to base + max_stack),
4970        // and every bytecode-generated reference falls within `[0, max_stack)`.
4971        // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
4972        // reason. The bounds check would re-validate this invariant on every
4973        // op — the dispatch hot path can't afford it.
4974        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4975        unsafe { *self.stack.get_unchecked((base + i) as usize) }
4976    }
4977
4978    #[inline(always)]
4979    fn set_r(&mut self, base: u32, i: u32, v: Value) {
4980        // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
4981        // frame-entry contract.
4982        unsafe {
4983            *self.stack.get_unchecked_mut((base + i) as usize) = v;
4984        }
4985    }
4986
4987    #[doc(hidden)]
4988    pub fn rt_err(&mut self, msg: &str) -> LuaError {
4989        let text = match self.position_prefix() {
4990            Some(p) => format!("{p}{msg}"),
4991            None => msg.to_string(),
4992        };
4993        LuaError(Value::Str(self.heap.intern(text.as_bytes())))
4994    }
4995
4996    pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
4997        let extra = self.subject_varinfo(v);
4998        let tn = self.obj_typename(v);
4999        self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
5000    }
5001
5002    /// Name the offending operand of the current instruction (PUC varinfo) for
5003    /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
5004    /// to the instruction's subject register(s); a native-raised error whose
5005    /// current instruction doesn't hold `bad` simply yields "".
5006    fn subject_varinfo(&self, bad: Value) -> String {
5007        use crate::vm::isa::Op;
5008        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5009            return String::new();
5010        };
5011        let proto = f.closure.proto;
5012        let p: &crate::runtime::Proto = &proto;
5013        let pc = f.pc as usize;
5014        if pc == 0 || pc > p.code.len() {
5015            return String::new();
5016        }
5017        let instr = p.code[pc - 1];
5018        let mut cands: Vec<u32> = Vec::new();
5019        match instr.op() {
5020            // indexed reads / length / method: the table/object is in B
5021            Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5022                cands.push(instr.b());
5023            }
5024            // indexed writes / calls: the table/function is in A
5025            Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5026                cands.push(instr.a());
5027            }
5028            // arithmetic/bitwise: a register operand (B, and C unless constant)
5029            Op::Add
5030            | Op::Sub
5031            | Op::Mul
5032            | Op::Div
5033            | Op::Mod
5034            | Op::Pow
5035            | Op::IDiv
5036            | Op::BAnd
5037            | Op::BOr
5038            | Op::BXor
5039            | Op::Shl
5040            | Op::Shr => {
5041                cands.push(instr.b());
5042                if !instr.k() {
5043                    cands.push(instr.c());
5044                }
5045            }
5046            Op::Unm | Op::BNot => cands.push(instr.b()),
5047            Op::Concat => {
5048                let a = instr.a();
5049                for r in a..a + instr.b() {
5050                    cands.push(r);
5051                }
5052            }
5053            _ => {}
5054        }
5055        for reg in cands {
5056            if self.r(f.base, reg).raw_eq(bad) {
5057                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5058                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5059                    None => String::new(),
5060                };
5061            }
5062        }
5063        String::new()
5064    }
5065
5066    /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5067    /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5068    /// 'add')" when the call is a metamethod dispatched by the current opcode.
5069    fn call_err(&mut self, v: Value) -> LuaError {
5070        let extra = self.call_target_varinfo(v);
5071        let tn = self.obj_typename(v);
5072        self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5073    }
5074
5075    /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5076    /// frame before the call, so the opcode that triggered it lives in the
5077    /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5078    /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5079    fn call_target_varinfo(&self, bad: Value) -> String {
5080        use crate::vm::isa::Op;
5081        let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5082            return String::new();
5083        };
5084        let proto = f.closure.proto;
5085        let p: &crate::runtime::Proto = &proto;
5086        let pc = f.pc as usize;
5087        if pc == 0 || pc > p.code.len() {
5088            return String::new();
5089        }
5090        let instr = p.code[pc - 1];
5091        match instr.op() {
5092            Op::Call | Op::TailCall => {
5093                let reg = instr.a();
5094                if self.r(f.base, reg).raw_eq(bad) {
5095                    match crate::vm::objname::getobjname(p, pc - 1, reg) {
5096                        Some((kind, name)) => format!(" ({kind} '{name}')"),
5097                        None => String::new(),
5098                    }
5099                } else {
5100                    String::new()
5101                }
5102            }
5103            op => match mm_event_name(op) {
5104                Some(ev) => format!(" (metamethod '{ev}')"),
5105                None => String::new(),
5106            },
5107        }
5108    }
5109
5110    /// "number has no integer representation", enriched (PUC luaG_tointerror)
5111    /// with a "(field 'x')"-style suffix naming the offending operand of the
5112    /// current arithmetic instruction when it can be recovered from bytecode.
5113    fn no_int_rep_err(&mut self) -> LuaError {
5114        let extra = self.bad_operand_varinfo();
5115        self.rt_err(&format!("number{extra} has no integer representation"))
5116    }
5117
5118    /// Inspect the current frame's faulting instruction: find the register
5119    /// operand holding a float with no integer representation and name it.
5120    fn bad_operand_varinfo(&self) -> String {
5121        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5122            return String::new();
5123        };
5124        let proto = f.closure.proto;
5125        let p: &crate::runtime::Proto = &proto;
5126        let pc = f.pc as usize;
5127        if pc == 0 || pc > p.code.len() {
5128            return String::new();
5129        }
5130        let instr = p.code[pc - 1];
5131        let mut regs = vec![instr.b()];
5132        if !instr.k() {
5133            regs.push(instr.c());
5134        }
5135        for reg in regs {
5136            let v = self.r(f.base, reg);
5137            if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5138                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5139                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5140                    None => String::new(),
5141                };
5142            }
5143        }
5144        String::new()
5145    }
5146
5147    /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5148    /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5149    /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5150    /// native call), PUC pushes no prefix — match that by looking only at the
5151    /// topmost frame directly and bailing if it is anything but a Lua frame.
5152    pub(crate) fn position_prefix(&self) -> Option<String> {
5153        let f = self.frames.last().and_then(CallFrame::lua)?;
5154        let proto = f.closure.proto;
5155        if proto.source.as_bytes().is_empty() {
5156            return Some(self.stripped_prefix());
5157        }
5158        if proto.lines.is_empty() {
5159            return None;
5160        }
5161        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5162        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5163        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5164        let display = crate::vm::lib_debug::chunk_id(raw);
5165        let src = String::from_utf8_lossy(&display).into_owned();
5166        Some(format!("{src}:{line}: "))
5167    }
5168
5169    /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5170    /// for the source and renders the line as "?" (so the prefix reads
5171    /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5172    /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5173    /// matches `^%?:%-1:`).
5174    fn stripped_prefix(&self) -> String {
5175        if self.version >= crate::version::LuaVersion::Lua55 {
5176            "?:?: ".to_string()
5177        } else {
5178            "?:-1: ".to_string()
5179        }
5180    }
5181
5182    /// Position prefix of the Lua frame `level` steps up from the running C
5183    /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5184    /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5185    /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5186    /// caller's frame is reported even across pcall/xpcall continuations.
5187    pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5188        let fi = match self.dbg_frame(level)? {
5189            DbgKind::Lua(fi) => fi,
5190            DbgKind::C(_) | DbgKind::Tail(_) => return None,
5191        };
5192        let f = self.frames[fi].lua()?;
5193        let proto = f.closure.proto;
5194        // PUC luaG_addinfo: a stripped chunk has no source — see
5195        // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5196        if proto.source.as_bytes().is_empty() {
5197            return Some(self.stripped_prefix());
5198        }
5199        // a stripped chunk carries no per-instruction line info
5200        if proto.lines.is_empty() {
5201            return None;
5202        }
5203        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5204        // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5205        // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5206        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5207        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5208        let display = crate::vm::lib_debug::chunk_id(raw);
5209        let src = String::from_utf8_lossy(&display).into_owned();
5210        Some(format!("{src}:{line}: "))
5211    }
5212
5213    // ---- the interpreter ----
5214
5215    fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5216        let entry_depth = self.frames.len();
5217        self.exec_with(entry_depth)
5218    }
5219
5220    /// Run from the current top frame down to (but not past) `entry_depth`
5221    /// frames. Coroutine driving passes `entry_depth = 1` so the whole thread
5222    /// runs to completion or a yield.
5223    /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
5224    /// `entry_depth` (captured pre-yield by `drive_one`). Called by
5225    /// `EvalFuture::poll` on every poll after the first to walk the
5226    /// existing call frames until the next `BudgetExhausted` or
5227    /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
5228    /// embedder reaches it through `Vm::eval_async`.
5229    pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5230        self.exec_with(entry_depth)
5231    }
5232
5233    fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5234        loop {
5235            let r = self.run(entry_depth);
5236            if r.is_err()
5237                && (self.yielding.is_some()
5238                    || self.terminating.is_some()
5239                    || self.host_yield_pending
5240                    || self.pending_async_native_fut.is_some())
5241            {
5242                // a `coroutine.yield` is in flight: keep the frames intact (they
5243                // are the suspended coroutine's saved state) and propagate to
5244                // resume. A self-close termination propagates the same way, so a
5245                // protecting pcall on the way out cannot catch (unwind) it.
5246                // v1.1 B10 — `host_yield_pending` is the async-mode
5247                // analogue: the sentinel must reach `drive_one` without
5248                // a protecting `pcall` swallowing it.
5249                return r;
5250            }
5251            match r {
5252                Ok(vals) => return Ok(vals),
5253                // unwind toward `entry_depth`. A protecting pcall/xpcall
5254                // continuation caught along the way turns the error into
5255                // `false, msg` and the loop resumes running its caller; an
5256                // uncaught error propagates out.
5257                Err(e) => match self.unwind(e.0, entry_depth) {
5258                    Unwound::Caught => continue,
5259                    Unwound::CaughtReturn(vals) => return Ok(vals),
5260                    Unwound::Propagated(err) => return Err(err),
5261                },
5262            }
5263        }
5264    }
5265
    /// Unwind the call stack from the error point toward `entry_depth`, running
    /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
    /// continuation frame at/above `entry_depth` (the error is *caught*: its
    /// slot receives `false, msg`); if none is reached, the error propagates.
    ///
    /// `err` is the error value in flight; it is replaced whenever a close
    /// drive started during the unwind returns a new error.
    ///
    /// Returns:
    /// - `Unwound::Caught` — a pcall/xpcall continuation caught the error, or
    ///   a close drive returned `Ok(None)`; `exec_with` re-enters the run loop.
    /// - `Unwound::CaughtReturn(vals)` — the catching continuation was popped
    ///   and fewer than `entry_depth` frames remain, so the values at its slot
    ///   are handed back directly via `take_results`.
    /// - `Unwound::Propagated(err)` — fewer than `entry_depth` frames remain
    ///   and nothing caught the error.
    fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
        // PUC 5.5 `luaG_errormsg` substitutes "<no error object>" when the
        // error object is nil — so `pcall(function() error(nil) end)` returns
        // that string instead of nil, and `assert(nil, nil)` (whose path
        // throws nil via `lua_settop(L, 1)`) also surfaces a string. Earlier
        // dialects (5.4 and below) keep the nil — 5.4 errors.lua :49 asserts
        // `doit("error()") == nil` and luna would fail that if it always
        // substituted. luna's native `error()` still does its own conversion
        // for direct callers.
        if matches!(err, Value::Nil) && self.version >= crate::version::LuaVersion::Lua55 {
            err = Value::Str(self.heap.intern(b"<no error object>"));
        }
        // The protected call runs in-place among the caller frames' registers,
        // so truncating the failed frames here cuts into caller windows below
        // the catcher. Snapshot the live length: at the error point the stack
        // already spans every surviving frame's window. PUC handles overflow
        // recovery via a separate EXTRA_STACK reserve; we instead resize to
        // the catcher's caller window after a catch (see the `restore`
        // computation below), and `saved_len` is only the fallback when no
        // Lua frame remains on the frame stack.
        let saved_len = self.stack.len();
        // Snapshot the traceback at the error point — before any frame is
        // popped — so an `xpcall` msgh (which runs after the failed frames are
        // gone) can still describe the error site. The handler frame about to
        // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
        // visible here; once popped, `debug.traceback` would miss it.
        // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
        // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
        // recovery — locals.lua:659) re-overflows. Capture-once propagates
        // through nested unwinds (inner→outer) without re-running msgh.
        if self.error_traceback.is_none() {
            self.error_traceback = Some(self.traceback_bytes(1));
        }
        while self.frames.len() >= entry_depth {
            match *self.frames.last().expect("frame") {
                // a yieldable-metamethod continuation does not catch: discard the
                // abandoned instruction and keep unwinding (PUC drops the partial
                // op on error).
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Meta(mc),
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = mc.saved_top.min(func_slot);
                    self.tbc.retain(|&s| s < func_slot);
                }
                // a __pairs continuation does not catch either: an error inside
                // the metamethod propagates past `pairs`.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Pairs,
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = func_slot;
                    self.tbc.retain(|&s| s < func_slot);
                }
                // a __close continuation does not catch: drop the half-run
                // handler's window, then continue the close yieldably with
                // the new error threaded as `pending`. Preserve `cc.after`
                // verbatim — `Return`/`Block` originating from an aborting
                // OP_Return/OP_Close will be short-circuited by
                // `finish_close_after` (pending propagates as Err); a
                // `ResumeUnwind` originated by our own Lua-frame handler
                // must keep its deferred frame-pop semantics so that frame
                // is not orphaned. If a fresh handler yields, `drive_close`
                // pushes another `Cont::Close` and we return `Caught` so
                // `exec_with` re-enters the run loop.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Close(cc),
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = func_slot;
                    self.tbc.retain(|&s| s < func_slot);
                    match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
                        Ok(Some(_)) => {
                            unreachable!(
                                "Block / Return / ResumeUnwind never return host values mid-unwind"
                            )
                        }
                        Ok(None) => return Unwound::Caught,
                        // the close drive failed: its error replaces the one
                        // in flight and the unwind keeps going.
                        Err(e) => {
                            err = e.0;
                            continue;
                        }
                    }
                }
                // every remaining continuation kind is a catcher
                // (pcall/xpcall): the Meta/Pairs/Close arms above already
                // matched the non-catching kinds.
                CallFrame::Cont(nc) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.pcall_depth -= 1;
                    let result = match nc.kind {
                        ContKind::Pcall => err,
                        ContKind::Xpcall { handler } => {
                            // PUC keeps `L->errfunc` set across the handler's
                            // call: `luaG_errormsg` re-fires the handler when
                            // it raises (so `xpcall(error, err, 170)` lets the
                            // chain bottom out at err(0) → "END"). luna mirrors
                            // that by looping until the handler returns or
                            // luna's `iters` cap forces termination.
                            //
                            // The cap models PUC's nCcalls soft window
                            // (MAXCCALLS/10*11): once tripped, `stackerror`
                            // raises "C stack overflow" via `luaG_runerror`
                            // which itself re-enters `luaG_errormsg`, so the
                            // handler runs once more with that string and
                            // naturally returns it (errors.lua :637 at N=300).
                            // We count iterations per Cont::Xpcall rather than
                            // a global counter — nested xpcalls each get their
                            // own budget, matching the way PUC's stack frames
                            // accumulate per dispatch path.
                            const MSGH_CAP: u32 = MAX_C_DEPTH;
                            let mut cur_err = err;
                            let mut iters: u32 = 0;
                            let mut capped = false;
                            loop {
                                // first time the budget is spent: swap in the
                                // synthesized overflow message exactly once.
                                if iters >= MSGH_CAP && !capped {
                                    cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
                                    capped = true;
                                }
                                iters += 1;
                                self.msgh_depth += 1;
                                let r = self.call_value(handler, &[cur_err]);
                                self.msgh_depth -= 1;
                                match r {
                                    // the handler returned: its first result
                                    // (or nil when it returned nothing) is the
                                    // message delivered to the catcher.
                                    Ok(hr) => {
                                        break hr.first().copied().unwrap_or(Value::Nil);
                                    }
                                    Err(_) if capped => {
                                        // the handler still errored on the
                                        // synthesized "C stack overflow"; fall
                                        // back to PUC's LUA_ERRERR string.
                                        break Value::Str(
                                            self.heap.intern(b"error in error handling"),
                                        );
                                    }
                                    // the handler raised: feed its error back
                                    // into the handler on the next iteration.
                                    Err(e) => {
                                        cur_err = e.0;
                                    }
                                }
                            }
                        }
                        ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
                            unreachable!("Meta/Pairs/Close cont handled above")
                        }
                    };
                    // the error has been caught (pcall/xpcall): the captured
                    // traceback was for that error and is no longer in flight.
                    self.error_traceback = None;
                    // write `false, result` at the catcher's slot, growing the
                    // stack first if the earlier truncations left it too short.
                    let fs = nc.func_slot as usize;
                    if self.stack.len() < fs + 2 {
                        self.stack.resize(fs + 2, Value::Nil);
                    }
                    self.stack[fs] = Value::Bool(false);
                    self.stack[fs + 1] = result;
                    self.top = nc.func_slot + 2;
                    self.tbc.retain(|&s| s < nc.func_slot);
                    // the catcher was the entry activation: hand the values
                    // straight back instead of resuming a caller.
                    if self.frames.len() < entry_depth {
                        return Unwound::CaughtReturn(self.take_results(nc.func_slot));
                    }
                    self.finish_results(nc.func_slot, 2, nc.nresults);
                    // Restore the caller's full register window: opcodes
                    // index it directly. The window is the nearest remaining
                    // Lua frame's base + `max_stack` + a 256-slot reserve
                    // (falling back to `saved_len` when no Lua frame is
                    // left). We always resize to exactly this window rather
                    // than restoring `saved_len`, which covers two cases:
                    // - `saved_len` *above* the window: after an
                    //   overflow-recovery catch the stack would otherwise stay
                    //   near `MAX_LUA_STACK`, making the next
                    //   `call_value_impl` (e.g. a `__close` in the catcher's
                    //   errorh, locals.lua:659) pick `func_slot = stack.len()`
                    //   above the limit and re-overflow.
                    // - `saved_len` *below* the window: a yieldable-unwind
                    //   re-entry where a prior `ResumeUnwind` truncated.
                    let restore = self
                        .frames
                        .iter()
                        .rev()
                        .find_map(CallFrame::lua)
                        .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
                        .unwrap_or(saved_len);
                    if self.stack.len() < restore {
                        self.stack.resize(restore, Value::Nil);
                    } else if self.stack.len() > restore {
                        self.stack.truncate(restore);
                    }
                    return Unwound::Caught;
                }
                CallFrame::Lua(f) => {
                    // Yieldable error-unwind close, PUC luaG_errormsg shape:
                    // (1) pop the Lua frame immediately so each `__close`
                    // handler runs at the C boundary above — `debug.getinfo`
                    // sees the next outer Lua frame's call site (typically
                    // `pcall`), not this aborting function (locals.lua:480).
                    // (2) drive the close yieldably with
                    // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
                    // it truncates to `func_slot` and re-raises (letting a
                    // handler-raised error win over `err`). If a handler
                    // yields, `drive_close` pushes `Cont::Close` and we
                    // return `Caught` so `exec_with` re-enters the run loop;
                    // a synchronous drain returns Err exactly as the old
                    // path did.
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    let after = AfterClose::ResumeUnwind {
                        func_slot: f.func_slot,
                        err,
                    };
                    match self.begin_close(f.base, Some(err), after, entry_depth) {
                        Ok(Some(_)) => {
                            unreachable!("ResumeUnwind never returns host values")
                        }
                        Ok(None) => return Unwound::Caught,
                        // the error returned by the close becomes the error
                        // in flight for the next frame down.
                        Err(e) => {
                            err = e.0;
                            continue;
                        }
                    }
                }
            }
        }
        // ran below `entry_depth` without meeting a catcher.
        Unwound::Propagated(LuaError(err))
    }
5504
5505    fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5506        loop {
5507            // Fast-path slow-check gate: most embedders run with both
5508            // `instr_budget` and `mem_cap` as None, so a single combined
5509            // is_some test lets the hot loop skip both branches with one
5510            // load + branch instead of two.
5511            if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5512                if let Some(b) = self.instr_budget.as_mut() {
5513                    *b -= 1;
5514                    if *b <= 0 {
5515                        self.instr_budget = None;
5516                        // v1.1 B10 Stage 1 — async-mode cooperative
5517                        // yield. Set a sentinel flag so `exec_with`
5518                        // propagates the Err without `unwind` running
5519                        // (mirroring the `yielding.is_some()` path),
5520                        // and `call_value_impl` preserves the call
5521                        // frames for the next `poll`. Translation back
5522                        // to `DispatchOutcome::BudgetExhausted` happens
5523                        // in `drive_one`. The Err value itself is
5524                        // `Value::Nil` — a pure sentinel, never seen by
5525                        // user code.
5526                        if self.async_mode {
5527                            self.host_yield_pending = true;
5528                            return Err(LuaError(Value::Nil));
5529                        }
5530                        // B6: classify the trip so embedders can
5531                        // distinguish budget exhaustion from a
5532                        // generic Runtime error and retry / give up
5533                        // accordingly.
5534                        self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5535                        let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5536                        return Err(LuaError(s));
5537                    }
5538                }
5539                if let Some(cap) = self.heap.mem_cap
5540                    && self.heap.bytes() > cap
5541                {
5542                    // First try a full collect — embedders set tight caps
5543                    // and the overshoot may be reclaimable (closures kept
5544                    // by short-lived frames, intermediate strings). Only
5545                    // disarm + raise if the cap is still breached after
5546                    // collection. PUC's `LUA_GCEMERGENCY` path matches.
5547                    //
5548                    // v2.2 UAF-B fix: the historical `gc_top = self.top`
5549                    // under-rooted a Lua-level `a[i] = i` loop's `a`
5550                    // table — `a` sits at a slot above the multi-result
5551                    // `self.top`, so cap-fire collect swept `a`'s
5552                    // internal buckets and the next bytecode read them
5553                    // → heap-use-after-free in `Table::try_set_existing`.
5554                    // Use `self.stack.len()` here (full over-root) — the
5555                    // cap-fire path is rare + a memory cap takes priority
5556                    // over weak-table precision (the fire-once semantics
5557                    // means a wrong-collected weak ref is recoverable;
5558                    // a UAF in a table mutation is not).
5559                    self.gc_top = self.stack.len() as u32;
5560                    self.collect_garbage();
5561                    if self.heap.bytes() > cap {
5562                        self.heap.mem_cap = None;
5563                        let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5564                        return Err(LuaError(s));
5565                    }
5566                }
5567            }
5568            // Single combined frame fetch: continuation arm OR Lua arm. Saves
5569            // a second `self.frames.last()` slice access vs the prior split
5570            // form (LLVM doesn't always CSE these across the cont branch).
5571            // A continuation frame on top means the call it protected just
5572            // delivered its results — wrap as `true, results…` and hand to
5573            // the pcall/xpcall caller. The error path is handled by `unwind`;
5574            // this branch is only reached on success/resume completion.
5575            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5576            let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5577            if let &CallFrame::Cont(nc) = frame_peek {
5578                // a yieldable metamethod returned: complete the interrupted
5579                // instruction (PUC luaV_finishOp) and resume the running frame.
5580                if let ContKind::Meta(mc) = nc.kind {
5581                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5582                    let result = if self.top > nc.func_slot {
5583                        self.stack[nc.func_slot as usize]
5584                    } else {
5585                        Value::Nil
5586                    };
5587                    self.stack.truncate(nc.func_slot as usize);
5588                    self.top = mc.saved_top;
5589                    self.finish_meta(mc.action, result)?;
5590                    continue;
5591                }
5592                // a __close handler returned successfully: discard its
5593                // results, restore `top` to the slot the handler was called
5594                // at (the surrounding frame's register window above this slot
5595                // must stay alloc'd — never truncate the underlying stack),
5596                // then continue the close chain (next slot, or fire
5597                // AfterClose). When the close ends an entry activation,
5598                // drive_close hands the results up to exec_with directly.
5599                if let ContKind::Close(cc) = nc.kind {
5600                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5601                    self.top = nc.func_slot;
5602                    if let Some(vals) =
5603                        self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5604                    {
5605                        return Ok(vals);
5606                    }
5607                    continue;
5608                }
5609                // __pairs returned: normalize its results to exactly four
5610                // (iterator, state, control, closing) at pairs's slot, where
5611                // the metamethod was called, and hand them to pairs's caller.
5612                if let ContKind::Pairs = nc.kind {
5613                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5614                    let total = 4u32;
5615                    let need = (nc.func_slot + total) as usize;
5616                    if self.stack.len() < need {
5617                        self.stack.resize(need, Value::Nil);
5618                    }
5619                    for s in self.top..(nc.func_slot + total) {
5620                        self.stack[s as usize] = Value::Nil;
5621                    }
5622                    self.top = nc.func_slot + total;
5623                    if self.frames.len() < entry_depth {
5624                        return Ok(self.take_results(nc.func_slot));
5625                    }
5626                    self.finish_results(nc.func_slot, total, nc.nresults);
5627                    continue;
5628                }
5629                frames_pop_sync(&mut self.frames, &mut self.frames_top);
5630                self.pcall_depth -= 1;
5631                // f's results sit at nc.func_slot+1.. (f was called one slot
5632                // above the continuation), so writing `true` at the slot makes
5633                // `true, results…` already contiguous.
5634                let nret = self.top - (nc.func_slot + 1);
5635                self.stack[nc.func_slot as usize] = Value::Bool(true);
5636                let total = 1 + nret;
5637                self.top = nc.func_slot + total;
5638                if self.frames.len() < entry_depth {
5639                    return Ok(self.take_results(nc.func_slot));
5640                }
5641                self.finish_results(nc.func_slot, total, nc.nresults);
5642                continue;
5643            }
5644            // GC runs only at the allocation safe points below (PUC's
5645            // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5646            // no longer collects, so a stale full-window `gc_top` cannot leak in.
5647            //
5648            // Hot-path frame fetch: the Cont arm above continues the loop,
5649            // so reaching here means `frame_peek` is the Lua frame. Reuse it
5650            // rather than re-fetching `self.frames.last()`.
5651            let f = match frame_peek {
5652                CallFrame::Lua(f) => f,
5653                _ => unreachable!("Cont frame survived the dispatch loop head"),
5654            };
5655            let cl = f.closure;
5656            let base = f.base;
5657            let func_slot = f.func_slot;
5658            let n_varargs = f.n_varargs;
5659            let pc = f.pc;
5660            let oldpc = f.hook_oldpc;
5661
5662            // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5663            // — every branch / call op only sets `pc` to a valid index, and
5664            // function entry initialises pc=0 with a non-empty body. PUC's
5665            // `vmfetch` uses the equivalent unchecked load.
5666            let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5667
5668            // P12-S1.C/D — trace recording append + close detection.
5669            // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5670            // so default dispatch keeps a single not-taken branch.
5671            //
5672            // - At the head PC with a non-empty record, the trace has
5673            //   looped back to its start: mark `closed = true` and
5674            //   take the record (S2 will compile + cache).
5675            // - Otherwise, capture the op. If the record overflows
5676            //   MAX_TRACE_LEN, abort by dropping it.
5677            if self.jit.trace_enabled
5678                && let Some(_rec) = self.jit.active_trace.as_mut()
5679            {
5680                // P12-S4 — depth tracking. The trace head's frame is
5681                // at index `recording_frame_base`; every Op::Call that
5682                // pushes a new frame bumps the live depth, every
5683                // Op::Return that pops one decrements it.
5684                //
5685                // **Three clean-close conditions** (P12-S4-step4a):
5686                // - `at_head`: cur_depth == 0 AND about-to-execute the
5687                //   trace's head_pc on its head_proto (loop closed back
5688                //   to start). Same for loop-triggered and call-triggered
5689                //   traces — step4a unified the gating so call-triggered
5690                //   no longer closes on the first re-entry (that left
5691                //   fib's body at 7 depth=0 ops; step4a lets it inline
5692                //   up to MAX_INLINE_DEPTH levels before any close).
5693                // - `returned_past_head`: trace head's frame is gone
5694                //   (callee returned past it, or the call-trigger
5695                //   started a recording inside a callee that has now
5696                //   returned). Whatever ops were recorded form the
5697                //   trace body; the lowerer treats the partial trace
5698                //   the same as InlineAbort (dispatchable=false until
5699                //   step4b's frame materialization lands).
5700                // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5701                //   Recording any deeper would just bloat the IR; close
5702                //   with the body we have. Lowerer's existing length
5703                //   gate + InlineAbort path handles short bodies.
5704                let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5705                let cur_depth = if returned_past_head {
5706                    0
5707                } else {
5708                    self.frames.len() - 1 - self.jit.recording_frame_base
5709                };
5710                let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5711                let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5712                let at_head_loop = cur_depth == 0
5713                    && !rec.ops.is_empty()
5714                    && !returned_past_head
5715                    && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5716                    && pc == rec.head_pc;
5717                // P16-A — self-link cycle catch (mirrors LuaJIT's
5718                // `check_call_unroll` at `lj_record.c:1869`). Trips when:
5719                //   1. We're about to execute the head_pc on head_proto
5720                //      at depth > 0 (we're re-entering the trace head
5721                //      from inside an inlined recursion level — UpRec).
5722                //   2. The count of ancestor frames in the recording
5723                //      window that share `head_proto` exceeds
5724                //      [`RECUNROLL_THRESHOLD`] (default 2).
5725                // For fib(N): head_pc=0, head_proto=fib. After 2 inline
5726                // recursion levels are captured, the recorder enters
5727                // the 3rd nested fib frame, sees cur_depth=3 > 2, and
5728                // trips this catch — closing with `SelfRecKind::UpRec`.
5729                // The lowerer's `TraceEnd::SelfLink` tail emits the
5730                // bump-base + branch-to-self loop body.
5731                //
5732                // TailRec vs UpRec: LJ distinguishes via
5733                // `framedepth + retdepth == 0`. luna doesn't track
5734                // retdepth separately; cur_depth == 0 with a non-empty
5735                // call chain in tail position is rare (would require
5736                // explicit Lua TCO). We use cur_depth > 0 as the UpRec
5737                // condition (fib's case); cur_depth == 0 with positive
5738                // ancestor count would route to TailRec, but luna's
5739                // recorder doesn't currently produce that shape because
5740                // tail-call elision pops the caller frame and we'd
5741                // hit `at_head_loop` instead.
5742                let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
5743                    if self.jit.p16_self_link_enabled
5744                        && !returned_past_head
5745                        && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5746                        && pc == rec.head_pc
5747                        && cur_depth > 0
5748                    {
5749                        // Count ancestor frames sharing head_proto.
5750                        // self.frames[recording_frame_base..] currently
5751                        // includes the just-pushed frame at the top
5752                        // (the one about to execute head_pc). Ancestors
5753                        // = the slice excluding the top frame.
5754                        let head_proto_ptr = rec.head_proto.as_ptr();
5755                        let last_idx = self.frames.len() - 1;
5756                        let mut count = 0usize;
5757                        for i in self.jit.recording_frame_base..last_idx {
5758                            if let CallFrame::Lua(f) = &self.frames[i]
5759                                && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
5760                            {
5761                                count += 1;
5762                            }
5763                        }
5764                        if count > crate::jit::trace::RECUNROLL_THRESHOLD {
5765                            // cur_depth > 0 → UpRec (fib pattern).
5766                            // cur_depth == 0 wouldn't reach this arm.
5767                            Some(crate::jit::trace::SelfRecKind::UpRec)
5768                        } else {
5769                            None
5770                        }
5771                    } else {
5772                        None
5773                    }
5774                };
5775                if let Some(kind) = self_link_trip {
5776                    // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
5777                    // self-recursive patterns at frame depth >= 2.
5778                    //
5779                    // Pre sub-0: a SelfLink trip at the head_pc re-entry
5780                    // unconditionally stamped `self_link_kind`. The
5781                    // R3a `downrec_close` marker can only fire from the
5782                    // depth>0 Op::Return path (`rec.retfs` chain),
5783                    // which never reaches the recorder for fib(28)-like
5784                    // shapes that hit the SelfLink cycle catch BEFORE
5785                    // any base-case Return — leaving `downrec_close`
5786                    // None and routing the trace through R1's safe
5787                    // `dispatchable=false` `"self-link-retf-r1"` path
5788                    // (audit measured `trace_dispatched = 0`).
5789                    //
5790                    // Sub-0 lift: when the SelfLink trip fires AND
5791                    // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
5792                    // gate already requires this — kept explicit as a
5793                    // safety floor), route the close through `downrec_
5794                    // close` INSTEAD of `self_link_kind`. The recorder
5795                    // synthesises the close marker from the most
5796                    // recent Op::Call at depth `cur_depth - 1`:
5797                    //   - `return_pc` = `call.pc + 1` (caller's resume
5798                    //     PC after the recursive call returns; mirror
5799                    //     of R3a's `caller_pc` derivation at the
5800                    //     depth>0 Op::Return capture path below).
5801                    //   - `target_proto` = `call.proto` (caller's
5802                    //     proto; equals `rec.head_proto` for self-
5803                    //     recursion).
5804                    //   - `depth_delta` = `1` (today's recorder always
5805                    //     unrolls one level; R3a uses the same
5806                    //     constant).
5807                    //
5808                    // The lowerer's `end_idx` picker (`trace.rs:3729`)
5809                    // routes through `TraceEnd::DownRec` ahead of the
5810                    // `self_link_kind` arm; the R3b/R3d lowerer arm
5811                    // emits the stitch-sentinel + caller-pc-guard
5812                    // scaffold. Single-candidate guard chain (sub-0's
5813                    // recorder produces 1 caller_pc candidate because
5814                    // `rec.retfs` is empty) keeps `dispatchable=false`
5815                    // + `"downrec-stitch-pending"` label (per R3d's
5816                    // `multi_way_candidate_count >= 2` gate at
5817                    // `trace.rs:7385`). Net behaviour: trace compiles
5818                    // under DownRec routing; interp runs the
5819                    // recursion naturally → result 317811.
5820                    //
5821                    // The `cur_depth >= 2` gate is automatically
5822                    // satisfied by the count > RECUNROLL_THRESHOLD=2
5823                    // trip condition (3 ancestor frames sharing
5824                    // head_proto implies cur_depth >= 3), kept
5825                    // explicit so a future RECUNROLL_THRESHOLD tweak
5826                    // doesn't silently flip shallow-recursion
5827                    // shapes (cur_depth == 1) onto the DownRec arm.
5828                    //
5829                    // R3.3+ sub-1/2/3/4 will replace the depth-baked
5830                    // op_offsets[] addressing with runtime base_var
5831                    // threading so the trace's recorded body is
5832                    // depth-relative and the DownRec dispatch
5833                    // becomes wall-clock-positive. Sub-0 is the
5834                    // routing scaffold; it does not aim for gain.
5835                    let _ = kind;
5836                    let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
5837                        let caller_depth_u8 = (cur_depth - 1) as u8;
5838                        if let Some(call_op) = rec.ops.iter().rev().find(|r| {
5839                            r.inline_depth == caller_depth_u8
5840                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
5841                        }) {
5842                            rec.downrec_close = Some(crate::jit::trace::DownRecClose {
5843                                return_pc: call_op.pc + 1,
5844                                target_proto: call_op.proto,
5845                                depth_delta: 1,
5846                            });
5847                            true
5848                        } else {
5849                            false
5850                        }
5851                    };
5852                    if relaxed_to_downrec {
5853                        // R2 close-cause taxonomy: tag the lift so
5854                        // probes can tally the fire rate. Mirrors
5855                        // R3a's `"downrec-restart"` bump for the
5856                        // depth>0 Op::Return path (different trip
5857                        // origin, same downstream routing). The
5858                        // existing `"self-link-retf-r1"` label still
5859                        // fires for trips that DON'T relax (no
5860                        // candidate Op::Call ancestor in rec.ops, or
5861                        // cur_depth < 2) via the lowerer's
5862                        // dispatch_off_reason mirror at the close
5863                        // handler — kept as a regression safety net.
5864                        self.jit
5865                            .counters
5866                            .bump_close_cause("selflink-yields-to-downrec");
5867                    } else {
5868                        rec.self_link_kind = Some(kind);
5869                    }
5870                }
5871                let should_close =
5872                    at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
5873                if should_close {
5874                    // P13-S13-H — long-trace bias: a call-triggered
5875                    // recording that closed with a very short body
5876                    // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
5877                    // binary_trees `make(0)`: 4 ops) is pathological.
5878                    // Compiling + caching it pins `Proto.traces` to a
5879                    // trace that the length gate will refuse to
5880                    // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
5881                    // = 40`), AND blocks the back-edge / longer-call
5882                    // path from re-recording the same head_pc (the
5883                    // dedup `already_cached` check below short-
5884                    // circuits). The fix: discard the short call-
5885                    // triggered recording WITHOUT caching. The
5886                    // proto's `call_hot_count` is left untouched
5887                    // (see the note further down), so a later call
5888                    // can retry the trigger at a different
5889                    // (hopefully deeper) recursion point.
5890                    //
5891                    // Back-edge triggered traces are exempt — a
5892                    // tight numeric-for loop's body is legitimately
5893                    // 3 ops (`Add`, ForLoop) and DOES dispatch
5894                    // usefully when re-entered many times.
5895                    // P13-S13-H — coverage heuristic to detect
5896                    // pathologically partial call-triggered traces:
5897                    // for self-recursive / branchy protos like
5898                    // `fib` (~17 bytecode ops) or
5899                    // `binary_trees.make` (~26 ops), the recorder
5900                    // can fire at a BASE-case entry (`fib(0)` or
5901                    // `make(0)`) producing a 3–4 op trace that
5902                    // covers a tiny fraction of the proto's code.
5903                    // That trace is doomed by the length gate
5904                    // post-compile AND blocks any longer follow-up
5905                    // (the dedup `already_cached` check below). The
5906                    // fix: discard call-triggered closes where
5907                    // `rec.ops.len() * 2 < head_proto.code.len()`
5908                    // (less than half the proto's bytecode), so the
5909                    // back-edge / longer call path can take over.
5910                    //
5911                    // Why coverage > raw length: protos with
5912                    // intrinsically short bodies (closure
5913                    // factories: `Closure + Return1` = 2 ops,
5914                    // simple wrappers: `LoadI + Return1` = 2 ops)
5915                    // record 100% coverage even at length 2 — those
5916                    // ARE legitimately short and the closure /
5917                    // sunk-emit lowering paths (S7-A / S9-C) make
5918                    // them worth compiling. The heuristic admits
5919                    // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
5920                    // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
5921                    // ~26) get discarded.
5922                    //
5923                    // Back-edge triggered traces are unaffected —
5924                    // a tight numeric-for body legitimately covers
5925                    // 3 of ~3 proto ops it can dispatch from
5926                    // (`Add + ForLoop`) and the recorder fires on
5927                    // the back-edge, not call entry.
5928                    //
5929                    // `call_hot_count` is intentionally NOT reset
5930                    // (an earlier draft tried `THRESHOLD - 32` but
5931                    // caused active_trace contention with the
5932                    // outer back-edge trigger — see
5933                    // setlist_b_zero_with_call_c_zero_sunk_emits).
5934                    // We give up on dispatching the pathological
5935                    // shape on the same proto; the back-edge or a
5936                    // longer call path on a deeper recursion point
5937                    // can still record + cache a real trace.
5938                    let proto_code_len = rec.head_proto.code.len();
5939                    let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
5940                    // P13-S13-I — per-Proto discard cap. The S13-H
5941                    // relaxed trigger condition (`c >= THRESHOLD &&
5942                    // !already_cached`) means a Proto whose every
5943                    // recording is partial-coverage will re-fire the
5944                    // trigger every call indefinitely (1500+ in
5945                    // `binary_trees`-pattern test). The cap stops
5946                    // discarding after `MAX_DISCARDS_PER_PROTO` —
5947                    // the next close falls through to compile (even
5948                    // if partial), caches the trace, and the
5949                    // `already_cached` short-circuit kills the
5950                    // storm. Dispatch may still be refused
5951                    // post-compile (length gate), but the recorder
5952                    // stops churning.
5953                    const MAX_DISCARDS_PER_PROTO: u32 = 5;
5954                    let prior_discards = rec.head_proto.trace_discard_count.get();
5955                    let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
5956                    // P13-S13-K — flip the `gave_up` flag the
5957                    // moment cap is reached (BEFORE the close-
5958                    // dispatching branch below). The trigger gates
5959                    // short-circuit on this flag, skipping the
5960                    // RefCell + linear `already_cached` scan on
5961                    // every subsequent call to this Proto. Useful
5962                    // for `binary_trees_pattern`-class loads where
5963                    // a single Proto sees ~20k calls post-cap.
5964                    if cap_reached
5965                        && rec.is_call_triggered
5966                        && is_partial_coverage
5967                        && !rec.head_proto.trace_gave_up.get()
5968                    {
5969                        rec.head_proto.trace_gave_up.set(true);
5970                    }
5971                    if rec.is_call_triggered && is_partial_coverage && !cap_reached {
5972                        // Tally as closed (for visibility) but DROP
5973                        // without compile/cache. Use the existing
5974                        // closed-lens accumulator so probes can
5975                        // observe the discarded shape.
5976                        // P13-S13-I — bump discard count BEFORE
5977                        // dropping the recording so the next
5978                        // close sees the updated counter.
5979                        rec.head_proto.trace_discard_count.set(prior_discards + 1);
5980                        self.jit.counters.closed += 1;
5981                        self.jit
5982                            .counters
5983                            .closed_lens
5984                            .push((rec.is_call_triggered, rec.ops.len()));
5985                        // v2.0 Track-R R2 — partial-coverage discard
5986                        // close path. Pre-R2 this site bumped `closed`
5987                        // + `closed_lens` (visibility) but no per-
5988                        // reason label, so probes couldn't separate a
5989                        // real successful close from a discard tally.
5990                        // Tag explicitly to make the recorder-side
5991                        // close-cause taxonomy single-source.
5992                        self.jit
5993                            .counters
5994                            .bump_close_cause("partial-coverage-discard");
5995                        self.jit.active_trace = None;
5996                        // Continue with interp loop — don't
5997                        // fall through to compile path.
5998                        // The op at `pc` hasn't dispatched yet;
5999                        // the outer loop iteration handles it.
6000                    } else {
6001                        rec.closed = true;
6002                        // P12-S2.C — detach the closed record, then try
6003                        // to compile it. Dedup by `head_pc`: a Proto
6004                        // already carrying a CompiledTrace for this PC
6005                        // skips recompile (the hot counter caps
6006                        // re-recording at `u32::MAX / 2` anyway, but
6007                        // explicit dedup keeps `Proto.traces` short
6008                        // for the S3 dispatcher's linear scan).
6009                        //
6010                        // No `Vm::run` change for failure: we just bump
6011                        // the failed counter and drop the record. S3
6012                        // will read `Proto.traces` to decide whether to
6013                        // dispatch — until then, this is bookkeeping.
6014                        let head_pc_val = rec.head_pc;
6015                        let closed_record = self
6016                            .jit
6017                            .active_trace
6018                            .take()
6019                            .expect("active_trace was Some this branch");
6020                        self.jit.counters.closed += 1;
6021                        self.jit
6022                            .counters
6023                            .closed_lens
6024                            .push((closed_record.is_call_triggered, closed_record.ops.len()));
6025                        // P12-S5-B fix: cache the trace on the
6026                        // recorder's *head proto*, not the current
6027                        // closure's proto. For non-recursive
6028                        // call-triggered traces, close fires after
6029                        // `Return1` pops the callee frame — `cl` at
6030                        // that point is the CALLER's closure, while
6031                        // `closed_record.head_proto` is the CALLEE's
6032                        // proto (the one we actually want the trace
6033                        // to be discoverable from on the next call).
6034                        // Self-recursive fib closed via depth-cap
6035                        // mid-recursion so `cl.proto == head_proto`
6036                        // happened to coincide — this fix makes that
6037                        // accidental coincidence intentional.
6038                        let head_proto = closed_record.head_proto;
6039                        let already_cached = head_proto
6040                            .traces
6041                            .borrow()
6042                            .iter()
6043                            .any(|t| t.head_pc == head_pc_val);
6044                        if !already_cached {
6045                            // Internal-loop = true: the trace runs in
6046                            // a native loop until a cmp side-exits, so
6047                            // the dispatcher's per-entry marshal cost
6048                            // amortizes across the whole run of
6049                            // iterations the loop's recorded direction
6050                            // stays valid. The lowerer auto-downgrades
6051                            // to one-shot for cmp-less or Call-truncating
6052                            // traces.
6053                            // P15-A v2-C-A6-5 — side traces MUST NOT
6054                            // internal-loop. The parent's recorded prefix
6055                            // (ops at PCs < side trace's head_pc) defines
6056                            // values for registers the child's body reads
6057                            // without re-writing each iter — e.g. for
6058                            // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6059                            // + R[11]` sets R[12], and the child trace
6060                            // (head_pc=24) re-runs `pc=20 Move R[1] =
6061                            // R[12]` each iter via its outer ForLoop
6062                            // internal-loop, ALWAYS reading the stale
6063                            // entry-time R[12]. The parent's Add never
6064                            // re-runs during child's loop, so R[1] gets
6065                            // pinned to one stale value. Force one-shot
6066                            // for side traces: each parent-exit round-
6067                            // trips through dispatcher → parent's Add
6068                            // runs → side trace runs ONE iter → return.
6069                            let opts = crate::jit::trace::CompileOptions {
6070                                internal_loop: closed_record.side_trace_parent.is_none(),
6071                                pre53: self.version() <= LuaVersion::Lua53,
6072                                aot: false,
6073                            };
6074                            // v1.1 A1 Session A — route through trace_compiler.
6075                            // v2.0 Track J sub-step J-B — split-borrow JitState
6076                            // so the trait method can take `&mut dyn JitStorage`.
6077                            let result = {
6078                                let jit = &mut self.jit;
6079                                let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6080                                jit.trace_compiler
6081                                    .try_compile_trace(storage, &closed_record, opts)
6082                            };
6083                            match result {
6084                                Some(mut ct) => {
6085                                    // P12-S5-A/B/C — tally Sinkable sites
6086                                    // + actually-sunk-emit sites + materialise
6087                                    // emit sites before moving `ct` into
6088                                    // Proto.traces.
6089                                    self.jit.counters.sinkable_seen +=
6090                                        ct.sinkable_sites_seen as u64;
6091                                    self.jit.counters.accum_bufferable_seen +=
6092                                        ct.accum_bufferable_seen as u64;
6093                                    self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6094                                    self.jit.counters.materialize_emit +=
6095                                        ct.materialize_emit_count as u64;
6096                                    self.jit.counters.closure_emit += ct.closure_seen as u64;
6097                                    if ct.is_inline_abort_close {
6098                                        self.jit.counters.inline_abort += 1;
6099                                    }
6100                                    // v2.0 Stage 7 polish 6 fire
6101                                    // experiment — split tally so a
6102                                    // probe can answer the AOT
6103                                    // `accepted_with_per_exit_inline`
6104                                    // gate's question at the JIT
6105                                    // surface too: how many compiled
6106                                    // traces emitted depth>0 cmp
6107                                    // side-exits, and how many of
6108                                    // those survived all the
6109                                    // `dispatchable = false` pins
6110                                    // (`InlineAbort-gate`,
6111                                    // `self-link-retf-r1`,
6112                                    // `downrec-stitch-pending`, etc.).
6113                                    if !ct.per_exit_inline.is_empty() {
6114                                        self.jit.counters.per_exit_inline_compiled += 1;
6115                                        if ct.dispatchable {
6116                                            self.jit.counters.per_exit_inline_dispatchable += 1;
6117                                        }
6118                                    }
6119                                    if let Some(reason) = ct.dispatch_off_reason {
6120                                        self.jit.counters.dispatch_off_reasons.push(reason);
6121                                        // v2.0 Track-R R2 — mirror
6122                                        // the ordered Vec push into
6123                                        // the per-reason HashMap so
6124                                        // probes can answer "how many
6125                                        // of each dispatch_off label
6126                                        // fired" in O(1) without
6127                                        // walking the Vec. Same
6128                                        // bucket as the recorder-side
6129                                        // abort/discard tags above.
6130                                        self.jit.counters.bump_close_cause(reason);
6131                                    }
6132                                    // v2.0 Track-R R3b — count
6133                                    // compiled traces that carry a
6134                                    // down-recursion stitch link.
6135                                    // Bumped here (not at the lowerer
6136                                    // emit site) because the Vm's
6137                                    // JitCounters live on the Vm,
6138                                    // and the lowerer doesn't have a
6139                                    // Vm handle. R3b's regression
6140                                    // pin reads this via
6141                                    // `Vm::trace_downrec_link_compiled_count`.
6142                                    if ct.downrec_link.is_some() {
6143                                        self.jit.counters.downrec_link_compiled += 1;
6144                                    }
6145                                    // v2.0 Track-R R3d — multi-way
6146                                    // guard emit counter. Bumped when
6147                                    // the lowerer's R3d arm collected
6148                                    // >= 2 distinct caller_pc candidates
6149                                    // and lifted `dispatchable=true`.
6150                                    // R3c's single-CMP shape stores
6151                                    // `1` here without bumping; non-
6152                                    // DownRec closes store `0`.
6153                                    if ct.downrec_multi_way_count >= 2 {
6154                                        self.jit.counters.multi_way_guard_emitted += 1;
6155                                    }
6156                                    // P15-A v2-A — side-trace finalisation.
6157                                    // Pin `dispatchable=false` so the
6158                                    // primary lookup `traces.find(|t|
6159                                    // t.head_pc == pc && t.dispatchable)`
6160                                    // never matches this entry — the
6161                                    // side trace is meant to be entered
6162                                    // ONLY through the parent's exit
6163                                    // indirection (v2-B/C IR), not the
6164                                    // back-edge / call-trigger paths.
6165                                    // Then write the entry fn ptr into
6166                                    // the parent's `exit_side_trace_ptrs`
6167                                    // slot so v2-B/C IR can read it.
6168                                    if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6169                                        closed_record.side_trace_parent
6170                                    {
6171                                        ct.dispatchable = false;
6172                                        let entry_ptr = ct.entry as *const () as *const u8;
6173                                        let _side_trace_head_pc = closed_record.head_pc;
6174                                        let parent_traces = parent_proto.traces.borrow();
6175                                        if let Some(parent_ct) = parent_traces
6176                                            .iter()
6177                                            .find(|t| t.head_pc == parent_head_pc)
6178                                        {
6179                                            // P15-A v2-C-A5-C — shape-match
6180                                            // gate. Find the parent's per-exit
6181                                            // tag snapshot at the wired exit
6182                                            // (inline / tag / global) and
6183                                            // check the child's entry_tags
6184                                            // match. If not, leave the cell
6185                                            // null + skip cache populate so
6186                                            // the future v2-C-A2 IR's
6187                                            // `call_indirect` stays inert at
6188                                            // this exit (the child's
6189                                            // shape-specialised IR would
6190                                            // mis-interpret raw bits the
6191                                            // parent writes to reg_state).
6192                                            let inline_n = parent_ct.per_exit_inline.len();
6193                                            let tags_n = parent_ct.per_exit_tags.len();
6194                                            let parent_exit_tags_slice: &[
6195                                            crate::jit::trace::ExitTag
6196                                        ] = if parent_exit_idx < inline_n {
6197                                            &parent_ct.per_exit_inline
6198                                                [parent_exit_idx]
6199                                                .exit_tags
6200                                        } else if parent_exit_idx
6201                                            < inline_n + tags_n
6202                                        {
6203                                            &parent_ct.per_exit_tags
6204                                                [parent_exit_idx - inline_n]
6205                                                .1
6206                                        } else {
6207                                            &parent_ct.exit_tags
6208                                        };
6209                                            let shape_ok =
6210                                                crate::jit::trace::exit_tags_match_entry_tags(
6211                                                    &ct.entry_tags,
6212                                                    parent_exit_tags_slice,
6213                                                    &parent_ct.entry_tags,
6214                                                );
6215                                            if !shape_ok {
6216                                                self.jit.counters.side_trace_shape_mismatch += 1;
6217                                            }
6218                                            // P15-A v2-C-A4 — write the child's
6219                                            // entry fn ptr to BOTH the legacy
6220                                            // v2-A `exit_side_trace_ptrs[idx]`
6221                                            // cell (kept so v2-A's
6222                                            // walk_any_side_ptr_non_null tests
6223                                            // stay green) AND the per-kind cell
6224                                            // whose heap address the parent's
6225                                            // IR baked (v2-C-A2). The IR-baked
6226                                            // cell is what the call_indirect
6227                                            // gate actually reads. Only write
6228                                            // when A5-C shape gate passes.
6229                                            if shape_ok {
6230                                                if let Some(cell) = parent_ct
6231                                                    .exit_side_trace_ptrs
6232                                                    .get(parent_exit_idx)
6233                                                {
6234                                                    cell.set(entry_ptr);
6235                                                }
6236                                                // Compute (kind, local) for the
6237                                                // IR-baked cell. Layout follows
6238                                                // exit_hit_counts: inline first,
6239                                                // then per_exit_tags, then the
6240                                                // global tail slot.
6241                                                let (sent_kind, sent_local) = if parent_exit_idx
6242                                                    < inline_n
6243                                                {
6244                                                    parent_ct.per_exit_inline[parent_exit_idx]
6245                                                        .side_trace_ptr
6246                                                        .set(entry_ptr);
6247                                                    (
6248                                                        crate::jit::trace::SIDE_SENT_KIND_INLINE,
6249                                                        parent_exit_idx as u32,
6250                                                    )
6251                                                } else if parent_exit_idx < inline_n + tags_n {
6252                                                    let local = parent_exit_idx - inline_n;
6253                                                    if let Some(b) =
6254                                                        parent_ct.tags_side_trace_ptrs.get(local)
6255                                                    {
6256                                                        b.set(entry_ptr);
6257                                                    }
6258                                                    (
6259                                                        crate::jit::trace::SIDE_SENT_KIND_TAG,
6260                                                        local as u32,
6261                                                    )
6262                                                } else {
6263                                                    parent_ct.global_side_trace_ptr.set(entry_ptr);
6264                                                    (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6265                                                };
6266                                                self.jit.counters.side_trace_compiled += 1;
6267                                                // P15-A v2-D-A8 — flip the
6268                                                // parent's fast-path hint so
6269                                                // the dispatcher knows to do
6270                                                // the tentative decode + cell
6271                                                // check on subsequent
6272                                                // dispatches. Set once and
6273                                                // stays true (we never unwire
6274                                                // a side trace today).
6275                                                parent_ct.has_any_side_wired.set(true);
6276
6277                                                // P15-A v2-C-A1/A4 — populate
6278                                                // the O(1) lookup cache the
6279                                                // dispatcher consults on
6280                                                // sentinel-bit-set returns.
6281                                                // Key is the encoded sentinel
6282                                                // (same encoding the IR ORs
6283                                                // into bits 56..=62 of the
6284                                                // child's i64 return).
6285                                                let sentinel =
6286                                                    crate::jit::trace::encode_side_sentinel(
6287                                                        sent_kind, sent_local,
6288                                                    );
6289                                                let predicted_idx = if std::ptr::eq(
6290                                                    parent_proto.as_ptr(),
6291                                                    head_proto.as_ptr(),
6292                                                ) {
6293                                                    parent_traces.len() as u32
6294                                                } else {
6295                                                    head_proto.traces.borrow().len() as u32
6296                                                };
6297                                                parent_ct
6298                                                    .side_trace_cache
6299                                                    .borrow_mut()
6300                                                    .insert(sentinel, predicted_idx);
6301                                            }
6302                                        }
6303                                        drop(parent_traces);
6304                                    }
6305                                    head_proto.traces.borrow_mut().push(TArc::new(ct));
6306                                    self.jit.counters.compiled += 1;
6307                                }
6308                                None => {
6309                                    self.jit.counters.compile_failed += 1;
6310                                    self.jit
6311                                        .counters
6312                                        .compile_failed_reasons
6313                                        .push(self.jit.trace_compiler.last_compile_checkpoint());
6314                                }
6315                            }
6316                        }
6317                    } // P13-S13-H — close the long-trace-bias else branch
6318                } else {
6319                    // P12-S4-step1 + step4a — depth-aware push at the
6320                    // current `cur_depth`. The `depth_cap_hit` /
6321                    // `returned_past_head` early-exit is handled by
6322                    // the `should_close` branch above; reaching here
6323                    // means `cur_depth <= MAX_INLINE_DEPTH` and the
6324                    // trace head's frame is still live.
6325                    let depth_u8 = cur_depth as u8;
6326                    if depth_u8 > self.jit.max_depth_seen {
6327                        self.jit.max_depth_seen = depth_u8;
6328                    }
6329                    // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6330                    // return / variable return count). Recorder pushed
6331                    // it with var_count=None before the call dispatched;
6332                    // now that the call has returned and we're about to
6333                    // push the next op, top reflects the actual return
6334                    // count. Snapshot top - (caller.base + call.a).
6335                    if let Some(last) = rec.ops.last_mut()
6336                        && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6337                        && last.inst.c() == 0
6338                        && last.var_count.is_none()
6339                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6340                    {
6341                        let from = f.base + last.inst.a();
6342                        if self.top >= from {
6343                            last.var_count = Some(self.top - from);
6344                        }
6345                    }
6346                    // P12-S9-A/C — for SetList B=0, snapshot the source
6347                    // count = top - A - 1 (mirrors Lua's `n = top - ra
6348                    // - 1` from lvm.c OP_SETLIST). Sources are
6349                    // R[A+1..top), exclusive top. For Call C=0's
6350                    // var_count (the return count = top - A inclusive),
6351                    // see the prior-op fix-up above; here we
6352                    // initialise the current Call op to None and let
6353                    // the fix-up on the next op's push populate it.
6354                    let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6355                        && inst.b() == 0
6356                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6357                    {
6358                        let from = f.base + inst.a();
6359                        if self.top > from {
6360                            Some(self.top - from - 1)
6361                        } else {
6362                            None
6363                        }
6364                    } else {
6365                        None
6366                    };
6367                    let op = crate::jit::trace::RecordedOp {
6368                        proto: cl.proto,
6369                        pc,
6370                        inst,
6371                        inline_depth: depth_u8,
6372                        var_count,
6373                    };
6374                    // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6375                    // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6376                    // Captured as a side-channel `RetfRecord` parallel to
6377                    // `ops` when `p16_self_link_enabled` is on. R3's
6378                    // down-rec stitch consumes these to guard side-trace
6379                    // inlined-frame topology against the recorded shape.
6380                    // Gated on the same flag as the cycle catch so the
6381                    // ship-default path (p16 off) sees zero behavior
6382                    // change. `caller_pc` is the recorded enclosing Call's
6383                    // pc + 1 — interp's resume point after the inlined
6384                    // frame pops.
6385                    if self.jit.p16_self_link_enabled
6386                        && depth_u8 > 0
6387                        && matches!(
6388                            inst.op(),
6389                            crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6390                        )
6391                    {
6392                        let results: u8 = match inst.op() {
6393                            crate::vm::isa::Op::Return0 => 0,
6394                            crate::vm::isa::Op::Return1 => 1,
6395                            _ => 0,
6396                        };
6397                        // Most recent Op::Call recorded at the caller's
6398                        // depth (`depth_u8 - 1`) is the frame this Return
6399                        // is unwinding from. Reverse scan stops at the
6400                        // first match.
6401                        let caller_depth = depth_u8 - 1;
6402                        let caller_call = rec.ops.iter().rev().find(|r| {
6403                            r.inline_depth == caller_depth
6404                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6405                        });
6406                        let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6407                        // v2.0 Track-R R3a — capture the caller's proto
6408                        // for the RetfRecord. LuaJIT `IR_RETF.op1`
6409                        // equivalent. For fib(28) the caller's proto
6410                        // equals the trace head; for future mutual
6411                        // recursion the recorded Op::Call's proto is the
6412                        // right target. Fallback to head_proto when no
6413                        // enclosing Call op was captured (mirrors
6414                        // `caller_pc`'s fallback to the Return's own pc).
6415                        let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6416                        rec.retfs.push(crate::jit::trace::RetfRecord {
6417                            from_depth: depth_u8,
6418                            to_depth: caller_depth,
6419                            results,
6420                            caller_pc,
6421                            proto: caller_proto,
6422                        });
6423                        // v2.0 Track-R R3a — DownRec close trigger:
6424                        // count RetfRecords on this recording whose
6425                        // `proto` matches `caller_proto` (LuaJIT
6426                        // `check_downrec_unroll` chain filter
6427                        // `op1 == ptref`). Threshold mirrors
6428                        // RECUNROLL_THRESHOLD; first trip stamps the
6429                        // `downrec_close` marker, subsequent retfs
6430                        // keep the marker without overwrite. The
6431                        // lowerer's end_idx picker routes through
6432                        // TraceEnd::DownRec when the marker is set;
6433                        // R3a's tail emit still falls through to R1's
6434                        // safe deopt path so fib(28) result stays
6435                        // 317_811. R3b lifts.
6436                        if rec.downrec_close.is_none() {
6437                            let caller_proto_ptr = caller_proto.as_ptr();
6438                            let prior_match_count = rec
6439                                .retfs
6440                                .iter()
6441                                .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6442                                .count();
6443                            // Strictly-greater-than threshold matches
6444                            // LuaJIT `count + J->tailcalled > recunroll`.
6445                            // The newly-pushed retf is already counted.
6446                            if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6447                                rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6448                                    return_pc: caller_pc,
6449                                    target_proto: caller_proto,
6450                                    depth_delta: 1,
6451                                });
6452                                // R2 close-cause taxonomy: tag the
6453                                // restart with `"downrec-restart"`. R3b
6454                                // adds `"downrec-stitch-failed"` when
6455                                // the lifted back-edge falls back to
6456                                // deopt.
6457                                self.jit.counters.bump_close_cause("downrec-restart");
6458                            }
6459                        }
6460                    }
6461                    // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6462                    // FIRST eligible Op::GetField site under env-gate
6463                    // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6464                    //   - R[B] is Value::Table with metatable.is_none()
6465                    //   - K[C] is Value::Str
6466                    //   - The string key actually occupies a hash slot
6467                    //     (so the IC's slot_idx is a real index, not
6468                    //     a probe sentinel).
6469                    // Once captured, subsequent GetFields skip this
6470                    // logic (rec.field_ic_snapshot.is_some() short-
6471                    // circuits). Env-OFF short-circuits on the cached
6472                    // atomic check inside field_ic_enabled().
6473                    if rec.field_ic_snapshot.is_none()
6474                        && matches!(inst.op(), crate::vm::isa::Op::GetField)
6475                        && crate::jit::trace_types::field_ic_enabled()
6476                    {
6477                        let b = inst.b();
6478                        let c_idx = inst.c() as usize;
6479                        let r_b = self.stack[(base + b) as usize];
6480                        if let Value::Table(g) = r_b
6481                            && g.metatable().is_none()
6482                            && c_idx < cl.proto.consts.len()
6483                            && let Value::Str(s) = cl.proto.consts[c_idx]
6484                        {
6485                            let key = Value::Str(s);
6486                            let tbl_ref = &*g;
6487                            if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6488                                && let Some(val) = tbl_ref.node_val_at(slot_idx)
6489                            {
6490                                let op_idx = rec.ops.len() as u32;
6491                                rec.field_ic_snapshot =
6492                                    Some(crate::jit::trace_types::FieldIcSnapshot {
6493                                        op_idx,
6494                                        nodes_len: tbl_ref.nodes_capacity() as u64,
6495                                        slot_idx: slot_idx as u64,
6496                                        key_ptr_bits: s.as_ptr() as u64,
6497                                        cached_val_tag: val.tag_byte(),
6498                                    });
6499                                self.jit.counters.field_ic_snapshot_captured += 1;
6500                            }
6501                        }
6502                    }
6503                    if !rec.push(op) {
6504                        // v2.0 Track-R R2 — recorder overflow
6505                        // (MAX_TRACE_LEN). Pre-R2 this site bumped
6506                        // `aborted` with no reason label, leaving the
6507                        // overflow indistinguishable from any other
6508                        // abort cause that might be added later.
6509                        // Tag it explicitly under the close-cause
6510                        // bucket so probes can tally overflow vs
6511                        // other abort causes in O(1).
6512                        self.jit.active_trace = None;
6513                        self.jit.counters.aborted += 1;
6514                        self.jit.counters.bump_close_cause("trace-overflow");
6515                    }
6516                }
6517            }
6518
6519            // P12-S3 — trace JIT dispatcher.
6520            //
6521            // When the dispatch loop is about to execute the op at
6522            // `pc` and there's a `numeric_only` CompiledTrace cached
6523            // for that `head_pc`, marshal the live regs into an
6524            // i64 buffer, jump into the trace, and resume the
6525            // interpreter at the returned continuation PC.
6526            //
6527            // Skipped (zero overhead) when `trace_jit_enabled` is
6528            // false; the lookup is a borrow + scan over
6529            // `cl.proto.traces`, which is a `Vec` whose size is at
6530            // most one entry per back-edge per Proto in practice.
6531            //
6532            // Marshalling contract — the reg_state ABI is `*mut i64`
6533            // with no tag info, so each slot is marshalled as its raw
6534            // 8-byte payload and its runtime tag must equal the tag
6535            // the trace was compiled against. A mismatching slot in
6536            // the window forces a skip; interp takes over for one op
6537            // and the back-edge brings us back to try again next
6538            // pass (slot tags may have settled to a match by then).
6539            //
6540            // A trace that comes back with `vm.jit.pending_err`
6541            // parked is treated as a deopt: clear the err, leave
6542            // the stack as the trace wrote it, and let the
6543            // interpreter run from the same `pc`. The trace itself
6544            // is left cached — a future entry might find no
6545            // metatable in the way and succeed.
6546            // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6547            // of 6 per-field Rc clones. proto.traces is now
6548            // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6549            // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6550            // operations per dispatch × 434k = ~2.2M Rc atomic ops
6551            // (~1-2% gain measured separately).
6552            // v2.0 Track-R R3c — one-shot consume of the
6553            // `suppress_downrec_admit_once` flag. Set by the R3c
6554            // downrec post-invoke arm below when it force-deopts the
6555            // trace (caller-pc guard miss OR cycle-budget exhausted)
6556            // so the NEXT interpreter loop iteration skips the
6557            // downrec admit, lets interp run the op at `head_pc`,
6558            // advances `pc` past `head_pc`, and breaks the otherwise-
6559            // infinite admit loop. Reading + clearing here means a
6560            // single dispatch tick consumes the suppression — the
6561            // following tick re-admits naturally (with the budget
6562            // also reset by the deopt site).
6563            let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6564            self.jit.suppress_downrec_admit_once = false;
6565            if self.jit.trace_enabled
6566                && let Some(ct) = {
6567                    let traces = cl.proto.traces.borrow();
6568                    traces
6569                        .iter()
6570                        .find(|t| {
6571                            if t.head_pc != pc {
6572                                return false;
6573                            }
6574                            let is_downrec = t.downrec_link.is_some();
6575                            // v2.0 Track-R R3c — the one-shot suppress
6576                            // flag blocks any admit (primary or fallback)
6577                            // for `downrec_link`-bearing traces so the
6578                            // next interp iter can run the natural op
6579                            // at `head_pc` and advance past it. R3d's
6580                            // `dispatchable=true` lift means the suppress
6581                            // must also cover the primary `t.dispatchable`
6582                            // arm — otherwise the lifted lookup would
6583                            // immediately re-admit after a force-deopt
6584                            // and the infinite loop returns.
6585                            if is_downrec && downrec_admit_blocked {
6586                                return false;
6587                            }
6588                            // Primary arm: `dispatchable=true` traces
6589                            // (R3d-lifted DownRec or normal traces).
6590                            // Fallback arm: R3c-shape `dispatchable=false`
6591                            // DownRec traces (single-CMP guard kept
6592                            // pinned because the 90% miss-rate would
6593                            // make blind admit perf-negative).
6594                            t.dispatchable || is_downrec
6595                        })
6596                        .cloned()
6597                }
6598            {
6599                // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6600                // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6601                // across the entire dispatch block so the fields outlive
6602                // all consumers. Saves 5 Rc::clone per dispatch.
6603                let entry_fn = ct.entry;
6604                let head_pc_val = ct.head_pc;
6605                let window_size = ct.window_size;
6606                let exit_tags = &ct.exit_tags;
6607                let per_exit_tags = &ct.per_exit_tags;
6608                let per_exit_inline = &ct.per_exit_inline;
6609                let compile_entry_tags = &ct.entry_tags;
6610                let global_tag_res_kind = ct.global_tag_res_kind;
6611                let exit_hit_counts = &ct.exit_hit_counts;
6612                let max_stack = cl.proto.max_stack as usize;
6613                let window_size_us = window_size as usize;
6614                let base_us = base as usize;
6615                // P12-S4-step3a — `reg_state` sized to the trace's
6616                // `window_size`, which today equals max_stack but
6617                // S4-step3b will expand for inlined frames.
6618                // Marshal-in still only writes [0..max_stack); slots
6619                // [max_stack..window_size) are zero-initialised and
6620                // filled by the trace's own GetUpval / arith.
6621                // P13-S13-D — reuse the Vm's amortised buffers
6622                // instead of allocating fresh Vecs each dispatch.
6623                // mem::take leaves an empty placeholder we restore
6624                // at the end of the dispatch block (success +
6625                // deopt paths both fall through to the restore).
6626                let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6627                entry_tags.clear();
6628                entry_tags.reserve(max_stack);
6629                // v2.0 Track-R R3c — this trace was admitted via the
6630                // `downrec_link.is_some()` arm rather than the normal
6631                // `dispatchable=true` arm. The pre-invoke path
6632                // populates a reserved saved-PC slot just past the
6633                // normal register window so R3b's lowerer guard load
6634                // (`reg_state[window_size]`) compares the runtime
6635                // saved caller PC against the recorded `dr_return_pc`.
6636                //
6637                // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6638                // gate. After R3d lifts `dispatchable = true` for
6639                // multi-way guards, the trace's body still emits the
6640                // R3b/R3d sentinel shape on return — the saved-PC slot
6641                // and post-invoke classifier must keep firing.
6642                // `downrec_link.is_some()` is the unique structural
6643                // signal that the trace closes via DownRec.
6644                let is_downrec_entry = ct.downrec_link.is_some();
6645                let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6646                reg_state.clear();
6647                // v2.0 Track-R R3c — when admitting a downrec trace,
6648                // size the buffer to `window_size + 1` so the lowerer
6649                // can `load(I64, ..., reg_state, window_size * 8)`
6650                // for the saved caller PC guard input. The extra slot
6651                // is the LAST element so cranelift's existing
6652                // `0..window_size` accesses are unaffected.
6653                let reg_state_len = if is_downrec_entry {
6654                    window_size_us + 1
6655                } else {
6656                    window_size_us
6657                };
6658                reg_state.resize(reg_state_len, 0i64);
6659                let mut dispatch_ok = true;
6660                for i in 0..max_stack {
6661                    let v = self.stack[base_us + i];
6662                    let (tag, raw) = v.unpack();
6663                    entry_tags.push(tag);
6664                    // P12-S12-C v3 — entry tag guard. The trace's IR
6665                    // is specialised to the compile-time entry tags
6666                    // (via current_kinds propagation from
6667                    // from_entry_tag). A runtime tag mismatch means
6668                    // body ops would mis-interpret raw bits (e.g.
6669                    // treat a Str pointer as Int payload → garbage).
6670                    // Skip dispatch on mismatch so interp handles
6671                    // this entry shape; the trace stays cached for
6672                    // future entries that match.
6673                    if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6674                        dispatch_ok = false;
6675                        break;
6676                    }
6677                    match tag {
6678                        // Int / Float / Table / Closure / Nil all
6679                        // marshal to raw payload cleanly; the trace's IR
6680                        // treats the 8-byte slot as an i64 (with
6681                        // f64 ops bitcasting around the boundary).
6682                        crate::runtime::value::raw::INT
6683                        | crate::runtime::value::raw::FLOAT
6684                        | crate::runtime::value::raw::TABLE
6685                        | crate::runtime::value::raw::CLOSURE
6686                        // P12-S12-B-v2 — Native iter slots (e.g.
6687                        // R[A] = ipairs_iter) are present in
6688                        // generic-for traces; the raw bits are a
6689                        // valid `*mut NativeClosure` and round-trip
6690                        // cleanly.
6691                        | crate::runtime::value::raw::NATIVE
6692                        // P12-S12-C v1 — Str slots show up in
6693                        // string-concat traces; raw bits = `*mut
6694                        // LuaStr` (interned, GC-managed). Round-
6695                        // trips cleanly as a heap pointer.
6696                        | crate::runtime::value::raw::STR
6697                        | crate::runtime::value::raw::NIL => {
6698                            // SAFETY: this only reads the payload's raw bits via the `zero` field and dereferences nothing. NOTE(review): assumes `zero` is the plain-integer view of the payload and is valid to read for every tag in this arm — confirm against `Value::unpack`.
6699                            reg_state[i] = unsafe { raw.zero as i64 };
6700                        }
6701                        _ => {
6702                            dispatch_ok = false;
6703                            break;
6704                        }
6705                    }
6706                }
6707
6708                if dispatch_ok {
6709                    debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6710                    self.jit.pending_err = None;
6711                    // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6712                    // count. A cmp@d>0 side-exit calls the materialize
6713                    // helper which pushes inlined frames onto
6714                    // `vm.frames`; on deopt those frames must be popped
6715                    // before falling through to the interpreter, else
6716                    // the stack grows unboundedly per deopted dispatch.
6717                    let pre_frames = self.frames.len();
6718                    // v2.0 Track-R R3c — saved-PC slot population. The
6719                    // recorded `dr_return_pc` on the closing trace is
6720                    // the caller's resume PC captured at a depth>0
6721                    // Return push (recorder push site, see R3a verdict
6722                    // §3). The natural runtime analogue for self-
6723                    // stitch is the dispatching frame's PARENT frame's
6724                    // PC: the trace's head_pc sits inside a Lua frame,
6725                    // and the parent (caller) frame's `pc` is what
6726                    // luna would observe as `[base-8]` in the LJ
6727                    // `asm_retf` shape (`lj_asm_arm64.h:565`). When
6728                    // the parent isn't a Lua frame (top-level dispatch
6729                    // — first invocation through `call_value`), no
6730                    // saved PC exists; we write 0, which always
6731                    // mismatches the recorded `dr_return_pc != 0`
6732                    // invariant pinned by R3b
6733                    // (`crates/luna-jit/src/jit_backend/trace.rs:7206
6734                    // debug_assert!(dr_return_pc != 0, ...)`).
6735                    if is_downrec_entry {
6736                        let saved_pc: i64 = if pre_frames >= 2 {
6737                            match &self.frames[pre_frames - 2] {
6738                                CallFrame::Lua(parent) => parent.pc as i64,
6739                                CallFrame::Cont(_) => 0,
6740                            }
6741                        } else {
6742                            0
6743                        };
6744                        reg_state[window_size_us] = saved_pc;
6745                    }
6746                    // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
6747                    // diagnostic hook. The probe fires once per trace dispatch
6748                    // (regardless of JIT vs AOT origin — both go through this
6749                    // arm), letting the AOT smoke test verify mcode actually
6750                    // executed. Guarded behind `OnceLock` so the env read is
6751                    // a one-time cost per process; not gated on a particular
6752                    // counter so the smoke test gets a deterministic single-
6753                    // line `aot_trace_fired pc=N` per first dispatch.
6754                    if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
6755                        eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
6756                    }
6757                    let continuation_pc = {
6758                        // v1.1 A1 Session A — chunk_compiler.enter
6759                        // (CraneliftBackend delegates to enter_jit;
6760                        // NullJitBackend returns an inert guard).
6761                        let vm_ptr: *mut Vm = self;
6762                        let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
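6763                        // SAFETY: `reg_state` is a live Vec resized to `reg_state_len` slots above and is not moved or dropped during the call, so the pointer is valid for that many i64 slots. NOTE(review): assumes `entry_fn` is this trace's compiled entry and only touches slots inside that window — confirm.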
6764                        unsafe { entry_fn(reg_state.as_mut_ptr()) }
6765                    };
6766                    self.jit.counters.dispatched += 1;
6767
6768                    if self.jit.pending_err.is_some() {
6769                        self.jit.pending_err = None;
6770                        self.jit.counters.deopt += 1;
6771                        // P12-S4-step4b-C-2 — unwind any helper-pushed
6772                        // inlined frames before the interpreter resumes.
6773                        // Don't restore reg_state — the trace's partial
6774                        // writes are discarded; interp re-executes from
6775                        // the original `pc`.
6776                        while self.frames.len() > pre_frames {
6777                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
6778                        }
6779                        if is_downrec_entry {
6780                            // v2.0 Track-R R3c — pending_err observed
6781                            // mid-trace inside a downrec admit. Treat
6782                            // it as a guard miss: bump `downrec_deopt`
6783                            // and suppress the next downrec admit so
6784                            // interp can advance past `head_pc` and
6785                            // the same trace doesn't immediately re-
6786                            // fire on the next loop iteration.
6787                            self.jit.counters.downrec_deopt += 1;
6788                            self.jit.suppress_downrec_admit_once = true;
6789                        }
6790                    } else if is_downrec_entry && {
6791                        // v2.0 Track-R R3d — only enter the R3c/R3d
6792                        // downrec classifier for returns whose shape
6793                        // matches the lowerer's `downrec_idx_opt` tail
6794                        // emit: either the stitch_blk DOWNREC sentinel
6795                        // (HIT) or the deopt_blk GLOBAL-sentinel-with-
6796                        // body==head_pc (MISS via guard fail). Any
6797                        // other return from a downrec trace (intermediate
6798                        // body cmp side-exit, GetField inference fail,
6799                        // etc.) carries a different sentinel/body shape
6800                        // and means the body exited BEFORE reaching the
6801                        // downrec close — classify those through the
6802                        // normal decode path (else branch below) so
6803                        // reg_state restores + pc advances correctly.
6804                        // The pre-R3d behavior (R3c) classified them all
6805                        // as MISS and skipped the normal restore, which
6806                        // inflated `downrec_deopt` with non-downrec
6807                        // events and lost the trace's mid-flight writes.
6808                        let raw_ret = continuation_pc as u64;
6809                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
6810                        let sentinel_code = if from_side_trace {
6811                            ((raw_ret >> 56) & 0x7F) as u32
6812                        } else {
6813                            0
6814                        };
6815                        let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6816                        let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
6817                            crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
6818                            0,
6819                        );
6820                        from_side_trace
6821                            && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
6822                                || (sentinel_code == global_deopt_code
6823                                    && raw_body == head_pc_val as u64))
6824                    } {
6825                        // R3d downrec event classifier.
6826                        let raw_ret = continuation_pc as u64;
6827                        let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6828                        if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
6829                            // Guard HIT — saved_pc matched one of the
6830                            // baked candidates and the trace's
6831                            // `stitch_blk` arm returned the DOWNREC
6832                            // sentinel. Cycle-safety checkpoint:
6833                            // decrement budget; on underflow,
6834                            // reclassify as deopt + reset budget.
6835                            // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
6836                            // ~all natural HITs in a hot loop fire
6837                            // before reset pressure.
6838                            if self.jit.stitch_depth_remaining > 0 {
6839                                self.jit.stitch_depth_remaining -= 1;
6840                                self.jit.counters.downrec_dispatched += 1;
6841                            } else {
6842                                self.jit.counters.downrec_deopt += 1;
6843                                self.jit.stitch_depth_remaining =
6844                                    crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
6845                            }
6846                        } else {
6847                            // Guard MISS via the lowerer's deopt_blk
6848                            // arm (GLOBAL sentinel + body == head_pc).
6849                            // The deopt_blk emit performs the
6850                            // store-back via `emit_store_back_and_return_pc`,
6851                            // so the live stack already reflects the
6852                            // body's writes; no extra restore needed
6853                            // from the dispatcher side.
6854                            self.jit.counters.downrec_deopt += 1;
6855                        }
6856                        self.jit.suppress_downrec_admit_once = true;
6857                        // Pop helper-pushed inlined frames (defensive —
6858                        // R3d's emit shape doesn't push frames in the
6859                        // tail, but a body side-exit before reaching
6860                        // the tail may have via the materialize helper).
6861                        while self.frames.len() > pre_frames {
6862                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
6863                        }
6864                        self.jit.reg_state_buf = reg_state;
6865                        self.jit.entry_tags_buf = entry_tags;
6866                        continue;
6867                    } else {
6868                        // Restore each slot using the trace's
6869                        // exit-tag analysis (see ExitTag docs).
6870                        // P12-S4-step4b-C-2 — decode the IR's
6871                        // side-exit shape. Upper 32 bits = (site_idx
6872                        // + 1) for inline cmp side-exits, 0 for
6873                        // legacy clean-tail / non-inline exits.
6874                        // P15-A v2-C-A0 — decode lives in
6875                        // `crate::jit::trace::decode_exit_shape` so
6876                        // v2-C-A3 can reuse it with the SIDE TRACE's
6877                        // shape inputs when the sentinel bit
6878                        // (v2-C-A2) is set on `raw_ret`.
6879                        let raw_ret = continuation_pc as u64;
6880                        // P15-A v2-C-A3 — side-trace return decode.
6881                        // Bit 63 of `raw_ret` is the side-trace
6882                        // marker the parent's IR OR'd in when it
6883                        // tail-called into a wired child trace.
6884                        // Bits 56..=62 carry the sentinel code (the
6885                        // cache key into the parent's
6886                        // `side_trace_cache`); bits 0..=55 are the
6887                        // child's own return value (encoded site or
6888                        // plain cont_pc) which we MUST decode using
6889                        // the CHILD's per_exit_inline / per_exit_tags
6890                        // / exit_tags / exit_hit_counts — not the
6891                        // parent's. The dispatcher snapshot read
6892                        // above holds the parent's shapes; when bit
6893                        // 63 is set we re-fetch the child's via the
6894                        // sentinel-keyed cache.
6895                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
6896                        let (
6897                            decode_inline,
6898                            decode_tags,
6899                            decode_exit_tags,
6900                            decode_hit_counts,
6901                            decode_body,
6902                        ) = if from_side_trace {
6903                            let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6904                            let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6905                            let traces = cl.proto.traces.borrow();
6906                            let child_idx = traces
6907                                .iter()
6908                                .find(|t| t.head_pc == head_pc_val)
6909                                .and_then(|pct| {
6910                                    pct.side_trace_cache.borrow().get(&sentinel_code).copied()
6911                                });
6912                            if let Some(idx) = child_idx
6913                                && let Some(child) = traces.get(idx as usize)
6914                            {
6915                                if crate::jit::trace::v2c_probe_enabled() {
6916                                    eprintln!(
6917                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
6918                                        sentinel_code,
6919                                        body,
6920                                        idx,
6921                                        child.n_ops,
6922                                        child.head_pc,
6923                                        child.window_size,
6924                                        pc,
6925                                        window_size,
6926                                        child.dispatchable,
6927                                        child.is_inline_abort_close,
6928                                    );
6929                                }
6930                                (
6931                                    child.per_exit_inline.clone(),
6932                                    child.per_exit_tags.clone(),
6933                                    child.exit_tags.clone(),
6934                                    child.exit_hit_counts.clone(),
6935                                    body,
6936                                )
6937                            } else {
6938                                if crate::jit::trace::v2c_probe_enabled() {
6939                                    eprintln!(
6940                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
6941                                        sentinel_code, body,
6942                                    );
6943                                }
6944                                // Cache miss — fall back to parent
6945                                // shapes with the body bits. Best-
6946                                // effort; the trace_side_trace_
6947                                // shape_mismatch_count records this
6948                                // path indirectly (close-handler
6949                                // skips wiring on mismatch so we
6950                                // shouldn't reach here when shape
6951                                // gate held).
6952                                (
6953                                    per_exit_inline.clone(),
6954                                    per_exit_tags.clone(),
6955                                    exit_tags.clone(),
6956                                    exit_hit_counts.clone(),
6957                                    body,
6958                                )
6959                            }
6960                        } else {
6961                            // P15-A v2-D — dispatcher-level side-trace
6962                            // invocation. Replaces v2-C's universal IR
6963                            // gate (`load + icmp + brif` at every
6964                            // emit_store_back callsite, which A6/A7
6965                            // measured as a net perf regression).
6966                            // A8 fast-path: skip the tentative decode +
6967                            // child lookup entirely when `has_any_side
6968                            // _wired == false` (the common case until
6969                            // the first side trace compiles for this
6970                            // parent). For fib_10_x10k and other tight
6971                            // short-trace workloads where most parent
6972                            // traces never get a wired child, this
6973                            // collapses the v2-D overhead to a single
6974                            // `Cell::get()` on the cold path.
6975                            // A8-revert: A8 had `parent_has_side` short-
6976                            // circuit + snapshot hoist; mini N=3 showed
6977                            // A8 lost the btrees_d8 1.02× win (dropped
6978                            // to 0.95×) WITHOUT helping fib_10 (same
6979                            // 0.86×). Drop A8 — accept the always-run
6980                            // v2-D path; the tentative decode + cell
6981                            // load is cheaper than the cost A8 added.
6982                            {
6983                                let tentative = crate::jit::trace::decode_exit_shape(
6984                                    raw_ret,
6985                                    per_exit_inline,
6986                                    per_exit_tags,
6987                                    exit_tags,
6988                                );
6989                                let tentative_exit_idx = tentative.exit_hit_idx;
6990                                let child_invoke = {
6991                                    let traces = cl.proto.traces.borrow();
6992                                    traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
6993                                        |pct| {
6994                                            let cell =
6995                                                pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
6996                                            let fn_ptr = cell.get();
6997                                            if fn_ptr.is_null() {
6998                                                return None;
6999                                            }
7000                                            traces
7001                                                .iter()
7002                                                .find(|t| {
7003                                                    t.entry as *const () as *const u8 == fn_ptr
7004                                                })
7005                                                .map(|child| {
7006                                                    (
7007                                                        child.entry,
7008                                                        child.per_exit_inline.clone(),
7009                                                        child.per_exit_tags.clone(),
7010                                                        child.exit_tags.clone(),
7011                                                        child.exit_hit_counts.clone(),
7012                                                    )
7013                                                })
7014                                        },
7015                                    )
7016                                };
7017                                if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
7018                                    let child_raw_ret = {
7019                                        // v1.1 A1 Session A — chunk_compiler.enter
7020                                        // (side-trace entry).
7021                                        let vm_ptr: *mut Vm = self;
7022                                        let _guard =
7023                                            self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
7024                                        // SAFETY: `reg_state` is the same live Vec the parent trace just ran on, so the pointer is valid for its current length. NOTE(review): assumes `cent` is the wired child trace's compiled entry and that its window fits within `reg_state` — confirm.
7025                                        unsafe { cent(reg_state.as_mut_ptr()) }
7026                                    };
7027                                    (cpi, cpt, cet, chc, child_raw_ret as u64)
7028                                } else {
7029                                    (
7030                                        per_exit_inline.clone(),
7031                                        per_exit_tags.clone(),
7032                                        exit_tags.clone(),
7033                                        exit_hit_counts.clone(),
7034                                        raw_ret,
7035                                    )
7036                                }
7037                            }
7038                        };
7039                        let decoded = crate::jit::trace::decode_exit_shape(
7040                            decode_body,
7041                            &decode_inline,
7042                            &decode_tags,
7043                            &decode_exit_tags,
7044                        );
7045                        let site_id = decoded.site_id;
7046                        let cont_pc = decoded.cont_pc;
7047                        let exit_hit_idx = decoded.exit_hit_idx;
7048                        let exit_tags_for_pc = decoded.exit_tags_for_pc;
7049                        // P15-A v2-C-A3 — for side-trace returns
7050                        // force using_global_exit_tags=false so the
7051                        // restore loop always takes the per-tag slow
7052                        // path (the child's global_tag_res_kind
7053                        // classification isn't plumbed through yet
7054                        // — TODO for a future polish step).
7055                        let using_global_exit_tags = if from_side_trace {
7056                            false
7057                        } else {
7058                            decoded.using_global_exit_tags
7059                        };
7060                        // P15-prep — increment the counter (saturate
7061                        // at u32::MAX to avoid wrap on long runs).
7062                        // P15-A v1 — track whether this increment is
7063                        // the one that crossed `HOTEXIT_THRESHOLD`
7064                        // (transition: previous v < threshold, new v
7065                        // == threshold). The side-trace start is
7066                        // deferred to just before `continue;` so
7067                        // vm.stack and frame.pc are fully restored
7068                        // (the snapshot reads post-restore values).
7069                        let mut side_trace_should_start = false;
7070                        // P15-A v2-C-A3 — for side-trace returns the
7071                        // counter to bump is the CHILD's (decoded
7072                        // shape lookup) — `exit_hit_idx` is into the
7073                        // decoded layout, so use the matching
7074                        // `decode_hit_counts`. For parent decode
7075                        // they're aliased (clone of the parent's
7076                        // own Rc).
7077                        if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7078                            let v = c.get();
7079                            if v < u32::MAX {
7080                                c.set(v + 1);
7081                            }
7082                            if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7083                                && self.jit.active_trace.is_none()
7084                                && self.jit.trace_enabled
7085                            {
7086                                side_trace_should_start = true;
7087                            }
7088                        }
7089                        // P12-S4-step4b-C-2 — at an inline cmp@d>0
7090                        // side-exit, the helper has pushed N frames on
7091                        // top of the trace head's frame and
7092                        // `exit_tags_for_pc.len()` covers the full
7093                        // window (caller + each inlined frame's
7094                        // window). Slots beyond `max_stack` belong to
7095                        // an inlined frame: their `Untouched` entries
7096                        // default to Nil (no entry-tag fallback —
7097                        // marshal-in only captured caller slots) and
7098                        // we write to interp stack at `base + i` which
7099                        // mirrors `op_offsets`-derived layout.
7100                        let slot_count = exit_tags_for_pc.len();
7101                        // P12-S4-step4b-C-2 — the helper only extends
7102                        // vm.stack up to the deepest pushed frame's
7103                        // window, but the exit_tags snapshot covers
7104                        // the trace's full `window_size` (which
7105                        // includes depth-N+1 scratch slots that the
7106                        // trace's IR may have written without a
7107                        // matching pushed frame). Extend with Nil so
7108                        // the write at the tail doesn't panic; these
7109                        // slots get overwritten by the writeback loop
7110                        // and won't leak meaningful data past the
7111                        // pushed frames' R[0..max_stack) windows.
7112                        if self.stack.len() < base_us + slot_count {
7113                            self.stack
7114                                .resize(base_us + slot_count, crate::runtime::Value::Nil);
7115                        }
7116                        // P13-S13-E — fast-path restore loop. When
7117                        // we landed on the global `exit_tags`,
7118                        // dispatch on the compile-time
7119                        // classification: skip the loop entirely
7120                        // for `AllUntouched`, do a tag-free
7121                        // `Value::Int(...)` write per slot for
7122                        // `AllInt`, otherwise fall through to the
7123                        // general match-arm loop. site_id > 0
7124                        // (inline frame mat) and per_exit_tags
7125                        // hits always take the general path —
7126                        // their per-side-exit shapes aren't
7127                        // pre-classified yet.
7128                        let fast_path_taken = if using_global_exit_tags {
7129                            match global_tag_res_kind {
7130                                crate::jit::trace::TagResKind::AllUntouched => {
7131                                    // No-op: vm.stack already
7132                                    // matches the trace's post-
7133                                    // entry state for these
7134                                    // slots (entry values not
7135                                    // overridden, or already
7136                                    // spilled by helpers).
7137                                    true
7138                                }
7139                                crate::jit::trace::TagResKind::AllInt => {
7140                                    for i in 0..slot_count {
7141                                        self.stack[base_us + i] =
7142                                            crate::runtime::Value::Int(reg_state[i]);
7143                                    }
7144                                    true
7145                                }
7146                                crate::jit::trace::TagResKind::Mixed => false,
7147                            }
7148                        } else {
7149                            false
7150                        };
7151                        if !fast_path_taken {
7152                            for i in 0..slot_count {
7153                                let tag = match exit_tags_for_pc[i] {
7154                                    crate::jit::trace::ExitTag::Untouched => {
7155                                        if i < max_stack {
7156                                            entry_tags[i]
7157                                        } else {
7158                                            crate::runtime::value::raw::NIL
7159                                        }
7160                                    }
7161                                    crate::jit::trace::ExitTag::Int => {
7162                                        crate::runtime::value::raw::INT
7163                                    }
7164                                    crate::jit::trace::ExitTag::Float => {
7165                                        crate::runtime::value::raw::FLOAT
7166                                    }
7167                                    crate::jit::trace::ExitTag::Table => {
7168                                        crate::runtime::value::raw::TABLE
7169                                    }
7170                                    crate::jit::trace::ExitTag::Closure => {
7171                                        crate::runtime::value::raw::CLOSURE
7172                                    }
7173                                    // P12-S6-A1 — trace actively wrote Nil
7174                                    // to this slot (e.g. via Op::LoadNil).
7175                                    // Restore as Nil regardless of the entry
7176                                    // tag, since the i64 payload is 0 and
7177                                    // packing as the entry tag (e.g. INT)
7178                                    // would mis-type the slot.
7179                                    crate::jit::trace::ExitTag::Nil => {
7180                                        crate::runtime::value::raw::NIL
7181                                    }
7182                                    // P12-S12-C v2 — trace wrote a Str ptr
7183                                    // to this slot (LoadK Str / Move from
7184                                    // Str / Concat result). Restore as
7185                                    // Value::Str with raw bits round-
7186                                    // tripped.
7187                                    crate::jit::trace::ExitTag::Str => {
7188                                        crate::runtime::value::raw::STR
7189                                    }
7190                                };
7191                                // SAFETY: `tag` is either the entry tag of a
7192                                // verified slot (entry validated above) or a
7193                                // tag pinned by the exit-tag analysis (INT,
7194                                // FLOAT, TABLE, CLOSURE, NIL or STR).
7195                                // The raw payload sits in reg_state[i]. Stack was
7196                                // extended by the materialize helper for inline frames.
7197                                // For pointer payloads: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7198                                self.stack[base_us + i] = unsafe {
7199                                    Value::pack(
7200                                        tag,
7201                                        crate::runtime::value::RawVal {
7202                                            zero: reg_state[i] as u64,
7203                                        },
7204                                    )
7205                                };
7206                            }
7207                        }
7208                        // P12-S4-step4b-C-2 — for non-inline exits the
7209                        // helper was never called (no metas chain for
7210                        // this cont_pc), so `frames.last()` is the
7211                        // trace head's frame and we set its pc to
7212                        // cont_pc as before. For inline exits the
7213                        // helper baked the side-exit PC into the
7214                        // innermost frame's `pc` at push time
7215                        // (chain.last().pc was overridden at emit),
7216                        // so this assignment to `frames.last_mut().pc
7217                        // = cont_pc` is a redundant-but-correct
7218                        // confirmation.
7219                        let _ = &per_exit_inline; // hold the Rc alive across dispatch
7220                        // P12-S4-step4b-C-2 — for inline side-exits the
7221                        // helper has pushed N frames on top. The trace
7222                        // head frame is at `pre_frames - 1`; set its
7223                        // pc to `head_resume_pc` so when the chain
7224                        // eventually pops back to it, interp resumes
7225                        // PAST the trace's depth-0 Op::Call instead of
7226                        // restarting from `head_pc` and re-triggering
7227                        // dispatch (infinite loop). The innermost
7228                        // (helper-pushed) frame already has its pc
7229                        // baked in at compile time, but we still
7230                        // assign `cont_pc` below for parity with the
7231                        // non-inline path (no-op).
7232                        if site_id > 0 {
7233                            let idx = (site_id - 1) as usize;
7234                            let head_resume_pc = decode_inline[idx].head_resume_pc;
7235                            if pre_frames > 0 {
7236                                if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7237                                    f.pc = head_resume_pc;
7238                                }
7239                            }
7240                        }
7241                        let frames_len_now = self.frames.len();
7242                        // SAFETY: `unwrap_unchecked` requires `self.frames` to be non-empty. Trace dispatch runs on behalf of the executing Lua frame (the trace head's frame, plus any frames the inline-exit helper pushed on top of it), so `last_mut()` is `Some`.
7243                        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7244                            CallFrame::Lua(fmut) => {
7245                                if crate::jit::trace::v2c_probe_enabled() {
7246                                    eprintln!(
7247                                        "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
7248                                        from_side_trace,
7249                                        raw_ret,
7250                                        fmut.pc,
7251                                        cont_pc,
7252                                        site_id,
7253                                        frames_len_now,
7254                                        pre_frames,
7255                                        max_stack,
7256                                    );
7257                                }
7258                                fmut.pc = cont_pc;
7259                            }
7260                            _ => unreachable!("Cont frame at trace dispatch"),
7261                        }
7262                        // P15-A v1 — deferred side-trace start. The
7263                        // increment block above flagged this exit's
7264                        // hit count crossing HOTEXIT_THRESHOLD; now
7265                        // that vm.stack is restored and frame.pc is
7266                        // settled, snapshot entry_tags from the
7267                        // resume frame's window and create the
7268                        // recorder. The recorder's first push fires
7269                        // on the next interp iteration at cont_pc.
7270                        //
7271                        // `head_proto` for the side trace = cl.proto
7272                        // (trace JIT only inlines self-recursive
7273                        // calls today, so cont_pc always lands in
7274                        // the same proto as the parent). Frame base
7275                        // is the resume frame (top of `self.frames`
7276                        // — inline-pushed frames moved this).
7277                        if side_trace_should_start {
7278                            let (resume_base, resume_proto) = match self.frames.last() {
7279                                Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7280                                _ => (base_us, cl.proto),
7281                            };
7282                            let resume_max_stack = resume_proto.max_stack as usize;
7283                            let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7284                            // Extend stack if cont_pc's frame window
7285                            // overhangs the current stack len (rare,
7286                            // but inline-pushed frame stack writes
7287                            // only covered the trace's writeback).
7288                            if self.stack.len() < resume_base + resume_max_stack {
7289                                self.stack.resize(
7290                                    resume_base + resume_max_stack,
7291                                    crate::runtime::Value::Nil,
7292                                );
7293                            }
7294                            for i in 0..resume_max_stack {
7295                                let (tag, _) = self.stack[resume_base + i].unpack();
7296                                side_entry_tags.push(tag);
7297                            }
7298                            self.jit.active_trace =
7299                                Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7300                                    resume_proto,
7301                                    cont_pc,
7302                                    side_entry_tags,
7303                                    cl.proto,
7304                                    head_pc_val,
7305                                    exit_hit_idx,
7306                                )));
7307                            self.jit.recording_frame_base = self.frames.len() - 1;
7308                            self.jit.counters.side_trace_started += 1;
7309                        }
7310                        // P13-S13-D — put the dispatch buffers back
7311                        // before the `continue;` so the next
7312                        // dispatch picks up the same allocation.
7313                        self.jit.reg_state_buf = reg_state;
7314                        self.jit.entry_tags_buf = entry_tags;
7315                        continue;
7316                    }
7317                }
7318                // P13-S13-D — !dispatch_ok / deopt path / non-cont
7319                // exit also restore the buffers before falling
7320                // through to the interp.
7321                self.jit.reg_state_buf = reg_state;
7322                self.jit.entry_tags_buf = entry_tags;
7323            }
7324
7325            // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
7326            // hook code that consults `currentpc = savedpc - 1` lands on the
7327            // instruction now executing. luna mirrors that by advancing
7328            // `f.pc` to `pc + 1` before the hook block — local_at /
7329            // getinfo / line attribution all read f.pc, and the existing
7330            // `pc - 1` convention in those helpers then yields the current
7331            // instruction's pc (db.lua :696: local `A` visible at the
7332            // chunk's return line once OP_CLOSURE has advanced pc).
7333            //
7334            // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
7335            // (cont frames drained above) so the and_then/Option layers are
7336            // dead weight.
7337            // SAFETY: `unwrap_unchecked` requires `self.frames` to be non-empty. The loop is about to execute an instruction of the top frame (cont frames drained above), so `last_mut()` is `Some`.
7338            match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7339                CallFrame::Lua(fmut) => fmut.pc = pc + 1,
7340                _ => unreachable!("Cont frame at pc bump"),
7341            }
7342
7343            // count + line hooks (PUC traceexec): before executing the
7344            // instruction. Skipped while the hook itself runs.
7345            // (Parens here are load-bearing — without them `&&` binds tighter
7346            // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7347            // letting a Lua line hook recurse into itself → stack overflow
7348            // on db.lua line-hook assertions. Matches the `hook_call_with` /
7349            // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7350            if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7351                let lines = &cl.proto.lines;
7352                let cur_line = if lines.is_empty() {
7353                    None
7354                } else {
7355                    Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7356                };
7357                // count hook: fire every `count_base` instructions
7358                if self.hook.count {
7359                    self.hook.count_left -= 1;
7360                    if self.hook.count_left <= 0 {
7361                        self.hook.count_left = self.hook.count_base;
7362                        // hooked function is the running Lua frame: its frame
7363                        // is on the stack, so no synthetic C level is needed.
7364                        self.run_hook(b"count", cur_line, false)?;
7365                    }
7366                }
7367                // line hook: fire on a fresh frame, a backward jump (loop), or a
7368                // change of source line.
7369                if self.hook.line {
7370                    if lines.is_empty() {
7371                        // PUC: a stripped chunk has no line info, so
7372                        // `getfuncline` returns -1. The line hook still fires
7373                        // on the first instruction of the new frame (where
7374                        // `npci <= oldpc` holds at oldpc=0), with the line
7375                        // pushed as `nil` instead of an integer (db.lua :1030
7376                        // "hook called without debug info for 1st instruction").
7377                        if oldpc == u32::MAX {
7378                            self.run_hook(b"line", None, false)?;
7379                            self.top_frame_mut().hook_oldpc = pc;
7380                        }
7381                    } else {
7382                        let newline = lines[(pc as usize).min(lines.len() - 1)];
7383                        // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7384                        // on a backward jump (`pc < oldpc` — strict; an equal pc
7385                        // would re-fire the install-site after `oldpc = pc`),
7386                        // or when the source line changes.
7387                        let fire = oldpc == u32::MAX
7388                            || pc < oldpc
7389                            || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7390                        if fire {
7391                            self.run_hook(b"line", Some(newline as i64), false)?;
7392                        }
7393                        self.top_frame_mut().hook_oldpc = pc;
7394                    }
7395                }
7396            }
7397
7398            match inst.op() {
7399                Op::Move => {
7400                    let v = self.r(base, inst.b());
7401                    self.set_r(base, inst.a(), v);
7402                }
7403                Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7404                Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7405                Op::LoadK => {
7406                    let v = cl.proto.consts[inst.bx() as usize];
7407                    self.set_r(base, inst.a(), v);
7408                }
7409                Op::LoadKx => {
7410                    let extra = cl.proto.code[self.pc_of_top() as usize];
7411                    self.bump_pc();
7412                    let v = cl.proto.consts[extra.ax() as usize];
7413                    self.set_r(base, inst.a(), v);
7414                }
7415                Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7416                Op::LFalseSkip => {
7417                    self.set_r(base, inst.a(), Value::Bool(false));
7418                    self.bump_pc();
7419                }
7420                Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7421                Op::LoadNil => {
7422                    let a = inst.a();
7423                    for i in 0..=inst.b() {
7424                        self.set_r(base, a + i, Value::Nil);
7425                    }
7426                }
7427                Op::GetUpval => {
7428                    let v = self.upval_get(cl, inst.b());
7429                    self.set_r(base, inst.a(), v);
7430                }
7431                Op::SetUpval => {
7432                    let v = self.r(base, inst.a());
7433                    self.upval_set(cl, inst.b(), v);
7434                }
7435                Op::GetTabUp => {
7436                    let t = self.upval_get(cl, inst.b());
7437                    let key = cl.proto.consts[inst.c() as usize];
7438                    self.op_index(t, key, base + inst.a())?;
7439                }
7440                Op::GetTable => {
7441                    let t = self.r(base, inst.b());
7442                    let key = self.r(base, inst.c());
7443                    self.op_index(t, key, base + inst.a())?;
7444                }
7445                Op::GetI => {
7446                    let t = self.r(base, inst.b());
7447                    self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7448                }
7449                Op::GetField => {
7450                    let t = self.r(base, inst.b());
7451                    let key = cl.proto.consts[inst.c() as usize];
7452                    // v1.2 D4 A1 — fast path: known-Str const key + no
7453                    // metatable on the table → skip `op_index` /
7454                    // `index_step`'s MAX_TAG_LOOP setup and the outer
7455                    // `Value` match. Falls through to the slow path
7456                    // unchanged when either invariant breaks (so
7457                    // `__index` metamethods, non-Table receivers, and
7458                    // non-Str keys behave exactly as before).
7459                    if let Value::Table(tb) = t
7460                        && tb.metatable().is_none()
7461                        && let Value::Str(s) = key
7462                    {
7463                        let v = tb.get_str(s);
7464                        self.stack[(base + inst.a()) as usize] = v;
7465                    } else {
7466                        self.op_index(t, key, base + inst.a())?;
7467                    }
7468                }
7469                Op::SetTabUp => {
7470                    let t = self.upval_get(cl, inst.a());
7471                    let key = cl.proto.consts[inst.b() as usize];
7472                    let v = self.r(base, inst.c());
7473                    self.op_newindex(t, key, v)?;
7474                }
7475                Op::SetTable => {
7476                    let t = self.r(base, inst.a());
7477                    let key = self.r(base, inst.b());
7478                    let v = self.r(base, inst.c());
7479                    self.op_newindex(t, key, v)?;
7480                }
7481                Op::SetI => {
7482                    let t = self.r(base, inst.a());
7483                    let v = self.r(base, inst.c());
7484                    self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7485                }
7486                Op::SetField => {
7487                    let t = self.r(base, inst.a());
7488                    let key = cl.proto.consts[inst.b() as usize];
7489                    let v = self.r(base, inst.c());
7490                    self.op_newindex(t, key, v)?;
7491                }
7492                Op::NewTable => {
7493                    let t = self.heap.new_table();
7494                    self.set_r(base, inst.a(), Value::Table(t));
7495                    self.maybe_collect_garbage(base + inst.a() + 1);
7496                }
7497                Op::SetList => {
7498                    let a = inst.a();
7499                    let abs_a = base + a;
7500                    let n = if inst.b() == 0 {
7501                        self.top - (abs_a + 1)
7502                    } else {
7503                        inst.b()
7504                    };
7505                    let offset = if inst.k() {
7506                        let extra = cl.proto.code[self.pc_of_top() as usize];
7507                        self.bump_pc();
7508                        extra.ax() as i64
7509                    } else {
7510                        inst.c() as i64
7511                    };
7512                    let Value::Table(t) = self.r(base, a) else {
7513                        unreachable!("SETLIST on non-table");
7514                    };
7515                    for i in 1..=n {
7516                        let v = self.r(base, a + i);
7517                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7518                        if let Err(TableError::Overflow) =
7519                            unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7520                        {
7521                            return Err(self.rt_err("table overflow"));
7522                        }
7523                    }
7524                    // one barrier_back covers every store this op did — PUC's
7525                    // `luaC_barrierback_` once-per-table optimisation
7526                    self.heap
7527                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7528                    // the element temps above the table are now consumed
7529                    self.maybe_collect_garbage(base + a + 1);
7530                }
7531                Op::SelfOp => {
7532                    let o = self.r(base, inst.b());
7533                    self.set_r(base, inst.a() + 1, o);
7534                    // PUC OP_SELF's C is a constant index when the k-flag is
7535                    // set; otherwise it points to a register that holds the
7536                    // (constant-loaded) key. luna's compiler falls back to the
7537                    // register form when the constant index exceeds OP_SELF's
7538                    // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7539                    // a table with 250+ string keys, where "findfield" lands
7540                    // past const #255). The exec must honour the same split.
7541                    let key = if inst.k() {
7542                        cl.proto.consts[inst.c() as usize]
7543                    } else {
7544                        self.r(base, inst.c())
7545                    };
7546                    self.op_index(o, key, base + inst.a())?;
7547                }
7548                Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7549                Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7550                Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7551                Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7552                Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7553                Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7554                Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7555                Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7556                Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7557                Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7558                Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7559                Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7560                Op::Unm => {
7561                    let v = self.r(base, inst.b());
7562                    match coerce_num(v) {
7563                        Some(Num::Int(i)) => {
7564                            self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7565                        }
7566                        Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7567                        None => {
7568                            let mm = self.get_mm(v, Mm::Unm);
7569                            if mm.is_nil() {
7570                                return Err(self.type_err("perform arithmetic on", v));
7571                            }
7572                            let dst = base + inst.a();
7573                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7574                        }
7575                    }
7576                }
7577                Op::BNot => {
7578                    let v = self.r(base, inst.b());
7579                    match coerce_num(v) {
7580                        Some(n) => {
7581                            let i = self.int_from_num(n)?;
7582                            self.set_r(base, inst.a(), Value::Int(!i));
7583                        }
7584                        None => {
7585                            let mm = self.get_mm(v, Mm::BNot);
7586                            if mm.is_nil() {
7587                                return Err(self.type_err("perform bitwise operation on", v));
7588                            }
7589                            let dst = base + inst.a();
7590                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7591                        }
7592                    }
7593                }
7594                Op::Not => {
7595                    let v = self.r(base, inst.b());
7596                    self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7597                }
7598                Op::Len => {
7599                    let v = self.r(base, inst.b());
7600                    match self.len_step(v)? {
7601                        MmOut::Done(r) => self.set_r(base, inst.a(), r),
7602                        MmOut::Mm { func, recv } => {
7603                            let dst = base + inst.a();
7604                            self.begin_meta_call(
7605                                func,
7606                                &[recv, recv],
7607                                MetaAction::Store { dst },
7608                                "len",
7609                            )?;
7610                        }
7611                        MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7612                    }
7613                }
7614                Op::Concat => {
7615                    // right-associative fold over operands at base+a .. base+a+n,
7616                    // in place on the stack so a yielding __concat can suspend.
7617                    let a = inst.a();
7618                    let n = inst.b();
7619                    self.top = base + a + n;
7620                    self.concat_run(base + a)?;
7621                }
7622                Op::Close => {
7623                    // Yieldable: drive __close handlers through the
7624                    // interpreter loop so a coroutine.yield() inside a
7625                    // handler suspends cleanly (locals.lua block-end yield).
7626                    // `drive_close` parks the handler call at `self.top`, so
7627                    // raise `top` past this frame's full register window
7628                    // first — a goto out of a nested for-loop can fire
7629                    // OP_Close while `self.top` still sits at the inner
7630                    // body's working top, which would let `push_frame`'s
7631                    // wipe clobber the outer tbc slot before it could be
7632                    // closed (locals.lua:1219 nested-for goto regression).
7633                    self.top = self.top.max(base + cl.proto.max_stack as u32);
7634                    let _ =
7635                        self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7636                }
7637                Op::Tbc => {
7638                    self.register_tbc(base + inst.a())?;
7639                }
7640                Op::Jmp => {
7641                    let off = inst.sj();
7642                    // P12-S1.B — trace JIT back-edge counter. A negative
7643                    // jump offset is a loop back-edge (the only canonical
7644                    // backward jumps the compiler emits — `while`, `for`,
7645                    // `repeat`). Tick the per-Proto counter and, once it
7646                    // reaches TRACE_HOT_THRESHOLD, start recording a trace
7647                    // at the back-edge target (see below). The whole
7648                    // block is gated on `self.jit.trace_enabled` so
7649                    // existing benches see one branch-not-taken and no
7650                    // counter writes.
7651                    if self.jit.trace_enabled && off < 0 {
7652                        let proto = cl.proto;
7653                        let c = proto.trace_hot_count.get();
7654                        if c < u32::MAX / 2 {
7655                            proto.trace_hot_count.set(c + 1);
7656                        }
7657                        // P13-S13-H — relaxed back-edge trigger:
7658                        // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7659                        // a missed crossing (active_trace busy with
7660                        // a call-trigger, or the recorder slot
7661                        // happened to be in use) doesn't permanently
7662                        // lock this back-edge target out. The
7663                        // `already_cached` short-circuit prevents
7664                        // duplicate recordings: once a trace is
7665                        // cached for this target, subsequent
7666                        // crossings skip the start. This pairs with
7667                        // S13-H's discard-on-partial-coverage close
7668                        // handling — when a short call-trigger is
7669                        // discarded, the back-edge can still find an
7670                        // open slot at the next iteration.
7671                        let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7672                        // P13-S13-K — gave-up short-circuit. Skip
7673                        // the RefCell borrow + scan when the
7674                        // S13-I cap force-compiled a partial
7675                        // trace on this Proto.
7676                        let back_edge_already_cached = if proto.trace_gave_up.get() {
7677                            true
7678                        } else {
7679                            proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7680                        };
7681                        if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7682                            && self.jit.active_trace.is_none()
7683                            && !back_edge_already_cached
7684                        {
7685                            // Back-edge target = pc after `add_pc(off)`,
7686                            // i.e. current `pc + 1 + off` (the dispatch
7687                            // loop has already advanced f.pc to pc+1).
7688                            let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7689                            // Snapshot per-slot Value tag at trace
7690                            // entry so the lowerer's kind tracker
7691                            // knows which arith path to lower
7692                            // (iadd vs fadd, etc.).
7693                            let max_stack = cl.proto.max_stack as usize;
7694                            let base_us = base as usize;
7695                            let mut entry_tags = Vec::with_capacity(max_stack);
7696                            for i in 0..max_stack {
7697                                let (tag, _) = self.stack[base_us + i].unpack();
7698                                entry_tags.push(tag);
7699                            }
7700                            self.jit.active_trace =
7701                                Some(Box::new(crate::jit::trace::TraceRecord::start(
7702                                    cl.proto, target, entry_tags, false,
7703                                )));
7704                            // P12-S4 — record the frame the trace
7705                            // started in. `self.frames.len() - 1`
7706                            // since we're inside the currently-running
7707                            // Lua frame's dispatch.
7708                            self.jit.recording_frame_base = self.frames.len() - 1;
7709                        }
7710                    }
7711                    self.add_pc(off);
7712                }
7713                Op::Eq => {
7714                    let l = self.r(base, inst.a());
7715                    let r = self.r(base, inst.b());
7716                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7717                        if (a == b) != inst.k() {
7718                            self.bump_pc();
7719                        }
7720                    } else {
7721                        let step = self.eq_step(l, r);
7722                        self.op_compare(step, l, r, inst.k(), "eq")?;
7723                    }
7724                }
7725                Op::EqK => {
7726                    let l = self.r(base, inst.a());
7727                    let r = cl.proto.consts[inst.b() as usize];
7728                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7729                        if (a == b) != inst.k() {
7730                            self.bump_pc();
7731                        }
7732                    } else {
7733                        let step = self.eq_step(l, r);
7734                        self.op_compare(step, l, r, inst.k(), "eq")?;
7735                    }
7736                }
7737                Op::Lt => {
7738                    let l = self.r(base, inst.a());
7739                    let r = self.r(base, inst.b());
7740                    // hot path: Int < Int — drops the MmOut + op_compare match
7741                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7742                        if (a < b) != inst.k() {
7743                            self.bump_pc();
7744                        }
7745                    } else {
7746                        let step = self.less_step(l, r, false)?;
7747                        self.op_compare(step, l, r, inst.k(), "lt")?;
7748                    }
7749                }
7750                Op::Le => {
7751                    let l = self.r(base, inst.a());
7752                    let r = self.r(base, inst.b());
7753                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7754                        if (a <= b) != inst.k() {
7755                            self.bump_pc();
7756                        }
7757                    } else {
7758                        let step = self.less_step(l, r, true)?;
7759                        self.op_compare(step, l, r, inst.k(), "le")?;
7760                    }
7761                }
7762                Op::Test => {
7763                    let cond = self.r(base, inst.a()).truthy();
7764                    self.cond_skip(cond, inst.k());
7765                }
7766                Op::TestSet => {
7767                    let v = self.r(base, inst.b());
7768                    if v.truthy() == inst.k() {
7769                        self.set_r(base, inst.a(), v);
7770                    } else {
7771                        self.bump_pc();
7772                    }
7773                }
7774                Op::Call => {
7775                    let abs = base + inst.a();
7776                    let nargs = if inst.b() == 0 {
7777                        None
7778                    } else {
7779                        Some(inst.b() - 1)
7780                    };
7781                    let wanted = inst.c() as i32 - 1;
7782                    self.begin_call(abs, nargs, wanted, false)?;
7783                }
7784                Op::TailCall => {
7785                    let fr = *self.top_frame();
7786                    let abs = base + inst.a();
7787                    let mut nargs = if inst.b() == 0 {
7788                        self.top - (abs + 1)
7789                    } else {
7790                        inst.b() - 1
7791                    };
7792                    // A tail call pops this frame before begin_call, so a
7793                    // non-callable target would lose its name/position. Report
7794                    // it now (PUC reads funcname from the still-current ci),
7795                    // while the frame is intact, for "(field 'x')"-style info.
7796                    let mut func = self.stack[abs as usize];
7797                    if !matches!(func, Value::Closure(_) | Value::Native(_))
7798                        && self.get_mm(func, Mm::Call).is_nil()
7799                    {
7800                        return Err(self.call_err(func));
7801                    }
7802                    // PUC `luaD_pretailcall` resolves a chain of `__call`
7803                    // metamethods *in place* before deciding whether to
7804                    // collapse this frame. Without that, each __call hop
7805                    // would push a fresh Lua frame and a 10000-deep
7806                    // tail-recursion through a 100-deep __call chain
7807                    // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
7808                    // shift args right, install the handler at `abs`, retry.
7809                    // Chain depth limit matches the call-site `begin_call`
7810                    // version cap (5.5 calls.lua :223 — 15 max, then "too
7811                    // long"; 16th wrap fails the call). An infinite
7812                    // self-referential `__call` would otherwise spin.
7813                    let chain_cap = if self.version >= LuaVersion::Lua55 {
7814                        15
7815                    } else {
7816                        MAX_CCMT
7817                    };
7818                    let mut chain = 0u32;
7819                    while !matches!(func, Value::Closure(_) | Value::Native(_)) {
7820                        let mm = self.get_mm(func, Mm::Call);
7821                        if mm.is_nil() {
7822                            return Err(self.call_err(func));
7823                        }
7824                        chain += 1;
7825                        if chain > chain_cap {
7826                            return Err(self.rt_err("'__call' chain too long"));
7827                        }
7828                        let end = (abs + 1 + nargs) as usize;
7829                        if self.stack.len() < end + 1 {
7830                            self.stack.resize(end + 1, Value::Nil);
7831                        }
7832                        for i in (0..=nargs).rev() {
7833                            self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
7834                        }
7835                        self.stack[abs as usize] = mm;
7836                        nargs += 1;
7837                        self.top = abs + 1 + nargs;
7838                        func = mm;
7839                    }
7840                    // PUC's tail-call collapse is Lua→Lua only. A tail call to
7841                    // a C function runs the C function under the *current* Lua
7842                    // activation (no frame fold — a C frame has nothing to
7843                    // collapse into); after the C function returns, the
7844                    // calling Lua function returns those results normally.
7845                    // Mirror that: keep our Lua frame on the stack, call the
7846                    // target through `begin_call(abs, …)` as a regular call,
7847                    // and let the fallback `Op::Return` that the compiler
7848                    // emits right after `Op::TailCall` forward the results.
7849                    // 5.1 closure.lua :177's `return getfenv()` from inside
7850                    // foo needs level 1 to resolve to foo, not to the
7851                    // thread's globals fallback that happens when no Lua
7852                    // frame is on the stack.
7853                    let lua_target = matches!(func, Value::Closure(_));
7854                    if lua_target {
7855                        self.close_slots(fr.base, None)?;
7856                        for i in 0..=nargs {
7857                            self.stack[(fr.func_slot + i) as usize] =
7858                                self.stack[(abs + i) as usize];
7859                        }
7860                        // PUC `CIST_TAIL`: the new Lua activation inherits
7861                        // the popped frame's tailcalls count plus one for
7862                        // this collapse. 5.1 db.lua :372 hammers 30000
7863                        // recursive tail calls and expects to see the
7864                        // synthetic tail level for every one of them.
7865                        self.pending_tailcalls = fr.tailcalls.saturating_add(1);
7866                        frames_pop_sync(&mut self.frames, &mut self.frames_top);
7867                        if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
7868                            && self.frames.len() < entry_depth
7869                        {
7870                            // a native completed what was this function's result
7871                            return Ok(self.take_results(fr.func_slot));
7872                        }
7873                    } else {
7874                        // Native (or __call-bearing) target: regular call. The
7875                        // results land at `abs..self.top` and the next op (the
7876                        // fallback `Op::Return`) forwards them. `wanted = -1`
7877                        // because the caller will multret them through Return.
7878                        self.begin_call(abs, Some(nargs), -1, false)?;
7879                    }
7880                }
7881                Op::Return | Op::Return0 | Op::Return1 => {
7882                    let (abs_a, nret) = match inst.op() {
7883                        Op::Return0 => (base, 0),
7884                        Op::Return1 => (base + inst.a(), 1),
7885                        _ => {
7886                            let abs_a = base + inst.a();
7887                            let nret = if inst.b() == 0 {
7888                                self.top - abs_a
7889                            } else {
7890                                inst.b() - 1
7891                            };
7892                            (abs_a, nret)
7893                        }
7894                    };
7895                    // close before moving results: __close handlers run above
7896                    // the stack top, so the result region [abs_a..abs_a+nret)
7897                    // stays intact across any yields the close performs.
7898                    // Fixed-count returns may leave `self.top` below the last
7899                    // result slot (the compiler does not always re-bump it);
7900                    // raise it past the result region so `drive_close` parks
7901                    // the handler call *above* — landing at `self.top` would
7902                    // otherwise clobber a result with the handler closure.
7903                    self.top = self.top.max(abs_a + nret);
7904                    if let Some(vals) = self.begin_close(
7905                        base,
7906                        None,
7907                        AfterClose::Return {
7908                            abs_a,
7909                            nret,
7910                            from_native: false,
7911                        },
7912                        entry_depth,
7913                    )? {
7914                        return Ok(vals);
7915                    }
7916                }
7917                Op::ForPrep => self.for_prep(inst, base)?,
7918                Op::ForLoop => {
7919                    // P12 — trace JIT back-edge counter on the
7920                    // numeric-for back-edge. ForLoop is always at
7921                    // a back-edge position (when it continues);
7922                    // for the trace recorder we treat it as the
7923                    // close-detection equivalent of `Op::Jmp` with
7924                    // negative offset. Counter only ticks when the
7925                    // back-edge will actually fire (count > 0 in
7926                    // the 5.4+ Int form, comparable predicates in
7927                    // pre-5.3 / Float). The cheap check up front
7928                    // matches the for_loop helper's branch.
7929                    if self.jit.trace_enabled {
7930                        let a = inst.a();
7931                        let pre53 = self.version() <= LuaVersion::Lua53;
7932                        let take_back_edge =
7933                            match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
7934                                (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
7935                                    count > 0
7936                                }
7937                                (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
7938                                    let next = cur.wrapping_add(st);
7939                                    if st > 0 { next <= lim } else { next >= lim }
7940                                }
7941                                (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
7942                                    let next = cur + st;
7943                                    if st > 0.0 { next <= lim } else { next >= lim }
7944                                }
7945                                _ => false,
7946                            };
7947                        if take_back_edge {
7948                            let proto = cl.proto;
7949                            let c = proto.trace_hot_count.get();
7950                            if c < u32::MAX / 2 {
7951                                proto.trace_hot_count.set(c + 1);
7952                            }
7953                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
7954                                && self.jit.active_trace.is_none()
7955                            {
7956                                // ForLoop's back-edge target = pc
7957                                // after `add_pc(-bx)` runs from the
7958                                // already-bumped f.pc (= pc + 1).
7959                                // So target = (pc + 1) - bx.
7960                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
7961                                let max_stack = cl.proto.max_stack as usize;
7962                                let base_us = base as usize;
7963                                let mut entry_tags = Vec::with_capacity(max_stack);
7964                                for i in 0..max_stack {
7965                                    let (tag, _) = self.stack[base_us + i].unpack();
7966                                    entry_tags.push(tag);
7967                                }
7968                                self.jit.active_trace =
7969                                    Some(Box::new(crate::jit::trace::TraceRecord::start(
7970                                        cl.proto, target, entry_tags, false,
7971                                    )));
7972                                // P12-S4 — record the frame the trace
7973                                // started in. The currently-running
7974                                // Lua frame is at len() - 1.
7975                                self.jit.recording_frame_base = self.frames.len() - 1;
7976                            }
7977                        }
7978                    }
7979                    self.for_loop(inst, base);
7980                }
7981                Op::TForPrep => {
7982                    // the 4th control slot is the iterator's closing value
7983                    self.register_tbc(base + inst.a() + 3)?;
7984                    self.add_pc(inst.bx() as i32);
7985                }
7986                Op::TForCall => {
7987                    let abs = base + inst.a();
7988                    let need = (abs + 7) as usize;
7989                    if self.stack.len() < need {
7990                        self.stack.resize(need, Value::Nil);
7991                    }
7992                    self.stack[(abs + 4) as usize] = self.stack[abs as usize];
7993                    self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
7994                    self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
7995                    let nvars = inst.c() as i32;
7996                    self.begin_call(abs + 4, Some(2), nvars, false)?;
7997                }
7998                Op::TForLoop => {
7999                    let a = inst.a();
8000                    let ctrl = self.r(base, a + 4);
8001                    if !ctrl.is_nil() {
8002                        // P12-S12-B v1 — trace JIT back-edge counter on
8003                        // generic-for back-edge. TForLoop sits at the
8004                        // tail of `for k,v in expr do ... end`; recorder
8005                        // treats it as the close-detection equivalent of
8006                        // a negative Op::Jmp. Gate on `take_back_edge`
8007                        // (= `ctrl != nil`) so empty-iter loops don't
8008                        // pollute hot_count. v1 only adds the trigger;
8009                        // whitelist + helper + emit live in v2.
8010                        if self.jit.trace_enabled {
8011                            let proto = cl.proto;
8012                            let c = proto.trace_hot_count.get();
8013                            if c < u32::MAX / 2 {
8014                                proto.trace_hot_count.set(c + 1);
8015                            }
8016                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8017                                && self.jit.active_trace.is_none()
8018                            {
8019                                // TForLoop back-edge target = pc after
8020                                // `add_pc(-bx)` runs from the already-
8021                                // bumped f.pc (= pc + 1). So target =
8022                                // (pc + 1) - bx, normally landing on
8023                                // body_top (the op right after TForPrep).
8024                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8025                                let max_stack = cl.proto.max_stack as usize;
8026                                let base_us = base as usize;
8027                                let mut entry_tags = Vec::with_capacity(max_stack);
8028                                for i in 0..max_stack {
8029                                    let (tag, _) = self.stack[base_us + i].unpack();
8030                                    entry_tags.push(tag);
8031                                }
8032                                // P12-S12-B-v5 — snapshot the iter
8033                                // fn's address if Native, so the
8034                                // lowerer can specialise ipairs into
8035                                // inline Table aget IR.
8036                                let iter_ptr =
8037                                    if let Value::Native(n) = self.stack[base_us + a as usize] {
8038                                        Some(n.f as usize)
8039                                    } else {
8040                                        None
8041                                    };
8042                                // P12-S12-C v3 — snapshot R[A+5]'s
8043                                // tag (= current iter's val from
8044                                // the just-fired TForCall). The v5
8045                                // inline aget fast_blk emits a
8046                                // runtime guard against this tag;
8047                                // mixed-tag arrays deopt rather
8048                                // than producing garbage pointers
8049                                // through the v2 spill path.
8050                                let val_slot = base_us + (a as usize) + 5;
8051                                let val_tag = if val_slot < self.stack.len() {
8052                                    Some(self.stack[val_slot].unpack().0)
8053                                } else {
8054                                    None
8055                                };
8056                                let mut rec = crate::jit::trace::TraceRecord::start(
8057                                    cl.proto, target, entry_tags, false,
8058                                );
8059                                rec.tfor_iter_ptr = iter_ptr;
8060                                rec.tfor_val_tag = val_tag;
8061                                self.jit.active_trace = Some(Box::new(rec));
8062                                self.jit.recording_frame_base = self.frames.len() - 1;
8063                            }
8064                        }
8065                        self.set_r(base, a + 2, ctrl);
8066                        self.add_pc(-(inst.bx() as i32));
8067                    }
8068                }
8069                Op::Closure => {
8070                    let proto = cl.proto.protos[inst.bx() as usize];
8071                    let n_ups = proto.upvals.len();
8072                    // P11-S5d.M — build upvals on the stack for small
8073                    // closures, skipping the per-call Vec/Box alloc
8074                    // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8075                    // = 2 covers most Lua source (1 captured local, or
8076                    // _ENV + a single capture). Beyond that, fall back
8077                    // to a heap Vec.
8078                    use crate::runtime::function::INLINE_UPVALS_N;
8079                    let mut stack_buf: [std::mem::MaybeUninit<
8080                        Gc<crate::runtime::function::Upvalue>,
8081                    >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8082                    let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8083                    let use_inline = n_ups <= INLINE_UPVALS_N;
8084                    if !use_inline {
8085                        heap_buf.reserve_exact(n_ups);
8086                    }
8087                    for (i, d) in proto.upvals.iter().enumerate() {
8088                        let uv = if d.in_stack {
8089                            self.find_or_create_upval(base + d.index as u32)
8090                        } else {
8091                            cl.upvals()[d.index as usize]
8092                        };
8093                        if use_inline {
8094                            stack_buf[i] = std::mem::MaybeUninit::new(uv);
8095                        } else {
8096                            heap_buf.push(uv);
8097                        }
8098                    }
8099                    // Tiny shim around the two paths so the 5.1 _ENV
8100                    // clone + cache check below see one uniform
8101                    // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8102                    // into the local frame (still valid through the
8103                    // rest of this Op::Closure handler).
8104                    let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8105                        // SAFETY: the first n_ups slots of stack_buf
8106                        // were initialised above; we hand out a slice
8107                        // covering exactly them.
8108                        unsafe {
8109                            std::slice::from_raw_parts_mut(
8110                                stack_buf.as_mut_ptr()
8111                                    as *mut Gc<crate::runtime::function::Upvalue>,
8112                                n_ups,
8113                            )
8114                        }
8115                    } else {
8116                        &mut heap_buf[..]
8117                    };
8118                    // PUC 5.1 had per-function environments: every Lua
8119                    // function carried its own `env` slot, snapshotted from
8120                    // the creating function's env at closure time, so a
8121                    // `setfenv` on one closure never bled into a sibling.
8122                    // luna models that by giving the 5.1 closure a *fresh*
8123                    // closed upvalue for whichever cell holds `_ENV`, seeded
8124                    // from the parent's current env value. Only that cell is
8125                    // cloned — every other upvalue keeps its open/shared
8126                    // identity (so e.g. `local function range(...) ...
8127                    // range(...) ... end` still sees its self-reference). 5.2+
8128                    // keeps the shared-upval model (and the proto cache that
8129                    // depends on it).
8130                    let v51 = self.version() <= LuaVersion::Lua51;
8131                    if v51 && proto.env_upval_idx != u8::MAX {
8132                        let i = proto.env_upval_idx as usize;
8133                        let cur = match ups[i].state() {
8134                            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8135                            UpvalState::Closed(v) => v,
8136                        };
8137                        ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8138                    }
8139                    let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8140                    // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8141                    // and reuses it when every fresh-upvalue binding still
8142                    // points to the same Upvalue object as the cached one.
8143                    // That keeps `function() return outer end` repeated in a
8144                    // loop comparing equal across iterations (the captured
8145                    // outer is a shared open upvalue), while `function()
8146                    // return loop_var end` gets a fresh closure each round
8147                    // because the loop var is re-created per iteration. PUC
8148                    // 5.1 predated the cache, and the per-closure `_ENV`
8149                    // clone above would defeat it anyway, so skip it.
8150                    let nc = if v51 {
8151                        self.heap.new_closure_inline(proto, ups_slice)
8152                    } else {
8153                        let cached = proto.cache.get().filter(|c| {
8154                            c.upvals().len() == ups_slice.len()
8155                                && c.upvals()
8156                                    .iter()
8157                                    .zip(ups_slice.iter())
8158                                    .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8159                        });
8160                        match cached {
8161                            Some(c) => c,
8162                            None => {
8163                                let n = self.heap.new_closure_inline(proto, ups_slice);
8164                                proto.cache.set(Some(n));
8165                                n
8166                            }
8167                        }
8168                    };
8169                    self.set_r(base, inst.a(), Value::Closure(nc));
8170                    self.maybe_collect_garbage(base + inst.a() + 1);
8171                }
8172                Op::Vararg => {
8173                    let abs_a = base + inst.a();
8174                    let wanted = inst.c() as i32 - 1;
8175                    // A materialized named vararg lives in func_slot (its writes
8176                    // must be visible to `...`); otherwise spread the extra args
8177                    // straight off the stack at func_slot+1 .. +n_varargs.
8178                    let vt = match self.stack[func_slot as usize] {
8179                        Value::Table(t) => Some(t),
8180                        _ => None,
8181                    };
8182                    let n = match vt {
8183                        Some(t) => {
8184                            let n_key = Value::Str(self.heap.intern(b"n"));
8185                            // PUC getnumargs: a named vararg `t.n` set out of the
8186                            // integer range [0, INT_MAX/2] is rejected here
8187                            match t.get(n_key) {
8188                                Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8189                                _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8190                            }
8191                        }
8192                        None => n_varargs,
8193                    };
8194                    let count = if wanted < 0 { n } else { wanted as u32 };
8195                    let need = (abs_a + count) as usize;
8196                    if self.stack.len() < need {
8197                        self.stack.resize(need, Value::Nil);
8198                    }
8199                    for i in 0..count {
8200                        let v = if i >= n {
8201                            Value::Nil
8202                        } else if let Some(t) = vt {
8203                            t.get_int(i as i64 + 1)
8204                        } else {
8205                            self.stack[(func_slot + 1 + i) as usize]
8206                        };
8207                        self.stack[(abs_a + i) as usize] = v;
8208                    }
8209                    if wanted < 0 {
8210                        self.top = abs_a + count;
8211                    }
8212                }
8213                Op::GetVarg => {
8214                    // materialize the vararg table (PUC table.pack shape) from the
8215                    // stack varargs — used when the named vararg is written /
8216                    // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8217                    // sees later writes) and in the local register R[A].
8218                    let n = n_varargs;
8219                    let t = self.heap.new_table();
8220                    {
8221                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8222                        let tm = unsafe { t.as_mut() };
8223                        for i in 0..n {
8224                            let _ = tm.set_int(
8225                                &mut self.heap,
8226                                i as i64 + 1,
8227                                self.stack[(func_slot + 1 + i) as usize],
8228                            );
8229                        }
8230                    }
8231                    let n_key = Value::Str(self.heap.intern(b"n"));
8232                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8233                    unsafe { t.as_mut() }
8234                        .set(&mut self.heap, n_key, Value::Int(n as i64))
8235                        .expect("'n' is a valid key");
8236                    // once-per-table barrier (mirror SETLIST): t is born BLACK
8237                    // during Propagate; the bulk inserts above don't barrier.
8238                    self.heap
8239                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8240                    self.stack[func_slot as usize] = Value::Table(t);
8241                    self.set_r(base, inst.a(), Value::Table(t));
8242                }
8243                Op::VargIdx => {
8244                    // R[A] := vararg[R[C]] without allocating: integer key in
8245                    // [1,n] → that vararg, "n" → the count, else nil.
8246                    let key = self.r(base, inst.c());
8247                    let n = n_varargs;
8248                    let v = match key {
8249                        Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8250                            self.stack[(func_slot + k as u32) as usize]
8251                        }
8252                        Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8253                            self.stack[(func_slot + f as u32) as usize]
8254                        }
8255                        Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8256                        _ => Value::Nil,
8257                    };
8258                    self.set_r(base, inst.a(), v);
8259                }
8260                Op::ErrNNil => {
8261                    let v = self.r(base, inst.a());
8262                    if !matches!(v, Value::Nil) {
8263                        let bx = inst.bx();
8264                        let name = if bx == 0 {
8265                            "?".to_string()
8266                        } else {
8267                            match cl.proto.consts[(bx - 1) as usize] {
8268                                Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8269                                _ => "?".to_string(),
8270                            }
8271                        };
8272                        return Err(self.rt_err(&format!("global '{name}' already defined")));
8273                    }
8274                }
8275                Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8276            }
8277        }
8278    }
8279
8280    #[inline(always)]
8281    fn pc_of_top(&self) -> u32 {
8282        self.top_frame().pc
8283    }
8284
8285    #[inline(always)]
8286    fn bump_pc(&mut self) {
8287        // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8288        // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8289        // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8290        // up over `fib_28`'s ~500k jumps.
8291        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8292        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8293            CallFrame::Lua(f) => f.pc += 1,
8294            _ => unreachable!("Cont frame at bump_pc"),
8295        }
8296    }
8297
8298    #[inline(always)]
8299    fn add_pc(&mut self, d: i32) {
8300        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8301        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8302            CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8303            _ => unreachable!("Cont frame at add_pc"),
8304        }
8305    }
8306
8307    /// PUC conditional-skip convention: the JMP that follows is executed when
8308    /// `cond == k`; otherwise it is skipped.
8309    #[inline(always)]
8310    fn cond_skip(&mut self, cond: bool, k: bool) {
8311        if cond != k {
8312            self.bump_pc();
8313        }
8314    }
8315
8316    // ---- indexing (with __index/__newindex chains) ----
8317
8318    /// The `#` length operation: string byte length, `__len` if present, else
8319    /// the raw table border. Returns the raw length value (may be non-integer
8320    /// when `__len` is exotic).
8321    pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8322        match self.len_step(v)? {
8323            MmOut::Done(n) => Ok(n),
8324            // PUC calls unary metamethods with the operand twice
8325            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8326            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8327        }
8328    }
8329
8330    /// Length fast path: a string's byte count or a table's raw border when no
8331    /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8332    /// called with the operand twice. Errors for a non-table with no `__len`.
8333    fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8334        match v {
8335            Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8336            Value::Table(t) => {
8337                let mm = self.get_mm(v, Mm::Len);
8338                if mm.is_nil() {
8339                    Ok(MmOut::Done(Value::Int(t.len())))
8340                } else {
8341                    Ok(MmOut::Mm { func: mm, recv: v })
8342                }
8343            }
8344            _ => {
8345                let mm = self.get_mm(v, Mm::Len);
8346                if mm.is_nil() {
8347                    Err(self.type_err("get length of", v))
8348                } else {
8349                    Ok(MmOut::Mm { func: mm, recv: v })
8350                }
8351            }
8352        }
8353    }
8354
8355    /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8356    /// value with no integer representation.
8357    pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8358        match self.len_value(v)? {
8359            Value::Int(i) => Ok(i),
8360            Value::Float(f) => crate::runtime::value::f2i_exact(f)
8361                .ok_or_else(|| self.rt_err("object length is not an integer")),
8362            _ => Err(self.rt_err("object length is not an integer")),
8363        }
8364    }
8365
8366    pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8367        match self.index_step(t, key)? {
8368            MmOut::Done(v) => Ok(v),
8369            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8370            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8371        }
8372    }
8373
8374    /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8375    /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8376    /// are followed inline (no yield possible); only a function link can yield.
8377    fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8378        let mut cur = t;
8379        for _ in 0..MAX_TAG_LOOP {
8380            let mm = match cur {
8381                Value::Table(tb) => {
8382                    let v = tb.get(key);
8383                    if !v.is_nil() {
8384                        return Ok(MmOut::Done(v));
8385                    }
8386                    let mm = self.get_mm(cur, Mm::Index);
8387                    if mm.is_nil() {
8388                        return Ok(MmOut::Done(Value::Nil));
8389                    }
8390                    mm
8391                }
8392                v => {
8393                    let mm = self.get_mm(v, Mm::Index);
8394                    if mm.is_nil() {
8395                        return Err(self.type_err("index", v));
8396                    }
8397                    mm
8398                }
8399            };
8400            match mm {
8401                Value::Closure(_) | Value::Native(_) => {
8402                    return Ok(MmOut::Mm {
8403                        func: mm,
8404                        recv: cur,
8405                    });
8406                }
8407                next => cur = next,
8408            }
8409        }
8410        Err(self.rt_err("'__index' chain too long; possible loop"))
8411    }
8412
8413    pub(crate) fn newindex_value(
8414        &mut self,
8415        t: Value,
8416        key: Value,
8417        v: Value,
8418    ) -> Result<(), LuaError> {
8419        match self.newindex_step(t, key, v)? {
8420            MmOut::Done(_) => Ok(()),
8421            MmOut::Mm { func, recv } => {
8422                self.call_value(func, &[recv, key, v])?;
8423                Ok(())
8424            }
8425            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8426        }
8427    }
8428
8429    /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
8430    /// performed inline (returning `Done`); only a function metamethod (`Mm`)
8431    /// needs an actual call — which the caller may run yieldably.
8432    fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
8433        let mut cur = t;
8434        for _ in 0..MAX_TAG_LOOP {
8435            let mm = match cur {
8436                Value::Table(tb) => {
8437                    // PI-A3 single-walk collapse — Table::try_set_existing
8438                    // fuses the prior `tb.get(key).is_nil()` gate and
8439                    // `raw_set` walk into one chain traversal when the
8440                    // key is already present with a non-nil value. The
8441                    // __newindex chain semantics are preserved by the
8442                    // identity (slot_nil ⇔ fire_newindex); see
8443                    // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
8444                    //
8445                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
8446                    // heap is single-threaded and the pointer is live as
8447                    // long as it is reachable from active roots (see
8448                    // heap.rs:5-7). Mirrors the raw_set wrapper below.
8449                    if unsafe { tb.as_mut() }.try_set_existing(key, v) {
8450                        self.heap
8451                            .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
8452                        return Ok(MmOut::Done(Value::Nil));
8453                    }
8454                    let mm = self.get_mm(cur, Mm::NewIndex);
8455                    if mm.is_nil() {
8456                        self.raw_set(tb, key, v)?;
8457                        return Ok(MmOut::Done(Value::Nil));
8458                    }
8459                    mm
8460                }
8461                bad => {
8462                    let mm = self.get_mm(bad, Mm::NewIndex);
8463                    if mm.is_nil() {
8464                        return Err(self.type_err("index", bad));
8465                    }
8466                    mm
8467                }
8468            };
8469            match mm {
8470                Value::Closure(_) | Value::Native(_) => {
8471                    return Ok(MmOut::Mm {
8472                        func: mm,
8473                        recv: cur,
8474                    });
8475                }
8476                next => cur = next,
8477            }
8478        }
8479        Err(self.rt_err("'__newindex' chain too long; possible loop"))
8480    }
8481
8482    fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8483        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8484        match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8485            Ok(()) => {
8486                self.heap
8487                    .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8488                Ok(())
8489            }
8490            Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8491            Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8492            Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8493            Err(TableError::InvalidNext) => unreachable!(),
8494        }
8495    }
8496
8497    /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8498    /// the boolean result; `Mm` (when raw equality fails and both are tables
8499    /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8500    fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8501        if l.raw_eq(r) {
8502            return MmOut::Done(Value::Bool(true));
8503        }
8504        if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8505            (l, r)
8506        {
8507            // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8508            // (and earlier) required the two operands' metatables to expose a
8509            // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8510            // metatable falls straight back to raw inequality. events.lua 5.1
8511            // :262 bakes this in.
8512            let mm = if self.version() <= LuaVersion::Lua51 {
8513                self.get_comp_mm(l, r, Mm::Eq)
8514            } else {
8515                let mut m = self.get_mm(l, Mm::Eq);
8516                if m.is_nil() {
8517                    m = self.get_mm(r, Mm::Eq);
8518                }
8519                m
8520            };
8521            if !mm.is_nil() {
8522                return MmOut::Mm { func: mm, recv: l };
8523            }
8524        }
8525        MmOut::Done(Value::Bool(false))
8526    }
8527
8528    // ---- arithmetic ----
8529
8530    #[inline(always)]
8531    fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8532        let l = self.r(base, inst.b());
8533        let r = self.r(base, inst.c());
8534        // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8535        // binary_trees all hammer these. Skipping coerce_num + the big
8536        // arith_fast match shaves several conditional moves per op.
8537        if let (Value::Int(a), Value::Int(b)) = (l, r) {
8538            let fast = match op {
8539                ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8540                ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8541                ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8542                _ => None,
8543            };
8544            if let Some(v) = fast {
8545                self.set_r(base, inst.a(), v);
8546                return Ok(());
8547            }
8548        }
8549        // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8550        // and any numeric workload with non-integer accumulators benefits.
8551        if let (Value::Float(a), Value::Float(b)) = (l, r) {
8552            let fast = match op {
8553                ArithOp::Add => Some(Value::Float(a + b)),
8554                ArithOp::Sub => Some(Value::Float(a - b)),
8555                ArithOp::Mul => Some(Value::Float(a * b)),
8556                ArithOp::Div => Some(Value::Float(a / b)),
8557                _ => None,
8558            };
8559            if let Some(v) = fast {
8560                self.set_r(base, inst.a(), v);
8561                return Ok(());
8562            }
8563        }
8564        match self.arith_fast(op, l, r)? {
8565            Some(v) => self.set_r(base, inst.a(), v),
8566            None => {
8567                let mm = self.arith_mm_func(op, l, r)?;
8568                let dst = base + inst.a();
8569                self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8570            }
8571        }
8572        Ok(())
8573    }
8574
8575    /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8576    /// directly, `Ok(None)` when a metamethod is required (the caller decides
8577    /// whether to call it synchronously or yieldably).
8578    fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8579        use ArithOp::*;
8580        match op {
8581            BAnd | BOr | BXor | Shl | Shr => {
8582                // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8583                match (coerce_num(l), coerce_num(r)) {
8584                    (Some(a), Some(b)) => {
8585                        let to_int = |n: Num| match n {
8586                            Num::Int(i) => Some(i),
8587                            Num::Float(f) => crate::runtime::value::f2i_exact(f),
8588                        };
8589                        let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8590                            // PUC luaG_tointerror: name the offending operand
8591                            return Err(self.no_int_rep_err());
8592                        };
8593                        let v = match op {
8594                            BAnd => a & b,
8595                            BOr => a | b,
8596                            BXor => a ^ b,
8597                            Shl => shift_left(a, b),
8598                            Shr => shift_left(a, b.wrapping_neg()),
8599                            _ => unreachable!(),
8600                        };
8601                        return Ok(Some(Value::Int(v)));
8602                    }
8603                    _ => return Ok(None),
8604                }
8605            }
8606            _ => {}
8607        }
8608        let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8609            (Some(a), Some(b)) => (a, b),
8610            _ => return Ok(None),
8611        };
8612        let v = match (op, ln, rn) {
8613            (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8614            (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8615            (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8616            (IDiv, Num::Int(a), Num::Int(b)) => {
8617                if b == 0 {
8618                    return Err(self.rt_err("attempt to divide by zero"));
8619                }
8620                let mut q = a.wrapping_div(b);
8621                if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8622                    q -= 1;
8623                }
8624                Value::Int(q)
8625            }
8626            (Mod, Num::Int(a), Num::Int(b)) => {
8627                if b == 0 {
8628                    return Err(self.rt_err("attempt to perform 'n%0'"));
8629                }
8630                let mut m = a.wrapping_rem(b);
8631                if m != 0 && (m ^ b) < 0 {
8632                    m += b;
8633                }
8634                Value::Int(m)
8635            }
8636            (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8637            (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8638            (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8639            (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8640            (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8641            (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8642            (Mod, a, b) => {
8643                let (x, y) = (a.as_f64(), b.as_f64());
8644                // PUC luai_nummod: correct fmod's sign without the `m*y`
8645                // product, which underflows to 0 for tiny denormals
8646                let mut m = x % y;
8647                if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8648                    m += y;
8649                }
8650                Value::Float(m)
8651            }
8652            _ => unreachable!(),
8653        };
8654        Ok(Some(v))
8655    }
8656
8657    pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8658        match v {
8659            Value::Int(i) => Ok(i),
8660            Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8661                Some(i) => Ok(i),
8662                None => Err(self.rt_err("number has no integer representation")),
8663            },
8664            v => Err(self.type_err(what, v)),
8665        }
8666    }
8667
8668    fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
8669        match n {
8670            Num::Int(i) => Ok(i),
8671            Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
8672                Some(i) => Ok(i),
8673                None => Err(self.rt_err("number has no integer representation")),
8674            },
8675        }
8676    }
8677
8678    /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
8679    /// PUC type error when neither operand provides one.
8680    fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
8681        use ArithOp::*;
8682        let event = match op {
8683            Add => Mm::Add,
8684            Sub => Mm::Sub,
8685            Mul => Mm::Mul,
8686            Div => Mm::Div,
8687            Mod => Mm::Mod,
8688            Pow => Mm::Pow,
8689            IDiv => Mm::IDiv,
8690            BAnd => Mm::BAnd,
8691            BOr => Mm::BOr,
8692            BXor => Mm::BXor,
8693            Shl => Mm::Shl,
8694            Shr => Mm::Shr,
8695        };
8696        let mut mm = self.get_mm(l, event);
8697        if mm.is_nil() {
8698            mm = self.get_mm(r, event);
8699        }
8700        if mm.is_nil() {
8701            let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
8702                "perform bitwise operation on"
8703            } else {
8704                "perform arithmetic on"
8705            };
8706            let bad = if coerce_num(l).is_none() { l } else { r };
8707            return Err(self.type_err(what, bad));
8708        }
8709        Ok(mm)
8710    }
8711
8712    // ---- comparison ----
8713
8714    pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
8715        match self.less_step(l, r, or_eq)? {
8716            MmOut::Done(v) => Ok(v.truthy()),
8717            MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
8718            MmOut::CompareSynth { func } => {
8719                // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
8720                // by library code (sort comparator etc.) — no yield expected
8721                // here (a yield would have hit `call_noyield`'s C boundary).
8722                Ok(!self.call_mm1(func, &[r, l])?.truthy())
8723            }
8724        }
8725    }
8726
8727    /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
8728    /// carries the boolean result; `Mm` (for non-number/string operands) carries
8729    /// the metamethod — called with `(l, r)`; raises the PUC compare error when
8730    /// neither operand provides one.
8731    fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
8732        let b = match (l, r) {
8733            (Value::Int(a), Value::Int(b)) => {
8734                if or_eq {
8735                    a <= b
8736                } else {
8737                    a < b
8738                }
8739            }
8740            (Value::Float(a), Value::Float(b)) => {
8741                if or_eq {
8742                    a <= b
8743                } else {
8744                    a < b
8745                }
8746            }
8747            (Value::Int(a), Value::Float(b)) => {
8748                if or_eq {
8749                    int_le_float(a, b)
8750                } else {
8751                    int_lt_float(a, b)
8752                }
8753            }
8754            (Value::Float(a), Value::Int(b)) => {
8755                if a.is_nan() {
8756                    false
8757                } else if or_eq {
8758                    !int_lt_float(b, a)
8759                } else {
8760                    !int_le_float(b, a)
8761                }
8762            }
8763            (Value::Str(a), Value::Str(b)) => {
8764                let (a, b) = (a.as_bytes(), b.as_bytes());
8765                if or_eq { a <= b } else { a < b }
8766            }
8767            (l, r) => {
8768                let event = if or_eq { Mm::Le } else { Mm::Lt };
8769                // PUC 5.1's `get_compTM` rule applies to ordered comparisons
8770                // too: both operands' metatables must expose the same
8771                // implementation for `__lt` / `__le` to fire. events.lua 5.1
8772                // :262 expects `c < d` (where `d` has no metatable) to error
8773                // with the default "attempt to compare two table values"
8774                // rather than running c's `__lt` blindly.
8775                let mm = if self.version() <= LuaVersion::Lua51 {
8776                    self.get_comp_mm(l, r, event)
8777                } else {
8778                    let mut m = self.get_mm(l, event);
8779                    if m.is_nil() {
8780                        m = self.get_mm(r, event);
8781                    }
8782                    m
8783                };
8784                // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
8785                // operand carries `__le`. 5.4 dropped the synthesis (now
8786                // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
8787                // on the synthesis — its metatable defines only `__lt`.
8788                // The fallback calls `__lt(r, l)` synchronously (the suite's
8789                // `__lt` doesn't yield) and negates the result; the yieldable
8790                // `__lt` path stays reserved for the explicit `<` operator.
8791                if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
8792                    let lt = Mm::Lt;
8793                    let mut mm_lt = self.get_mm(l, lt);
8794                    if mm_lt.is_nil() {
8795                        mm_lt = self.get_mm(r, lt);
8796                    }
8797                    if !mm_lt.is_nil() {
8798                        return Ok(MmOut::CompareSynth { func: mm_lt });
8799                    }
8800                }
8801                if mm.is_nil() {
8802                    // PUC luaG_ordererror: "two X values" when the operand
8803                    // types match, "X with Y" otherwise (objtypename-aware).
8804                    let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
8805                    return Err(self.rt_err(&if t1 == t2 {
8806                        format!("attempt to compare two {t1} values")
8807                    } else {
8808                        format!("attempt to compare {t1} with {t2}")
8809                    }));
8810                }
8811                return Ok(MmOut::Mm { func: mm, recv: l });
8812            }
8813        };
8814        Ok(MmOut::Done(Value::Bool(b)))
8815    }
8816
8817    // ---- numeric for ----
8818
8819    fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
8820        let a = inst.a();
8821        let init = self.r(base, a);
8822        let limit = self.r(base, a + 1);
8823        let step = self.r(base, a + 2);
8824        let (Some(init_n), Some(limit_n), Some(step_n)) =
8825            (as_num(init), as_num(limit), as_num(step))
8826        else {
8827            // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
8828            // PUC checks limit, then step, then initial value.
8829            let (what, bad) = if as_num(limit).is_none() {
8830                ("limit", limit)
8831            } else if as_num(step).is_none() {
8832                ("step", step)
8833            } else {
8834                ("initial value", init)
8835            };
8836            let tn = self.obj_typename(bad);
8837            return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
8838        };
8839        // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
8840        // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
8841        // first test, so each successful iteration emits a backward `OP_FORLOOP`
8842        // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
8843        // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
8844        // distance in luna's encoding is `loop_pc - prep_pc`; firing
8845        // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
8846        let pre53 = self.version() <= LuaVersion::Lua53;
8847        match (init_n, step_n) {
8848            (Num::Int(i0), Num::Int(st)) => {
8849                if st == 0 {
8850                    return Err(self.rt_err("'for' step is zero"));
8851                }
8852                if pre53 {
8853                    // PUC 5.3 `forlimit`: int limit passes through; float limit
8854                    // gets clamped to MIN/MAX with a `stopnow` flag set only
8855                    // when the clamp is unreachable (positive float with a
8856                    // negative step → limit=MAX, stopnow; negative float with
8857                    // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
8858                    // `init = 0` so OP_FORLOOP's first test against the
8859                    // unreachable clamp fails cleanly. An ordinary in-range
8860                    // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
8861                    // lets OP_FORLOOP's natural test reject the first step.
8862                    let (lim, stopnow) = match limit_n {
8863                        Num::Int(l) => (l, false),
8864                        Num::Float(f) => {
8865                            if f.is_nan() {
8866                                (0, true)
8867                            } else if f >= i64::MAX as f64 + 1.0 {
8868                                // beyond +MAX: unreachable for a decreasing loop
8869                                (i64::MAX, st < 0)
8870                            } else if f <= i64::MIN as f64 {
8871                                // beyond -MIN: unreachable for an increasing loop
8872                                (i64::MIN, st >= 0)
8873                            } else if st > 0 {
8874                                (f.floor() as i64, false)
8875                            } else {
8876                                (f.ceil() as i64, false)
8877                            }
8878                        }
8879                    };
8880                    let initv = if stopnow { 0 } else { i0 };
8881                    let pre = initv.wrapping_sub(st);
8882                    self.set_r(base, a, Value::Int(pre));
8883                    self.set_r(base, a + 1, Value::Int(lim));
8884                    self.set_r(base, a + 2, Value::Int(st));
8885                    self.add_pc(inst.bx() as i32 - 1);
8886                    return Ok(());
8887                }
8888                let (lim, empty) = int_for_limit(limit_n, i0, st);
8889                if empty {
8890                    self.add_pc(inst.bx() as i32);
8891                    return Ok(());
8892                }
8893                let count = if st > 0 {
8894                    (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
8895                } else {
8896                    (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
8897                };
8898                self.set_r(base, a, Value::Int(i0));
8899                self.set_r(base, a + 1, Value::Int(count as i64));
8900                self.set_r(base, a + 2, Value::Int(st));
8901                self.set_r(base, a + 3, Value::Int(i0));
8902            }
8903            _ => {
8904                let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
8905                if st == 0.0 {
8906                    return Err(self.rt_err("'for' step is zero"));
8907                }
8908                if pre53 {
8909                    let pre = x0 - st;
8910                    self.set_r(base, a, Value::Float(pre));
8911                    self.set_r(base, a + 1, Value::Float(lim));
8912                    self.set_r(base, a + 2, Value::Float(st));
8913                    self.add_pc(inst.bx() as i32 - 1);
8914                    return Ok(());
8915                }
8916                let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
8917                if !runs {
8918                    self.add_pc(inst.bx() as i32);
8919                    return Ok(());
8920                }
8921                self.set_r(base, a, Value::Float(x0));
8922                self.set_r(base, a + 1, Value::Float(lim));
8923                self.set_r(base, a + 2, Value::Float(st));
8924                self.set_r(base, a + 3, Value::Float(x0));
8925            }
8926        }
8927        Ok(())
8928    }
8929
8930    #[inline(always)]
8931    fn for_loop(&mut self, inst: Inst, base: u32) {
8932        let a = inst.a();
8933        // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
8934        // directly (R[a+1] holds the limit, *not* a remaining-count) so the
8935        // first iteration's test fires through the same backward-jump path as
8936        // every later iteration. 5.4+ switched to the count-based form luna
8937        // already uses for `Int`; the float branch was already PUC-3.x-style.
8938        let pre53 = self.version() <= LuaVersion::Lua53;
8939        match self.r(base, a) {
8940            Value::Int(cur) if pre53 => {
8941                let Value::Int(lim) = self.r(base, a + 1) else {
8942                    unreachable!()
8943                };
8944                let Value::Int(st) = self.r(base, a + 2) else {
8945                    unreachable!()
8946                };
8947                let next = cur.wrapping_add(st);
8948                let cont = if st > 0 { next <= lim } else { next >= lim };
8949                if cont {
8950                    self.set_r(base, a, Value::Int(next));
8951                    self.set_r(base, a + 3, Value::Int(next));
8952                    self.add_pc(-(inst.bx() as i32));
8953                }
8954            }
8955            Value::Int(cur) => {
8956                let Value::Int(count) = self.r(base, a + 1) else {
8957                    unreachable!()
8958                };
8959                if count > 0 {
8960                    let Value::Int(st) = self.r(base, a + 2) else {
8961                        unreachable!()
8962                    };
8963                    let next = cur.wrapping_add(st);
8964                    self.set_r(base, a, Value::Int(next));
8965                    self.set_r(base, a + 1, Value::Int(count - 1));
8966                    self.set_r(base, a + 3, Value::Int(next));
8967                    self.add_pc(-(inst.bx() as i32));
8968                }
8969            }
8970            Value::Float(cur) => {
8971                let Value::Float(lim) = self.r(base, a + 1) else {
8972                    unreachable!()
8973                };
8974                let Value::Float(st) = self.r(base, a + 2) else {
8975                    unreachable!()
8976                };
8977                let next = cur + st;
8978                let cont = if st > 0.0 { next <= lim } else { next >= lim };
8979                if cont {
8980                    self.set_r(base, a, Value::Float(next));
8981                    self.set_r(base, a + 3, Value::Float(next));
8982                    self.add_pc(-(inst.bx() as i32));
8983                }
8984            }
8985            _ => unreachable!("corrupt for-loop state"),
8986        }
8987    }
8988
8989    // ---- native helpers (used by builtins) ----
8990
8991    /// A native function's own captured upvalue (self lives at func_slot).
8992    ///
8993    /// Public so `native_typed` trampolines and embedders authoring
8994    /// stateful natives via `native_with(...)` can read their upvals.
8995    pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
8996        let Value::Native(nc) = self.stack[func_slot as usize] else {
8997            unreachable!("native frame without native closure");
8998        };
8999        nc.upvals[i]
9000    }
9001
9002    /// Number of upvalues captured by the native at `func_slot` (variadic
9003    /// captures such as the `io.lines` format list).
9004    pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
9005        let Value::Native(nc) = self.stack[func_slot as usize] else {
9006            unreachable!("native frame without native closure");
9007        };
9008        nc.upvals.len()
9009    }
9010
9011    /// Write a native function's own upvalue (stateful iterators).
9012    pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
9013        let Value::Native(nc) = self.stack[func_slot as usize] else {
9014            unreachable!("native frame without native closure");
9015        };
9016        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9017        unsafe { nc.as_mut() }.upvals[i] = v;
9018        // NativeClosure.upvals is traced as part of its Trace; a long-lived
9019        // stateful iterator closure (e.g. string.gmatch) sees many writes —
9020        // barrier_back once-and-done is cheaper than per-child forward.
9021        self.heap
9022            .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9023    }
9024
9025    /// Read the i-th positional argument inside a `NativeFn` body
9026    /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9027    /// matching PUC's "missing arg is nil" contract. Public so embedders
9028    /// can author their own natives.
9029    pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9030        if i < nargs {
9031            self.stack[(func_slot + 1 + i) as usize]
9032        } else {
9033            Value::Nil
9034        }
9035    }
9036
9037    /// Push the return values of a `NativeFn` and return their count
9038    /// (analogous to pushing N values then `return N` from a C function).
9039    /// Public so embedders can author their own natives.
9040    pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9041        let need = func_slot as usize + vals.len();
9042        if self.stack.len() < need {
9043            self.stack.resize(need, Value::Nil);
9044        }
9045        for (i, &v) in vals.iter().enumerate() {
9046            self.stack[func_slot as usize + i] = v;
9047        }
9048        vals.len() as u32
9049    }
9050
9051    /// Fast string concatenation of an adjacent pair, or `None` when a
9052    /// `__concat` metamethod is required.
9053    fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9054        let legacy = self.version <= crate::version::LuaVersion::Lua52;
9055        // Length-check fast paths for both string operands BEFORE the
9056        // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9057        // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9058        // overflow on the first pair that would exceed `INT_MAX` instead
9059        // of allocating multi-GB intermediates first.
9060        let max_str = i32::MAX as usize;
9061        if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9062            let a_len = ls.as_bytes().len();
9063            let b_len = rs.as_bytes().len();
9064            let new_len = a_len.checked_add(b_len);
9065            if new_len.is_none() || new_len.unwrap() > max_str {
9066                return Err(self.rt_err("string length overflow"));
9067            }
9068        }
9069        match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9070            (Some(a), Some(b)) => {
9071                // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9072                // concat past it raises "string length overflow"
9073                // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9074                // exactly this wall).
9075                let new_len = a.len().checked_add(b.len());
9076                if new_len.is_none() || new_len.unwrap() > max_str {
9077                    return Err(self.rt_err("string length overflow"));
9078                }
9079                let mut combined = a;
9080                combined.extend_from_slice(&b);
9081                Ok(Some(Value::Str(self.heap.intern(&combined))))
9082            }
9083            _ => Ok(None),
9084        }
9085    }
9086
9087    /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9088    /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9089    /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9090    /// call — its `Meta` continuation re-enters here on the metamethod's return.
9091    fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9092        // Sum the lengths of all all-Str operands BEFORE starting the
9093        // right-associative fold so a 129-operand `a..a..…` chain
9094        // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9095        // not after dozens of multi-GB intermediate intern+hash rounds.
9096        // A non-Str operand falls through to the per-pair check.
9097        let max_str = i32::MAX as usize;
9098        let mut total: usize = 0;
9099        let mut all_str = true;
9100        for slot in base_a..self.top {
9101            match self.stack[slot as usize] {
9102                Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9103                    Some(t) if t <= max_str => total = t,
9104                    _ => return Err(self.rt_err("string length overflow")),
9105                },
9106                _ => {
9107                    all_str = false;
9108                    break;
9109                }
9110            }
9111        }
9112        let _ = all_str; // discrimination already captured by early returns above
9113        while self.top.saturating_sub(base_a) >= 2 {
9114            let i = self.top - 1; // rightmost operand
9115            let x = self.stack[(i - 1) as usize];
9116            let y = self.stack[i as usize];
9117            match self.concat_pair(x, y)? {
9118                Some(s) => {
9119                    self.stack[(i - 1) as usize] = s;
9120                    self.top = i; // consumed y
9121                }
9122                None => {
9123                    let mut mm = self.get_mm(x, Mm::Concat);
9124                    if mm.is_nil() {
9125                        mm = self.get_mm(y, Mm::Concat);
9126                    }
9127                    if mm.is_nil() {
9128                        let legacy = self.version <= crate::version::LuaVersion::Lua52;
9129                        let bad = if concat_piece(x, legacy).is_none() {
9130                            x
9131                        } else {
9132                            y
9133                        };
9134                        return Err(self.type_err("concatenate", bad));
9135                    }
9136                    // result lands at i-1, dropping y (top→i); resume continues.
9137                    let dst = i - 1;
9138                    self.begin_meta_call(
9139                        mm,
9140                        &[x, y],
9141                        MetaAction::Concat { dst, base_a },
9142                        "concat",
9143                    )?;
9144                    return Ok(());
9145                }
9146            }
9147        }
9148        self.maybe_collect_garbage(base_a + 1);
9149        Ok(())
9150    }
9151
9152    /// tostring with __tostring / __name support.
9153    pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9154        let mm = self.get_mm(v, Mm::ToString);
9155        if !mm.is_nil() {
9156            return match self.call_mm1(mm, &[v])? {
9157                Value::Str(s) => Ok(s.as_bytes().to_vec()),
9158                _ => Err(self.rt_err("'__tostring' must return a string")),
9159            };
9160        }
9161        if let Value::Table(t) = v
9162            && let Value::Str(name) = self.get_mm(v, Mm::Name)
9163        {
9164            let mut out = name.as_bytes().to_vec();
9165            out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9166            return Ok(out);
9167        }
9168        Ok(self.tostring_basic(v))
9169    }
9170
9171    /// Basic tostring (no metamethods).
9172    pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9173        match v {
9174            Value::Nil => b"nil".to_vec(),
9175            Value::Bool(true) => b"true".to_vec(),
9176            Value::Bool(false) => b"false".to_vec(),
9177            Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9178            // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9179            // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9180            // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9181            // concatenating these renderings.
9182            Value::Float(f) => {
9183                let legacy = self.version <= crate::version::LuaVersion::Lua52;
9184                numeric::num_to_string_for(Num::Float(f), legacy).into_bytes()
9185            }
9186            Value::Str(s) => s.as_bytes().to_vec(),
9187            Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9188            Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9189            Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9190            Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9191            // PUC names file handles `file (0x…)`; a bare userdata is
9192            // `userdata: 0x…`. The io library overrides this via __tostring.
9193            Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9194            // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9195            // (the "light" qualifier only appears in `luaL_typeerror`).
9196            Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9197        }
9198    }
9199}
9200
/// Tag for a binary arithmetic or bitwise operator. Each tag maps to a
/// metamethod event name through [`ArithOp::mm_name`].
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    Add,
    Sub,
    Mul,
    Mod,
    Pow,
    Div,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
}

impl ArithOp {
    /// The PUC metamethod event name without its `__` prefix (`__add` →
    /// `"add"`), as reported by `debug.getinfo(level, "n")` inside a
    /// metamethod handler.
    fn mm_name(self) -> &'static str {
        match self {
            // arithmetic
            Self::Add => "add",
            Self::Sub => "sub",
            Self::Mul => "mul",
            Self::Mod => "mod",
            Self::Pow => "pow",
            Self::Div => "div",
            Self::IDiv => "idiv",
            // bitwise
            Self::BAnd => "band",
            Self::BOr => "bor",
            Self::BXor => "bxor",
            Self::Shl => "shl",
            Self::Shr => "shr",
        }
    }
}
9237
9238fn as_num(v: Value) -> Option<Num> {
9239    match v {
9240        Value::Int(i) => Some(Num::Int(i)),
9241        Value::Float(f) => Some(Num::Float(f)),
9242        // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9243        Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9244        _ => None,
9245    }
9246}
9247
9248/// A concatenable operand's byte form (string, or a number coerced to its
9249/// string), or `None` when only a `__concat` metamethod can handle it.
9250/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9251/// suffix on integer-valued floats) — see `num_to_string_for`.
9252fn concat_piece(v: Value, legacy_float: bool) -> Option<Vec<u8>> {
9253    match v {
9254        Value::Str(s) => Some(s.as_bytes().to_vec()),
9255        Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9256        Value::Float(x) => {
9257            Some(numeric::num_to_string_for(Num::Float(x), legacy_float).into_bytes())
9258        }
9259        _ => None,
9260    }
9261}
9262
9263/// Index into the per-basic-type metatable table for a non-table value
9264/// (None for tables, which carry their own metatable).
9265fn type_mt_slot(v: Value) -> Option<usize> {
9266    match v {
9267        Value::Nil => Some(0),
9268        Value::Bool(_) => Some(1),
9269        Value::Int(_) | Value::Float(_) => Some(2),
9270        Value::Str(_) => Some(3),
9271        Value::Closure(_) | Value::Native(_) => Some(4),
9272        // tables and full userdata carry their own metatable; threads and
9273        // light userdata have none (PUC keeps a shared per-type mt slot for
9274        // light, but luna doesn't expose it — no test gates on it yet).
9275        Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9276    }
9277}
9278
9279/// Number, or string coerced to number (5.5 default string-arith coercion).
9280fn coerce_num(v: Value) -> Option<Num> {
9281    match v {
9282        Value::Int(i) => Some(Num::Int(i)),
9283        Value::Float(f) => Some(Num::Float(f)),
9284        Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9285        _ => None,
9286    }
9287}
9288
/// Lua's `a << b`: a logical shift over the 64-bit pattern of `a`.
///
/// A negative `b` shifts right by `|b|` instead, and any shift whose
/// magnitude is 64 or more yields 0.
fn shift_left(a: i64, b: i64) -> i64 {
    // `unsigned_abs` is well-defined even for `i64::MIN`.
    let magnitude = b.unsigned_abs();
    if magnitude >= 64 {
        return 0;
    }
    let bits = a as u64;
    let amount = magnitude as u32;
    let shifted = if b < 0 {
        bits >> amount
    } else {
        bits << amount
    };
    shifted as i64
}
9304
/// Exact `i < f` between an integer and a float (PUC LTintfloat shape).
///
/// NaN compares false. A float outside the `i64` range is decided by its
/// side alone; otherwise the comparison is made against `floor(f)`.
fn int_lt_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    if f >= TWO_POW_63 {
        return true;
    }
    let floor = f.floor();
    let floor_int = floor as i64;
    // For a fractional `f`, `i < f` holds exactly when `i <= floor(f)`.
    let has_fraction = f != floor;
    if has_fraction {
        i <= floor_int
    } else {
        i < floor_int
    }
}
9320
/// Exact `i <= f` between an integer and a float.
///
/// NaN compares false. A float outside the `i64` range is decided by its
/// side alone; otherwise `i` is compared with `floor(f)`.
fn int_le_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    match f {
        x if x.is_nan() => false,
        x if x >= TWO_POW_63 => true,
        x if x < -TWO_POW_63 => false,
        x => i <= x.floor() as i64,
    }
}
9334
9335/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9336/// (clipped limit, loop-is-empty).
9337fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9338    match limit {
9339        Num::Int(l) => {
9340            let empty = if step > 0 { init > l } else { init < l };
9341            (l, empty)
9342        }
9343        Num::Float(f) => {
9344            if f.is_nan() {
9345                return (0, true);
9346            }
9347            if step > 0 {
9348                if f >= 9_223_372_036_854_775_808.0 {
9349                    (i64::MAX, false)
9350                } else {
9351                    let l = f.floor();
9352                    if l < -9_223_372_036_854_775_808.0 {
9353                        (i64::MIN, true)
9354                    } else {
9355                        let li = l as i64;
9356                        (li, init > li)
9357                    }
9358                }
9359            } else if f <= -9_223_372_036_854_775_808.0 {
9360                (i64::MIN, false)
9361            } else {
9362                let l = f.ceil();
9363                if l >= 9_223_372_036_854_775_808.0 {
9364                    // PUC forlimit: a positive limit beyond the integer range
9365                    // is unreachable for a decreasing loop — empty.
9366                    (i64::MAX, true)
9367                } else {
9368                    let li = l as i64;
9369                    (li, init < li)
9370                }
9371            }
9372        }
9373    }
9374}
9375
9376/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9377/// `@file` / `=name` markers in `source`).
9378fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9379    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9380    let b = unsafe { crate::runtime::string::bytes_of(p) };
9381    match b.first() {
9382        Some(b'@') | Some(b'=') => &b[1..],
9383        _ => b,
9384    }
9385}
9386
9387impl Vm {
9388    /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9389    /// that called the current native. Returns (closure, current line,
9390    /// extra vararg count).
9391    /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9392    /// function running at `level`, recovered from the caller's call
9393    /// instruction (PUC funcnamefromcode). None for the main chunk or a
9394    /// tail/anonymous call with no recoverable name.
9395    /// A debug-level position: either a real Lua frame (by index) or a synthetic
9396    /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9397    /// coroutine body), which `debug.getinfo` and traceback report as "C".
9398    /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9399    /// frame at `level`'s current pc, as (name, value). Locals are visited in
9400    /// registration order (start pc, then register) to match luaF_getlocalname.
9401    pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9402        if n == 0 {
9403            return None;
9404        }
9405        let fi = match self.dbg_frame(level)? {
9406            DbgKind::Lua(fi) => fi,
9407            // Tail-call placeholder has no real frame backing it — no locals
9408            // exist to read or write here. PUC `findlocal` returns NULL on
9409            // a CIST_TAIL activation.
9410            DbgKind::Tail(_) => return None,
9411            // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9412            // for slot `n` inside the argument window (db.lua :408-:413, and
9413            // the call/return hook reads of math.sin / select args via
9414            // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9415            // meaningful for a C frame here.
9416            DbgKind::C(fi) => {
9417                if n < 1 {
9418                    return None;
9419                }
9420                let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9421                if (n as u32) > nargs {
9422                    return None;
9423                }
9424                let slot = (func_slot + n as u32) as usize;
9425                let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9426                return Some((self.temporary_locvar_name().to_string(), val));
9427            }
9428        };
9429        let f = self.frames[fi].lua()?;
9430        // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9431        // is the first extra arg passed to the function (`...[1]`), `-2` the
9432        // second, etc. The 5.5 stack layout parks varargs in
9433        // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9434        if n < 0 {
9435            let i = (-n) as u32;
9436            if i == 0 || i > f.n_varargs {
9437                return None;
9438            }
9439            let val = self
9440                .stack
9441                .get((f.func_slot + i) as usize)
9442                .copied()
9443                .unwrap_or(Value::Nil);
9444            return Some((self.vararg_locvar_name().to_string(), val));
9445        }
9446        let proto = f.closure.proto;
9447        // PUC's parser injects a hidden `(vararg table)` locvar for an
9448        // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9449        // right after the fixed parameters (`numparams + 1`). Main chunks
9450        // and `(...t)` named-vararg funcs do NOT get one — gate on the
9451        // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9452        // their declared registers (no shadow slot allocated), so we expose
9453        // that hidden index purely in this debug view.
9454        let num_params = proto.num_params as i64;
9455        let vararg_slot = if proto.has_vararg_table_pseudo {
9456            Some(num_params + 1)
9457        } else {
9458            None
9459        };
9460        if vararg_slot == Some(n) {
9461            return Some(("(vararg table)".to_string(), Value::Nil));
9462        }
9463        let pc = (f.pc as usize).saturating_sub(1);
9464        let mut active: Vec<&crate::runtime::LocVar> = proto
9465            .locvars
9466            .iter()
9467            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9468            .collect();
9469        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9470        let mut idx: i64 = n - 1;
9471        if let Some(vs) = vararg_slot
9472            && n > vs
9473        {
9474            idx -= 1;
9475        }
9476        let idx = idx as usize;
9477        if let Some(lv) = active.get(idx) {
9478            let val = self
9479                .stack
9480                .get((f.base + lv.reg) as usize)
9481                .copied()
9482                .unwrap_or(Value::Nil);
9483            return Some((lv.name.to_string(), val));
9484        }
9485        // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9486        // still inside the frame's live register window — report a
9487        // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9488        // the next frame's func slot (`ci->next->func.p`) so the
9489        // temporary window stops where the callee's frame begins
9490        // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9491        // an out-of-range slot).
9492        let limit = self
9493            .frames
9494            .get(fi + 1)
9495            .and_then(|cf| cf.lua())
9496            .map(|nf| nf.func_slot)
9497            .unwrap_or_else(|| self.top.max(f.base));
9498        let temp_reg = idx as u32;
9499        if f.base + temp_reg < limit {
9500            let val = self
9501                .stack
9502                .get((f.base + temp_reg) as usize)
9503                .copied()
9504                .unwrap_or(Value::Nil);
9505            return Some((self.lua_temporary_locvar_name().to_string(), val));
9506        }
9507        None
9508    }
9509
9510    /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9511    /// the local / vararg name on success, `None` when the slot does not
9512    /// resolve. Mirrors `local_at`'s indexing exactly.
9513    pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9514        if n == 0 {
9515            return None;
9516        }
9517        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9518            return None;
9519        };
9520        let f = self.frames[fi].lua()?;
9521        if n < 0 {
9522            let i = (-n) as u32;
9523            if i == 0 || i > f.n_varargs {
9524                return None;
9525            }
9526            let slot = (f.func_slot + i) as usize;
9527            if let Some(s) = self.stack.get_mut(slot) {
9528                *s = v;
9529            }
9530            return Some(self.vararg_locvar_name().to_string());
9531        }
9532        let proto = f.closure.proto;
9533        let num_params = proto.num_params as i64;
9534        let vararg_slot = if proto.has_vararg_table_pseudo {
9535            Some(num_params + 1)
9536        } else {
9537            None
9538        };
9539        if vararg_slot == Some(n) {
9540            // hidden (vararg table) slot has no real storage — accept the
9541            // write as a no-op for PUC parity (db.lua doesn't write to it).
9542            return Some("(vararg table)".to_string());
9543        }
9544        let pc = (f.pc as usize).saturating_sub(1);
9545        let mut active: Vec<&crate::runtime::LocVar> = proto
9546            .locvars
9547            .iter()
9548            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9549            .collect();
9550        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9551        let mut idx: i64 = n - 1;
9552        if let Some(vs) = vararg_slot
9553            && n > vs
9554        {
9555            idx -= 1;
9556        }
9557        let idx = idx as usize;
9558        let (name, reg) = if let Some(lv) = active.get(idx) {
9559            (lv.name.to_string(), lv.reg)
9560        } else {
9561            // PUC `luaG_findlocal` fallback into the temporary window —
9562            // bounded by the next frame's func slot (see local_at).
9563            let limit = self
9564                .frames
9565                .get(fi + 1)
9566                .and_then(|cf| cf.lua())
9567                .map(|nf| nf.func_slot)
9568                .unwrap_or_else(|| self.top.max(f.base));
9569            let temp_reg = idx as u32;
9570            if f.base + temp_reg >= limit {
9571                return None;
9572            }
9573            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9574        };
9575        let slot = (f.base + reg) as usize;
9576        if let Some(s) = self.stack.get_mut(slot) {
9577            *s = v;
9578        }
9579        Some(name)
9580    }
9581
9582    /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9583    /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9584    /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9585    /// vararg indexing past `n_varargs`, or for a register past the live
9586    /// window. Naming follows the same priority as `local_at`: named locals,
9587    /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9588    /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9589    /// window.
9590    pub(crate) fn local_at_coro(
9591        &self,
9592        co: Gc<crate::runtime::Coro>,
9593        level: i64,
9594        n: i64,
9595    ) -> Option<(String, Value)> {
9596        if level < 1 || n == 0 {
9597            return None;
9598        }
9599        let frames = &co.frames;
9600        // Logical level: iterate Lua frames from the top.
9601        let lua_indices: Vec<usize> = (0..frames.len())
9602            .rev()
9603            .filter(|&i| frames[i].lua().is_some())
9604            .collect();
9605        let fi = *lua_indices.get((level - 1) as usize)?;
9606        let f = frames[fi].lua()?;
9607        if n < 0 {
9608            let i = (-n) as u32;
9609            if i == 0 || i > f.n_varargs {
9610                return None;
9611            }
9612            let val = co
9613                .stack
9614                .get((f.func_slot + i) as usize)
9615                .copied()
9616                .unwrap_or(Value::Nil);
9617            return Some((self.vararg_locvar_name().to_string(), val));
9618        }
9619        let proto = f.closure.proto;
9620        let num_params = proto.num_params as i64;
9621        let vararg_slot = if proto.has_vararg_table_pseudo {
9622            Some(num_params + 1)
9623        } else {
9624            None
9625        };
9626        if vararg_slot == Some(n) {
9627            return Some(("(vararg table)".to_string(), Value::Nil));
9628        }
9629        let pc = (f.pc as usize).saturating_sub(1);
9630        let mut active: Vec<&crate::runtime::LocVar> = proto
9631            .locvars
9632            .iter()
9633            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9634            .collect();
9635        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9636        let mut idx: i64 = n - 1;
9637        if let Some(vs) = vararg_slot
9638            && n > vs
9639        {
9640            idx -= 1;
9641        }
9642        let idx = idx as usize;
9643        if let Some(lv) = active.get(idx) {
9644            let val = co
9645                .stack
9646                .get((f.base + lv.reg) as usize)
9647                .copied()
9648                .unwrap_or(Value::Nil);
9649            return Some((lv.name.to_string(), val));
9650        }
9651        let limit = frames
9652            .get(fi + 1)
9653            .and_then(|cf| cf.lua())
9654            .map(|nf| nf.func_slot)
9655            .unwrap_or(co.top.max(f.base));
9656        let temp_reg = idx as u32;
9657        if f.base + temp_reg < limit {
9658            let val = co
9659                .stack
9660                .get((f.base + temp_reg) as usize)
9661                .copied()
9662                .unwrap_or(Value::Nil);
9663            return Some((self.lua_temporary_locvar_name().to_string(), val));
9664        }
9665        None
9666    }
9667
9668    /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
9669    /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
9670    pub(crate) fn local_set_coro(
9671        &mut self,
9672        co: Gc<crate::runtime::Coro>,
9673        level: i64,
9674        n: i64,
9675        v: Value,
9676    ) -> Option<String> {
9677        if level < 1 || n == 0 {
9678            return None;
9679        }
9680        let lua_indices: Vec<usize> = (0..co.frames.len())
9681            .rev()
9682            .filter(|&i| co.frames[i].lua().is_some())
9683            .collect();
9684        let fi = *lua_indices.get((level - 1) as usize)?;
9685        let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
9686            let f = co.frames[fi].lua()?;
9687            (
9688                f.func_slot,
9689                f.n_varargs,
9690                f.base,
9691                f.closure.proto,
9692                co.top.max(f.base),
9693                co.frames
9694                    .get(fi + 1)
9695                    .and_then(|cf| cf.lua())
9696                    .map(|nf| nf.func_slot),
9697            )
9698        };
9699        if n < 0 {
9700            let i = (-n) as u32;
9701            if i == 0 || i > n_varargs {
9702                return None;
9703            }
9704            let slot = (func_slot + i) as usize;
9705            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9706            let stack = unsafe { &mut co.as_mut().stack };
9707            if let Some(s) = stack.get_mut(slot) {
9708                *s = v;
9709            }
9710            // co.stack values are traced — once-per-call barrier so propagate
9711            // sees the new value if co was already BLACK this cycle.
9712            self.heap
9713                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9714            return Some(self.vararg_locvar_name().to_string());
9715        }
9716        let num_params = proto.num_params as i64;
9717        let vararg_slot = if proto.has_vararg_table_pseudo {
9718            Some(num_params + 1)
9719        } else {
9720            None
9721        };
9722        if vararg_slot == Some(n) {
9723            return Some("(vararg table)".to_string());
9724        }
9725        let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
9726        let mut active: Vec<&crate::runtime::LocVar> = proto
9727            .locvars
9728            .iter()
9729            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9730            .collect();
9731        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9732        let mut idx: i64 = n - 1;
9733        if let Some(vs) = vararg_slot
9734            && n > vs
9735        {
9736            idx -= 1;
9737        }
9738        let idx = idx as usize;
9739        let (name, reg) = if let Some(lv) = active.get(idx) {
9740            (lv.name.to_string(), lv.reg)
9741        } else {
9742            let limit = next_func_slot.unwrap_or(top_for_temp);
9743            let temp_reg = idx as u32;
9744            if base + temp_reg >= limit {
9745                return None;
9746            }
9747            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9748        };
9749        let slot = (base + reg) as usize;
9750        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9751        let stack = unsafe { &mut co.as_mut().stack };
9752        if let Some(s) = stack.get_mut(slot) {
9753            *s = v;
9754        }
9755        // co.stack values are traced — once-per-call barrier so propagate
9756        // sees the new value if co was already BLACK this cycle.
9757        self.heap
9758            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9759        Some(name)
9760    }
9761
9762    /// Frame info for a level on a suspended coroutine (PUC
9763    /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
9764    /// Returns the closure + currentline + extraargs + istailcall for the
9765    /// level-th Lua activation in `co.frames`. None if level overshoots.
9766    pub(crate) fn coro_frame_info(
9767        &self,
9768        co: Gc<crate::runtime::Coro>,
9769        level: i64,
9770    ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
9771        if level < 1 {
9772            return None;
9773        }
9774        let lua_indices: Vec<usize> = (0..co.frames.len())
9775            .rev()
9776            .filter(|&i| co.frames[i].lua().is_some())
9777            .collect();
9778        let fi = *lua_indices.get((level - 1) as usize)?;
9779        let f = co.frames[fi].lua()?;
9780        let proto = f.closure.proto;
9781        let pc = (f.pc as usize)
9782            .saturating_sub(1)
9783            .min(proto.lines.len().saturating_sub(1));
9784        let line = proto.lines.get(pc).copied().unwrap_or(0);
9785        Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
9786    }
9787
9788    /// Whether `level` resolves to any live activation (PUC lua_getstack).
9789    pub(crate) fn level_in_range(&self, level: i64) -> bool {
9790        self.dbg_frame(level).is_some()
9791    }
9792
9793    /// PUC's debug-API placeholder for an unnamed vararg slot returned by
9794    /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
9795    /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
9796    /// 5.3 :195 / 5.4 :286 baseline on their respective form.
9797    pub(crate) fn vararg_locvar_name(&self) -> &'static str {
9798        if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
9799            "(*vararg)"
9800        } else {
9801            "(vararg)"
9802        }
9803    }
9804
9805    /// PUC's debug-API placeholder for an unnamed temporary on a C
9806    /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
9807    /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
9808    /// their spelling.
9809    pub(crate) fn temporary_locvar_name(&self) -> &'static str {
9810        if matches!(
9811            self.version,
9812            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9813        ) {
9814            // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
9815            // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
9816            // to `(C temporary)`.
9817            "(*temporary)"
9818        } else {
9819            "(C temporary)"
9820        }
9821    }
9822
9823    /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
9824    /// (an arithmetic intermediate sitting past the last named local on a
9825    /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
9826    /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
9827    /// spelling.
9828    pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
9829        if matches!(
9830            self.version,
9831            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9832        ) {
9833            "(*temporary)"
9834        } else {
9835            "(temporary)"
9836        }
9837    }
9838
9839    /// The Lua closure running at `level` on the current thread, or `None`
9840    /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
9841    /// need this to reach the function whose env they read or rewrite.
9842    pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
9843        // `DbgKind::Tail` also falls into the else branch — a tail-call
9844        // placeholder has no closure of its own, so PUC's `lua_getstack` +
9845        // `getfunc` for that level returns no function, and `getfenv(level)`
9846        // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
9847        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9848            return None;
9849        };
9850        Some(self.frames[fi].lua()?.closure)
9851    }
9852
9853    pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
9854        if level < 1 {
9855            return false;
9856        }
9857        let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
9858        (level as usize) <= count
9859    }
9860
9861    pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
9862        if level < 1 {
9863            return None;
9864        }
9865        // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
9866        // activation counts as a level, and each Lua activation's
9867        // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
9868        // dropped the synthetic shape: `istailcall` becomes a flag on the
9869        // real frame and Cont activations no longer count separately.
9870        // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
9871        // `getinfo(2).func == g1` pins the 5.2+ shape.
9872        let v51 = self.version <= LuaVersion::Lua51;
9873        let mut lvl = level;
9874        for fi in (0..self.frames.len()).rev() {
9875            match &self.frames[fi] {
9876                CallFrame::Lua(f) => {
9877                    lvl -= 1;
9878                    if lvl == 0 {
9879                        return Some(DbgKind::Lua(fi));
9880                    }
9881                    if v51 {
9882                        // 5.1 reports one synthetic CIST_TAIL level per
9883                        // collapsed tail call (PUC `lua_getstack` subtracts
9884                        // `ci->u.l.tailcalls` from the remaining level).
9885                        for _ in 0..f.tailcalls {
9886                            lvl -= 1;
9887                            if lvl == 0 {
9888                                return Some(DbgKind::Tail(fi));
9889                            }
9890                        }
9891                    }
9892                    if f.from_c {
9893                        lvl -= 1;
9894                        if lvl == 0 {
9895                            return Some(DbgKind::C(fi));
9896                        }
9897                    }
9898                }
9899                CallFrame::Cont(_) => {
9900                    if !v51 {
9901                        continue;
9902                    }
9903                    lvl -= 1;
9904                    if lvl == 0 {
9905                        let parent = (0..fi)
9906                            .rev()
9907                            .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
9908                        return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
9909                    }
9910                }
9911            }
9912        }
9913        None
9914    }
9915
9916    pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9917        let f = self.frames[fi].lua()?;
9918        // metamethod handler frames carry the event tag (e.g. "close" for
9919        // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
9920        if f.is_hook {
9921            return Some(("hook", "?".to_string()));
9922        }
9923        if let Some(tm) = f.tm {
9924            return Some(("metamethod", tm_debug_name(self.version, tm)));
9925        }
9926        // a frame entered across a C boundary has no naming call instruction
9927        if fi == 0 || f.from_c {
9928            return None;
9929        }
9930        // the caller's call instruction names this frame; a continuation frame
9931        // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
9932        // already short-circuits those.
9933        let caller = self.frames[fi - 1].lua()?;
9934        let caller_proto = caller.closure.proto;
9935        let p: &crate::runtime::Proto = &caller_proto;
9936        let call_pc = (caller.pc as usize).checked_sub(1)?;
9937        let instr = *p.code.get(call_pc)?;
9938        match instr.op() {
9939            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9940            Op::TForCall => Some(("for iterator", "for iterator".to_string())),
9941            _ => None,
9942        }
9943    }
9944
9945    /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
9946    /// (PUC names a C function from the call instruction that invoked it). The
9947    /// native was called by the nearest Lua frame below `fi` (skipping pcall/
9948    /// xpcall continuations); that frame's call instruction names it.
9949    pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9950        // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
9951        // the synthetic C edge between the __gc finalizer (top Lua frame, has
9952        // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
9953        // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
9954        // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
9955        // sets the tag on the handler frame only, so level 2 there still
9956        // names the calling Lua frame's call instruction (5.5 locals.lua
9957        // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
9958        if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
9959            && fr.tm == Some("gc")
9960        {
9961            let name = tm_debug_name(self.version, "gc");
9962            return Some(("metamethod", name));
9963        }
9964        let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
9965        let caller = self.frames[caller_fi].lua()?;
9966        let p = &caller.closure.proto;
9967        let call_pc = (caller.pc as usize).checked_sub(1)?;
9968        let instr = *p.code.get(call_pc)?;
9969        match instr.op() {
9970            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9971            _ => None,
9972        }
9973    }
9974
9975    /// Native value currently sitting on the synthetic C edge identified by
9976    /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
9977    /// above `fi` (each one corresponds to one native pushing the hook) and
9978    /// indexes into `running_natives` from the top, also skipping the caller
9979    /// of `getinfo` itself (the native that is currently asking).
9980    /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
9981    /// expects the just-entered C function.
9982    pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
9983        let idx = self.c_frame_native_idx(fi)?;
9984        Some(Value::Native(self.running_natives[idx]))
9985    }
9986
9987    /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
9988    /// so `local_at` can index the native's argument window like PUC's
9989    /// `(C temporary)` path. Returns `None` when no matching native exists
9990    /// (e.g. the C edge corresponds to a non-native boundary).
9991    pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
9992        let idx = self.c_frame_native_idx(fi)?;
9993        self.running_native_slots.get(idx).copied()
9994    }
9995
9996    fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
9997        let n_above = self.frames[fi..]
9998            .iter()
9999            .filter_map(CallFrame::lua)
10000            .filter(|f| f.from_c)
10001            .count();
10002        if n_above == 0 {
10003            return None;
10004        }
10005        // running_natives.last() is the native currently executing (the one
10006        // that called getinfo). Pop it conceptually, then take the n_above-th
10007        // entry from the top of what remains.
10008        let nr = self.running_natives.len().checked_sub(1)?;
10009        nr.checked_sub(n_above)
10010    }
10011
10012    /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
10013    /// native whose function pointer matches `target`, and return its qualified
10014    /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
10015    /// `None` if no match is found. Used by `arg_error` when the running native
10016    /// was invoked from another native (PUC `ar.name == NULL` at level 0).
10017    pub(crate) fn pushglobalfuncname(
10018        &mut self,
10019        target: crate::runtime::value::NativeFn,
10020    ) -> Option<String> {
10021        let pkg_k = Value::Str(self.heap.intern(b"package"));
10022        let pkg = match self.globals().get(pkg_k) {
10023            Value::Table(t) => t,
10024            _ => return None,
10025        };
10026        let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10027        let loaded = match pkg.get(loaded_k) {
10028            Value::Table(t) => t,
10029            _ => return None,
10030        };
10031        let matches = |v: Value| -> bool {
10032            matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10033        };
10034        let mut k = Value::Nil;
10035        while let Ok(Some((nk, nv))) = loaded.next(k) {
10036            k = nk;
10037            let Value::Str(outer) = nk else { continue };
10038            let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10039            if matches(nv) {
10040                return Some(if outer == "_G" { String::new() } else { outer });
10041            }
10042            if let Value::Table(inner_t) = nv {
10043                let mut k2 = Value::Nil;
10044                while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10045                    k2 = nk2;
10046                    if matches(nv2)
10047                        && let Value::Str(inner) = nk2
10048                    {
10049                        let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10050                        return Some(if outer == "_G" {
10051                            inner
10052                        } else {
10053                            format!("{outer}.{inner}")
10054                        });
10055                    }
10056                }
10057            }
10058        }
10059        None
10060    }
10061
10062    /// Name and namewhat of the native currently running on behalf of the top
10063    /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10064    /// `luaL_argerror` rewrite a method call's self-argument error.
10065    pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10066        let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10067        let p = &caller.closure.proto;
10068        let call_pc = (caller.pc as usize).checked_sub(1)?;
10069        let instr = *p.code.get(call_pc)?;
10070        match instr.op() {
10071            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10072            _ => None,
10073        }
10074    }
10075
10076    pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10077        let f = self.frames[fi].lua().expect("Lua frame");
10078        let proto = f.closure.proto;
10079        let pc = (f.pc as usize)
10080            .saturating_sub(1)
10081            .min(proto.lines.len().saturating_sub(1));
10082        let line = proto.lines.get(pc).copied().unwrap_or(0);
10083        // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10084        // (independent of any later write to a materialized vararg table's `n`).
10085        // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10086        // any nonzero `tailcalls` count flips it true.
10087        (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10088    }
10089
10090    /// Read an upvalue cell of a closure (debug.getupvalue).
10091    pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10092        match cl.upvals()[idx].state() {
10093            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10094            UpvalState::Closed(v) => v,
10095        }
10096    }
10097
10098    /// Write an upvalue cell of a closure (debug.setupvalue).
10099    pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10100        let uv = cl.upvals()[idx];
10101        match uv.state() {
10102            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10103            UpvalState::Closed(_) => {
10104                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10105                unsafe { uv.as_mut() }.set_closed(v);
10106                self.heap
10107                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10108            }
10109        }
10110    }
10111
10112    /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10113    /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10114    /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10115    /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10116    /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10117    /// else `"function <src:line_defined>"` for an anonymous Lua function.
10118    /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10119    /// Walks the coroutine's saved frames and prepends a synthetic C-level
10120    /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10121    /// (its `resume_at` marker is set). `level` skips entries from the top
10122    /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10123    /// frame; etc.). db.lua :764-:768 sample several levels.
10124    pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10125        use crate::runtime::CoroStatus;
10126        const LEVELS1: usize = 10;
10127        const LEVELS2: usize = 11;
10128        #[derive(Clone, Copy)]
10129        enum VFrame<'a> {
10130            Lua(&'a crate::runtime::function::Frame),
10131            CPcall,
10132            CXpcall,
10133            CYield,
10134            /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10135            /// call collapsed into the next Lua frame down the chain.
10136            Tail,
10137        }
10138        let v51 = self.version <= LuaVersion::Lua51;
10139        let mut visible: Vec<VFrame<'_>> = Vec::new();
10140        // PUC's level 0 entry on a suspended coroutine is the C call where it
10141        // paused — `coroutine.yield` for a yielded thread.
10142        if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10143            visible.push(VFrame::CYield);
10144        }
10145        for cf in co.frames.iter().rev() {
10146            match cf {
10147                CallFrame::Lua(f) => {
10148                    visible.push(VFrame::Lua(f));
10149                    if v51 {
10150                        for _ in 0..f.tailcalls {
10151                            visible.push(VFrame::Tail);
10152                        }
10153                    }
10154                }
10155                CallFrame::Cont(nc) => match nc.kind {
10156                    ContKind::Pcall => visible.push(VFrame::CPcall),
10157                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10158                    _ => {}
10159                },
10160            }
10161        }
10162        if level < 0 {
10163            level = 0;
10164        }
10165        if (level as usize) >= visible.len() {
10166            return Vec::new();
10167        }
10168        let visible = &visible[level as usize..];
10169        let total = visible.len();
10170        let mut out = Vec::new();
10171        // To name a Lua frame, PUC consults the caller's OP_CALL via
10172        // getobjname: find the index `fi` of the current frame in co.frames,
10173        // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10174        let coro_frame_name = |frames: &[CallFrame],
10175                               target: &crate::runtime::function::Frame|
10176         -> Option<(&'static str, String)> {
10177            let fi = frames
10178                .iter()
10179                .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10180            if fi == 0 || target.from_c {
10181                return None;
10182            }
10183            let caller = frames[fi - 1].lua()?;
10184            let p = &caller.closure.proto;
10185            let call_pc = (caller.pc as usize).checked_sub(1)?;
10186            let instr = *p.code.get(call_pc)?;
10187            match instr.op() {
10188                Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10189                Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10190                _ => None,
10191            }
10192        };
10193        let frames = &co.frames;
10194        let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10195            VFrame::Lua(f) => {
10196                let proto = f.closure.proto;
10197                let src = chunk_display_name(proto.source.as_ptr());
10198                let pc = (f.pc as usize)
10199                    .saturating_sub(1)
10200                    .min(proto.lines.len().saturating_sub(1));
10201                let line = proto.lines.get(pc).copied().unwrap_or(0);
10202                out.extend_from_slice(b"\n\t");
10203                out.extend_from_slice(src);
10204                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10205                if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10206                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10207                } else if proto.line_defined == 0 {
10208                    out.extend_from_slice(b"main chunk");
10209                } else {
10210                    out.extend_from_slice(
10211                        format!(
10212                            "function <{}:{}>",
10213                            String::from_utf8_lossy(src),
10214                            proto.line_defined
10215                        )
10216                        .as_bytes(),
10217                    );
10218                }
10219            }
10220            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10221            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10222            VFrame::CYield => {
10223                // PUC `pushglobalfuncname` reports `yield` as
10224                // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10225                // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10226                // `'yield'` (5.5 :841).
10227                let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10228                if qualified {
10229                    out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10230                } else {
10231                    out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10232                }
10233            }
10234            VFrame::Tail => {
10235                // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10236                // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10237                // :403 asserts these appear once per collapsed tail call.
10238                out.extend_from_slice(b"\n\t(...tail calls...)");
10239            }
10240        };
10241        if total <= LEVELS1 + LEVELS2 {
10242            for &v in visible {
10243                emit(&mut out, v);
10244            }
10245        } else {
10246            for &v in &visible[..LEVELS1] {
10247                emit(&mut out, v);
10248            }
10249            let skip = total - LEVELS1 - LEVELS2;
10250            out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10251            for &v in &visible[total - LEVELS2..] {
10252                emit(&mut out, v);
10253            }
10254        }
10255        out
10256    }
10257
10258    pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10259        // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10260        // (11) bottom frames; if there are more, the middle is collapsed into
10261        // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10262        // overflow traceback would balloon to tens of megabytes (errors.lua's
10263        // stack-overflow test ran string.gmatch over the resulting buffer).
10264        const LEVELS1: usize = 10;
10265        const LEVELS2: usize = 11;
10266        // Collect visible frames in top-down order (deepest first). Both Lua
10267        // activations and pcall/xpcall continuations (which stand in for a
10268        // C-level pcall on the stack) are visible; PUC's traceback enumerates
10269        // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10270        #[derive(Clone, Copy)]
10271        enum VFrame {
10272            Lua(usize),
10273            CPcall,
10274            CXpcall,
10275        }
10276        let mut visible: Vec<VFrame> = Vec::new();
10277        for (fi, cf) in self.frames.iter().enumerate().rev() {
10278            match cf {
10279                CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10280                CallFrame::Cont(nc) => match nc.kind {
10281                    ContKind::Pcall => visible.push(VFrame::CPcall),
10282                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10283                    _ => {}
10284                },
10285            }
10286        }
10287        // PUC `luaL_traceback` starts enumerating at the given `level` (in
10288        // terms of L1's CallInfo chain). For the running-thread case the C
10289        // frame for debug.traceback itself is level 0 and luna's `visible`
10290        // doesn't include it — so level=1 (PUC default) means "emit from the
10291        // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10292        // the top. level<=0 emits nothing extra here (d_traceback handles the
10293        // "[C]: in function 'traceback'" prefix for level==0 separately).
10294        let skip = (level - 1).max(0) as usize;
10295        if skip >= visible.len() {
10296            return Vec::new();
10297        }
10298        let visible = &visible[skip..];
10299        let total = visible.len();
10300        let mut out = Vec::new();
10301        let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10302            VFrame::Lua(fi) => {
10303                let f = this.frames[fi].lua().expect("Lua frame");
10304                let proto = f.closure.proto;
10305                let src = chunk_display_name(proto.source.as_ptr());
10306                let pc = (f.pc as usize)
10307                    .saturating_sub(1)
10308                    .min(proto.lines.len().saturating_sub(1));
10309                let line = proto.lines.get(pc).copied().unwrap_or(0);
10310                out.extend_from_slice(b"\n\t");
10311                out.extend_from_slice(src);
10312                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10313                if let Some((namewhat, name)) = this.frame_name(fi) {
10314                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10315                } else if proto.line_defined == 0 {
10316                    out.extend_from_slice(b"main chunk");
10317                } else {
10318                    out.extend_from_slice(
10319                        format!(
10320                            "function <{}:{}>",
10321                            String::from_utf8_lossy(src),
10322                            proto.line_defined
10323                        )
10324                        .as_bytes(),
10325                    );
10326                }
10327            }
10328            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10329            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10330        };
10331        if total <= LEVELS1 + LEVELS2 {
10332            for &v in visible {
10333                emit_frame(&mut out, v, self);
10334            }
10335        } else {
10336            for &v in &visible[..LEVELS1] {
10337                emit_frame(&mut out, v, self);
10338            }
10339            let dropped = total - LEVELS1 - LEVELS2;
10340            out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10341            for &v in &visible[total - LEVELS2..] {
10342                emit_frame(&mut out, v, self);
10343            }
10344        }
10345        out
10346    }
10347}
10348
10349// ────────────────────────────────────────────────────────────────────
10350// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10351//
10352// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10353// trace-meta section after `vm.load`, resolves each entry's
10354// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10355// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10356// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10357// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10358// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10359// mcode without further plumbing — same code path the runtime JIT
10360// uses.
10361//
// Why a separate impl block: keeps the AOT API surface
// (`install_aot_trace` and `collect_proto_hashes`) easy to locate when
// grep'ing, without burying it in the 8500-line `impl Vm` block above.
10365// ────────────────────────────────────────────────────────────────────
10366
10367impl Vm {
10368    /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10369    /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10370    /// fires it at the trace's `head_pc`. This is the runtime install
10371    /// API the deploy-side `luna-runtime-helpers` resolver calls once
10372    /// per AOT-emitted trace meta entry, after looking up `proto` by
10373    /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10374    ///
10375    /// # What this does
10376    ///
10377    /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10378    /// The trace's `entry` fn ptr must already point at runnable
10379    /// machine code (the AOT linker resolved the symbol at link time;
10380    /// the deploy resolver passes the address verbatim).
10381    ///
10382    /// # What this does NOT do
10383    ///
10384    /// - **No deduplication.** Calling twice with the same `head_pc`
10385    ///   pushes two entries; the dispatcher's `find` will pick the
10386    ///   first match. The deploy resolver is responsible for not
10387    ///   double-installing.
10388    /// - **No invalidation of the runtime JIT cache.** If the runtime
10389    ///   JIT later records + compiles a trace for the same
10390    ///   `(proto, head_pc)`, both coexist on `proto.traces` and the
10391    ///   dispatcher's `find` picks whichever appears first. AOT
10392    ///   traces install before any runtime recording is possible
10393    ///   (resolver runs before `vm.load` returns its first closure),
10394    ///   so AOT traces win the race for the same site.
10395    /// - **No coverage gating.** AOT traces are trusted by
10396    ///   construction — they were validated at compile time. Setting
10397    ///   `dispatchable: false` on the input would silently disable
10398    ///   dispatch; the caller controls that flag.
10399    ///
10400    /// # Safety / soundness
10401    ///
10402    /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10403    /// machine code). Soundness contract:
10404    ///
10405    /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10406    ///   In the AOT-binary deploy shape this is trivially satisfied —
10407    ///   the fn lives in the binary's `.text`.
10408    /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10409    ///   what the trace's IR actually compiled against; the dispatcher
10410    ///   uses them to marshal `reg_state` in and out without further
10411    ///   validation. A mismatch corrupts vm.stack.
10412    ///
10413    /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10414    /// invariants hold; this fn is a plain push — no validation that
10415    /// would slow the dispatcher's hot path either.
10416    pub fn install_aot_trace(
10417        &mut self,
10418        proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10419        trace: crate::jit::trace::CompiledTrace,
10420    ) {
10421        let _ = self; // resolver passes &mut Vm for symmetry with future
10422        // pending-install + hash-walk variants; nothing on `self` to
10423        // mutate today because the install target lives on the Proto.
10424        proto.traces.borrow_mut().push(TArc::new(trace));
10425    }
10426
10427    /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10428    /// reachable from `root` and return `(proto, stable_hash)` pairs
10429    /// for every Proto found. Used by the deploy-side resolver to
10430    /// match AOT-emitted `proto_hash` keys against the freshly
10431    /// `undump`'d chunk's protos.
10432    ///
10433    /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10434    /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10435    /// multiple nested closures (rare; the cache field would catch
10436    /// the closure-side dedup, not the Proto side) is reported once.
10437    ///
10438    /// # Why on `&Vm` and not a free fn
10439    ///
10440    /// Keeps the AOT install API discoverable on the Vm surface —
10441    /// `vm.collect_proto_hashes(root)` reads naturally next to
10442    /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10443    /// any Vm field, so `&self` (read-only) is enough.
10444    pub fn collect_proto_hashes(
10445        &self,
10446        root: crate::runtime::Gc<crate::runtime::function::Proto>,
10447    ) -> Vec<(
10448        crate::runtime::Gc<crate::runtime::function::Proto>,
10449        [u8; 16],
10450    )> {
10451        let _ = self;
10452        let mut out = Vec::new();
10453        let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10454            std::collections::HashSet::new();
10455        let mut queue: std::collections::VecDeque<
10456            crate::runtime::Gc<crate::runtime::function::Proto>,
10457        > = std::collections::VecDeque::new();
10458        queue.push_back(root);
10459        while let Some(p) = queue.pop_front() {
10460            let key = p.as_ptr() as *const _;
10461            if !seen.insert(key) {
10462                continue;
10463            }
10464            out.push((p, p.stable_hash()));
10465            for &child in p.protos.iter() {
10466                queue.push_back(child);
10467            }
10468        }
10469        out
10470    }
10471}