Skip to main content

luna_core/vm/
exec.rs

1//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
2//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
3//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
4//!
5//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
6//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
7//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
8//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16    AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17    MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
24/// A Lua virtual machine: one OS thread's worth of Lua state.
25///
26/// # Threading model
27///
28/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
29/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
30/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
31/// design. Embedders that want concurrency spawn one `Vm` per OS
32/// thread (or per single-thread Tokio worker) and exchange data via
33/// channels. See [`docs/threading.md`](../../docs/threading.md) for
34/// canonical embedding patterns including Tokio `current_thread`,
35/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
36///
37/// The constraint is enforced at compile time:
38///
39/// ```compile_fail
40/// fn must_be_send<T: Send>() {}
41/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
42/// ```
43///
44/// A future `feature = "send"` (post-v1.1 sprint) will gate an
45/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
46/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
47pub struct Vm {
48    /// The GC heap owned by this VM. Embedders normally interact via the
49    /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
50    /// the heap directly.
51    pub heap: Heap,
52    stack: Vec<Value>,
53    frames: Vec<CallFrame>,
54    /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
55    /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
56    /// helpers (debug-asserted on use). NOT consumed by readers yet;
57    /// week 1 is pure scaffold. Week 2-N migrations replace readers
58    /// one slice at a time, then remove `frames: Vec<CallFrame>` in
59    /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
60    frames_top: u32,
61    /// open upvalues, sorted ascending by stack slot
62    open_upvals: Vec<(u32, Gc<Upvalue>)>,
63    /// to-be-closed slots, ascending
64    tbc: Vec<u32>,
65    /// logical stack top for multi-result sequences
66    pub(crate) top: u32,
67    globals: Gc<Table>,
68    /// per-basic-type metatables (PUC luaT), indexed by `type_mt_slot`:
69    /// 0 nil, 1 boolean, 2 number, 3 string, 4 function. The string slot is
70    /// the metatable shared by all strings (populated by the string lib,
71    /// P04); tables carry their own. Settable via debug.setmetatable.
72    type_mt: [Option<Gc<Table>>; 5],
73    /// pre-interned metamethod event names, indexed by `Mm`
74    mm_names: Vec<Gc<crate::runtime::LuaStr>>,
75    /// native↔Lua nesting depth (PUC C-stack guard analogue)
76    c_depth: u32,
77    /// number of live pcall/xpcall continuation frames on the running thread
78    /// (PUC counts these against nCcalls). Bounds protected-call recursion the
79    /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
80    /// with the coroutine context, since continuations survive a yield.
81    pcall_depth: u32,
82    /// number of non-yieldable C calls in flight on the running thread (PUC's
83    /// `L->nny`). A library callback that runs via synchronous Rust recursion
84    /// (sort comparator, gsub replacement) cannot be continued across a yield,
85    /// so it bumps this for its duration; `coroutine.yield` inside hits the
86    /// C-call boundary and errors. Always 0 at a suspend point (a yield can
87    /// never cross such a call), so it needs no per-thread save/restore.
88    nny: u32,
89    /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
90    /// stack-overflow that surfaces *inside* the handler is reported as PUC's
91    /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
92    /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
93    /// loop)` then matches. PUC tracks this via the soft-cap window
94    /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
95    /// scope explicitly.
96    msgh_depth: u32,
97    /// set by a coroutine closing itself (`coroutine.close()` on the running
98    /// thread): the to-be-closed handlers have already run; the thread must now
99    /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
100    /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
101    /// unwind, so a protecting pcall cannot catch it) the termination.
102    terminating: Option<Option<Value>>,
103    /// xoshiro256** state (math.random)
104    rng: [u64; 4],
105    /// VM creation time (os.clock)
106    started: std::time::Instant,
107    version: LuaVersion,
108    /// error object being threaded through a chain of __close handlers; a GC
109    /// root for the duration (a handler may trigger collection)
110    closing_err: Option<Value>,
111    /// the coroutine whose context is currently live in the fields above;
112    /// `None` while the main thread runs (P05)
113    current: Option<Gc<crate::runtime::Coro>>,
114    /// the main thread's saved execution context while a coroutine runs
115    main_ctx: Option<SavedCtx>,
116    /// set by `coroutine.yield` to suspend the running coroutine: the yielded
117    /// values plus the slot/result-count needed to finish the yielding call on
118    /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
119    yielding: Option<(Vec<Value>, u32, i32)>,
120    /// results expected by the in-flight native call (so `yield` knows how many
121    /// values its call site wants when it suspends)
122    native_nresults: i32,
123    /// identity object for the main thread, returned by `coroutine.running`
124    /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
125    main_coro: Option<Gc<Coro>>,
126    /// `collectgarbage` mode name ("incremental"/"generational"). The collector
127    /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
128    /// switches report the previous one, as PUC does.
129    gc_mode: &'static str,
130    /// the live-register boundary of the running thread for GC rooting (PUC's
131    /// `L->top`): set precisely at each GC safe point so freed temporary
132    /// registers above it are not rooted. Without this the collector roots the
133    /// whole stack window, pinning weak-table values stranded in stale temps
134    /// (e.g. closure.lua's `while x[1]` GC-detection loop).
135    pub(crate) gc_top: u32,
136    /// `collectgarbage("param", name [,value])` pacing parameters. The collector
137    /// is still stop-the-world, so these are stored/returned for API fidelity
138    /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
139    /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
140    gc_pause: i64,
141    gc_stepmul: i64,
142    gc_stepsize: i64,
143    /// true while `__gc` finalizers are being run, so a finalizer that calls
144    /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
145    /// `collectgarbage` yields fail).
146    gc_finalizing: bool,
147    /// C ABI scratch (`capi` module): the host-visible value stack that C
148    /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
149    /// Kept here (instead of in a separate `LuaState` wrapper) so the
150    /// trampoline that bridges to a `LuaCFunction` can safely cast the
151    /// Vm pointer it already holds to the public `*mut LuaState` type
152    /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
153    pub capi_stack: Vec<crate::runtime::Value>,
154    /// Pinned CString backing the pointer last returned by `lua_tostring`;
155    /// valid until the next `lua_tostring` on the same Vm.
156    pub capi_cstr_pin: Option<std::ffi::CString>,
157    /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
158    /// concatenate across continuation calls until a non-`tocont` call
159    /// flushes; the default warnf recognises `@on`/`@off` control messages
160    /// and starts disabled. luna's `emit_warn` mirrors the default warnf
161    /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
162    /// keep the older raise semantics).
163    pub(crate) warn_state: WarnState,
164    pub(crate) warn_buf: Vec<u8>,
165    /// P09 embedding cooperative budget: a per-Vm tick counter that the run
166    /// loop decrements once per dispatch turn. When it hits zero the loop
167    /// raises a catchable "instruction budget exceeded" error so the embedder
168    /// can yield control back to its caller (short-script eval, game
169    /// frame budgets). `None` = unbounded; reset on each call via
170    /// `set_instr_budget`.
171    pub(crate) instr_budget: Option<i64>,
172    // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
173    // `self.jit` below + `crate::vm::jit_state` for field docs.
174    // (Was: jit_enabled here.)
175    // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
176    // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
177    // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
178    // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
179    // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
180    // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
181    // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
182    // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
183    // materialize_emit,closure_emit}_count — all moved to JitState.
184    /// Bytecode-loading gate. Default `true`. Sandbox embedders should
185    /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
186    /// precompiled chunks (which bypass the parser's depth / opcode
187    /// limits). When `false`, the loader rejects any source whose first
188    /// byte is the bytecode signature `\27` ("`\27Lua`").
189    pub(crate) bytecode_loading: bool,
190    /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
191    /// a strictly larger trust surface than luna's own dump format
192    /// (third-party toolchain bugs, malformed chunks, unknown opcode
193    /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
194    /// through the per-dialect PUC translators in `crate::vm::dump::puc`
195    /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
196    /// Embedder toggles via `set_puc_bytecode_loading`.
197    pub(crate) puc_bytecode_loading: bool,
198    /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
199    /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
200    /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
201    /// source exceeds this, the loader returns the PUC-shaped
202    /// `not enough memory` error before the host allocator is asked to
203    /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
204    /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
205    /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
206    /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
207    /// Embedders that genuinely need to load > 256 MiB sources widen the
208    /// cap via [`Vm::set_loader_input_budget`].
209    pub(crate) loader_input_budget: usize,
210    /// In-process log of fully-emitted warnings (each entry = one flushed
211    /// message, sans the "Lua warning: " prefix and trailing newline). Lets
212    /// tests assert what was warned without scraping stderr.
213    pub(crate) warn_log: Vec<Vec<u8>>,
214    /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
215    /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
216    /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
217    /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
218    /// shape stub for db.lua :328; if other registry-keyed APIs land later
219    /// they can share this table.
220    pub(crate) registry: Option<Gc<Table>>,
221    /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
222    /// registry entry); attached to every file userdata the io library makes
223    pub(crate) file_mt: Option<Gc<Table>>,
224    /// io library default input/output streams (PUC registry IO_INPUT/IO_OUTPUT)
225    pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
226    pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
227    /// the running thread's debug hook state (`debug.sethook`); per-thread,
228    /// swapped with the execution context on a coroutine resume/yield
229    pub(crate) hook: HookState,
230    /// true while the hook itself runs, so its own execution fires no events
231    /// (PUC clears the mask for the duration)
232    pub(crate) in_hook: bool,
233    /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
234    /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
235    /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
236    /// accumulate the count instead of capping at 1.
237    pub(crate) pending_tailcalls: u32,
238    /// Name of the C native that just propagated an error (captured before
239    /// the native is popped from `running_natives`). Lets a dying coroutine
240    /// preserve `[C]: in function '<name>'` at the top of its traceback
241    /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
242    /// luna's native frames are off-stack so we stash the name explicitly.
243    pub(crate) errored_native: Option<String>,
244    /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
245    /// (relative to the activation's func slot) and the number transferred.
246    /// Set just before firing a call/return hook, read by `getinfo("r")`.
247    pub(crate) hook_ftransfer: u16,
248    pub(crate) hook_ntransfer: u16,
249    /// metamethod event tag (e.g. "close") to attach to the next Lua frame
250    /// pushed by `push_frame`; `close_slots` sets this before calling a
251    /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
252    /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
253    pending_tm: Option<&'static str>,
254    /// `true` when the next `push_frame` is the user hook function itself,
255    /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
256    /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
257    pending_is_hook: bool,
258    /// traceback snapshot taken at the error point (the first `unwind` entry
259    /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
260    /// the failed frames are popped — can still see the error point's stack
261    /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
262    /// stack intact; we approximate by snapshotting the string and letting
263    /// `d_traceback` consume it. Cleared on Cont catch and at host-level
264    /// `call_value` entry (`public_call_depth == 0`).
265    pub(crate) error_traceback: Option<Vec<u8>>,
266    /// nesting depth of public `call_value` entries (host vs. internal). The
267    /// outermost entry (depth 0) resets per-error state (`error_traceback`);
268    /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
269    public_call_depth: u32,
270    /// stack of native (`Value::Native`) closures currently running on the
271    /// Rust call stack. `begin_call` pushes the closure before invoking
272    /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
273    /// native call (PUC `ar.name == NULL` at level 0 because the level-0
274    /// caller is C, not Lua) and qualify the running function's name via
275    /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
276    pub(crate) running_natives: Vec<Gc<NativeClosure>>,
277    /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
278    /// the native's argument-window head and width, so `debug.getlocal`
279    /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
280    pub(crate) running_native_slots: Vec<(u32, u32)>,
281    // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
282    // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
283    // trace_compiler — all moved to JitState. See `jit` below.
284    /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
285    /// when `chunk_compiler` / `trace_compiler` are
286    /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
287    ///
288    /// `#[doc(hidden)] pub` so the `luna` crate's
289    /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
290    /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
291    /// field). Not part of the embedder-facing API surface.
292    #[doc(hidden)]
293    pub jit: crate::vm::jit_state::JitState,
294
295    /// B12 host roots — a `Vec<HostRootSlot>` traced as an extra
296    /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
297    /// `LuaRoot`) hold indices into this vector so the underlying
298    /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
299    ///
300    /// The v1.1 strategy was append-only with explicit `unpin_all` /
301    /// new Vm. NOTE(review): the v1.3 Phase SR free list
302    /// (`host_roots_free`) appears to supersede that; confirm the slot
303    /// lifecycle against `pin_host` before relying on either description.
304    pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
305    /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
306    /// back if non-empty, else extends `host_roots`. Generation
307    /// overflow at `u32::MAX` retires the slot (NOT pushed here).
308    pub(crate) host_roots_free: Vec<u32>,
309
310    /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
311    /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
312    /// callback). Each entry is one in-flight working buffer; `gc_roots`
313    /// extends with every contained `Value` so a `collectgarbage()`
314    /// inside the comparator cannot free strings/tables snapshotted
315    /// here. Nested sorts push a new buffer on entry, pop on exit
316    /// (sort.lua's `load(..)(); collectgarbage()` compare callback
317    /// regression).
318    pub(crate) sort_scratch: Vec<Vec<Value>>,
319
320    /// v1.3 Phase ML — MacroLua compile-time macro registry.
321    /// Pre-populated with built-in macros (`@quote` / `@unquote` /
322    /// `@if` / `@gensym`) at construction time when `version ==
323    /// LuaVersion::MacroLua`; embedders register custom macros via
324    /// [`Vm::define_macro`]. The expander runs once per `load()` call
325    /// between lexing and parsing (only when `is_macro_lua()`).
326    pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,
327
328    /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
329    /// by `TypeId::of::<T>()` for embedder types implementing
330    /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
331    /// [`Vm::register_userdata`]; metatables are pinned via
332    /// [`Vm::pin_host`] at registration time so the entry's
333    /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
334    pub(crate) userdata_metatables:
335        std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,
336
337    /// B6 — classification of the most recent error raised on this Vm.
338    /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
339    /// at well-known sites (syntax errors, instr-budget trips, native
340    /// callback errors, type errors).
341    pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,
342
343    /// B6 — `(source_name, line)` of the most recent error. Set by the
344    /// dispatcher / lexer / parser; cleared when a new call_value
345    /// enters cleanly.
346    pub(crate) last_error_source: Option<(String, u32)>,
347
348    /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
349    /// the dispatcher hot loop yields cooperatively (sets
350    /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
351    /// to `EvalFuture::poll`) instead of returning a real
352    /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
353    /// for the duration of the future; restored to `false` on
354    /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
355    /// leave it `false` so v1.0 behavior is preserved exactly.
356    pub(crate) async_mode: bool,
357
358    /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
359    /// before driving a slice. The dispatcher itself does not call it
360    /// (the future's poll loop does `wake_by_ref` after observing
361    /// `BudgetExhausted`), but storing the waker keeps the door open
362    /// for Stage 2 async natives to wake the host directly from a
363    /// helper future.
364    pub(crate) async_waker: Option<std::task::Waker>,
365
366    /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
367    /// `instr_budget` at the start of each `EvalFuture::poll` slice.
368    /// Default 10_000 (RFC §D5). Tunable via
369    /// [`Vm::set_async_slice`].
370    pub(crate) async_slice_size: i64,
371
372    /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
373    /// budget exhaustion fires; checked by `exec_with` (so the
374    /// sentinel propagates without `unwind` running, mirroring
375    /// `yielding.is_some()`) and by `call_value_impl` (so the call
376    /// frames survive for the next poll). Cleared by `drive_one`
377    /// after translating it to `DispatchOutcome::BudgetExhausted`.
378    pub(crate) host_yield_pending: bool,
379
380    /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
381    /// when an async-marked [`NativeClosure`] is invoked under
382    /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
383    /// mechanism as `host_yield_pending` — see `exec_with` +
384    /// `call_value_impl`), stashes the in-flight future +
385    /// post-completion context here, and surfaces them to
386    /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
387    /// once the future is moved out into a
388    /// `DispatchOutcome::AsyncNativeAwaiting`.
389    pub(crate) pending_async_native_fut:
390        Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,
391
392    /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
393    /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
394    /// commit the future's eventual `Ok(nret)` back into the calling
395    /// frame's expected result slots. Recorded by the dispatcher;
396    /// consumed by [`Vm::commit_async_native_result`] after the
397    /// future resolves.
398    pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
399}
400
401/// v1.1 B10 Stage 2 — call-site context an in-flight async native
402/// needs preserved across the cooperative-yield boundary.
403///
404/// The dispatcher records this when it routes a `NativeClosure` with
405/// `is_async == true` through the cooperative path; `EvalFuture::poll`
406/// hands it back to [`Vm::commit_async_native_result`] once the
407/// awaited future resolves so `finish_results` (and the post-call GC
408/// checkpoint) can run as if the native had completed synchronously.
409#[derive(Clone, Copy)]
410pub(crate) struct AsyncNativeCallCtx {
411    pub func_slot: u32,
412    /// Recorded for parity with the sync native-call path's
413    /// `native_nresults`/`gc_top` bookkeeping; reserved for Stage 3+
414    /// hook firing + traceback shaping. Not yet read in Stage 2.
415    #[allow(dead_code)]
416    pub nargs: u32,
417    pub nresults: i32,
418    /// Recorded for Stage 3+ traceback + GC-root-window auditing.
419    /// Stage 2 reads `Vm.gc_top` directly post-resume, so this is
420    /// unread today; carried so a Stage 3 audit can confirm the
421    /// pre-suspend root window matches the post-resume one.
422    #[allow(dead_code)]
423    pub gc_top: u32,
424}
425
426/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
427/// hookcount). `func` is the Lua hook, `rust_func` the Rust-side one; the booleans are the PUC mask bits.
428#[derive(Clone, Copy, Default)]
429pub struct HookState {
430    /// the Lua hook function (`None` when no Lua hook is installed)
431    pub func: Option<Value>,
432    /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
433    /// (Rust first); both can be installed simultaneously, but most
434    /// embedders pick one.
435    pub rust_func: Option<RustDebugHook>,
436    /// LUA_MASKCALL — fire on function entry
437    pub call: bool,
438    /// LUA_MASKRET — fire on function return
439    pub ret: bool,
440    /// LUA_MASKLINE — fire on source-line change
441    pub line: bool,
442    /// LUA_MASKCOUNT — fire every `count_base` instructions
443    pub count: bool,
444    /// instruction count between count events (PUC basehookcount)
445    pub count_base: i64,
446    /// instructions left until the next count event (PUC hookcount)
447    pub count_left: i64,
448}
449
450/// Rust-side debug hook callback (B11): a plain `fn` pointer that receives
451/// `&mut Vm` plus a classified [`RustHookEvent`]. The callback runs synchronously in the
452/// dispatcher; the hook flag (`in_hook`) is set for its duration so
453/// hook recursion is suppressed.
454pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
456/// Classified debug event delivered to a [`RustDebugHook`]; `Line` is the only variant carrying a payload.
457#[derive(Clone, Copy, Debug, PartialEq, Eq)]
458pub enum RustHookEvent {
459    /// Function entry (`hook_call` analogue).
460    Call,
461    /// Function return (`hook_return` analogue).
462    Return,
463    /// Tail call entry (PUC 5.2+ separates this from a plain Call).
464    TailCall,
465    /// Source-line change (the `u32` is the 1-based line number).
466    Line(u32),
467    /// Instruction count event (fires every `count_base` instructions).
468    Count,
469}
470
471/// Subscribe to function-call events. This and the other `HOOK_MASK_*` constants are mask
472/// flags for [`Vm::set_rust_debug_hook`]; OR them to subscribe to multiple event categories with a single hook installation.
473pub const HOOK_MASK_CALL: u32 = 1;
474/// Subscribe to function-return events.
475pub const HOOK_MASK_RETURN: u32 = 2;
476/// Subscribe to line-change events.
477pub const HOOK_MASK_LINE: u32 = 4;
478/// Subscribe to instruction-count events.
479pub const HOOK_MASK_COUNT: u32 = 8;
480
481/// A thread's swapped-out execution context (PUC per-thread stack state). Each field parks the same-named `Vm` field.
482struct SavedCtx {
483    stack: Vec<Value>,
484    frames: Vec<CallFrame>,
485    open_upvals: Vec<(u32, Gc<Upvalue>)>,
486    tbc: Vec<u32>,
487    top: u32,
488    pcall_depth: u32,
489    hook: HookState,
490    /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
491    /// the rest of the suspended state so each thread can keep its own
492    /// `setfenv(0, env)` rewire without the swap leaking into another
493    /// thread (5.1 closure.lua :177).
494    globals: Gc<Table>,
495}
496
497/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
498enum Unwound {
499    /// caught by a pcall/xpcall continuation; resume running its caller
500    Caught,
501    /// caught by a continuation that was the entry-level activation; the
502    /// payload holds the call's (wrapped) results
503    CaughtReturn(Vec<Value>),
504    /// no protecting continuation up to `entry_depth`; the payload is the error to propagate
505    Propagated(LuaError),
506}
507
508/// A resolved debug stack level: a real Lua frame (by index into `frames`), a
509/// synthetic C frame for a call_value boundary, or a synthetic tail-call placeholder.
510pub(crate) enum DbgKind {
511    Lua(usize),
512    /// a synthetic C level; the index is the `from_c` Lua frame it sits below,
513    /// used to name the native via its invoking call instruction.
514    C(usize),
515    /// PUC `CIST_TAIL` placeholder — a Lua-to-Lua tail call collapsed the
516    /// caller's activation, so `debug.getinfo(level)` at this slot returns
517    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
518    /// `func = nil` and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
519    /// both shapes). The index points at the *tail-called* frame whose
520    /// `is_tail` flag induced this synthetic level.
521    Tail(#[allow(dead_code)] usize),
522}
523
524/// Outcome of an index/newindex/comparison fast path: a directly
525/// computed result, a metamethod (with the receiver it resolved against), or a synthesised `<=` the
526/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
527enum MmOut {
528    /// index → the looked-up value; newindex → done (raw set performed);
529    /// comparison → the boolean result already known
530    Done(Value),
531    /// a metamethod to call; `recv` is the chain element it was found on (the
532    /// extra args — key / value — are supplied by the caller)
533    Mm { func: Value, recv: Value },
534    /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
535    /// carries `__le` — `op_compare` swaps the args and negates the result.
536    /// Lives separate from `Mm` so the synth path can stay yieldable without
537    /// every other Mm caller learning a swap flag they would never set.
538    CompareSynth { func: Value },
539}
540
541/// Metamethod events; discriminants index `Vm::mm_names`, so variant order is significant (it lines up with `MM_NAMES`).
542#[derive(Clone, Copy, PartialEq, Eq)]
543#[repr(usize)]
544pub(crate) enum Mm {
545    Index,
546    NewIndex,
547    Call,
548    ToString,
549    Metatable,
550    Name,
551    Eq,
552    Lt,
553    Le,
554    Concat,
555    Len,
556    Add,
557    Sub,
558    Mul,
559    Div,
560    Mod,
561    Pow,
562    IDiv,
563    BAnd,
564    BOr,
565    BXor,
566    Shl,
567    Shr,
568    Unm,
569    BNot,
570    Close,
571    Gc,
572    Pairs,
573}
574
// Metamethod event names, one per `Mm` variant and in the same order.
// NOTE(review): presumably the source the `Vm::mm_names` strings are interned from — confirm at the constructor.
575const MM_NAMES: [&str; 28] = [
576    "__index",
577    "__newindex",
578    "__call",
579    "__tostring",
580    "__metatable",
581    "__name",
582    "__eq",
583    "__lt",
584    "__le",
585    "__concat",
586    "__len",
587    "__add",
588    "__sub",
589    "__mul",
590    "__div",
591    "__mod",
592    "__pow",
593    "__idiv",
594    "__band",
595    "__bor",
596    "__bxor",
597    "__shl",
598    "__shr",
599    "__unm",
600    "__bnot",
601    "__close",
602    "__gc",
603    "__pairs",
604];
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614    if version <= LuaVersion::Lua53 {
615        format!("__{tm}")
616    } else if tm == "gc" {
617        "__gc".to_string()
618    } else {
619        tm.to_string()
620    }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626    use crate::vm::isa::Op;
627    Some(match op {
628        Op::Add => "add",
629        Op::Sub => "sub",
630        Op::Mul => "mul",
631        Op::Div => "div",
632        Op::Mod => "mod",
633        Op::Pow => "pow",
634        Op::IDiv => "idiv",
635        Op::BAnd => "band",
636        Op::BOr => "bor",
637        Op::BXor => "bxor",
638        Op::Shl => "shl",
639        Op::Shr => "shr",
640        Op::Unm => "unm",
641        Op::BNot => "bnot",
642        Op::Concat => "concat",
643        Op::Len => "len",
644        Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645        Op::SetField | Op::SetTable | Op::SetI => "newindex",
646        Op::Eq | Op::EqK => "eq",
647        Op::Lt => "lt",
648        Op::Le => "le",
649        _ => return None,
650    })
651}
652
/// PUC MAXTAGLOOP: bound on `__index`/`__newindex` chains.
const MAX_TAG_LOOP: u32 = 2000;
/// PUC `MAXCCMT`: bound on a `__call` metamethod chain (lvm.c). 200 chains
/// is more than any reasonable program needs and matches PUC 5.4/5.5; the
/// earlier `15` here was tight enough to fire on calls.lua :194 (N=20).
const MAX_CCMT: u32 = 200;
/// PUC LUAI_MAXCCALLS analogue: native↔Lua nesting bound.
const MAX_C_DEPTH: u32 = 200;
/// luna's engine-level VM stack cap (used by call-site overflow checks).
/// Slightly larger than PUC's `LUAI_MAXSTACK` so engine internals have a
/// little headroom above any single library push: `1 << 20` is 1_048_576
/// slots, against the 1_000_000 of `PUC_MAXSTACK` below.
const MAX_LUA_STACK: u32 = 1 << 20;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults via
/// `lua_checkstack` to refuse multi-value pushes (`table.unpack` returning
/// N values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins
/// this at one million — `for j in {lim-10, …}` expects every j ≥ lim-10
/// to fail because the few slots already consumed in the coroutine push
/// the effective cap below lim-10.
///
/// Signed (`i64`) because `Vm::stack_room` subtracts the current stack
/// length from it.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// PUC 5.4+ default warnf state. The base library's `warn` function flips
/// between `Off` and `On` via the `@on` / `@off` control messages; any other
/// `@<word>` control is silently ignored, mirroring `lauxlib.c::checkcontrol`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum WarnState {
    /// `warn` calls are silently dropped. This is the state a freshly built
    /// `Vm` starts in, and the state `warn("@off")` switches back to.
    Off,
    /// `warn` calls are delivered to stderr (after `warn("@on")`).
    On,
}
683
/// Pull a human-readable message out of a `catch_unwind` payload, falling
/// back to a fixed placeholder when there is none.
///
/// A formatted `panic!("msg {x}")` carries a `String`; a literal
/// `panic!("msg")` carries a `&'static str`. Any other payload type yields
/// `"<non-string panic>"`. The native-call `catch_unwind` uses this to fold
/// a Rust panic into a Lua error.
///
/// The parameter stays `&Box<dyn Any + Send>` on purpose: the method call
/// auto-derefs through the box to the inner trait object, so the downcast
/// inspects the payload itself rather than the `Box` wrapper.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|literal| (*literal).to_owned())
        })
        .unwrap_or_else(|| "<non-string panic>".to_owned())
}
697
/// Combined error type returned by [`Vm::eval`] and friends — either the
/// chunk failed to parse / compile, or it raised at runtime.
///
/// Both payload types convert into this enum through the `From` impls
/// below, so a function returning `Result<_, Error>` can use `?` on either
/// a `SyntaxError` or a `LuaError`.
#[derive(Debug)]
pub enum Error {
    /// Parse or compile failure.
    Syntax(SyntaxError),
    /// Runtime error raised during execution.
    Runtime(LuaError),
}
707
708impl From<SyntaxError> for Error {
709    fn from(e: SyntaxError) -> Error {
710        Error::Syntax(e)
711    }
712}
713
714impl From<LuaError> for Error {
715    fn from(e: LuaError) -> Error {
716        Error::Runtime(e)
717    }
718}
719
/// State close: give pending finalizers a chance to run before the `Vm`'s
/// fields (including the heap) are dropped.
impl Drop for Vm {
    fn drop(&mut self) {
        // state close: run `__gc` for every still-registered finalizable before
        // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
        // single pass — objects created by a closing finalizer are not
        // re-finalized (they go to the heap's free list directly).
        //
        // Order matters: queue first, then run what was queued.
        self.heap.queue_all_finalizers();
        self.run_finalizers();
    }
}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742    frames.push(cf);
743    // Shadow maintenance is debug-only: release builds skip the
744    // increment + assertion entirely. The shadow's purpose in Week 1
745    // is to VERIFY the assumed invariant (frames_top == frames.len())
746    // across all push/pop sites; once Week 2+ migrates readers to
747    // consume the shadow, release will run the increment unconditionally.
748    #[cfg(debug_assertions)]
749    {
750        *frames_top += 1;
751        debug_assert_eq!(
752            *frames_top as usize,
753            frames.len(),
754            "P17-D frames_top out of sync after push",
755        );
756    }
757    #[cfg(not(debug_assertions))]
758    let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763    let r = frames.pop();
764    #[cfg(debug_assertions)]
765    {
766        if r.is_some() {
767            *frames_top = frames_top.saturating_sub(1);
768        }
769        debug_assert_eq!(
770            *frames_top as usize,
771            frames.len(),
772            "P17-D frames_top out of sync after pop",
773        );
774    }
775    #[cfg(not(debug_assertions))]
776    let _ = frames_top;
777    r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — one-time env-var read for
/// `LUNA_AOT_PROBE`. Returns `true` iff the env var is set to any
/// non-empty value. The result is cached in a `OnceLock`, so the
/// environment is consulted only on the first call in a process; later
/// changes to the variable are not observed. Off by default — production
/// deploys don't bleed diagnostic prints.
///
/// The lookup goes through `env::var_os` rather than `env::var`: the
/// latter reports an error for values that are not valid Unicode, which
/// would make a set, non-empty variable read as "off". Only emptiness is
/// tested, so no UTF-8 validation is needed.
fn jit_probe_enabled() -> bool {
    static PROBE_ON: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        std::env::var_os("LUNA_AOT_PROBE").is_some_and(|raw| !raw.is_empty())
    })
}
794
795impl Vm {
796    /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797    /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798    /// the Vec replacement to keep the shadow valid.
799    #[inline(always)]
800    fn frames_resync(&mut self) {
801        // Debug-only Week 1 — see `frames_push_sync` comment.
802        #[cfg(debug_assertions)]
803        {
804            self.frames_top = self.frames.len() as u32;
805        }
806    }
807
808    // ====================================================================
809    // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810    //
811    // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812    // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813    // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814    // sites to consume them. Phase 4 removes Vec<CallFrame>.
815    //
816    // Preconditions (debug-asserted):
817    // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818    // - self.stack.len() > base + max_stack (caller has grown stack)
819    // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820    // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821    //
822    // No release-build cost when unused (LTO strips dead methods).
823    // ====================================================================
824
825    /// Write a Lua frame's closure pointer into `stack[base-2]`.
826    /// The caller must ensure `base >= 2` and the slot is within the
827    /// stack's allocated range.
828    #[inline]
829    #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830    fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831        debug_assert!(
832            base >= 2,
833            "frame closure slot needs base >= 2; got {}",
834            base
835        );
836        let idx = (base - 2) as usize;
837        debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838        self.stack[idx] = Value::Closure(cl);
839    }
840
841    /// Read a Lua frame's closure pointer from `stack[base-2]`.
842    /// Returns `None` if the slot doesn't hold a closure (caller is
843    /// expected to treat that as a corrupt frame).
844    ///
845    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846    /// to avoid the enum-match cost on the hot path. Tag check via
847    /// 1-byte load + branch + `as_closure_unchecked` payload load.
848    #[inline]
849    #[allow(dead_code)]
850    fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851        debug_assert!(base >= 2);
852        let v = self.stack.get((base - 2) as usize)?;
853        if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854            // SAFETY: tag byte just verified == CLOSURE.
855            Some(unsafe { v.as_closure_unchecked() })
856        } else {
857            None
858        }
859    }
860
861    /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862    /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863    /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864    /// cleanly through the value stack without losing bits.
865    #[inline]
866    #[allow(dead_code)]
867    fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868        debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869        let idx = (base - 1) as usize;
870        debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871        self.stack[idx] = Value::Int(marker.to_raw());
872    }
873
874    /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875    /// `None` if the slot isn't a `Value::Int` (caller treats as a
876    /// corrupt frame); the kind tag itself may still be invalid, in
877    /// which case [`FrameMarker::kind`] returns `None` on the result.
878    ///
879    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880    /// for the tag check + `as_int_unchecked` for the payload load.
881    #[inline]
882    #[allow(dead_code)]
883    fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884        debug_assert!(base >= 1);
885        let v = self.stack.get((base - 1) as usize)?;
886        if v.tag_byte() == crate::runtime::value::tag::INT {
887            // SAFETY: tag byte just verified == INT.
888            Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889                unsafe { v.as_int_unchecked() },
890            ))
891        } else {
892            None
893        }
894    }
895
    /// Build the raw `Vm` struct without main coroutine / RNG seed / library
    /// setup. Private helper shared by `Vm::new` and `Vm::new_minimal`; the
    /// caller is responsible for the rest of the bring-up.
    ///
    /// What is already in place on return: a fresh heap with its
    /// version-dependent GC flags, an empty globals table, and the interned
    /// metamethod names. What is deliberately left blank: `main_coro` is
    /// `None` and `rng` is all zeroes until the caller fills them in.
    fn new_inner(version: LuaVersion) -> Vm {
        let mut heap = Heap::new();
        // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
        // values strongly. gc.lua's "weak tables" section relies on that.
        heap.no_ephemeron = version <= LuaVersion::Lua51;
        // PUC 5.3 needs two GC cycles to finalize a table caught in a
        // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
        // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
        heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
        let globals = heap.new_table();
        // Intern the metamethod key strings in `MM_NAMES` order, so entry `i`
        // lines up with the `Mm` variant whose discriminant is `i`.
        let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();

        Vm {
            heap,
            stack: Vec::new(),
            frames: Vec::new(),
            // Shadow of `frames.len()`; starts in sync with the empty Vec.
            frames_top: 0,
            open_upvals: Vec::new(),
            tbc: Vec::new(),
            top: 0,
            globals,
            // Per-type metatable slots; slot 3 is the shared string
            // metatable (see `set_string_metatable`).
            type_mt: [None; 5],
            mm_names,
            c_depth: 0,
            pcall_depth: 0,
            nny: 0,
            msgh_depth: 0,
            terminating: None,
            // Placeholder state; callers seed it via `rng_seed`.
            rng: [0; 4],
            // Reference point for `uptime`.
            started: std::time::Instant::now(),
            version,
            closing_err: None,
            current: None,
            main_ctx: None,
            yielding: None,
            native_nresults: -1,
            // Filled in by `new_minimal`.
            main_coro: None,
            gc_mode: "incremental",
            gc_top: 0,
            gc_pause: 200,
            gc_stepmul: 100,
            gc_stepsize: 13,
            gc_finalizing: false,
            capi_stack: Vec::new(),
            capi_cstr_pin: None,
            warn_state: WarnState::Off,
            warn_buf: Vec::new(),
            warn_log: Vec::new(),
            instr_budget: None,
            // `load` refuses binary chunks while this is false.
            bytecode_loading: true,
            puc_bytecode_loading: false,
            loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
            registry: None,
            file_mt: None,
            io_input: None,
            io_output: None,
            hook: HookState::default(),
            in_hook: false,
            pending_tailcalls: 0,
            errored_native: None,
            hook_ftransfer: 0,
            hook_ntransfer: 0,
            pending_tm: None,
            pending_is_hook: false,
            error_traceback: None,
            public_call_depth: 0,
            running_natives: Vec::new(),
            running_native_slots: Vec::new(),
            // v1.1 A2 — JIT-specific state factored into `JitState`
            // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
            // `install_jit_backend` / `luaL_newstate` swap in
            // `CraneliftBackend` for callers that want JIT acceleration.
            jit: crate::vm::jit_state::JitState::with_null_backend(),
            // v1.1 B12 — host roots ticket pool for the `Lua` facade.
            host_roots: Vec::new(),
            // v1.3 Phase ML — MacroLua registry. Pre-populated with
            // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
            // when this Vm is constructed under `LuaVersion::MacroLua`.
            macro_registry: if version == LuaVersion::MacroLua {
                crate::frontend::macro_expander::MacroRegistry::with_builtins()
            } else {
                crate::frontend::macro_expander::MacroRegistry::new()
            },
            host_roots_free: Vec::new(),
            sort_scratch: Vec::new(),
            // v1.2 Track B — LuaUserdata trait sugar's per-Vm
            // metatable cache. Populated lazily by register_userdata.
            userdata_metatables: std::collections::HashMap::new(),
            // v1.1 B6 — error classification metadata. Defaults to
            // Runtime; set at known sites (syntax / budget trip /
            // native error / type error).
            last_error_kind: crate::vm::error::LuaErrorKind::default(),
            last_error_source: None,
            // v1.1 B10 Stage 1 — async embedder fields. Defaults
            // preserve sync behavior bit-for-bit (`async_mode = false`
            // means the budget hot loop errors out exactly as v1.0).
            async_mode: false,
            async_waker: None,
            async_slice_size: 10_000,
            host_yield_pending: false,
            // v1.1 B10 Stage 2 — pending async-native state. Empty by
            // default; populated only by the dispatcher when an
            // async-marked NativeClosure is invoked under async_mode.
            pending_async_native_fut: None,
            pending_async_native_ctx: None,
        }
    }
1006
1007    /// Build a fully-loaded Vm — the default for embedders that want PUC's
1008    /// standard library surface. Equivalent to `Vm::new_minimal(version)`
1009    /// followed by `vm.open_all_libs()`.
1010    pub fn new(version: LuaVersion) -> Vm {
1011        let mut vm = Vm::new_minimal(version);
1012        vm.open_all_libs();
1013        vm
1014    }
1015
1016    /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1017    /// that want a sandbox (Redis-style scripts, in-game scripting with
1018    /// a curated API) call this and then `open_base` / `open_math` / etc.
1019    /// selectively. The Vm is otherwise fully initialized (main coroutine,
1020    /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1021    pub fn new_minimal(version: LuaVersion) -> Vm {
1022        let mut vm = Vm::new_inner(version);
1023        let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1024        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1025        unsafe { mc.as_mut() }.status = CoroStatus::Running;
1026        vm.main_coro = Some(mc);
1027        let (a, b) = vm.rng_auto_seed();
1028        vm.rng_seed(a as u64, b as u64);
1029        vm
1030    }
1031
    /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
    /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
    /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
    /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
    /// closures already populated `Proto.jit: JitProtoState::Compiled`
    /// does NOT evict those cached entries — call right after
    /// construction for a clean swap.
    ///
    /// Naming: `install_jit_backend` (not `install_default_jit`)
    /// because the "default" in luna-core is `NullJitBackend`; the
    /// "default JIT" lives in the `luna` crate.
    ///
    /// Both arguments are boxed and stored on the `jit` sidecar, replacing
    /// (and thereby dropping) whatever compilers were installed before.
    pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
    where
        C: crate::jit::IntChunkCompiler + 'static,
        T: crate::jit::TraceCompiler + 'static,
    {
        // Chunk compiler first, then trace compiler; each assignment drops
        // the previous occupant of that slot.
        self.jit.chunk_compiler = Box::new(chunk);
        self.jit.trace_compiler = Box::new(trace);
    }
1051
    /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
    /// storage holder. Default is [`crate::jit::NullJitStorage`];
    /// the `luna_jit` crate's `install_default_jit` pairs this with
    /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
    /// also install a fresh `CraneliftJitStorage`. Storage holds
    /// the per-`Vm` JIT cache + handle collections that used to be
    /// `thread_local!`s in `luna_jit::jit_backend`.
    ///
    /// Idempotency: re-installing storage on a Vm that already
    /// holds compiled-trace pointers WILL evict their owners (the
    /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
    /// pages). Call right after construction for a clean swap.
    pub fn install_jit_storage<S>(&mut self, storage: S)
    where
        S: crate::jit::JitStorage + 'static,
    {
        // The assignment drops the previously installed storage box.
        self.jit.storage = Box::new(storage);
    }
1070
    /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
    /// reports "skipped" so every closure stays on the interpreter
    /// path, and the trace recorder's compile attempt always returns
    /// `None`. Intended for tests that want to verify the trait
    /// boundary works in a JIT-free configuration, and for the future
    /// `luna-core` build path that ships without Cranelift.
    ///
    /// Calling this on a Vm whose closures already populated
    /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
    /// cached entries — the dispatcher will still call into them. For
    /// a truly JIT-free run, call this immediately after construction.
    ///
    /// Only the two compiler slots are replaced here; the JIT storage
    /// installed via `install_jit_storage` is not touched.
    pub fn install_null_jit(&mut self) {
        // `NullJitBackend` fills both the chunk-compiler and the
        // trace-compiler slot.
        self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
        self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
    }
1086
    /// Open the full standard library set on a `new_minimal`-built Vm:
    /// base, `math`, `table`, `string`, `utf8`, `os`/`io`, `debug`,
    /// `coroutine` and `package`, plus `bit32` when the Vm's version is 5.2.
    /// `Vm::new` calls this; sandboxed embedders open libraries one at a
    /// time instead (`open_base`, `open_math`, `open_table`, …).
    pub fn open_all_libs(&mut self) {
        self.open_base();
        self.open_math();
        self.open_table();
        self.open_string();
        self.open_utf8();
        self.open_os_io();
        self.open_debug();
        self.open_coroutine();
        self.open_package();
        // PUC 5.2 introduced `bit32` and 5.3 retired it (the native bitwise
        // operators replace it on 64-bit integers). Only expose it under 5.2
        // so bitwise.lua's first line (`bit32.band(...)`) resolves without
        // leaking the global into newer dialects.
        if self.version == LuaVersion::Lua52 {
            self.open_bit32();
        }
    }
1108
    /// Install the base library (`print`, `type`, `pairs`, `tostring`,
    /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
    /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
    /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
    /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
    /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
    /// once per Vm.
    ///
    /// Thin wrapper over `crate::vm::builtins::open_base`.
    pub fn open_base(&mut self) {
        crate::vm::builtins::open_base(self);
    }
    /// Install the `math` standard library.
    ///
    /// Thin wrapper over `crate::vm::lib_math::open_math`.
    pub fn open_math(&mut self) {
        crate::vm::lib_math::open_math(self);
    }
    /// Install the `table` standard library.
    ///
    /// Thin wrapper over `crate::vm::lib_table::open_table`.
    pub fn open_table(&mut self) {
        crate::vm::lib_table::open_table(self);
    }
    /// Install the `string` standard library (and the shared string metatable).
    ///
    /// Thin wrapper over `crate::vm::lib_string::open_string`.
    pub fn open_string(&mut self) {
        crate::vm::lib_string::open_string(self);
    }
    /// Install the `utf8` standard library (5.3+).
    ///
    /// Thin wrapper over `crate::vm::lib_utf8::open_utf8`.
    pub fn open_utf8(&mut self) {
        crate::vm::lib_utf8::open_utf8(self);
    }
    /// Install the `os` and `io` standard libraries in one step.
    ///
    /// `os` and `io` are merged because file userdata shares state with both
    /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
    /// wraps `os.execute`'s shell).
    ///
    /// Thin wrapper over `crate::vm::lib_os_io::open_os_io`.
    pub fn open_os_io(&mut self) {
        crate::vm::lib_os_io::open_os_io(self);
    }
    /// Install the `debug` standard library (introspection / hooks). Off by
    /// default for sandbox embedders, i.e. a `new_minimal` Vm does not have
    /// it until this is called.
    ///
    /// Thin wrapper over `crate::vm::lib_debug::open_debug`.
    pub fn open_debug(&mut self) {
        crate::vm::lib_debug::open_debug(self);
    }
    /// Install the `coroutine` standard library.
    ///
    /// Thin wrapper over `crate::vm::lib_coroutine::open_coroutine`.
    pub fn open_coroutine(&mut self) {
        crate::vm::lib_coroutine::open_coroutine(self);
    }
    /// Install `package` plus the 5.1-only `module` and `package.seeall`
    /// aliases.
    ///
    /// Thin wrapper over `crate::vm::lib_os_io::open_package` (the
    /// implementation lives in the `lib_os_io` module, not a module of its
    /// own).
    pub fn open_package(&mut self) {
        crate::vm::lib_os_io::open_package(self);
    }
    /// Install the 5.2-only `bit32` library (5.3+ retired it in favour of
    /// native bitwise ops on 64-bit integers).
    ///
    /// This method itself performs no version check; `open_all_libs` is what
    /// restricts it to 5.2. Thin wrapper over
    /// `crate::vm::lib_bit32::open_bit32`.
    pub fn open_bit32(&mut self) {
        crate::vm::lib_bit32::open_bit32(self);
    }
1159
1160    /// xoshiro256** next.
1161    pub(crate) fn rng_next(&mut self) -> u64 {
1162        let s = &mut self.rng;
1163        let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1164        let t = s[1] << 17;
1165        s[2] ^= s[0];
1166        s[3] ^= s[1];
1167        s[1] ^= s[2];
1168        s[0] ^= s[3];
1169        s[2] ^= t;
1170        s[3] = s[3].rotate_left(45);
1171        result
1172    }
1173
1174    /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1175    pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1176        // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1177        // state), then 16 discards to spread the seed. Matches PUC's exact
1178        // sequence so the low-level conformance test passes.
1179        self.rng = [a, 0xff, b, 0];
1180        for _ in 0..16 {
1181            self.rng_next();
1182        }
1183    }
1184
    /// Time elapsed since this VM was created (os.clock approximation).
    ///
    /// Measured from the `Instant` captured in `started` when the struct
    /// was built.
    pub(crate) fn uptime(&self) -> std::time::Duration {
        self.started.elapsed()
    }
1189
1190    /// Entropy for math.randomseed() with no arguments.
1191    pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1192        let t = std::time::SystemTime::now()
1193            .duration_since(std::time::UNIX_EPOCH)
1194            .map(|d| d.as_nanos() as u64)
1195            .unwrap_or(0);
1196        let addr = &self.rng as *const _ as u64;
1197        (t as i64, addr as i64)
1198    }
1199
1200    /// Allocate a native function object (no upvalues): builtin registration.
1201    pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1202        Value::Native(self.heap.new_native(f, Box::new([])))
1203    }
1204
    /// Allocate a native function object with captured upvalues.
    ///
    /// `upvals` is handed to the heap together with `f`; the resulting
    /// handle is returned wrapped as `Value::Native`.
    pub fn native_with(
        &mut self,
        f: crate::runtime::value::NativeFn,
        upvals: Box<[Value]>,
    ) -> Value {
        Value::Native(self.heap.new_native(f, upvals))
    }
1213
    /// Install the shared string metatable (string library, P04).
    ///
    /// Passing `None` clears it. The value is stored in slot 3 of the
    /// per-type metatable array `type_mt`.
    pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
        self.type_mt[3] = mt;
    }
1218
    /// The current globals table (`_G` / `_ENV` source for new chunks).
    ///
    /// Returns the handle by value; the table itself is not copied.
    pub fn globals(&self) -> Gc<Table> {
        self.globals
    }
1223
1224    /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1225    /// Library code that pushes a known number of fresh slots — e.g.
1226    /// `table.unpack` returning N values — consults this to refuse when
1227    /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1228    /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1229    /// coroutine's already-built table eats a few slots, so an unpack of
1230    /// ~lim values can't fit.
1231    pub(crate) fn stack_room(&self) -> i64 {
1232        PUC_MAXSTACK - (self.stack.len() as i64)
1233    }
1234
    /// Repoint the thread's "global table" used by *future* `Vm::load` calls
    /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
    /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
    /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
    ///
    /// Only the handle stored on the Vm is replaced; the previous table is
    /// not modified.
    pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
        self.globals = env;
    }
1242
    /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
    /// 5.5). Determines numeric semantics, available standard libraries, and
    /// metamethod behavior.
    ///
    /// This is the value passed to the constructor, returned by value.
    pub fn version(&self) -> LuaVersion {
        self.version
    }
1249
1250    /// Set a global by name. `v` may be any `IntoValue`: a primitive
1251    /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1252    /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1253    /// `Gc<NativeClosure>` handle.
1254    ///
1255    /// Returns `Err(LuaError)` only if the globals table overflows
1256    /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1257    /// String interning + key construction cannot fail.
1258    ///
1259    /// ```
1260    /// # use luna_core::vm::Vm;
1261    /// # use luna_core::version::LuaVersion;
1262    /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1263    /// vm.set_global("answer", 42).unwrap();
1264    /// vm.set_global("ratio", 0.5_f64).unwrap();
1265    /// vm.set_global("hello", "world").unwrap();
1266    /// let r = vm.eval("return answer, ratio, hello").unwrap();
1267    /// assert_eq!(r.len(), 3);
1268    /// ```
1269    pub fn set_global<V: crate::vm::IntoValue>(
1270        &mut self,
1271        name: &str,
1272        v: V,
1273    ) -> Result<(), LuaError> {
1274        let v = v.into_value(self);
1275        let k = Value::Str(self.heap.intern(name.as_bytes()));
1276        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1277        unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1278        self.heap
1279            .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1280        Ok(())
1281    }
1282
1283    /// Backward write barrier shorthand for native lib code: demote `t` from
1284    /// BLACK back to gray so the next propagate step re-traces its fields.
1285    /// No-op outside Propagate (parent is never BLACK at mutation time).
1286    pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1287        self.heap
1288            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1289    }
1290
1291    /// Forward write barrier shorthand: a closed upvalue is a single-slot
1292    /// container — `barrier_forward` is cheaper than `barrier_back` here.
1293    /// No-op outside Propagate.
1294    pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1295        self.heap
1296            .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1297    }
1298
    /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
    /// under non-MacroLua dialects (the macro is stored but the load
    /// path only consults the registry when
    /// `self.version == LuaVersion::MacroLua`).
    ///
    /// `name` is stored without the leading `@` — source code writes
    /// `@double(x)` to invoke a macro registered as `"double"`.
    ///
    /// Thin wrapper over the registry's `register`.
    pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
        self.macro_registry.register(name, m);
    }
1309
    /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom).
    /// Mostly useful for tests / dogfood resets.
    ///
    /// Note that the built-ins go too: under `LuaVersion::MacroLua` the
    /// registry starts out pre-populated with them, and nothing here puts
    /// them back.
    pub fn clear_macros(&mut self) {
        self.macro_registry.clear();
    }
1315
1316    /// Parse + compile a chunk and close it over the globals table.
1317    pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1318        // Reject oversize input *before* handing the parser/lexer a
1319        // potentially multi-GB slice. The PUC-shaped `not enough memory`
1320        // message keeps `heavy.lua::loadrep` compatibility: that test
1321        // accepts either `string length overflow` or `not enough memory`
1322        // as the failure mode for a feeder loop that outruns the host
1323        // allocator. See `set_loader_input_budget`.
1324        if src.len() > self.loader_input_budget {
1325            return Err(SyntaxError {
1326                line: 0,
1327                msg: b"not enough memory".to_vec(),
1328            });
1329        }
1330        // a precompiled (binary) chunk is undumped; source is parsed + compiled
1331        let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1332        if is_bytecode && !self.bytecode_loading {
1333            return Err(SyntaxError {
1334                line: 0,
1335                msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1336            });
1337        }
1338        let proto = if is_bytecode {
1339            let allow_puc = self.puc_bytecode_loading;
1340            crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1341                |msg| SyntaxError {
1342                    line: 0,
1343                    msg: msg.into_bytes(),
1344                },
1345            )?
1346        } else if self.version.is_macro_lua() {
1347            // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1348            // token vec, run the macro expander pre-pass against the
1349            // per-Vm registry, then hand the rewritten stream to
1350            // `parse_tokens`. The AST + compiler are dialect-agnostic
1351            // because by this point all `@`/quote tokens are gone.
1352            let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1353            let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1354            loop {
1355                let t = lexer.next_token()?;
1356                let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1357                raw.push(t);
1358                if eof {
1359                    break;
1360                }
1361            }
1362            // Drop the trailing Eof — expander operates on the body and
1363            // `parse_tokens` reinserts Eof when it runs out of tokens.
1364            raw.pop();
1365            let expanded = self.macro_registry.expand(raw)?;
1366            let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1367            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1368        } else {
1369            let ast = parse(src, self.version)?;
1370            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1371        };
1372        // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1373        // upvalue with the globals table when the closure has *exactly* one
1374        // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1375        // function with two-or-more upvalues keeps every cell at nil; the
1376        // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1377        // :293's `assert(x() == nil)` pins this contract.
1378        let n = proto.upvals.len();
1379        let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1380        if n == 0 {
1381            // synthetic main chunk has no declared upvalues, but the engine
1382            // still expects at least one cell so the host can probe via
1383            // `debug.upvalueid` etc. Match the historical luna shape.
1384            ups.push(
1385                self.heap
1386                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1387            );
1388        } else if n == 1 {
1389            ups.push(
1390                self.heap
1391                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1392            );
1393        } else {
1394            for _ in 0..n {
1395                ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1396            }
1397        }
1398        Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1399    }
1400
1401    /// Compile and run `src` as an anonymous chunk; return its results.
1402    /// Source name in the traceback is `"=eval"`. Syntax errors are
1403    /// surfaced as `LuaError` carrying the formatted PUC-style message
1404    /// (interned through the heap so the error value composes with
1405    /// `pcall` / `error_text` like any runtime error).
1406    pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
1407        self.eval_chunk(src, "=eval")
1408    }
1409
1410    /// Render an error value for messages/tests. Non-string errors —
1411    /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1412    /// (`"(error object is a table value)"`); embedders that need
1413    /// structured payloads should inspect `e.0` directly. Errors whose
1414    /// text starts with `"native panic:"` indicate a Rust panic
1415    /// crossed `catch_unwind` — the Vm may be inconsistent and should
1416    /// be dropped (do not reuse).
1417    pub fn error_text(&self, e: &LuaError) -> String {
1418        match e.0 {
1419            Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1420            v => format!("(error object is a {} value)", v.type_name()),
1421        }
1422    }
1423
1424    /// Call any callable value from the host (or from natives like pcall).
1425    pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1426        // host-level entry (no enclosing exec): drop any error state from a
1427        // prior call that propagated uncaught (`error_traceback` would
1428        // otherwise leak into the next debug.traceback call).
1429        if self.public_call_depth == 0 {
1430            self.error_traceback = None;
1431        }
1432        self.public_call_depth += 1;
1433        // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1434        // chunk whose body fits the S1 int-arith whitelist short-circuits
1435        // the whole interpreter dispatch and runs straight through the
1436        // mmap'd native code. The lookup is one Cell::get + one match —
1437        // the slow path (compile attempt on first reach) is paid once per
1438        // Proto.
1439        if args.is_empty()
1440            && let Value::Closure(cl) = f
1441            && let Some(vs) = self.try_jit_call(cl)
1442        {
1443            self.public_call_depth -= 1;
1444            return Ok(vs);
1445        }
1446        let r = self.call_value_impl(f, args, true);
1447        self.public_call_depth -= 1;
1448        r
1449    }
1450
1451    /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1452    /// `Some(values)` when the cached native fn is callable for a
1453    /// zero-arg call. (Non-zero-arg dispatch is handled by
1454    /// `try_jit_call_op` from inside `begin_call`.)
1455    fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1456        use crate::runtime::function::JitProtoState;
1457        if !self.jit.enabled {
1458            return None;
1459        }
1460        let proto = cl.proto;
1461        if let JitProtoState::Untried = proto.jit.get() {
1462            self.populate_jit_cache(proto);
1463        }
1464        match proto.jit.get() {
1465            JitProtoState::Compiled {
1466                entry,
1467                num_args: 0,
1468                returns_one,
1469                arg_float_mask: _,
1470                arg_table_mask: _,
1471                ret_is_float,
1472                ret_is_table,
1473            } => {
1474                // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1475                let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1476                // P11-S5c / S5d.J — install the active Vm + closure
1477                // for any Rust helper the JIT'd code may call (e.g.
1478                // `luna_jit_new_table`, `luna_jit_upval_get`) via
1479                // cranelift `Linkage::Import`. RAII clear on return.
1480                // Chunks with no upvalue reads don't touch the closure
1481                // slot, paying nothing.
1482                // v1.1 A1 Session A — route through chunk_compiler so
1483                // the NullJitBackend path stays inert. Raw-ptr arg
1484                // avoids the &mut self borrow conflict against the
1485                // shared self.jit.chunk_compiler read.
1486                let vm_ptr: *mut Vm = self;
1487                let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1488                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1489                let r = unsafe { f() };
1490                drop(_jit_vm_guard);
1491                // P11-S5d.E' — a JIT helper may have detected a metatable
1492                // on a table operand and parked a deopt request here.
1493                // Discard the sentinel value and return None so the caller
1494                // re-runs the call through the interpreter, which honours
1495                // __index/__newindex.
1496                if self.jit.pending_err.take().is_some() {
1497                    return None;
1498                }
1499                Some(if returns_one {
1500                    let v = if ret_is_float {
1501                        Value::Float(f64::from_bits(r as u64))
1502                    } else if ret_is_table {
1503                        Value::Table(crate::runtime::Gc::from_ptr(
1504                            r as *mut crate::runtime::Table,
1505                        ))
1506                    } else {
1507                        Value::Int(r)
1508                    };
1509                    vec![v]
1510                } else {
1511                    Vec::new()
1512                })
1513            }
1514            // Non-zero-arg Compiled state: call_value's empty-args
1515            // fast path can't drive it. Op::Call handles those.
1516            JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1517        }
1518    }
1519
1520    /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1521    /// `Compiled { … }` or `Failed`; idempotent on already-populated
1522    /// states (call sites guard with a get before invoking).
1523    ///
1524    /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1525    /// `proto.code`. Compiled artefacts live in the thread-local
1526    /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1527    /// `Vm`s loading the same source skip the cranelift compile step
1528    /// entirely.
1529    fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1530        use crate::runtime::function::JitProtoState;
1531        let version = self.version();
1532        let pre53 = version <= crate::version::LuaVersion::Lua53;
1533        // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1534        // are Float). The JIT's `GetUpval` ValueRead path uses this
1535        // to default-pin upvalue reads to Float without a tag check.
1536        let float_only = version <= crate::version::LuaVersion::Lua52;
1537        // v2.0 Track J sub-step J-B — split-borrow JitState so the
1538        // trait method can take `&mut dyn JitStorage` without
1539        // double-borrowing self.jit.
1540        let jit = &mut self.jit;
1541        let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1542        match jit
1543            .chunk_compiler
1544            .try_compile(storage, proto, pre53, float_only)
1545        {
1546            crate::jit::CompileResult::Compiled {
1547                entry,
1548                num_args,
1549                returns_one,
1550                arg_float_mask,
1551                arg_table_mask,
1552                ret_is_float,
1553                ret_is_table,
1554            } => {
1555                proto.jit.set(JitProtoState::Compiled {
1556                    entry,
1557                    num_args,
1558                    returns_one,
1559                    arg_float_mask,
1560                    arg_table_mask,
1561                    ret_is_float,
1562                    ret_is_table,
1563                });
1564            }
1565            crate::jit::CompileResult::Skipped => {
1566                proto.jit.set(JitProtoState::Failed);
1567            }
1568        }
1569    }
1570
1571    /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
1572    /// before `push_frame`. Returns `true` when the call was handled
1573    /// in-place (no new Lua frame). Constraints: every arg slot must
1574    /// be `Value::Int`, the cached arity must match the call site's
1575    /// `nargs`, the host wanted-count `wanted` is honoured by
1576    /// `finish_results`. Also bails when a debug hook is armed —
1577    /// JIT'd code does not fire line / call / return hooks, so any
1578    /// active hook makes the interpreter the source of truth.
1579    fn try_jit_call_op(
1580        &mut self,
1581        cl: Gc<LuaClosure>,
1582        func_slot: u32,
1583        nargs: u32,
1584        wanted: i32,
1585    ) -> bool {
1586        use crate::runtime::function::JitProtoState;
1587        if !self.jit.enabled {
1588            return false;
1589        }
1590        // Any active debug hook means the interpreter has to run the
1591        // call so the hook gets the expected events.
1592        if self.hook.func.is_some() || self.hook.rust_func.is_some() {
1593            return false;
1594        }
1595        let proto = cl.proto;
1596        if let JitProtoState::Untried = proto.jit.get() {
1597            self.populate_jit_cache(proto);
1598        }
1599        let JitProtoState::Compiled {
1600            entry,
1601            num_args,
1602            returns_one,
1603            arg_float_mask,
1604            arg_table_mask,
1605            ret_is_float,
1606            ret_is_table,
1607        } = proto.jit.get()
1608        else {
1609            return false;
1610        };
1611        if num_args as u32 != nargs {
1612            return false;
1613        }
1614        // Pack args into i64 bit-patterns per the per-slot expected
1615        // kind. A Float-typed slot accepts Value::Float verbatim and
1616        // promotes Value::Int(x) via i64 → f64; a Table-typed slot
1617        // accepts only Value::Table and passes the raw Gc ptr; an
1618        // Int-typed slot accepts only Value::Int. Any other shape
1619        // bails to the interpreter so the call's actual dynamics
1620        // (metamethod dispatch / type-coerce) take over.
1621        let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
1622            [0; crate::jit::MAX_JIT_ARITY as usize];
1623        for i in 0..num_args as usize {
1624            let v = self.stack[(func_slot + 1) as usize + i];
1625            let want_float = (arg_float_mask >> i) & 1 == 1;
1626            let want_table = (arg_table_mask >> i) & 1 == 1;
1627            args[i] = match (want_table, want_float, v) {
1628                (true, _, Value::Table(t)) => t.as_ptr() as i64,
1629                (false, false, Value::Int(x)) => x,
1630                (false, true, Value::Float(f)) => f.to_bits() as i64,
1631                (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
1632                _ => return false,
1633            };
1634        }
1635        // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
1636        // matching guard in `try_jit_call`.
1637        // v1.1 A1 Session A — route through chunk_compiler.
1638        let vm_ptr: *mut Vm = self;
1639        let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1640        // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1641        let r = unsafe {
1642            match num_args {
1643                0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
1644                1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
1645                2 => {
1646                    (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
1647                }
1648                3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
1649                    args[0], args[1], args[2],
1650                ),
1651                4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
1652                    args[0], args[1], args[2], args[3],
1653                ),
1654                _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
1655            }
1656        };
1657        drop(_jit_vm_guard);
1658        // P11-S5d.E' — see matching path in `try_jit_call`. A helper
1659        // flagged a metatable on a table operand; bail to the interpreter
1660        // so `push_frame` runs the call from scratch.
1661        if self.jit.pending_err.take().is_some() {
1662            return false;
1663        }
1664        // Write result at func_slot, replacing the closure value, then
1665        // hand to finish_results to pad/truncate per the call site's
1666        // `wanted` count.
1667        if returns_one {
1668            let v = if ret_is_float {
1669                Value::Float(f64::from_bits(r as u64))
1670            } else if ret_is_table {
1671                Value::Table(crate::runtime::Gc::from_ptr(
1672                    r as *mut crate::runtime::Table,
1673                ))
1674            } else {
1675                Value::Int(r)
1676            };
1677            self.stack[func_slot as usize] = v;
1678            self.finish_results(func_slot, 1, wanted);
1679        } else {
1680            self.finish_results(func_slot, 0, wanted);
1681        }
1682        true
1683    }
1684
1685    /// `call_value` with control over the `from_c` debug boundary. A `__close`
1686    /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1687    /// invokes it inside that ci), so it is called with `from_c = false`: its
1688    /// debug parent is the closing function, not a synthetic C level.
1689    fn call_value_impl(
1690        &mut self,
1691        f: Value,
1692        args: &[Value],
1693        from_c: bool,
1694    ) -> Result<Vec<Value>, LuaError> {
1695        if self.c_depth >= MAX_C_DEPTH {
1696            return Err(self.rt_err("stack overflow"));
1697        }
1698        self.c_depth += 1;
1699        let func_slot = self.stack.len() as u32;
1700        self.stack.push(f);
1701        self.stack.extend_from_slice(args);
1702        self.top = self.stack.len() as u32;
1703        let r = self.call_at(func_slot, args.len() as u32, from_c);
1704        self.c_depth -= 1;
1705        if r.is_err()
1706            && self.yielding.is_none()
1707            && self.terminating.is_none()
1708            && !self.host_yield_pending
1709            && self.pending_async_native_fut.is_none()
1710        {
1711            // A `coroutine.yield` in flight raises a sentinel error to unwind the
1712            // Rust stack, but the suspended coroutine's frames/registers (which
1713            // sit at/above `func_slot`) must survive for the next resume — so we
1714            // only truncate on a real error. A self-close termination is in the
1715            // same boat: the dying thread's state is discarded wholesale.
1716            // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1717            // the same boat as `yielding`: the next `EvalFuture::poll`
1718            // resumes the same call, so the in-flight frames must
1719            // survive.
1720            self.stack.truncate(func_slot as usize);
1721            self.top = func_slot;
1722        }
1723        r
1724    }
1725
1726    /// Invoke `f` with the running thread marked non-yieldable for the duration
1727    /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1728    /// boundary and errors instead of suspending. Used by library callbacks
1729    /// (sort comparator, gsub replacement) that run via synchronous Rust
1730    /// recursion and so could not be re-entered after a yield.
1731    pub(crate) fn call_noyield(
1732        &mut self,
1733        f: Value,
1734        args: &[Value],
1735    ) -> Result<Vec<Value>, LuaError> {
1736        self.nny += 1;
1737        let r = self.call_value(f, args);
1738        self.nny -= 1;
1739        r
1740    }
1741
1742    // ---- coroutines (P05) ----
1743
1744    pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1745        // The new coroutine inherits the creating thread's current globals
1746        // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1747        // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1748        // it here picks the creator regardless of which coro is running.
1749        self.heap.new_coro(body, self.globals)
1750    }
1751
1752    /// Is `t` the thread whose context is currently live in the VM?
1753    pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1754        match (self.current, t) {
1755            (None, None) => true,
1756            (Some(a), Some(b)) => a.ptr_eq(b),
1757            _ => false,
1758        }
1759    }
1760
1761    /// Read an open-upvalue slot from its owning thread's stack (the live VM
1762    /// stack if that thread is current, else its saved context).
1763    #[doc(hidden)]
1764    pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1765        let s = slot as usize;
1766        if self.is_current_thread(thread) {
1767            self.stack[s]
1768        } else {
1769            match thread {
1770                Some(co) => co.stack[s],
1771                None => self.main_ctx.as_ref().expect("main context").stack[s],
1772            }
1773        }
1774    }
1775
1776    fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1777        let s = slot as usize;
1778        if self.is_current_thread(thread) {
1779            self.stack[s] = v;
1780        } else {
1781            match thread {
1782                Some(co) => {
1783                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1784                    unsafe { co.as_mut() }.stack[s] = v;
1785                    // co.stack is traced by Coro::trace; demote co back to
1786                    // gray so propagate re-traces this slot if it was
1787                    // already black.
1788                    self.heap
1789                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1790                }
1791                None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1792            }
1793        }
1794    }
1795
1796    /// Whether `co` is the main thread's identity object.
1797    pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1798        self.main_coro.is_some_and(|m| m.ptr_eq(co))
1799    }
1800
1801    /// The status of `co` from the caller's view. The main thread's identity
1802    /// object has no stored status — it is "running" when nothing else runs,
1803    /// else "normal" (it resumed the active coroutine).
1804    pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1805        if self.is_main_coro(co) {
1806            if self.current.is_none() {
1807                CoroStatus::Running
1808            } else {
1809                CoroStatus::Normal
1810            }
1811        } else {
1812            co.status
1813        }
1814    }
1815
1816    /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1817    /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1818    /// context. Handlers see the coroutine's death error (if it died by error)
1819    /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1820    /// with error `e` and no handler overrode it; `Err` means a handler raised.
1821    pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1822        // re-entrant close: a __close handler closed its own coroutine while the
1823        // outer close is mid-flight (its context is live). Report success and let
1824        // the outer close finish — re-entering the swap would corrupt the stack.
1825        if self.current.is_some_and(|c| c.ptr_eq(co)) {
1826            return Ok(None);
1827        }
1828        // A chain of coroutines whose `__close` handlers each close the previous
1829        // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1830        // The calling handler's `call_value` has already pushed `c_depth` to the
1831        // cap, so here it reads as full first — report PUC's "C stack overflow"
1832        // before the next handler call would surface the plainer "stack overflow".
1833        if self.c_depth >= MAX_C_DEPTH {
1834            return Err(self.rt_err("C stack overflow"));
1835        }
1836        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1837        let death_err = unsafe { co.as_mut() }.error_value.take();
1838        // swap the caller's live context out (into a GC-rooted home) and the
1839        // coroutine's in, mirroring resume_coro, so the __close handlers run on
1840        // the coroutine's stack while everything stays rooted.
1841        let resumer = self.current;
1842        let rctx = self.take_ctx();
1843        match resumer {
1844            Some(r) => {
1845                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1846                let m = unsafe { r.as_mut() };
1847                m.stack = rctx.stack;
1848                m.frames = rctx.frames;
1849                m.open_upvals = rctx.open_upvals;
1850                m.tbc = rctx.tbc;
1851                m.top = rctx.top;
1852                m.pcall_depth = rctx.pcall_depth;
1853            }
1854            None => self.main_ctx = Some(rctx),
1855        }
1856        self.load_coro_ctx(co);
1857        self.current = Some(co);
1858        let result = self.close_slots(0, death_err);
1859        // discard the (now-closed) coroutine context and restore the caller
1860        let _ = self.take_ctx();
1861        match resumer {
1862            Some(r) => {
1863                self.load_coro_ctx(r);
1864                self.current = Some(r);
1865            }
1866            None => {
1867                let m = self.main_ctx.take().expect("main context saved");
1868                self.put_ctx(m);
1869                self.current = None;
1870            }
1871        }
1872        {
1873            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1874            let m = unsafe { co.as_mut() };
1875            m.status = CoroStatus::Dead;
1876            m.stack = Vec::new();
1877            m.frames = Vec::new();
1878            m.open_upvals = Vec::new();
1879            m.tbc = Vec::new();
1880            m.top = 0;
1881            m.pcall_depth = 0;
1882            m.resume_at = None;
1883            m.error_value = None;
1884        }
1885        result.map(|()| death_err)
1886    }
1887
1888    /// `coroutine.running`: the running thread plus whether it is the main one.
1889    pub(crate) fn running_thread(&self) -> (Value, bool) {
1890        match self.current {
1891            Some(co) => (Value::Coro(co), false),
1892            None => (Value::Coro(self.main_coro.expect("main coro")), true),
1893        }
1894    }
1895
1896    /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1897    /// thread) can yield. The main thread never can; any other coroutine can
1898    /// unless it is dead.
1899    pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1900        match co {
1901            Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1902            // the running thread can yield only outside any non-yieldable C call
1903            None => self.current.is_some() && self.nny == 0,
1904        }
1905    }
1906
1907    /// Why `coroutine.yield` may not suspend the running thread right now, as a
1908    /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1909    /// from "inside an unyieldable C call" (sort/gsub callback).
1910    pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1911        if self.current.is_none() {
1912            Some("attempt to yield from outside a coroutine")
1913        } else if self.nny > 0 {
1914            Some("attempt to yield across a C-call boundary")
1915        } else {
1916            None
1917        }
1918    }
1919
1920    /// The coroutine whose context is currently live (`None` on the main thread).
1921    pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
1922        self.current
1923    }
1924
1925    /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1926    /// its pending `__close` handlers, then signal termination. The handlers run
1927    /// here, in place, with the thread still non-yieldable (a yield in one hits
1928    /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1929    /// way a yield does — `exec_with` propagates it past any protecting pcall
1930    /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1931    /// clean death (or, if a handler raised, the coroutine's error).
1932    pub(crate) fn close_running(&mut self) -> LuaError {
1933        let death = match self.close_slots(0, None) {
1934            Ok(()) => None,
1935            Err(e) => Some(e.0),
1936        };
1937        self.terminating = Some(death);
1938        LuaError(Value::Nil)
1939    }
1940
1941    /// `coroutine.status` as seen by the caller.
1942    pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1943        match self.effective_coro_status(co) {
1944            CoroStatus::Suspended => "suspended",
1945            CoroStatus::Running => "running",
1946            CoroStatus::Normal => "normal",
1947            CoroStatus::Dead => "dead",
1948        }
1949    }
1950
1951    fn take_ctx(&mut self) -> SavedCtx {
1952        let saved = SavedCtx {
1953            stack: std::mem::take(&mut self.stack),
1954            frames: std::mem::take(&mut self.frames),
1955            open_upvals: std::mem::take(&mut self.open_upvals),
1956            tbc: std::mem::take(&mut self.tbc),
1957            top: self.top,
1958            pcall_depth: self.pcall_depth,
1959            hook: self.hook,
1960            globals: self.globals,
1961        };
1962        self.frames_resync(); // P17-D Week 1 — frames now empty.
1963        saved
1964    }
1965
1966    fn put_ctx(&mut self, c: SavedCtx) {
1967        self.stack = c.stack;
1968        self.frames = c.frames;
1969        self.open_upvals = c.open_upvals;
1970        self.tbc = c.tbc;
1971        self.top = c.top;
1972        self.pcall_depth = c.pcall_depth;
1973        self.hook = c.hook;
1974        self.globals = c.globals;
1975        self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
1976    }
1977
1978    /// Move a coroutine's saved context into the live VM fields.
1979    fn load_coro_ctx(&mut self, co: Gc<Coro>) {
1980        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1981        let m = unsafe { co.as_mut() };
1982        self.stack = std::mem::take(&mut m.stack);
1983        self.frames = std::mem::take(&mut m.frames);
1984        self.open_upvals = std::mem::take(&mut m.open_upvals);
1985        self.tbc = std::mem::take(&mut m.tbc);
1986        self.top = m.top;
1987        self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
1988        self.pcall_depth = m.pcall_depth;
1989        self.hook = m.hook;
1990        self.globals = m.globals;
1991    }
1992
1993    /// Save the live VM context back into a coroutine object.
1994    fn store_coro_ctx(&mut self, co: Gc<Coro>) {
1995        let c = self.take_ctx();
1996        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1997        let m = unsafe { co.as_mut() };
1998        m.stack = c.stack;
1999        m.frames = c.frames;
2000        m.open_upvals = c.open_upvals;
2001        m.tbc = c.tbc;
2002        m.top = c.top;
2003        m.pcall_depth = c.pcall_depth;
2004        m.hook = c.hook;
2005        m.globals = c.globals;
2006        // bulk-overwrite of every collectable field traced by Coro::trace:
2007        // demote the coro back to gray so propagate re-traces its new state.
2008        self.heap
2009            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2010    }
2011
2012    /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
2013    /// or errors. Ok(values) carries yielded or returned values; Err carries an
2014    /// error raised inside the coroutine (the coroutine becomes dead).
2015    pub(crate) fn resume_coro(
2016        &mut self,
2017        co: Gc<Coro>,
2018        args: Vec<Value>,
2019    ) -> Result<Vec<Value>, LuaError> {
2020        match co.status {
2021            CoroStatus::Suspended => {}
2022            CoroStatus::Dead => return Err(self.rt_err("cannot resume dead coroutine")),
2023            _ => return Err(self.rt_err("cannot resume non-suspended coroutine")),
2024        }
2025        if self.c_depth >= MAX_C_DEPTH {
2026            return Err(self.rt_err("C stack overflow"));
2027        }
2028        self.c_depth += 1;
2029        let resumer = self.current;
2030        // save the resumer's live context away
2031        let rctx = self.take_ctx();
2032        match resumer {
2033            Some(r) => {
2034                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2035                let m = unsafe { r.as_mut() };
2036                m.stack = rctx.stack;
2037                m.frames = rctx.frames;
2038                m.open_upvals = rctx.open_upvals;
2039                m.tbc = rctx.tbc;
2040                m.top = rctx.top;
2041                m.pcall_depth = rctx.pcall_depth;
2042                m.globals = rctx.globals;
2043                m.status = CoroStatus::Normal;
2044                // bulk overwrite of every traced field on r — mirror
2045                // store_coro_ctx's barrier_back so propagate re-traces r.
2046                self.heap
2047                    .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
2048            }
2049            None => self.main_ctx = Some(rctx),
2050        }
2051        // swap the coroutine in
2052        self.load_coro_ctx(co);
2053        {
2054            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2055            let m = unsafe { co.as_mut() };
2056            m.status = CoroStatus::Running;
2057            m.resumer = resumer;
2058        }
2059        // co.resumer is a traced Gc field; barrier_back covers the new
2060        // resumer reference and any future field writes during this call.
2061        self.heap
2062            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2063        self.current = Some(co);
2064
2065        // drive it
2066        let drive = if co.started {
2067            self.coro_continue(&args)
2068        } else {
2069            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2070            unsafe { co.as_mut() }.started = true;
2071            self.coro_first(co.body, &args)
2072        };
2073
2074        // classify: a self-close termination or a pending yield each win over
2075        // the (sentinel) error they raised to unwind the Rust stack.
2076        let (outcome, status) = if let Some(death) = self.terminating.take() {
2077            // the coroutine closed itself: it dies now, cleanly or with the
2078            // error a `__close` handler raised.
2079            match death {
2080                Some(e) => {
2081                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2082                    unsafe { co.as_mut() }.error_value = Some(e);
2083                    self.heap
2084                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2085                    (Err(LuaError(e)), CoroStatus::Dead)
2086                }
2087                None => (Ok(Vec::new()), CoroStatus::Dead),
2088            }
2089        } else {
2090            match self.yielding.take() {
2091                Some((vals, fslot, nres)) => {
2092                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2093                    unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2094                    (Ok(vals), CoroStatus::Suspended)
2095                }
2096                None => {
2097                    // died: a return is clean, an error is remembered so a later
2098                    // `coroutine.close` can report it (PUC lua_closethread).
2099                    // Capture the error-point traceback (set by `unwind` before
2100                    // popping the failing frames) and prepend a synthetic
2101                    // top entry for the C native that initiated the error
2102                    // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2103                    // on the dead coroutine still shows the error site
2104                    // (db.lua :848 family).
2105                    if drive.is_err() {
2106                        let mut tb = self.error_traceback.take().unwrap_or_default();
2107                        if let Some(nm) = self.errored_native.take() {
2108                            let mut prefixed: Vec<u8> = Vec::new();
2109                            prefixed.extend_from_slice(
2110                                format!("\n\t[C]: in function '{nm}'").as_bytes(),
2111                            );
2112                            prefixed.extend(tb);
2113                            tb = prefixed;
2114                        }
2115                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2116                        unsafe { co.as_mut() }.error_traceback = Some(tb);
2117                    }
2118                    if let Err(e) = drive {
2119                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2120                        unsafe { co.as_mut() }.error_value = Some(e.0);
2121                        self.heap
2122                            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2123                    }
2124                    (drive, CoroStatus::Dead)
2125                }
2126            }
2127        };
2128
2129        // save the coroutine's context back and restore the resumer
2130        self.store_coro_ctx(co);
2131        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2132        unsafe { co.as_mut() }.status = status;
2133        match resumer {
2134            Some(r) => {
2135                self.load_coro_ctx(r);
2136                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2137                unsafe { r.as_mut() }.status = CoroStatus::Running;
2138                self.current = Some(r);
2139            }
2140            None => {
2141                let m = self.main_ctx.take().expect("main context saved");
2142                self.put_ctx(m);
2143                self.current = None;
2144            }
2145        }
2146        self.c_depth -= 1;
2147        outcome
2148    }
2149
2150    /// First resume: install the body function at slot 0 and run.
2151    fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2152        self.stack.clear();
2153        self.stack.push(body);
2154        self.stack.extend_from_slice(args);
2155        self.top = self.stack.len() as u32;
2156        match self.begin_call(0, Some(args.len() as u32), -1, true) {
2157            Ok(true) => self.exec_with(1),
2158            Ok(false) => Ok(self.take_results(0)),
2159            Err(e) => Err(e),
2160        }
2161    }
2162
2163    /// Resume after a yield: deliver `args` as the results of the call that
2164    /// yielded, then continue the suspended thread.
2165    fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2166        let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2167        let n = args.len() as u32;
2168        // Restore the full register window of the suspended top frame: a yield
2169        // that unwound through a native (call_value) may have left the stack
2170        // shorter than the frame needs. `base + max_stack` is what push_frame
2171        // allocates; `fslot + n` covers the delivered yield results.
2172        let frame_need = self
2173            .frames
2174            .last()
2175            .and_then(CallFrame::lua)
2176            .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2177            .unwrap_or(0);
2178        let need = frame_need.max((fslot + n) as usize);
2179        if self.stack.len() < need {
2180            self.stack.resize(need, Value::Nil);
2181        }
2182        for (i, &v) in args.iter().enumerate() {
2183            self.stack[fslot as usize + i] = v;
2184        }
2185        self.finish_results(fslot, n, nres);
2186        // the suspended `coroutine.yield` (a C call) now returns its resume
2187        // values: fire the matching "return" hook PUC defers until the resume.
2188        self.hook_return(true, 1, n)?;
2189        self.exec_with(1)
2190    }
2191
2192    /// `coroutine.yield`: suspend the running coroutine, recording where to
2193    /// resume. Errors if called outside a coroutine. Returns a sentinel error
2194    /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2195    pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2196        let nres = self.native_nresults;
2197        self.yielding = Some((vals, func_slot, nres));
2198        // value is irrelevant: resume_coro consults `self.yielding`, not this
2199        LuaError(Value::Nil)
2200    }
2201
2202    /// Install or clear the debug hook on the running thread (`debug.sethook`
2203    /// without a thread argument). Arms the calling frame's `oldpc` to the
2204    /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2205    /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2206    /// native return: the very next traceexec compares against the sethook
2207    /// CALL's line. When the install statement and the following statement are
2208    /// on different source lines (db.lua :322), `changedline` fires for that
2209    /// first statement; when they share a line (db.lua :25 wrapper), they do
2210    /// not, so the wrapper line is not re-fired.
2211    pub(crate) fn install_hook(&mut self, hook: HookState) {
2212        self.hook = hook;
2213        if self.hook.line
2214            && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2215        {
2216            f.hook_oldpc = f.pc.saturating_sub(1);
2217        }
2218    }
2219
2220    /// Install a hook on `target` (`None`/current thread → the live VM fields;
2221    /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2222    /// with an optional thread argument.
2223    ///
2224    /// `target == None` means "no explicit thread argument" — PUC binds that
2225    /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2226    /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2227    /// of whether that's the main thread or a currently-resumed coroutine
2228    /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2229    /// `store_coro_ctx`). So a `None` target should always route to
2230    /// `install_hook` on the live fields. The pre-fix predicate gate
2231    /// `is_current_thread(target)` returned `false` when running inside a
2232    /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2233    /// and silently dropped the hook on the floor — the install happened on
2234    /// no thread at all.
2235    pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2236        if target.is_none() || self.is_current_thread(target) {
2237            self.install_hook(state);
2238        } else if let Some(co) = target {
2239            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2240            let m = unsafe { co.as_mut() };
2241            m.hook = state;
2242            if state.line
2243                && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2244            {
2245                f.hook_oldpc = u32::MAX;
2246            }
2247            // co.hook.func is a traced Value (Coro::trace covers it); demote
2248            // co back to gray so propagate sees the new hook function.
2249            self.heap
2250                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2251        }
2252    }
2253
2254    /// The hook state of `target` (`None`/current → the live VM state).
2255    pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2256        match target {
2257            t if self.is_current_thread(t) => self.hook,
2258            Some(co) => co.hook,
2259            None => self.hook,
2260        }
2261    }
2262
2263    /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
2264    /// hooks disabled (PUC clears the mask) and its results/stack growth are
2265    /// discarded so the interrupted frame's register window is untouched.
2266    /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
2267    fn run_hook(
2268        &mut self,
2269        event: &[u8],
2270        line: Option<i64>,
2271        from_native: bool,
2272    ) -> Result<(), LuaError> {
2273        // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
2274        // synchronous fn pointer call). Both Rust and Lua hooks may be
2275        // installed; both observe each event.
2276        if let Some(rh) = self.hook.rust_func {
2277            let evt = match event {
2278                b"call" => Some(RustHookEvent::Call),
2279                b"return" => Some(RustHookEvent::Return),
2280                b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
2281                b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
2282                b"count" => Some(RustHookEvent::Count),
2283                _ => None,
2284            };
2285            if let Some(evt) = evt {
2286                let was_in_hook = self.in_hook;
2287                self.in_hook = true;
2288                rh(self, evt);
2289                self.in_hook = was_in_hook;
2290            }
2291        }
2292        let Some(hook) = self.hook.func else {
2293            return Ok(());
2294        };
2295        let saved_top = self.top;
2296        let saved_len = self.stack.len();
2297        let name = Value::Str(self.heap.intern(event));
2298        let lv = line.map_or(Value::Nil, Value::Int);
2299        self.in_hook = true;
2300        // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
2301        // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
2302        // ci sat below `hookf` (the function being hooked). When that hooked
2303        // function is native, no Lua frame for it exists in luna's `frames`;
2304        // model it as a synthetic C level by pushing the hook with
2305        // `from_c = true` (then `c_frame_name` reads the caller's call
2306        // instruction → e.g. `name = "sethook"`). When the hooked function is
2307        // Lua (its frame is still on the stack), push with `from_c = false`
2308        // so the level descent lands on it directly. The hook's own frame
2309        // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
2310        // (PUC `CIST_HOOKED`).
2311        self.pending_is_hook = true;
2312        let r = self.call_value_impl(hook, &[name, lv], from_native);
2313        self.pending_is_hook = false;
2314        self.in_hook = false;
2315        self.stack.truncate(saved_len);
2316        self.top = saved_top;
2317        r.map(|_| ())
2318    }
2319
2320    /// Fire the "call" hook on entry to a function, if armed and not already in
2321    /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2322    /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2323    /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2324    /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2325    fn hook_call_with(
2326        &mut self,
2327        from_native: bool,
2328        nargs: u32,
2329        is_tail: bool,
2330    ) -> Result<(), LuaError> {
2331        if self.hook.call
2332            && !self.in_hook
2333            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2334        {
2335            self.hook_ftransfer = 1;
2336            self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2337            // PUC 5.1 didn't distinguish tail-call events — every call,
2338            // including tail-calls, fired plain `"call"`. 5.2 introduced
2339            // the separate `"tail call"` event (mask `"c"` covers both).
2340            // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2341            // "return","tail return","return","tail return"}`.
2342            let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2343                b"tail call"
2344            } else {
2345                b"call"
2346            };
2347            self.run_hook(event, None, from_native)?;
2348        }
2349        Ok(())
2350    }
2351
2352    pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2353        self.hook_call_with(from_native, nargs, false)
2354    }
2355
2356    /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2357    /// the first result slot relative to the activation's func slot, ntransfer
2358    /// the number of results.
2359    pub(crate) fn hook_return(
2360        &mut self,
2361        from_native: bool,
2362        ftransfer: u32,
2363        nresults: u32,
2364    ) -> Result<(), LuaError> {
2365        if self.hook.ret
2366            && !self.in_hook
2367            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2368        {
2369            self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2370            self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2371            self.run_hook(b"return", None, from_native)?;
2372        }
2373        Ok(())
2374    }
2375
2376    /// PUC "tail return" event — fires once per tail call that collapsed
2377    /// into the activation now returning, *after* its own "return" event.
2378    /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2379    fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2380        if self.hook.ret
2381            && !self.in_hook
2382            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2383        {
2384            self.run_hook(b"tail return", None, false)?;
2385        }
2386        Ok(())
2387    }
2388
2389    /// Call a metamethod with a single expected result.
2390    fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2391        let mut r = self.call_value(f, args)?;
2392        Ok(if r.is_empty() {
2393            Value::Nil
2394        } else {
2395            r.swap_remove(0)
2396        })
2397    }
2398
2399    /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2400    /// driven through the interpreter loop with a `Meta` continuation, so a
2401    /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2402    /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2403    /// Returns to the caller with the call set up — the opcode arm must do no
2404    /// further work on the running frame and let the loop iterate. `tm` is
2405    /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2406    /// born from this call inherits it via `pending_tm`, so
2407    /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2408    /// (db.lua :878).
2409    fn begin_meta_call(
2410        &mut self,
2411        func: Value,
2412        args: &[Value],
2413        action: MetaAction,
2414        tm: &'static str,
2415    ) -> Result<(), LuaError> {
2416        let saved_top = self.top;
2417        let cont_slot = self.stack.len() as u32;
2418        self.stack.push(func);
2419        self.stack.extend_from_slice(args);
2420        self.top = self.stack.len() as u32;
2421        frames_push_sync(
2422            &mut self.frames,
2423            &mut self.frames_top,
2424            CallFrame::Cont(NativeCont {
2425                kind: ContKind::Meta(MetaCont { action, saved_top }),
2426                func_slot: cont_slot,
2427                nresults: 1,
2428            }),
2429        );
2430        let saved_tm = self.pending_tm.replace(tm);
2431        // begin_call drives a Lua metamethod through the loop (returns true) or
2432        // runs a native one inline (returns false, leaving results at cont_slot
2433        // for the loop head to pick up); either way the Meta cont resolves there.
2434        let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2435        // Native callees never consumed pending_tm (push_frame is only hit on
2436        // a Lua callee); restore so it doesn't leak to a later push_frame.
2437        self.pending_tm = saved_tm;
2438        r?;
2439        Ok(())
2440    }
2441
2442    /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2443    fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2444        match self.index_step(t, key)? {
2445            MmOut::Done(v) => self.stack[dst as usize] = v,
2446            MmOut::Mm { func, recv } => {
2447                self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2448            }
2449            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2450        }
2451        Ok(())
2452    }
2453
2454    /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2455    fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2456        match self.newindex_step(t, key, v)? {
2457            MmOut::Done(_) => {}
2458            MmOut::Mm { func, recv } => {
2459                self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2460            }
2461            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2462        }
2463        Ok(())
2464    }
2465
2466    /// Apply a comparison opcode's outcome: a known boolean drives the
2467    /// conditional skip directly; a metamethod is called yieldably, its
2468    /// truthiness driving the skip on return.
2469    fn op_compare(
2470        &mut self,
2471        step: MmOut,
2472        l: Value,
2473        r: Value,
2474        k: bool,
2475        tm: &'static str,
2476    ) -> Result<(), LuaError> {
2477        match step {
2478            MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2479            MmOut::Mm { func, .. } => {
2480                self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2481            }
2482            MmOut::CompareSynth { func } => {
2483                // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2484                // negation are driven through `MetaAction::Compare` so the
2485                // metamethod call can yield like any other compare.
2486                self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2487            }
2488        }
2489        Ok(())
2490    }
2491
2492    /// Complete a VM instruction whose metamethod just returned `result` (PUC
2493    /// `luaV_finishOp`). The running frame is already back on top.
2494    fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2495        match action {
2496            MetaAction::Store { dst } => self.stack[dst as usize] = result,
2497            MetaAction::Discard => {}
2498            MetaAction::Compare { k, negate } => {
2499                let t = if negate {
2500                    !result.truthy()
2501                } else {
2502                    result.truthy()
2503                };
2504                self.cond_skip(t, k);
2505            }
2506            MetaAction::Concat { dst, base_a } => {
2507                self.stack[dst as usize] = result;
2508                self.top = dst + 1;
2509                self.concat_run(base_a)?;
2510            }
2511        }
2512        Ok(())
2513    }
2514
2515    // ---- metatables ----
2516
2517    pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2518        match v {
2519            Value::Table(t) => t.metatable(),
2520            Value::Userdata(u) => u.metatable(),
2521            v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2522        }
2523    }
2524
2525    /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2526    /// non-table). No-op for tables (they carry their own).
2527    pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2528        if let Some(i) = type_mt_slot(v) {
2529            self.type_mt[i] = mt;
2530        }
2531    }
2532
2533    /// The metamethod of `v` for `mm`, or nil.
2534    pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2535        match self.metatable_of(v) {
2536            Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2537            None => Value::Nil,
2538        }
2539    }
2540
2541    /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2542    /// only fires when both operands carry a metatable that exposes the same
2543    /// implementation. Returns the metamethod to call, or `Nil` when no
2544    /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2545    /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2546    pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2547        let mt1 = self.metatable_of(l);
2548        let Some(mt1) = mt1 else { return Value::Nil };
2549        let key = Value::Str(self.mm_names[mm as usize]);
2550        let tm1 = mt1.get(key);
2551        if tm1.is_nil() {
2552            return Value::Nil;
2553        }
2554        let mt2 = self.metatable_of(r);
2555        let Some(mt2) = mt2 else { return Value::Nil };
2556        if mt1.as_ptr() == mt2.as_ptr() {
2557            return tm1;
2558        }
2559        let tm2 = mt2.get(key);
2560        if tm2.is_nil() {
2561            return Value::Nil;
2562        }
2563        if tm1.raw_eq(tm2) {
2564            return tm1;
2565        }
2566        Value::Nil
2567    }
2568
2569    /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2570    /// or full userdata whose metatable carries a string `__name` reports that
2571    /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2572    pub(crate) fn obj_typename(&self, v: Value) -> String {
2573        if matches!(v, Value::Table(_) | Value::Userdata(_))
2574            && let Value::Str(s) = self.get_mm(v, Mm::Name)
2575        {
2576            return String::from_utf8_lossy(s.as_bytes()).into_owned();
2577        }
2578        v.type_name().to_string()
2579    }
2580
2581    fn call_at(
2582        &mut self,
2583        func_slot: u32,
2584        nargs: u32,
2585        from_c: bool,
2586    ) -> Result<Vec<Value>, LuaError> {
2587        if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2588            self.exec()
2589        } else {
2590            // native completed inline; results at func_slot..top
2591            Ok(self.take_results(func_slot))
2592        }
2593    }
2594
2595    /// Switch the `collectgarbage` mode, returning the previous mode name.
2596    pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2597        std::mem::replace(&mut self.gc_mode, new)
2598    }
2599
2600    /// Whether the current `collectgarbage` mode is "generational" (where a
2601    /// "step" is a minor collection — a full atomic pass — rather than a paced
2602    /// incremental sweep).
2603    pub(crate) fn gc_mode_is_generational(&self) -> bool {
2604        self.gc_mode == "generational"
2605    }
2606
2607    /// Current `stepsize` pacing parameter (PUC: 0 means an unbounded step that
2608    /// completes a whole cycle at once).
2609    pub(crate) fn gc_stepsize(&self) -> i64 {
2610        self.gc_stepsize
2611    }
2612
2613    /// `collectgarbage("param", name [,value])`: read (or set, returning the
2614    /// previous value of) a pacing parameter. Returns `None` for an unknown
2615    /// name so the caller can raise PUC's `invalid parameter` error. The
2616    /// collector is stop-the-world, so these only round-trip for API fidelity.
2617    pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2618        let slot = match name {
2619            b"pause" => &mut self.gc_pause,
2620            b"stepmul" => &mut self.gc_stepmul,
2621            b"stepsize" => &mut self.gc_stepsize,
2622            _ => return None,
2623        };
2624        let prev = *slot;
2625        if let Some(v) = set {
2626            *slot = v;
2627        }
2628        Some(prev)
2629    }
2630
2631    /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2632    /// paced sweep via `Vm::gc_step`.
2633    ///
2634    /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2635    /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2636    /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2637    /// suspected UAF is structurally closed — born objects no longer become
2638    /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2639    ///
2640    /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2641    /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2642    /// O(SWEEP_DIVISOR) safe-points regardless of size.
2643    #[inline(always)]
2644    pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2645        if self.gc_finalizing {
2646            return;
2647        }
2648        if !self.heap.gc_due() {
2649            return;
2650        }
2651        self.gc_top = live_top;
2652        // PUC stepmul: % of allocation rate. Higher = more GC work per
2653        // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2654        // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2655        const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2656        const MIN_BUDGET: usize = 64_000;
2657        let stepmul = self.gc_stepmul.max(1) as usize;
2658        let divisor = (SWEEP_BASE / stepmul).max(1);
2659        let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2660        if self.gc_step(budget) {
2661            self.heap.rearm_gc_pause(self.gc_pause);
2662        }
2663    }
2664
2665    /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2666    /// roots (open upvalues, which are not first-class Values). Shared by the
2667    /// full collector and the incremental-sweep driver so both snapshot the
2668    /// exact same live set.
2669    fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2670        let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2671        roots.push(Value::Table(self.globals));
2672        for mt in self.type_mt.into_iter().flatten() {
2673            roots.push(Value::Table(mt));
2674        }
2675        for &n in &self.mm_names {
2676            roots.push(Value::Str(n));
2677        }
2678        // root only the running thread's live registers (PUC marks [stack, top)):
2679        // freed temporaries above `gc_top` are excluded so weak values stranded
2680        // there are not pinned. Suspended threads (main_ctx, other coroutines)
2681        // stay whole-rooted below — safe over-rooting, and they are not the
2682        // thread whose weak-table loop is under test.
2683        let live = (self.gc_top as usize).min(self.stack.len());
2684        roots.extend_from_slice(&self.stack[..live]);
2685        for cf in &self.frames {
2686            match cf {
2687                CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2688                CallFrame::Cont(NativeCont {
2689                    kind: ContKind::Xpcall { handler },
2690                    ..
2691                }) => roots.push(*handler),
2692                CallFrame::Cont(NativeCont {
2693                    kind: ContKind::Close(cc),
2694                    ..
2695                }) => {
2696                    // Root the error threaded through this close chain so a
2697                    // `collectgarbage()` inside a sibling `__close` handler
2698                    // does not free it before the next handler is invoked
2699                    // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2700                    if let Some(e) = cc.pending {
2701                        roots.push(e);
2702                    }
2703                    if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2704                        roots.push(err);
2705                    }
2706                }
2707                CallFrame::Cont(_) => {}
2708            }
2709        }
2710        if let Some(e) = self.closing_err {
2711            roots.push(e);
2712        }
2713        // B12 host roots — Lua-facade handles keep their referenced
2714        // values alive across calls/yields. Trace the whole vector;
2715        // unused slots (post-`unpin_all`) carry Value::Nil which the
2716        // GC ignores.
2717        for slot in &self.host_roots {
2718            // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2719            roots.push(slot.value);
2720        }
2721        // v2.1 — `table.sort` and similar builtins stash their working
2722        // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2723        // comparator callback doesn't free strings/tables snapshotted
2724        // off the live table (sort.lua's `load(..)(); collectgarbage()`
2725        // compare regression).
2726        for buf in &self.sort_scratch {
2727            roots.extend_from_slice(buf);
2728        }
2729        // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2730        // mid-execution. Without rooting them here, a `collectgarbage()`
2731        // invoked inside the running native (sort.lua AA `load(..)();
2732        // collectgarbage()` compare callback regression) sweeps the
2733        // closure that's actively executing, leaving `nc.upvals`
2734        // dangling and the Rust local `nc` pointing at recycled memory
2735        // — the SIGSEGV pops on the very next field access or pop.
2736        for &nc in &self.running_natives {
2737            roots.push(Value::Native(nc));
2738        }
2739        // the running thread's debug hook (suspended threads root theirs via
2740        // Coro::trace / the main_ctx sweep below)
2741        if let Some(h) = self.hook.func {
2742            roots.push(h);
2743        }
2744        // the running coroutine (its saved-context fields live in the VM, but
2745        // the object itself + its resumer chain must stay reachable)
2746        if let Some(co) = self.current {
2747            roots.push(Value::Coro(co));
2748        }
2749        if let Some(mc) = self.main_coro {
2750            roots.push(Value::Coro(mc));
2751        }
2752        // debug.getregistry() and io library state
2753        if let Some(r) = self.registry {
2754            roots.push(Value::Table(r));
2755        }
2756        if let Some(mt) = self.file_mt {
2757            roots.push(Value::Table(mt));
2758        }
2759        if let Some(f) = self.io_input {
2760            roots.push(Value::Userdata(f));
2761        }
2762        if let Some(f) = self.io_output {
2763            roots.push(Value::Userdata(f));
2764        }
2765        // the main thread's saved context while a coroutine runs
2766        if let Some(m) = &self.main_ctx {
2767            roots.extend_from_slice(&m.stack);
2768            if let Some(h) = m.hook.func {
2769                roots.push(h);
2770            }
2771            for cf in &m.frames {
2772                match cf {
2773                    CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2774                    CallFrame::Cont(NativeCont {
2775                        kind: ContKind::Xpcall { handler },
2776                        ..
2777                    }) => roots.push(*handler),
2778                    CallFrame::Cont(_) => {}
2779                }
2780            }
2781        }
2782        let mut extra: Vec<*mut GcHeader> = self
2783            .open_upvals
2784            .iter()
2785            .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2786            .collect();
2787        if let Some(m) = &self.main_ctx {
2788            extra.extend(
2789                m.open_upvals
2790                    .iter()
2791                    .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2792            );
2793        }
2794        (roots, extra)
2795    }
2796
2797    /// Run a full collection with the VM's roots, then run any `__gc`
2798    /// finalizers the collection scheduled. A no-op (returns 0) when already
2799    /// inside a finalizer — the collector is not reentrant (PUC).
2800    pub fn collect_garbage(&mut self) -> usize {
2801        if self.gc_finalizing {
2802            return 0;
2803        }
2804        let (roots, extra) = self.gc_roots();
2805        let freed = self.heap.collect_ex(&roots, &extra);
2806        self.run_finalizers();
2807        freed
2808    }
2809
2810    /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2811    /// threw; gc.lua's "errors during collection" probe relies on it. This
2812    /// variant runs the same cycle but propagates the captured finalizer
2813    /// error to the explicit caller.
2814    pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2815        if self.gc_finalizing {
2816            return Ok(0);
2817        }
2818        let (roots, extra) = self.gc_roots();
2819        let freed = self.heap.collect_ex(&roots, &extra);
2820        self.run_finalizers_or_err()?;
2821        Ok(freed)
2822    }
2823
2824    /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
2825    /// should report fail rather than collect).
2826    pub(crate) fn gc_is_finalizing(&self) -> bool {
2827        self.gc_finalizing
2828    }
2829
2830    /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2831    /// = true indicates more pieces follow (concatenated until the first
2832    /// `to_cont = false` call flushes the whole line). Mirrors
2833    /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2834    ///   * If the buffer is fresh, `to_cont` is false, and the message is
2835    ///     `@<word>`, treat as a control message — only `@on` / `@off` are
2836    ///     recognised; any other `@…` is silently ignored.
2837    ///   * Otherwise, while the state is `Off`, drop the piece; while `On`,
2838    ///     accumulate, and flush to stderr + `warn_log` on the
2839    ///     non-continuation call.
2840    pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
2841        if self.warn_buf.is_empty()
2842            && !to_cont
2843            && let Some(b'@') = msg.first().copied()
2844        {
2845            match &msg[1..] {
2846                b"on" => self.warn_state = WarnState::On,
2847                b"off" => self.warn_state = WarnState::Off,
2848                _ => {} // unknown control — silently ignored (PUC checkcontrol)
2849            }
2850            return;
2851        }
2852        if self.warn_state == WarnState::Off {
2853            // drop continuation pieces too — PUC `warnfoff` is the trampoline
2854            return;
2855        }
2856        self.warn_buf.extend_from_slice(msg);
2857        if !to_cont {
2858            let line = std::mem::take(&mut self.warn_buf);
2859            eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
2860            self.warn_log.push(line);
2861        }
2862    }
2863
2864    /// Drain the in-process warning log (one entry per emitted message, sans
2865    /// `"Lua warning: "` prefix and newline). For test harnesses that want to
2866    /// assert on warn output without scraping stderr.
2867    pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
2868        std::mem::take(&mut self.warn_log)
2869    }
2870
2871    /// Arm the cooperative instruction budget (P09 embedding). The run loop
2872    /// decrements this once per dispatch turn; on zero it raises a catchable
2873    /// `"instruction budget exceeded"` error and disarms itself so the host
2874    /// can resume with a fresh budget on the next call. `None` removes the
2875    /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
2876    /// short-script semantics.
2877    pub fn set_instr_budget(&mut self, budget: Option<i64>) {
2878        self.instr_budget = budget;
2879    }
2880
2881    /// Remaining instruction budget (None when unbounded).
2882    pub fn instr_budget_remaining(&self) -> Option<i64> {
2883        self.instr_budget
2884    }
2885
2886    /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
2887    /// **must** disable JIT when relying on `instr_budget` — see the
2888    /// `jit_enabled` field doc for the rationale.
2889    pub fn set_jit_enabled(&mut self, enabled: bool) {
2890        self.jit.enabled = enabled;
2891    }
2892
2893    /// Current JIT enable state.
2894    pub fn jit_enabled(&self) -> bool {
2895        self.jit.enabled
2896    }
2897
2898    /// Toggle the trace JIT (P12). Off by default while the sprint
2899    /// develops. When enabled, hot back-edges are counted on
2900    /// `Proto.trace_hot_count`; once the counter passes
2901    /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
2902    /// mode at the back-edge target. Stays a no-op until S2's
2903    /// trace lowerer and S3's dispatcher land.
2904    pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
2905        self.jit.trace_enabled = enabled;
2906    }
2907
2908    /// P16-A — opt-in flag for the self-link cycle catch. See field
2909    /// docs for the correctness blocker. Default `false`.
2910    pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
2911        self.jit.p16_self_link_enabled = enabled;
2912    }
2913
2914    /// Current state of the P16-A self-link cycle catch.
2915    pub fn p16_self_link_enabled(&self) -> bool {
2916        self.jit.p16_self_link_enabled
2917    }
2918
2919    /// Current trace-JIT enable state.
2920    pub fn trace_jit_enabled(&self) -> bool {
2921        self.jit.trace_enabled
2922    }
2923
2924    /// Number of traces that have closed cleanly (looped back to the
2925    /// head PC) since this Vm was constructed. Cumulative; used by
2926    /// tests + tuning. Will become the dominant signal once S2's
2927    /// compile + cache lands.
2928    pub fn trace_closed_count(&self) -> u64 {
2929        self.jit.counters.closed
2930    }
2931
2932    /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
2933    /// hit an un-recordable op — the latter lands at S2).
2934    pub fn trace_aborted_count(&self) -> u64 {
2935        self.jit.counters.aborted
2936    }
2937
2938    /// P13-S13-G v2 — number of compiled traces whose close shape
2939    /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
2940    /// pin `dispatchable=false` because the dispatcher can't
2941    /// resume at a depth>0 PC without the matching CallFrames.
2942    /// S4-step4b's frame-mat helper could synthesise those, but
2943    /// the InlineAbort emit path isn't wired up yet — fresh
2944    /// pickup work for S13-G v2-full.
2945    pub fn trace_inline_abort_count(&self) -> u64 {
2946        self.jit.counters.inline_abort
2947    }
2948
2949    /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
2950    pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
2951        &self.jit.counters.dispatch_off_reasons
2952    }
2953
2954    /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
2955    pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
2956        &self.jit.counters.compile_failed_reasons
2957    }
2958
2959    /// P13-S13-H — see `JitCounters::closed_lens`. Returns
2960    /// `(is_call_triggered, ops_len)` for every trace that closed.
2961    pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
2962        &self.jit.counters.closed_lens
2963    }
2964
2965    /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
2966    /// Per-reason close-cause counts (recorder-side abort/discard +
2967    /// lowerer-side dispatch_off labels) keyed by `&'static str`.
2968    pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
2969        &self.jit.counters.close_cause_counts
2970    }
2971
2972    /// v2.0 Track-R R3b — number of compiled traces whose
2973    /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
2974    /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
2975    /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
2976    /// hot loop with p16-on. R3b keeps `dispatchable = false` even
2977    /// when this count bumps; R3d will lift it.
2978    pub fn trace_downrec_link_compiled_count(&self) -> u64 {
2979        self.jit.counters.downrec_link_compiled
2980    }
2981
2982    /// v2.0 Track-R R3c — see
2983    /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
2984    /// of times the dispatcher's `is_downrec_sentinel` arm fired and
2985    /// classified the return as a caller-pc-guard HIT.
2986    pub fn trace_downrec_dispatched_count(&self) -> u64 {
2987        self.jit.counters.downrec_dispatched
2988    }
2989
2990    /// v2.0 Track-R R3c — see
2991    /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
2992    /// times the dispatcher entered a `downrec_link`-bearing trace and
2993    /// the trace returned via the lowerer's deopt block (caller-pc
2994    /// guard MISS), or the dispatcher itself force-deopted via the
2995    /// stitch-cycle checkpoint.
2996    pub fn trace_downrec_deopt_count(&self) -> u64 {
2997        self.jit.counters.downrec_deopt
2998    }
2999
3000    /// v2.0 Track-R R3d — see
3001    /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3002    /// Number of compiled traces whose lowerer emitted a multi-way
3003    /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3004    /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3005    pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3006        self.jit.counters.multi_way_guard_emitted
3007    }
3008
3009    /// P12-S2.C — number of closed traces the lowerer compiled and
3010    /// parked on `Proto.traces`. Re-records of the same head_pc are
3011    /// deduped (the second close finds the head_pc already cached
3012    /// and skips compile), so this never exceeds `trace_closed_count`.
3013    pub fn trace_compiled_count(&self) -> u64 {
3014        self.jit.counters.compiled
3015    }
3016
3017    /// v2.1 Phase 1I.B — number of times the recorder captured a
3018    /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3019    /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3020    /// by the Phase 1I.B opt-in fire test to verify the env gate
3021    /// wiring round-trips end-to-end (env -> recorder -> snapshot
3022    /// -> counter -> getter -> assertion).
3023    pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3024        self.jit.counters.field_ic_snapshot_captured
3025    }
3026
3027    /// P12-S2.C — number of closed traces the lowerer rejected
3028    /// (any of the bail conditions in
3029    /// `crate::jit::trace::try_compile_trace`).
3030    pub fn trace_compile_failed_count(&self) -> u64 {
3031        self.jit.counters.compile_failed
3032    }
3033
3034    /// P12-S3 — number of times the dispatcher jumped into a
3035    /// compiled trace. Bumps on every entry; `trace_deopt_count`
3036    /// counts the subset where the trace returned with a parked
3037    /// `jit_pending_err`.
3038    pub fn trace_dispatched_count(&self) -> u64 {
3039        self.jit.counters.dispatched
3040    }
3041
3042    /// P12-S3 — number of trace entries that came back with
3043    /// `jit_pending_err` set (typically a metatable shadowed an
3044    /// index inside a helper, forcing the dispatcher to fall back
3045    /// to the interpreter without committing the trace's result).
3046    pub fn trace_deopt_count(&self) -> u64 {
3047        self.jit.counters.deopt
3048    }
3049
3050    /// P15-A v1 — number of times the dispatcher started a side
3051    /// trace recording (an `exit_hit_counts` slot crossed
3052    /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3053    /// was None and trace JIT was enabled). Each unit is exactly one
3054    /// `start_side_trace` call; the actual compile success counts
3055    /// under [`Self::trace_compiled_count`] like any other trace.
3056    /// Probe use: distinguishes the "side-trace pipeline fired"
3057    /// signal from the "primary back-edge / call-trigger fired"
3058    /// signal so v0-v3 architectural progress is visible without
3059    /// reading per-counter histograms.
3060    pub fn trace_side_trace_started_count(&self) -> u64 {
3061        self.jit.counters.side_trace_started
3062    }
3063
3064    /// P15-A v2-A — number of side-trace recordings that closed,
3065    /// compiled successfully, AND patched their parent's
3066    /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3067    /// dispatch through these ptrs yet (v2-B/C job), but the
3068    /// counter + ptr write proves the compile + link pipeline is
3069    /// complete end-to-end.
3070    pub fn trace_side_trace_compiled_count(&self) -> u64 {
3071        self.jit.counters.side_trace_compiled
3072    }
3073
3074    /// P15-A v2-C-A5-C — number of side traces that compiled
3075    /// successfully but were SHEDDED by the close-handler shape-
3076    /// match gate (`exit_tags_match_entry_tags`). High ratios
3077    /// vs. `trace_side_trace_compiled_count` indicate the
3078    /// architecture is shedding lots of would-be side traces;
3079    /// useful as a tuning probe for future relaxation of the
3080    /// gate or for child-IR re-specialisation against parent's
3081    /// exit shape.
3082    pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3083        self.jit.counters.side_trace_shape_mismatch
3084    }
3085
3086    /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3087    /// classified as `crate::jit::trace::EscapeState::Sinkable`
3088    /// across every successfully compiled trace on this Vm. The
3089    /// count is post-demotion: sites pre-emit drops back to Escaped
3090    /// for not meeting v1 sunk-emit criteria are NOT counted.
3091    /// `trace_sunk_alloc_count` matches one-for-one today (every
3092    /// surviving Sinkable site goes through sunk emit).
3093    pub fn trace_sinkable_seen_count(&self) -> u64 {
3094        self.jit.counters.sinkable_seen
3095    }
3096
3097    /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3098    pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3099        self.jit.counters.accum_bufferable_seen
3100    }
3101
3102    /// P15-prep — total dispatch hits across all known traces,
3103    /// broken into hot-exit telemetry (max single-exit count,
3104    /// total dispatches, exit count). Used by probes to identify
3105    /// hot side-exits as side-trace candidates.
3106    ///
3107    /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3108    /// recursively, so inner functions' traces are reported.
3109    pub fn trace_exit_hit_summary(
3110        &self,
3111        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3112    ) -> Vec<(u32, Vec<u32>)> {
3113        fn walk(
3114            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3115            out: &mut Vec<(u32, Vec<u32>)>,
3116        ) {
3117            for ct in proto.traces.borrow().iter() {
3118                let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3119                out.push((ct.head_pc, counts));
3120            }
3121            for inner in proto.protos.iter() {
3122                walk(*inner, out);
3123            }
3124        }
3125        let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3126        walk(cl.proto, &mut out);
3127        out
3128    }
3129
3130    /// P15-A v0 — surface every side-exit slot whose hit count is
3131    /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3132    /// `cl.proto` (recursively walking `proto.protos`). Returned
3133    /// entries are side-trace candidates: each carries the parent
3134    /// trace's `(head_proto, head_pc)`, the exit's index in the
3135    /// parent's `exit_hit_counts`, and the side trace's natural
3136    /// entry shape (`cont_pc` + `exit_tags`).
3137    ///
3138    /// Layout of `exit_hit_counts` (mirrored by the iter):
3139    /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3140    ///   window-sized exit_tags).
3141    /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3142    ///   → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3143    /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3144    ///   exit_tags = `ct.exit_tags`).
3145    pub fn hot_exit_iter(
3146        &self,
3147        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3148    ) -> Vec<crate::jit::trace::HotExitInfo> {
3149        use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3150        fn walk(
3151            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3152            out: &mut Vec<HotExitInfo>,
3153        ) {
3154            for ct in proto.traces.borrow().iter() {
3155                let inline_n = ct.per_exit_inline.len();
3156                let tags_n = ct.per_exit_tags.len();
3157                debug_assert_eq!(
3158                    ct.exit_hit_counts.len(),
3159                    inline_n + tags_n + 1,
3160                    "exit_hit_counts layout invariant violated"
3161                );
3162                for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3163                    let hits = cell.get();
3164                    if hits < HOTEXIT_THRESHOLD {
3165                        continue;
3166                    }
3167                    let (cont_pc, exit_tags) = if idx < inline_n {
3168                        let ent = &ct.per_exit_inline[idx];
3169                        (ent.cont_pc, ent.exit_tags.clone())
3170                    } else if idx < inline_n + tags_n {
3171                        let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3172                        (*pc, tags.clone())
3173                    } else {
3174                        (ct.head_pc, ct.exit_tags.clone())
3175                    };
3176                    out.push(HotExitInfo {
3177                        head_proto: proto,
3178                        head_pc: ct.head_pc,
3179                        exit_idx: idx,
3180                        hits,
3181                        cont_pc,
3182                        exit_tags,
3183                    });
3184                }
3185            }
3186            for inner in proto.protos.iter() {
3187                walk(*inner, out);
3188            }
3189        }
3190        let mut out: Vec<HotExitInfo> = Vec::new();
3191        walk(cl.proto, &mut out);
3192        out
3193    }
3194
3195    /// P12-S5-B — sum of NewTable sites that actually took the
3196    /// sunk-emit path across every successfully compiled trace on
3197    /// this Vm. Each counted site skips its heap `Gc<Table>`
3198    /// allocation per dispatch; the array part lives as Cranelift
3199    /// `Variable`s for the duration of the trace.
3200    pub fn trace_sunk_alloc_count(&self) -> u64 {
3201        self.jit.counters.sunk_alloc
3202    }
3203
3204    /// P12-S5-C — sum of materialise-helper emit sites across every
3205    /// successfully compiled trace on this Vm. Each unit is a
3206    /// (site × cmp side-exit) pair whose IR reconstructs a heap
3207    /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3208    /// emit is wiring materialise into the right side-exits.
3209    pub fn trace_materialize_emit_count(&self) -> u64 {
3210        self.jit.counters.materialize_emit
3211    }
3212
3213    /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3214    /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3215    /// replaces a `Heap::new_closure_inline` call on the dispatch
3216    /// path; the count is static (one per matching op per compiled
3217    /// trace), summed at compile success.
3218    pub fn trace_closure_emit_count(&self) -> u64 {
3219        self.jit.counters.closure_emit
3220    }
3221
3222    /// v2.0 Stage 7 polish 6 fire experiment — see
3223    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3224    /// Number of compiled traces whose `per_exit_inline.len() > 0`
3225    /// (depth>0 inlined cmp side-exits emitted).
3226    pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3227        self.jit.counters.per_exit_inline_compiled
3228    }
3229
3230    /// v2.0 Stage 7 polish 6 fire experiment — see
3231    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3232    /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3233    /// `dispatchable == true` — i.e. the count of compiled traces
3234    /// that would actually exercise the AOT polish 6 chain-reloc +
3235    /// deploy-resolver path.
3236    pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3237        self.jit.counters.per_exit_inline_dispatchable
3238    }
3239
3240    /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3241    /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3242    /// whether a self-recursive function actually walked the depth
3243    /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3244    /// across traces and Vm activations; reset only on `Vm::new`.
3245    pub fn trace_max_depth_seen(&self) -> u8 {
3246        self.jit.max_depth_seen
3247    }
3248
3249    /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3250    /// dispatch time). The frame-materialization helper reads `.base`
3251    /// to compute offsets for each inlined frame's window.
3252    #[doc(hidden)]
3253    pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3254        match self.frames.last() {
3255            Some(CallFrame::Lua(f)) => Some(*f),
3256            _ => None,
3257        }
3258    }
3259
3260    /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3261    /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3262    /// by `luna-tools` (`luna-profile`'s sampler walks this from
3263    /// inside a `Count` hook). Sibling-module scope: not part of the
3264    /// public embedder surface, but `inspect::frames_for_profile` is.
3265    #[doc(hidden)]
3266    pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3267        &self.frames
3268    }
3269
3270    /// P12-S4-step4b — ensure the value stack covers indices
3271    /// `[0..need)`. Extends with Nil if shorter. Called by the
3272    /// frame-materialization helper before pushing an inlined frame
3273    /// whose register window may exceed the current stack length.
3274    #[doc(hidden)]
3275    pub fn jit_ensure_stack(&mut self, need: usize) {
3276        if self.stack.len() < need {
3277            self.stack.resize(need, Value::Nil);
3278        }
3279    }
3280
3281    /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3282    /// `__close` handlers would run (any active tbc slot ≥ from
3283    /// holding a non-nil/false Value); if so, parks a deopt sentinel
3284    /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3285    /// IR branches to the deopt block. Otherwise performs the safe
3286    /// part of close — `close_from(from)` to close open upvals +
3287    /// drop any drained tbc entries ≥ from — and returns 0.
3288    ///
3289    /// Returns are i64-shaped so the cranelift import sig stays
3290    /// trivial (i64 → i64 mapping).
3291    #[doc(hidden)]
3292    pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3293        if self.jit.pending_err.is_some() {
3294            return 1;
3295        }
3296        let Some(f) = self.jit_last_lua_frame() else {
3297            self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3298            return 1;
3299        };
3300        let from = f.base + start_offset;
3301        let has_handler = self.tbc.iter().any(|&s| {
3302            s >= from && {
3303                let v = self.stack[s as usize];
3304                !matches!(v, Value::Nil | Value::Bool(false))
3305            }
3306        });
3307        if has_handler {
3308            self.jit.pending_err =
3309                Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3310            return 1;
3311        }
3312        self.close_from(from);
3313        // Drain any tbc entries ≥ from (they're nil/false stubs the
3314        // interpreter's drive_close would have skipped silently).
3315        while let Some(&s) = self.tbc.last() {
3316            if s < from {
3317                break;
3318            }
3319            self.tbc.pop();
3320        }
3321        0
3322    }
3323
3324    /// P12-S7-B — spill the trace's current value for a register to
3325    /// the underlying `vm.stack[base + slot_offset]`. Required before
3326    /// an `Op::Closure` whose inner proto has an `in_stack: true`
3327    /// upval at `slot_offset` — the helper's `find_or_create_upval`
3328    /// captures a live pointer to `vm.stack[base + slot_offset]`,
3329    /// which must hold the right value at call time (trace IR's
3330    /// Variable hasn't yet been written back).
3331    ///
3332    /// Parameters arrive as i64 from the IR: `slot_offset` is the
3333    /// caller-frame register index (`u32` in practice, depth=0
3334    /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3335    /// `crate::runtime::value::raw` byte for the slot's RegKind;
3336    /// `raw_bits` is the trace Variable's `use_var` payload
3337    /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3338    /// raw `Gc::as_ptr` cast).
3339    #[doc(hidden)]
3340    pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3341        let Some(f) = self.jit_last_lua_frame() else {
3342            self.jit.pending_err =
3343                Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3344            return;
3345        };
3346        let idx = (f.base as usize) + (slot_offset as usize);
3347        if self.stack.len() <= idx {
3348            self.stack.resize(idx + 1, Value::Nil);
3349        }
3350        // SAFETY: caller (trace JIT IR emit) provides matching
3351        // `(tag, raw_bits)` — same shape produced by Value::unpack.
3352        let v = unsafe {
3353            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3354        };
3355        self.stack[idx] = v;
3356    }
3357
3358    /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3359    /// Mirrors the interp arm (this file ~L5316): copies the
3360    /// generator/state/control triple from `R[A..=A+2]` to
3361    /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3362    /// the iterator function via `begin_call`. v2 only handles
3363    /// `Value::Native` iterators (the canonical `ipairs_iter` /
3364    /// `next` builtins) — a Lua-closure iterator would push a Lua
3365    /// frame mid-trace, breaking `recording_frame_base`, so we
3366    /// deopt by parking a `pending_err` and returning `-1`.
3367    ///
3368    /// `slot_offset` is the caller-frame register index (=
3369    /// `inst.a()` decoded from a u32-wide field). `nvars` is
3370    /// `inst.c() as i32` — the caller's expected return count.
3371    /// P12-S12-C v1 — refresh only the raw payload of
3372    /// `vm.stack[base + slot_offset]`, preserving its existing
3373    /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3374    /// uses this when the slot's `RegKind` is `Unset` (no compile-
3375    /// time tag info; commonly `Str` slots which the trace doesn't
3376    /// model). The interp's previous execution of the same op
3377    /// already populated the slot with the right tag — the trace
3378    /// only needs to swap in its current raw value.
3379    #[doc(hidden)]
3380    pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3381        let Some(f) = self.jit_last_lua_frame() else {
3382            return;
3383        };
3384        let idx = (f.base as usize) + (slot_offset as usize);
3385        if idx >= self.stack.len() {
3386            return;
3387        }
3388        let (tag, _) = self.stack[idx].unpack();
3389        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3390        self.stack[idx] = unsafe {
3391            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3392        };
3393    }
3394
3395    /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3396    ///
3397    /// Mirrors the interp arm (this file ~L5112): `self.top =
3398    /// base + a + n; concat_run(base + a)`. Result lands at
3399    /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3400    /// deopt (any error from `concat_run` OR detection that the
3401    /// metamethod path was taken — `concat_run` returns `Ok(())`
3402    /// after `begin_meta_call` which has pushed a Lua frame the
3403    /// trace can't safely continue past).
3404    ///
3405    /// The frame-push detection uses `pre/post frames.len()` and
3406    /// unwinds any pushed frames before deopting, so the
3407    /// dispatcher's existing deopt path sees a clean stack.
3408    #[doc(hidden)]
3409    pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3410        if self.jit.pending_err.is_some() {
3411            return -1;
3412        }
3413        let Some(f) = self.jit_last_lua_frame() else {
3414            self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3415            return -1;
3416        };
3417        let abs_a = f.base + slot_offset;
3418        self.top = abs_a + n as u32;
3419        let pre_frames = self.frames.len();
3420        let result = self.concat_run(abs_a);
3421        let post_frames = self.frames.len();
3422        // Frame-push = metamethod path taken (begin_meta_call pushed
3423        // a Lua frame). The trace can't continue past it; unwind +
3424        // deopt so interp redoes Op::Concat in the slow path.
3425        while self.frames.len() > pre_frames {
3426            frames_pop_sync(&mut self.frames, &mut self.frames_top);
3427        }
3428        if let Err(e) = result {
3429            self.jit.pending_err = Some(e);
3430            return -1;
3431        }
3432        if post_frames > pre_frames {
3433            self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3434            return -1;
3435        }
3436        0
3437    }
3438
3439    /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3440    /// accumulator buffer pool, returning a raw pointer. The trace
3441    /// fn's IR holds this pointer in a stack slot through the loop
3442    /// and calls `jit_str_buf_extend` per iter. If the pool is
3443    /// empty, allocate fresh.
3444    ///
3445    /// Safety: the returned pointer is valid until
3446    /// `jit_str_buf_release` is called or the Vm is dropped. The
3447    /// caller MUST not retain it across `enter_jit` boundaries.
3448    #[doc(hidden)]
3449    pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3450        let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3451        // Move into a Box so the pointer is stable until release.
3452        Box::into_raw(Box::new(buf))
3453    }
3454
3455    /// P14-S14-B v2 — return a previously-acquired buffer to the
3456    /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3457    /// buffer is `clear`ed (capacity retained) so the next acquire
3458    /// gets a ready-to-extend Vec.
3459    ///
3460    /// Safety: `buf` must have been returned by a prior
3461    /// `jit_str_buf_acquire` on the same Vm.
3462    #[doc(hidden)]
3463    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3464    pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3465        if buf.is_null() {
3466            return;
3467        }
3468        // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3469        let mut owned = unsafe { Box::from_raw(buf) };
3470        owned.clear();
3471        if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3472            self.jit.str_buf_pool.push(*owned);
3473        }
3474        // Else: drop the buffer.
3475    }
3476
3477    /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3478    /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3479    /// the piece slot) and passes it through; we treat it as a
3480    /// `*mut LuaStr` and append its bytes.
3481    ///
3482    /// Returns 0 on success, -1 if the piece isn't a Str (would
3483    /// trip __concat metamethod path → deopt to interp).
3484    ///
3485    /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3486    /// trace's piece slot raw bits.
3487    #[doc(hidden)]
3488    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3489    pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3490        if buf.is_null() || str_ptr == 0 {
3491            return -1;
3492        }
3493        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3494        let buf = unsafe { &mut *buf };
3495        let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3496        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3497        let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3498        buf.extend_from_slice(bytes);
3499        0
3500    }
3501
3502    /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3503    /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3504    /// the trace to write into the accumulator slot.
3505    ///
3506    /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3507    /// (the v2 hard cap; the trace deopts).
3508    ///
3509    /// Safety: `buf` from prior `acquire`. The buffer is left
3510    /// CLEAR (drained) ready for `release`.
3511    #[doc(hidden)]
3512    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3513    pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3514        if buf.is_null() {
3515            return 0;
3516        }
3517        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3518        let buf = unsafe { &mut *buf };
3519        let bytes = std::mem::take(buf);
3520        // v2 hard cap at 256KB per RFC Q3.
3521        if bytes.len() > 256 * 1024 {
3522            return 0;
3523        }
3524        let gc = self.heap.intern(&bytes);
3525        gc.as_ptr() as i64
3526    }
3527
3528    /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
3529    ///
3530    /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
3531    /// v3: ipairs `inext` fast path at the top — skip begin_call
3532    ///     when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
3533    ///     R[A+2]=Int.
3534    /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
3535    ///     caller-provided buffers + return R[A+4]'s tag byte. Lets
3536    ///     emit skip 3 separate `luna_jit_stack_load` calls and 1
3537    ///     `luna_jit_stack_tag` call by reading the buffer via
3538    ///     cranelift `stack_load` IR instead. Returns -1 on deopt.
3539    #[doc(hidden)]
3540    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
3541    pub fn jit_op_tforcall(
3542        &mut self,
3543        slot_offset: u32,
3544        nvars: i32,
3545        ctrl_out: *mut i64,
3546        key_out: *mut i64,
3547        val_out: *mut i64,
3548    ) -> i64 {
3549        if self.jit.pending_err.is_some() {
3550            return -1;
3551        }
3552        let Some(f) = self.jit_last_lua_frame() else {
3553            self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
3554            return -1;
3555        };
3556        let abs = f.base + slot_offset;
3557        let need = (abs + 7) as usize;
3558        if self.stack.len() < need {
3559            self.stack.resize(need, Value::Nil);
3560        }
3561        // v3 fast path.
3562        let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
3563            && std::ptr::fn_addr_eq(
3564                n.f,
3565                crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
3566            )
3567            && let Value::Table(t) = self.stack[(abs + 1) as usize]
3568            && t.metatable().is_none()
3569            && let Value::Int(i) = self.stack[(abs + 2) as usize]
3570        {
3571            let next_i = i.wrapping_add(1);
3572            let v = t.get_int(next_i);
3573            if v.is_nil() {
3574                self.stack[(abs + 4) as usize] = Value::Nil;
3575            } else {
3576                self.stack[(abs + 4) as usize] = Value::Int(next_i);
3577                if (nvars as usize) >= 2 {
3578                    self.stack[(abs + 5) as usize] = v;
3579                }
3580                for j in 2..nvars as usize {
3581                    let slot = abs + 4 + j as u32;
3582                    if (slot as usize) < self.stack.len() {
3583                        self.stack[slot as usize] = Value::Nil;
3584                    }
3585                }
3586            }
3587            true
3588        } else {
3589            false
3590        };
3591        if !took_fast_path {
3592            // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
3593            // route through begin_call. Lua-closure iters would push
3594            // a Lua frame mid-trace → deopt.
3595            self.stack[(abs + 4) as usize] = self.stack[abs as usize];
3596            self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
3597            self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
3598            if !matches!(self.stack[abs as usize], Value::Native(_)) {
3599                self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
3600                return -1;
3601            }
3602            if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
3603                self.jit.pending_err = Some(e);
3604                return -1;
3605            }
3606        }
3607        // v4 batched writeback — fill the caller's buffers with the
3608        // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
3609        // reload via cranelift `stack_load` instead of separate
3610        // `luna_jit_stack_load` helper calls.
3611        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3612        let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
3613        let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
3614        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3615        let key_raw = unsafe { key_rv.zero };
3616        let val_raw = if (nvars as usize) >= 2 {
3617            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3618            unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
3619        } else {
3620            0u64
3621        };
3622        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3623        unsafe {
3624            ctrl_out.write(ctrl_raw as i64);
3625            key_out.write(key_raw as i64);
3626            val_out.write(val_raw as i64);
3627        }
3628        key_tag as i64
3629    }
3630
3631    /// P12-S12-B-v2 — load the raw `i64` payload of
3632    /// `vm.stack[base + slot_offset]` for the active trace's head
3633    /// Lua frame. Used to reload trace IR `Variable`s after a
3634    /// helper has written to `vm.stack` directly (e.g. TForCall's
3635    /// iter results land at `R[A+4..A+4+nvars]`).
3636    #[doc(hidden)]
3637    pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3638        let Some(f) = self.jit_last_lua_frame() else {
3639            return 0;
3640        };
3641        let idx = (f.base as usize) + (slot_offset as usize);
3642        if idx >= self.stack.len() {
3643            return 0;
3644        }
3645        let v = self.stack[idx];
3646        let (_, raw) = v.unpack();
3647        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3648        unsafe { raw.zero as i64 }
3649    }
3650
3651    /// P12-S12-B-v2 — read the tag byte of
3652    /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3653    /// to dispatch on the iterator's return-key tag at runtime
3654    /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3655    /// deopt for v2).
3656    #[doc(hidden)]
3657    pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3658        let Some(f) = self.jit_last_lua_frame() else {
3659            return crate::runtime::value::raw::NIL;
3660        };
3661        let idx = (f.base as usize) + (slot_offset as usize);
3662        if idx >= self.stack.len() {
3663            return crate::runtime::value::raw::NIL;
3664        }
3665        self.stack[idx].unpack().0
3666    }
3667
3668    /// P12-S4-step4b — push a Lua frame onto the call stack with
3669    /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3670    /// at trace side-exits to recreate the inlined call activations
3671    /// the lowerer compiled past. The contract (enforced by the
3672    /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3673    /// `nresults` is the caller's expected count (today always 1
3674    /// because the lowerer bails Op::Call C != 2), and the caller
3675    /// has already called `jit_ensure_stack` to cover
3676    /// `[0..base + cl.proto.max_stack)`.
3677    #[doc(hidden)]
3678    pub fn jit_push_inlined_frame(
3679        &mut self,
3680        cl: Gc<LuaClosure>,
3681        base: u32,
3682        pc: u32,
3683        nresults: i32,
3684    ) {
3685        frames_push_sync(
3686            &mut self.frames,
3687            &mut self.frames_top,
3688            CallFrame::Lua(Frame {
3689                closure: cl,
3690                base,
3691                pc,
3692                // Lua call ABI: callee R[0] sits at caller R[A+1], so
3693                // callee.base = caller.base + A + 1; func_slot is
3694                // caller.base + A = callee.base - 1.
3695                func_slot: base - 1,
3696                n_varargs: 0,
3697                nresults,
3698                hook_oldpc: u32::MAX,
3699                from_c: false,
3700                tm: None,
3701                is_hook: false,
3702                tailcalls: 0,
3703            }),
3704        );
3705    }
3706
3707    /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
3708    /// should set to `false` so `load`/`loadstring` reject bytecode input
3709    /// (which bypasses parser limits and could exploit verifier gaps).
3710    pub fn set_bytecode_loading(&mut self, enabled: bool) {
3711        self.bytecode_loading = enabled;
3712    }
3713
3714    /// Current bytecode-loading gate state.
3715    pub fn bytecode_loading(&self) -> bool {
3716        self.bytecode_loading
3717    }
3718
3719    /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
3720    /// bytecode is a strictly larger trust surface than luna's own dump
3721    /// format (third-party toolchain bugs, malformed chunks, unknown
3722    /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
3723    /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
3724    pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
3725        self.puc_bytecode_loading = enabled;
3726    }
3727
3728    /// Current PUC bytecode-loading gate state.
3729    pub fn puc_bytecode_loading(&self) -> bool {
3730        self.puc_bytecode_loading
3731    }
3732
3733    /// Default loader input budget — 256 MiB.
3734    ///
3735    /// `Vm::load` and the Lua-level `load(reader, ...)` both refuse
3736    /// sources whose byte length crosses this cap, returning the
3737    /// PUC-shaped `not enough memory` error rather than letting the
3738    /// host allocator try (and crash) to hold the next chunk.
3739    pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 * 1024 * 1024;
3740
3741    /// Set the loader input byte budget (see
3742    /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
3743    /// effectively disable. Smaller caps are honored verbatim — a 0
3744    /// cap rejects every non-empty source.
3745    pub fn set_loader_input_budget(&mut self, bytes: usize) {
3746        self.loader_input_budget = bytes;
3747    }
3748
3749    /// Current loader input byte budget.
3750    pub fn loader_input_budget(&self) -> usize {
3751        self.loader_input_budget
3752    }
3753
3754    /// Take the error traceback captured at the latest error point and
3755    /// reset it. Embedders should call this immediately after a failed
3756    /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3757    /// entry clears it. Returns `None` if no error was in flight.
3758    pub fn take_error_traceback(&mut self) -> Option<String> {
3759        self.error_traceback
3760            .take()
3761            .map(|b| String::from_utf8_lossy(&b).into_owned())
3762    }
3763
3764    /// Arm the soft memory cap (P09 embedding). The run loop checks the
3765    /// heap's tracked byte usage between dispatch turns; on overshoot it
3766    /// first runs a full collect, and if `bytes` still exceeds the cap it
3767    /// raises a catchable `"memory cap exceeded"` Lua error and disarms
3768    /// itself (fire-once: re-arm before the next `call_value` if reusing
3769    /// the Vm across requests). `None` removes the cap. The accounting is
3770    /// approximate — internal Vec/Box capacity overhead is not tracked,
3771    /// so embedders should size the cap with ~2× margin over the desired
3772    /// hard limit and additionally bound the Vm's lifetime (drop after
3773    /// each request).
3774    pub fn set_memory_cap(&mut self, cap: Option<usize>) {
3775        self.heap.mem_cap = cap;
3776    }
3777
3778    /// Approximate bytes the heap is currently holding. Object shells plus
3779    /// every table's internal array/hash boxes (tracked via
3780    /// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
3781    /// bytecode and closure upvalue slices still go uncounted — this is a
3782    /// lower bound, not a precise `malloc_stats`-style total.
3783    pub fn memory_used(&self) -> usize {
3784        self.heap.bytes()
3785    }
3786
3787    /// Read upvalue slot `i` of the native function currently on top of the
3788    /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3789    /// when no native is running. Public so the C ABI trampoline can fetch
3790    /// the host C function pointer it stashed there at registration time.
3791    pub fn running_native_upvalue(&self, i: usize) -> Value {
3792        match self.running_natives.last() {
3793            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3794            Some(nc) => unsafe {
3795                let upvals = &(*nc.as_ptr()).upvals;
3796                upvals.get(i).copied().unwrap_or(Value::Nil)
3797            },
3798            None => Value::Nil,
3799        }
3800    }
3801
3802    /// Register a table for finalization if its (just-set) metatable carries a
3803    /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3804    /// `__gc` to the metatable afterwards does not retroactively register).
3805    pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3806        if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3807            self.heap.register_finalizable(t);
3808        }
3809    }
3810
3811    /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3812    /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3813    /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3814    /// metatable that `newproxy` returned, which then needs to flow through.
3815    /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3816    /// would re-check at metatable-set time); luna's only userdata
3817    /// finalizables today come via `newproxy`, which registers itself.
3818    #[allow(dead_code)]
3819    pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3820        if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3821            self.heap.register_finalizable_userdata(u);
3822        }
3823    }
3824
3825    /// Run pending `__gc` finalizers (objects the collector resurrected for
3826    /// finalization). Finalizer errors are swallowed — PUC turns them into a
3827    /// warning; they must never propagate to the mutator. Reentrancy-guarded.
3828    fn run_finalizers(&mut self) {
3829        let _ = self.run_finalizers_or_err();
3830    }
3831
3832    fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
3833        if self.gc_finalizing {
3834            return Ok(());
3835        }
3836        let pending = self.heap.take_tobefnz();
3837        if pending.is_empty() {
3838            return Ok(());
3839        }
3840        self.gc_finalizing = true;
3841        let mut first_err: Option<LuaError> = None;
3842        for obj in pending {
3843            let gc = self.get_mm(obj, Mm::Gc);
3844            // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
3845            // schedule the object for finalization (`__gc = true` is the
3846            // canonical placeholder); only call it at finalize time when it
3847            // is actually a function. gc.lua 5.2 :412 wires up exactly this
3848            // sentinel and then expects no call.
3849            let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
3850            if callable {
3851                // PUC `GCTM` sets `CIST_FIN` on the new ci so
3852                // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
3853                // `name = "__gc"`. luna threads the same outcome through the
3854                // generic `pending_tm` slot: the Lua frame born from this
3855                // call consumes it in `push_frame`. Saved/restored around the
3856                // call in case the handler is a native (which never pops it).
3857                // Bare event name; `frame_name` / `c_frame_name` add the
3858                // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
3859                // the convention used by `__close`, `__index`, …
3860                let saved_tm = self.pending_tm.replace("gc");
3861                // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
3862                // pcall, so `getinfo(2).namewhat` inside the finalizer reads
3863                // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
3864                // luna mirrors by temporarily tagging the current top Lua
3865                // frame's `tm` to "__gc" for the duration of the call.
3866                let caller_tm_idx = self
3867                    .frames
3868                    .iter()
3869                    .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
3870                let saved_caller_tm = caller_tm_idx.and_then(|i| {
3871                    if let CallFrame::Lua(fr) = &mut self.frames[i] {
3872                        let prev = fr.tm;
3873                        fr.tm = Some("gc");
3874                        Some(prev)
3875                    } else {
3876                        None
3877                    }
3878                });
3879                if let Err(e) = self.call_value(gc, &[obj]) {
3880                    // PUC 5.1 GCTM raised the finalizer's error to the
3881                    // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
3882                    // baselines on `not pcall(collectgarbage)`). 5.2/5.3
3883                    // wrapped it in `error in __gc metamethod (msg)` first
3884                    // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
3885                    // introduced the warning system and switched to "warn
3886                    // then continue" — never re-raise, just route the
3887                    // wrapped message through `warn`. gc.lua 5.5 :378 wires
3888                    // up `_WARN` capture under the `if T then …` block to
3889                    // baseline on the same wrapped string.
3890                    if self.version >= LuaVersion::Lua54 {
3891                        let inner = self.error_text(&e);
3892                        let msg = format!("error in __gc metamethod ({inner})");
3893                        self.emit_warn(msg.as_bytes(), false);
3894                    } else if first_err.is_none() {
3895                        let wrapped = if self.version >= LuaVersion::Lua52 {
3896                            let inner = self.error_text(&e);
3897                            let msg = format!("error in __gc metamethod ({inner})");
3898                            let s = Value::Str(self.heap.intern(msg.as_bytes()));
3899                            LuaError(s)
3900                        } else {
3901                            e
3902                        };
3903                        first_err = Some(wrapped);
3904                    }
3905                }
3906                self.pending_tm = saved_tm;
3907                if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
3908                    && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
3909                {
3910                    fr.tm = prev; // prev is Option<&'static str>; restore exactly
3911                }
3912            }
3913        }
3914        self.gc_finalizing = false;
3915        match first_err {
3916            Some(e) => Err(e),
3917            None => Ok(()),
3918        }
3919    }
3920
3921    /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
3922    /// Crosses up to three phases per call:
3923    ///   1. Pause      → seed Propagate (`gc_start_propagate`)
3924    ///   2. Propagate  → drain gray up to `budget`; on exhaustion run atomic
3925    ///                   (`gc_finish_atomic` → tobefnz populated; finalizers
3926    ///                   run via `run_finalizers`) and enter Sweep
3927    ///   3. Sweep      → `gc_sweep_step` up to (residual) `budget`
3928    /// Returns true when this call completed the cycle's sweep (back to
3929    /// Pause). The budget is spent generously across phases — a large `n`
3930    /// can finish a whole cycle in one call (PUC stop-the-world step).
3931    pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
3932        // Re-entry guard: never recurse — `run_finalizers` calls Lua code
3933        // that may hit a safe point and try to step again. Re-entry was OK
3934        // under STW (collect_garbage had its own guard) but here the
3935        // intermediate phase state would corrupt.
3936        if self.gc_finalizing {
3937            return false;
3938        }
3939        if self.heap.gc_phase_is_pause() {
3940            let (roots, extra) = self.gc_roots();
3941            self.heap.gc_start_propagate(&roots, &extra);
3942        }
3943        if self.heap.gc_phase_is_propagate() {
3944            if !self.heap.gc_step_propagate(budget) {
3945                return false;
3946            }
3947            self.heap.gc_finish_atomic();
3948            // any __gc scheduled by atomic — run before sweep so a finalizer
3949            // re-registering `self` re-enters the next cycle, not this sweep
3950            self.run_finalizers();
3951        }
3952        // either we just transitioned, or we entered already in Sweep, or
3953        // a finalizer started a new cycle (gc_sweep_step is a no-op then)
3954        self.heap.gc_sweep_step(budget)
3955    }
3956
3957    // ---- frames & calls ----
3958
3959    /// Begin calling stack[func_slot] with `nargs` (None: up to self.top).
3960    /// Returns true if a Lua frame was pushed (the dispatch loop continues
3961    /// there), false if a native completed inline.
3962    fn begin_call(
3963        &mut self,
3964        func_slot: u32,
3965        nargs: Option<u32>,
3966        nresults: i32,
3967        from_c: bool,
3968    ) -> Result<bool, LuaError> {
3969        let mut nargs = match nargs {
3970            Some(n) => n,
3971            None => self.top - (func_slot + 1),
3972        };
3973        // Consume `pending_is_tail` at the boundary: a tail-call op sets it
3974        // only for the immediately-following Lua activation. Native dispatch
3975        // (or `__call` resolution) below must not let it leak to the next
3976        // begin_call's frame; restore it just before push_frame for the Lua
3977        // arm so its meaning is preserved across __call chaining.
3978        let tailcalls = std::mem::take(&mut self.pending_tailcalls);
3979        // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
3980        // is inserted before the value so it becomes the first argument, and a
3981        // chain of `__call` tables resolves down to a real function.
3982        let mut chain = 0u32;
3983        loop {
3984            match self.stack[func_slot as usize] {
3985                Value::Closure(cl) => {
3986                    // P11-S2c.B JIT fast path: if the Proto's body fits
3987                    // the int-arith whitelist, every arg is `Value::Int`,
3988                    // and the cached arity matches, skip frame setup and
3989                    // run the cached native fn in-place.
3990                    if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
3991                        self.pending_tailcalls = tailcalls;
3992                        return Ok(false);
3993                    }
3994                    self.pending_tailcalls = tailcalls;
3995                    self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
3996                    // P12-S4-step0 — trace-on-call trigger. The frame
3997                    // we just pushed is the callee whose body the
3998                    // recorder will trace. Bump the per-Proto call
3999                    // counter; once it crosses `CALL_HOT_THRESHOLD`
4000                    // and no other trace is in flight, snapshot the
4001                    // callee's register window (R[0..max_stack]) and
4002                    // begin recording at `pc=0`. This is what unlocks
4003                    // tracing for functions whose body has no negative
4004                    // `Op::Jmp` back-edge (`fib`, recursive helpers).
4005                    //
4006                    // Gated on `trace_jit_enabled`, so the default
4007                    // dispatch pays a single not-taken branch.
4008                    if self.jit.trace_enabled {
4009                        let proto = cl.proto;
4010                        let c = proto.call_hot_count.get();
4011                        if c < u32::MAX / 2 {
4012                            proto.call_hot_count.set(c + 1);
4013                        }
4014                        // P13-S13-H — relaxed call-trigger:
4015                        // `c >= THRESHOLD` (was `c == THRESHOLD`) +
4016                        // `!already_cached` short-circuit. Lets a
4017                        // discarded short call-trigger close retry
4018                        // on the next call (fib(10/15/20/25)
4019                        // pathology — first capture is base-case
4020                        // [Lt,Jmp,Return1]; coverage-heuristic
4021                        // discards; next call gets to record at a
4022                        // potentially deeper recursion point).
4023                        // Without `already_cached`, the relaxed
4024                        // condition would re-record over a cached
4025                        // trace every call.
4026                        //
4027                        // P13-S13-K — additionally short-circuit on
4028                        // `proto.trace_gave_up`. The S13-I discard
4029                        // cap force-compiles a partial trace and
4030                        // flips this flag; subsequent calls into
4031                        // this Proto skip the RefCell borrow + Vec
4032                        // scan entirely.
4033                        if proto.trace_gave_up.get() {
4034                            return Ok(true);
4035                        }
4036                        let call_already_cached =
4037                            proto.traces.borrow().iter().any(|t| t.head_pc == 0);
4038                        if c >= crate::jit::trace::CALL_HOT_THRESHOLD
4039                            && self.jit.active_trace.is_none()
4040                            && !call_already_cached
4041                        {
4042                            // The new frame is on top: index in
4043                            // `self.frames` is `len() - 1`.
4044                            let frame_idx = self.frames.len() - 1;
4045                            // Snapshot R[0..max_stack] at the callee's
4046                            // base. `push_frame` resized `self.stack`
4047                            // to `base + max_stack`, so this window is
4048                            // guaranteed in-bounds.
4049                            let f = match &self.frames[frame_idx] {
4050                                CallFrame::Lua(f) => f,
4051                                _ => unreachable!("push_frame just pushed a Lua frame"),
4052                            };
4053                            let max_stack = cl.proto.max_stack as usize;
4054                            let base_us = f.base as usize;
4055                            let mut entry_tags = Vec::with_capacity(max_stack);
4056                            for i in 0..max_stack {
4057                                let (tag, _) = self.stack[base_us + i].unpack();
4058                                entry_tags.push(tag);
4059                            }
4060                            self.jit.active_trace =
4061                                Some(Box::new(crate::jit::trace::TraceRecord::start(
4062                                    cl.proto, 0, entry_tags, true,
4063                                )));
4064                            self.jit.recording_frame_base = frame_idx;
4065                        }
4066                    }
4067                    return Ok(true);
4068                }
4069                Value::Native(nc) => {
4070                    // v1.1 B10 Stage 2 — async-marked NativeClosure.
4071                    // Route through the cooperative-yield mechanism
4072                    // when async_mode is on; reject when called from
4073                    // a sync `eval`/`call_value` path (would have no
4074                    // executor to drive the returned future).
4075                    if nc.is_async {
4076                        if !self.async_mode {
4077                            let s = Value::Str(
4078                                self.heap.intern(b"async native called in sync context"),
4079                            );
4080                            self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
4081                            return Err(LuaError(s));
4082                        }
4083                        // Same root-up bookkeeping as the sync path:
4084                        // pin args + result-count expectation so a
4085                        // collection across the suspend boundary
4086                        // keeps the arg window live.
4087                        self.native_nresults = nresults;
4088                        self.gc_top = func_slot + nargs + 1;
4089                        // v1.3 Phase AS — fire the "call" hook BEFORE
4090                        // building the future. Mirrors the sync native
4091                        // path's `hook_call(true, nargs)` site
4092                        // (`exec.rs` further down) so embedders with a
4093                        // Rust debug hook installed see a Call event
4094                        // for async natives identical to the sync
4095                        // path. The matching "return" hook fires from
4096                        // `commit_async_native_result` in
4097                        // `async_drive.rs` after the future resolves.
4098                        // Placement follows audit §"Open questions"
4099                        // Q6: after the `native_nresults` / `gc_top`
4100                        // pin, before the future is constructed, so a
4101                        // hook body that triggers GC observes the
4102                        // correct pinned window. On hook error the
4103                        // sentinel never returns and
4104                        // `pending_async_native_*` remain `None` —
4105                        // the executor sees `DispatchOutcome::Error`
4106                        // (audit §A.1 edge cases).
4107                        self.hook_call(true, nargs)?;
4108                        // Transmute the stored NativeFn back to its
4109                        // real AsyncNativeFn shape. Sound because
4110                        // `set_async_native` / `create_async_native`
4111                        // installed an AsyncNativeFn through the
4112                        // identically-sized fn-pointer slot, and the
4113                        // `is_async` marker bit is what records that
4114                        // fact.
4115                        let async_fn: crate::vm::async_drive::AsyncNativeFn =
4116                            // SAFETY: same-size fn pointers; provenance
4117                            // preserved through `mem::transmute`. The
4118                            // `is_async` marker is the only safe-to-call
4119                            // gate, set exclusively by
4120                            // `Vm::create_async_native`.
4121                            unsafe { std::mem::transmute(nc.f) };
4122                        let vm_ptr: *mut Vm = self;
4123                        let fut = async_fn(vm_ptr, func_slot, nargs);
4124                        // Stash the future + post-call context for
4125                        // `drive_one` to surface to `EvalFuture::poll`.
4126                        self.pending_async_native_fut = Some(fut);
4127                        self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
4128                            func_slot,
4129                            nargs,
4130                            nresults,
4131                            gc_top: self.gc_top,
4132                        });
4133                        // Sentinel Err walked up to `drive_one` (same
4134                        // shape as `host_yield_pending`'s budget yield).
4135                        // Value::Nil — never seen by user code.
4136                        return Err(LuaError(Value::Nil));
4137                    }
4138                    // pcall/xpcall are yieldable: rather than calling the
4139                    // protected function through the Rust stack (which cannot be
4140                    // suspended), push a continuation frame and drive the call
4141                    // through the interpreter loop (PUC lua_pcallk). A yield
4142                    // inside it is preserved with the thread's saved frames.
4143                    use crate::runtime::value::NativeFn;
4144                    if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
4145                        return self.begin_pcall(func_slot, nargs, nresults);
4146                    }
4147                    if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
4148                        return self.begin_xpcall(func_slot, nargs, nresults);
4149                    }
4150                    // pairs(t) with a __pairs metamethod calls it yieldably (PUC
4151                    // luaB_pairs); without one, fall through to the plain native.
4152                    if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
4153                        let arg = self.stack[(func_slot + 1) as usize];
4154                        if !self.get_mm(arg, Mm::Pairs).is_nil() {
4155                            return self.begin_pairs(func_slot, nresults);
4156                        }
4157                    }
4158                    // a native that collects (e.g. `collectgarbage`) roots up to
4159                    // its own arguments — the caller's live registers all sit
4160                    // below `func_slot` and stay rooted.
4161                    self.native_nresults = nresults;
4162                    self.gc_top = func_slot + nargs + 1;
4163                    // Push the native onto the running-natives chain BEFORE
4164                    // firing the call hook so that `debug.getinfo(level)` and
4165                    // `arg_error` from inside the hook see this native as the
4166                    // currently-running C function (db.lua :344 reads
4167                    // `getinfo(2, "f").func` for the just-entered callee).
4168                    // Popped after the matching return hook fires — even on
4169                    // error, the pop must happen, so the body is bracketed
4170                    // through a scope guard.
4171                    self.running_natives.push(nc);
4172                    self.running_native_slots.push((func_slot, nargs));
4173                    // PUC luaD_precall fires the "call" hook for C functions too.
4174                    // A yield inside the native (coroutine.yield) propagates an
4175                    // Err and the matching "return" hook fires on resume instead.
4176                    if let Err(e) = self.hook_call(true, nargs) {
4177                        self.running_natives.pop();
4178                        self.running_native_slots.pop();
4179                        return Err(e);
4180                    }
4181                    // P09: trap a Rust panic in the native and surface it as
4182                    // a Lua error rather than letting it unwind through the
4183                    // VM into the embedder. The VM's internal state may still
4184                    // be inconsistent after a panic (half-pushed args,
4185                    // dangling GC references), so embedders that catch this
4186                    // class of error should drop and re-create the Vm — but
4187                    // it's still better than tearing the host process down.
4188                    // `AssertUnwindSafe` is sound because the caller is the
4189                    // dispatch loop and any half-done state is fenced behind
4190                    // the immediate Err return below.
4191                    use std::panic::{AssertUnwindSafe, catch_unwind};
4192                    let result =
4193                        match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
4194                            Ok(r) => r,
4195                            Err(payload) => {
4196                                let msg = panic_payload_str(&payload);
4197                                let s = Value::Str(
4198                                    self.heap.intern(format!("native panic: {msg}").as_bytes()),
4199                                );
4200                                Err(LuaError(s))
4201                            }
4202                        };
4203                    let nret = match result {
4204                        Ok(n) => n,
4205                        Err(e) => {
4206                            // Stash the offending native's name BEFORE the
4207                            // pop so a dying coroutine's traceback snapshot
4208                            // can prepend `[C]: in function '<name>'`. Use
4209                            // pushglobalfuncname (PUC walks package.loaded
4210                            // to qualify); fall back to "?".
4211                            self.errored_native =
4212                                Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
4213                            self.running_natives.pop();
4214                            self.running_native_slots.pop();
4215                            return Err(e);
4216                        }
4217                    };
4218                    // PUC `luaD_poscall` fires the return hook BEFORE moving
4219                    // results into the function's slot — at that point args
4220                    // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
4221                    // results above them at `[func_slot + 1 + nargs, …)`.
4222                    // luna's `nat_return` has already written the results
4223                    // into `[func_slot, func_slot + nret)`, so we replay PUC's
4224                    // layout by copying the results up past the preserved
4225                    // args, firing the hook (with ftransfer = nargs + 1, so
4226                    // `getlocal(2, ftransfer..)` reads results), and then
4227                    // copying back for `finish_results`. db.lua :541 reads
4228                    // `getinfo("r").ftransfer` + `getlocal` to inspect a
4229                    // returning native's results this way.
4230                    if self.hook.ret
4231                        && !self.in_hook
4232                        && (self.hook.func.is_some() || self.hook.rust_func.is_some())
4233                    {
4234                        let res_dst = func_slot + nargs + 1;
4235                        let need = (res_dst + nret) as usize;
4236                        if self.stack.len() < need {
4237                            self.stack.resize(need, Value::Nil);
4238                        }
4239                        for i in (0..nret).rev() {
4240                            self.stack[(res_dst + i) as usize] =
4241                                self.stack[(func_slot + i) as usize];
4242                        }
4243                        // widen the C-frame's argument window for getlocal
4244                        if let Some(slot) = self.running_native_slots.last_mut() {
4245                            slot.1 = nargs + nret;
4246                        }
4247                        let hr = self.hook_return(true, nargs + 1, nret);
4248                        if let Some(slot) = self.running_native_slots.last_mut() {
4249                            slot.1 = nargs;
4250                        }
4251                        // restore results into the slot finish_results expects
4252                        for i in 0..nret {
4253                            self.stack[(func_slot + i) as usize] =
4254                                self.stack[(res_dst + i) as usize];
4255                        }
4256                        self.running_natives.pop();
4257                        self.running_native_slots.pop();
4258                        hr?;
4259                    } else {
4260                        self.running_natives.pop();
4261                        self.running_native_slots.pop();
4262                    }
4263                    self.finish_results(func_slot, nret, nresults);
4264                    // the native may have allocated; collect with the results as
4265                    // the live boundary (PUC checks GC after a call returns).
4266                    self.maybe_collect_garbage(self.top);
4267                    return Ok(false);
4268                }
4269                v => {
4270                    let mm = self.get_mm(v, Mm::Call);
4271                    if mm.is_nil() {
4272                        return Err(self.call_err(v));
4273                    }
4274                    chain += 1;
4275                    // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
4276                    // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
4277                    // and the 5.5 test exercises the new tight bound directly
4278                    // (calls.lua :225 builds a 16-deep chain and expects the
4279                    // 16th to error). 5.4 calls.lua :194 instead builds a 20-
4280                    // deep chain and expects it to succeed.
4281                    let cap = if self.version >= crate::version::LuaVersion::Lua55 {
4282                        15
4283                    } else {
4284                        MAX_CCMT
4285                    };
4286                    if chain > cap {
4287                        return Err(self.rt_err("'__call' chain too long"));
4288                    }
4289                    // slots above shift by one; at a call site those are dead
4290                    // temps of the current frame
4291                    self.stack.insert(func_slot as usize, mm);
4292                    if self.top > func_slot {
4293                        self.top += 1;
4294                    }
4295                    nargs += 1;
4296                }
4297            }
4298        }
4299    }
4300
4301    fn push_frame(
4302        &mut self,
4303        cl: Gc<LuaClosure>,
4304        func_slot: u32,
4305        nargs: u32,
4306        nresults: i32,
4307        from_c: bool,
4308    ) -> Result<(), LuaError> {
4309        if func_slot + 256 > MAX_LUA_STACK {
4310            // PUC `stackerror`: a stack overflow that surfaces while the
4311            // current activation is inside an xpcall message handler is
4312            // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
4313            // error handling". errors.lua :606 expects the inner pcall(loop)
4314            // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
4315            // message matching "error handling".
4316            let msg = if self.msgh_depth > 0 {
4317                "error in error handling"
4318            } else {
4319                "stack overflow"
4320            };
4321            return Err(self.rt_err(msg));
4322        }
4323        let proto = cl.proto;
4324        let nparams = proto.num_params as u32;
4325        // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
4326        // stack just below the new `base`, so a named vararg can be indexed
4327        // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
4328        // `[e1..em][p1..pn]` so the fixed params land at the new base.
4329        let n_varargs = if proto.is_vararg {
4330            nargs.saturating_sub(nparams)
4331        } else {
4332            0
4333        };
4334        if n_varargs > 0 {
4335            let s = (func_slot + 1) as usize;
4336            self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
4337        }
4338        let base = func_slot + 1 + n_varargs;
4339        let need = (base + proto.max_stack as u32) as usize;
4340        if self.stack.len() < need {
4341            self.stack.resize(need, Value::Nil);
4342        }
4343        // wipe the register window beyond the kept parameters (stale values —
4344        // required for GC-safety and codegen). The varargs below `base` survive.
4345        let kept = nargs.saturating_sub(n_varargs).min(nparams);
4346        // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
4347        // need` since `base + nparams <= base + max_stack = need` and `kept <=
4348        // nparams`. `slice::fill` lowers to a single memset on Copy types.
4349        unsafe {
4350            self.stack
4351                .get_unchecked_mut((base + kept) as usize..need)
4352                .fill(Value::Nil);
4353        }
4354        frames_push_sync(
4355            &mut self.frames,
4356            &mut self.frames_top,
4357            CallFrame::Lua(Frame {
4358                closure: cl,
4359                base,
4360                pc: 0,
4361                func_slot,
4362                nresults,
4363                hook_oldpc: u32::MAX,
4364                from_c,
4365                n_varargs,
4366                // single-shot consume: `close_slots` sets pending_tm before each
4367                // handler call; the next Lua frame born is that handler's.
4368                tm: self.pending_tm.take(),
4369                // `run_hook` sets `pending_is_hook` before dispatching the user
4370                // hook so its frame reports `namewhat = "hook"` via getinfo.
4371                is_hook: std::mem::take(&mut self.pending_is_hook),
4372                tailcalls: std::mem::take(&mut self.pending_tailcalls),
4373            }),
4374        );
4375        // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
4376        // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
4377        // the slot at `base + nparams`; the extras sit just below `base` from
4378        // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
4379        // hook; vararg.lua's contradictory expectations were already going to
4380        // fail either way (some asserts want `arg == nil`).
4381        if proto.has_compat_vararg_arg {
4382            let arg_slot = (base + nparams) as usize;
4383            let t = self.heap.new_table();
4384            {
4385                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4386                let tm = unsafe { t.as_mut() };
4387                for i in 0..n_varargs {
4388                    let v = self.stack[(base - n_varargs + i) as usize];
4389                    // bounded by `n_varargs` (≤ MAXUPVAL territory), well
4390                    // below `MAX_ASIZE`
4391                    let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
4392                }
4393                let nk = Value::Str(self.heap.intern(b"n"));
4394                tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
4395                    .expect("'n' key");
4396            }
4397            // once-per-table barrier mirrors SETLIST: t is born BLACK during
4398            // Propagate and the bulk `set_int`/`set` calls above don't barrier
4399            self.heap
4400                .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
4401            self.stack[arg_slot] = Value::Table(t);
4402        }
4403        // PUC luaD_precall fires the "call" hook with the new frame current, so
4404        // a hook calling debug.getinfo(2) sees the entered function. For a Lua
4405        // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
4406        // fixed params count — extras already live below `base`).
4407        // A frame born via OP_TailCall fires "tail call" instead (PUC
4408        // luaD_pretailcall) and skips the matching "return" hook on exit.
4409        let is_tail = self
4410            .frames
4411            .last()
4412            .and_then(|f| f.lua())
4413            .is_some_and(|f| f.tailcalls > 0);
4414        self.hook_call_with(false, nparams, is_tail)?;
4415        Ok(())
4416    }
4417
4418    /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4419    /// the protected call `f` through the interpreter loop. The protected
4420    /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4421    /// at `func_slot+1` lets its results land one slot above the continuation —
4422    /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4423    /// Always returns `Ok(true)`: a continuation is now on the stack to be
4424    /// resolved by the loop (even when `f` is a native that already ran inline).
4425    fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4426        if nargs == 0 {
4427            return Err(crate::vm::builtins::raise_str(
4428                self,
4429                "bad argument #1 to 'pcall' (value expected)",
4430            ));
4431        }
4432        if self.pcall_depth >= MAX_C_DEPTH {
4433            return Err(self.rt_err("C stack overflow"));
4434        }
4435        self.pcall_depth += 1;
4436        frames_push_sync(
4437            &mut self.frames,
4438            &mut self.frames_top,
4439            CallFrame::Cont(NativeCont {
4440                kind: ContKind::Pcall,
4441                func_slot,
4442                nresults,
4443            }),
4444        );
4445        // call f (slot func_slot+1) with the remaining args, asking for all
4446        // results; a yield or error inside propagates with the continuation kept
4447        // on the stack (caught by `unwind` / preserved across a yield).
4448        self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4449        Ok(true)
4450    }
4451
4452    /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4453    /// message handler is stashed in the continuation and the arguments are
4454    /// shifted down over the handler's slot so `f`'s args are contiguous.
4455    fn begin_xpcall(
4456        &mut self,
4457        func_slot: u32,
4458        nargs: u32,
4459        nresults: i32,
4460    ) -> Result<bool, LuaError> {
4461        if nargs < 2 {
4462            return Err(crate::vm::builtins::raise_str(
4463                self,
4464                "bad argument #2 to 'xpcall' (value expected)",
4465            ));
4466        }
4467        if self.pcall_depth >= MAX_C_DEPTH {
4468            return Err(self.rt_err("C stack overflow"));
4469        }
4470        self.pcall_depth += 1;
4471        // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4472        // close its gap so f's args become [f@+1, a1@+2, ...].
4473        let handler = self.stack[(func_slot + 2) as usize];
4474        let nfargs = nargs - 2;
4475        for i in 0..nfargs {
4476            self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4477        }
4478        self.top = func_slot + 2 + nfargs;
4479        frames_push_sync(
4480            &mut self.frames,
4481            &mut self.frames_top,
4482            CallFrame::Cont(NativeCont {
4483                kind: ContKind::Xpcall { handler },
4484                func_slot,
4485                nresults,
4486            }),
4487        );
4488        self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4489        Ok(true)
4490    }
4491
4492    /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4493    /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4494    /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4495    /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4496    /// results land exactly where `pairs`'s results belong.
4497    fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4498        let arg = self.stack[(func_slot + 1) as usize];
4499        let mm = self.get_mm(arg, Mm::Pairs);
4500        // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4501        self.stack[func_slot as usize] = mm;
4502        self.top = func_slot + 2;
4503        frames_push_sync(
4504            &mut self.frames,
4505            &mut self.frames_top,
4506            CallFrame::Cont(NativeCont {
4507                kind: ContKind::Pairs,
4508                func_slot,
4509                nresults,
4510            }),
4511        );
4512        self.begin_call(func_slot, Some(1), 4, true)?;
4513        Ok(true)
4514    }
4515
4516    /// The running (top) Lua frame. The interpreter only reads this while a Lua
4517    /// frame is on top — a continuation frame is never the running frame (it is
4518    /// consumed the instant the call it protects unwinds onto it).
4519    #[inline]
4520    fn top_frame(&self) -> &Frame {
4521        self.frames
4522            .last()
4523            .and_then(CallFrame::lua)
4524            .expect("running Lua frame")
4525    }
4526
4527    #[inline]
4528    fn top_frame_mut(&mut self) -> &mut Frame {
4529        self.frames
4530            .last_mut()
4531            .and_then(CallFrame::lua_mut)
4532            .expect("running Lua frame")
4533    }
4534
4535    /// Pad/announce results sitting at func_slot.
4536    pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4537        if wanted < 0 {
4538            self.top = func_slot + nret;
4539        } else {
4540            let wanted = wanted as u32;
4541            let need = (func_slot + wanted) as usize;
4542            if self.stack.len() < need {
4543                self.stack.resize(need, Value::Nil);
4544            }
4545            for i in nret..wanted {
4546                self.stack[(func_slot + i) as usize] = Value::Nil;
4547            }
4548            self.top = func_slot + wanted;
4549        }
4550    }
4551
    /// v1.1 B10 Stage 1 — current call-frame depth (read-only): the number
    /// of entries on `frames`. Continuation frames live on the same list, so
    /// they are counted too.
    /// Used by `EvalFuture` on the bootstrap poll to compute the
    /// `entry_depth` it will pass to subsequent resume slices.
    pub(crate) fn frame_count(&self) -> usize {
        self.frames.len()
    }
4558
4559    fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4560        let nret = self.top - func_slot;
4561        let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4562        self.stack.truncate(func_slot as usize);
4563        self.top = func_slot;
4564        out
4565    }
4566
4567    // ---- open upvalues ----
4568
4569    #[doc(hidden)]
4570    pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4571        match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4572            Ok(i) => self.open_upvals[i].1,
4573            Err(i) => {
4574                let uv = self.heap.new_upvalue(UpvalState::Open {
4575                    slot,
4576                    thread: self.current,
4577                });
4578                self.open_upvals.insert(i, (slot, uv));
4579                uv
4580            }
4581        }
4582    }
4583
4584    pub(crate) fn close_from(&mut self, slot: u32) {
4585        while let Some(&(s, uv)) = self.open_upvals.last() {
4586            if s < slot {
4587                break;
4588            }
4589            let v = self.stack[s as usize];
4590            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4591            unsafe { uv.as_mut() }.set_closed(v);
4592            self.heap
4593                .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4594            self.open_upvals.pop();
4595        }
4596    }
4597
4598    /// Register a to-be-closed slot (TBC op / generic-for closing value).
4599    fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4600        let v = self.stack[slot as usize];
4601        if matches!(v, Value::Nil | Value::Bool(false)) {
4602            return Ok(()); // nil and false are silently ignored
4603        }
4604        if self.get_mm(v, Mm::Close).is_nil() {
4605            // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4606            // (a <type> value)"; the local's name comes from the running
4607            // frame's locvars at this pc.
4608            let tn = v.type_name();
4609            let f = self.top_frame();
4610            let reg = slot - f.base;
4611            let pc = (f.pc as usize).saturating_sub(1);
4612            let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4613                Some(n) => format!("variable '{n}'"),
4614                None => "to-be-closed slot".to_string(),
4615            };
4616            return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4617        }
4618        debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4619        self.tbc.push(slot);
4620        Ok(())
4621    }
4622
4623    /// Close upvalues and run `__close` handlers for slots ≥ `from`
4624    /// (handlers in reverse registration order; PUC luaF_close).
4625    fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
4626        self.close_from(from);
4627        // PUC: handlers run in reverse declaration order; an error raised by a
4628        // handler becomes the error object passed to the remaining ones, and
4629        // the rest are still closed. The last raised error propagates.
4630        let mut pending = err;
4631        let mut result = Ok(());
4632        let saved_err = self.closing_err;
4633        // On a normal close the handler runs within the closing function's
4634        // activation (debug parent = that function); during error unwinding the
4635        // function's frame is already gone, so the handler sits at the C
4636        // boundary instead (PUC: luaF_close runs after the ci is restored).
4637        let error_close = err.is_some();
4638        while let Some(&s) = self.tbc.last() {
4639            if s < from {
4640                break;
4641            }
4642            self.tbc.pop();
4643            let v = self.stack[s as usize];
4644            if matches!(v, Value::Nil | Value::Bool(false)) {
4645                continue;
4646            }
4647            let mm = self.get_mm(v, Mm::Close);
4648            if mm.is_nil() {
4649                // PUC `prepclosingmethod`: the __close metamethod was present
4650                // at OP_TBC (else we would have errored there) but has since
4651                // been removed/replaced. Treat as a non-callable target.
4652                let tn = self.obj_typename(v);
4653                let e = self.rt_err(&format!(
4654                    "attempt to call a {tn} value (metamethod 'close')"
4655                ));
4656                pending = Some(e.0);
4657                result = Err(e);
4658                continue;
4659            }
4660            // root the pending error: a handler may trigger a collection
4661            self.closing_err = pending;
4662            // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
4663            // getinfo report the handler as "in metamethod 'close'". Saved/
4664            // restored around the call to cover the path where `mm` is a
4665            // native (`push_frame` never consumes it) or it raises before
4666            // reaching push_frame.
4667            let saved_tm = self.pending_tm.replace("close");
4668            // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
4669            // is nil on a normal close (5.4 locals.lua :875's
4670            // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
4671            // through the yield). PUC 5.5 dropped the trailing nil: a clean
4672            // close passes only `obj`, the error case still passes both
4673            // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
4674            // normal-close arms, n=2 for the error arm).
4675            let call = match pending {
4676                Some(e) => self.call_value_impl(mm, &[v, e], error_close),
4677                None => {
4678                    if self.version >= LuaVersion::Lua55 {
4679                        self.call_value_impl(mm, &[v], error_close)
4680                    } else {
4681                        self.call_value_impl(mm, &[v, Value::Nil], error_close)
4682                    }
4683                }
4684            };
4685            self.pending_tm = saved_tm;
4686            if let Err(e) = call {
4687                pending = Some(e.0);
4688                result = Err(e);
4689            }
4690        }
4691        self.closing_err = saved_err;
4692        result
4693    }
4694
    /// Yieldable variant of `close_slots`: drive the chain of `__close`
    /// handlers for slots ≥ `from` through the interpreter loop with a
    /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
    /// suspends cleanly (the close iteration's state rides on the thread's
    /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
    /// applied to `luaF_close`. `after` runs when every slot is closed; if
    /// `after` is `Return` and we've returned past `entry_depth`,
    /// `Ok(Some(vals))` carries the result up to the host caller.
    ///
    /// `err` is the error object being propagated (if any); it is handed to
    /// `drive_close` as the initial pending error.
    fn begin_close(
        &mut self,
        from: u32,
        err: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        // Upvalues are closed first, then the handler chain is started —
        // the same order `close_slots` uses.
        self.close_from(from);
        self.drive_close(from, err, after, entry_depth)
    }
4713
4714    /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
4715    /// non-callable-mm error for an `__close` that was reset to a bad value
4716    /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
4717    /// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
4718    /// the interpreter then drives the handler and re-enters this driver via
4719    /// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
4720    /// the threaded error (if any) propagates or `after` fires.
4721    fn drive_close(
4722        &mut self,
4723        from: u32,
4724        mut pending: Option<Value>,
4725        after: AfterClose,
4726        entry_depth: usize,
4727    ) -> Result<Option<Vec<Value>>, LuaError> {
4728        loop {
4729            let drained = match self.tbc.last() {
4730                None => true,
4731                Some(&s) => s < from,
4732            };
4733            if drained {
4734                return self.finish_close_after(after, pending, entry_depth);
4735            }
4736            let s = self.tbc.pop().expect("tbc non-empty");
4737            let v = self.stack[s as usize];
4738            if matches!(v, Value::Nil | Value::Bool(false)) {
4739                continue;
4740            }
4741            let mm = self.get_mm(v, Mm::Close);
4742            if mm.is_nil() {
4743                let tn = self.obj_typename(v);
4744                let e = self.rt_err(&format!(
4745                    "attempt to call a {tn} value (metamethod 'close')"
4746                ));
4747                pending = Some(e.0);
4748                continue;
4749            }
4750            // A real handler: stage [mm, v, (err?)] above the current top,
4751            // record the close iteration state in a Cont::Close, and let the
4752            // interpreter dispatch the handler. On return the run() head
4753            // re-enters this driver via the Cont::Close consumer.
4754            let func_slot = self.top;
4755            let error_close = pending.is_some();
4756            let need = (func_slot + 3) as usize;
4757            if self.stack.len() < need {
4758                self.stack.resize(need, Value::Nil);
4759            }
4760            self.stack[func_slot as usize] = mm;
4761            self.stack[func_slot as usize + 1] = v;
4762            // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
4763            // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
4764            let nargs = match pending {
4765                Some(e) => {
4766                    self.stack[func_slot as usize + 2] = e;
4767                    2u32
4768                }
4769                None => {
4770                    if self.version >= LuaVersion::Lua55 {
4771                        1u32
4772                    } else {
4773                        self.stack[func_slot as usize + 2] = Value::Nil;
4774                        2u32
4775                    }
4776                }
4777            };
4778            self.top = func_slot + 1 + nargs;
4779            // Root the pending error during the call (a handler may collect).
4780            let saved_err = self.closing_err;
4781            self.closing_err = pending;
4782            // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
4783            // for traceback / getinfo.
4784            let saved_tm = self.pending_tm.replace("close");
4785            frames_push_sync(
4786                &mut self.frames,
4787                &mut self.frames_top,
4788                CallFrame::Cont(NativeCont {
4789                    kind: ContKind::Close(CloseCont {
4790                        from,
4791                        pending,
4792                        after,
4793                    }),
4794                    func_slot,
4795                    nresults: 0,
4796                }),
4797            );
4798            // PUC luaF_close runs a normal close *within* the closing
4799            // function's activation (debug parent = that function); during an
4800            // error unwind the function's frame is already gone and the
4801            // handler sits at the C boundary instead.
4802            let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
4803            self.pending_tm = saved_tm;
4804            self.closing_err = saved_err;
4805            r?;
4806            return Ok(None);
4807        }
4808    }
4809
4810    /// Fire `after` once every `__close` handler has run. `Block` propagates
4811    /// any remaining error or simply continues; `Return` performs OP_Return's
4812    /// tail (hook + frame pop + result delivery) and may surface results to
4813    /// the host when the function whose return triggered the close was the
4814    /// entry activation, but only on a clean drain — a pending error skips
4815    /// the return tail and propagates instead. `ResumeUnwind` pops the
4816    /// deferred Lua frame and re-raises, letting a handler's own error win
4817    /// over the original propagating one (PUC luaF_close).
4818    fn finish_close_after(
4819        &mut self,
4820        after: AfterClose,
4821        pending: Option<Value>,
4822        entry_depth: usize,
4823    ) -> Result<Option<Vec<Value>>, LuaError> {
4824        match after {
4825            AfterClose::Block => match pending {
4826                Some(e) => Err(LuaError(e)),
4827                None => Ok(None),
4828            },
4829            AfterClose::Return {
4830                abs_a,
4831                nret,
4832                from_native,
4833            } => match pending {
4834                Some(e) => Err(LuaError(e)),
4835                None => self.complete_return(abs_a, nret, from_native, entry_depth),
4836            },
4837            AfterClose::ResumeUnwind { func_slot, err } => {
4838                // The aborting Lua frame was popped before `begin_close`;
4839                // restore the catcher's stack window down to `func_slot` and
4840                // re-raise — preferring a handler-raised error over the
4841                // original (PUC luaF_close).
4842                self.stack.truncate(func_slot as usize);
4843                self.top = func_slot;
4844                self.tbc.retain(|&s| s < func_slot);
4845                Err(LuaError(pending.unwrap_or(err)))
4846            }
4847        }
4848    }
4849
4850    /// OP_Return's post-close tail: fire the "return" hook (frame still
4851    /// current), pop the Lua frame, slide results into `func_slot`, then
4852    /// either hand them to the host (`Ok(Some(vals))` when we've returned
4853    /// past `entry_depth`), leave them contiguous for an exposed
4854    /// pcall/xpcall continuation, or finish into the caller's expected
4855    /// result slot. Mirrors the synchronous OP_Return tail so both paths
4856    /// share semantics — the `from_native` flag selects the right "return"
4857    /// hook context for `hook_return`.
4858    fn complete_return(
4859        &mut self,
4860        abs_a: u32,
4861        nret: u32,
4862        from_native: bool,
4863        entry_depth: usize,
4864    ) -> Result<Option<Vec<Value>>, LuaError> {
4865        // ftransfer is the local index (1-based) of the first result, as
4866        // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
4867        // exposes locals starting at `frame.base` (= func_slot + 1 +
4868        // n_varargs for a vararg call), so the conversion is the absolute
4869        // result slot minus base, plus one to make it 1-based. db.lua 5.4
4870        // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
4871        // shape end-to-end.
4872        let ftransfer = self
4873            .frames
4874            .last()
4875            .and_then(CallFrame::lua)
4876            .map(|fr| {
4877                let raw = abs_a.saturating_sub(fr.base) + 1;
4878                // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
4879                // local injected at index `numparams + 1`, so getlocal
4880                // numbering shifts results past it (5.5 db.lua :539
4881                // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
4882                if fr.closure.proto.has_vararg_table_pseudo {
4883                    raw + 1
4884                } else {
4885                    raw
4886                }
4887            })
4888            .unwrap_or(1);
4889        // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
4890        // per tail call that collapsed into this activation, *after* its
4891        // own "return". `tailcalls` tracks that count exactly (PUC
4892        // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
4893        // "return" hook fires once even when the activation absorbed
4894        // multiple tail calls — only `istailcall` on getinfo surfaces the
4895        // collapse. 5.1 db.lua :366 pins the event ordering.
4896        let tailcalls = if self.version <= LuaVersion::Lua51 {
4897            self.frames
4898                .last()
4899                .and_then(|f| f.lua())
4900                .map(|f| f.tailcalls)
4901                .unwrap_or(0)
4902        } else {
4903            0
4904        };
4905        self.hook_return(from_native, ftransfer, nret)?;
4906        for _ in 0..tailcalls {
4907            self.hook_tail_return()?;
4908        }
4909        let CallFrame::Lua(fr) =
4910            frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
4911        else {
4912            unreachable!("returning from a non-Lua frame")
4913        };
4914        for i in 0..nret {
4915            self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
4916        }
4917        if self.frames.len() < entry_depth {
4918            self.top = fr.func_slot + nret;
4919            return Ok(Some(self.take_results(fr.func_slot)));
4920        } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
4921            self.top = fr.func_slot + nret;
4922        } else {
4923            self.finish_results(fr.func_slot, nret, fr.nresults);
4924        }
4925        Ok(None)
4926    }
4927
4928    #[doc(hidden)]
4929    pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
4930        match cl.upvals()[idx as usize].state() {
4931            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
4932            UpvalState::Closed(v) => v,
4933        }
4934    }
4935
4936    fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
4937        let uv = cl.upvals()[idx as usize];
4938        match uv.state() {
4939            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
4940            UpvalState::Closed(_) => {
4941                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4942                unsafe { uv.as_mut() }.set_closed(v);
4943                // forward barrier: a closed upvalue is single-slot, so the
4944                // forward variant is cheaper than barrier_back (PUC uses
4945                // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
4946                // tables / threads).
4947                self.heap
4948                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4949            }
4950        }
4951    }
4952
4953    // ---- register / error helpers ----
4954
4955    #[inline(always)]
4956    fn r(&self, base: u32, i: u32) -> Value {
4957        // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
4958        // at frame entry (`push_frame` sizes the stack up to base + max_stack),
4959        // and every bytecode-generated reference falls within `[0, max_stack)`.
4960        // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
4961        // reason. The bounds check would re-validate this invariant on every
4962        // op — the dispatch hot path can't afford it.
4963        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4964        unsafe { *self.stack.get_unchecked((base + i) as usize) }
4965    }
4966
4967    #[inline(always)]
4968    fn set_r(&mut self, base: u32, i: u32, v: Value) {
4969        // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
4970        // frame-entry contract.
4971        unsafe {
4972            *self.stack.get_unchecked_mut((base + i) as usize) = v;
4973        }
4974    }
4975
4976    #[doc(hidden)]
4977    pub fn rt_err(&mut self, msg: &str) -> LuaError {
4978        let text = match self.position_prefix() {
4979            Some(p) => format!("{p}{msg}"),
4980            None => msg.to_string(),
4981        };
4982        LuaError(Value::Str(self.heap.intern(text.as_bytes())))
4983    }
4984
4985    pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
4986        let extra = self.subject_varinfo(v);
4987        let tn = self.obj_typename(v);
4988        self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
4989    }
4990
4991    /// Name the offending operand of the current instruction (PUC varinfo) for
4992    /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
4993    /// to the instruction's subject register(s); a native-raised error whose
4994    /// current instruction doesn't hold `bad` simply yields "".
4995    fn subject_varinfo(&self, bad: Value) -> String {
4996        use crate::vm::isa::Op;
4997        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
4998            return String::new();
4999        };
5000        let proto = f.closure.proto;
5001        let p: &crate::runtime::Proto = &proto;
5002        let pc = f.pc as usize;
5003        if pc == 0 || pc > p.code.len() {
5004            return String::new();
5005        }
5006        let instr = p.code[pc - 1];
5007        let mut cands: Vec<u32> = Vec::new();
5008        match instr.op() {
5009            // indexed reads / length / method: the table/object is in B
5010            Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5011                cands.push(instr.b());
5012            }
5013            // indexed writes / calls: the table/function is in A
5014            Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5015                cands.push(instr.a());
5016            }
5017            // arithmetic/bitwise: a register operand (B, and C unless constant)
5018            Op::Add
5019            | Op::Sub
5020            | Op::Mul
5021            | Op::Div
5022            | Op::Mod
5023            | Op::Pow
5024            | Op::IDiv
5025            | Op::BAnd
5026            | Op::BOr
5027            | Op::BXor
5028            | Op::Shl
5029            | Op::Shr => {
5030                cands.push(instr.b());
5031                if !instr.k() {
5032                    cands.push(instr.c());
5033                }
5034            }
5035            Op::Unm | Op::BNot => cands.push(instr.b()),
5036            Op::Concat => {
5037                let a = instr.a();
5038                for r in a..a + instr.b() {
5039                    cands.push(r);
5040                }
5041            }
5042            _ => {}
5043        }
5044        for reg in cands {
5045            if self.r(f.base, reg).raw_eq(bad) {
5046                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5047                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5048                    None => String::new(),
5049                };
5050            }
5051        }
5052        String::new()
5053    }
5054
5055    /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5056    /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5057    /// 'add')" when the call is a metamethod dispatched by the current opcode.
5058    fn call_err(&mut self, v: Value) -> LuaError {
5059        let extra = self.call_target_varinfo(v);
5060        let tn = self.obj_typename(v);
5061        self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5062    }
5063
5064    /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5065    /// frame before the call, so the opcode that triggered it lives in the
5066    /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5067    /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5068    fn call_target_varinfo(&self, bad: Value) -> String {
5069        use crate::vm::isa::Op;
5070        let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5071            return String::new();
5072        };
5073        let proto = f.closure.proto;
5074        let p: &crate::runtime::Proto = &proto;
5075        let pc = f.pc as usize;
5076        if pc == 0 || pc > p.code.len() {
5077            return String::new();
5078        }
5079        let instr = p.code[pc - 1];
5080        match instr.op() {
5081            Op::Call | Op::TailCall => {
5082                let reg = instr.a();
5083                if self.r(f.base, reg).raw_eq(bad) {
5084                    match crate::vm::objname::getobjname(p, pc - 1, reg) {
5085                        Some((kind, name)) => format!(" ({kind} '{name}')"),
5086                        None => String::new(),
5087                    }
5088                } else {
5089                    String::new()
5090                }
5091            }
5092            op => match mm_event_name(op) {
5093                Some(ev) => format!(" (metamethod '{ev}')"),
5094                None => String::new(),
5095            },
5096        }
5097    }
5098
5099    /// "number has no integer representation", enriched (PUC luaG_tointerror)
5100    /// with a "(field 'x')"-style suffix naming the offending operand of the
5101    /// current arithmetic instruction when it can be recovered from bytecode.
5102    fn no_int_rep_err(&mut self) -> LuaError {
5103        let extra = self.bad_operand_varinfo();
5104        self.rt_err(&format!("number{extra} has no integer representation"))
5105    }
5106
5107    /// Inspect the current frame's faulting instruction: find the register
5108    /// operand holding a float with no integer representation and name it.
5109    fn bad_operand_varinfo(&self) -> String {
5110        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5111            return String::new();
5112        };
5113        let proto = f.closure.proto;
5114        let p: &crate::runtime::Proto = &proto;
5115        let pc = f.pc as usize;
5116        if pc == 0 || pc > p.code.len() {
5117            return String::new();
5118        }
5119        let instr = p.code[pc - 1];
5120        let mut regs = vec![instr.b()];
5121        if !instr.k() {
5122            regs.push(instr.c());
5123        }
5124        for reg in regs {
5125            let v = self.r(f.base, reg);
5126            if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5127                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5128                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5129                    None => String::new(),
5130                };
5131            }
5132        }
5133        String::new()
5134    }
5135
5136    /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5137    /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5138    /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5139    /// native call), PUC pushes no prefix — match that by looking only at the
5140    /// topmost frame directly and bailing if it is anything but a Lua frame.
5141    pub(crate) fn position_prefix(&self) -> Option<String> {
5142        let f = self.frames.last().and_then(CallFrame::lua)?;
5143        let proto = f.closure.proto;
5144        if proto.source.as_bytes().is_empty() {
5145            return Some(self.stripped_prefix());
5146        }
5147        if proto.lines.is_empty() {
5148            return None;
5149        }
5150        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5151        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5152        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5153        let display = crate::vm::lib_debug::chunk_id(raw);
5154        let src = String::from_utf8_lossy(&display).into_owned();
5155        Some(format!("{src}:{line}: "))
5156    }
5157
5158    /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5159    /// for the source and renders the line as "?" (so the prefix reads
5160    /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5161    /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5162    /// matches `^%?:%-1:`).
5163    fn stripped_prefix(&self) -> String {
5164        if self.version >= crate::version::LuaVersion::Lua55 {
5165            "?:?: ".to_string()
5166        } else {
5167            "?:-1: ".to_string()
5168        }
5169    }
5170
5171    /// Position prefix of the Lua frame `level` steps up from the running C
5172    /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5173    /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5174    /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5175    /// caller's frame is reported even across pcall/xpcall continuations.
5176    pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5177        let fi = match self.dbg_frame(level)? {
5178            DbgKind::Lua(fi) => fi,
5179            DbgKind::C(_) | DbgKind::Tail(_) => return None,
5180        };
5181        let f = self.frames[fi].lua()?;
5182        let proto = f.closure.proto;
5183        // PUC luaG_addinfo: a stripped chunk has no source — see
5184        // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5185        if proto.source.as_bytes().is_empty() {
5186            return Some(self.stripped_prefix());
5187        }
5188        // a stripped chunk carries no per-instruction line info
5189        if proto.lines.is_empty() {
5190            return None;
5191        }
5192        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5193        // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5194        // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5195        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5196        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5197        let display = crate::vm::lib_debug::chunk_id(raw);
5198        let src = String::from_utf8_lossy(&display).into_owned();
5199        Some(format!("{src}:{line}: "))
5200    }
5201
5202    // ---- the interpreter ----
5203
5204    fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5205        let entry_depth = self.frames.len();
5206        self.exec_with(entry_depth)
5207    }
5208
    /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
    /// `entry_depth` (captured pre-yield by `drive_one`).
    ///
    /// This is a thin `pub(crate)` wrapper that forwards to `exec_with`,
    /// which runs from the current top frame down to (but not past)
    /// `entry_depth` frames. Coroutine driving passes `entry_depth = 1` so
    /// the whole thread runs to completion or a yield.
    ///
    /// Called by `EvalFuture::poll` on every poll after the first to walk the
    /// existing call frames until the next `BudgetExhausted` or terminal
    /// `Ok`/`Err`. Not a public-API surface in Stage 1; the embedder reaches
    /// it through `Vm::eval_async`.
    pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
        self.exec_with(entry_depth)
    }
5221
5222    fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5223        loop {
5224            let r = self.run(entry_depth);
5225            if r.is_err()
5226                && (self.yielding.is_some()
5227                    || self.terminating.is_some()
5228                    || self.host_yield_pending
5229                    || self.pending_async_native_fut.is_some())
5230            {
5231                // a `coroutine.yield` is in flight: keep the frames intact (they
5232                // are the suspended coroutine's saved state) and propagate to
5233                // resume. A self-close termination propagates the same way, so a
5234                // protecting pcall on the way out cannot catch (unwind) it.
5235                // v1.1 B10 — `host_yield_pending` is the async-mode
5236                // analogue: the sentinel must reach `drive_one` without
5237                // a protecting `pcall` swallowing it.
5238                return r;
5239            }
5240            match r {
5241                Ok(vals) => return Ok(vals),
5242                // unwind toward `entry_depth`. A protecting pcall/xpcall
5243                // continuation caught along the way turns the error into
5244                // `false, msg` and the loop resumes running its caller; an
5245                // uncaught error propagates out.
5246                Err(e) => match self.unwind(e.0, entry_depth) {
5247                    Unwound::Caught => continue,
5248                    Unwound::CaughtReturn(vals) => return Ok(vals),
5249                    Unwound::Propagated(err) => return Err(err),
5250                },
5251            }
5252        }
5253    }
5254
    /// Unwind the call stack from the error point toward `entry_depth`, running
    /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
    /// continuation frame at/above `entry_depth` (the error is *caught*: its
    /// slot receives `false, msg`); if none is reached, the error propagates.
    ///
    /// `err` is the error value being raised; it is replaced whenever a
    /// `__close` drive reports a newer error. `entry_depth` is the frame
    /// count below which this call must not pop.
    ///
    /// Returns:
    /// - `Unwound::Caught` when a pcall/xpcall continuation caught the error
    ///   and its caller frame is still on the stack, or when a close drive
    ///   returned `Ok(None)`; `exec_with` then re-enters the run loop.
    /// - `Unwound::CaughtReturn(vals)` when the catching continuation was the
    ///   entry activation itself (popping it left fewer than `entry_depth`
    ///   frames), carrying `false, msg` as the results.
    /// - `Unwound::Propagated(err)` when every frame at/above `entry_depth`
    ///   was popped without a catch.
    fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
        // PUC 5.5 `luaG_errormsg` substitutes "<no error object>" when the
        // error object is nil — so `pcall(function() error(nil) end)` returns
        // that string instead of nil, and `assert(nil, nil)` (whose path
        // throws nil via `lua_settop(L, 1)`) also surfaces a string. Earlier
        // dialects (5.4 and below) keep the nil — 5.4 errors.lua :49 asserts
        // `doit("error()") == nil` and luna would fail that if it always
        // substituted. luna's native `error()` still does its own conversion
        // for direct callers.
        if matches!(err, Value::Nil) && self.version >= crate::version::LuaVersion::Lua55 {
            err = Value::Str(self.heap.intern(b"<no error object>"));
        }
        // The protected call runs in-place among the caller frames' registers,
        // so truncating the failed frames here cuts into caller windows below
        // the catcher. Snapshot the live length at the error point: it is the
        // fallback restore length after a catch when no Lua frame is left on
        // the frame stack to derive a register window from (see the restore
        // step in the pcall/xpcall arm below).
        let saved_len = self.stack.len();
        // Snapshot the traceback at the error point — before any frame is
        // popped — so an `xpcall` msgh (which runs after the failed frames are
        // gone) can still describe the error site. The handler frame about to
        // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
        // visible here; once popped, `debug.traceback` would miss it.
        // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
        // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
        // recovery — locals.lua:659) re-overflows. Capture-once propagates
        // through nested unwinds (inner→outer) without re-running msgh.
        if self.error_traceback.is_none() {
            self.error_traceback = Some(self.traceback_bytes(1));
        }
        while self.frames.len() >= entry_depth {
            match *self.frames.last().expect("frame") {
                // a yieldable-metamethod continuation does not catch: discard the
                // abandoned instruction and keep unwinding (PUC drops the partial
                // op on error). `top` goes back to the saved value, but never
                // above the slot the stack was just truncated to.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Meta(mc),
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = mc.saved_top.min(func_slot);
                    self.tbc.retain(|&s| s < func_slot);
                }
                // a __pairs continuation does not catch either: an error inside
                // the metamethod propagates past `pairs`.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Pairs,
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = func_slot;
                    self.tbc.retain(|&s| s < func_slot);
                }
                // a __close continuation does not catch: drop the half-run
                // handler's window, then continue the close yieldably with
                // the new error threaded as `pending`. Preserve `cc.after`
                // verbatim — `Return`/`Block` originating from an aborting
                // OP_Return/OP_Close will be short-circuited by
                // `finish_close_after` (pending propagates as Err); a
                // `ResumeUnwind` originated by our own Lua-frame handler
                // must keep its deferred frame-pop semantics so that frame
                // is not orphaned. If a fresh handler yields, `drive_close`
                // pushes another `Cont::Close` and we return `Caught` so
                // `exec_with` re-enters the run loop.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Close(cc),
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = func_slot;
                    self.tbc.retain(|&s| s < func_slot);
                    match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
                        Ok(Some(_)) => {
                            unreachable!(
                                "Block / Return / ResumeUnwind never return host values mid-unwind"
                            )
                        }
                        Ok(None) => return Unwound::Caught,
                        // the close drive raised: that error replaces the
                        // current one and the unwind carries on.
                        Err(e) => {
                            err = e.0;
                            continue;
                        }
                    }
                }
                // Every other continuation kind (pcall / xpcall) catches.
                CallFrame::Cont(nc) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.pcall_depth -= 1;
                    // `result` is the value stored after `false`: the raw
                    // error for pcall, the message handler's output for xpcall.
                    let result = match nc.kind {
                        ContKind::Pcall => err,
                        ContKind::Xpcall { handler } => {
                            // PUC keeps `L->errfunc` set across the handler's
                            // call: `luaG_errormsg` re-fires the handler when
                            // it raises (so `xpcall(error, err, 170)` lets the
                            // chain bottom out at err(0) → "END"). luna mirrors
                            // that by looping until the handler returns or
                            // luna's `iters` cap forces termination.
                            //
                            // The cap models PUC's nCcalls soft window
                            // (MAXCCALLS/10*11): once tripped, `stackerror`
                            // raises "C stack overflow" via `luaG_runerror`
                            // which itself re-enters `luaG_errormsg`, so the
                            // handler runs once more with that string and
                            // naturally returns it (errors.lua :637 at N=300).
                            // We count iterations per Cont::Xpcall rather than
                            // a global counter — nested xpcalls each get their
                            // own budget, matching the way PUC's stack frames
                            // accumulate per dispatch path.
                            const MSGH_CAP: u32 = MAX_C_DEPTH;
                            let mut cur_err = err;
                            let mut iters: u32 = 0;
                            let mut capped = false;
                            loop {
                                // cap reached for the first time: swap in the
                                // synthesized overflow message exactly once.
                                if iters >= MSGH_CAP && !capped {
                                    cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
                                    capped = true;
                                }
                                iters += 1;
                                // `msgh_depth` is raised only for the duration
                                // of the handler call.
                                self.msgh_depth += 1;
                                let r = self.call_value(handler, &[cur_err]);
                                self.msgh_depth -= 1;
                                match r {
                                    // the handler returned: its first result
                                    // (or nil when it returned nothing) is the
                                    // caught message.
                                    Ok(hr) => {
                                        break hr.first().copied().unwrap_or(Value::Nil);
                                    }
                                    Err(_) if capped => {
                                        // the handler still errored on the
                                        // synthesized "C stack overflow"; fall
                                        // back to PUC's LUA_ERRERR string.
                                        break Value::Str(
                                            self.heap.intern(b"error in error handling"),
                                        );
                                    }
                                    // the handler raised: feed its error back
                                    // into the handler on the next iteration.
                                    Err(e) => {
                                        cur_err = e.0;
                                    }
                                }
                            }
                        }
                        ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
                            unreachable!("Meta/Pairs/Close cont handled above")
                        }
                    };
                    // the error has been caught (pcall/xpcall): the captured
                    // traceback was for that error and is no longer in flight.
                    self.error_traceback = None;
                    // Write `false, result` at the continuation's slot, growing
                    // the stack first if the earlier truncations left it short.
                    let fs = nc.func_slot as usize;
                    if self.stack.len() < fs + 2 {
                        self.stack.resize(fs + 2, Value::Nil);
                    }
                    self.stack[fs] = Value::Bool(false);
                    self.stack[fs + 1] = result;
                    self.top = nc.func_slot + 2;
                    self.tbc.retain(|&s| s < nc.func_slot);
                    // The catcher was the entry activation: hand the two
                    // results straight back instead of resuming a caller.
                    if self.frames.len() < entry_depth {
                        return Unwound::CaughtReturn(self.take_results(nc.func_slot));
                    }
                    self.finish_results(nc.func_slot, 2, nc.nresults);
                    // Restore the register window of the nearest Lua frame
                    // still on the frame stack: opcodes index it directly. The
                    // target length is that frame's base + `max_stack` + a
                    // 256-slot reserve, falling back to `saved_len` when no
                    // Lua frame remains. The stack is always resized to exactly
                    // this length, which covers both directions:
                    // - shrink: after an overflow-recovery catch the stack must
                    //   not stay near `MAX_LUA_STACK`, or the next
                    //   `call_value_impl` (e.g. a `__close` in the catcher's
                    //   errorh, locals.lua:659) would pick
                    //   `func_slot = stack.len()` above the limit and
                    //   re-overflow;
                    // - grow: on a yieldable-unwind re-entry a prior
                    //   `ResumeUnwind` may have truncated below the window.
                    // An earlier version only clamped `saved_len` from above;
                    // using the window directly handles both cases.
                    let restore = self
                        .frames
                        .iter()
                        .rev()
                        .find_map(CallFrame::lua)
                        .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
                        .unwrap_or(saved_len);
                    if self.stack.len() < restore {
                        self.stack.resize(restore, Value::Nil);
                    } else if self.stack.len() > restore {
                        self.stack.truncate(restore);
                    }
                    return Unwound::Caught;
                }
                CallFrame::Lua(f) => {
                    // Yieldable error-unwind close, PUC luaG_errormsg shape:
                    // (1) pop the Lua frame immediately so each `__close`
                    // handler runs at the C boundary above — `debug.getinfo`
                    // sees the next outer Lua frame's call site (typically
                    // `pcall`), not this aborting function (locals.lua:480).
                    // (2) drive the close yieldably with
                    // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
                    // it truncates to `func_slot` and re-raises (letting a
                    // handler-raised error win over `err`). If a handler
                    // yields, `drive_close` pushes `Cont::Close` and we
                    // return `Caught` so `exec_with` re-enters the run loop;
                    // a synchronous drain returns Err exactly as the old
                    // path did.
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    let after = AfterClose::ResumeUnwind {
                        func_slot: f.func_slot,
                        err,
                    };
                    match self.begin_close(f.base, Some(err), after, entry_depth) {
                        Ok(Some(_)) => {
                            unreachable!("ResumeUnwind never returns host values")
                        }
                        Ok(None) => return Unwound::Caught,
                        // the drain finished with an error (the original or a
                        // handler-raised one): keep unwinding with it.
                        Err(e) => {
                            err = e.0;
                            continue;
                        }
                    }
                }
            }
        }
        // Every frame at/above `entry_depth` is gone and nothing caught.
        Unwound::Propagated(LuaError(err))
    }
5493
5494    fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5495        loop {
5496            // Fast-path slow-check gate: most embedders run with both
5497            // `instr_budget` and `mem_cap` as None, so a single combined
5498            // is_some test lets the hot loop skip both branches with one
5499            // load + branch instead of two.
5500            if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5501                if let Some(b) = self.instr_budget.as_mut() {
5502                    *b -= 1;
5503                    if *b <= 0 {
5504                        self.instr_budget = None;
5505                        // v1.1 B10 Stage 1 — async-mode cooperative
5506                        // yield. Set a sentinel flag so `exec_with`
5507                        // propagates the Err without `unwind` running
5508                        // (mirroring the `yielding.is_some()` path),
5509                        // and `call_value_impl` preserves the call
5510                        // frames for the next `poll`. Translation back
5511                        // to `DispatchOutcome::BudgetExhausted` happens
5512                        // in `drive_one`. The Err value itself is
5513                        // `Value::Nil` — a pure sentinel, never seen by
5514                        // user code.
5515                        if self.async_mode {
5516                            self.host_yield_pending = true;
5517                            return Err(LuaError(Value::Nil));
5518                        }
5519                        // B6: classify the trip so embedders can
5520                        // distinguish budget exhaustion from a
5521                        // generic Runtime error and retry / give up
5522                        // accordingly.
5523                        self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5524                        let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5525                        return Err(LuaError(s));
5526                    }
5527                }
5528                if let Some(cap) = self.heap.mem_cap
5529                    && self.heap.bytes() > cap
5530                {
5531                    // First try a full collect — embedders set tight caps
5532                    // and the overshoot may be reclaimable (closures kept
5533                    // by short-lived frames, intermediate strings). Only
5534                    // disarm + raise if the cap is still breached after
5535                    // collection. PUC's `LUA_GCEMERGENCY` path matches.
5536                    // gc_top must include `self.top` so the running frame's
5537                    // live locals (e.g. a growing table) are not freed.
5538                    self.gc_top = self.top;
5539                    self.collect_garbage();
5540                    if self.heap.bytes() > cap {
5541                        self.heap.mem_cap = None;
5542                        let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5543                        return Err(LuaError(s));
5544                    }
5545                }
5546            }
5547            // Single combined frame fetch: continuation arm OR Lua arm. Saves
5548            // a second `self.frames.last()` slice access vs the prior split
5549            // form (LLVM doesn't always CSE these across the cont branch).
5550            // A continuation frame on top means the call it protected just
5551            // delivered its results — wrap as `true, results…` and hand to
5552            // the pcall/xpcall caller. The error path is handled by `unwind`;
5553            // this branch is only reached on success/resume completion.
5554            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5555            let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5556            if let &CallFrame::Cont(nc) = frame_peek {
5557                // a yieldable metamethod returned: complete the interrupted
5558                // instruction (PUC luaV_finishOp) and resume the running frame.
5559                if let ContKind::Meta(mc) = nc.kind {
5560                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5561                    let result = if self.top > nc.func_slot {
5562                        self.stack[nc.func_slot as usize]
5563                    } else {
5564                        Value::Nil
5565                    };
5566                    self.stack.truncate(nc.func_slot as usize);
5567                    self.top = mc.saved_top;
5568                    self.finish_meta(mc.action, result)?;
5569                    continue;
5570                }
5571                // a __close handler returned successfully: discard its
5572                // results, restore `top` to the slot the handler was called
5573                // at (the surrounding frame's register window above this slot
5574                // must stay alloc'd — never truncate the underlying stack),
5575                // then continue the close chain (next slot, or fire
5576                // AfterClose). When the close ends an entry activation,
5577                // drive_close hands the results up to exec_with directly.
5578                if let ContKind::Close(cc) = nc.kind {
5579                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5580                    self.top = nc.func_slot;
5581                    if let Some(vals) =
5582                        self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5583                    {
5584                        return Ok(vals);
5585                    }
5586                    continue;
5587                }
5588                // __pairs returned: normalize its results to exactly four
5589                // (iterator, state, control, closing) at pairs's slot, where
5590                // the metamethod was called, and hand them to pairs's caller.
5591                if let ContKind::Pairs = nc.kind {
5592                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5593                    let total = 4u32;
5594                    let need = (nc.func_slot + total) as usize;
5595                    if self.stack.len() < need {
5596                        self.stack.resize(need, Value::Nil);
5597                    }
5598                    for s in self.top..(nc.func_slot + total) {
5599                        self.stack[s as usize] = Value::Nil;
5600                    }
5601                    self.top = nc.func_slot + total;
5602                    if self.frames.len() < entry_depth {
5603                        return Ok(self.take_results(nc.func_slot));
5604                    }
5605                    self.finish_results(nc.func_slot, total, nc.nresults);
5606                    continue;
5607                }
5608                frames_pop_sync(&mut self.frames, &mut self.frames_top);
5609                self.pcall_depth -= 1;
5610                // f's results sit at nc.func_slot+1.. (f was called one slot
5611                // above the continuation), so writing `true` at the slot makes
5612                // `true, results…` already contiguous.
5613                let nret = self.top - (nc.func_slot + 1);
5614                self.stack[nc.func_slot as usize] = Value::Bool(true);
5615                let total = 1 + nret;
5616                self.top = nc.func_slot + total;
5617                if self.frames.len() < entry_depth {
5618                    return Ok(self.take_results(nc.func_slot));
5619                }
5620                self.finish_results(nc.func_slot, total, nc.nresults);
5621                continue;
5622            }
5623            // GC runs only at the allocation safe points below (PUC's
5624            // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5625            // no longer collects, so a stale full-window `gc_top` cannot leak in.
5626            //
5627            // Hot-path frame fetch: the Cont arm above continues the loop,
5628            // so reaching here means `frame_peek` is the Lua frame. Reuse it
5629            // rather than re-fetching `self.frames.last()`.
5630            let f = match frame_peek {
5631                CallFrame::Lua(f) => f,
5632                _ => unreachable!("Cont frame survived the dispatch loop head"),
5633            };
5634            let cl = f.closure;
5635            let base = f.base;
5636            let func_slot = f.func_slot;
5637            let n_varargs = f.n_varargs;
5638            let pc = f.pc;
5639            let oldpc = f.hook_oldpc;
5640
5641            // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5642            // — every branch / call op only sets `pc` to a valid index, and
5643            // function entry initialises pc=0 with a non-empty body. PUC's
5644            // `vmfetch` uses the equivalent unchecked load.
5645            let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5646
5647            // P12-S1.C/D — trace recording append + close detection.
5648            // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5649            // so default dispatch keeps a single not-taken branch.
5650            //
5651            // - At the head PC with a non-empty record, the trace has
5652            //   looped back to its start: mark `closed = true` and
5653            //   take the record (S2 will compile + cache).
5654            // - Otherwise, capture the op. If the record overflows
5655            //   MAX_TRACE_LEN, abort by dropping it.
5656            if self.jit.trace_enabled
5657                && let Some(_rec) = self.jit.active_trace.as_mut()
5658            {
5659                // P12-S4 — depth tracking. The trace head's frame is
5660                // at index `recording_frame_base`; every Op::Call that
5661                // pushes a new frame bumps the live depth, every
5662                // Op::Return that pops one decrements it.
5663                //
5664                // **Three clean-close conditions** (P12-S4-step4a):
5665                // - `at_head`: cur_depth == 0 AND about-to-execute the
5666                //   trace's head_pc on its head_proto (loop closed back
5667                //   to start). Same for loop-triggered and call-triggered
5668                //   traces — step4a unified the gating so call-triggered
5669                //   no longer closes on the first re-entry (that left
5670                //   fib's body at 7 depth=0 ops; step4a lets it inline
5671                //   up to MAX_INLINE_DEPTH levels before any close).
5672                // - `returned_past_head`: trace head's frame is gone
5673                //   (callee returned past it, or the call-trigger
5674                //   started a recording inside a callee that has now
5675                //   returned). Whatever ops were recorded form the
5676                //   trace body; the lowerer treats the partial trace
5677                //   the same as InlineAbort (dispatchable=false until
5678                //   step4b's frame materialization lands).
5679                // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5680                //   Recording any deeper would just bloat the IR; close
5681                //   with the body we have. Lowerer's existing length
5682                //   gate + InlineAbort path handles short bodies.
5683                let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5684                let cur_depth = if returned_past_head {
5685                    0
5686                } else {
5687                    self.frames.len() - 1 - self.jit.recording_frame_base
5688                };
5689                let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5690                let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5691                let at_head_loop = cur_depth == 0
5692                    && !rec.ops.is_empty()
5693                    && !returned_past_head
5694                    && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5695                    && pc == rec.head_pc;
5696                // P16-A — self-link cycle catch (mirrors LuaJIT's
5697                // `check_call_unroll` at `lj_record.c:1869`). Trips when:
5698                //   1. We're about to execute the head_pc on head_proto
5699                //      at depth > 0 (we're re-entering the trace head
5700                //      from inside an inlined recursion level — UpRec).
5701                //   2. The count of ancestor frames in the recording
5702                //      window that share `head_proto` exceeds
5703                //      [`RECUNROLL_THRESHOLD`] (default 2).
5704                // For fib(N): head_pc=0, head_proto=fib. After 2 inline
5705                // recursion levels are captured, the recorder enters
5706                // the 3rd nested fib frame, sees cur_depth=3 > 2, and
5707                // trips this catch — closing with `SelfRecKind::UpRec`.
5708                // The lowerer's `TraceEnd::SelfLink` tail emits the
5709                // bump-base + branch-to-self loop body.
5710                //
5711                // TailRec vs UpRec: LJ distinguishes via
5712                // `framedepth + retdepth == 0`. luna doesn't track
5713                // retdepth separately; cur_depth == 0 with a non-empty
5714                // call chain in tail position is rare (would require
5715                // explicit Lua TCO). We use cur_depth > 0 as the UpRec
5716                // condition (fib's case); cur_depth == 0 with positive
5717                // ancestor count would route to TailRec, but luna's
5718                // recorder doesn't currently produce that shape because
5719                // tail-call elision pops the caller frame and we'd
5720                // hit `at_head_loop` instead.
5721                let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
5722                    if self.jit.p16_self_link_enabled
5723                        && !returned_past_head
5724                        && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5725                        && pc == rec.head_pc
5726                        && cur_depth > 0
5727                    {
5728                        // Count ancestor frames sharing head_proto.
5729                        // self.frames[recording_frame_base..] currently
5730                        // includes the just-pushed frame at the top
5731                        // (the one about to execute head_pc). Ancestors
5732                        // = the slice excluding the top frame.
5733                        let head_proto_ptr = rec.head_proto.as_ptr();
5734                        let last_idx = self.frames.len() - 1;
5735                        let mut count = 0usize;
5736                        for i in self.jit.recording_frame_base..last_idx {
5737                            if let CallFrame::Lua(f) = &self.frames[i]
5738                                && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
5739                            {
5740                                count += 1;
5741                            }
5742                        }
5743                        if count > crate::jit::trace::RECUNROLL_THRESHOLD {
5744                            // cur_depth > 0 → UpRec (fib pattern).
5745                            // cur_depth == 0 wouldn't reach this arm.
5746                            Some(crate::jit::trace::SelfRecKind::UpRec)
5747                        } else {
5748                            None
5749                        }
5750                    } else {
5751                        None
5752                    }
5753                };
5754                if let Some(kind) = self_link_trip {
5755                    // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
5756                    // self-recursive patterns at frame depth >= 2.
5757                    //
5758                    // Pre sub-0: a SelfLink trip at the head_pc re-entry
5759                    // unconditionally stamped `self_link_kind`. The
5760                    // R3a `downrec_close` marker can only fire from the
5761                    // depth>0 Op::Return path (`rec.retfs` chain),
5762                    // which never reaches the recorder for fib(28)-like
5763                    // shapes that hit the SelfLink cycle catch BEFORE
5764                    // any base-case Return — leaving `downrec_close`
5765                    // None and routing the trace through R1's safe
5766                    // `dispatchable=false` `"self-link-retf-r1"` path
5767                    // (audit measured `trace_dispatched = 0`).
5768                    //
5769                    // Sub-0 lift: when the SelfLink trip fires AND
5770                    // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
5771                    // gate already requires this — kept explicit as a
5772                    // safety floor), route the close through
5773                    // `downrec_close` INSTEAD of `self_link_kind`. The
5774                    // recorder synthesises the close marker from the
5775                    // most recent Op::Call at depth `cur_depth - 1`:
5776                    //   - `return_pc` = `call.pc + 1` (caller's resume
5777                    //     PC after the recursive call returns; mirror
5778                    //     of R3a's `caller_pc` derivation at the
5779                    //     depth>0 Op::Return capture path below).
5780                    //   - `target_proto` = `call.proto` (caller's
5781                    //     proto; equals `rec.head_proto` for self-
5782                    //     recursion).
5783                    //   - `depth_delta` = `1` (today's recorder always
5784                    //     unrolls one level; R3a uses the same
5785                    //     constant).
5786                    //
5787                    // The lowerer's `end_idx` picker (`trace.rs:3729`)
5788                    // routes through `TraceEnd::DownRec` ahead of the
5789                    // `self_link_kind` arm; the R3b/R3d lowerer arm
5790                    // emits the stitch-sentinel + caller-pc-guard
5791                    // scaffold. Single-candidate guard chain (sub-0's
5792                    // recorder produces 1 caller_pc candidate because
5793                    // `rec.retfs` is empty) keeps `dispatchable=false`
5794                    // + `"downrec-stitch-pending"` label (per R3d's
5795                    // `multi_way_candidate_count >= 2` gate at
5796                    // `trace.rs:7385`). Net behaviour: trace compiles
5797                    // under DownRec routing; interp runs the
5798                    // recursion naturally → result 317811.
5799                    //
5800                    // The `cur_depth >= 2` gate is automatically
5801                    // satisfied by the count > RECUNROLL_THRESHOLD=2
5802                    // trip condition (3 ancestor frames sharing
5803                    // head_proto implies cur_depth >= 3), kept
5804                    // explicit so a future RECUNROLL_THRESHOLD tweak
5805                    // doesn't silently flip shallow-recursion
5806                    // shapes (cur_depth == 1) onto the DownRec arm.
5807                    //
5808                    // R3.3+ sub-1/2/3/4 will replace the depth-baked
5809                    // op_offsets[] addressing with runtime base_var
5810                    // threading so the trace's recorded body is
5811                    // depth-relative and the DownRec dispatch
5812                    // becomes wall-clock-positive. Sub-0 is the
5813                    // routing scaffold; it does not aim for gain.
5814                    let _ = kind;
5815                    let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
5816                        let caller_depth_u8 = (cur_depth - 1) as u8;
5817                        if let Some(call_op) = rec.ops.iter().rev().find(|r| {
5818                            r.inline_depth == caller_depth_u8
5819                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
5820                        }) {
5821                            rec.downrec_close = Some(crate::jit::trace::DownRecClose {
5822                                return_pc: call_op.pc + 1,
5823                                target_proto: call_op.proto,
5824                                depth_delta: 1,
5825                            });
5826                            true
5827                        } else {
5828                            false
5829                        }
5830                    };
5831                    if relaxed_to_downrec {
5832                        // R2 close-cause taxonomy: tag the lift so
5833                        // probes can tally the fire rate. Mirrors
5834                        // R3a's `"downrec-restart"` bump for the
5835                        // depth>0 Op::Return path (different trip
5836                        // origin, same downstream routing). The
5837                        // existing `"self-link-retf-r1"` label still
5838                        // fires for trips that DON'T relax (no
5839                        // candidate Op::Call ancestor in rec.ops, or
5840                        // cur_depth < 2) via the lowerer's
5841                        // dispatch_off_reason mirror at the close
5842                        // handler — kept as a regression safety net.
5843                        self.jit
5844                            .counters
5845                            .bump_close_cause("selflink-yields-to-downrec");
5846                    } else {
5847                        rec.self_link_kind = Some(kind);
5848                    }
5849                }
5850                let should_close =
5851                    at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
5852                if should_close {
5853                    // P13-S13-H — long-trace bias: a call-triggered
5854                    // recording that closed with a very short body
5855                    // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
5856                    // binary_trees `make(0)`: 4 ops) is pathological.
5857                    // Compiling + caching it pins `Proto.traces` to a
5858                    // trace that the length gate will refuse to
5859                    // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
5860                    // = 40`), AND blocks the back-edge / longer-call
5861                    // path from re-recording the same head_pc (the
5862                    // dedup `already_cached` check below short-
5863                    // circuits). The fix: discard the short call-
5864                    // triggered recording WITHOUT caching, so a later
5865                    // trigger can record at a different (hopefully
5866                    // deeper) recursion point. An earlier draft also
5867                    // biased `call_hot_count` back to `THRESHOLD -
5868                    // HOT_RETRY_WINDOW`; that reset is no longer done.
5869                    //
5870                    // Back-edge triggered traces are exempt — a
5871                    // tight numeric-for loop's body is legitimately
5872                    // 3 ops (`Add`, ForLoop) and DOES dispatch
5873                    // usefully when re-entered many times.
5874                    // P13-S13-H — coverage heuristic to detect
5875                    // pathologically partial call-triggered traces:
5876                    // for self-recursive / branchy protos like
5877                    // `fib` (~17 bytecode ops) or
5878                    // `binary_trees.make` (~26 ops), the recorder
5879                    // can fire at a BASE-case entry (`fib(0)` or
5880                    // `make(0)`) producing a 3–4 op trace that
5881                    // covers a tiny fraction of the proto's code.
5882                    // That trace is doomed by the length gate
5883                    // post-compile AND blocks any longer follow-up
5884                    // (the dedup `already_cached` check below). The
5885                    // fix: discard call-triggered closes where
5886                    // `rec.ops.len() * 2 < head_proto.code.len()`
5887                    // (less than half the proto's bytecode), so the
5888                    // back-edge / longer call path can take over.
5889                    //
5890                    // Why coverage > raw length: protos with
5891                    // intrinsically short bodies (closure
5892                    // factories: `Closure + Return1` = 2 ops,
5893                    // simple wrappers: `LoadI + Return1` = 2 ops)
5894                    // record 100% coverage even at length 2 — those
5895                    // ARE legitimately short and the closure /
5896                    // sunk-emit lowering paths (S7-A / S9-C) make
5897                    // them worth compiling. The heuristic admits
5898                    // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
5899                    // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
5900                    // ~26) get discarded.
5901                    //
5902                    // Back-edge triggered traces are unaffected —
5903                    // a tight numeric-for body legitimately covers
5904                    // 3 of ~3 proto ops it can dispatch from
5905                    // (`Add + ForLoop`) and the recorder fires on
5906                    // the back-edge, not call entry.
5907                    //
5908                    // `call_hot_count` is intentionally NOT reset
5909                    // (an earlier draft tried `THRESHOLD - 32` but
5910                    // caused active_trace contention with the
5911                    // outer back-edge trigger — see
5912                    // setlist_b_zero_with_call_c_zero_sunk_emits).
5913                    // We give up on dispatching the pathological
5914                    // shape on the same proto; the back-edge or a
5915                    // longer call path on a deeper recursion point
5916                    // can still record + cache a real trace.
5917                    let proto_code_len = rec.head_proto.code.len();
5918                    let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
5919                    // P13-S13-I — per-Proto discard cap. The S13-H
5920                    // relaxed trigger condition (`c >= THRESHOLD &&
5921                    // !already_cached`) means a Proto whose every
5922                    // recording is partial-coverage will re-fire the
5923                    // trigger every call indefinitely (1500+ in
5924                    // `binary_trees`-pattern test). The cap stops
5925                    // discarding after `MAX_DISCARDS_PER_PROTO` —
5926                    // the next close falls through to compile (even
5927                    // if partial), caches the trace, and the
5928                    // `already_cached` short-circuit kills the
5929                    // storm. Dispatch may still be refused
5930                    // post-compile (length gate), but the recorder
5931                    // stops churning.
5932                    const MAX_DISCARDS_PER_PROTO: u32 = 5;
5933                    let prior_discards = rec.head_proto.trace_discard_count.get();
5934                    let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
5935                    // P13-S13-K — flip the `gave_up` flag the
5936                    // moment cap is reached (BEFORE the close-
5937                    // dispatching branch below). The trigger gates
5938                    // short-circuit on this flag, skipping the
5939                    // RefCell + linear `already_cached` scan on
5940                    // every subsequent call to this Proto. Useful
5941                    // for `binary_trees_pattern`-class loads where
5942                    // a single Proto sees ~20k calls post-cap.
5943                    if cap_reached
5944                        && rec.is_call_triggered
5945                        && is_partial_coverage
5946                        && !rec.head_proto.trace_gave_up.get()
5947                    {
5948                        rec.head_proto.trace_gave_up.set(true);
5949                    }
5950                    if rec.is_call_triggered && is_partial_coverage && !cap_reached {
5951                        // Tally as closed (for visibility) but DROP
5952                        // without compile/cache. Use the existing
5953                        // closed-lens accumulator so probes can
5954                        // observe the discarded shape.
5955                        // P13-S13-I — bump discard count BEFORE
5956                        // dropping the recording so the next
5957                        // close sees the updated counter.
5958                        rec.head_proto.trace_discard_count.set(prior_discards + 1);
5959                        self.jit.counters.closed += 1;
5960                        self.jit
5961                            .counters
5962                            .closed_lens
5963                            .push((rec.is_call_triggered, rec.ops.len()));
5964                        // v2.0 Track-R R2 — partial-coverage discard
5965                        // close path. Pre-R2 this site bumped `closed`
5966                        // + `closed_lens` (visibility) but no per-
5967                        // reason label, so probes couldn't separate a
5968                        // real successful close from a discard tally.
5969                        // Tag explicitly to make the recorder-side
5970                        // close-cause taxonomy single-source.
5971                        self.jit
5972                            .counters
5973                            .bump_close_cause("partial-coverage-discard");
5974                        self.jit.active_trace = None;
5975                        // Continue with interp loop — don't
5976                        // fall through to compile path.
5977                        // The op at `pc` hasn't dispatched yet;
5978                        // the outer loop iteration handles it.
5979                    } else {
5980                        rec.closed = true;
5981                        // P12-S2.C — detach the closed record, then try
5982                        // to compile it. Dedup by `head_pc`: a Proto
5983                        // already carrying a CompiledTrace for this PC
5984                        // skips recompile (the hot counter caps
5985                        // re-recording at `u32::MAX / 2` anyway, but
5986                        // explicit dedup keeps `Proto.traces` short
5987                        // for the S3 dispatcher's linear scan).
5988                        //
5989                        // No `Vm::run` change for failure: we just bump
5990                        // the failed counter and drop the record. S3
5991                        // will read `Proto.traces` to decide whether to
5992                        // dispatch — until then, this is bookkeeping.
5993                        let head_pc_val = rec.head_pc;
5994                        let closed_record = self
5995                            .jit
5996                            .active_trace
5997                            .take()
5998                            .expect("active_trace was Some this branch");
5999                        self.jit.counters.closed += 1;
6000                        self.jit
6001                            .counters
6002                            .closed_lens
6003                            .push((closed_record.is_call_triggered, closed_record.ops.len()));
6004                        // P12-S5-B fix: cache the trace on the
6005                        // recorder's *head proto*, not the current
6006                        // closure's proto. For non-recursive
6007                        // call-triggered traces, close fires after
6008                        // `Return1` pops the callee frame — `cl` at
6009                        // that point is the CALLER's closure, while
6010                        // `closed_record.head_proto` is the CALLEE's
6011                        // proto (the one we actually want the trace
6012                        // to be discoverable from on the next call).
6013                        // Self-recursive fib closed via depth-cap
6014                        // mid-recursion so `cl.proto == head_proto`
6015                        // happened to coincide — this fix makes that
6016                        // accidental coincidence intentional.
6017                        let head_proto = closed_record.head_proto;
6018                        let already_cached = head_proto
6019                            .traces
6020                            .borrow()
6021                            .iter()
6022                            .any(|t| t.head_pc == head_pc_val);
6023                        if !already_cached {
6024                            // Internal-loop = true: the trace runs in
6025                            // a native loop until a cmp side-exits, so
6026                            // the dispatcher's per-entry marshal cost
6027                            // amortizes across the whole run of
6028                            // iterations the loop's recorded direction
6029                            // stays valid. The lowerer auto-downgrades
6030                            // to one-shot for cmp-less or Call-truncating
6031                            // traces.
6032                            // P15-A v2-C-A6-5 — side traces MUST NOT
6033                            // internal-loop. The parent's recorded prefix
6034                            // (ops at PCs < side trace's head_pc) defines
6035                            // values for registers the child's body reads
6036                            // without re-writing each iter — e.g. for
6037                            // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6038                            // + R[11]` sets R[12], and the child trace
6039                            // (head_pc=24) re-runs `pc=20 Move R[1] =
6040                            // R[12]` each iter via its outer ForLoop
6041                            // internal-loop, ALWAYS reading the stale
6042                            // entry-time R[12]. The parent's Add never
6043                            // re-runs during child's loop, so R[1] gets
6044                            // pinned to one stale value. Force one-shot
6045                            // for side traces: each parent-exit round-
6046                            // trips through dispatcher → parent's Add
6047                            // runs → side trace runs ONE iter → return.
6048                            let opts = crate::jit::trace::CompileOptions {
6049                                internal_loop: closed_record.side_trace_parent.is_none(),
6050                                pre53: self.version() <= LuaVersion::Lua53,
6051                                aot: false,
6052                            };
6053                            // v1.1 A1 Session A — route through trace_compiler.
6054                            // v2.0 Track J sub-step J-B — split-borrow JitState
6055                            // so the trait method can take `&mut dyn JitStorage`.
6056                            let result = {
6057                                let jit = &mut self.jit;
6058                                let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6059                                jit.trace_compiler
6060                                    .try_compile_trace(storage, &closed_record, opts)
6061                            };
6062                            match result {
6063                                Some(mut ct) => {
6064                                    // P12-S5-A/B/C — tally Sinkable sites
6065                                    // + actually-sunk-emit sites + materialise
6066                                    // emit sites before moving `ct` into
6067                                    // Proto.traces.
6068                                    self.jit.counters.sinkable_seen +=
6069                                        ct.sinkable_sites_seen as u64;
6070                                    self.jit.counters.accum_bufferable_seen +=
6071                                        ct.accum_bufferable_seen as u64;
6072                                    self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6073                                    self.jit.counters.materialize_emit +=
6074                                        ct.materialize_emit_count as u64;
6075                                    self.jit.counters.closure_emit += ct.closure_seen as u64;
6076                                    if ct.is_inline_abort_close {
6077                                        self.jit.counters.inline_abort += 1;
6078                                    }
6079                                    // v2.0 Stage 7 polish 6 fire
6080                                    // experiment — split tally so a
6081                                    // probe can answer the AOT
6082                                    // `accepted_with_per_exit_inline`
6083                                    // gate's question at the JIT
6084                                    // surface too: how many compiled
6085                                    // traces emitted depth>0 cmp
6086                                    // side-exits, and how many of
6087                                    // those survived all the
6088                                    // `dispatchable = false` pins
6089                                    // (`InlineAbort-gate`,
6090                                    // `self-link-retf-r1`,
6091                                    // `downrec-stitch-pending`, etc.).
6092                                    if !ct.per_exit_inline.is_empty() {
6093                                        self.jit.counters.per_exit_inline_compiled += 1;
6094                                        if ct.dispatchable {
6095                                            self.jit.counters.per_exit_inline_dispatchable += 1;
6096                                        }
6097                                    }
6098                                    if let Some(reason) = ct.dispatch_off_reason {
6099                                        self.jit.counters.dispatch_off_reasons.push(reason);
6100                                        // v2.0 Track-R R2 — mirror
6101                                        // the ordered Vec push into
6102                                        // the per-reason HashMap so
6103                                        // probes can answer "how many
6104                                        // of each dispatch_off label
6105                                        // fired" in O(1) without
6106                                        // walking the Vec. Same
6107                                        // bucket as the recorder-side
6108                                        // abort/discard tags above.
6109                                        self.jit.counters.bump_close_cause(reason);
6110                                    }
6111                                    // v2.0 Track-R R3b — count
6112                                    // compiled traces that carry a
6113                                    // down-recursion stitch link.
6114                                    // Bumped here (not at the lowerer
6115                                    // emit site) because the Vm's
6116                                    // JitCounters live on the Vm,
6117                                    // and the lowerer doesn't have a
6118                                    // Vm handle. R3b's regression
6119                                    // pin reads this via
6120                                    // `Vm::trace_downrec_link_compiled_count`.
6121                                    if ct.downrec_link.is_some() {
6122                                        self.jit.counters.downrec_link_compiled += 1;
6123                                    }
6124                                    // v2.0 Track-R R3d — multi-way
6125                                    // guard emit counter. Bumped when
6126                                    // the lowerer's R3d arm collected
6127                                    // >= 2 distinct caller_pc candidates
6128                                    // and lifted `dispatchable=true`.
6129                                    // R3c's single-CMP shape stores
6130                                    // `1` here without bumping; non-
6131                                    // DownRec closes store `0`.
6132                                    if ct.downrec_multi_way_count >= 2 {
6133                                        self.jit.counters.multi_way_guard_emitted += 1;
6134                                    }
6135                                    // P15-A v2-A — side-trace finalisation.
6136                                    // Pin `dispatchable=false` so the
6137                                    // primary lookup `traces.find(|t|
6138                                    // t.head_pc == pc && t.dispatchable)`
6139                                    // never matches this entry — the
6140                                    // side trace is meant to be entered
6141                                    // ONLY through the parent's exit
6142                                    // indirection (v2-B/C IR), not the
6143                                    // back-edge / call-trigger paths.
6144                                    // Then write the entry fn ptr into
6145                                    // the parent's `exit_side_trace_ptrs`
6146                                    // slot so v2-B/C IR can read it.
6147                                    if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6148                                        closed_record.side_trace_parent
6149                                    {
6150                                        ct.dispatchable = false;
6151                                        let entry_ptr = ct.entry as *const () as *const u8;
6152                                        let _side_trace_head_pc = closed_record.head_pc;
6153                                        let parent_traces = parent_proto.traces.borrow();
6154                                        if let Some(parent_ct) = parent_traces
6155                                            .iter()
6156                                            .find(|t| t.head_pc == parent_head_pc)
6157                                        {
6158                                            // P15-A v2-C-A5-C — shape-match
6159                                            // gate. Find the parent's per-exit
6160                                            // tag snapshot at the wired exit
6161                                            // (inline / tag / global) and
6162                                            // check the child's entry_tags
6163                                            // match. If not, leave the cell
6164                                            // null + skip cache populate so
6165                                            // the future v2-C-A2 IR's
6166                                            // `call_indirect` stays inert at
6167                                            // this exit (the child's
6168                                            // shape-specialised IR would
6169                                            // mis-interpret raw bits the
6170                                            // parent writes to reg_state).
6171                                            let inline_n = parent_ct.per_exit_inline.len();
6172                                            let tags_n = parent_ct.per_exit_tags.len();
6173                                            let parent_exit_tags_slice: &[
6174                                            crate::jit::trace::ExitTag
6175                                        ] = if parent_exit_idx < inline_n {
6176                                            &parent_ct.per_exit_inline
6177                                                [parent_exit_idx]
6178                                                .exit_tags
6179                                        } else if parent_exit_idx
6180                                            < inline_n + tags_n
6181                                        {
6182                                            &parent_ct.per_exit_tags
6183                                                [parent_exit_idx - inline_n]
6184                                                .1
6185                                        } else {
6186                                            &parent_ct.exit_tags
6187                                        };
6188                                            let shape_ok =
6189                                                crate::jit::trace::exit_tags_match_entry_tags(
6190                                                    &ct.entry_tags,
6191                                                    parent_exit_tags_slice,
6192                                                    &parent_ct.entry_tags,
6193                                                );
6194                                            if !shape_ok {
6195                                                self.jit.counters.side_trace_shape_mismatch += 1;
6196                                            }
6197                                            // P15-A v2-C-A4 — write the child's
6198                                            // entry fn ptr to BOTH the legacy
6199                                            // v2-A `exit_side_trace_ptrs[idx]`
6200                                            // cell (kept so v2-A's
6201                                            // walk_any_side_ptr_non_null tests
6202                                            // stay green) AND the per-kind cell
6203                                            // whose heap address the parent's
6204                                            // IR baked (v2-C-A2). The IR-baked
6205                                            // cell is what the call_indirect
6206                                            // gate actually reads. Only write
6207                                            // when A5-C shape gate passes.
6208                                            if shape_ok {
6209                                                if let Some(cell) = parent_ct
6210                                                    .exit_side_trace_ptrs
6211                                                    .get(parent_exit_idx)
6212                                                {
6213                                                    cell.set(entry_ptr);
6214                                                }
6215                                                // Compute (kind, local) for the
6216                                                // IR-baked cell. Layout follows
6217                                                // exit_hit_counts: inline first,
6218                                                // then per_exit_tags, then the
6219                                                // global tail slot.
6220                                                let (sent_kind, sent_local) = if parent_exit_idx
6221                                                    < inline_n
6222                                                {
6223                                                    parent_ct.per_exit_inline[parent_exit_idx]
6224                                                        .side_trace_ptr
6225                                                        .set(entry_ptr);
6226                                                    (
6227                                                        crate::jit::trace::SIDE_SENT_KIND_INLINE,
6228                                                        parent_exit_idx as u32,
6229                                                    )
6230                                                } else if parent_exit_idx < inline_n + tags_n {
6231                                                    let local = parent_exit_idx - inline_n;
6232                                                    if let Some(b) =
6233                                                        parent_ct.tags_side_trace_ptrs.get(local)
6234                                                    {
6235                                                        b.set(entry_ptr);
6236                                                    }
6237                                                    (
6238                                                        crate::jit::trace::SIDE_SENT_KIND_TAG,
6239                                                        local as u32,
6240                                                    )
6241                                                } else {
6242                                                    parent_ct.global_side_trace_ptr.set(entry_ptr);
6243                                                    (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6244                                                };
6245                                                self.jit.counters.side_trace_compiled += 1;
6246                                                // P15-A v2-D-A8 — flip the
6247                                                // parent's fast-path hint so
6248                                                // the dispatcher knows to do
6249                                                // the tentative decode + cell
6250                                                // check on subsequent
6251                                                // dispatches. Set once and
6252                                                // stays true (we never unwire
6253                                                // a side trace today).
6254                                                parent_ct.has_any_side_wired.set(true);
6255
6256                                                // P15-A v2-C-A1/A4 — populate
6257                                                // the O(1) lookup cache the
6258                                                // dispatcher consults on
6259                                                // sentinel-bit-set returns.
6260                                                // Key is the encoded sentinel
6261                                                // (same encoding the IR ORs
6262                                                // into bits 56..=62 of the
6263                                                // child's i64 return).
6264                                                let sentinel =
6265                                                    crate::jit::trace::encode_side_sentinel(
6266                                                        sent_kind, sent_local,
6267                                                    );
6268                                                let predicted_idx = if std::ptr::eq(
6269                                                    parent_proto.as_ptr(),
6270                                                    head_proto.as_ptr(),
6271                                                ) {
6272                                                    parent_traces.len() as u32
6273                                                } else {
6274                                                    head_proto.traces.borrow().len() as u32
6275                                                };
6276                                                parent_ct
6277                                                    .side_trace_cache
6278                                                    .borrow_mut()
6279                                                    .insert(sentinel, predicted_idx);
6280                                            }
6281                                        }
6282                                        drop(parent_traces);
6283                                    }
6284                                    head_proto.traces.borrow_mut().push(TArc::new(ct));
6285                                    self.jit.counters.compiled += 1;
6286                                }
6287                                None => {
6288                                    self.jit.counters.compile_failed += 1;
6289                                    self.jit
6290                                        .counters
6291                                        .compile_failed_reasons
6292                                        .push(self.jit.trace_compiler.last_compile_checkpoint());
6293                                }
6294                            }
6295                        }
6296                    } // P13-S13-H — close the long-trace-bias else branch
6297                } else {
6298                    // P12-S4-step1 + step4a — depth-aware push at the
6299                    // current `cur_depth`. The `depth_cap_hit` /
6300                    // `returned_past_head` early-exit is handled by
6301                    // the `should_close` branch above; reaching here
6302                    // means `cur_depth <= MAX_INLINE_DEPTH` and the
6303                    // trace head's frame is still live.
6304                    let depth_u8 = cur_depth as u8;
6305                    if depth_u8 > self.jit.max_depth_seen {
6306                        self.jit.max_depth_seen = depth_u8;
6307                    }
6308                    // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6309                    // return / variable return count). Recorder pushed
6310                    // it with var_count=None before the call dispatched;
6311                    // now that the call has returned and we're about to
6312                    // push the next op, top reflects the actual return
6313                    // count. Snapshot top - (caller.base + call.a).
6314                    if let Some(last) = rec.ops.last_mut()
6315                        && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6316                        && last.inst.c() == 0
6317                        && last.var_count.is_none()
6318                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6319                    {
6320                        let from = f.base + last.inst.a();
6321                        if self.top >= from {
6322                            last.var_count = Some(self.top - from);
6323                        }
6324                    }
6325                    // P12-S9-A/C — for SetList B=0, snapshot the source
6326                    // count = top - A - 1 (mirrors Lua's `n = top - ra
6327                    // - 1` from lvm.c OP_SETLIST). Sources are
6328                    // R[A+1..top), exclusive top. For Call C=0's
6329                    // var_count (the return count = top - A inclusive),
6330                    // see the prior-op fix-up above; here we
6331                    // initialise the current Call op to None and let
6332                    // the fix-up on the next op's push populate it.
6333                    let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6334                        && inst.b() == 0
6335                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6336                    {
6337                        let from = f.base + inst.a();
6338                        if self.top > from {
6339                            Some(self.top - from - 1)
6340                        } else {
6341                            None
6342                        }
6343                    } else {
6344                        None
6345                    };
6346                    let op = crate::jit::trace::RecordedOp {
6347                        proto: cl.proto,
6348                        pc,
6349                        inst,
6350                        inline_depth: depth_u8,
6351                        var_count,
6352                    };
6353                    // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6354                    // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6355                    // Captured as a side-channel `RetfRecord` parallel to
6356                    // `ops` when `p16_self_link_enabled` is on. R3's
6357                    // down-rec stitch consumes these to guard side-trace
6358                    // inlined-frame topology against the recorded shape.
6359                    // Gated on the same flag as the cycle catch so the
6360                    // ship-default path (p16 off) sees zero behavior
6361                    // change. `caller_pc` is the recorded enclosing Call's
6362                    // pc + 1 — interp's resume point after the inlined
6363                    // frame pops.
6364                    if self.jit.p16_self_link_enabled
6365                        && depth_u8 > 0
6366                        && matches!(
6367                            inst.op(),
6368                            crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6369                        )
6370                    {
6371                        let results: u8 = match inst.op() {
6372                            crate::vm::isa::Op::Return0 => 0,
6373                            crate::vm::isa::Op::Return1 => 1,
6374                            _ => 0,
6375                        };
6376                        // Most recent Op::Call recorded at the caller's
6377                        // depth (`depth_u8 - 1`) is the frame this Return
6378                        // is unwinding from. Reverse scan stops at the
6379                        // first match.
6380                        let caller_depth = depth_u8 - 1;
6381                        let caller_call = rec.ops.iter().rev().find(|r| {
6382                            r.inline_depth == caller_depth
6383                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6384                        });
6385                        let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6386                        // v2.0 Track-R R3a — capture the caller's proto
6387                        // for the RetfRecord. LuaJIT `IR_RETF.op1`
6388                        // equivalent. For fib(28) the caller's proto
6389                        // equals the trace head; for future mutual
6390                        // recursion the recorded Op::Call's proto is the
6391                        // right target. Fallback to head_proto when no
6392                        // enclosing Call op was captured (mirrors
6393                        // `caller_pc`'s fallback to the Return's own pc).
6394                        let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6395                        rec.retfs.push(crate::jit::trace::RetfRecord {
6396                            from_depth: depth_u8,
6397                            to_depth: caller_depth,
6398                            results,
6399                            caller_pc,
6400                            proto: caller_proto,
6401                        });
6402                        // v2.0 Track-R R3a — DownRec close trigger:
6403                        // count RetfRecords on this recording whose
6404                        // `proto` matches `caller_proto` (LuaJIT
6405                        // `check_downrec_unroll` chain filter
6406                        // `op1 == ptref`). Threshold mirrors
6407                        // RECUNROLL_THRESHOLD; first trip stamps the
6408                        // `downrec_close` marker, subsequent retfs
6409                        // keep the marker without overwrite. The
6410                        // lowerer's end_idx picker routes through
6411                        // TraceEnd::DownRec when the marker is set;
6412                        // R3a's tail emit still falls through to R1's
6413                        // safe deopt path so fib(28) result stays
6414                        // 317_811. R3b lifts.
6415                        if rec.downrec_close.is_none() {
6416                            let caller_proto_ptr = caller_proto.as_ptr();
6417                            let prior_match_count = rec
6418                                .retfs
6419                                .iter()
6420                                .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6421                                .count();
6422                            // Strictly-greater-than threshold matches
6423                            // LuaJIT `count + J->tailcalled > recunroll`.
6424                            // The newly-pushed retf is already counted.
6425                            if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6426                                rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6427                                    return_pc: caller_pc,
6428                                    target_proto: caller_proto,
6429                                    depth_delta: 1,
6430                                });
6431                                // R2 close-cause taxonomy: tag the
6432                                // restart with `"downrec-restart"`. R3b
6433                                // adds `"downrec-stitch-failed"` when
6434                                // the lifted back-edge falls back to
6435                                // deopt.
6436                                self.jit.counters.bump_close_cause("downrec-restart");
6437                            }
6438                        }
6439                    }
6440                    // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6441                    // FIRST eligible Op::GetField site under env-gate
6442                    // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6443                    //   - R[B] is Value::Table with metatable.is_none()
6444                    //   - K[C] is Value::Str
6445                    //   - The string key actually occupies a hash slot
6446                    //     (so the IC's slot_idx is a real index, not
6447                    //     a probe sentinel).
6448                    // Once captured, subsequent GetFields skip this
6449                    // logic (rec.field_ic_snapshot.is_some() short-
6450                    // circuits). Env-OFF short-circuits on the cached
6451                    // atomic check inside field_ic_enabled().
6452                    if rec.field_ic_snapshot.is_none()
6453                        && matches!(inst.op(), crate::vm::isa::Op::GetField)
6454                        && crate::jit::trace_types::field_ic_enabled()
6455                    {
6456                        let b = inst.b();
6457                        let c_idx = inst.c() as usize;
6458                        let r_b = self.stack[(base + b) as usize];
6459                        if let Value::Table(g) = r_b
6460                            && g.metatable().is_none()
6461                            && c_idx < cl.proto.consts.len()
6462                            && let Value::Str(s) = cl.proto.consts[c_idx]
6463                        {
6464                            let key = Value::Str(s);
6465                            let tbl_ref = &*g;
6466                            if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6467                                && let Some(val) = tbl_ref.node_val_at(slot_idx)
6468                            {
6469                                let op_idx = rec.ops.len() as u32;
6470                                rec.field_ic_snapshot =
6471                                    Some(crate::jit::trace_types::FieldIcSnapshot {
6472                                        op_idx,
6473                                        nodes_len: tbl_ref.nodes_capacity() as u64,
6474                                        slot_idx: slot_idx as u64,
6475                                        key_ptr_bits: s.as_ptr() as u64,
6476                                        cached_val_tag: val.tag_byte(),
6477                                    });
6478                                self.jit.counters.field_ic_snapshot_captured += 1;
6479                            }
6480                        }
6481                    }
6482                    if !rec.push(op) {
6483                        // v2.0 Track-R R2 — recorder overflow
6484                        // (MAX_TRACE_LEN). Pre-R2 this site bumped
6485                        // `aborted` with no reason label, leaving the
6486                        // overflow indistinguishable from any other
6487                        // abort cause that might be added later.
6488                        // Tag it explicitly under the close-cause
6489                        // bucket so probes can tally overflow vs
6490                        // other abort causes in O(1).
6491                        self.jit.active_trace = None;
6492                        self.jit.counters.aborted += 1;
6493                        self.jit.counters.bump_close_cause("trace-overflow");
6494                    }
6495                }
6496            }
6497
6498            // P12-S3 — trace JIT dispatcher.
6499            //
6500            // When the dispatch loop is about to execute the op at
6501            // `pc` and there's a `numeric_only` CompiledTrace cached
6502            // for that `head_pc`, marshal the live regs into an
6503            // i64 buffer, jump into the trace, and resume the
6504            // interpreter at the returned continuation PC.
6505            //
6506            // Skipped (zero overhead) when `trace_jit_enabled` is
6507            // false; the lookup is a borrow + scan over
6508            // `cl.proto.traces`, which is a `Vec` whose size is at
6509            // most one entry per back-edge per Proto in practice.
6510            //
6511            // Marshalling contract — only Int slots survive the
6512            // round-trip cleanly (the reg_state ABI is `*mut i64`
6513            // with no tag info). Any non-Int slot in the affected
6514            // window forces a skip; interp takes over for one op
6515            // and the back-edge brings us back to try again next
6516            // pass (slots that were Nil/Float at one moment can
6517            // settle to Int by the time the next back-edge fires).
6518            //
6519            // A trace that comes back with `vm.jit.pending_err`
6520            // parked is treated as a deopt: clear the err, leave
6521            // the stack as the trace wrote it, and let the
6522            // interpreter run from the same `pc`. The trace itself
6523            // is left cached — a future entry might find no
6524            // metatable in the way and succeed.
6525            // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6526            // of 6 per-field Rc clones. proto.traces is now
6527            // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6528            // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6529            // operations per dispatch × 434k = ~2.2M Rc atomic ops
6530            // (~1-2% gain measured separately).
6531            // v2.0 Track-R R3c — one-shot consume of the
6532            // `suppress_downrec_admit_once` flag. Set by the R3c
6533            // downrec post-invoke arm below when it force-deopts the
6534            // trace (caller-pc guard miss OR cycle-budget exhausted)
6535            // so the NEXT interpreter loop iteration skips the
6536            // downrec admit, lets interp run the op at `head_pc`,
6537            // advances `pc` past `head_pc`, and breaks the otherwise-
6538            // infinite admit loop. Reading + clearing here means a
6539            // single dispatch tick consumes the suppression — the
6540            // following tick re-admits naturally (with the budget
6541            // also reset by the deopt site).
6542            let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6543            self.jit.suppress_downrec_admit_once = false;
6544            if self.jit.trace_enabled
6545                && let Some(ct) = {
6546                    let traces = cl.proto.traces.borrow();
6547                    traces
6548                        .iter()
6549                        .find(|t| {
6550                            if t.head_pc != pc {
6551                                return false;
6552                            }
6553                            let is_downrec = t.downrec_link.is_some();
6554                            // v2.0 Track-R R3c — the one-shot suppress
6555                            // flag blocks any admit (primary or fallback)
6556                            // for `downrec_link`-bearing traces so the
6557                            // next interp iter can run the natural op
6558                            // at `head_pc` and advance past it. R3d's
6559                            // `dispatchable=true` lift means the suppress
6560                            // must also cover the primary `t.dispatchable`
6561                            // arm — otherwise the lifted lookup would
6562                            // immediately re-admit after a force-deopt
6563                            // and the infinite loop returns.
6564                            if is_downrec && downrec_admit_blocked {
6565                                return false;
6566                            }
6567                            // Primary arm: `dispatchable=true` traces
6568                            // (R3d-lifted DownRec or normal traces).
6569                            // Fallback arm: R3c-shape `dispatchable=false`
6570                            // DownRec traces (single-CMP guard kept
6571                            // pinned because the 90% miss-rate would
6572                            // make blind admit perf-negative).
6573                            t.dispatchable || is_downrec
6574                        })
6575                        .cloned()
6576                }
6577            {
6578                // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6579                // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6580                // across the entire dispatch block so the fields outlive
6581                // all consumers. Saves 5 Rc::clone per dispatch.
6582                let entry_fn = ct.entry;
6583                let head_pc_val = ct.head_pc;
6584                let window_size = ct.window_size;
6585                let exit_tags = &ct.exit_tags;
6586                let per_exit_tags = &ct.per_exit_tags;
6587                let per_exit_inline = &ct.per_exit_inline;
6588                let compile_entry_tags = &ct.entry_tags;
6589                let global_tag_res_kind = ct.global_tag_res_kind;
6590                let exit_hit_counts = &ct.exit_hit_counts;
6591                let max_stack = cl.proto.max_stack as usize;
6592                let window_size_us = window_size as usize;
6593                let base_us = base as usize;
6594                // P12-S4-step3a — `reg_state` sized to the trace's
6595                // `window_size`, which today equals max_stack but
6596                // S4-step3b will expand for inlined frames.
6597                // Marshal-in still only writes [0..max_stack); slots
6598                // [max_stack..window_size) are zero-initialised and
6599                // filled by the trace's own GetUpval / arith.
6600                // P13-S13-D — reuse the Vm's amortised buffers
6601                // instead of allocating fresh Vecs each dispatch.
6602                // mem::take leaves an empty placeholder we restore
6603                // at the end of the dispatch block (success +
6604                // deopt paths both fall through to the restore).
6605                let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6606                entry_tags.clear();
6607                entry_tags.reserve(max_stack);
6608                // v2.0 Track-R R3c — this trace was admitted via the
6609                // `downrec_link.is_some()` arm rather than the normal
6610                // `dispatchable=true` arm. The pre-invoke path
6611                // populates a reserved saved-PC slot just past the
6612                // normal register window so R3b's lowerer guard load
6613                // (`reg_state[window_size]`) compares the runtime
6614                // saved caller PC against the recorded `dr_return_pc`.
6615                //
6616                // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6617                // gate. After R3d lifts `dispatchable = true` for
6618                // multi-way guards, the trace's body still emits the
6619                // R3b/R3d sentinel shape on return — the saved-PC slot
6620                // and post-invoke classifier must keep firing.
6621                // `downrec_link.is_some()` is the unique structural
6622                // signal that the trace closes via DownRec.
6623                let is_downrec_entry = ct.downrec_link.is_some();
6624                let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6625                reg_state.clear();
6626                // v2.0 Track-R R3c — when admitting a downrec trace,
6627                // size the buffer to `window_size + 1` so the lowerer
6628                // can `load(I64, ..., reg_state, window_size * 8)`
6629                // for the saved caller PC guard input. The extra slot
6630                // is the LAST element so cranelift's existing
6631                // `0..window_size` accesses are unaffected.
6632                let reg_state_len = if is_downrec_entry {
6633                    window_size_us + 1
6634                } else {
6635                    window_size_us
6636                };
6637                reg_state.resize(reg_state_len, 0i64);
6638                let mut dispatch_ok = true;
6639                for i in 0..max_stack {
6640                    let v = self.stack[base_us + i];
6641                    let (tag, raw) = v.unpack();
6642                    entry_tags.push(tag);
6643                    // P12-S12-C v3 — entry tag guard. The trace's IR
6644                    // is specialised to the compile-time entry tags
6645                    // (via current_kinds propagation from
6646                    // from_entry_tag). A runtime tag mismatch means
6647                    // body ops would mis-interpret raw bits (e.g.
6648                    // treat a Str pointer as Int payload → garbage).
6649                    // Skip dispatch on mismatch so interp handles
6650                    // this entry shape; the trace stays cached for
6651                    // future entries that match.
6652                    if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6653                        dispatch_ok = false;
6654                        break;
6655                    }
6656                    match tag {
6657                        // Int / Float / Table / Nil all marshal
6658                        // to raw payload cleanly; the trace's IR
6659                        // treats the 8-byte slot as an i64 (with
6660                        // f64 ops bitcasting around the boundary).
6661                        crate::runtime::value::raw::INT
6662                        | crate::runtime::value::raw::FLOAT
6663                        | crate::runtime::value::raw::TABLE
6664                        | crate::runtime::value::raw::CLOSURE
6665                        // P12-S12-B-v2 — Native iter slots (e.g.
6666                        // R[A] = ipairs_iter) are present in
6667                        // generic-for traces; the raw bits are a
6668                        // valid `*mut NativeClosure` and round-trip
6669                        // cleanly.
6670                        | crate::runtime::value::raw::NATIVE
6671                        // P12-S12-C v1 — Str slots show up in
6672                        // string-concat traces; raw bits = `*mut
6673                        // LuaStr` (interned, GC-managed). Round-
6674                        // trips cleanly as a heap pointer.
6675                        | crate::runtime::value::raw::STR
6676                        | crate::runtime::value::raw::NIL => {
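6677                            // SAFETY: reads the payload of `raw` as plain 64-bit bits for a tag accepted by this arm; pointer payloads are only carried as bits here and are not dereferenced. NOTE(review): assumes every accepted tag initialises the full 8-byte payload — confirm against `RawVal`.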
6678                            reg_state[i] = unsafe { raw.zero as i64 };
6679                        }
6680                        _ => {
6681                            dispatch_ok = false;
6682                            break;
6683                        }
6684                    }
6685                }
6686
6687                if dispatch_ok {
6688                    debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6689                    self.jit.pending_err = None;
6690                    // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6691                    // count. A cmp@d>0 side-exit calls the materialize
6692                    // helper which pushes inlined frames onto
6693                    // `vm.frames`; on deopt those frames must be popped
6694                    // before falling through to the interpreter, else
6695                    // the stack grows unboundedly per deopted dispatch.
6696                    let pre_frames = self.frames.len();
6697                    // v2.0 Track-R R3c — saved-PC slot population. The
6698                    // recorded `dr_return_pc` on the closing trace is
6699                    // the caller's resume PC captured at a depth>0
6700                    // Return push (recorder push site, see R3a verdict
6701                    // §3). The natural runtime analogue for self-
6702                    // stitch is the dispatching frame's PARENT frame's
6703                    // PC: the trace's head_pc sits inside a Lua frame,
6704                    // and the parent (caller) frame's `pc` is what
6705                    // luna would observe as `[base-8]` in the LJ
6706                    // `asm_retf` shape (`lj_asm_arm64.h:565`). When
6707                    // the parent isn't a Lua frame (top-level dispatch
6708                    // — first invocation through `call_value`), no
6709                    // saved PC exists; we write 0, which always
6710                    // mismatches the recorded `dr_return_pc != 0`
6711                    // invariant pinned by R3b
6712                    // (`crates/luna-jit/src/jit_backend/trace.rs:7206
6713                    // debug_assert!(dr_return_pc != 0, ...)`).
6714                    if is_downrec_entry {
6715                        let saved_pc: i64 = if pre_frames >= 2 {
6716                            match &self.frames[pre_frames - 2] {
6717                                CallFrame::Lua(parent) => parent.pc as i64,
6718                                CallFrame::Cont(_) => 0,
6719                            }
6720                        } else {
6721                            0
6722                        };
6723                        reg_state[window_size_us] = saved_pc;
6724                    }
6725                    // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
6726                    // diagnostic hook. The probe fires once per trace dispatch
6727                    // (regardless of JIT vs AOT origin — both go through this
6728                    // arm), letting the AOT smoke test verify mcode actually
6729                    // executed. Guarded behind `OnceLock` so the env read is
6730                    // a one-time cost per process; not gated on a particular
6731                    // counter so the smoke test gets a deterministic single-
6732                    // line `aot_trace_fired pc=N` per first dispatch.
6733                    if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
6734                        eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
6735                    }
6736                    let continuation_pc = {
6737                        // v1.1 A1 Session A — chunk_compiler.enter
6738                        // (CraneliftBackend delegates to enter_jit;
6739                        // NullJitBackend returns an inert guard).
6740                        let vm_ptr: *mut Vm = self;
6741                        let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
6742                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6743                        unsafe { entry_fn(reg_state.as_mut_ptr()) }
6744                    };
6745                    self.jit.counters.dispatched += 1;
6746
6747                    if self.jit.pending_err.is_some() {
6748                        self.jit.pending_err = None;
6749                        self.jit.counters.deopt += 1;
6750                        // P12-S4-step4b-C-2 — unwind any helper-pushed
6751                        // inlined frames before the interpreter resumes.
6752                        // Don't restore reg_state — the trace's partial
6753                        // writes are discarded; interp re-executes from
6754                        // the original `pc`.
6755                        while self.frames.len() > pre_frames {
6756                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
6757                        }
6758                        if is_downrec_entry {
6759                            // v2.0 Track-R R3c — pending_err observed
6760                            // mid-trace inside a downrec admit. Treat
6761                            // it as a guard miss: bump `downrec_deopt`
6762                            // and suppress the next downrec admit so
6763                            // interp can advance past `head_pc` and
6764                            // the same trace doesn't immediately re-
6765                            // fire on the next loop iteration.
6766                            self.jit.counters.downrec_deopt += 1;
6767                            self.jit.suppress_downrec_admit_once = true;
6768                        }
6769                    } else if is_downrec_entry && {
6770                        // v2.0 Track-R R3d — only enter the R3c/R3d
6771                        // downrec classifier for returns whose shape
6772                        // matches the lowerer's `downrec_idx_opt` tail
6773                        // emit: either the stitch_blk DOWNREC sentinel
6774                        // (HIT) or the deopt_blk GLOBAL-sentinel-with-
6775                        // body==head_pc (MISS via guard fail). Any
6776                        // other return from a downrec trace (intermediate
6777                        // body cmp side-exit, GetField inference fail,
6778                        // etc.) carries a different sentinel/body shape
6779                        // and means the body exited BEFORE reaching the
6780                        // downrec close — classify those through the
6781                        // normal decode path (else branch below) so
6782                        // reg_state restores + pc advances correctly.
6783                        // The pre-R3d behavior (R3c) classified them all
6784                        // as MISS and skipped the normal restore, which
6785                        // inflated `downrec_deopt` with non-downrec
6786                        // events and lost the trace's mid-flight writes.
6787                        let raw_ret = continuation_pc as u64;
6788                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
6789                        let sentinel_code = if from_side_trace {
6790                            ((raw_ret >> 56) & 0x7F) as u32
6791                        } else {
6792                            0
6793                        };
6794                        let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6795                        let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
6796                            crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
6797                            0,
6798                        );
6799                        from_side_trace
6800                            && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
6801                                || (sentinel_code == global_deopt_code
6802                                    && raw_body == head_pc_val as u64))
6803                    } {
6804                        // R3d downrec event classifier.
6805                        let raw_ret = continuation_pc as u64;
6806                        let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6807                        if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
6808                            // Guard HIT — saved_pc matched one of the
6809                            // baked candidates and the trace's
6810                            // `stitch_blk` arm returned the DOWNREC
6811                            // sentinel. Cycle-safety checkpoint:
6812                            // decrement budget; on underflow,
6813                            // reclassify as deopt + reset budget.
6814                            // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
6815                            // ~all natural HITs in a hot loop fire
6816                            // before reset pressure.
6817                            if self.jit.stitch_depth_remaining > 0 {
6818                                self.jit.stitch_depth_remaining -= 1;
6819                                self.jit.counters.downrec_dispatched += 1;
6820                            } else {
6821                                self.jit.counters.downrec_deopt += 1;
6822                                self.jit.stitch_depth_remaining =
6823                                    crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
6824                            }
6825                        } else {
6826                            // Guard MISS via the lowerer's deopt_blk
6827                            // arm (GLOBAL sentinel + body == head_pc).
6828                            // The deopt_blk emit performs the
6829                            // store-back via `emit_store_back_and_return_pc`,
6830                            // so the live stack already reflects the
6831                            // body's writes; no extra restore needed
6832                            // from the dispatcher side.
6833                            self.jit.counters.downrec_deopt += 1;
6834                        }
6835                        self.jit.suppress_downrec_admit_once = true;
6836                        // Pop helper-pushed inlined frames (defensive —
6837                        // R3d's emit shape doesn't push frames in the
6838                        // tail, but a body side-exit before reaching
6839                        // the tail may have via the materialize helper).
6840                        while self.frames.len() > pre_frames {
6841                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
6842                        }
6843                        self.jit.reg_state_buf = reg_state;
6844                        self.jit.entry_tags_buf = entry_tags;
6845                        continue;
6846                    } else {
6847                        // Restore each slot using the trace's
6848                        // exit-tag analysis (see ExitTag docs).
6849                        // P12-S4-step4b-C-2 — decode the IR's
6850                        // side-exit shape. Upper 32 bits = (site_idx
6851                        // + 1) for inline cmp side-exits, 0 for
6852                        // legacy clean-tail / non-inline exits.
6853                        // P15-A v2-C-A0 — decode lives in
6854                        // `crate::jit::trace::decode_exit_shape` so
6855                        // v2-C-A3 can reuse it with the SIDE TRACE's
6856                        // shape inputs when the sentinel bit
6857                        // (v2-C-A2) is set on `raw_ret`.
6858                        let raw_ret = continuation_pc as u64;
6859                        // P15-A v2-C-A3 — side-trace return decode.
6860                        // Bit 63 of `raw_ret` is the side-trace
6861                        // marker the parent's IR OR'd in when it
6862                        // tail-called into a wired child trace.
6863                        // Bits 56..=62 carry the sentinel code (the
6864                        // cache key into the parent's
6865                        // `side_trace_cache`); bits 0..=55 are the
6866                        // child's own return value (encoded site or
6867                        // plain cont_pc) which we MUST decode using
6868                        // the CHILD's per_exit_inline / per_exit_tags
6869                        // / exit_tags / exit_hit_counts — not the
6870                        // parent's. The dispatcher snapshot read
6871                        // above holds the parent's shapes; when bit
6872                        // 63 is set we re-fetch the child's via the
6873                        // sentinel-keyed cache.
6874                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
6875                        let (
6876                            decode_inline,
6877                            decode_tags,
6878                            decode_exit_tags,
6879                            decode_hit_counts,
6880                            decode_body,
6881                        ) = if from_side_trace {
6882                            let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6883                            let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6884                            let traces = cl.proto.traces.borrow();
6885                            let child_idx = traces
6886                                .iter()
6887                                .find(|t| t.head_pc == head_pc_val)
6888                                .and_then(|pct| {
6889                                    pct.side_trace_cache.borrow().get(&sentinel_code).copied()
6890                                });
6891                            if let Some(idx) = child_idx
6892                                && let Some(child) = traces.get(idx as usize)
6893                            {
6894                                if crate::jit::trace::v2c_probe_enabled() {
6895                                    eprintln!(
6896                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
6897                                        sentinel_code,
6898                                        body,
6899                                        idx,
6900                                        child.n_ops,
6901                                        child.head_pc,
6902                                        child.window_size,
6903                                        pc,
6904                                        window_size,
6905                                        child.dispatchable,
6906                                        child.is_inline_abort_close,
6907                                    );
6908                                }
6909                                (
6910                                    child.per_exit_inline.clone(),
6911                                    child.per_exit_tags.clone(),
6912                                    child.exit_tags.clone(),
6913                                    child.exit_hit_counts.clone(),
6914                                    body,
6915                                )
6916                            } else {
6917                                if crate::jit::trace::v2c_probe_enabled() {
6918                                    eprintln!(
6919                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
6920                                        sentinel_code, body,
6921                                    );
6922                                }
6923                                // Cache miss — fall back to parent
6924                                // shapes with the body bits. Best-
6925                                // effort; the trace_side_trace_
6926                                // shape_mismatch_count records this
6927                                // path indirectly (close-handler
6928                                // skips wiring on mismatch so we
6929                                // shouldn't reach here when shape
6930                                // gate held).
6931                                (
6932                                    per_exit_inline.clone(),
6933                                    per_exit_tags.clone(),
6934                                    exit_tags.clone(),
6935                                    exit_hit_counts.clone(),
6936                                    body,
6937                                )
6938                            }
6939                        } else {
6940                            // P15-A v2-D — dispatcher-level side-trace
6941                            // invocation. Replaces v2-C's universal IR
6942                            // gate (`load + icmp + brif` at every
6943                            // emit_store_back callsite, which A6/A7
6944                            // measured as a net perf regression).
6945                            // A8 fast-path (historical — reverted, see
6946                            // A8-revert below; NOT in the code that
6947                            // follows): skipped the tentative decode +
6948                            // child lookup when `has_any_side_wired ==
6949                            // false` (the common case until the first
6950                            // side trace compiles for this parent),
6951                            // aiming to collapse the v2-D overhead on
6952                            // fib_10_x10k-style tight short-trace
6953                            // workloads to a single `Cell::get()`.
6954                            // A8-revert: A8 had `parent_has_side` short-
6955                            // circuit + snapshot hoist; mini N=3 showed
6956                            // A8 lost the btrees_d8 1.02× win (dropped
6957                            // to 0.95×) WITHOUT helping fib_10 (same
6958                            // 0.86×). Drop A8 — accept the always-run
6959                            // v2-D path; the tentative decode + cell
6960                            // load is cheaper than the cost A8 added.
6961                            {
6962                                let tentative = crate::jit::trace::decode_exit_shape(
6963                                    raw_ret,
6964                                    per_exit_inline,
6965                                    per_exit_tags,
6966                                    exit_tags,
6967                                );
6968                                let tentative_exit_idx = tentative.exit_hit_idx;
6969                                let child_invoke = {
6970                                    let traces = cl.proto.traces.borrow();
6971                                    traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
6972                                        |pct| {
6973                                            let cell =
6974                                                pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
6975                                            let fn_ptr = cell.get();
6976                                            if fn_ptr.is_null() {
6977                                                return None;
6978                                            }
6979                                            traces
6980                                                .iter()
6981                                                .find(|t| {
6982                                                    t.entry as *const () as *const u8 == fn_ptr
6983                                                })
6984                                                .map(|child| {
6985                                                    (
6986                                                        child.entry,
6987                                                        child.per_exit_inline.clone(),
6988                                                        child.per_exit_tags.clone(),
6989                                                        child.exit_tags.clone(),
6990                                                        child.exit_hit_counts.clone(),
6991                                                    )
6992                                                })
6993                                        },
6994                                    )
6995                                };
6996                                if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
6997                                    let child_raw_ret = {
6998                                        // v1.1 A1 Session A — chunk_compiler.enter
6999                                        // (side-trace entry).
7000                                        let vm_ptr: *mut Vm = self;
7001                                        let _guard =
7002                                            self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
7003                                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7004                                        unsafe { cent(reg_state.as_mut_ptr()) }
7005                                    };
7006                                    (cpi, cpt, cet, chc, child_raw_ret as u64)
7007                                } else {
7008                                    (
7009                                        per_exit_inline.clone(),
7010                                        per_exit_tags.clone(),
7011                                        exit_tags.clone(),
7012                                        exit_hit_counts.clone(),
7013                                        raw_ret,
7014                                    )
7015                                }
7016                            }
7017                        };
7018                        let decoded = crate::jit::trace::decode_exit_shape(
7019                            decode_body,
7020                            &decode_inline,
7021                            &decode_tags,
7022                            &decode_exit_tags,
7023                        );
7024                        let site_id = decoded.site_id;
7025                        let cont_pc = decoded.cont_pc;
7026                        let exit_hit_idx = decoded.exit_hit_idx;
7027                        let exit_tags_for_pc = decoded.exit_tags_for_pc;
7028                        // P15-A v2-C-A3 — for side-trace returns
7029                        // force using_global_exit_tags=false so the
7030                        // restore loop always takes the per-tag slow
7031                        // path (the child's global_tag_res_kind
7032                        // classification isn't plumbed through yet
7033                        // — TODO for a future polish step).
7034                        let using_global_exit_tags = if from_side_trace {
7035                            false
7036                        } else {
7037                            decoded.using_global_exit_tags
7038                        };
7039                        // P15-prep — increment the counter (saturate
7040                        // at u32::MAX to avoid wrap on long runs).
7041                        // P15-A v1 — track whether this increment is
7042                        // the one that crossed `HOTEXIT_THRESHOLD`
7043                        // (transition: previous v < threshold, new v
7044                        // == threshold). The side-trace start is
7045                        // deferred to just before `continue;` so
7046                        // vm.stack and frame.pc are fully restored
7047                        // (the snapshot reads post-restore values).
7048                        let mut side_trace_should_start = false;
7049                        // P15-A v2-C-A3 — for side-trace returns the
7050                        // counter to bump is the CHILD's (decoded
7051                        // shape lookup) — `exit_hit_idx` is into the
7052                        // decoded layout, so use the matching
7053                        // `decode_hit_counts`. For parent decode
7054                        // they're aliased (clone of the parent's
7055                        // own Rc).
7056                        if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7057                            let v = c.get();
7058                            if v < u32::MAX {
7059                                c.set(v + 1);
7060                            }
7061                            if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7062                                && self.jit.active_trace.is_none()
7063                                && self.jit.trace_enabled
7064                            {
7065                                side_trace_should_start = true;
7066                            }
7067                        }
7068                        // P12-S4-step4b-C-2 — at an inline cmp@d>0
7069                        // side-exit, the helper has pushed N frames on
7070                        // top of the trace head's frame and
7071                        // `exit_tags_for_pc.len()` covers the full
7072                        // window (caller + each inlined frame's
7073                        // window). Slots beyond `max_stack` belong to
7074                        // an inlined frame: their `Untouched` entries
7075                        // default to Nil (no entry-tag fallback —
7076                        // marshal-in only captured caller slots) and
7077                        // we write to interp stack at `base + i` which
7078                        // mirrors `op_offsets`-derived layout.
7079                        let slot_count = exit_tags_for_pc.len();
7080                        // P12-S4-step4b-C-2 — the helper only extends
7081                        // vm.stack up to the deepest pushed frame's
7082                        // window, but the exit_tags snapshot covers
7083                        // the trace's full `window_size` (which
7084                        // includes depth-N+1 scratch slots that the
7085                        // trace's IR may have written without a
7086                        // matching pushed frame). Extend with Nil so
7087                        // the write at the tail doesn't panic; these
7088                        // slots get overwritten by the writeback loop
7089                        // and won't leak meaningful data past the
7090                        // pushed frames' R[0..max_stack) windows.
7091                        if self.stack.len() < base_us + slot_count {
7092                            self.stack
7093                                .resize(base_us + slot_count, crate::runtime::Value::Nil);
7094                        }
7095                        // P13-S13-E — fast-path restore loop. When
7096                        // we landed on the global `exit_tags`,
7097                        // dispatch on the compile-time
7098                        // classification: skip the loop entirely
7099                        // for `AllUntouched`, do a tag-free
7100                        // `Value::Int(...)` write per slot for
7101                        // `AllInt`, otherwise fall through to the
7102                        // general match-arm loop. site_id > 0
7103                        // (inline frame mat) and per_exit_tags
7104                        // hits always take the general path —
7105                        // their per-side-exit shapes aren't
7106                        // pre-classified yet.
7107                        let fast_path_taken = if using_global_exit_tags {
7108                            match global_tag_res_kind {
7109                                crate::jit::trace::TagResKind::AllUntouched => {
7110                                    // No-op: vm.stack already
7111                                    // matches the trace's post-
7112                                    // entry state for these
7113                                    // slots (entry values not
7114                                    // overridden, or already
7115                                    // spilled by helpers).
7116                                    true
7117                                }
7118                                crate::jit::trace::TagResKind::AllInt => {
7119                                    for i in 0..slot_count {
7120                                        self.stack[base_us + i] =
7121                                            crate::runtime::Value::Int(reg_state[i]);
7122                                    }
7123                                    true
7124                                }
7125                                crate::jit::trace::TagResKind::Mixed => false,
7126                            }
7127                        } else {
7128                            false
7129                        };
7130                        if !fast_path_taken {
7131                            for i in 0..slot_count {
7132                                let tag = match exit_tags_for_pc[i] {
7133                                    crate::jit::trace::ExitTag::Untouched => {
7134                                        if i < max_stack {
7135                                            entry_tags[i]
7136                                        } else {
7137                                            crate::runtime::value::raw::NIL
7138                                        }
7139                                    }
7140                                    crate::jit::trace::ExitTag::Int => {
7141                                        crate::runtime::value::raw::INT
7142                                    }
7143                                    crate::jit::trace::ExitTag::Float => {
7144                                        crate::runtime::value::raw::FLOAT
7145                                    }
7146                                    crate::jit::trace::ExitTag::Table => {
7147                                        crate::runtime::value::raw::TABLE
7148                                    }
7149                                    crate::jit::trace::ExitTag::Closure => {
7150                                        crate::runtime::value::raw::CLOSURE
7151                                    }
7152                                    // P12-S6-A1 — trace actively wrote Nil
7153                                    // to this slot (e.g. via Op::LoadNil).
7154                                    // Restore as Nil regardless of the entry
7155                                    // tag, since the i64 payload is 0 and
7156                                    // packing as the entry tag (e.g. INT)
7157                                    // would mis-type the slot.
7158                                    crate::jit::trace::ExitTag::Nil => {
7159                                        crate::runtime::value::raw::NIL
7160                                    }
7161                                    // P12-S12-C v2 — trace wrote a Str ptr
7162                                    // to this slot (LoadK Str / Move from
7163                                    // Str / Concat result). Restore as
7164                                    // Value::Str with raw bits round-
7165                                    // tripped.
7166                                    crate::jit::trace::ExitTag::Str => {
7167                                        crate::runtime::value::raw::STR
7168                                    }
7169                                };
7170                                // SAFETY: tag is from a verified slot
7171                                // (entry validated above) or pinned by
7172                                // the exit-tag analysis to one of
7173                                // INT/FLOAT/TABLE/CLOSURE/NIL/STR (the match
7174                                // arms above). The raw payload sits in reg_state[i].
7175                                // Stack was extended by the materialize helper for inline frames.
7176                                // For tags whose payload is a GC pointer (e.g. the Str ptr case above): Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7177                                self.stack[base_us + i] = unsafe {
7178                                    Value::pack(
7179                                        tag,
7180                                        crate::runtime::value::RawVal {
7181                                            zero: reg_state[i] as u64,
7182                                        },
7183                                    )
7184                                };
7185                            }
7186                        }
7187                        // P12-S4-step4b-C-2 — for non-inline exits the
7188                        // helper was never called (no metas chain for
7189                        // this cont_pc), so `frames.last()` is the
7190                        // trace head's frame and we set its pc to
7191                        // cont_pc as before. For inline exits the
7192                        // helper baked the side-exit PC into the
7193                        // innermost frame's `pc` at push time
7194                        // (chain.last().pc was overridden at emit),
7195                        // so this assignment to `frames.last_mut().pc
7196                        // = cont_pc` is a redundant-but-correct
7197                        // confirmation.
7198                        let _ = &per_exit_inline; // hold the Rc alive across dispatch
7199                        // P12-S4-step4b-C-2 — for inline side-exits the
7200                        // helper has pushed N frames on top. The trace
7201                        // head frame is at `pre_frames - 1`; set its
7202                        // pc to `head_resume_pc` so when the chain
7203                        // eventually pops back to it, interp resumes
7204                        // PAST the trace's depth-0 Op::Call instead of
7205                        // restarting from `head_pc` and re-triggering
7206                        // dispatch (infinite loop). The innermost
7207                        // (helper-pushed) frame already has its pc
7208                        // baked in at compile time, but we still
7209                        // assign `cont_pc` below for parity with the
7210                        // non-inline path (no-op).
7211                        if site_id > 0 {
7212                            let idx = (site_id - 1) as usize;
7213                            let head_resume_pc = decode_inline[idx].head_resume_pc;
7214                            if pre_frames > 0 {
7215                                if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7216                                    f.pc = head_resume_pc;
7217                                }
7218                            }
7219                        }
7220                        let frames_len_now = self.frames.len();
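7221                        // SAFETY: `unwrap_unchecked` is sound only while `self.frames` is non-empty; this path runs inside the dispatch of the currently executing frame, so the frame stack is expected to hold at least that frame (NOTE(review): the invariant is established outside this span — confirm). No Gc<T> pointer is dereferenced here.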
7222                        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7223                            CallFrame::Lua(fmut) => {
7224                                if crate::jit::trace::v2c_probe_enabled() {
7225                                    eprintln!(
7226                                        "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
7227                                        from_side_trace,
7228                                        raw_ret,
7229                                        fmut.pc,
7230                                        cont_pc,
7231                                        site_id,
7232                                        frames_len_now,
7233                                        pre_frames,
7234                                        max_stack,
7235                                    );
7236                                }
7237                                fmut.pc = cont_pc;
7238                            }
7239                            _ => unreachable!("Cont frame at trace dispatch"),
7240                        }
7241                        // P15-A v1 — deferred side-trace start. The
7242                        // increment block above flagged this exit's
7243                        // hit count crossing HOTEXIT_THRESHOLD; now
7244                        // that vm.stack is restored and frame.pc is
7245                        // settled, snapshot entry_tags from the
7246                        // resume frame's window and create the
7247                        // recorder. The recorder's first push fires
7248                        // on the next interp iteration at cont_pc.
7249                        //
7250                        // `head_proto` for the side trace = cl.proto
7251                        // (trace JIT only inlines self-recursive
7252                        // calls today, so cont_pc always lands in
7253                        // the same proto as the parent). Frame base
7254                        // is the resume frame (top of `self.frames`
7255                        // — inline-pushed frames moved this).
7256                        if side_trace_should_start {
7257                            let (resume_base, resume_proto) = match self.frames.last() {
7258                                Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7259                                _ => (base_us, cl.proto),
7260                            };
7261                            let resume_max_stack = resume_proto.max_stack as usize;
7262                            let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7263                            // Extend stack if cont_pc's frame window
7264                            // overhangs the current stack len (rare,
7265                            // but inline-pushed frame stack writes
7266                            // only covered the trace's writeback).
7267                            if self.stack.len() < resume_base + resume_max_stack {
7268                                self.stack.resize(
7269                                    resume_base + resume_max_stack,
7270                                    crate::runtime::Value::Nil,
7271                                );
7272                            }
7273                            for i in 0..resume_max_stack {
7274                                let (tag, _) = self.stack[resume_base + i].unpack();
7275                                side_entry_tags.push(tag);
7276                            }
7277                            self.jit.active_trace =
7278                                Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7279                                    resume_proto,
7280                                    cont_pc,
7281                                    side_entry_tags,
7282                                    cl.proto,
7283                                    head_pc_val,
7284                                    exit_hit_idx,
7285                                )));
7286                            self.jit.recording_frame_base = self.frames.len() - 1;
7287                            self.jit.counters.side_trace_started += 1;
7288                        }
7289                        // P13-S13-D — put the dispatch buffers back
7290                        // before the `continue;` so the next
7291                        // dispatch picks up the same allocation.
7292                        self.jit.reg_state_buf = reg_state;
7293                        self.jit.entry_tags_buf = entry_tags;
7294                        continue;
7295                    }
7296                }
7297                // P13-S13-D — !dispatch_ok / deopt path / non-cont
7298                // exit also restore the buffers before falling
7299                // through to the interp.
7300                self.jit.reg_state_buf = reg_state;
7301                self.jit.entry_tags_buf = entry_tags;
7302            }
7303
7304            // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
7305            // hook code that consults `currentpc = savedpc - 1` lands on the
7306            // instruction now executing. luna mirrors that by advancing
7307            // `f.pc` to `pc + 1` before the hook block — local_at /
7308            // getinfo / line attribution all read f.pc, and the existing
7309            // `pc - 1` convention in those helpers then yields the current
7310            // instruction's pc (db.lua :696: local `A` visible at the
7311            // chunk's return line once OP_CLOSURE has advanced pc).
7312            //
7313            // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
7314            // (cont frames drained above) so the and_then/Option layers are
7315            // dead weight.
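7316            // SAFETY: `unwrap_unchecked` is sound only while `self.frames` is non-empty; the loop is executing the top Lua frame here (cont frames drained above), so `last_mut()` is expected to be `Some` (NOTE(review): the invariant is established outside this span — confirm). No Gc<T> pointer is dereferenced here.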
7317            match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7318                CallFrame::Lua(fmut) => fmut.pc = pc + 1,
7319                _ => unreachable!("Cont frame at pc bump"),
7320            }
7321
7322            // count + line hooks (PUC traceexec): before executing the
7323            // instruction. Skipped while the hook itself runs.
7324            // (Parens here are load-bearing — without them `&&` binds tighter
7325            // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7326            // letting a Lua line hook recurse into itself → stack overflow
7327            // on db.lua line-hook assertions. Matches the `hook_call_with` /
7328            // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7329            if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7330                let lines = &cl.proto.lines;
7331                let cur_line = if lines.is_empty() {
7332                    None
7333                } else {
7334                    Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7335                };
7336                // count hook: fire every `count_base` instructions
7337                if self.hook.count {
7338                    self.hook.count_left -= 1;
7339                    if self.hook.count_left <= 0 {
7340                        self.hook.count_left = self.hook.count_base;
7341                        // hooked function is the running Lua frame: its frame
7342                        // is on the stack, so no synthetic C level is needed.
7343                        self.run_hook(b"count", cur_line, false)?;
7344                    }
7345                }
7346                // line hook: fire on a fresh frame, a backward jump (loop), or a
7347                // change of source line.
7348                if self.hook.line {
7349                    if lines.is_empty() {
7350                        // PUC: a stripped chunk has no line info, so
7351                        // `getfuncline` returns -1. The line hook still fires
7352                        // on the first instruction of the new frame (where
7353                        // `npci <= oldpc` holds at oldpc=0), with the line
7354                        // pushed as `nil` instead of an integer (db.lua :1030
7355                        // "hook called without debug info for 1st instruction").
7356                        if oldpc == u32::MAX {
7357                            self.run_hook(b"line", None, false)?;
7358                            self.top_frame_mut().hook_oldpc = pc;
7359                        }
7360                    } else {
7361                        let newline = lines[(pc as usize).min(lines.len() - 1)];
7362                        // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7363                        // on a backward jump (`pc < oldpc` — strict; an equal pc
7364                        // would re-fire the install-site after `oldpc = pc`),
7365                        // or when the source line changes.
7366                        let fire = oldpc == u32::MAX
7367                            || pc < oldpc
7368                            || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7369                        if fire {
7370                            self.run_hook(b"line", Some(newline as i64), false)?;
7371                        }
7372                        self.top_frame_mut().hook_oldpc = pc;
7373                    }
7374                }
7375            }
7376
7377            match inst.op() {
7378                Op::Move => {
7379                    let v = self.r(base, inst.b());
7380                    self.set_r(base, inst.a(), v);
7381                }
7382                Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7383                Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7384                Op::LoadK => {
7385                    let v = cl.proto.consts[inst.bx() as usize];
7386                    self.set_r(base, inst.a(), v);
7387                }
7388                Op::LoadKx => {
7389                    let extra = cl.proto.code[self.pc_of_top() as usize];
7390                    self.bump_pc();
7391                    let v = cl.proto.consts[extra.ax() as usize];
7392                    self.set_r(base, inst.a(), v);
7393                }
7394                Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7395                Op::LFalseSkip => {
7396                    self.set_r(base, inst.a(), Value::Bool(false));
7397                    self.bump_pc();
7398                }
7399                Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7400                Op::LoadNil => {
7401                    let a = inst.a();
7402                    for i in 0..=inst.b() {
7403                        self.set_r(base, a + i, Value::Nil);
7404                    }
7405                }
7406                Op::GetUpval => {
7407                    let v = self.upval_get(cl, inst.b());
7408                    self.set_r(base, inst.a(), v);
7409                }
7410                Op::SetUpval => {
7411                    let v = self.r(base, inst.a());
7412                    self.upval_set(cl, inst.b(), v);
7413                }
7414                Op::GetTabUp => {
7415                    let t = self.upval_get(cl, inst.b());
7416                    let key = cl.proto.consts[inst.c() as usize];
7417                    self.op_index(t, key, base + inst.a())?;
7418                }
7419                Op::GetTable => {
7420                    let t = self.r(base, inst.b());
7421                    let key = self.r(base, inst.c());
7422                    self.op_index(t, key, base + inst.a())?;
7423                }
7424                Op::GetI => {
7425                    let t = self.r(base, inst.b());
7426                    self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7427                }
7428                Op::GetField => {
7429                    let t = self.r(base, inst.b());
7430                    let key = cl.proto.consts[inst.c() as usize];
7431                    // v1.2 D4 A1 — fast path: known-Str const key + no
7432                    // metatable on the table → skip `op_index` /
7433                    // `index_step`'s MAX_TAG_LOOP setup and the outer
7434                    // `Value` match. Falls through to the slow path
7435                    // unchanged when either invariant breaks (so
7436                    // `__index` metamethods, non-Table receivers, and
7437                    // non-Str keys behave exactly as before).
7438                    if let Value::Table(tb) = t
7439                        && tb.metatable().is_none()
7440                        && let Value::Str(s) = key
7441                    {
7442                        let v = tb.get_str(s);
7443                        self.stack[(base + inst.a()) as usize] = v;
7444                    } else {
7445                        self.op_index(t, key, base + inst.a())?;
7446                    }
7447                }
7448                Op::SetTabUp => {
7449                    let t = self.upval_get(cl, inst.a());
7450                    let key = cl.proto.consts[inst.b() as usize];
7451                    let v = self.r(base, inst.c());
7452                    self.op_newindex(t, key, v)?;
7453                }
7454                Op::SetTable => {
7455                    let t = self.r(base, inst.a());
7456                    let key = self.r(base, inst.b());
7457                    let v = self.r(base, inst.c());
7458                    self.op_newindex(t, key, v)?;
7459                }
7460                Op::SetI => {
7461                    let t = self.r(base, inst.a());
7462                    let v = self.r(base, inst.c());
7463                    self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7464                }
7465                Op::SetField => {
7466                    let t = self.r(base, inst.a());
7467                    let key = cl.proto.consts[inst.b() as usize];
7468                    let v = self.r(base, inst.c());
7469                    self.op_newindex(t, key, v)?;
7470                }
7471                Op::NewTable => {
7472                    let t = self.heap.new_table();
7473                    self.set_r(base, inst.a(), Value::Table(t));
7474                    self.maybe_collect_garbage(base + inst.a() + 1);
7475                }
7476                Op::SetList => {
7477                    let a = inst.a();
7478                    let abs_a = base + a;
7479                    let n = if inst.b() == 0 {
7480                        self.top - (abs_a + 1)
7481                    } else {
7482                        inst.b()
7483                    };
7484                    let offset = if inst.k() {
7485                        let extra = cl.proto.code[self.pc_of_top() as usize];
7486                        self.bump_pc();
7487                        extra.ax() as i64
7488                    } else {
7489                        inst.c() as i64
7490                    };
7491                    let Value::Table(t) = self.r(base, a) else {
7492                        unreachable!("SETLIST on non-table");
7493                    };
7494                    for i in 1..=n {
7495                        let v = self.r(base, a + i);
7496                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7497                        if let Err(TableError::Overflow) =
7498                            unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7499                        {
7500                            return Err(self.rt_err("table overflow"));
7501                        }
7502                    }
7503                    // one barrier_back covers every store this op did — PUC's
7504                    // `luaC_barrierback_` once-per-table optimisation
7505                    self.heap
7506                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7507                    // the element temps above the table are now consumed
7508                    self.maybe_collect_garbage(base + a + 1);
7509                }
7510                Op::SelfOp => {
7511                    let o = self.r(base, inst.b());
7512                    self.set_r(base, inst.a() + 1, o);
7513                    // PUC OP_SELF's C is a constant index when the k-flag is
7514                    // set; otherwise it points to a register that holds the
7515                    // (constant-loaded) key. luna's compiler falls back to the
7516                    // register form when the constant index exceeds OP_SELF's
7517                    // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7518                    // a table with 250+ string keys, where "findfield" lands
7519                    // past const #255). The exec must honour the same split.
7520                    let key = if inst.k() {
7521                        cl.proto.consts[inst.c() as usize]
7522                    } else {
7523                        self.r(base, inst.c())
7524                    };
7525                    self.op_index(o, key, base + inst.a())?;
7526                }
7527                Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7528                Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7529                Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7530                Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7531                Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7532                Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7533                Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7534                Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7535                Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7536                Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7537                Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7538                Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7539                Op::Unm => {
7540                    let v = self.r(base, inst.b());
7541                    match coerce_num(v) {
7542                        Some(Num::Int(i)) => {
7543                            self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7544                        }
7545                        Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7546                        None => {
7547                            let mm = self.get_mm(v, Mm::Unm);
7548                            if mm.is_nil() {
7549                                return Err(self.type_err("perform arithmetic on", v));
7550                            }
7551                            let dst = base + inst.a();
7552                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7553                        }
7554                    }
7555                }
7556                Op::BNot => {
7557                    let v = self.r(base, inst.b());
7558                    match coerce_num(v) {
7559                        Some(n) => {
7560                            let i = self.int_from_num(n)?;
7561                            self.set_r(base, inst.a(), Value::Int(!i));
7562                        }
7563                        None => {
7564                            let mm = self.get_mm(v, Mm::BNot);
7565                            if mm.is_nil() {
7566                                return Err(self.type_err("perform bitwise operation on", v));
7567                            }
7568                            let dst = base + inst.a();
7569                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7570                        }
7571                    }
7572                }
7573                Op::Not => {
7574                    let v = self.r(base, inst.b());
7575                    self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7576                }
7577                Op::Len => {
7578                    let v = self.r(base, inst.b());
7579                    match self.len_step(v)? {
7580                        MmOut::Done(r) => self.set_r(base, inst.a(), r),
7581                        MmOut::Mm { func, recv } => {
7582                            let dst = base + inst.a();
7583                            self.begin_meta_call(
7584                                func,
7585                                &[recv, recv],
7586                                MetaAction::Store { dst },
7587                                "len",
7588                            )?;
7589                        }
7590                        MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7591                    }
7592                }
7593                Op::Concat => {
7594                    // right-associative fold over operands at base+a .. base+a+n,
7595                    // in place on the stack so a yielding __concat can suspend.
7596                    let a = inst.a();
7597                    let n = inst.b();
7598                    self.top = base + a + n;
7599                    self.concat_run(base + a)?;
7600                }
7601                Op::Close => {
7602                    // Yieldable: drive __close handlers through the
7603                    // interpreter loop so a coroutine.yield() inside a
7604                    // handler suspends cleanly (locals.lua block-end yield).
7605                    // `drive_close` parks the handler call at `self.top`, so
7606                    // raise `top` past this frame's full register window
7607                    // first — a goto out of a nested for-loop can fire
7608                    // OP_Close while `self.top` still sits at the inner
7609                    // body's working top, which would let `push_frame`'s
7610                    // wipe clobber the outer tbc slot before it could be
7611                    // closed (locals.lua:1219 nested-for goto regression).
7612                    self.top = self.top.max(base + cl.proto.max_stack as u32);
7613                    let _ =
7614                        self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7615                }
7616                Op::Tbc => {
7617                    self.register_tbc(base + inst.a())?;
7618                }
7619                Op::Jmp => {
7620                    let off = inst.sj();
7621                    // P12-S1.B — trace JIT back-edge counter. A negative
7622                    // jump offset is a loop back-edge (the only canonical
7623                    // backward jumps the compiler emits — `while`, `for`,
7624                    // `repeat`). Tick the per-Proto counter and, once it
7625                    // has reached the threshold, start recording a trace
7626                    // at the back-edge target (see the block below). The
7627                    // whole block is gated on `self.jit.trace_enabled` so
7628                    // existing benches see one branch-not-taken and no
7629                    // counter writes.
7630                    if self.jit.trace_enabled && off < 0 {
7631                        let proto = cl.proto;
7632                        let c = proto.trace_hot_count.get();
7633                        if c < u32::MAX / 2 {
7634                            proto.trace_hot_count.set(c + 1);
7635                        }
7636                        // P13-S13-H — relaxed back-edge trigger:
7637                        // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7638                        // a missed crossing (active_trace busy with
7639                        // a call-trigger, or the recorder slot
7640                        // happened to be in use) doesn't permanently
7641                        // lock this back-edge target out. The
7642                        // `already_cached` short-circuit prevents
7643                        // duplicate recordings: once a trace is
7644                        // cached for this target, subsequent
7645                        // crossings skip the start. This pairs with
7646                        // S13-H's discard-on-partial-coverage close
7647                        // handling — when a short call-trigger is
7648                        // discarded, the back-edge can still find an
7649                        // open slot at the next iteration.
7650                        let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7651                        // P13-S13-K — gave-up short-circuit. Skip
7652                        // the RefCell borrow + scan when the
7653                        // S13-I cap force-compiled a partial
7654                        // trace on this Proto.
7655                        let back_edge_already_cached = if proto.trace_gave_up.get() {
7656                            true
7657                        } else {
7658                            proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7659                        };
7660                        if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7661                            && self.jit.active_trace.is_none()
7662                            && !back_edge_already_cached
7663                        {
7664                            // Back-edge target = pc after `add_pc(off)`,
7665                            // i.e. current `pc + 1 + off` (the dispatch
7666                            // loop has already advanced f.pc to pc+1).
7667                            let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7668                            // Snapshot per-slot Value tag at trace
7669                            // entry so the lowerer's kind tracker
7670                            // knows which arith path to lower
7671                            // (iadd vs fadd, etc.).
7672                            let max_stack = cl.proto.max_stack as usize;
7673                            let base_us = base as usize;
7674                            let mut entry_tags = Vec::with_capacity(max_stack);
7675                            for i in 0..max_stack {
7676                                let (tag, _) = self.stack[base_us + i].unpack();
7677                                entry_tags.push(tag);
7678                            }
7679                            self.jit.active_trace =
7680                                Some(Box::new(crate::jit::trace::TraceRecord::start(
7681                                    cl.proto, target, entry_tags, false,
7682                                )));
7683                            // P12-S4 — record the frame the trace
7684                            // started in. `self.frames.len() - 1`
7685                            // since we're inside the currently-running
7686                            // Lua frame's dispatch.
7687                            self.jit.recording_frame_base = self.frames.len() - 1;
7688                        }
7689                    }
7690                    self.add_pc(off);
7691                }
7692                Op::Eq => {
7693                    let l = self.r(base, inst.a());
7694                    let r = self.r(base, inst.b());
7695                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7696                        if (a == b) != inst.k() {
7697                            self.bump_pc();
7698                        }
7699                    } else {
7700                        let step = self.eq_step(l, r);
7701                        self.op_compare(step, l, r, inst.k(), "eq")?;
7702                    }
7703                }
7704                Op::EqK => {
7705                    let l = self.r(base, inst.a());
7706                    let r = cl.proto.consts[inst.b() as usize];
7707                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7708                        if (a == b) != inst.k() {
7709                            self.bump_pc();
7710                        }
7711                    } else {
7712                        let step = self.eq_step(l, r);
7713                        self.op_compare(step, l, r, inst.k(), "eq")?;
7714                    }
7715                }
7716                Op::Lt => {
7717                    let l = self.r(base, inst.a());
7718                    let r = self.r(base, inst.b());
7719                    // hot path: Int < Int — drops the MmOut + op_compare match
7720                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7721                        if (a < b) != inst.k() {
7722                            self.bump_pc();
7723                        }
7724                    } else {
7725                        let step = self.less_step(l, r, false)?;
7726                        self.op_compare(step, l, r, inst.k(), "lt")?;
7727                    }
7728                }
7729                Op::Le => {
7730                    let l = self.r(base, inst.a());
7731                    let r = self.r(base, inst.b());
7732                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7733                        if (a <= b) != inst.k() {
7734                            self.bump_pc();
7735                        }
7736                    } else {
7737                        let step = self.less_step(l, r, true)?;
7738                        self.op_compare(step, l, r, inst.k(), "le")?;
7739                    }
7740                }
7741                Op::Test => {
7742                    let cond = self.r(base, inst.a()).truthy();
7743                    self.cond_skip(cond, inst.k());
7744                }
7745                Op::TestSet => {
7746                    let v = self.r(base, inst.b());
7747                    if v.truthy() == inst.k() {
7748                        self.set_r(base, inst.a(), v);
7749                    } else {
7750                        self.bump_pc();
7751                    }
7752                }
7753                Op::Call => {
7754                    let abs = base + inst.a();
7755                    let nargs = if inst.b() == 0 {
7756                        None
7757                    } else {
7758                        Some(inst.b() - 1)
7759                    };
7760                    let wanted = inst.c() as i32 - 1;
7761                    self.begin_call(abs, nargs, wanted, false)?;
7762                }
7763                Op::TailCall => {
7764                    let fr = *self.top_frame();
7765                    let abs = base + inst.a();
7766                    let mut nargs = if inst.b() == 0 {
7767                        self.top - (abs + 1)
7768                    } else {
7769                        inst.b() - 1
7770                    };
7771                    // A tail call pops this frame before begin_call, so a
7772                    // non-callable target would lose its name/position. Report
7773                    // it now (PUC reads funcname from the still-current ci),
7774                    // while the frame is intact, for "(field 'x')"-style info.
7775                    let mut func = self.stack[abs as usize];
7776                    if !matches!(func, Value::Closure(_) | Value::Native(_))
7777                        && self.get_mm(func, Mm::Call).is_nil()
7778                    {
7779                        return Err(self.call_err(func));
7780                    }
7781                    // PUC `luaD_pretailcall` resolves a chain of `__call`
7782                    // metamethods *in place* before deciding whether to
7783                    // collapse this frame. Without that, each __call hop
7784                    // would push a fresh Lua frame and a 10000-deep
7785                    // tail-recursion through a 100-deep __call chain
7786                    // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
7787                    // shift args right, install the handler at `abs`, retry.
7788                    // Chain depth limit matches the call-site `begin_call`
7789                    // version cap (5.5 calls.lua :223 — 15 max, then "too
7790                    // long"; 16th wrap fails the call). An infinite
7791                    // self-referential `__call` would otherwise spin.
7792                    let chain_cap = if self.version >= LuaVersion::Lua55 {
7793                        15
7794                    } else {
7795                        MAX_CCMT
7796                    };
7797                    let mut chain = 0u32;
7798                    while !matches!(func, Value::Closure(_) | Value::Native(_)) {
7799                        let mm = self.get_mm(func, Mm::Call);
7800                        if mm.is_nil() {
7801                            return Err(self.call_err(func));
7802                        }
7803                        chain += 1;
7804                        if chain > chain_cap {
7805                            return Err(self.rt_err("'__call' chain too long"));
7806                        }
7807                        let end = (abs + 1 + nargs) as usize;
7808                        if self.stack.len() < end + 1 {
7809                            self.stack.resize(end + 1, Value::Nil);
7810                        }
7811                        for i in (0..=nargs).rev() {
7812                            self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
7813                        }
7814                        self.stack[abs as usize] = mm;
7815                        nargs += 1;
7816                        self.top = abs + 1 + nargs;
7817                        func = mm;
7818                    }
7819                    // PUC's tail-call collapse is Lua→Lua only. A tail call to
7820                    // a C function runs the C function under the *current* Lua
7821                    // activation (no frame fold — a C frame has nothing to
7822                    // collapse into); after the C function returns, the
7823                    // calling Lua function returns those results normally.
7824                    // Mirror that: keep our Lua frame on the stack, call the
7825                    // target through `begin_call(abs, …)` as a regular call,
7826                    // and let the fallback `Op::Return` that the compiler
7827                    // emits right after `Op::TailCall` forward the results.
7828                    // 5.1 closure.lua :177's `return getfenv()` from inside
7829                    // foo needs level 1 to resolve to foo, not to the
7830                    // thread's globals fallback that happens when no Lua
7831                    // frame is on the stack.
7832                    let lua_target = matches!(func, Value::Closure(_));
7833                    if lua_target {
7834                        self.close_slots(fr.base, None)?;
7835                        for i in 0..=nargs {
7836                            self.stack[(fr.func_slot + i) as usize] =
7837                                self.stack[(abs + i) as usize];
7838                        }
7839                        // PUC `CIST_TAIL`: the new Lua activation inherits
7840                        // the popped frame's tailcalls count plus one for
7841                        // this collapse. 5.1 db.lua :372 hammers 30000
7842                        // recursive tail calls and expects to see the
7843                        // synthetic tail level for every one of them.
7844                        self.pending_tailcalls = fr.tailcalls.saturating_add(1);
7845                        frames_pop_sync(&mut self.frames, &mut self.frames_top);
7846                        if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
7847                            && self.frames.len() < entry_depth
7848                        {
7849                            // a native completed what was this function's result
7850                            return Ok(self.take_results(fr.func_slot));
7851                        }
7852                    } else {
7853                        // Native (or __call-bearing) target: regular call. The
7854                        // results land at `abs..self.top` and the next op (the
7855                        // fallback `Op::Return`) forwards them. `wanted = -1`
7856                        // because the caller will multret them through Return.
7857                        self.begin_call(abs, Some(nargs), -1, false)?;
7858                    }
7859                }
7860                Op::Return | Op::Return0 | Op::Return1 => {
7861                    let (abs_a, nret) = match inst.op() {
7862                        Op::Return0 => (base, 0),
7863                        Op::Return1 => (base + inst.a(), 1),
7864                        _ => {
7865                            let abs_a = base + inst.a();
7866                            let nret = if inst.b() == 0 {
7867                                self.top - abs_a
7868                            } else {
7869                                inst.b() - 1
7870                            };
7871                            (abs_a, nret)
7872                        }
7873                    };
7874                    // close before moving results: __close handlers run above
7875                    // the stack top, so the result region [abs_a..abs_a+nret)
7876                    // stays intact across any yields the close performs.
7877                    // Fixed-count returns may leave `self.top` below the last
7878                    // result slot (the compiler does not always re-bump it);
7879                    // raise it past the result region so `drive_close` parks
7880                    // the handler call *above* — landing at `self.top` would
7881                    // otherwise clobber a result with the handler closure.
7882                    self.top = self.top.max(abs_a + nret);
7883                    if let Some(vals) = self.begin_close(
7884                        base,
7885                        None,
7886                        AfterClose::Return {
7887                            abs_a,
7888                            nret,
7889                            from_native: false,
7890                        },
7891                        entry_depth,
7892                    )? {
7893                        return Ok(vals);
7894                    }
7895                }
7896                Op::ForPrep => self.for_prep(inst, base)?,
7897                Op::ForLoop => {
7898                    // P12 — trace JIT back-edge counter on the
7899                    // numeric-for back-edge. ForLoop is always at
7900                    // a back-edge position (when it continues);
7901                    // for the trace recorder we treat it as the
7902                    // close-detection equivalent of `Op::Jmp` with
7903                    // negative offset. Counter only ticks when the
7904                    // back-edge will actually fire (count > 0 in
7905                    // the 5.4+ Int form, comparable predicates in
7906                    // pre-5.3 / Float). The cheap check up front
7907                    // matches the for_loop helper's branch.
7908                    if self.jit.trace_enabled {
7909                        let a = inst.a();
7910                        let pre53 = self.version() <= LuaVersion::Lua53;
7911                        let take_back_edge =
7912                            match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
7913                                (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
7914                                    count > 0
7915                                }
7916                                (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
7917                                    let next = cur.wrapping_add(st);
7918                                    if st > 0 { next <= lim } else { next >= lim }
7919                                }
7920                                (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
7921                                    let next = cur + st;
7922                                    if st > 0.0 { next <= lim } else { next >= lim }
7923                                }
7924                                _ => false,
7925                            };
7926                        if take_back_edge {
7927                            let proto = cl.proto;
7928                            let c = proto.trace_hot_count.get();
7929                            if c < u32::MAX / 2 {
7930                                proto.trace_hot_count.set(c + 1);
7931                            }
7932                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
7933                                && self.jit.active_trace.is_none()
7934                            {
7935                                // ForLoop's back-edge target = pc
7936                                // after `add_pc(-bx)` runs from the
7937                                // already-bumped f.pc (= pc + 1).
7938                                // So target = (pc + 1) - bx.
7939                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
7940                                let max_stack = cl.proto.max_stack as usize;
7941                                let base_us = base as usize;
7942                                let mut entry_tags = Vec::with_capacity(max_stack);
7943                                for i in 0..max_stack {
7944                                    let (tag, _) = self.stack[base_us + i].unpack();
7945                                    entry_tags.push(tag);
7946                                }
7947                                self.jit.active_trace =
7948                                    Some(Box::new(crate::jit::trace::TraceRecord::start(
7949                                        cl.proto, target, entry_tags, false,
7950                                    )));
7951                                // P12-S4 — record the frame the trace
7952                                // started in. The currently-running
7953                                // Lua frame is at len() - 1.
7954                                self.jit.recording_frame_base = self.frames.len() - 1;
7955                            }
7956                        }
7957                    }
7958                    self.for_loop(inst, base);
7959                }
7960                Op::TForPrep => {
7961                    // the 4th control slot is the iterator's closing value
7962                    self.register_tbc(base + inst.a() + 3)?;
7963                    self.add_pc(inst.bx() as i32);
7964                }
7965                Op::TForCall => {
7966                    let abs = base + inst.a();
7967                    let need = (abs + 7) as usize;
7968                    if self.stack.len() < need {
7969                        self.stack.resize(need, Value::Nil);
7970                    }
7971                    self.stack[(abs + 4) as usize] = self.stack[abs as usize];
7972                    self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
7973                    self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
7974                    let nvars = inst.c() as i32;
7975                    self.begin_call(abs + 4, Some(2), nvars, false)?;
7976                }
7977                Op::TForLoop => {
7978                    let a = inst.a();
7979                    let ctrl = self.r(base, a + 4);
7980                    if !ctrl.is_nil() {
7981                        // P12-S12-B v1 — trace JIT back-edge counter on
7982                        // generic-for back-edge. TForLoop sits at the
7983                        // tail of `for k,v in expr do ... end`; recorder
7984                        // treats it as the close-detection equivalent of
7985                        // a negative Op::Jmp. Gate on `take_back_edge`
7986                        // (= `ctrl != nil`) so empty-iter loops don't
7987                        // pollute hot_count. v1 only adds the trigger;
7988                        // whitelist + helper + emit live in v2.
7989                        if self.jit.trace_enabled {
7990                            let proto = cl.proto;
7991                            let c = proto.trace_hot_count.get();
7992                            if c < u32::MAX / 2 {
7993                                proto.trace_hot_count.set(c + 1);
7994                            }
7995                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
7996                                && self.jit.active_trace.is_none()
7997                            {
7998                                // TForLoop back-edge target = pc after
7999                                // `add_pc(-bx)` runs from the already-
8000                                // bumped f.pc (= pc + 1). So target =
8001                                // (pc + 1) - bx, normally landing on
8002                                // body_top (the op right after TForPrep).
8003                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8004                                let max_stack = cl.proto.max_stack as usize;
8005                                let base_us = base as usize;
8006                                let mut entry_tags = Vec::with_capacity(max_stack);
8007                                for i in 0..max_stack {
8008                                    let (tag, _) = self.stack[base_us + i].unpack();
8009                                    entry_tags.push(tag);
8010                                }
8011                                // P12-S12-B-v5 — snapshot the iter
8012                                // fn's address if Native, so the
8013                                // lowerer can specialise ipairs into
8014                                // inline Table aget IR.
8015                                let iter_ptr =
8016                                    if let Value::Native(n) = self.stack[base_us + a as usize] {
8017                                        Some(n.f as usize)
8018                                    } else {
8019                                        None
8020                                    };
8021                                // P12-S12-C v3 — snapshot R[A+5]'s
8022                                // tag (= current iter's val from
8023                                // the just-fired TForCall). The v5
8024                                // inline aget fast_blk emits a
8025                                // runtime guard against this tag;
8026                                // mixed-tag arrays deopt rather
8027                                // than producing garbage pointers
8028                                // through the v2 spill path.
8029                                let val_slot = base_us + (a as usize) + 5;
8030                                let val_tag = if val_slot < self.stack.len() {
8031                                    Some(self.stack[val_slot].unpack().0)
8032                                } else {
8033                                    None
8034                                };
8035                                let mut rec = crate::jit::trace::TraceRecord::start(
8036                                    cl.proto, target, entry_tags, false,
8037                                );
8038                                rec.tfor_iter_ptr = iter_ptr;
8039                                rec.tfor_val_tag = val_tag;
8040                                self.jit.active_trace = Some(Box::new(rec));
8041                                self.jit.recording_frame_base = self.frames.len() - 1;
8042                            }
8043                        }
8044                        self.set_r(base, a + 2, ctrl);
8045                        self.add_pc(-(inst.bx() as i32));
8046                    }
8047                }
8048                Op::Closure => {
8049                    let proto = cl.proto.protos[inst.bx() as usize];
8050                    let n_ups = proto.upvals.len();
8051                    // P11-S5d.M — build upvals on the stack for small
8052                    // closures, skipping the per-call Vec/Box alloc
8053                    // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8054                    // = 2 covers most Lua source (1 captured local, or
8055                    // _ENV + a single capture). Beyond that, fall back
8056                    // to a heap Vec.
8057                    use crate::runtime::function::INLINE_UPVALS_N;
8058                    let mut stack_buf: [std::mem::MaybeUninit<
8059                        Gc<crate::runtime::function::Upvalue>,
8060                    >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8061                    let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8062                    let use_inline = n_ups <= INLINE_UPVALS_N;
8063                    if !use_inline {
8064                        heap_buf.reserve_exact(n_ups);
8065                    }
8066                    for (i, d) in proto.upvals.iter().enumerate() {
8067                        let uv = if d.in_stack {
8068                            self.find_or_create_upval(base + d.index as u32)
8069                        } else {
8070                            cl.upvals()[d.index as usize]
8071                        };
8072                        if use_inline {
8073                            stack_buf[i] = std::mem::MaybeUninit::new(uv);
8074                        } else {
8075                            heap_buf.push(uv);
8076                        }
8077                    }
8078                    // Tiny shim around the two paths so the 5.1 _ENV
8079                    // clone + cache check below see one uniform
8080                    // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8081                    // into the local frame (still valid through the
8082                    // rest of this Op::Closure handler).
8083                    let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8084                        // SAFETY: the first n_ups slots of stack_buf
8085                        // were initialised above; we hand out a slice
8086                        // covering exactly them.
8087                        unsafe {
8088                            std::slice::from_raw_parts_mut(
8089                                stack_buf.as_mut_ptr()
8090                                    as *mut Gc<crate::runtime::function::Upvalue>,
8091                                n_ups,
8092                            )
8093                        }
8094                    } else {
8095                        &mut heap_buf[..]
8096                    };
8097                    // PUC 5.1 had per-function environments: every Lua
8098                    // function carried its own `env` slot, snapshotted from
8099                    // the creating function's env at closure time, so a
8100                    // `setfenv` on one closure never bled into a sibling.
8101                    // luna models that by giving the 5.1 closure a *fresh*
8102                    // closed upvalue for whichever cell holds `_ENV`, seeded
8103                    // from the parent's current env value. Only that cell is
8104                    // cloned — every other upvalue keeps its open/shared
8105                    // identity (so e.g. `local function range(...) ...
8106                    // range(...) ... end` still sees its self-reference). 5.2+
8107                    // keeps the shared-upval model (and the proto cache that
8108                    // depends on it).
8109                    let v51 = self.version() <= LuaVersion::Lua51;
8110                    if v51 && proto.env_upval_idx != u8::MAX {
8111                        let i = proto.env_upval_idx as usize;
8112                        let cur = match ups[i].state() {
8113                            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8114                            UpvalState::Closed(v) => v,
8115                        };
8116                        ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8117                    }
8118                    let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8119                    // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8120                    // and reuses it when every fresh-upvalue binding still
8121                    // points to the same Upvalue object as the cached one.
8122                    // That keeps `function() return outer end` repeated in a
8123                    // loop comparing equal across iterations (the captured
8124                    // outer is a shared open upvalue), while `function()
8125                    // return loop_var end` gets a fresh closure each round
8126                    // because the loop var is re-created per iteration. PUC
8127                    // 5.1 predated the cache, and the per-closure `_ENV`
8128                    // clone above would defeat it anyway, so skip it.
8129                    let nc = if v51 {
8130                        self.heap.new_closure_inline(proto, ups_slice)
8131                    } else {
8132                        let cached = proto.cache.get().filter(|c| {
8133                            c.upvals().len() == ups_slice.len()
8134                                && c.upvals()
8135                                    .iter()
8136                                    .zip(ups_slice.iter())
8137                                    .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8138                        });
8139                        match cached {
8140                            Some(c) => c,
8141                            None => {
8142                                let n = self.heap.new_closure_inline(proto, ups_slice);
8143                                proto.cache.set(Some(n));
8144                                n
8145                            }
8146                        }
8147                    };
8148                    self.set_r(base, inst.a(), Value::Closure(nc));
8149                    self.maybe_collect_garbage(base + inst.a() + 1);
8150                }
8151                Op::Vararg => {
8152                    let abs_a = base + inst.a();
8153                    let wanted = inst.c() as i32 - 1;
8154                    // A materialized named vararg lives in func_slot (its writes
8155                    // must be visible to `...`); otherwise spread the extra args
8156                    // straight off the stack at func_slot+1 .. +n_varargs.
8157                    let vt = match self.stack[func_slot as usize] {
8158                        Value::Table(t) => Some(t),
8159                        _ => None,
8160                    };
8161                    let n = match vt {
8162                        Some(t) => {
8163                            let n_key = Value::Str(self.heap.intern(b"n"));
8164                            // PUC getnumargs: a named vararg `t.n` set out of the
8165                            // integer range [0, INT_MAX/2] is rejected here
8166                            match t.get(n_key) {
8167                                Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8168                                _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8169                            }
8170                        }
8171                        None => n_varargs,
8172                    };
8173                    let count = if wanted < 0 { n } else { wanted as u32 };
8174                    let need = (abs_a + count) as usize;
8175                    if self.stack.len() < need {
8176                        self.stack.resize(need, Value::Nil);
8177                    }
8178                    for i in 0..count {
8179                        let v = if i >= n {
8180                            Value::Nil
8181                        } else if let Some(t) = vt {
8182                            t.get_int(i as i64 + 1)
8183                        } else {
8184                            self.stack[(func_slot + 1 + i) as usize]
8185                        };
8186                        self.stack[(abs_a + i) as usize] = v;
8187                    }
8188                    if wanted < 0 {
8189                        self.top = abs_a + count;
8190                    }
8191                }
8192                Op::GetVarg => {
8193                    // materialize the vararg table (PUC table.pack shape) from the
8194                    // stack varargs — used when the named vararg is written /
8195                    // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8196                    // sees later writes) and in the local register R[A].
8197                    let n = n_varargs;
8198                    let t = self.heap.new_table();
8199                    {
8200                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8201                        let tm = unsafe { t.as_mut() };
8202                        for i in 0..n {
8203                            let _ = tm.set_int(
8204                                &mut self.heap,
8205                                i as i64 + 1,
8206                                self.stack[(func_slot + 1 + i) as usize],
8207                            );
8208                        }
8209                    }
8210                    let n_key = Value::Str(self.heap.intern(b"n"));
8211                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8212                    unsafe { t.as_mut() }
8213                        .set(&mut self.heap, n_key, Value::Int(n as i64))
8214                        .expect("'n' is a valid key");
8215                    // once-per-table barrier (mirror SETLIST): t is born BLACK
8216                    // during Propagate; the bulk inserts above don't barrier.
8217                    self.heap
8218                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8219                    self.stack[func_slot as usize] = Value::Table(t);
8220                    self.set_r(base, inst.a(), Value::Table(t));
8221                }
8222                Op::VargIdx => {
8223                    // R[A] := vararg[R[C]] without allocating: integer key in
8224                    // [1,n] → that vararg, "n" → the count, else nil.
8225                    let key = self.r(base, inst.c());
8226                    let n = n_varargs;
8227                    let v = match key {
8228                        Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8229                            self.stack[(func_slot + k as u32) as usize]
8230                        }
8231                        Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8232                            self.stack[(func_slot + f as u32) as usize]
8233                        }
8234                        Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8235                        _ => Value::Nil,
8236                    };
8237                    self.set_r(base, inst.a(), v);
8238                }
8239                Op::ErrNNil => {
8240                    let v = self.r(base, inst.a());
8241                    if !matches!(v, Value::Nil) {
8242                        let bx = inst.bx();
8243                        let name = if bx == 0 {
8244                            "?".to_string()
8245                        } else {
8246                            match cl.proto.consts[(bx - 1) as usize] {
8247                                Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8248                                _ => "?".to_string(),
8249                            }
8250                        };
8251                        return Err(self.rt_err(&format!("global '{name}' already defined")));
8252                    }
8253                }
8254                Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8255            }
8256        }
8257    }
8258
8259    #[inline(always)]
8260    fn pc_of_top(&self) -> u32 {
8261        self.top_frame().pc
8262    }
8263
8264    #[inline(always)]
8265    fn bump_pc(&mut self) {
8266        // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8267        // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8268        // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8269        // up over `fib_28`'s ~500k jumps.
8270        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8271        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8272            CallFrame::Lua(f) => f.pc += 1,
8273            _ => unreachable!("Cont frame at bump_pc"),
8274        }
8275    }
8276
8277    #[inline(always)]
8278    fn add_pc(&mut self, d: i32) {
8279        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8280        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8281            CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8282            _ => unreachable!("Cont frame at add_pc"),
8283        }
8284    }
8285
8286    /// PUC conditional-skip convention: the JMP that follows is executed when
8287    /// `cond == k`; otherwise it is skipped.
8288    #[inline(always)]
8289    fn cond_skip(&mut self, cond: bool, k: bool) {
8290        if cond != k {
8291            self.bump_pc();
8292        }
8293    }
8294
8295    // ---- indexing (with __index/__newindex chains) ----
8296
8297    /// The `#` length operation: string byte length, `__len` if present, else
8298    /// the raw table border. Returns the raw length value (may be non-integer
8299    /// when `__len` is exotic).
8300    pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8301        match self.len_step(v)? {
8302            MmOut::Done(n) => Ok(n),
8303            // PUC calls unary metamethods with the operand twice
8304            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8305            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8306        }
8307    }
8308
8309    /// Length fast path: a string's byte count or a table's raw border when no
8310    /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8311    /// called with the operand twice. Errors for a non-table with no `__len`.
8312    fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8313        match v {
8314            Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8315            Value::Table(t) => {
8316                let mm = self.get_mm(v, Mm::Len);
8317                if mm.is_nil() {
8318                    Ok(MmOut::Done(Value::Int(t.len())))
8319                } else {
8320                    Ok(MmOut::Mm { func: mm, recv: v })
8321                }
8322            }
8323            _ => {
8324                let mm = self.get_mm(v, Mm::Len);
8325                if mm.is_nil() {
8326                    Err(self.type_err("get length of", v))
8327                } else {
8328                    Ok(MmOut::Mm { func: mm, recv: v })
8329                }
8330            }
8331        }
8332    }
8333
8334    /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8335    /// value with no integer representation.
8336    pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8337        match self.len_value(v)? {
8338            Value::Int(i) => Ok(i),
8339            Value::Float(f) => crate::runtime::value::f2i_exact(f)
8340                .ok_or_else(|| self.rt_err("object length is not an integer")),
8341            _ => Err(self.rt_err("object length is not an integer")),
8342        }
8343    }
8344
8345    pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8346        match self.index_step(t, key)? {
8347            MmOut::Done(v) => Ok(v),
8348            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8349            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8350        }
8351    }
8352
8353    /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8354    /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8355    /// are followed inline (no yield possible); only a function link can yield.
8356    fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8357        let mut cur = t;
8358        for _ in 0..MAX_TAG_LOOP {
8359            let mm = match cur {
8360                Value::Table(tb) => {
8361                    let v = tb.get(key);
8362                    if !v.is_nil() {
8363                        return Ok(MmOut::Done(v));
8364                    }
8365                    let mm = self.get_mm(cur, Mm::Index);
8366                    if mm.is_nil() {
8367                        return Ok(MmOut::Done(Value::Nil));
8368                    }
8369                    mm
8370                }
8371                v => {
8372                    let mm = self.get_mm(v, Mm::Index);
8373                    if mm.is_nil() {
8374                        return Err(self.type_err("index", v));
8375                    }
8376                    mm
8377                }
8378            };
8379            match mm {
8380                Value::Closure(_) | Value::Native(_) => {
8381                    return Ok(MmOut::Mm {
8382                        func: mm,
8383                        recv: cur,
8384                    });
8385                }
8386                next => cur = next,
8387            }
8388        }
8389        Err(self.rt_err("'__index' chain too long; possible loop"))
8390    }
8391
8392    pub(crate) fn newindex_value(
8393        &mut self,
8394        t: Value,
8395        key: Value,
8396        v: Value,
8397    ) -> Result<(), LuaError> {
8398        match self.newindex_step(t, key, v)? {
8399            MmOut::Done(_) => Ok(()),
8400            MmOut::Mm { func, recv } => {
8401                self.call_value(func, &[recv, key, v])?;
8402                Ok(())
8403            }
8404            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8405        }
8406    }
8407
8408    /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
8409    /// performed inline (returning `Done`); only a function metamethod (`Mm`)
8410    /// needs an actual call — which the caller may run yieldably.
8411    fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
8412        let mut cur = t;
8413        for _ in 0..MAX_TAG_LOOP {
8414            let mm = match cur {
8415                Value::Table(tb) => {
8416                    // PI-A3 single-walk collapse — Table::try_set_existing
8417                    // fuses the prior `tb.get(key).is_nil()` gate and
8418                    // `raw_set` walk into one chain traversal when the
8419                    // key is already present with a non-nil value. The
8420                    // __newindex chain semantics are preserved by the
8421                    // identity (slot_nil ⇔ fire_newindex); see
8422                    // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
8423                    //
8424                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
8425                    // heap is single-threaded and the pointer is live as
8426                    // long as it is reachable from active roots (see
8427                    // heap.rs:5-7). Mirrors the raw_set wrapper below.
8428                    if unsafe { tb.as_mut() }.try_set_existing(key, v) {
8429                        self.heap
8430                            .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
8431                        return Ok(MmOut::Done(Value::Nil));
8432                    }
8433                    let mm = self.get_mm(cur, Mm::NewIndex);
8434                    if mm.is_nil() {
8435                        self.raw_set(tb, key, v)?;
8436                        return Ok(MmOut::Done(Value::Nil));
8437                    }
8438                    mm
8439                }
8440                bad => {
8441                    let mm = self.get_mm(bad, Mm::NewIndex);
8442                    if mm.is_nil() {
8443                        return Err(self.type_err("index", bad));
8444                    }
8445                    mm
8446                }
8447            };
8448            match mm {
8449                Value::Closure(_) | Value::Native(_) => {
8450                    return Ok(MmOut::Mm {
8451                        func: mm,
8452                        recv: cur,
8453                    });
8454                }
8455                next => cur = next,
8456            }
8457        }
8458        Err(self.rt_err("'__newindex' chain too long; possible loop"))
8459    }
8460
8461    fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8462        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8463        match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8464            Ok(()) => {
8465                self.heap
8466                    .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8467                Ok(())
8468            }
8469            Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8470            Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8471            Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8472            Err(TableError::InvalidNext) => unreachable!(),
8473        }
8474    }
8475
8476    /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8477    /// the boolean result; `Mm` (when raw equality fails and both are tables
8478    /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8479    fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8480        if l.raw_eq(r) {
8481            return MmOut::Done(Value::Bool(true));
8482        }
8483        if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8484            (l, r)
8485        {
8486            // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8487            // (and earlier) required the two operands' metatables to expose a
8488            // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8489            // metatable falls straight back to raw inequality. events.lua 5.1
8490            // :262 bakes this in.
8491            let mm = if self.version() <= LuaVersion::Lua51 {
8492                self.get_comp_mm(l, r, Mm::Eq)
8493            } else {
8494                let mut m = self.get_mm(l, Mm::Eq);
8495                if m.is_nil() {
8496                    m = self.get_mm(r, Mm::Eq);
8497                }
8498                m
8499            };
8500            if !mm.is_nil() {
8501                return MmOut::Mm { func: mm, recv: l };
8502            }
8503        }
8504        MmOut::Done(Value::Bool(false))
8505    }
8506
8507    // ---- arithmetic ----
8508
8509    #[inline(always)]
8510    fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8511        let l = self.r(base, inst.b());
8512        let r = self.r(base, inst.c());
8513        // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8514        // binary_trees all hammer these. Skipping coerce_num + the big
8515        // arith_fast match shaves several conditional moves per op.
8516        if let (Value::Int(a), Value::Int(b)) = (l, r) {
8517            let fast = match op {
8518                ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8519                ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8520                ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8521                _ => None,
8522            };
8523            if let Some(v) = fast {
8524                self.set_r(base, inst.a(), v);
8525                return Ok(());
8526            }
8527        }
8528        // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8529        // and any numeric workload with non-integer accumulators benefits.
8530        if let (Value::Float(a), Value::Float(b)) = (l, r) {
8531            let fast = match op {
8532                ArithOp::Add => Some(Value::Float(a + b)),
8533                ArithOp::Sub => Some(Value::Float(a - b)),
8534                ArithOp::Mul => Some(Value::Float(a * b)),
8535                ArithOp::Div => Some(Value::Float(a / b)),
8536                _ => None,
8537            };
8538            if let Some(v) = fast {
8539                self.set_r(base, inst.a(), v);
8540                return Ok(());
8541            }
8542        }
8543        match self.arith_fast(op, l, r)? {
8544            Some(v) => self.set_r(base, inst.a(), v),
8545            None => {
8546                let mm = self.arith_mm_func(op, l, r)?;
8547                let dst = base + inst.a();
8548                self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8549            }
8550        }
8551        Ok(())
8552    }
8553
8554    /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8555    /// directly, `Ok(None)` when a metamethod is required (the caller decides
8556    /// whether to call it synchronously or yieldably).
8557    fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8558        use ArithOp::*;
8559        match op {
8560            BAnd | BOr | BXor | Shl | Shr => {
8561                // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8562                match (coerce_num(l), coerce_num(r)) {
8563                    (Some(a), Some(b)) => {
8564                        let to_int = |n: Num| match n {
8565                            Num::Int(i) => Some(i),
8566                            Num::Float(f) => crate::runtime::value::f2i_exact(f),
8567                        };
8568                        let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8569                            // PUC luaG_tointerror: name the offending operand
8570                            return Err(self.no_int_rep_err());
8571                        };
8572                        let v = match op {
8573                            BAnd => a & b,
8574                            BOr => a | b,
8575                            BXor => a ^ b,
8576                            Shl => shift_left(a, b),
8577                            Shr => shift_left(a, b.wrapping_neg()),
8578                            _ => unreachable!(),
8579                        };
8580                        return Ok(Some(Value::Int(v)));
8581                    }
8582                    _ => return Ok(None),
8583                }
8584            }
8585            _ => {}
8586        }
8587        let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8588            (Some(a), Some(b)) => (a, b),
8589            _ => return Ok(None),
8590        };
8591        let v = match (op, ln, rn) {
8592            (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8593            (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8594            (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8595            (IDiv, Num::Int(a), Num::Int(b)) => {
8596                if b == 0 {
8597                    return Err(self.rt_err("attempt to divide by zero"));
8598                }
8599                let mut q = a.wrapping_div(b);
8600                if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8601                    q -= 1;
8602                }
8603                Value::Int(q)
8604            }
8605            (Mod, Num::Int(a), Num::Int(b)) => {
8606                if b == 0 {
8607                    return Err(self.rt_err("attempt to perform 'n%0'"));
8608                }
8609                let mut m = a.wrapping_rem(b);
8610                if m != 0 && (m ^ b) < 0 {
8611                    m += b;
8612                }
8613                Value::Int(m)
8614            }
8615            (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8616            (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8617            (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8618            (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8619            (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8620            (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8621            (Mod, a, b) => {
8622                let (x, y) = (a.as_f64(), b.as_f64());
8623                // PUC luai_nummod: correct fmod's sign without the `m*y`
8624                // product, which underflows to 0 for tiny denormals
8625                let mut m = x % y;
8626                if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8627                    m += y;
8628                }
8629                Value::Float(m)
8630            }
8631            _ => unreachable!(),
8632        };
8633        Ok(Some(v))
8634    }
8635
8636    pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8637        match v {
8638            Value::Int(i) => Ok(i),
8639            Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8640                Some(i) => Ok(i),
8641                None => Err(self.rt_err("number has no integer representation")),
8642            },
8643            v => Err(self.type_err(what, v)),
8644        }
8645    }
8646
8647    fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
8648        match n {
8649            Num::Int(i) => Ok(i),
8650            Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
8651                Some(i) => Ok(i),
8652                None => Err(self.rt_err("number has no integer representation")),
8653            },
8654        }
8655    }
8656
8657    /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
8658    /// PUC type error when neither operand provides one.
8659    fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
8660        use ArithOp::*;
8661        let event = match op {
8662            Add => Mm::Add,
8663            Sub => Mm::Sub,
8664            Mul => Mm::Mul,
8665            Div => Mm::Div,
8666            Mod => Mm::Mod,
8667            Pow => Mm::Pow,
8668            IDiv => Mm::IDiv,
8669            BAnd => Mm::BAnd,
8670            BOr => Mm::BOr,
8671            BXor => Mm::BXor,
8672            Shl => Mm::Shl,
8673            Shr => Mm::Shr,
8674        };
8675        let mut mm = self.get_mm(l, event);
8676        if mm.is_nil() {
8677            mm = self.get_mm(r, event);
8678        }
8679        if mm.is_nil() {
8680            let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
8681                "perform bitwise operation on"
8682            } else {
8683                "perform arithmetic on"
8684            };
8685            let bad = if coerce_num(l).is_none() { l } else { r };
8686            return Err(self.type_err(what, bad));
8687        }
8688        Ok(mm)
8689    }
8690
8691    // ---- comparison ----
8692
8693    pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
8694        match self.less_step(l, r, or_eq)? {
8695            MmOut::Done(v) => Ok(v.truthy()),
8696            MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
8697            MmOut::CompareSynth { func } => {
8698                // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
8699                // by library code (sort comparator etc.) — no yield expected
8700                // here (a yield would have hit `call_noyield`'s C boundary).
8701                Ok(!self.call_mm1(func, &[r, l])?.truthy())
8702            }
8703        }
8704    }
8705
8706    /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
8707    /// carries the boolean result; `Mm` (for non-number/string operands) carries
8708    /// the metamethod — called with `(l, r)`; raises the PUC compare error when
8709    /// neither operand provides one.
8710    fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
8711        let b = match (l, r) {
8712            (Value::Int(a), Value::Int(b)) => {
8713                if or_eq {
8714                    a <= b
8715                } else {
8716                    a < b
8717                }
8718            }
8719            (Value::Float(a), Value::Float(b)) => {
8720                if or_eq {
8721                    a <= b
8722                } else {
8723                    a < b
8724                }
8725            }
8726            (Value::Int(a), Value::Float(b)) => {
8727                if or_eq {
8728                    int_le_float(a, b)
8729                } else {
8730                    int_lt_float(a, b)
8731                }
8732            }
8733            (Value::Float(a), Value::Int(b)) => {
8734                if a.is_nan() {
8735                    false
8736                } else if or_eq {
8737                    !int_lt_float(b, a)
8738                } else {
8739                    !int_le_float(b, a)
8740                }
8741            }
8742            (Value::Str(a), Value::Str(b)) => {
8743                let (a, b) = (a.as_bytes(), b.as_bytes());
8744                if or_eq { a <= b } else { a < b }
8745            }
8746            (l, r) => {
8747                let event = if or_eq { Mm::Le } else { Mm::Lt };
8748                // PUC 5.1's `get_compTM` rule applies to ordered comparisons
8749                // too: both operands' metatables must expose the same
8750                // implementation for `__lt` / `__le` to fire. events.lua 5.1
8751                // :262 expects `c < d` (where `d` has no metatable) to error
8752                // with the default "attempt to compare two table values"
8753                // rather than running c's `__lt` blindly.
8754                let mm = if self.version() <= LuaVersion::Lua51 {
8755                    self.get_comp_mm(l, r, event)
8756                } else {
8757                    let mut m = self.get_mm(l, event);
8758                    if m.is_nil() {
8759                        m = self.get_mm(r, event);
8760                    }
8761                    m
8762                };
8763                // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
8764                // operand carries `__le`. 5.4 dropped the synthesis (now
8765                // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
8766                // on the synthesis — its metatable defines only `__lt`.
8767                // The fallback calls `__lt(r, l)` synchronously (the suite's
8768                // `__lt` doesn't yield) and negates the result; the yieldable
8769                // `__lt` path stays reserved for the explicit `<` operator.
8770                if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
8771                    let lt = Mm::Lt;
8772                    let mut mm_lt = self.get_mm(l, lt);
8773                    if mm_lt.is_nil() {
8774                        mm_lt = self.get_mm(r, lt);
8775                    }
8776                    if !mm_lt.is_nil() {
8777                        return Ok(MmOut::CompareSynth { func: mm_lt });
8778                    }
8779                }
8780                if mm.is_nil() {
8781                    // PUC luaG_ordererror: "two X values" when the operand
8782                    // types match, "X with Y" otherwise (objtypename-aware).
8783                    let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
8784                    return Err(self.rt_err(&if t1 == t2 {
8785                        format!("attempt to compare two {t1} values")
8786                    } else {
8787                        format!("attempt to compare {t1} with {t2}")
8788                    }));
8789                }
8790                return Ok(MmOut::Mm { func: mm, recv: l });
8791            }
8792        };
8793        Ok(MmOut::Done(Value::Bool(b)))
8794    }
8795
8796    // ---- numeric for ----
8797
8798    fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
8799        let a = inst.a();
8800        let init = self.r(base, a);
8801        let limit = self.r(base, a + 1);
8802        let step = self.r(base, a + 2);
8803        let (Some(init_n), Some(limit_n), Some(step_n)) =
8804            (as_num(init), as_num(limit), as_num(step))
8805        else {
8806            // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
8807            // PUC checks limit, then step, then initial value.
8808            let (what, bad) = if as_num(limit).is_none() {
8809                ("limit", limit)
8810            } else if as_num(step).is_none() {
8811                ("step", step)
8812            } else {
8813                ("initial value", init)
8814            };
8815            let tn = self.obj_typename(bad);
8816            return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
8817        };
8818        // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
8819        // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
8820        // first test, so each successful iteration emits a backward `OP_FORLOOP`
8821        // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
8822        // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
8823        // distance in luna's encoding is `loop_pc - prep_pc`; firing
8824        // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
8825        let pre53 = self.version() <= LuaVersion::Lua53;
8826        match (init_n, step_n) {
8827            (Num::Int(i0), Num::Int(st)) => {
8828                if st == 0 {
8829                    return Err(self.rt_err("'for' step is zero"));
8830                }
8831                if pre53 {
8832                    // PUC 5.3 `forlimit`: int limit passes through; float limit
8833                    // gets clamped to MIN/MAX with a `stopnow` flag set only
8834                    // when the clamp is unreachable (positive float with a
8835                    // negative step → limit=MAX, stopnow; negative float with
8836                    // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
8837                    // `init = 0` so OP_FORLOOP's first test against the
8838                    // unreachable clamp fails cleanly. An ordinary in-range
8839                    // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
8840                    // lets OP_FORLOOP's natural test reject the first step.
8841                    let (lim, stopnow) = match limit_n {
8842                        Num::Int(l) => (l, false),
8843                        Num::Float(f) => {
8844                            if f.is_nan() {
8845                                (0, true)
8846                            } else if f >= i64::MAX as f64 + 1.0 {
8847                                // beyond +MAX: unreachable for a decreasing loop
8848                                (i64::MAX, st < 0)
8849                            } else if f <= i64::MIN as f64 {
8850                                // beyond -MIN: unreachable for an increasing loop
8851                                (i64::MIN, st >= 0)
8852                            } else if st > 0 {
8853                                (f.floor() as i64, false)
8854                            } else {
8855                                (f.ceil() as i64, false)
8856                            }
8857                        }
8858                    };
8859                    let initv = if stopnow { 0 } else { i0 };
8860                    let pre = initv.wrapping_sub(st);
8861                    self.set_r(base, a, Value::Int(pre));
8862                    self.set_r(base, a + 1, Value::Int(lim));
8863                    self.set_r(base, a + 2, Value::Int(st));
8864                    self.add_pc(inst.bx() as i32 - 1);
8865                    return Ok(());
8866                }
8867                let (lim, empty) = int_for_limit(limit_n, i0, st);
8868                if empty {
8869                    self.add_pc(inst.bx() as i32);
8870                    return Ok(());
8871                }
8872                let count = if st > 0 {
8873                    (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
8874                } else {
8875                    (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
8876                };
8877                self.set_r(base, a, Value::Int(i0));
8878                self.set_r(base, a + 1, Value::Int(count as i64));
8879                self.set_r(base, a + 2, Value::Int(st));
8880                self.set_r(base, a + 3, Value::Int(i0));
8881            }
8882            _ => {
8883                let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
8884                if st == 0.0 {
8885                    return Err(self.rt_err("'for' step is zero"));
8886                }
8887                if pre53 {
8888                    let pre = x0 - st;
8889                    self.set_r(base, a, Value::Float(pre));
8890                    self.set_r(base, a + 1, Value::Float(lim));
8891                    self.set_r(base, a + 2, Value::Float(st));
8892                    self.add_pc(inst.bx() as i32 - 1);
8893                    return Ok(());
8894                }
8895                let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
8896                if !runs {
8897                    self.add_pc(inst.bx() as i32);
8898                    return Ok(());
8899                }
8900                self.set_r(base, a, Value::Float(x0));
8901                self.set_r(base, a + 1, Value::Float(lim));
8902                self.set_r(base, a + 2, Value::Float(st));
8903                self.set_r(base, a + 3, Value::Float(x0));
8904            }
8905        }
8906        Ok(())
8907    }
8908
8909    #[inline(always)]
8910    fn for_loop(&mut self, inst: Inst, base: u32) {
8911        let a = inst.a();
8912        // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
8913        // directly (R[a+1] holds the limit, *not* a remaining-count) so the
8914        // first iteration's test fires through the same backward-jump path as
8915        // every later iteration. 5.4+ switched to the count-based form luna
8916        // already uses for `Int`; the float branch was already PUC-3.x-style.
8917        let pre53 = self.version() <= LuaVersion::Lua53;
8918        match self.r(base, a) {
8919            Value::Int(cur) if pre53 => {
8920                let Value::Int(lim) = self.r(base, a + 1) else {
8921                    unreachable!()
8922                };
8923                let Value::Int(st) = self.r(base, a + 2) else {
8924                    unreachable!()
8925                };
8926                let next = cur.wrapping_add(st);
8927                let cont = if st > 0 { next <= lim } else { next >= lim };
8928                if cont {
8929                    self.set_r(base, a, Value::Int(next));
8930                    self.set_r(base, a + 3, Value::Int(next));
8931                    self.add_pc(-(inst.bx() as i32));
8932                }
8933            }
8934            Value::Int(cur) => {
8935                let Value::Int(count) = self.r(base, a + 1) else {
8936                    unreachable!()
8937                };
8938                if count > 0 {
8939                    let Value::Int(st) = self.r(base, a + 2) else {
8940                        unreachable!()
8941                    };
8942                    let next = cur.wrapping_add(st);
8943                    self.set_r(base, a, Value::Int(next));
8944                    self.set_r(base, a + 1, Value::Int(count - 1));
8945                    self.set_r(base, a + 3, Value::Int(next));
8946                    self.add_pc(-(inst.bx() as i32));
8947                }
8948            }
8949            Value::Float(cur) => {
8950                let Value::Float(lim) = self.r(base, a + 1) else {
8951                    unreachable!()
8952                };
8953                let Value::Float(st) = self.r(base, a + 2) else {
8954                    unreachable!()
8955                };
8956                let next = cur + st;
8957                let cont = if st > 0.0 { next <= lim } else { next >= lim };
8958                if cont {
8959                    self.set_r(base, a, Value::Float(next));
8960                    self.set_r(base, a + 3, Value::Float(next));
8961                    self.add_pc(-(inst.bx() as i32));
8962                }
8963            }
8964            _ => unreachable!("corrupt for-loop state"),
8965        }
8966    }
8967
8968    // ---- native helpers (used by builtins) ----
8969
8970    /// A native function's own captured upvalue (self lives at func_slot).
8971    ///
8972    /// Public so `native_typed` trampolines and embedders authoring
8973    /// stateful natives via `native_with(...)` can read their upvals.
8974    pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
8975        let Value::Native(nc) = self.stack[func_slot as usize] else {
8976            unreachable!("native frame without native closure");
8977        };
8978        nc.upvals[i]
8979    }
8980
8981    /// Number of upvalues captured by the native at `func_slot` (variadic
8982    /// captures such as the `io.lines` format list).
8983    pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
8984        let Value::Native(nc) = self.stack[func_slot as usize] else {
8985            unreachable!("native frame without native closure");
8986        };
8987        nc.upvals.len()
8988    }
8989
8990    /// Write a native function's own upvalue (stateful iterators).
8991    pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
8992        let Value::Native(nc) = self.stack[func_slot as usize] else {
8993            unreachable!("native frame without native closure");
8994        };
8995        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8996        unsafe { nc.as_mut() }.upvals[i] = v;
8997        // NativeClosure.upvals is traced as part of its Trace; a long-lived
8998        // stateful iterator closure (e.g. string.gmatch) sees many writes —
8999        // barrier_back once-and-done is cheaper than per-child forward.
9000        self.heap
9001            .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9002    }
9003
9004    /// Read the i-th positional argument inside a `NativeFn` body
9005    /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9006    /// matching PUC's "missing arg is nil" contract. Public so embedders
9007    /// can author their own natives.
9008    pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9009        if i < nargs {
9010            self.stack[(func_slot + 1 + i) as usize]
9011        } else {
9012            Value::Nil
9013        }
9014    }
9015
9016    /// Push the return values of a `NativeFn` and return their count
9017    /// (analogous to pushing N values then `return N` from a C function).
9018    /// Public so embedders can author their own natives.
9019    pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9020        let need = func_slot as usize + vals.len();
9021        if self.stack.len() < need {
9022            self.stack.resize(need, Value::Nil);
9023        }
9024        for (i, &v) in vals.iter().enumerate() {
9025            self.stack[func_slot as usize + i] = v;
9026        }
9027        vals.len() as u32
9028    }
9029
9030    /// Fast string concatenation of an adjacent pair, or `None` when a
9031    /// `__concat` metamethod is required.
9032    fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9033        let legacy = self.version <= crate::version::LuaVersion::Lua52;
9034        // Length-check fast paths for both string operands BEFORE the
9035        // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9036        // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9037        // overflow on the first pair that would exceed `INT_MAX` instead
9038        // of allocating multi-GB intermediates first.
9039        let max_str = i32::MAX as usize;
9040        if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9041            let a_len = ls.as_bytes().len();
9042            let b_len = rs.as_bytes().len();
9043            let new_len = a_len.checked_add(b_len);
9044            if new_len.is_none() || new_len.unwrap() > max_str {
9045                return Err(self.rt_err("string length overflow"));
9046            }
9047        }
9048        match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9049            (Some(a), Some(b)) => {
9050                // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9051                // concat past it raises "string length overflow"
9052                // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9053                // exactly this wall).
9054                let new_len = a.len().checked_add(b.len());
9055                if new_len.is_none() || new_len.unwrap() > max_str {
9056                    return Err(self.rt_err("string length overflow"));
9057                }
9058                let mut combined = a;
9059                combined.extend_from_slice(&b);
9060                Ok(Some(Value::Str(self.heap.intern(&combined))))
9061            }
9062            _ => Ok(None),
9063        }
9064    }
9065
9066    /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9067    /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9068    /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9069    /// call — its `Meta` continuation re-enters here on the metamethod's return.
9070    fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9071        // Sum the lengths of all all-Str operands BEFORE starting the
9072        // right-associative fold so a 129-operand `a..a..…` chain
9073        // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9074        // not after dozens of multi-GB intermediate intern+hash rounds.
9075        // A non-Str operand falls through to the per-pair check.
9076        let max_str = i32::MAX as usize;
9077        let mut total: usize = 0;
9078        let mut all_str = true;
9079        for slot in base_a..self.top {
9080            match self.stack[slot as usize] {
9081                Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9082                    Some(t) if t <= max_str => total = t,
9083                    _ => return Err(self.rt_err("string length overflow")),
9084                },
9085                _ => {
9086                    all_str = false;
9087                    break;
9088                }
9089            }
9090        }
9091        let _ = all_str; // discrimination already captured by early returns above
9092        while self.top.saturating_sub(base_a) >= 2 {
9093            let i = self.top - 1; // rightmost operand
9094            let x = self.stack[(i - 1) as usize];
9095            let y = self.stack[i as usize];
9096            match self.concat_pair(x, y)? {
9097                Some(s) => {
9098                    self.stack[(i - 1) as usize] = s;
9099                    self.top = i; // consumed y
9100                }
9101                None => {
9102                    let mut mm = self.get_mm(x, Mm::Concat);
9103                    if mm.is_nil() {
9104                        mm = self.get_mm(y, Mm::Concat);
9105                    }
9106                    if mm.is_nil() {
9107                        let legacy = self.version <= crate::version::LuaVersion::Lua52;
9108                        let bad = if concat_piece(x, legacy).is_none() {
9109                            x
9110                        } else {
9111                            y
9112                        };
9113                        return Err(self.type_err("concatenate", bad));
9114                    }
9115                    // result lands at i-1, dropping y (top→i); resume continues.
9116                    let dst = i - 1;
9117                    self.begin_meta_call(
9118                        mm,
9119                        &[x, y],
9120                        MetaAction::Concat { dst, base_a },
9121                        "concat",
9122                    )?;
9123                    return Ok(());
9124                }
9125            }
9126        }
9127        self.maybe_collect_garbage(base_a + 1);
9128        Ok(())
9129    }
9130
9131    /// tostring with __tostring / __name support.
9132    pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9133        let mm = self.get_mm(v, Mm::ToString);
9134        if !mm.is_nil() {
9135            return match self.call_mm1(mm, &[v])? {
9136                Value::Str(s) => Ok(s.as_bytes().to_vec()),
9137                _ => Err(self.rt_err("'__tostring' must return a string")),
9138            };
9139        }
9140        if let Value::Table(t) = v
9141            && let Value::Str(name) = self.get_mm(v, Mm::Name)
9142        {
9143            let mut out = name.as_bytes().to_vec();
9144            out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9145            return Ok(out);
9146        }
9147        Ok(self.tostring_basic(v))
9148    }
9149
9150    /// Basic tostring (no metamethods).
9151    pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9152        match v {
9153            Value::Nil => b"nil".to_vec(),
9154            Value::Bool(true) => b"true".to_vec(),
9155            Value::Bool(false) => b"false".to_vec(),
9156            Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9157            // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9158            // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9159            // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9160            // concatenating these renderings.
9161            Value::Float(f) => {
9162                let legacy = self.version <= crate::version::LuaVersion::Lua52;
9163                numeric::num_to_string_for(Num::Float(f), legacy).into_bytes()
9164            }
9165            Value::Str(s) => s.as_bytes().to_vec(),
9166            Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9167            Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9168            Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9169            Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9170            // PUC names file handles `file (0x…)`; a bare userdata is
9171            // `userdata: 0x…`. The io library overrides this via __tostring.
9172            Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9173            // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9174            // (the "light" qualifier only appears in `luaL_typeerror`).
9175            Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9176        }
9177    }
9178}
9179
/// The binary arithmetic and bitwise operators. Each one has a matching
/// metamethod event whose name is given by [`ArithOp::mm_name`].
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    /// `__add`
    Add,
    /// `__sub`
    Sub,
    /// `__mul`
    Mul,
    /// `__mod`
    Mod,
    /// `__pow`
    Pow,
    /// `__div`
    Div,
    /// `__idiv`
    IDiv,
    /// `__band`
    BAnd,
    /// `__bor`
    BOr,
    /// `__bxor`
    BXor,
    /// `__shl`
    Shl,
    /// `__shr`
    Shr,
}

impl ArithOp {
    /// The PUC event name for this operator — the metamethod key without its
    /// `__` prefix (`__add` → "add"). `debug.getinfo(level, "n")` reports it
    /// when called from inside a metamethod handler.
    fn mm_name(self) -> &'static str {
        match self {
            Self::Add => "add",
            Self::Sub => "sub",
            Self::Mul => "mul",
            Self::Mod => "mod",
            Self::Pow => "pow",
            Self::Div => "div",
            Self::IDiv => "idiv",
            Self::BAnd => "band",
            Self::BOr => "bor",
            Self::BXor => "bxor",
            Self::Shl => "shl",
            Self::Shr => "shr",
        }
    }
}
9216
9217fn as_num(v: Value) -> Option<Num> {
9218    match v {
9219        Value::Int(i) => Some(Num::Int(i)),
9220        Value::Float(f) => Some(Num::Float(f)),
9221        // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9222        Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9223        _ => None,
9224    }
9225}
9226
9227/// A concatenable operand's byte form (string, or a number coerced to its
9228/// string), or `None` when only a `__concat` metamethod can handle it.
9229/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9230/// suffix on integer-valued floats) — see `num_to_string_for`.
9231fn concat_piece(v: Value, legacy_float: bool) -> Option<Vec<u8>> {
9232    match v {
9233        Value::Str(s) => Some(s.as_bytes().to_vec()),
9234        Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9235        Value::Float(x) => {
9236            Some(numeric::num_to_string_for(Num::Float(x), legacy_float).into_bytes())
9237        }
9238        _ => None,
9239    }
9240}
9241
9242/// Index into the per-basic-type metatable table for a non-table value
9243/// (None for tables, which carry their own metatable).
9244fn type_mt_slot(v: Value) -> Option<usize> {
9245    match v {
9246        Value::Nil => Some(0),
9247        Value::Bool(_) => Some(1),
9248        Value::Int(_) | Value::Float(_) => Some(2),
9249        Value::Str(_) => Some(3),
9250        Value::Closure(_) | Value::Native(_) => Some(4),
9251        // tables and full userdata carry their own metatable; threads and
9252        // light userdata have none (PUC keeps a shared per-type mt slot for
9253        // light, but luna doesn't expose it — no test gates on it yet).
9254        Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9255    }
9256}
9257
9258/// Number, or string coerced to number (5.5 default string-arith coercion).
9259fn coerce_num(v: Value) -> Option<Num> {
9260    match v {
9261        Value::Int(i) => Some(Num::Int(i)),
9262        Value::Float(f) => Some(Num::Float(f)),
9263        Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9264        _ => None,
9265    }
9266}
9267
/// Lua's `a << b`: a logical (zero-filling) shift over 64 bits. A negative
/// `b` shifts right by `|b|` instead, and any `|b| ≥ 64` produces 0.
fn shift_left(a: i64, b: i64) -> i64 {
    let distance = b.unsigned_abs();
    if distance >= 64 {
        return 0;
    }
    let bits = a as u64;
    let shifted = if b < 0 {
        bits >> distance
    } else {
        bits << distance
    };
    shifted as i64
}
9283
/// Exact `i < f` between an integer and a float, with no precision lost to
/// converting `i` (PUC LTintfloat shape). NaN compares false.
fn int_lt_float(i: i64, f: f64) -> bool {
    /// 2^63, the first float above every `i64`.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;

    if f.is_nan() || f < -TWO_POW_63 {
        // Unordered, or below every integer.
        return false;
    }
    if f >= TWO_POW_63 {
        // Above every integer.
        return true;
    }
    // `f` is now within i64 range, so its floor converts exactly.
    let floored = f.floor();
    let bound = floored as i64;
    if floored == f {
        // Integral float: plain integer comparison.
        i < bound
    } else {
        // Fractional float: `i < f` holds exactly when `i <= floor(f)`.
        i <= bound
    }
}
9299
/// Exact `i <= f` between an integer and a float. NaN compares false.
fn int_le_float(i: i64, f: f64) -> bool {
    /// 2^63, the first float above every `i64`.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;

    match f {
        x if x.is_nan() => false,
        // Above every integer.
        x if x >= TWO_POW_63 => true,
        // Below every integer.
        x if x < -TWO_POW_63 => false,
        // In range: `i <= f` holds exactly when `i <= floor(f)`.
        x => i <= x.floor() as i64,
    }
}
9313
9314/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9315/// (clipped limit, loop-is-empty).
9316fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9317    match limit {
9318        Num::Int(l) => {
9319            let empty = if step > 0 { init > l } else { init < l };
9320            (l, empty)
9321        }
9322        Num::Float(f) => {
9323            if f.is_nan() {
9324                return (0, true);
9325            }
9326            if step > 0 {
9327                if f >= 9_223_372_036_854_775_808.0 {
9328                    (i64::MAX, false)
9329                } else {
9330                    let l = f.floor();
9331                    if l < -9_223_372_036_854_775_808.0 {
9332                        (i64::MIN, true)
9333                    } else {
9334                        let li = l as i64;
9335                        (li, init > li)
9336                    }
9337                }
9338            } else if f <= -9_223_372_036_854_775_808.0 {
9339                (i64::MIN, false)
9340            } else {
9341                let l = f.ceil();
9342                if l >= 9_223_372_036_854_775_808.0 {
9343                    // PUC forlimit: a positive limit beyond the integer range
9344                    // is unreachable for a decreasing loop — empty.
9345                    (i64::MAX, true)
9346                } else {
9347                    let li = l as i64;
9348                    (li, init < li)
9349                }
9350            }
9351        }
9352    }
9353}
9354
9355/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9356/// `@file` / `=name` markers in `source`).
9357fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9358    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9359    let b = unsafe { crate::runtime::string::bytes_of(p) };
9360    match b.first() {
9361        Some(b'@') | Some(b'=') => &b[1..],
9362        _ => b,
9363    }
9364}
9365
9366impl Vm {
9367    /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9368    /// that called the current native. Returns (closure, current line,
9369    /// extra vararg count).
9370    /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9371    /// function running at `level`, recovered from the caller's call
9372    /// instruction (PUC funcnamefromcode). None for the main chunk or a
9373    /// tail/anonymous call with no recoverable name.
9374    /// A debug-level position: either a real Lua frame (by index) or a synthetic
9375    /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9376    /// coroutine body), which `debug.getinfo` and traceback report as "C".
9377    /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9378    /// frame at `level`'s current pc, as (name, value). Locals are visited in
9379    /// registration order (start pc, then register) to match luaF_getlocalname.
9380    pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9381        if n == 0 {
9382            return None;
9383        }
9384        let fi = match self.dbg_frame(level)? {
9385            DbgKind::Lua(fi) => fi,
9386            // Tail-call placeholder has no real frame backing it — no locals
9387            // exist to read or write here. PUC `findlocal` returns NULL on
9388            // a CIST_TAIL activation.
9389            DbgKind::Tail(_) => return None,
9390            // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9391            // for slot `n` inside the argument window (db.lua :408-:413, and
9392            // the call/return hook reads of math.sin / select args via
9393            // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9394            // meaningful for a C frame here.
9395            DbgKind::C(fi) => {
9396                if n < 1 {
9397                    return None;
9398                }
9399                let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9400                if (n as u32) > nargs {
9401                    return None;
9402                }
9403                let slot = (func_slot + n as u32) as usize;
9404                let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9405                return Some((self.temporary_locvar_name().to_string(), val));
9406            }
9407        };
9408        let f = self.frames[fi].lua()?;
9409        // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9410        // is the first extra arg passed to the function (`...[1]`), `-2` the
9411        // second, etc. The 5.5 stack layout parks varargs in
9412        // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9413        if n < 0 {
9414            let i = (-n) as u32;
9415            if i == 0 || i > f.n_varargs {
9416                return None;
9417            }
9418            let val = self
9419                .stack
9420                .get((f.func_slot + i) as usize)
9421                .copied()
9422                .unwrap_or(Value::Nil);
9423            return Some((self.vararg_locvar_name().to_string(), val));
9424        }
9425        let proto = f.closure.proto;
9426        // PUC's parser injects a hidden `(vararg table)` locvar for an
9427        // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9428        // right after the fixed parameters (`numparams + 1`). Main chunks
9429        // and `(...t)` named-vararg funcs do NOT get one — gate on the
9430        // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9431        // their declared registers (no shadow slot allocated), so we expose
9432        // that hidden index purely in this debug view.
9433        let num_params = proto.num_params as i64;
9434        let vararg_slot = if proto.has_vararg_table_pseudo {
9435            Some(num_params + 1)
9436        } else {
9437            None
9438        };
9439        if vararg_slot == Some(n) {
9440            return Some(("(vararg table)".to_string(), Value::Nil));
9441        }
9442        let pc = (f.pc as usize).saturating_sub(1);
9443        let mut active: Vec<&crate::runtime::LocVar> = proto
9444            .locvars
9445            .iter()
9446            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9447            .collect();
9448        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9449        let mut idx: i64 = n - 1;
9450        if let Some(vs) = vararg_slot
9451            && n > vs
9452        {
9453            idx -= 1;
9454        }
9455        let idx = idx as usize;
9456        if let Some(lv) = active.get(idx) {
9457            let val = self
9458                .stack
9459                .get((f.base + lv.reg) as usize)
9460                .copied()
9461                .unwrap_or(Value::Nil);
9462            return Some((lv.name.to_string(), val));
9463        }
9464        // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9465        // still inside the frame's live register window — report a
9466        // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9467        // the next frame's func slot (`ci->next->func.p`) so the
9468        // temporary window stops where the callee's frame begins
9469        // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9470        // an out-of-range slot).
9471        let limit = self
9472            .frames
9473            .get(fi + 1)
9474            .and_then(|cf| cf.lua())
9475            .map(|nf| nf.func_slot)
9476            .unwrap_or_else(|| self.top.max(f.base));
9477        let temp_reg = idx as u32;
9478        if f.base + temp_reg < limit {
9479            let val = self
9480                .stack
9481                .get((f.base + temp_reg) as usize)
9482                .copied()
9483                .unwrap_or(Value::Nil);
9484            return Some((self.lua_temporary_locvar_name().to_string(), val));
9485        }
9486        None
9487    }
9488
9489    /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9490    /// the local / vararg name on success, `None` when the slot does not
9491    /// resolve. Mirrors `local_at`'s indexing exactly.
9492    pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9493        if n == 0 {
9494            return None;
9495        }
9496        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9497            return None;
9498        };
9499        let f = self.frames[fi].lua()?;
9500        if n < 0 {
9501            let i = (-n) as u32;
9502            if i == 0 || i > f.n_varargs {
9503                return None;
9504            }
9505            let slot = (f.func_slot + i) as usize;
9506            if let Some(s) = self.stack.get_mut(slot) {
9507                *s = v;
9508            }
9509            return Some(self.vararg_locvar_name().to_string());
9510        }
9511        let proto = f.closure.proto;
9512        let num_params = proto.num_params as i64;
9513        let vararg_slot = if proto.has_vararg_table_pseudo {
9514            Some(num_params + 1)
9515        } else {
9516            None
9517        };
9518        if vararg_slot == Some(n) {
9519            // hidden (vararg table) slot has no real storage — accept the
9520            // write as a no-op for PUC parity (db.lua doesn't write to it).
9521            return Some("(vararg table)".to_string());
9522        }
9523        let pc = (f.pc as usize).saturating_sub(1);
9524        let mut active: Vec<&crate::runtime::LocVar> = proto
9525            .locvars
9526            .iter()
9527            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9528            .collect();
9529        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9530        let mut idx: i64 = n - 1;
9531        if let Some(vs) = vararg_slot
9532            && n > vs
9533        {
9534            idx -= 1;
9535        }
9536        let idx = idx as usize;
9537        let (name, reg) = if let Some(lv) = active.get(idx) {
9538            (lv.name.to_string(), lv.reg)
9539        } else {
9540            // PUC `luaG_findlocal` fallback into the temporary window —
9541            // bounded by the next frame's func slot (see local_at).
9542            let limit = self
9543                .frames
9544                .get(fi + 1)
9545                .and_then(|cf| cf.lua())
9546                .map(|nf| nf.func_slot)
9547                .unwrap_or_else(|| self.top.max(f.base));
9548            let temp_reg = idx as u32;
9549            if f.base + temp_reg >= limit {
9550                return None;
9551            }
9552            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9553        };
9554        let slot = (f.base + reg) as usize;
9555        if let Some(s) = self.stack.get_mut(slot) {
9556            *s = v;
9557        }
9558        Some(name)
9559    }
9560
9561    /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9562    /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9563    /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9564    /// vararg indexing past `n_varargs`, or for a register past the live
9565    /// window. Naming follows the same priority as `local_at`: named locals,
9566    /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9567    /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9568    /// window.
9569    pub(crate) fn local_at_coro(
9570        &self,
9571        co: Gc<crate::runtime::Coro>,
9572        level: i64,
9573        n: i64,
9574    ) -> Option<(String, Value)> {
9575        if level < 1 || n == 0 {
9576            return None;
9577        }
9578        let frames = &co.frames;
9579        // Logical level: iterate Lua frames from the top.
9580        let lua_indices: Vec<usize> = (0..frames.len())
9581            .rev()
9582            .filter(|&i| frames[i].lua().is_some())
9583            .collect();
9584        let fi = *lua_indices.get((level - 1) as usize)?;
9585        let f = frames[fi].lua()?;
9586        if n < 0 {
9587            let i = (-n) as u32;
9588            if i == 0 || i > f.n_varargs {
9589                return None;
9590            }
9591            let val = co
9592                .stack
9593                .get((f.func_slot + i) as usize)
9594                .copied()
9595                .unwrap_or(Value::Nil);
9596            return Some((self.vararg_locvar_name().to_string(), val));
9597        }
9598        let proto = f.closure.proto;
9599        let num_params = proto.num_params as i64;
9600        let vararg_slot = if proto.has_vararg_table_pseudo {
9601            Some(num_params + 1)
9602        } else {
9603            None
9604        };
9605        if vararg_slot == Some(n) {
9606            return Some(("(vararg table)".to_string(), Value::Nil));
9607        }
9608        let pc = (f.pc as usize).saturating_sub(1);
9609        let mut active: Vec<&crate::runtime::LocVar> = proto
9610            .locvars
9611            .iter()
9612            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9613            .collect();
9614        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9615        let mut idx: i64 = n - 1;
9616        if let Some(vs) = vararg_slot
9617            && n > vs
9618        {
9619            idx -= 1;
9620        }
9621        let idx = idx as usize;
9622        if let Some(lv) = active.get(idx) {
9623            let val = co
9624                .stack
9625                .get((f.base + lv.reg) as usize)
9626                .copied()
9627                .unwrap_or(Value::Nil);
9628            return Some((lv.name.to_string(), val));
9629        }
9630        let limit = frames
9631            .get(fi + 1)
9632            .and_then(|cf| cf.lua())
9633            .map(|nf| nf.func_slot)
9634            .unwrap_or(co.top.max(f.base));
9635        let temp_reg = idx as u32;
9636        if f.base + temp_reg < limit {
9637            let val = co
9638                .stack
9639                .get((f.base + temp_reg) as usize)
9640                .copied()
9641                .unwrap_or(Value::Nil);
9642            return Some((self.lua_temporary_locvar_name().to_string(), val));
9643        }
9644        None
9645    }
9646
9647    /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
9648    /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
9649    pub(crate) fn local_set_coro(
9650        &mut self,
9651        co: Gc<crate::runtime::Coro>,
9652        level: i64,
9653        n: i64,
9654        v: Value,
9655    ) -> Option<String> {
9656        if level < 1 || n == 0 {
9657            return None;
9658        }
9659        let lua_indices: Vec<usize> = (0..co.frames.len())
9660            .rev()
9661            .filter(|&i| co.frames[i].lua().is_some())
9662            .collect();
9663        let fi = *lua_indices.get((level - 1) as usize)?;
9664        let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
9665            let f = co.frames[fi].lua()?;
9666            (
9667                f.func_slot,
9668                f.n_varargs,
9669                f.base,
9670                f.closure.proto,
9671                co.top.max(f.base),
9672                co.frames
9673                    .get(fi + 1)
9674                    .and_then(|cf| cf.lua())
9675                    .map(|nf| nf.func_slot),
9676            )
9677        };
9678        if n < 0 {
9679            let i = (-n) as u32;
9680            if i == 0 || i > n_varargs {
9681                return None;
9682            }
9683            let slot = (func_slot + i) as usize;
9684            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9685            let stack = unsafe { &mut co.as_mut().stack };
9686            if let Some(s) = stack.get_mut(slot) {
9687                *s = v;
9688            }
9689            // co.stack values are traced — once-per-call barrier so propagate
9690            // sees the new value if co was already BLACK this cycle.
9691            self.heap
9692                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9693            return Some(self.vararg_locvar_name().to_string());
9694        }
9695        let num_params = proto.num_params as i64;
9696        let vararg_slot = if proto.has_vararg_table_pseudo {
9697            Some(num_params + 1)
9698        } else {
9699            None
9700        };
9701        if vararg_slot == Some(n) {
9702            return Some("(vararg table)".to_string());
9703        }
9704        let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
9705        let mut active: Vec<&crate::runtime::LocVar> = proto
9706            .locvars
9707            .iter()
9708            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9709            .collect();
9710        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9711        let mut idx: i64 = n - 1;
9712        if let Some(vs) = vararg_slot
9713            && n > vs
9714        {
9715            idx -= 1;
9716        }
9717        let idx = idx as usize;
9718        let (name, reg) = if let Some(lv) = active.get(idx) {
9719            (lv.name.to_string(), lv.reg)
9720        } else {
9721            let limit = next_func_slot.unwrap_or(top_for_temp);
9722            let temp_reg = idx as u32;
9723            if base + temp_reg >= limit {
9724                return None;
9725            }
9726            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9727        };
9728        let slot = (base + reg) as usize;
9729        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9730        let stack = unsafe { &mut co.as_mut().stack };
9731        if let Some(s) = stack.get_mut(slot) {
9732            *s = v;
9733        }
9734        // co.stack values are traced — once-per-call barrier so propagate
9735        // sees the new value if co was already BLACK this cycle.
9736        self.heap
9737            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9738        Some(name)
9739    }
9740
9741    /// Frame info for a level on a suspended coroutine (PUC
9742    /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
9743    /// Returns the closure + currentline + extraargs + istailcall for the
9744    /// level-th Lua activation in `co.frames`. None if level overshoots.
9745    pub(crate) fn coro_frame_info(
9746        &self,
9747        co: Gc<crate::runtime::Coro>,
9748        level: i64,
9749    ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
9750        if level < 1 {
9751            return None;
9752        }
9753        let lua_indices: Vec<usize> = (0..co.frames.len())
9754            .rev()
9755            .filter(|&i| co.frames[i].lua().is_some())
9756            .collect();
9757        let fi = *lua_indices.get((level - 1) as usize)?;
9758        let f = co.frames[fi].lua()?;
9759        let proto = f.closure.proto;
9760        let pc = (f.pc as usize)
9761            .saturating_sub(1)
9762            .min(proto.lines.len().saturating_sub(1));
9763        let line = proto.lines.get(pc).copied().unwrap_or(0);
9764        Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
9765    }
9766
9767    /// Whether `level` resolves to any live activation (PUC lua_getstack).
9768    pub(crate) fn level_in_range(&self, level: i64) -> bool {
9769        self.dbg_frame(level).is_some()
9770    }
9771
9772    /// PUC's debug-API placeholder for an unnamed vararg slot returned by
9773    /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
9774    /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
9775    /// 5.3 :195 / 5.4 :286 baseline on their respective form.
9776    pub(crate) fn vararg_locvar_name(&self) -> &'static str {
9777        if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
9778            "(*vararg)"
9779        } else {
9780            "(vararg)"
9781        }
9782    }
9783
9784    /// PUC's debug-API placeholder for an unnamed temporary on a C
9785    /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
9786    /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
9787    /// their spelling.
9788    pub(crate) fn temporary_locvar_name(&self) -> &'static str {
9789        if matches!(
9790            self.version,
9791            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9792        ) {
9793            // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
9794            // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
9795            // to `(C temporary)`.
9796            "(*temporary)"
9797        } else {
9798            "(C temporary)"
9799        }
9800    }
9801
9802    /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
9803    /// (an arithmetic intermediate sitting past the last named local on a
9804    /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
9805    /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
9806    /// spelling.
9807    pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
9808        if matches!(
9809            self.version,
9810            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9811        ) {
9812            "(*temporary)"
9813        } else {
9814            "(temporary)"
9815        }
9816    }
9817
9818    /// The Lua closure running at `level` on the current thread, or `None`
9819    /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
9820    /// need this to reach the function whose env they read or rewrite.
9821    pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
9822        // `DbgKind::Tail` also falls into the else branch — a tail-call
9823        // placeholder has no closure of its own, so PUC's `lua_getstack` +
9824        // `getfunc` for that level returns no function, and `getfenv(level)`
9825        // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
9826        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9827            return None;
9828        };
9829        Some(self.frames[fi].lua()?.closure)
9830    }
9831
9832    pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
9833        if level < 1 {
9834            return false;
9835        }
9836        let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
9837        (level as usize) <= count
9838    }
9839
9840    pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
9841        if level < 1 {
9842            return None;
9843        }
9844        // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
9845        // activation counts as a level, and each Lua activation's
9846        // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
9847        // dropped the synthetic shape: `istailcall` becomes a flag on the
9848        // real frame and Cont activations no longer count separately.
9849        // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
9850        // `getinfo(2).func == g1` pins the 5.2+ shape.
9851        let v51 = self.version <= LuaVersion::Lua51;
9852        let mut lvl = level;
9853        for fi in (0..self.frames.len()).rev() {
9854            match &self.frames[fi] {
9855                CallFrame::Lua(f) => {
9856                    lvl -= 1;
9857                    if lvl == 0 {
9858                        return Some(DbgKind::Lua(fi));
9859                    }
9860                    if v51 {
9861                        // 5.1 reports one synthetic CIST_TAIL level per
9862                        // collapsed tail call (PUC `lua_getstack` subtracts
9863                        // `ci->u.l.tailcalls` from the remaining level).
9864                        for _ in 0..f.tailcalls {
9865                            lvl -= 1;
9866                            if lvl == 0 {
9867                                return Some(DbgKind::Tail(fi));
9868                            }
9869                        }
9870                    }
9871                    if f.from_c {
9872                        lvl -= 1;
9873                        if lvl == 0 {
9874                            return Some(DbgKind::C(fi));
9875                        }
9876                    }
9877                }
9878                CallFrame::Cont(_) => {
9879                    if !v51 {
9880                        continue;
9881                    }
9882                    lvl -= 1;
9883                    if lvl == 0 {
9884                        let parent = (0..fi)
9885                            .rev()
9886                            .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
9887                        return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
9888                    }
9889                }
9890            }
9891        }
9892        None
9893    }
9894
9895    pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9896        let f = self.frames[fi].lua()?;
9897        // metamethod handler frames carry the event tag (e.g. "close" for
9898        // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
9899        if f.is_hook {
9900            return Some(("hook", "?".to_string()));
9901        }
9902        if let Some(tm) = f.tm {
9903            return Some(("metamethod", tm_debug_name(self.version, tm)));
9904        }
9905        // a frame entered across a C boundary has no naming call instruction
9906        if fi == 0 || f.from_c {
9907            return None;
9908        }
9909        // the caller's call instruction names this frame; a continuation frame
9910        // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
9911        // already short-circuits those.
9912        let caller = self.frames[fi - 1].lua()?;
9913        let caller_proto = caller.closure.proto;
9914        let p: &crate::runtime::Proto = &caller_proto;
9915        let call_pc = (caller.pc as usize).checked_sub(1)?;
9916        let instr = *p.code.get(call_pc)?;
9917        match instr.op() {
9918            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9919            Op::TForCall => Some(("for iterator", "for iterator".to_string())),
9920            _ => None,
9921        }
9922    }
9923
9924    /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
9925    /// (PUC names a C function from the call instruction that invoked it). The
9926    /// native was called by the nearest Lua frame below `fi` (skipping pcall/
9927    /// xpcall continuations); that frame's call instruction names it.
9928    pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9929        // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
9930        // the synthetic C edge between the __gc finalizer (top Lua frame, has
9931        // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
9932        // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
9933        // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
9934        // sets the tag on the handler frame only, so level 2 there still
9935        // names the calling Lua frame's call instruction (5.5 locals.lua
9936        // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
9937        if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
9938            && fr.tm == Some("gc")
9939        {
9940            let name = tm_debug_name(self.version, "gc");
9941            return Some(("metamethod", name));
9942        }
9943        let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
9944        let caller = self.frames[caller_fi].lua()?;
9945        let p = &caller.closure.proto;
9946        let call_pc = (caller.pc as usize).checked_sub(1)?;
9947        let instr = *p.code.get(call_pc)?;
9948        match instr.op() {
9949            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9950            _ => None,
9951        }
9952    }
9953
9954    /// Native value currently sitting on the synthetic C edge identified by
9955    /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
9956    /// above `fi` (each one corresponds to one native pushing the hook) and
9957    /// indexes into `running_natives` from the top, also skipping the caller
9958    /// of `getinfo` itself (the native that is currently asking).
9959    /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
9960    /// expects the just-entered C function.
9961    pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
9962        let idx = self.c_frame_native_idx(fi)?;
9963        Some(Value::Native(self.running_natives[idx]))
9964    }
9965
9966    /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
9967    /// so `local_at` can index the native's argument window like PUC's
9968    /// `(C temporary)` path. Returns `None` when no matching native exists
9969    /// (e.g. the C edge corresponds to a non-native boundary).
9970    pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
9971        let idx = self.c_frame_native_idx(fi)?;
9972        self.running_native_slots.get(idx).copied()
9973    }
9974
9975    fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
9976        let n_above = self.frames[fi..]
9977            .iter()
9978            .filter_map(CallFrame::lua)
9979            .filter(|f| f.from_c)
9980            .count();
9981        if n_above == 0 {
9982            return None;
9983        }
9984        // running_natives.last() is the native currently executing (the one
9985        // that called getinfo). Pop it conceptually, then take the n_above-th
9986        // entry from the top of what remains.
9987        let nr = self.running_natives.len().checked_sub(1)?;
9988        nr.checked_sub(n_above)
9989    }
9990
9991    /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
9992    /// native whose function pointer matches `target`, and return its qualified
9993    /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
9994    /// `None` if no match is found. Used by `arg_error` when the running native
9995    /// was invoked from another native (PUC `ar.name == NULL` at level 0).
9996    pub(crate) fn pushglobalfuncname(
9997        &mut self,
9998        target: crate::runtime::value::NativeFn,
9999    ) -> Option<String> {
10000        let pkg_k = Value::Str(self.heap.intern(b"package"));
10001        let pkg = match self.globals().get(pkg_k) {
10002            Value::Table(t) => t,
10003            _ => return None,
10004        };
10005        let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10006        let loaded = match pkg.get(loaded_k) {
10007            Value::Table(t) => t,
10008            _ => return None,
10009        };
10010        let matches = |v: Value| -> bool {
10011            matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10012        };
10013        let mut k = Value::Nil;
10014        while let Ok(Some((nk, nv))) = loaded.next(k) {
10015            k = nk;
10016            let Value::Str(outer) = nk else { continue };
10017            let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10018            if matches(nv) {
10019                return Some(if outer == "_G" { String::new() } else { outer });
10020            }
10021            if let Value::Table(inner_t) = nv {
10022                let mut k2 = Value::Nil;
10023                while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10024                    k2 = nk2;
10025                    if matches(nv2)
10026                        && let Value::Str(inner) = nk2
10027                    {
10028                        let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10029                        return Some(if outer == "_G" {
10030                            inner
10031                        } else {
10032                            format!("{outer}.{inner}")
10033                        });
10034                    }
10035                }
10036            }
10037        }
10038        None
10039    }
10040
10041    /// Name and namewhat of the native currently running on behalf of the top
10042    /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10043    /// `luaL_argerror` rewrite a method call's self-argument error.
10044    pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10045        let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10046        let p = &caller.closure.proto;
10047        let call_pc = (caller.pc as usize).checked_sub(1)?;
10048        let instr = *p.code.get(call_pc)?;
10049        match instr.op() {
10050            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10051            _ => None,
10052        }
10053    }
10054
10055    pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10056        let f = self.frames[fi].lua().expect("Lua frame");
10057        let proto = f.closure.proto;
10058        let pc = (f.pc as usize)
10059            .saturating_sub(1)
10060            .min(proto.lines.len().saturating_sub(1));
10061        let line = proto.lines.get(pc).copied().unwrap_or(0);
10062        // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10063        // (independent of any later write to a materialized vararg table's `n`).
10064        // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10065        // any nonzero `tailcalls` count flips it true.
10066        (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10067    }
10068
10069    /// Read an upvalue cell of a closure (debug.getupvalue).
10070    pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10071        match cl.upvals()[idx].state() {
10072            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10073            UpvalState::Closed(v) => v,
10074        }
10075    }
10076
10077    /// Write an upvalue cell of a closure (debug.setupvalue).
10078    pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10079        let uv = cl.upvals()[idx];
10080        match uv.state() {
10081            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10082            UpvalState::Closed(_) => {
10083                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10084                unsafe { uv.as_mut() }.set_closed(v);
10085                self.heap
10086                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10087            }
10088        }
10089    }
10090
10091    /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10092    /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10093    /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10094    /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10095    /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10096    /// else `"function <src:line_defined>"` for an anonymous Lua function.
10097    /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10098    /// Walks the coroutine's saved frames and prepends a synthetic C-level
10099    /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10100    /// (its `resume_at` marker is set). `level` skips entries from the top
10101    /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10102    /// frame; etc.). db.lua :764-:768 sample several levels.
10103    pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10104        use crate::runtime::CoroStatus;
10105        const LEVELS1: usize = 10;
10106        const LEVELS2: usize = 11;
10107        #[derive(Clone, Copy)]
10108        enum VFrame<'a> {
10109            Lua(&'a crate::runtime::function::Frame),
10110            CPcall,
10111            CXpcall,
10112            CYield,
10113            /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10114            /// call collapsed into the next Lua frame down the chain.
10115            Tail,
10116        }
10117        let v51 = self.version <= LuaVersion::Lua51;
10118        let mut visible: Vec<VFrame<'_>> = Vec::new();
10119        // PUC's level 0 entry on a suspended coroutine is the C call where it
10120        // paused — `coroutine.yield` for a yielded thread.
10121        if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10122            visible.push(VFrame::CYield);
10123        }
10124        for cf in co.frames.iter().rev() {
10125            match cf {
10126                CallFrame::Lua(f) => {
10127                    visible.push(VFrame::Lua(f));
10128                    if v51 {
10129                        for _ in 0..f.tailcalls {
10130                            visible.push(VFrame::Tail);
10131                        }
10132                    }
10133                }
10134                CallFrame::Cont(nc) => match nc.kind {
10135                    ContKind::Pcall => visible.push(VFrame::CPcall),
10136                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10137                    _ => {}
10138                },
10139            }
10140        }
10141        if level < 0 {
10142            level = 0;
10143        }
10144        if (level as usize) >= visible.len() {
10145            return Vec::new();
10146        }
10147        let visible = &visible[level as usize..];
10148        let total = visible.len();
10149        let mut out = Vec::new();
10150        // To name a Lua frame, PUC consults the caller's OP_CALL via
10151        // getobjname: find the index `fi` of the current frame in co.frames,
10152        // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10153        let coro_frame_name = |frames: &[CallFrame],
10154                               target: &crate::runtime::function::Frame|
10155         -> Option<(&'static str, String)> {
10156            let fi = frames
10157                .iter()
10158                .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10159            if fi == 0 || target.from_c {
10160                return None;
10161            }
10162            let caller = frames[fi - 1].lua()?;
10163            let p = &caller.closure.proto;
10164            let call_pc = (caller.pc as usize).checked_sub(1)?;
10165            let instr = *p.code.get(call_pc)?;
10166            match instr.op() {
10167                Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10168                Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10169                _ => None,
10170            }
10171        };
10172        let frames = &co.frames;
10173        let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10174            VFrame::Lua(f) => {
10175                let proto = f.closure.proto;
10176                let src = chunk_display_name(proto.source.as_ptr());
10177                let pc = (f.pc as usize)
10178                    .saturating_sub(1)
10179                    .min(proto.lines.len().saturating_sub(1));
10180                let line = proto.lines.get(pc).copied().unwrap_or(0);
10181                out.extend_from_slice(b"\n\t");
10182                out.extend_from_slice(src);
10183                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10184                if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10185                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10186                } else if proto.line_defined == 0 {
10187                    out.extend_from_slice(b"main chunk");
10188                } else {
10189                    out.extend_from_slice(
10190                        format!(
10191                            "function <{}:{}>",
10192                            String::from_utf8_lossy(src),
10193                            proto.line_defined
10194                        )
10195                        .as_bytes(),
10196                    );
10197                }
10198            }
10199            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10200            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10201            VFrame::CYield => {
10202                // PUC `pushglobalfuncname` reports `yield` as
10203                // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10204                // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10205                // `'yield'` (5.5 :841).
10206                let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10207                if qualified {
10208                    out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10209                } else {
10210                    out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10211                }
10212            }
10213            VFrame::Tail => {
10214                // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10215                // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10216                // :403 asserts these appear once per collapsed tail call.
10217                out.extend_from_slice(b"\n\t(...tail calls...)");
10218            }
10219        };
10220        if total <= LEVELS1 + LEVELS2 {
10221            for &v in visible {
10222                emit(&mut out, v);
10223            }
10224        } else {
10225            for &v in &visible[..LEVELS1] {
10226                emit(&mut out, v);
10227            }
10228            let skip = total - LEVELS1 - LEVELS2;
10229            out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10230            for &v in &visible[total - LEVELS2..] {
10231                emit(&mut out, v);
10232            }
10233        }
10234        out
10235    }
10236
10237    pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10238        // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10239        // (11) bottom frames; if there are more, the middle is collapsed into
10240        // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10241        // overflow traceback would balloon to tens of megabytes (errors.lua's
10242        // stack-overflow test ran string.gmatch over the resulting buffer).
10243        const LEVELS1: usize = 10;
10244        const LEVELS2: usize = 11;
10245        // Collect visible frames in top-down order (deepest first). Both Lua
10246        // activations and pcall/xpcall continuations (which stand in for a
10247        // C-level pcall on the stack) are visible; PUC's traceback enumerates
10248        // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10249        #[derive(Clone, Copy)]
10250        enum VFrame {
10251            Lua(usize),
10252            CPcall,
10253            CXpcall,
10254        }
10255        let mut visible: Vec<VFrame> = Vec::new();
10256        for (fi, cf) in self.frames.iter().enumerate().rev() {
10257            match cf {
10258                CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10259                CallFrame::Cont(nc) => match nc.kind {
10260                    ContKind::Pcall => visible.push(VFrame::CPcall),
10261                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10262                    _ => {}
10263                },
10264            }
10265        }
10266        // PUC `luaL_traceback` starts enumerating at the given `level` (in
10267        // terms of L1's CallInfo chain). For the running-thread case the C
10268        // frame for debug.traceback itself is level 0 and luna's `visible`
10269        // doesn't include it — so level=1 (PUC default) means "emit from the
10270        // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10271        // the top. level<=0 emits nothing extra here (d_traceback handles the
10272        // "[C]: in function 'traceback'" prefix for level==0 separately).
10273        let skip = (level - 1).max(0) as usize;
10274        if skip >= visible.len() {
10275            return Vec::new();
10276        }
10277        let visible = &visible[skip..];
10278        let total = visible.len();
10279        let mut out = Vec::new();
10280        let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10281            VFrame::Lua(fi) => {
10282                let f = this.frames[fi].lua().expect("Lua frame");
10283                let proto = f.closure.proto;
10284                let src = chunk_display_name(proto.source.as_ptr());
10285                let pc = (f.pc as usize)
10286                    .saturating_sub(1)
10287                    .min(proto.lines.len().saturating_sub(1));
10288                let line = proto.lines.get(pc).copied().unwrap_or(0);
10289                out.extend_from_slice(b"\n\t");
10290                out.extend_from_slice(src);
10291                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10292                if let Some((namewhat, name)) = this.frame_name(fi) {
10293                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10294                } else if proto.line_defined == 0 {
10295                    out.extend_from_slice(b"main chunk");
10296                } else {
10297                    out.extend_from_slice(
10298                        format!(
10299                            "function <{}:{}>",
10300                            String::from_utf8_lossy(src),
10301                            proto.line_defined
10302                        )
10303                        .as_bytes(),
10304                    );
10305                }
10306            }
10307            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10308            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10309        };
10310        if total <= LEVELS1 + LEVELS2 {
10311            for &v in visible {
10312                emit_frame(&mut out, v, self);
10313            }
10314        } else {
10315            for &v in &visible[..LEVELS1] {
10316                emit_frame(&mut out, v, self);
10317            }
10318            let dropped = total - LEVELS1 - LEVELS2;
10319            out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10320            for &v in &visible[total - LEVELS2..] {
10321                emit_frame(&mut out, v, self);
10322            }
10323        }
10324        out
10325    }
10326}
10327
10328// ────────────────────────────────────────────────────────────────────
10329// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10330//
10331// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10332// trace-meta section after `vm.load`, resolves each entry's
10333// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10334// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10335// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10336// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10337// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10338// mcode without further plumbing — same code path the runtime JIT
10339// uses.
10340//
// Why a separate impl block: keeps the AOT API surface (two fns:
// `install_aot_trace` and `collect_proto_hashes`) easy to locate when
// grep'ing for `install_aot_trace`, without wading through the
// 8500-line `impl Vm` block above.
10344// ────────────────────────────────────────────────────────────────────
10345
10346impl Vm {
10347    /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10348    /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10349    /// fires it at the trace's `head_pc`. This is the runtime install
10350    /// API the deploy-side `luna-runtime-helpers` resolver calls once
10351    /// per AOT-emitted trace meta entry, after looking up `proto` by
10352    /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10353    ///
10354    /// # What this does
10355    ///
10356    /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10357    /// The trace's `entry` fn ptr must already point at runnable
10358    /// machine code (the AOT linker resolved the symbol at link time;
10359    /// the deploy resolver passes the address verbatim).
10360    ///
10361    /// # What this does NOT do
10362    ///
10363    /// - **No deduplication.** Calling twice with the same `head_pc`
10364    ///   pushes two entries; the dispatcher's `find` will pick the
10365    ///   first match. The deploy resolver is responsible for not
10366    ///   double-installing.
10367    /// - **No invalidation of the runtime JIT cache.** If the runtime
10368    ///   JIT later records + compiles a trace for the same
10369    ///   `(proto, head_pc)`, both coexist on `proto.traces` and the
10370    ///   dispatcher's `find` picks whichever appears first. AOT
10371    ///   traces install before any runtime recording is possible
10372    ///   (resolver runs before `vm.load` returns its first closure),
10373    ///   so AOT traces win the race for the same site.
10374    /// - **No coverage gating.** AOT traces are trusted by
10375    ///   construction — they were validated at compile time. Setting
10376    ///   `dispatchable: false` on the input would silently disable
10377    ///   dispatch; the caller controls that flag.
10378    ///
10379    /// # Safety / soundness
10380    ///
10381    /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10382    /// machine code). Soundness contract:
10383    ///
10384    /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10385    ///   In the AOT-binary deploy shape this is trivially satisfied —
10386    ///   the fn lives in the binary's `.text`.
10387    /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10388    ///   what the trace's IR actually compiled against; the dispatcher
10389    ///   uses them to marshal `reg_state` in and out without further
10390    ///   validation. A mismatch corrupts vm.stack.
10391    ///
10392    /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10393    /// invariants hold; this fn is a plain push — no validation that
10394    /// would slow the dispatcher's hot path either.
10395    pub fn install_aot_trace(
10396        &mut self,
10397        proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10398        trace: crate::jit::trace::CompiledTrace,
10399    ) {
10400        let _ = self; // resolver passes &mut Vm for symmetry with future
10401        // pending-install + hash-walk variants; nothing on `self` to
10402        // mutate today because the install target lives on the Proto.
10403        proto.traces.borrow_mut().push(TArc::new(trace));
10404    }
10405
10406    /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10407    /// reachable from `root` and return `(proto, stable_hash)` pairs
10408    /// for every Proto found. Used by the deploy-side resolver to
10409    /// match AOT-emitted `proto_hash` keys against the freshly
10410    /// `undump`'d chunk's protos.
10411    ///
10412    /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10413    /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10414    /// multiple nested closures (rare; the cache field would catch
10415    /// the closure-side dedup, not the Proto side) is reported once.
10416    ///
10417    /// # Why on `&Vm` and not a free fn
10418    ///
10419    /// Keeps the AOT install API discoverable on the Vm surface —
10420    /// `vm.collect_proto_hashes(root)` reads naturally next to
10421    /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10422    /// any Vm field, so `&self` (read-only) is enough.
10423    pub fn collect_proto_hashes(
10424        &self,
10425        root: crate::runtime::Gc<crate::runtime::function::Proto>,
10426    ) -> Vec<(
10427        crate::runtime::Gc<crate::runtime::function::Proto>,
10428        [u8; 16],
10429    )> {
10430        let _ = self;
10431        let mut out = Vec::new();
10432        let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10433            std::collections::HashSet::new();
10434        let mut queue: std::collections::VecDeque<
10435            crate::runtime::Gc<crate::runtime::function::Proto>,
10436        > = std::collections::VecDeque::new();
10437        queue.push_back(root);
10438        while let Some(p) = queue.pop_front() {
10439            let key = p.as_ptr() as *const _;
10440            if !seen.insert(key) {
10441                continue;
10442            }
10443            out.push((p, p.stable_hash()));
10444            for &child in p.protos.iter() {
10445                queue.push_back(child);
10446            }
10447        }
10448        out
10449    }
10450}