Skip to main content

luna_core/vm/
exec.rs

//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
//!
//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16    AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17    MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
24/// A Lua virtual machine: one OS thread's worth of Lua state.
25///
26/// # Threading model
27///
28/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
29/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
30/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
31/// design. Embedders that want concurrency spawn one `Vm` per OS
32/// thread (or per single-thread Tokio worker) and exchange data via
33/// channels. See [`docs/threading.md`](../../docs/threading.md) for
34/// canonical embedding patterns including Tokio `current_thread`,
35/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
36///
37/// The constraint is enforced at compile time:
38///
39/// ```compile_fail
40/// fn must_be_send<T: Send>() {}
41/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
42/// ```
43///
44/// A future `feature = "send"` (post-v1.1 sprint) will gate an
45/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
46/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
47pub struct Vm {
48    /// The GC heap owned by this VM. Embedders normally interact via the
49    /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
50    /// the heap directly.
51    pub heap: Heap,
52    stack: Vec<Value>,
53    frames: Vec<CallFrame>,
54    /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
55    /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
56    /// helpers (debug-asserted on use). NOT consumed by readers yet;
57    /// week 1 is pure scaffold. Week 2-N migrations replace readers
58    /// one slice at a time, then remove `frames: Vec<CallFrame>` in
59    /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
60    frames_top: u32,
61    /// open upvalues, sorted ascending by stack slot
62    open_upvals: Vec<(u32, Gc<Upvalue>)>,
63    /// to-be-closed slots, ascending
64    tbc: Vec<u32>,
65    /// logical stack top for multi-result sequences
66    pub(crate) top: u32,
67    globals: Gc<Table>,
68    /// shared metatable for all strings (populated by the string lib, P04)
69    /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
70    /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
71    /// own. Settable via debug.setmetatable.
72    type_mt: [Option<Gc<Table>>; 5],
73    /// pre-interned metamethod event names, indexed by `Mm`
74    mm_names: Vec<Gc<crate::runtime::LuaStr>>,
75    /// native↔Lua nesting depth (PUC C-stack guard analogue)
76    c_depth: u32,
77    /// number of live pcall/xpcall continuation frames on the running thread
78    /// (PUC counts these against nCcalls). Bounds protected-call recursion the
79    /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
80    /// with the coroutine context, since continuations survive a yield.
81    pcall_depth: u32,
82    /// number of non-yieldable C calls in flight on the running thread (PUC's
83    /// `L->nny`). A library callback that runs via synchronous Rust recursion
84    /// (sort comparator, gsub replacement) cannot be continued across a yield,
85    /// so it bumps this for its duration; `coroutine.yield` inside hits the
86    /// C-call boundary and errors. Always 0 at a suspend point (a yield can
87    /// never cross such a call), so it needs no per-thread save/restore.
88    nny: u32,
89    /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
90    /// stack-overflow that surfaces *inside* the handler is reported as PUC's
91    /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
92    /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
93    /// loop)` then matches. PUC tracks this via the soft-cap window
94    /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
95    /// scope explicitly.
96    msgh_depth: u32,
97    /// set by a coroutine closing itself (`coroutine.close()` on the running
98    /// thread): the to-be-closed handlers have already run; the thread must now
99    /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
100    /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
101    /// unwind, so a protecting pcall cannot catch it) the termination.
102    terminating: Option<Option<Value>>,
103    /// xoshiro256** state (math.random)
104    rng: [u64; 4],
105    /// VM creation time (os.clock)
106    started: std::time::Instant,
107    version: LuaVersion,
108    /// error object being threaded through a chain of __close handlers; a GC
109    /// root for the duration (a handler may trigger collection)
110    closing_err: Option<Value>,
111    /// the coroutine whose context is currently live in the fields above;
112    /// `None` while the main thread runs (P05)
113    current: Option<Gc<crate::runtime::Coro>>,
114    /// the main thread's saved execution context while a coroutine runs
115    main_ctx: Option<SavedCtx>,
116    /// set by `coroutine.yield` to suspend the running coroutine: the yielded
117    /// values plus the slot/result-count needed to finish the yielding call on
118    /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
119    yielding: Option<(Vec<Value>, u32, i32)>,
120    /// results expected by the in-flight native call (so `yield` knows how many
121    /// values its call site wants when it suspends)
122    native_nresults: i32,
123    /// identity object for the main thread, returned by `coroutine.running`
124    /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
125    main_coro: Option<Gc<Coro>>,
126    /// `collectgarbage` mode name ("incremental"/"generational"). The collector
127    /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
128    /// switches report the previous one, as PUC does.
129    gc_mode: &'static str,
130    /// the live-register boundary of the running thread for GC rooting (PUC's
131    /// `L->top`): set precisely at each GC safe point so freed temporary
132    /// registers above it are not rooted. Without this the collector roots the
133    /// whole stack window, pinning weak-table values stranded in stale temps
134    /// (e.g. closure.lua's `while x[1]` GC-detection loop).
135    pub(crate) gc_top: u32,
136    /// `collectgarbage("param", name [,value])` pacing parameters. The collector
137    /// is still stop-the-world, so these are stored/returned for API fidelity
138    /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
139    /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
140    gc_pause: i64,
141    gc_stepmul: i64,
142    gc_stepsize: i64,
143    /// true while `__gc` finalizers are being run, so a finalizer that calls
144    /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
145    /// `collectgarbage` yields fail).
146    gc_finalizing: bool,
147    /// C ABI scratch (`capi` module): the host-visible value stack that C
148    /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
149    /// Kept here (instead of in a separate `LuaState` wrapper) so the
150    /// trampoline that bridges to a `LuaCFunction` can safely cast the
151    /// Vm pointer it already holds to the public `*mut LuaState` type
152    /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
153    pub capi_stack: Vec<crate::runtime::Value>,
154    /// Pinned CString backing the pointer last returned by `lua_tostring`;
155    /// valid until the next `lua_tostring` on the same Vm.
156    pub capi_cstr_pin: Option<std::ffi::CString>,
157    /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
158    /// concatenate across continuation calls until a non-`tocont` call
159    /// flushes; the default warnf recognises `@on`/`@off` control messages
160    /// and starts disabled. luna's `emit_warn` mirrors the default warnf
161    /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
162    /// keep the older raise semantics).
163    pub(crate) warn_state: WarnState,
164    pub(crate) warn_buf: Vec<u8>,
165    /// P09 embedding cooperative budget: a per-Vm tick counter that the run
166    /// loop decrements once per dispatch turn. When it hits zero the loop
167    /// raises a catchable "instruction budget exceeded" error so the embedder
168    /// can yield control back to its caller (short-script eval, game
169    /// frame budgets). `None` = unbounded; reset on each call via
170    /// `set_instr_budget`.
171    pub(crate) instr_budget: Option<i64>,
172    // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
173    // `self.jit` below + `crate::vm::jit_state` for field docs.
174    // (Was: jit_enabled here.)
175    // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
176    // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
177    // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
178    // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
179    // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
180    // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
181    // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
182    // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
183    // materialize_emit,closure_emit}_count — all moved to JitState.
184    /// Bytecode-loading gate. Default `true`. Sandbox embedders should
185    /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
186    /// precompiled chunks (which bypass the parser's depth / opcode
187    /// limits). When `false`, the loader rejects any source whose first
188    /// byte is the bytecode signature `\27` ("`\27Lua`").
189    pub(crate) bytecode_loading: bool,
190    /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
191    /// a strictly larger trust surface than luna's own dump format
192    /// (third-party toolchain bugs, malformed chunks, unknown opcode
193    /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
194    /// through the per-dialect PUC translators in `crate::vm::dump::puc`
195    /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
196    /// Embedder toggles via `set_puc_bytecode_loading`.
197    pub(crate) puc_bytecode_loading: bool,
198    /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
199    /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
200    /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
201    /// source exceeds this, the loader returns the PUC-shaped
202    /// `not enough memory` error before the host allocator is asked to
203    /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
204    /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
205    /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
206    /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
207    /// Embedders that genuinely need to load > 256 MiB sources widen the
208    /// cap via [`Vm::set_loader_input_budget`].
209    pub(crate) loader_input_budget: usize,
210    /// In-process log of fully-emitted warnings (each entry = one flushed
211    /// message, sans the "Lua warning: " prefix and trailing newline). Lets
212    /// tests assert what was warned without scraping stderr.
213    pub(crate) warn_log: Vec<Vec<u8>>,
214    /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
215    /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
216    /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
217    /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
218    /// shape stub for db.lua :328; if other registry-keyed APIs land later
219    /// they can share this table.
220    pub(crate) registry: Option<Gc<Table>>,
221    /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
222    /// registry entry); attached to every file userdata the io library makes
223    pub(crate) file_mt: Option<Gc<Table>>,
224    /// io library default input/output streams (PUC registry IO_INPUT/IO_OUTPUT)
225    pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
226    pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
227    /// the running thread's debug hook state (`debug.sethook`); per-thread,
228    /// swapped with the execution context on a coroutine resume/yield
229    pub(crate) hook: HookState,
230    /// true while the hook itself runs, so its own execution fires no events
231    /// (PUC clears the mask for the duration)
232    pub(crate) in_hook: bool,
233    /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
234    /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
235    /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
236    /// accumulate the count instead of capping at 1.
237    pub(crate) pending_tailcalls: u32,
238    /// Name of the C native that just propagated an error (captured before
239    /// the native is popped from `running_natives`). Lets a dying coroutine
240    /// preserve `[C]: in function '<name>'` at the top of its traceback
241    /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
242    /// luna's native frames are off-stack so we stash the name explicitly.
243    pub(crate) errored_native: Option<String>,
244    /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
245    /// (relative to the activation's func slot) and the number transferred.
246    /// Set just before firing a call/return hook, read by `getinfo("r")`.
247    pub(crate) hook_ftransfer: u16,
248    pub(crate) hook_ntransfer: u16,
249    /// metamethod event tag (e.g. "close") to attach to the next Lua frame
250    /// pushed by `push_frame`; `close_slots` sets this before calling a
251    /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
252    /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
253    pending_tm: Option<&'static str>,
254    /// `true` when the next `push_frame` is the user hook function itself,
255    /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
256    /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
257    pending_is_hook: bool,
258    /// traceback snapshot taken at the error point (the first `unwind` entry
259    /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
260    /// the failed frames are popped — can still see the error point's stack
261    /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
262    /// stack intact; we approximate by snapshotting the string and letting
263    /// `d_traceback` consume it. Cleared on Cont catch and at host-level
264    /// `call_value` entry (`public_call_depth == 0`).
265    pub(crate) error_traceback: Option<Vec<u8>>,
266    /// nesting depth of public `call_value` entries (host vs. internal). The
267    /// outermost entry (depth 0) resets per-error state (`error_traceback`);
268    /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
269    public_call_depth: u32,
270    /// stack of native (`Value::Native`) closures currently running on the
271    /// Rust call stack. `begin_call` pushes the closure before invoking
272    /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
273    /// native call (PUC `ar.name == NULL` at level 0 because the level-0
274    /// caller is C, not Lua) and qualify the running function's name via
275    /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
276    pub(crate) running_natives: Vec<Gc<NativeClosure>>,
277    /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
278    /// the native's argument-window head and width, so `debug.getlocal`
279    /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
280    pub(crate) running_native_slots: Vec<(u32, u32)>,
281    // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
282    // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
283    // trace_compiler — all moved to JitState. See `jit` below.
284    /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
285    /// when `chunk_compiler` / `trace_compiler` are
286    /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
287    ///
288    /// `#[doc(hidden)] pub` so the `luna` crate's
289    /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
290    /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
291    /// field). Not part of the embedder-facing API surface.
292    #[doc(hidden)]
293    pub jit: crate::vm::jit_state::JitState,
294
295    /// B12 host roots — append-only `Vec<Value>` traced as an extra
296    /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
297    /// `LuaRoot`) hold indices into this vector so the underlying
298    /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
299    ///
300    /// v1.1 strategy: append-only with explicit `unpin_all` / new Vm.
301    /// Slot recycling lands in Phase 3 alongside B8 LuaUserdata, when
302    /// the trade-offs between `Drop` plumbing and append-only memory
303    /// growth have a richer ergonomics envelope to live in.
304    pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
305    /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
306    /// back if non-empty, else extends `host_roots`. Generation
307    /// overflow at `u32::MAX` retires the slot (NOT pushed here).
308    pub(crate) host_roots_free: Vec<u32>,
309
310    /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
311    /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
312    /// callback). Each entry is one in-flight working buffer; `gc_roots`
313    /// extends with every contained `Value` so a `collectgarbage()`
314    /// inside the comparator cannot free strings/tables snapshotted
315    /// here. Nested sorts push a new buffer on entry, pop on exit
316    /// (sort.lua's `load(..)(); collectgarbage()` compare callback
317    /// regression).
318    pub(crate) sort_scratch: Vec<Vec<Value>>,
319
320    /// v1.3 Phase ML — MacroLua compile-time macro registry.
321    /// Pre-populated with built-in macros (`@quote` / `@unquote` /
322    /// `@if` / `@gensym`) at construction time when `version ==
323    /// LuaVersion::MacroLua`; embedders register custom macros via
324    /// [`Vm::define_macro`]. The expander runs once per `load()` call
325    /// between lexing and parsing (only when `is_macro_lua()`).
326    pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,
327
328    /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
329    /// by `TypeId::of::<T>()` for embedder types implementing
330    /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
331    /// [`Vm::register_userdata`]; metatables are pinned via
332    /// [`Vm::pin_host`] at registration time so the entry's
333    /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
334    pub(crate) userdata_metatables:
335        std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,
336
337    /// B6 — classification of the most recent error raised on this Vm.
338    /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
339    /// at well-known sites (syntax errors, instr-budget trips, native
340    /// callback errors, type errors).
341    pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,
342
343    /// B6 — `(source_name, line)` of the most recent error. Set by the
344    /// dispatcher / lexer / parser; cleared when a new call_value
345    /// enters cleanly.
346    pub(crate) last_error_source: Option<(String, u32)>,
347
348    /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
349    /// the dispatcher hot loop yields cooperatively (sets
350    /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
351    /// to `EvalFuture::poll`) instead of returning a real
352    /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
353    /// for the duration of the future; restored to `false` on
354    /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
355    /// leave it `false` so v1.0 behavior is preserved exactly.
356    pub(crate) async_mode: bool,
357
358    /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
359    /// before driving a slice. The dispatcher itself does not call it
360    /// (the future's poll loop does `wake_by_ref` after observing
361    /// `BudgetExhausted`), but storing the waker keeps the door open
362    /// for Stage 2 async natives to wake the host directly from a
363    /// helper future.
364    pub(crate) async_waker: Option<std::task::Waker>,
365
366    /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
367    /// `instr_budget` at the start of each `EvalFuture::poll` slice.
368    /// Default 10_000 (RFC §D5). Tunable via
369    /// [`Vm::set_async_slice`].
370    pub(crate) async_slice_size: i64,
371
372    /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
373    /// budget exhaustion fires; checked by `exec_with` (so the
374    /// sentinel propagates without `unwind` running, mirroring
375    /// `yielding.is_some()`) and by `call_value_impl` (so the call
376    /// frames survive for the next poll). Cleared by `drive_one`
377    /// after translating it to `DispatchOutcome::BudgetExhausted`.
378    pub(crate) host_yield_pending: bool,
379
380    /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
381    /// when an async-marked [`NativeClosure`] is invoked under
382    /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
383    /// mechanism as `host_yield_pending` — see `exec_with` +
384    /// `call_value_impl`), stashes the in-flight future +
385    /// post-completion context here, and surfaces them to
386    /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
387    /// once the future is moved out into a
388    /// `DispatchOutcome::AsyncNativeAwaiting`.
389    pub(crate) pending_async_native_fut:
390        Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,
391
392    /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
393    /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
394    /// commit the future's eventual `Ok(nret)` back into the calling
395    /// frame's expected result slots. Recorded by the dispatcher;
396    /// consumed by [`Vm::commit_async_native_result`] after the
397    /// future resolves.
398    pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
399}
400
/// v1.1 B10 Stage 2 — call-site context an in-flight async native
/// needs preserved across the cooperative-yield boundary.
///
/// The dispatcher records this when it routes a `NativeClosure` with
/// `is_async == true` through the cooperative path; `EvalFuture::poll`
/// hands it back to [`Vm::commit_async_native_result`] once the
/// awaited future resolves so `finish_results` (and the post-call GC
/// checkpoint) can run as if the native had completed synchronously.
#[derive(Clone, Copy)]
pub(crate) struct AsyncNativeCallCtx {
    /// Stack slot of the function being called (first element of the
    /// `(func_slot, nargs, nresults, gc_top)` quad).
    pub func_slot: u32,
    /// Recorded for parity with the sync native-call path's
    /// `native_nresults`/`gc_top` bookkeeping; reserved for Stage 3+
    /// hook firing + traceback shaping. Not yet read in Stage 2.
    #[allow(dead_code)]
    pub nargs: u32,
    /// Number of results the call site expects from the native.
    pub nresults: i32,
    /// Recorded for Stage 3+ traceback + GC-root-window auditing.
    /// Stage 2 reads `Vm.gc_top` directly post-resume, so this is
    /// unread today; carried so a Stage 3 audit can confirm the
    /// pre-suspend root window matches the post-resume one.
    #[allow(dead_code)]
    pub gc_top: u32,
}
425
426/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
427/// hookcount). `func` is the Lua hook; the booleans are the PUC mask bits.
428#[derive(Clone, Copy, Default)]
429pub struct HookState {
430    /// the hook function (`None` when no hook is installed)
431    pub func: Option<Value>,
432    /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
433    /// (Rust first); both can be installed simultaneously, but most
434    /// embedders pick one.
435    pub rust_func: Option<RustDebugHook>,
436    /// LUA_MASKCALL — fire on function entry
437    pub call: bool,
438    /// LUA_MASKRET — fire on function return
439    pub ret: bool,
440    /// LUA_MASKLINE — fire on source-line change
441    pub line: bool,
442    /// LUA_MASKCOUNT — fire every `count_base` instructions
443    pub count: bool,
444    /// instruction count between count events (PUC basehookcount)
445    pub count_base: i64,
446    /// instructions left until the next count event (PUC hookcount)
447    pub count_left: i64,
448}
449
450/// Rust-side debug hook callback (B11). Receives the `Vm` plus a
451/// classified event. The callback runs synchronously in the
452/// dispatcher; the hook flag (`in_hook`) is set for its duration so
453/// hook recursion is suppressed.
454pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
/// Classified debug event delivered to a [`RustDebugHook`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RustHookEvent {
    /// Function entry (`hook_call` analogue).
    Call,
    /// Function return (`hook_return` analogue).
    Return,
    /// Tail call entry (PUC 5.2+ separates this from a plain Call).
    TailCall,
    /// Source-line change (the `u32` is the 1-based line number).
    Line(u32),
    /// Instruction count event (fires every `count_base` instructions).
    Count,
}
470
/// Subscribe to function-call events.
///
/// Mask flags for [`Vm::set_rust_debug_hook`]. OR these to subscribe
/// to multiple event categories with a single hook installation.
pub const HOOK_MASK_CALL: u32 = 1;
/// Subscribe to function-return events.
pub const HOOK_MASK_RETURN: u32 = 2;
/// Subscribe to line-change events.
pub const HOOK_MASK_LINE: u32 = 4;
/// Subscribe to instruction-count events.
pub const HOOK_MASK_COUNT: u32 = 8;
480
481/// A thread's swapped-out execution context (PUC per-thread stack state).
482struct SavedCtx {
483    stack: Vec<Value>,
484    frames: Vec<CallFrame>,
485    open_upvals: Vec<(u32, Gc<Upvalue>)>,
486    tbc: Vec<u32>,
487    top: u32,
488    pcall_depth: u32,
489    hook: HookState,
490    /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
491    /// the rest of the suspended state so each thread can keep its own
492    /// `setfenv(0, env)` rewire without the swap leaking into another
493    /// thread (5.1 closure.lua :177).
494    globals: Gc<Table>,
495}
496
497/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
498enum Unwound {
499    /// caught by a pcall/xpcall continuation; resume running its caller
500    Caught,
501    /// caught by a continuation that was the entry-level activation; these are
502    /// the call's (wrapped) results
503    CaughtReturn(Vec<Value>),
504    /// no protecting continuation up to `entry_depth`; propagate the error
505    Propagated(LuaError),
506}
507
/// A resolved debug stack level: a real Lua frame (by index into `frames`) or a
/// synthetic C frame for a call_value boundary.
pub(crate) enum DbgKind {
    /// a real Lua frame; the payload is its index into `frames`
    Lua(usize),
    /// a synthetic C level; the index is the `from_c` Lua frame it sits below,
    /// used to name the native via its invoking call instruction.
    C(usize),
    /// PUC `CIST_TAIL` placeholder — a Lua-to-Lua tail call collapsed the
    /// caller's activation, so `debug.getinfo(level)` at this slot returns
    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
    /// `func = nil` and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
    /// both shapes). The index points at the *tail-called* frame whose
    /// `is_tail` flag induced this synthetic level.
    Tail(#[allow(dead_code)] usize),
}
523
524/// Outcome of an index/newindex/comparison fast path: either a directly
525/// computed result, or a metamethod (with the receiver it resolved against) the
526/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
527enum MmOut {
528    /// index → the looked-up value; newindex → done (raw set performed);
529    /// comparison → the boolean result already known
530    Done(Value),
531    /// a metamethod to call; `recv` is the chain element it was found on (the
532    /// extra args — key / value — are supplied by the caller)
533    Mm { func: Value, recv: Value },
534    /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
535    /// carries `__le` — `op_compare` swaps the args and negates the result.
536    /// Lives separate from `Mm` so the synth path can stay yieldable without
537    /// every other Mm caller learning a swap flag they would never set.
538    CompareSynth { func: Value },
539}
540
/// Metamethod events; discriminants index `Vm::mm_names`.
///
/// The variant order must match `MM_NAMES` entry for entry: a variant's
/// `usize` discriminant is the index of its `"__event"` spelling.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
pub(crate) enum Mm {
    Index,
    NewIndex,
    Call,
    ToString,
    Metatable,
    Name,
    Eq,
    Lt,
    Le,
    Concat,
    Len,
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Pow,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
    Unm,
    BNot,
    Close,
    Gc,
    Pairs,
}

/// Metamethod field names (with the `__` prefix), in `Mm` discriminant order.
const MM_NAMES: [&str; 28] = [
    "__index",
    "__newindex",
    "__call",
    "__tostring",
    "__metatable",
    "__name",
    "__eq",
    "__lt",
    "__le",
    "__concat",
    "__len",
    "__add",
    "__sub",
    "__mul",
    "__div",
    "__mod",
    "__pow",
    "__idiv",
    "__band",
    "__bor",
    "__bxor",
    "__shl",
    "__shr",
    "__unm",
    "__bnot",
    "__close",
    "__gc",
    "__pairs",
];

// Compile-time guard: `Mm` discriminants index `MM_NAMES`, so the trailing
// variant must land on the trailing name. Inserting a variant without a
// matching name (or vice versa) fails the build instead of mis-indexing.
// When appending a new last variant, update the variant named here.
const _: () = assert!(Mm::Pairs as usize + 1 == MM_NAMES.len());
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614    if version <= LuaVersion::Lua53 {
615        format!("__{tm}")
616    } else if tm == "gc" {
617        "__gc".to_string()
618    } else {
619        tm.to_string()
620    }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626    use crate::vm::isa::Op;
627    Some(match op {
628        Op::Add => "add",
629        Op::Sub => "sub",
630        Op::Mul => "mul",
631        Op::Div => "div",
632        Op::Mod => "mod",
633        Op::Pow => "pow",
634        Op::IDiv => "idiv",
635        Op::BAnd => "band",
636        Op::BOr => "bor",
637        Op::BXor => "bxor",
638        Op::Shl => "shl",
639        Op::Shr => "shr",
640        Op::Unm => "unm",
641        Op::BNot => "bnot",
642        Op::Concat => "concat",
643        Op::Len => "len",
644        Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645        Op::SetField | Op::SetTable | Op::SetI => "newindex",
646        Op::Eq | Op::EqK => "eq",
647        Op::Lt => "lt",
648        Op::Le => "le",
649        _ => return None,
650    })
651}
652
/// PUC `MAXTAGLOOP`: upper bound on the length of an `__index` /
/// `__newindex` metamethod chain.
const MAX_TAG_LOOP: u32 = 2000;
/// PUC `MAXCCMT` (lvm.c): upper bound on a `__call` metamethod chain. 200
/// links is more than any reasonable program needs and matches PUC 5.4/5.5;
/// the earlier value of `15` was tight enough to fire on calls.lua :194
/// (N=20).
const MAX_CCMT: u32 = 200;
/// PUC `LUAI_MAXCCALLS` analogue: bound on native↔Lua nesting depth.
const MAX_C_DEPTH: u32 = 200;
/// luna's engine-level VM stack cap (used by call-site overflow checks).
/// `1 << 20` is 1,048,576 — slightly larger than PUC's `LUAI_MAXSTACK`
/// (1,000,000, see `PUC_MAXSTACK`) so engine internals have a little
/// headroom above any single library push.
const MAX_LUA_STACK: u32 = 1 << 20;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults via
/// `lua_checkstack` to refuse multi-value pushes (`table.unpack` returning
/// N values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins
/// this at one million — `for j in {lim-10, …}` expects every j ≥ lim-10
/// to fail because the few slots already consumed in the coroutine push
/// the effective cap below lim-10.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// PUC 5.4+ default warnf state. The base library's `warn` function flips
/// between `Off` and `On` via the `@on` / `@off` control messages; any other
/// `@<word>` control is silently ignored, mirroring `lauxlib.c::checkcontrol`.
/// A freshly constructed `Vm` starts in `Off`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum WarnState {
    /// `warn` messages are silently dropped. This is the initial state and
    /// the state entered by `warn("@off")`.
    Off,
    /// `warn` messages are delivered to stderr (entered by `warn("@on")`).
    On,
}
683
/// Best-effort extraction of a textual message from a `catch_unwind` payload.
///
/// A `panic!` with a plain string literal carries a `&'static str`; a
/// `panic!` with format arguments carries a `String`. Both are returned as an
/// owned `String`. Any other payload type degrades to the placeholder
/// `"<non-string panic>"`. Used by the native-call catch_unwind to fold the
/// panic into a Lua error.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|text| (*text).to_string())
        })
        .unwrap_or_else(|| "<non-string panic>".to_string())
}
697
698/// Combined error type returned by [`Vm::eval`] and friends — either the
699/// chunk failed to parse / compile, or it raised at runtime.
700#[derive(Debug)]
701pub enum Error {
702    /// Parse or compile failure.
703    Syntax(SyntaxError),
704    /// Runtime error raised during execution.
705    Runtime(LuaError),
706}
707
708impl From<SyntaxError> for Error {
709    fn from(e: SyntaxError) -> Error {
710        Error::Syntax(e)
711    }
712}
713
714impl From<LuaError> for Error {
715    fn from(e: LuaError) -> Error {
716        Error::Runtime(e)
717    }
718}
719
// Dropping a `Vm` closes the Lua state: pending finalizers get to run before
// the heap goes away.
impl Drop for Vm {
    fn drop(&mut self) {
        // State close: queue every still-registered finalizable object, then
        // run its `__gc` before the heap frees them (PUC separatetobefnz(g,1)
        // + callallpending). This is a single pass — objects created by a
        // closing finalizer are not re-finalized (they go to the heap's free
        // list directly).
        self.heap.queue_all_finalizers();
        self.run_finalizers();
    }
}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742    frames.push(cf);
743    // Shadow maintenance is debug-only: release builds skip the
744    // increment + assertion entirely. The shadow's purpose in Week 1
745    // is to VERIFY the assumed invariant (frames_top == frames.len())
746    // across all push/pop sites; once Week 2+ migrates readers to
747    // consume the shadow, release will run the increment unconditionally.
748    #[cfg(debug_assertions)]
749    {
750        *frames_top += 1;
751        debug_assert_eq!(
752            *frames_top as usize,
753            frames.len(),
754            "P17-D frames_top out of sync after push",
755        );
756    }
757    #[cfg(not(debug_assertions))]
758    let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763    let r = frames.pop();
764    #[cfg(debug_assertions)]
765    {
766        if r.is_some() {
767            *frames_top = frames_top.saturating_sub(1);
768        }
769        debug_assert_eq!(
770            *frames_top as usize,
771            frames.len(),
772            "P17-D frames_top out of sync after pop",
773        );
774    }
775    #[cfg(not(debug_assertions))]
776    let _ = frames_top;
777    r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — reports whether `LUNA_AOT_PROBE`
/// diagnostics are switched on.
///
/// The environment is read once per process and the answer is cached in a
/// `OnceLock`, so the dispatcher's hot path pays a single atomic load and
/// later changes to the variable are not observed. The probe is on only when
/// `std::env::var` can read the variable and its value is non-empty. Off by
/// default — production deploys don't bleed diagnostic prints.
fn jit_probe_enabled() -> bool {
    static PROBE_ON: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        std::env::var("LUNA_AOT_PROBE").is_ok_and(|value| !value.is_empty())
    })
}
794
795impl Vm {
796    /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797    /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798    /// the Vec replacement to keep the shadow valid.
799    #[inline(always)]
800    fn frames_resync(&mut self) {
801        // Debug-only Week 1 — see `frames_push_sync` comment.
802        #[cfg(debug_assertions)]
803        {
804            self.frames_top = self.frames.len() as u32;
805        }
806    }
807
808    // ====================================================================
809    // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810    //
811    // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812    // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813    // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814    // sites to consume them. Phase 4 removes Vec<CallFrame>.
815    //
816    // Preconditions (debug-asserted):
817    // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818    // - self.stack.len() > base + max_stack (caller has grown stack)
819    // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820    // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821    //
822    // No release-build cost when unused (LTO strips dead methods).
823    // ====================================================================
824
825    /// Write a Lua frame's closure pointer into `stack[base-2]`.
826    /// The caller must ensure `base >= 2` and the slot is within the
827    /// stack's allocated range.
828    #[inline]
829    #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830    fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831        debug_assert!(
832            base >= 2,
833            "frame closure slot needs base >= 2; got {}",
834            base
835        );
836        let idx = (base - 2) as usize;
837        debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838        self.stack[idx] = Value::Closure(cl);
839    }
840
841    /// Read a Lua frame's closure pointer from `stack[base-2]`.
842    /// Returns `None` if the slot doesn't hold a closure (caller is
843    /// expected to treat that as a corrupt frame).
844    ///
845    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846    /// to avoid the enum-match cost on the hot path. Tag check via
847    /// 1-byte load + branch + `as_closure_unchecked` payload load.
848    #[inline]
849    #[allow(dead_code)]
850    fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851        debug_assert!(base >= 2);
852        let v = self.stack.get((base - 2) as usize)?;
853        if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854            // SAFETY: tag byte just verified == CLOSURE.
855            Some(unsafe { v.as_closure_unchecked() })
856        } else {
857            None
858        }
859    }
860
861    /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862    /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863    /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864    /// cleanly through the value stack without losing bits.
865    #[inline]
866    #[allow(dead_code)]
867    fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868        debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869        let idx = (base - 1) as usize;
870        debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871        self.stack[idx] = Value::Int(marker.to_raw());
872    }
873
874    /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875    /// `None` if the slot isn't a `Value::Int` (caller treats as a
876    /// corrupt frame); the kind tag itself may still be invalid, in
877    /// which case [`FrameMarker::kind`] returns `None` on the result.
878    ///
879    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880    /// for the tag check + `as_int_unchecked` for the payload load.
881    #[inline]
882    #[allow(dead_code)]
883    fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884        debug_assert!(base >= 1);
885        let v = self.stack.get((base - 1) as usize)?;
886        if v.tag_byte() == crate::runtime::value::tag::INT {
887            // SAFETY: tag byte just verified == INT.
888            Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889                unsafe { v.as_int_unchecked() },
890            ))
891        } else {
892            None
893        }
894    }
895
    /// Build the raw `Vm` struct without main coroutine / RNG seed / library
    /// setup. Private helper shared by `Vm::new` and `Vm::new_minimal`; the
    /// caller is responsible for the rest of the bring-up.
    ///
    /// What is set up here: the heap (with its version-dependent GC flags),
    /// an empty globals table, the interned metamethod names, and default
    /// values for every other field.
    fn new_inner(version: LuaVersion) -> Vm {
        let mut heap = Heap::new();
        // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
        // values strongly. gc.lua's "weak tables" section relies on that.
        heap.no_ephemeron = version <= LuaVersion::Lua51;
        // PUC 5.3 needs two GC cycles to finalize a table caught in a
        // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
        // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
        heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
        let globals = heap.new_table();
        // Intern every metamethod key once, in `MM_NAMES` order, so the
        // resulting collection can be indexed by `Mm as usize`.
        let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();

        Vm {
            heap,
            stack: Vec::new(),
            frames: Vec::new(),
            // Shadow of `frames.len()` (see `frames_push_sync`).
            frames_top: 0,
            open_upvals: Vec::new(),
            tbc: Vec::new(),
            top: 0,
            globals,
            // Slot 3 is the shared string metatable (see
            // `set_string_metatable`).
            type_mt: [None; 5],
            mm_names,
            c_depth: 0,
            pcall_depth: 0,
            nny: 0,
            msgh_depth: 0,
            terminating: None,
            // Placeholder state; `new_minimal` seeds the RNG.
            rng: [0; 4],
            // Reference point for `uptime`.
            started: std::time::Instant::now(),
            version,
            closing_err: None,
            current: None,
            main_ctx: None,
            yielding: None,
            native_nresults: -1,
            // Filled in by `new_minimal`.
            main_coro: None,
            gc_mode: "incremental",
            gc_top: 0,
            gc_pause: 200,
            gc_stepmul: 100,
            gc_stepsize: 13,
            gc_finalizing: false,
            capi_stack: Vec::new(),
            capi_cstr_pin: None,
            warn_state: WarnState::Off,
            warn_buf: Vec::new(),
            warn_log: Vec::new(),
            instr_budget: None,
            // `load` accepts binary chunks by default; the PUC-format flag
            // below is forwarded to the undumper and starts disabled.
            bytecode_loading: true,
            puc_bytecode_loading: false,
            loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
            registry: None,
            file_mt: None,
            io_input: None,
            io_output: None,
            hook: HookState::default(),
            in_hook: false,
            pending_tailcalls: 0,
            errored_native: None,
            hook_ftransfer: 0,
            hook_ntransfer: 0,
            pending_tm: None,
            pending_is_hook: false,
            error_traceback: None,
            public_call_depth: 0,
            running_natives: Vec::new(),
            running_native_slots: Vec::new(),
            // v1.1 A2 — JIT-specific state factored into `JitState`
            // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
            // `install_jit_backend` / `luaL_newstate` swap in
            // `CraneliftBackend` for callers that want JIT acceleration.
            jit: crate::vm::jit_state::JitState::with_null_backend(),
            // v1.1 B12 — host roots ticket pool for the `Lua` facade.
            host_roots: Vec::new(),
            // v1.3 Phase ML — MacroLua registry. Pre-populated with
            // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
            // when this Vm is constructed under `LuaVersion::MacroLua`;
            // empty for every other dialect.
            macro_registry: if version == LuaVersion::MacroLua {
                crate::frontend::macro_expander::MacroRegistry::with_builtins()
            } else {
                crate::frontend::macro_expander::MacroRegistry::new()
            },
            host_roots_free: Vec::new(),
            sort_scratch: Vec::new(),
            // v1.2 Track B — LuaUserdata trait sugar's per-Vm
            // metatable cache. Populated lazily by register_userdata.
            userdata_metatables: std::collections::HashMap::new(),
            // v1.1 B6 — error classification metadata. Defaults to
            // Runtime; set at known sites (syntax / budget trip /
            // native error / type error).
            last_error_kind: crate::vm::error::LuaErrorKind::default(),
            last_error_source: None,
            // v1.1 B10 Stage 1 — async embedder fields. Defaults
            // preserve sync behavior bit-for-bit (`async_mode = false`
            // means the budget hot loop errors out exactly as v1.0).
            async_mode: false,
            async_waker: None,
            async_slice_size: 10_000,
            host_yield_pending: false,
            // v1.1 B10 Stage 2 — pending async-native state. Empty by
            // default; populated only by the dispatcher when an
            // async-marked NativeClosure is invoked under async_mode.
            pending_async_native_fut: None,
            pending_async_native_ctx: None,
        }
    }
1006
    /// Build a fully-loaded Vm — the default for embedders that want PUC's
    /// standard library surface. Equivalent to `Vm::new_minimal(version)`
    /// followed by `vm.open_all_libs()`, which is exactly what the body
    /// does.
    pub fn new(version: LuaVersion) -> Vm {
        let mut vm = Vm::new_minimal(version);
        vm.open_all_libs();
        vm
    }
1015
1016    /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1017    /// that want a sandbox (Redis-style scripts, in-game scripting with
1018    /// a curated API) call this and then `open_base` / `open_math` / etc.
1019    /// selectively. The Vm is otherwise fully initialized (main coroutine,
1020    /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1021    pub fn new_minimal(version: LuaVersion) -> Vm {
1022        let mut vm = Vm::new_inner(version);
1023        let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1024        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1025        unsafe { mc.as_mut() }.status = CoroStatus::Running;
1026        vm.main_coro = Some(mc);
1027        let (a, b) = vm.rng_auto_seed();
1028        vm.rng_seed(a as u64, b as u64);
1029        vm
1030    }
1031
1032    /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
1033    /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
1034    /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
1035    /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
1036    /// closures already populated `Proto.jit: JitProtoState::Compiled`
1037    /// does NOT evict those cached entries — call right after
1038    /// construction for a clean swap.
1039    ///
1040    /// Naming: `install_jit_backend` (not `install_default_jit`)
1041    /// because the "default" in luna-core is `NullJitBackend`; the
1042    /// "default JIT" lives in the `luna` crate.
1043    pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
1044    where
1045        C: crate::jit::IntChunkCompiler + 'static,
1046        T: crate::jit::TraceCompiler + 'static,
1047    {
1048        self.jit.chunk_compiler = Box::new(chunk);
1049        self.jit.trace_compiler = Box::new(trace);
1050    }
1051
    /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
    /// storage holder, replacing whatever `self.jit.storage` held before.
    /// Default is [`crate::jit::NullJitStorage`];
    /// the `luna_jit` crate's `install_default_jit` pairs this with
    /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
    /// also install a fresh `CraneliftJitStorage`. Storage holds
    /// the per-`Vm` JIT cache + handle collections that used to be
    /// `thread_local!`s in `luna_jit::jit_backend`.
    ///
    /// Idempotency: re-installing storage on a Vm that already
    /// holds compiled-trace pointers WILL evict their owners (the
    /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
    /// pages). Call right after construction for a clean swap.
    pub fn install_jit_storage<S>(&mut self, storage: S)
    where
        S: crate::jit::JitStorage + 'static,
    {
        // The previous storage box is dropped by this assignment.
        self.jit.storage = Box::new(storage);
    }
1070
1071    /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
1072    /// reports "skipped" so every closure stays on the interpreter
1073    /// path, and the trace recorder's compile attempt always returns
1074    /// `None`. Intended for tests that want to verify the trait
1075    /// boundary works in a JIT-free configuration, and for the future
1076    /// `luna-core` build path that ships without Cranelift.
1077    ///
1078    /// Calling this on a Vm whose closures already populated
1079    /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
1080    /// cached entries — the dispatcher will still call into them. For
1081    /// a truly JIT-free run, call this immediately after construction.
1082    pub fn install_null_jit(&mut self) {
1083        self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
1084        self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
1085    }
1086
    /// Open the entire 5.5 standard library on a `new_minimal`-built Vm.
    /// `Vm::new` calls this; sandboxed embedders open libraries one at a
    /// time instead (`open_base`, `open_math`, `open_table`, …).
    ///
    /// Every library is opened unconditionally except `bit32`, which is
    /// only installed when the Vm's dialect is 5.2.
    pub fn open_all_libs(&mut self) {
        self.open_base();
        self.open_math();
        self.open_table();
        self.open_string();
        self.open_utf8();
        self.open_os_io();
        self.open_debug();
        self.open_coroutine();
        self.open_package();
        // PUC 5.2 introduced `bit32` and 5.3 retired it (the native bitwise
        // operators replace it on 64-bit integers). Only expose it under 5.2
        // so bitwise.lua's first line (`bit32.band(...)`) resolves without
        // leaking the global into newer dialects.
        if self.version == LuaVersion::Lua52 {
            self.open_bit32();
        }
    }
1108
    /// Install the base library (`print`, `type`, `pairs`, `tostring`,
    /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
    /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
    /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
    /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
    /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
    /// once per Vm.
    ///
    /// Thin wrapper over `crate::vm::builtins::open_base`.
    pub fn open_base(&mut self) {
        crate::vm::builtins::open_base(self);
    }
    /// Install the `math` standard library (thin wrapper over
    /// `crate::vm::lib_math::open_math`).
    pub fn open_math(&mut self) {
        crate::vm::lib_math::open_math(self);
    }
    /// Install the `table` standard library (thin wrapper over
    /// `crate::vm::lib_table::open_table`).
    pub fn open_table(&mut self) {
        crate::vm::lib_table::open_table(self);
    }
    /// Install the `string` standard library (and the shared string
    /// metatable). Thin wrapper over `crate::vm::lib_string::open_string`.
    pub fn open_string(&mut self) {
        crate::vm::lib_string::open_string(self);
    }
    /// Install the `utf8` standard library (5.3+). Thin wrapper over
    /// `crate::vm::lib_utf8::open_utf8`.
    pub fn open_utf8(&mut self) {
        crate::vm::lib_utf8::open_utf8(self);
    }
    /// Install the `os` and `io` standard libraries together. They are
    /// merged because file userdata shares state with both (`io.tmpname`
    /// and `os.tmpname` are the same function, `io.popen` wraps
    /// `os.execute`'s shell). Thin wrapper over
    /// `crate::vm::lib_os_io::open_os_io`.
    pub fn open_os_io(&mut self) {
        crate::vm::lib_os_io::open_os_io(self);
    }
    /// Install the `debug` standard library (introspection / hooks). Off by
    /// default for sandbox embedders. Thin wrapper over
    /// `crate::vm::lib_debug::open_debug`.
    pub fn open_debug(&mut self) {
        crate::vm::lib_debug::open_debug(self);
    }
    /// Install the `coroutine` standard library (thin wrapper over
    /// `crate::vm::lib_coroutine::open_coroutine`).
    pub fn open_coroutine(&mut self) {
        crate::vm::lib_coroutine::open_coroutine(self);
    }
    /// Install `package` plus the 5.1-only `module` and `package.seeall`
    /// aliases. The implementation lives next to the os/io library
    /// (`crate::vm::lib_os_io::open_package`).
    pub fn open_package(&mut self) {
        crate::vm::lib_os_io::open_package(self);
    }
    /// Install the 5.2-only `bit32` library (5.3+ retired it in favour of
    /// native bitwise ops on 64-bit integers). This method itself performs
    /// no version check; `open_all_libs` only calls it under 5.2.
    pub fn open_bit32(&mut self) {
        crate::vm::lib_bit32::open_bit32(self);
    }
1159
1160    /// xoshiro256** next.
1161    pub(crate) fn rng_next(&mut self) -> u64 {
1162        let s = &mut self.rng;
1163        let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1164        let t = s[1] << 17;
1165        s[2] ^= s[0];
1166        s[3] ^= s[1];
1167        s[1] ^= s[2];
1168        s[0] ^= s[3];
1169        s[2] ^= t;
1170        s[3] = s[3].rotate_left(45);
1171        result
1172    }
1173
1174    /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1175    pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1176        // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1177        // state), then 16 discards to spread the seed. Matches PUC's exact
1178        // sequence so the low-level conformance test passes.
1179        self.rng = [a, 0xff, b, 0];
1180        for _ in 0..16 {
1181            self.rng_next();
1182        }
1183    }
1184
    /// Wall-clock since VM creation (os.clock approximation): the time
    /// elapsed since the `Instant` captured in `started` when the `Vm` was
    /// built.
    pub(crate) fn uptime(&self) -> std::time::Duration {
        self.started.elapsed()
    }
1189
1190    /// Entropy for math.randomseed() with no arguments.
1191    pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1192        let t = std::time::SystemTime::now()
1193            .duration_since(std::time::UNIX_EPOCH)
1194            .map(|d| d.as_nanos() as u64)
1195            .unwrap_or(0);
1196        let addr = &self.rng as *const _ as u64;
1197        (t as i64, addr as i64)
1198    }
1199
1200    /// Allocate a native function object (no upvalues): builtin registration.
1201    pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1202        Value::Native(self.heap.new_native(f, Box::new([])))
1203    }
1204
1205    /// Allocate a native function object with captured upvalues.
1206    pub fn native_with(
1207        &mut self,
1208        f: crate::runtime::value::NativeFn,
1209        upvals: Box<[Value]>,
1210    ) -> Value {
1211        Value::Native(self.heap.new_native(f, upvals))
1212    }
1213
    /// Install the shared string metatable (string library, P04). The
    /// metatable is stored in slot 3 of `type_mt`; passing `None` clears it.
    pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
        self.type_mt[3] = mt;
    }
1218
    /// The current globals table (`_G` / `_ENV` source for new chunks),
    /// returned as a copy of the `Gc` handle.
    pub fn globals(&self) -> Gc<Table> {
        self.globals
    }
1223
1224    /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1225    /// Library code that pushes a known number of fresh slots — e.g.
1226    /// `table.unpack` returning N values — consults this to refuse when
1227    /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1228    /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1229    /// coroutine's already-built table eats a few slots, so an unpack of
1230    /// ~lim values can't fit.
1231    pub(crate) fn stack_room(&self) -> i64 {
1232        PUC_MAXSTACK - (self.stack.len() as i64)
1233    }
1234
    /// Repoint the thread's "global table" used by *future* `Vm::load` calls
    /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
    /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
    /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
    ///
    /// Only the `globals` handle is replaced; nothing else is touched here.
    pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
        self.globals = env;
    }
1242
    /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
    /// 5.5). Determines numeric semantics, available standard libraries, and
    /// metamethod behavior. The value is the one passed to the constructor.
    pub fn version(&self) -> LuaVersion {
        self.version
    }
1249
1250    /// Set a global by name. `v` may be any `IntoValue`: a primitive
1251    /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1252    /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1253    /// `Gc<NativeClosure>` handle.
1254    ///
1255    /// Returns `Err(LuaError)` only if the globals table overflows
1256    /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1257    /// String interning + key construction cannot fail.
1258    ///
1259    /// ```
1260    /// # use luna_core::vm::Vm;
1261    /// # use luna_core::version::LuaVersion;
1262    /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1263    /// vm.set_global("answer", 42).unwrap();
1264    /// vm.set_global("ratio", 0.5_f64).unwrap();
1265    /// vm.set_global("hello", "world").unwrap();
1266    /// let r = vm.eval("return answer, ratio, hello").unwrap();
1267    /// assert_eq!(r.len(), 3);
1268    /// ```
1269    pub fn set_global<V: crate::vm::IntoValue>(
1270        &mut self,
1271        name: &str,
1272        v: V,
1273    ) -> Result<(), LuaError> {
1274        let v = v.into_value(self);
1275        let k = Value::Str(self.heap.intern(name.as_bytes()));
1276        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1277        unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1278        self.heap
1279            .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1280        Ok(())
1281    }
1282
1283    /// Backward write barrier shorthand for native lib code: demote `t` from
1284    /// BLACK back to gray so the next propagate step re-traces its fields.
1285    /// No-op outside Propagate (parent is never BLACK at mutation time).
1286    pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1287        self.heap
1288            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1289    }
1290
1291    /// Forward write barrier shorthand: a closed upvalue is a single-slot
1292    /// container — `barrier_forward` is cheaper than `barrier_back` here.
1293    /// No-op outside Propagate.
1294    pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1295        self.heap
1296            .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1297    }
1298
    /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
    /// under non-MacroLua dialects (the macro is stored but the load
    /// path only consults the registry when
    /// `self.version == LuaVersion::MacroLua`).
    ///
    /// `name` is stored without the leading `@` — source code writes
    /// `@double(x)` to invoke a macro registered as `"double"`.
    ///
    /// Forwards straight to the per-Vm `macro_registry`.
    pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
        self.macro_registry.register(name, m);
    }
1309
    /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom) from
    /// this Vm's registry. Mostly useful for tests / dogfood resets.
    pub fn clear_macros(&mut self) {
        self.macro_registry.clear();
    }
1315
1316    /// Parse + compile a chunk and close it over the globals table.
1317    pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1318        // Reject oversize input *before* handing the parser/lexer a
1319        // potentially multi-GB slice. The PUC-shaped `not enough memory`
1320        // message keeps `heavy.lua::loadrep` compatibility: that test
1321        // accepts either `string length overflow` or `not enough memory`
1322        // as the failure mode for a feeder loop that outruns the host
1323        // allocator. See `set_loader_input_budget`.
1324        if src.len() > self.loader_input_budget {
1325            return Err(SyntaxError {
1326                line: 0,
1327                msg: b"not enough memory".to_vec(),
1328            });
1329        }
1330        // a precompiled (binary) chunk is undumped; source is parsed + compiled
1331        let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1332        if is_bytecode && !self.bytecode_loading {
1333            return Err(SyntaxError {
1334                line: 0,
1335                msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1336            });
1337        }
1338        let proto = if is_bytecode {
1339            let allow_puc = self.puc_bytecode_loading;
1340            crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1341                |msg| SyntaxError {
1342                    line: 0,
1343                    msg: msg.into_bytes(),
1344                },
1345            )?
1346        } else if self.version.is_macro_lua() {
1347            // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1348            // token vec, run the macro expander pre-pass against the
1349            // per-Vm registry, then hand the rewritten stream to
1350            // `parse_tokens`. The AST + compiler are dialect-agnostic
1351            // because by this point all `@`/quote tokens are gone.
1352            let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1353            let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1354            loop {
1355                let t = lexer.next_token()?;
1356                let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1357                raw.push(t);
1358                if eof {
1359                    break;
1360                }
1361            }
1362            // Drop the trailing Eof — expander operates on the body and
1363            // `parse_tokens` reinserts Eof when it runs out of tokens.
1364            raw.pop();
1365            let expanded = self.macro_registry.expand(raw)?;
1366            let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1367            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1368        } else {
1369            let ast = parse(src, self.version)?;
1370            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1371        };
1372        // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1373        // upvalue with the globals table when the closure has *exactly* one
1374        // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1375        // function with two-or-more upvalues keeps every cell at nil; the
1376        // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1377        // :293's `assert(x() == nil)` pins this contract.
1378        let n = proto.upvals.len();
1379        let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1380        if n == 0 {
1381            // synthetic main chunk has no declared upvalues, but the engine
1382            // still expects at least one cell so the host can probe via
1383            // `debug.upvalueid` etc. Match the historical luna shape.
1384            ups.push(
1385                self.heap
1386                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1387            );
1388        } else if n == 1 {
1389            ups.push(
1390                self.heap
1391                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1392            );
1393        } else {
1394            for _ in 0..n {
1395                ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1396            }
1397        }
1398        Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1399    }
1400
1401    /// Compile and run `src` as an anonymous chunk; return its results.
1402    /// Source name in the traceback is `"=eval"`. Syntax errors are
1403    /// surfaced as `LuaError` carrying the formatted PUC-style message
1404    /// (interned through the heap so the error value composes with
1405    /// `pcall` / `error_text` like any runtime error).
1406    pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
1407        self.eval_chunk(src, "=eval")
1408    }
1409
1410    /// Render an error value for messages/tests. Non-string errors —
1411    /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1412    /// (`"(error object is a table value)"`); embedders that need
1413    /// structured payloads should inspect `e.0` directly. Errors whose
1414    /// text starts with `"native panic:"` indicate a Rust panic
1415    /// crossed `catch_unwind` — the Vm may be inconsistent and should
1416    /// be dropped (do not reuse).
1417    pub fn error_text(&self, e: &LuaError) -> String {
1418        match e.0 {
1419            Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1420            v => format!("(error object is a {} value)", v.type_name()),
1421        }
1422    }
1423
1424    /// Call any callable value from the host (or from natives like pcall).
1425    pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1426        // host-level entry (no enclosing exec): drop any error state from a
1427        // prior call that propagated uncaught (`error_traceback` would
1428        // otherwise leak into the next debug.traceback call).
1429        if self.public_call_depth == 0 {
1430            self.error_traceback = None;
1431        }
1432        self.public_call_depth += 1;
1433        // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1434        // chunk whose body fits the S1 int-arith whitelist short-circuits
1435        // the whole interpreter dispatch and runs straight through the
1436        // mmap'd native code. The lookup is one Cell::get + one match —
1437        // the slow path (compile attempt on first reach) is paid once per
1438        // Proto.
1439        if args.is_empty()
1440            && let Value::Closure(cl) = f
1441            && let Some(vs) = self.try_jit_call(cl)
1442        {
1443            self.public_call_depth -= 1;
1444            return Ok(vs);
1445        }
1446        let r = self.call_value_impl(f, args, true);
1447        self.public_call_depth -= 1;
1448        r
1449    }
1450
1451    /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1452    /// `Some(values)` when the cached native fn is callable for a
1453    /// zero-arg call. (Non-zero-arg dispatch is handled by
1454    /// `try_jit_call_op` from inside `begin_call`.)
1455    fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1456        use crate::runtime::function::JitProtoState;
1457        if !self.jit.enabled {
1458            return None;
1459        }
1460        let proto = cl.proto;
1461        if let JitProtoState::Untried = proto.jit.get() {
1462            self.populate_jit_cache(proto);
1463        }
1464        match proto.jit.get() {
1465            JitProtoState::Compiled {
1466                entry,
1467                num_args: 0,
1468                returns_one,
1469                arg_float_mask: _,
1470                arg_table_mask: _,
1471                ret_is_float,
1472                ret_is_table,
1473            } => {
1474                // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1475                let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1476                // P11-S5c / S5d.J — install the active Vm + closure
1477                // for any Rust helper the JIT'd code may call (e.g.
1478                // `luna_jit_new_table`, `luna_jit_upval_get`) via
1479                // cranelift `Linkage::Import`. RAII clear on return.
1480                // Chunks with no upvalue reads don't touch the closure
1481                // slot, paying nothing.
1482                // v1.1 A1 Session A — route through chunk_compiler so
1483                // the NullJitBackend path stays inert. Raw-ptr arg
1484                // avoids the &mut self borrow conflict against the
1485                // shared self.jit.chunk_compiler read.
1486                let vm_ptr: *mut Vm = self;
1487                let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1488                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1489                let r = unsafe { f() };
1490                drop(_jit_vm_guard);
1491                // P11-S5d.E' — a JIT helper may have detected a metatable
1492                // on a table operand and parked a deopt request here.
1493                // Discard the sentinel value and return None so the caller
1494                // re-runs the call through the interpreter, which honours
1495                // __index/__newindex.
1496                if self.jit.pending_err.take().is_some() {
1497                    return None;
1498                }
1499                Some(if returns_one {
1500                    let v = if ret_is_float {
1501                        Value::Float(f64::from_bits(r as u64))
1502                    } else if ret_is_table {
1503                        Value::Table(crate::runtime::Gc::from_ptr(
1504                            r as *mut crate::runtime::Table,
1505                        ))
1506                    } else {
1507                        Value::Int(r)
1508                    };
1509                    vec![v]
1510                } else {
1511                    Vec::new()
1512                })
1513            }
1514            // Non-zero-arg Compiled state: call_value's empty-args
1515            // fast path can't drive it. Op::Call handles those.
1516            JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1517        }
1518    }
1519
1520    /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1521    /// `Compiled { … }` or `Failed`; idempotent on already-populated
1522    /// states (call sites guard with a get before invoking).
1523    ///
1524    /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1525    /// `proto.code`. Compiled artefacts live in the thread-local
1526    /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1527    /// `Vm`s loading the same source skip the cranelift compile step
1528    /// entirely.
1529    fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1530        use crate::runtime::function::JitProtoState;
1531        let version = self.version();
1532        let pre53 = version <= crate::version::LuaVersion::Lua53;
1533        // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1534        // are Float). The JIT's `GetUpval` ValueRead path uses this
1535        // to default-pin upvalue reads to Float without a tag check.
1536        let float_only = version <= crate::version::LuaVersion::Lua52;
1537        // v2.0 Track J sub-step J-B — split-borrow JitState so the
1538        // trait method can take `&mut dyn JitStorage` without
1539        // double-borrowing self.jit.
1540        let jit = &mut self.jit;
1541        let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1542        match jit
1543            .chunk_compiler
1544            .try_compile(storage, proto, pre53, float_only)
1545        {
1546            crate::jit::CompileResult::Compiled {
1547                entry,
1548                num_args,
1549                returns_one,
1550                arg_float_mask,
1551                arg_table_mask,
1552                ret_is_float,
1553                ret_is_table,
1554            } => {
1555                proto.jit.set(JitProtoState::Compiled {
1556                    entry,
1557                    num_args,
1558                    returns_one,
1559                    arg_float_mask,
1560                    arg_table_mask,
1561                    ret_is_float,
1562                    ret_is_table,
1563                });
1564            }
1565            crate::jit::CompileResult::Skipped => {
1566                proto.jit.set(JitProtoState::Failed);
1567            }
1568        }
1569    }
1570
1571    /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
1572    /// before `push_frame`. Returns `true` when the call was handled
1573    /// in-place (no new Lua frame). Constraints: every arg slot must
1574    /// be `Value::Int`, the cached arity must match the call site's
1575    /// `nargs`, the host wanted-count `wanted` is honoured by
1576    /// `finish_results`. Also bails when a debug hook is armed —
1577    /// JIT'd code does not fire line / call / return hooks, so any
1578    /// active hook makes the interpreter the source of truth.
1579    fn try_jit_call_op(
1580        &mut self,
1581        cl: Gc<LuaClosure>,
1582        func_slot: u32,
1583        nargs: u32,
1584        wanted: i32,
1585    ) -> bool {
1586        use crate::runtime::function::JitProtoState;
1587        if !self.jit.enabled {
1588            return false;
1589        }
1590        // Any active debug hook means the interpreter has to run the
1591        // call so the hook gets the expected events.
1592        if self.hook.func.is_some() || self.hook.rust_func.is_some() {
1593            return false;
1594        }
1595        let proto = cl.proto;
1596        if let JitProtoState::Untried = proto.jit.get() {
1597            self.populate_jit_cache(proto);
1598        }
1599        let JitProtoState::Compiled {
1600            entry,
1601            num_args,
1602            returns_one,
1603            arg_float_mask,
1604            arg_table_mask,
1605            ret_is_float,
1606            ret_is_table,
1607        } = proto.jit.get()
1608        else {
1609            return false;
1610        };
1611        if num_args as u32 != nargs {
1612            return false;
1613        }
1614        // Pack args into i64 bit-patterns per the per-slot expected
1615        // kind. A Float-typed slot accepts Value::Float verbatim and
1616        // promotes Value::Int(x) via i64 → f64; a Table-typed slot
1617        // accepts only Value::Table and passes the raw Gc ptr; an
1618        // Int-typed slot accepts only Value::Int. Any other shape
1619        // bails to the interpreter so the call's actual dynamics
1620        // (metamethod dispatch / type-coerce) take over.
1621        let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
1622            [0; crate::jit::MAX_JIT_ARITY as usize];
1623        for i in 0..num_args as usize {
1624            let v = self.stack[(func_slot + 1) as usize + i];
1625            let want_float = (arg_float_mask >> i) & 1 == 1;
1626            let want_table = (arg_table_mask >> i) & 1 == 1;
1627            args[i] = match (want_table, want_float, v) {
1628                (true, _, Value::Table(t)) => t.as_ptr() as i64,
1629                (false, false, Value::Int(x)) => x,
1630                (false, true, Value::Float(f)) => f.to_bits() as i64,
1631                (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
1632                _ => return false,
1633            };
1634        }
1635        // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
1636        // matching guard in `try_jit_call`.
1637        // v1.1 A1 Session A — route through chunk_compiler.
1638        let vm_ptr: *mut Vm = self;
1639        let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1640        // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1641        let r = unsafe {
1642            match num_args {
1643                0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
1644                1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
1645                2 => {
1646                    (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
1647                }
1648                3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
1649                    args[0], args[1], args[2],
1650                ),
1651                4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
1652                    args[0], args[1], args[2], args[3],
1653                ),
1654                _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
1655            }
1656        };
1657        drop(_jit_vm_guard);
1658        // P11-S5d.E' — see matching path in `try_jit_call`. A helper
1659        // flagged a metatable on a table operand; bail to the interpreter
1660        // so `push_frame` runs the call from scratch.
1661        if self.jit.pending_err.take().is_some() {
1662            return false;
1663        }
1664        // Write result at func_slot, replacing the closure value, then
1665        // hand to finish_results to pad/truncate per the call site's
1666        // `wanted` count.
1667        if returns_one {
1668            let v = if ret_is_float {
1669                Value::Float(f64::from_bits(r as u64))
1670            } else if ret_is_table {
1671                Value::Table(crate::runtime::Gc::from_ptr(
1672                    r as *mut crate::runtime::Table,
1673                ))
1674            } else {
1675                Value::Int(r)
1676            };
1677            self.stack[func_slot as usize] = v;
1678            self.finish_results(func_slot, 1, wanted);
1679        } else {
1680            self.finish_results(func_slot, 0, wanted);
1681        }
1682        true
1683    }
1684
1685    /// `call_value` with control over the `from_c` debug boundary. A `__close`
1686    /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1687    /// invokes it inside that ci), so it is called with `from_c = false`: its
1688    /// debug parent is the closing function, not a synthetic C level.
1689    fn call_value_impl(
1690        &mut self,
1691        f: Value,
1692        args: &[Value],
1693        from_c: bool,
1694    ) -> Result<Vec<Value>, LuaError> {
1695        if self.c_depth >= MAX_C_DEPTH {
1696            return Err(self.rt_err("stack overflow"));
1697        }
1698        self.c_depth += 1;
1699        let func_slot = self.stack.len() as u32;
1700        self.stack.push(f);
1701        self.stack.extend_from_slice(args);
1702        self.top = self.stack.len() as u32;
1703        let r = self.call_at(func_slot, args.len() as u32, from_c);
1704        self.c_depth -= 1;
1705        if r.is_err()
1706            && self.yielding.is_none()
1707            && self.terminating.is_none()
1708            && !self.host_yield_pending
1709            && self.pending_async_native_fut.is_none()
1710        {
1711            // A `coroutine.yield` in flight raises a sentinel error to unwind the
1712            // Rust stack, but the suspended coroutine's frames/registers (which
1713            // sit at/above `func_slot`) must survive for the next resume — so we
1714            // only truncate on a real error. A self-close termination is in the
1715            // same boat: the dying thread's state is discarded wholesale.
1716            // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1717            // the same boat as `yielding`: the next `EvalFuture::poll`
1718            // resumes the same call, so the in-flight frames must
1719            // survive.
1720            self.stack.truncate(func_slot as usize);
1721            self.top = func_slot;
1722        }
1723        r
1724    }
1725
1726    /// Invoke `f` with the running thread marked non-yieldable for the duration
1727    /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1728    /// boundary and errors instead of suspending. Used by library callbacks
1729    /// (sort comparator, gsub replacement) that run via synchronous Rust
1730    /// recursion and so could not be re-entered after a yield.
1731    pub(crate) fn call_noyield(
1732        &mut self,
1733        f: Value,
1734        args: &[Value],
1735    ) -> Result<Vec<Value>, LuaError> {
1736        self.nny += 1;
1737        let r = self.call_value(f, args);
1738        self.nny -= 1;
1739        r
1740    }
1741
1742    // ---- coroutines (P05) ----
1743
1744    pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1745        // The new coroutine inherits the creating thread's current globals
1746        // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1747        // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1748        // it here picks the creator regardless of which coro is running.
1749        self.heap.new_coro(body, self.globals)
1750    }
1751
1752    /// Is `t` the thread whose context is currently live in the VM?
1753    pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1754        match (self.current, t) {
1755            (None, None) => true,
1756            (Some(a), Some(b)) => a.ptr_eq(b),
1757            _ => false,
1758        }
1759    }
1760
1761    /// Read an open-upvalue slot from its owning thread's stack (the live VM
1762    /// stack if that thread is current, else its saved context).
1763    #[doc(hidden)]
1764    pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1765        let s = slot as usize;
1766        if self.is_current_thread(thread) {
1767            self.stack[s]
1768        } else {
1769            match thread {
1770                Some(co) => co.stack[s],
1771                None => self.main_ctx.as_ref().expect("main context").stack[s],
1772            }
1773        }
1774    }
1775
1776    fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1777        let s = slot as usize;
1778        if self.is_current_thread(thread) {
1779            self.stack[s] = v;
1780        } else {
1781            match thread {
1782                Some(co) => {
1783                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1784                    unsafe { co.as_mut() }.stack[s] = v;
1785                    // co.stack is traced by Coro::trace; demote co back to
1786                    // gray so propagate re-traces this slot if it was
1787                    // already black.
1788                    self.heap
1789                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1790                }
1791                None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1792            }
1793        }
1794    }
1795
1796    /// Whether `co` is the main thread's identity object.
1797    pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1798        self.main_coro.is_some_and(|m| m.ptr_eq(co))
1799    }
1800
1801    /// The status of `co` from the caller's view. The main thread's identity
1802    /// object has no stored status — it is "running" when nothing else runs,
1803    /// else "normal" (it resumed the active coroutine).
1804    pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1805        if self.is_main_coro(co) {
1806            if self.current.is_none() {
1807                CoroStatus::Running
1808            } else {
1809                CoroStatus::Normal
1810            }
1811        } else {
1812            co.status
1813        }
1814    }
1815
1816    /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1817    /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1818    /// context. Handlers see the coroutine's death error (if it died by error)
1819    /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1820    /// with error `e` and no handler overrode it; `Err` means a handler raised.
1821    pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1822        // re-entrant close: a __close handler closed its own coroutine while the
1823        // outer close is mid-flight (its context is live). Report success and let
1824        // the outer close finish — re-entering the swap would corrupt the stack.
1825        if self.current.is_some_and(|c| c.ptr_eq(co)) {
1826            return Ok(None);
1827        }
1828        // A chain of coroutines whose `__close` handlers each close the previous
1829        // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1830        // The calling handler's `call_value` has already pushed `c_depth` to the
1831        // cap, so here it reads as full first — report PUC's "C stack overflow"
1832        // before the next handler call would surface the plainer "stack overflow".
1833        if self.c_depth >= MAX_C_DEPTH {
1834            return Err(self.rt_err("C stack overflow"));
1835        }
1836        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1837        let death_err = unsafe { co.as_mut() }.error_value.take();
1838        // swap the caller's live context out (into a GC-rooted home) and the
1839        // coroutine's in, mirroring resume_coro, so the __close handlers run on
1840        // the coroutine's stack while everything stays rooted.
1841        let resumer = self.current;
1842        let rctx = self.take_ctx();
1843        match resumer {
1844            Some(r) => {
1845                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1846                let m = unsafe { r.as_mut() };
1847                m.stack = rctx.stack;
1848                m.frames = rctx.frames;
1849                m.open_upvals = rctx.open_upvals;
1850                m.tbc = rctx.tbc;
1851                m.top = rctx.top;
1852                m.pcall_depth = rctx.pcall_depth;
1853            }
1854            None => self.main_ctx = Some(rctx),
1855        }
1856        self.load_coro_ctx(co);
1857        self.current = Some(co);
1858        let result = self.close_slots(0, death_err);
1859        // discard the (now-closed) coroutine context and restore the caller
1860        let _ = self.take_ctx();
1861        match resumer {
1862            Some(r) => {
1863                self.load_coro_ctx(r);
1864                self.current = Some(r);
1865            }
1866            None => {
1867                let m = self.main_ctx.take().expect("main context saved");
1868                self.put_ctx(m);
1869                self.current = None;
1870            }
1871        }
1872        {
1873            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1874            let m = unsafe { co.as_mut() };
1875            m.status = CoroStatus::Dead;
1876            m.stack = Vec::new();
1877            m.frames = Vec::new();
1878            m.open_upvals = Vec::new();
1879            m.tbc = Vec::new();
1880            m.top = 0;
1881            m.pcall_depth = 0;
1882            m.resume_at = None;
1883            m.error_value = None;
1884        }
1885        result.map(|()| death_err)
1886    }
1887
1888    /// `coroutine.running`: the running thread plus whether it is the main one.
1889    pub(crate) fn running_thread(&self) -> (Value, bool) {
1890        match self.current {
1891            Some(co) => (Value::Coro(co), false),
1892            None => (Value::Coro(self.main_coro.expect("main coro")), true),
1893        }
1894    }
1895
1896    /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1897    /// thread) can yield. The main thread never can; any other coroutine can
1898    /// unless it is dead.
1899    pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1900        match co {
1901            Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1902            // the running thread can yield only outside any non-yieldable C call
1903            None => self.current.is_some() && self.nny == 0,
1904        }
1905    }
1906
1907    /// Why `coroutine.yield` may not suspend the running thread right now, as a
1908    /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1909    /// from "inside an unyieldable C call" (sort/gsub callback).
1910    pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1911        if self.current.is_none() {
1912            Some("attempt to yield from outside a coroutine")
1913        } else if self.nny > 0 {
1914            Some("attempt to yield across a C-call boundary")
1915        } else {
1916            None
1917        }
1918    }
1919
1920    /// The coroutine whose context is currently live (`None` on the main thread).
1921    pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
1922        self.current
1923    }
1924
1925    /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1926    /// its pending `__close` handlers, then signal termination. The handlers run
1927    /// here, in place, with the thread still non-yieldable (a yield in one hits
1928    /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1929    /// way a yield does — `exec_with` propagates it past any protecting pcall
1930    /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1931    /// clean death (or, if a handler raised, the coroutine's error).
1932    pub(crate) fn close_running(&mut self) -> LuaError {
1933        let death = match self.close_slots(0, None) {
1934            Ok(()) => None,
1935            Err(e) => Some(e.0),
1936        };
1937        self.terminating = Some(death);
1938        LuaError(Value::Nil)
1939    }
1940
1941    /// `coroutine.status` as seen by the caller.
1942    pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1943        match self.effective_coro_status(co) {
1944            CoroStatus::Suspended => "suspended",
1945            CoroStatus::Running => "running",
1946            CoroStatus::Normal => "normal",
1947            CoroStatus::Dead => "dead",
1948        }
1949    }
1950
1951    fn take_ctx(&mut self) -> SavedCtx {
1952        let saved = SavedCtx {
1953            stack: std::mem::take(&mut self.stack),
1954            frames: std::mem::take(&mut self.frames),
1955            open_upvals: std::mem::take(&mut self.open_upvals),
1956            tbc: std::mem::take(&mut self.tbc),
1957            top: self.top,
1958            pcall_depth: self.pcall_depth,
1959            hook: self.hook,
1960            globals: self.globals,
1961        };
1962        self.frames_resync(); // P17-D Week 1 — frames now empty.
1963        saved
1964    }
1965
1966    fn put_ctx(&mut self, c: SavedCtx) {
1967        self.stack = c.stack;
1968        self.frames = c.frames;
1969        self.open_upvals = c.open_upvals;
1970        self.tbc = c.tbc;
1971        self.top = c.top;
1972        self.pcall_depth = c.pcall_depth;
1973        self.hook = c.hook;
1974        self.globals = c.globals;
1975        self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
1976    }
1977
1978    /// Move a coroutine's saved context into the live VM fields.
1979    fn load_coro_ctx(&mut self, co: Gc<Coro>) {
1980        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1981        let m = unsafe { co.as_mut() };
1982        self.stack = std::mem::take(&mut m.stack);
1983        self.frames = std::mem::take(&mut m.frames);
1984        self.open_upvals = std::mem::take(&mut m.open_upvals);
1985        self.tbc = std::mem::take(&mut m.tbc);
1986        self.top = m.top;
1987        self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
1988        self.pcall_depth = m.pcall_depth;
1989        self.hook = m.hook;
1990        self.globals = m.globals;
1991    }
1992
1993    /// Save the live VM context back into a coroutine object.
1994    fn store_coro_ctx(&mut self, co: Gc<Coro>) {
1995        let c = self.take_ctx();
1996        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1997        let m = unsafe { co.as_mut() };
1998        m.stack = c.stack;
1999        m.frames = c.frames;
2000        m.open_upvals = c.open_upvals;
2001        m.tbc = c.tbc;
2002        m.top = c.top;
2003        m.pcall_depth = c.pcall_depth;
2004        m.hook = c.hook;
2005        m.globals = c.globals;
2006        // bulk-overwrite of every collectable field traced by Coro::trace:
2007        // demote the coro back to gray so propagate re-traces its new state.
2008        self.heap
2009            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2010    }
2011
2012    /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
2013    /// or errors. Ok(values) carries yielded or returned values; Err carries an
2014    /// error raised inside the coroutine (the coroutine becomes dead).
2015    pub(crate) fn resume_coro(
2016        &mut self,
2017        co: Gc<Coro>,
2018        args: Vec<Value>,
2019    ) -> Result<Vec<Value>, LuaError> {
2020        match co.status {
2021            CoroStatus::Suspended => {}
2022            CoroStatus::Dead => return Err(self.plain_err("cannot resume dead coroutine")),
2023            _ => return Err(self.plain_err("cannot resume non-suspended coroutine")),
2024        }
2025        if self.c_depth >= MAX_C_DEPTH {
2026            return Err(self.plain_err("C stack overflow"));
2027        }
2028        self.c_depth += 1;
2029        let resumer = self.current;
2030        // save the resumer's live context away
2031        let rctx = self.take_ctx();
2032        match resumer {
2033            Some(r) => {
2034                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2035                let m = unsafe { r.as_mut() };
2036                m.stack = rctx.stack;
2037                m.frames = rctx.frames;
2038                m.open_upvals = rctx.open_upvals;
2039                m.tbc = rctx.tbc;
2040                m.top = rctx.top;
2041                m.pcall_depth = rctx.pcall_depth;
2042                m.globals = rctx.globals;
2043                m.status = CoroStatus::Normal;
2044                // bulk overwrite of every traced field on r — mirror
2045                // store_coro_ctx's barrier_back so propagate re-traces r.
2046                self.heap
2047                    .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
2048            }
2049            None => self.main_ctx = Some(rctx),
2050        }
2051        // swap the coroutine in
2052        self.load_coro_ctx(co);
2053        {
2054            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2055            let m = unsafe { co.as_mut() };
2056            m.status = CoroStatus::Running;
2057            m.resumer = resumer;
2058        }
2059        // co.resumer is a traced Gc field; barrier_back covers the new
2060        // resumer reference and any future field writes during this call.
2061        self.heap
2062            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2063        self.current = Some(co);
2064
2065        // drive it
2066        let drive = if co.started {
2067            self.coro_continue(&args)
2068        } else {
2069            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2070            unsafe { co.as_mut() }.started = true;
2071            self.coro_first(co.body, &args)
2072        };
2073
2074        // classify: a self-close termination or a pending yield each win over
2075        // the (sentinel) error they raised to unwind the Rust stack.
2076        let (outcome, status) = if let Some(death) = self.terminating.take() {
2077            // the coroutine closed itself: it dies now, cleanly or with the
2078            // error a `__close` handler raised.
2079            match death {
2080                Some(e) => {
2081                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2082                    unsafe { co.as_mut() }.error_value = Some(e);
2083                    self.heap
2084                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2085                    (Err(LuaError(e)), CoroStatus::Dead)
2086                }
2087                None => (Ok(Vec::new()), CoroStatus::Dead),
2088            }
2089        } else {
2090            match self.yielding.take() {
2091                Some((vals, fslot, nres)) => {
2092                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2093                    unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2094                    (Ok(vals), CoroStatus::Suspended)
2095                }
2096                None => {
2097                    // died: a return is clean, an error is remembered so a later
2098                    // `coroutine.close` can report it (PUC lua_closethread).
2099                    // Capture the error-point traceback (set by `unwind` before
2100                    // popping the failing frames) and prepend a synthetic
2101                    // top entry for the C native that initiated the error
2102                    // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2103                    // on the dead coroutine still shows the error site
2104                    // (db.lua :848 family).
2105                    if drive.is_err() {
2106                        let mut tb = self.error_traceback.take().unwrap_or_default();
2107                        if let Some(nm) = self.errored_native.take() {
2108                            let mut prefixed: Vec<u8> = Vec::new();
2109                            prefixed.extend_from_slice(
2110                                format!("\n\t[C]: in function '{nm}'").as_bytes(),
2111                            );
2112                            prefixed.extend(tb);
2113                            tb = prefixed;
2114                        }
2115                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2116                        unsafe { co.as_mut() }.error_traceback = Some(tb);
2117                    }
2118                    if let Err(e) = drive {
2119                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2120                        unsafe { co.as_mut() }.error_value = Some(e.0);
2121                        self.heap
2122                            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2123                    }
2124                    (drive, CoroStatus::Dead)
2125                }
2126            }
2127        };
2128
2129        // save the coroutine's context back and restore the resumer
2130        self.store_coro_ctx(co);
2131        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2132        unsafe { co.as_mut() }.status = status;
2133        match resumer {
2134            Some(r) => {
2135                self.load_coro_ctx(r);
2136                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2137                unsafe { r.as_mut() }.status = CoroStatus::Running;
2138                self.current = Some(r);
2139            }
2140            None => {
2141                let m = self.main_ctx.take().expect("main context saved");
2142                self.put_ctx(m);
2143                self.current = None;
2144            }
2145        }
2146        self.c_depth -= 1;
2147        outcome
2148    }
2149
2150    /// First resume: install the body function at slot 0 and run.
2151    fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2152        self.stack.clear();
2153        self.stack.push(body);
2154        self.stack.extend_from_slice(args);
2155        self.top = self.stack.len() as u32;
2156        match self.begin_call(0, Some(args.len() as u32), -1, true) {
2157            Ok(true) => self.exec_with(1),
2158            Ok(false) => Ok(self.take_results(0)),
2159            Err(e) => Err(e),
2160        }
2161    }
2162
2163    /// Resume after a yield: deliver `args` as the results of the call that
2164    /// yielded, then continue the suspended thread.
2165    fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2166        let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2167        let n = args.len() as u32;
2168        // Restore the full register window of the suspended top frame: a yield
2169        // that unwound through a native (call_value) may have left the stack
2170        // shorter than the frame needs. `base + max_stack` is what push_frame
2171        // allocates; `fslot + n` covers the delivered yield results.
2172        let frame_need = self
2173            .frames
2174            .last()
2175            .and_then(CallFrame::lua)
2176            .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2177            .unwrap_or(0);
2178        let need = frame_need.max((fslot + n) as usize);
2179        if self.stack.len() < need {
2180            self.stack.resize(need, Value::Nil);
2181        }
2182        for (i, &v) in args.iter().enumerate() {
2183            self.stack[fslot as usize + i] = v;
2184        }
2185        self.finish_results(fslot, n, nres);
2186        // the suspended `coroutine.yield` (a C call) now returns its resume
2187        // values: fire the matching "return" hook PUC defers until the resume.
2188        self.hook_return(true, 1, n)?;
2189        self.exec_with(1)
2190    }
2191
2192    /// `coroutine.yield`: suspend the running coroutine, recording where to
2193    /// resume. Errors if called outside a coroutine. Returns a sentinel error
2194    /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2195    pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2196        let nres = self.native_nresults;
2197        self.yielding = Some((vals, func_slot, nres));
2198        // value is irrelevant: resume_coro consults `self.yielding`, not this
2199        LuaError(Value::Nil)
2200    }
2201
2202    /// Install or clear the debug hook on the running thread (`debug.sethook`
2203    /// without a thread argument). Arms the calling frame's `oldpc` to the
2204    /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2205    /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2206    /// native return: the very next traceexec compares against the sethook
2207    /// CALL's line. When the install statement and the following statement are
2208    /// on different source lines (db.lua :322), `changedline` fires for that
2209    /// first statement; when they share a line (db.lua :25 wrapper), they do
2210    /// not, so the wrapper line is not re-fired.
2211    pub(crate) fn install_hook(&mut self, hook: HookState) {
2212        self.hook = hook;
2213        if self.hook.line
2214            && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2215        {
2216            f.hook_oldpc = f.pc.saturating_sub(1);
2217        }
2218    }
2219
2220    /// Install a hook on `target` (`None`/current thread → the live VM fields;
2221    /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2222    /// with an optional thread argument.
2223    ///
2224    /// `target == None` means "no explicit thread argument" — PUC binds that
2225    /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2226    /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2227    /// of whether that's the main thread or a currently-resumed coroutine
2228    /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2229    /// `store_coro_ctx`). So a `None` target should always route to
2230    /// `install_hook` on the live fields. The pre-fix predicate gate
2231    /// `is_current_thread(target)` returned `false` when running inside a
2232    /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2233    /// and silently dropped the hook on the floor — the install happened on
2234    /// no thread at all.
2235    pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2236        if target.is_none() || self.is_current_thread(target) {
2237            self.install_hook(state);
2238        } else if let Some(co) = target {
2239            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2240            let m = unsafe { co.as_mut() };
2241            m.hook = state;
2242            if state.line
2243                && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2244            {
2245                f.hook_oldpc = u32::MAX;
2246            }
2247            // co.hook.func is a traced Value (Coro::trace covers it); demote
2248            // co back to gray so propagate sees the new hook function.
2249            self.heap
2250                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2251        }
2252    }
2253
2254    /// The hook state of `target` (`None`/current → the live VM state).
2255    pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2256        match target {
2257            t if self.is_current_thread(t) => self.hook,
2258            Some(co) => co.hook,
2259            None => self.hook,
2260        }
2261    }
2262
2263    /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
2264    /// hooks disabled (PUC clears the mask) and its results/stack growth are
2265    /// discarded so the interrupted frame's register window is untouched.
2266    /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
2267    fn run_hook(
2268        &mut self,
2269        event: &[u8],
2270        line: Option<i64>,
2271        from_native: bool,
2272    ) -> Result<(), LuaError> {
2273        // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
2274        // synchronous fn pointer call). Both Rust and Lua hooks may be
2275        // installed; both observe each event.
2276        if let Some(rh) = self.hook.rust_func {
2277            let evt = match event {
2278                b"call" => Some(RustHookEvent::Call),
2279                b"return" => Some(RustHookEvent::Return),
2280                b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
2281                b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
2282                b"count" => Some(RustHookEvent::Count),
2283                _ => None,
2284            };
2285            if let Some(evt) = evt {
2286                let was_in_hook = self.in_hook;
2287                self.in_hook = true;
2288                rh(self, evt);
2289                self.in_hook = was_in_hook;
2290            }
2291        }
2292        let Some(hook) = self.hook.func else {
2293            return Ok(());
2294        };
2295        let saved_top = self.top;
2296        let saved_len = self.stack.len();
2297        let name = Value::Str(self.heap.intern(event));
2298        let lv = line.map_or(Value::Nil, Value::Int);
2299        self.in_hook = true;
2300        // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
2301        // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
2302        // ci sat below `hookf` (the function being hooked). When that hooked
2303        // function is native, no Lua frame for it exists in luna's `frames`;
2304        // model it as a synthetic C level by pushing the hook with
2305        // `from_c = true` (then `c_frame_name` reads the caller's call
2306        // instruction → e.g. `name = "sethook"`). When the hooked function is
2307        // Lua (its frame is still on the stack), push with `from_c = false`
2308        // so the level descent lands on it directly. The hook's own frame
2309        // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
2310        // (PUC `CIST_HOOKED`).
2311        self.pending_is_hook = true;
2312        let r = self.call_value_impl(hook, &[name, lv], from_native);
2313        self.pending_is_hook = false;
2314        self.in_hook = false;
2315        self.stack.truncate(saved_len);
2316        self.top = saved_top;
2317        r.map(|_| ())
2318    }
2319
2320    /// Fire the "call" hook on entry to a function, if armed and not already in
2321    /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2322    /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2323    /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2324    /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2325    fn hook_call_with(
2326        &mut self,
2327        from_native: bool,
2328        nargs: u32,
2329        is_tail: bool,
2330    ) -> Result<(), LuaError> {
2331        if self.hook.call
2332            && !self.in_hook
2333            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2334        {
2335            self.hook_ftransfer = 1;
2336            self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2337            // PUC 5.1 didn't distinguish tail-call events — every call,
2338            // including tail-calls, fired plain `"call"`. 5.2 introduced
2339            // the separate `"tail call"` event (mask `"c"` covers both).
2340            // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2341            // "return","tail return","return","tail return"}`.
2342            let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2343                b"tail call"
2344            } else {
2345                b"call"
2346            };
2347            self.run_hook(event, None, from_native)?;
2348        }
2349        Ok(())
2350    }
2351
2352    pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2353        self.hook_call_with(from_native, nargs, false)
2354    }
2355
2356    /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2357    /// the first result slot relative to the activation's func slot, ntransfer
2358    /// the number of results.
2359    pub(crate) fn hook_return(
2360        &mut self,
2361        from_native: bool,
2362        ftransfer: u32,
2363        nresults: u32,
2364    ) -> Result<(), LuaError> {
2365        if self.hook.ret
2366            && !self.in_hook
2367            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2368        {
2369            self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2370            self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2371            self.run_hook(b"return", None, from_native)?;
2372        }
2373        Ok(())
2374    }
2375
2376    /// PUC "tail return" event — fires once per tail call that collapsed
2377    /// into the activation now returning, *after* its own "return" event.
2378    /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2379    fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2380        if self.hook.ret
2381            && !self.in_hook
2382            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2383        {
2384            self.run_hook(b"tail return", None, false)?;
2385        }
2386        Ok(())
2387    }
2388
2389    /// Call a metamethod with a single expected result.
2390    fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2391        let mut r = self.call_value(f, args)?;
2392        Ok(if r.is_empty() {
2393            Value::Nil
2394        } else {
2395            r.swap_remove(0)
2396        })
2397    }
2398
2399    /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2400    /// driven through the interpreter loop with a `Meta` continuation, so a
2401    /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2402    /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2403    /// Returns to the caller with the call set up — the opcode arm must do no
2404    /// further work on the running frame and let the loop iterate. `tm` is
2405    /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2406    /// born from this call inherits it via `pending_tm`, so
2407    /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2408    /// (db.lua :878).
2409    fn begin_meta_call(
2410        &mut self,
2411        func: Value,
2412        args: &[Value],
2413        action: MetaAction,
2414        tm: &'static str,
2415    ) -> Result<(), LuaError> {
2416        let saved_top = self.top;
2417        let cont_slot = self.stack.len() as u32;
2418        self.stack.push(func);
2419        self.stack.extend_from_slice(args);
2420        self.top = self.stack.len() as u32;
2421        frames_push_sync(
2422            &mut self.frames,
2423            &mut self.frames_top,
2424            CallFrame::Cont(NativeCont {
2425                kind: ContKind::Meta(MetaCont { action, saved_top }),
2426                func_slot: cont_slot,
2427                nresults: 1,
2428            }),
2429        );
2430        let saved_tm = self.pending_tm.replace(tm);
2431        // begin_call drives a Lua metamethod through the loop (returns true) or
2432        // runs a native one inline (returns false, leaving results at cont_slot
2433        // for the loop head to pick up); either way the Meta cont resolves there.
2434        let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2435        // Native callees never consumed pending_tm (push_frame is only hit on
2436        // a Lua callee); restore so it doesn't leak to a later push_frame.
2437        self.pending_tm = saved_tm;
2438        r?;
2439        Ok(())
2440    }
2441
2442    /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2443    fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2444        // v2.13 WUC read-time probe: a collectable key must be live at
2445        // the moment it is used. O(1) membership test against the
2446        // freed-pointer log — gc-verify diagnostic builds only; exact
2447        // under quarantining allocators (ASAN).
2448        #[cfg(feature = "gc-verify")]
2449        if matches!(key, Value::Str(_)) {
2450            let h = match key {
2451                Value::Str(s) => s.as_ptr() as usize,
2452                _ => unreachable!(),
2453            };
2454            if self.heap.recently_freed.contains(&h) {
2455                let (pc, reg_info) = match self.frames.last() {
2456                    Some(CallFrame::Lua(f)) => {
2457                        let pc = f.pc as usize;
2458                        let inst = f.closure.proto.code.get(pc.wrapping_sub(1));
2459                        (
2460                            pc,
2461                            inst.map(|i| {
2462                                format!(
2463                                    "op[pc-1]={:?} a={} b={} c={} base={}",
2464                                    i.op(),
2465                                    i.a(),
2466                                    i.b(),
2467                                    i.c(),
2468                                    f.base
2469                                )
2470                            })
2471                            .unwrap_or_default(),
2472                        )
2473                    }
2474                    _ => (0, String::new()),
2475                };
2476                panic!(
2477                    "[gc-verify] op_index READ of dead string key {h:#x} \
2478                     (gc_top {}, top {}, pc {pc}, {reg_info})",
2479                    self.gc_top, self.top,
2480                );
2481            }
2482        }
2483        match self.index_step(t, key)? {
2484            MmOut::Done(v) => self.stack[dst as usize] = v,
2485            MmOut::Mm { func, recv } => {
2486                self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2487            }
2488            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2489        }
2490        Ok(())
2491    }
2492
2493    /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2494    fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2495        match self.newindex_step(t, key, v)? {
2496            MmOut::Done(_) => {}
2497            MmOut::Mm { func, recv } => {
2498                self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2499            }
2500            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2501        }
2502        Ok(())
2503    }
2504
2505    /// Apply a comparison opcode's outcome: a known boolean drives the
2506    /// conditional skip directly; a metamethod is called yieldably, its
2507    /// truthiness driving the skip on return.
2508    fn op_compare(
2509        &mut self,
2510        step: MmOut,
2511        l: Value,
2512        r: Value,
2513        k: bool,
2514        tm: &'static str,
2515    ) -> Result<(), LuaError> {
2516        match step {
2517            MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2518            MmOut::Mm { func, .. } => {
2519                self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2520            }
2521            MmOut::CompareSynth { func } => {
2522                // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2523                // negation are driven through `MetaAction::Compare` so the
2524                // metamethod call can yield like any other compare.
2525                self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2526            }
2527        }
2528        Ok(())
2529    }
2530
2531    /// Complete a VM instruction whose metamethod just returned `result` (PUC
2532    /// `luaV_finishOp`). The running frame is already back on top.
2533    fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2534        match action {
2535            MetaAction::Store { dst } => self.stack[dst as usize] = result,
2536            MetaAction::Discard => {}
2537            MetaAction::Compare { k, negate } => {
2538                let t = if negate {
2539                    !result.truthy()
2540                } else {
2541                    result.truthy()
2542                };
2543                self.cond_skip(t, k);
2544            }
2545            MetaAction::Concat { dst, base_a } => {
2546                self.stack[dst as usize] = result;
2547                self.top = dst + 1;
2548                self.concat_run(base_a)?;
2549            }
2550        }
2551        Ok(())
2552    }
2553
2554    // ---- metatables ----
2555
2556    pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2557        match v {
2558            Value::Table(t) => t.metatable(),
2559            Value::Userdata(u) => u.metatable(),
2560            v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2561        }
2562    }
2563
2564    /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2565    /// non-table). No-op for tables (they carry their own).
2566    pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2567        if let Some(i) = type_mt_slot(v) {
2568            self.type_mt[i] = mt;
2569        }
2570    }
2571
2572    /// The metamethod of `v` for `mm`, or nil.
2573    pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2574        match self.metatable_of(v) {
2575            Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2576            None => Value::Nil,
2577        }
2578    }
2579
2580    /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2581    /// only fires when both operands carry a metatable that exposes the same
2582    /// implementation. Returns the metamethod to call, or `Nil` when no
2583    /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2584    /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2585    pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2586        let mt1 = self.metatable_of(l);
2587        let Some(mt1) = mt1 else { return Value::Nil };
2588        let key = Value::Str(self.mm_names[mm as usize]);
2589        let tm1 = mt1.get(key);
2590        if tm1.is_nil() {
2591            return Value::Nil;
2592        }
2593        let mt2 = self.metatable_of(r);
2594        let Some(mt2) = mt2 else { return Value::Nil };
2595        if mt1.as_ptr() == mt2.as_ptr() {
2596            return tm1;
2597        }
2598        let tm2 = mt2.get(key);
2599        if tm2.is_nil() {
2600            return Value::Nil;
2601        }
2602        if tm1.raw_eq(tm2) {
2603            return tm1;
2604        }
2605        Value::Nil
2606    }
2607
2608    /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2609    /// or full userdata whose metatable carries a string `__name` reports that
2610    /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2611    pub(crate) fn obj_typename(&self, v: Value) -> String {
2612        if matches!(v, Value::Table(_) | Value::Userdata(_))
2613            && let Value::Str(s) = self.get_mm(v, Mm::Name)
2614        {
2615            return String::from_utf8_lossy(s.as_bytes()).into_owned();
2616        }
2617        v.type_name().to_string()
2618    }
2619
2620    fn call_at(
2621        &mut self,
2622        func_slot: u32,
2623        nargs: u32,
2624        from_c: bool,
2625    ) -> Result<Vec<Value>, LuaError> {
2626        if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2627            self.exec()
2628        } else {
2629            // native completed inline; results at func_slot..top
2630            Ok(self.take_results(func_slot))
2631        }
2632    }
2633
2634    /// Switch the `collectgarbage` mode, returning the previous mode name.
2635    pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2636        std::mem::replace(&mut self.gc_mode, new)
2637    }
2638
2639    /// Whether the current `collectgarbage` mode is "generational" (where a
2640    /// "step" is a minor collection — a full atomic pass — rather than a paced
2641    /// incremental sweep).
2642    pub(crate) fn gc_mode_is_generational(&self) -> bool {
2643        self.gc_mode == "generational"
2644    }
2645
2646    /// Current `stepsize` pacing parameter (PUC: 0 means an unbounded step that
2647    /// completes a whole cycle at once).
2648    pub(crate) fn gc_stepsize(&self) -> i64 {
2649        self.gc_stepsize
2650    }
2651
2652    /// `collectgarbage("param", name [,value])`: read (or set, returning the
2653    /// previous value of) a pacing parameter. Returns `None` for an unknown
2654    /// name so the caller can raise PUC's `invalid parameter` error. The
2655    /// collector is stop-the-world, so these only round-trip for API fidelity.
2656    pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2657        let slot = match name {
2658            b"pause" => &mut self.gc_pause,
2659            b"stepmul" => &mut self.gc_stepmul,
2660            b"stepsize" => &mut self.gc_stepsize,
2661            _ => return None,
2662        };
2663        let prev = *slot;
2664        if let Some(v) = set {
2665            *slot = v;
2666        }
2667        Some(prev)
2668    }
2669
2670    /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2671    /// paced sweep via `Vm::gc_step`.
2672    ///
2673    /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2674    /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2675    /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2676    /// suspected UAF is structurally closed — born objects no longer become
2677    /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2678    ///
2679    /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2680    /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2681    /// O(SWEEP_DIVISOR) safe-points regardless of size.
2682    #[inline(always)]
2683    pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2684        if self.gc_finalizing {
2685            return;
2686        }
2687        if !self.heap.gc_due() {
2688            return;
2689        }
2690        // v2.5 P1B-2E: tighten to bare `live_top`. The v2.2.0
2691        // `live_top.max(self.top)` workaround is now obsoleted by
2692        // v2.3's `finish_results` slot-clear + v2.5 P1B-2A
2693        // (Op::TailCall collapse slot-clear) + v2.5 P1B-2B
2694        // (pcall unwind slot-clear). PUC L->top discipline is now
2695        // mirrored at every frame-pop site.
2696        self.gc_top = live_top;
2697        // PUC stepmul: % of allocation rate. Higher = more GC work per
2698        // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2699        // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2700        const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2701        const MIN_BUDGET: usize = 64_000;
2702        let stepmul = self.gc_stepmul.max(1) as usize;
2703        let divisor = (SWEEP_BASE / stepmul).max(1);
2704        let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2705        if self.gc_step(budget) {
2706            self.heap.rearm_gc_pause(self.gc_pause);
2707        }
2708    }
2709
2710    /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2711    /// roots (open upvalues, which are not first-class Values). Shared by the
2712    /// full collector and the incremental-sweep driver so both snapshot the
2713    /// exact same live set.
2714    fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2715        let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2716        roots.push(Value::Table(self.globals));
2717        for mt in self.type_mt.into_iter().flatten() {
2718            roots.push(Value::Table(mt));
2719        }
2720        for &n in &self.mm_names {
2721            roots.push(Value::Str(n));
2722        }
2723        // Root the running thread's live registers (PUC marks [stack, top)).
2724        // `gc_top` is the instruction-level cursor of the last GC
2725        // safe-point: allocation safe-points set it via
2726        // `maybe_collect_garbage(live_top)`, and `begin_call` raises it
2727        // to the callee's argument top when entering a native — PUC's
2728        // `L->top = func + 1 + nargs` C-call discipline. Without that
2729        // raise, an explicit `collectgarbage()` collected with a STALE
2730        // cursor from some earlier (lower) safe-point and freed its own
2731        // caller's register-held strings — UAF-C
2732        // (STATUS_ACCESS_VIOLATION on Windows / ASAN heap-use-after-free
2733        // on Linux; the v2.13 WUC gc-verify frame audit pinpointed the
2734        // under-rooted slots). Values stranded above the cursor stay
2735        // excluded so weak-table entries are not spuriously pinned
2736        // (gc.lua:544 suspended-coroutine collection).
2737        let live = (self.gc_top as usize).min(self.stack.len());
2738        roots.extend_from_slice(&self.stack[..live]);
2739        for cf in &self.frames {
2740            match cf {
2741                CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2742                CallFrame::Cont(NativeCont {
2743                    kind: ContKind::Xpcall { handler },
2744                    ..
2745                }) => roots.push(*handler),
2746                CallFrame::Cont(NativeCont {
2747                    kind: ContKind::Close(cc),
2748                    ..
2749                }) => {
2750                    // Root the error threaded through this close chain so a
2751                    // `collectgarbage()` inside a sibling `__close` handler
2752                    // does not free it before the next handler is invoked
2753                    // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2754                    if let Some(e) = cc.pending {
2755                        roots.push(e);
2756                    }
2757                    if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2758                        roots.push(err);
2759                    }
2760                }
2761                CallFrame::Cont(_) => {}
2762            }
2763        }
2764        if let Some(e) = self.closing_err {
2765            roots.push(e);
2766        }
2767        // B12 host roots — Lua-facade handles keep their referenced
2768        // values alive across calls/yields. Trace the whole vector;
2769        // unused slots (post-`unpin_all`) carry Value::Nil which the
2770        // GC ignores.
2771        for slot in &self.host_roots {
2772            // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2773            roots.push(slot.value);
2774        }
2775        // v2.1 — `table.sort` and similar builtins stash their working
2776        // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2777        // comparator callback doesn't free strings/tables snapshotted
2778        // off the live table (sort.lua's `load(..)(); collectgarbage()`
2779        // compare regression).
2780        for buf in &self.sort_scratch {
2781            roots.extend_from_slice(buf);
2782        }
2783        // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2784        // mid-execution. Without rooting them here, a `collectgarbage()`
2785        // invoked inside the running native (sort.lua AA `load(..)();
2786        // collectgarbage()` compare callback regression) sweeps the
2787        // closure that's actively executing, leaving `nc.upvals`
2788        // dangling and the Rust local `nc` pointing at recycled memory
2789        // — the SIGSEGV pops on the very next field access or pop.
2790        for &nc in &self.running_natives {
2791            roots.push(Value::Native(nc));
2792        }
2793        // the running thread's debug hook (suspended threads root theirs via
2794        // Coro::trace / the main_ctx sweep below)
2795        if let Some(h) = self.hook.func {
2796            roots.push(h);
2797        }
2798        // the running coroutine (its saved-context fields live in the VM, but
2799        // the object itself + its resumer chain must stay reachable)
2800        if let Some(co) = self.current {
2801            roots.push(Value::Coro(co));
2802        }
2803        if let Some(mc) = self.main_coro {
2804            roots.push(Value::Coro(mc));
2805        }
2806        // debug.getregistry() and io library state
2807        if let Some(r) = self.registry {
2808            roots.push(Value::Table(r));
2809        }
2810        if let Some(mt) = self.file_mt {
2811            roots.push(Value::Table(mt));
2812        }
2813        if let Some(f) = self.io_input {
2814            roots.push(Value::Userdata(f));
2815        }
2816        if let Some(f) = self.io_output {
2817            roots.push(Value::Userdata(f));
2818        }
2819        // the main thread's saved context while a coroutine runs
2820        if let Some(m) = &self.main_ctx {
2821            roots.extend_from_slice(&m.stack);
2822            if let Some(h) = m.hook.func {
2823                roots.push(h);
2824            }
2825            for cf in &m.frames {
2826                match cf {
2827                    CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2828                    CallFrame::Cont(NativeCont {
2829                        kind: ContKind::Xpcall { handler },
2830                        ..
2831                    }) => roots.push(*handler),
2832                    CallFrame::Cont(_) => {}
2833                }
2834            }
2835        }
2836        let mut extra: Vec<*mut GcHeader> = self
2837            .open_upvals
2838            .iter()
2839            .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2840            .collect();
2841        if let Some(m) = &self.main_ctx {
2842            extra.extend(
2843                m.open_upvals
2844                    .iter()
2845                    .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2846            );
2847        }
2848        (roots, extra)
2849    }
2850
2851    /// Run a full collection with the VM's roots, then run any `__gc`
2852    /// finalizers the collection scheduled. A no-op (returns 0) when already
2853    /// inside a finalizer — the collector is not reentrant (PUC).
2854    pub fn collect_garbage(&mut self) -> usize {
2855        if self.gc_finalizing {
2856            return 0;
2857        }
2858        let (roots, extra) = self.gc_roots();
2859        let freed = self.heap.collect_ex(&roots, &extra);
2860        #[cfg(feature = "gc-verify")]
2861        self.verify_frame_regs_live("collect_garbage");
2862        self.run_finalizers();
2863        freed
2864    }
2865
2866    /// v2.13 WUC `gc-verify` — after a collect, every register slot the
2867    /// collector just rooted (`[0, max(gc_top, top))` — the same bound
2868    /// `gc_roots` uses) must hold a live value. A dead value inside the
2869    /// rooted range means the root snapshot and the sweep disagreed —
2870    /// the bug class behind UAF-C. (Slots ABOVE the bound may hold
2871    /// stale dead values legitimately; the interpreter's contract is
2872    /// that it writes them before reading.)
2873    #[cfg(feature = "gc-verify")]
2874    pub(crate) fn verify_frame_regs_live(&self, ctx: &str) {
2875        let live = self.heap.debug_live_set();
2876        let header = |v: Value| -> Option<usize> {
2877            match v {
2878                Value::Str(s) => Some(s.as_ptr() as usize),
2879                Value::Table(t) => Some(t.as_ptr() as usize),
2880                Value::Closure(c) => Some(c.as_ptr() as usize),
2881                Value::Native(n) => Some(n.as_ptr() as usize),
2882                Value::Coro(c) => Some(c.as_ptr() as usize),
2883                Value::Userdata(u) => Some(u.as_ptr() as usize),
2884                _ => None,
2885            }
2886        };
2887        let bound = (self.gc_top as usize).min(self.stack.len());
2888        for i in 0..bound {
2889            if let Some(h) = header(self.stack[i]) {
2890                if !live.contains(&h) {
2891                    panic!(
2892                        "[gc-verify] {ctx}: rooted stack slot {i} (gc_top {}, top {}) \
2893                         holds a dead value {h:#x} after collect",
2894                        self.gc_top, self.top,
2895                    );
2896                }
2897            }
2898        }
2899        // Diagnostic tier: a dead value ABOVE the cursor is only a bug if
2900        // that register is a named local still in scope (the interpreter
2901        // WILL read it). Cross-check against the proto's LocVar table.
2902        for (fi, cf) in self.frames.iter().enumerate() {
2903            if let CallFrame::Lua(f) = cf {
2904                let base = f.base as usize;
2905                let maxs = f.closure.proto.max_stack as usize;
2906                let hi = (base + maxs).min(self.stack.len());
2907                let pc = f.pc;
2908                for i in bound.max(base)..hi {
2909                    if let Some(h) = header(self.stack[i]) {
2910                        if !live.contains(&h) {
2911                            let reg = (i - base) as u32;
2912                            if let Some(lv) = f
2913                                .closure
2914                                .proto
2915                                .locvars
2916                                .iter()
2917                                .find(|lv| lv.reg == reg && lv.start_pc <= pc && pc < lv.end_pc)
2918                            {
2919                                panic!(
2920                                    "[gc-verify] {ctx}: frame {fi} IN-SCOPE LOCAL '{}' \
2921                                     (reg {reg}, abs {i}, pc {pc}, gc_top {}) holds a \
2922                                     dead value {h:#x} — live_top cursor excluded a \
2923                                     live named local",
2924                                    lv.name, self.gc_top,
2925                                );
2926                            }
2927                        }
2928                    }
2929                }
2930            }
2931        }
2932    }
2933
2934    /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2935    /// threw; gc.lua's "errors during collection" probe relies on it. This
2936    /// variant runs the same cycle but propagates the captured finalizer
2937    /// error to the explicit caller.
2938    pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2939        if self.gc_finalizing {
2940            return Ok(0);
2941        }
2942        let (roots, extra) = self.gc_roots();
2943        let freed = self.heap.collect_ex(&roots, &extra);
2944        #[cfg(feature = "gc-verify")]
2945        self.verify_frame_regs_live("collect_garbage_propagating");
2946        self.run_finalizers_or_err()?;
2947        Ok(freed)
2948    }
2949
2950    /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
2951    /// should report fail rather than collect).
2952    pub(crate) fn gc_is_finalizing(&self) -> bool {
2953        self.gc_finalizing
2954    }
2955
2956    /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2957    /// = true indicates more pieces follow (concatenated until the first
2958    /// `to_cont = false` call flushes the whole line). Mirrors
2959    /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2960    ///   * If the buffer is fresh, `to_cont` is false, and the message is
2961    ///     `@<word>`, treat as a control message — only `@on` / `@off` are
2962    ///     recognised; any other `@…` is silently ignored.
2963    ///   * Otherwise, while the state is `Off`, drop the piece; while `On`,
2964    ///     accumulate, and flush to stderr + `warn_log` on the
2965    ///     non-continuation call.
2966    pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
2967        if self.warn_buf.is_empty()
2968            && !to_cont
2969            && let Some(b'@') = msg.first().copied()
2970        {
2971            match &msg[1..] {
2972                b"on" => self.warn_state = WarnState::On,
2973                b"off" => self.warn_state = WarnState::Off,
2974                _ => {} // unknown control — silently ignored (PUC checkcontrol)
2975            }
2976            return;
2977        }
2978        if self.warn_state == WarnState::Off {
2979            // drop continuation pieces too — PUC `warnfoff` is the trampoline
2980            return;
2981        }
2982        self.warn_buf.extend_from_slice(msg);
2983        if !to_cont {
2984            let line = std::mem::take(&mut self.warn_buf);
2985            eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
2986            self.warn_log.push(line);
2987        }
2988    }
2989
2990    /// Drain the in-process warning log (one entry per emitted message, sans
2991    /// `"Lua warning: "` prefix and newline). For test harnesses that want to
2992    /// assert on warn output without scraping stderr.
2993    pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
2994        std::mem::take(&mut self.warn_log)
2995    }
2996
2997    /// Arm the cooperative instruction budget (P09 embedding). The run loop
2998    /// decrements this once per dispatch turn; on zero it raises a catchable
2999    /// `"instruction budget exceeded"` error and disarms itself so the host
3000    /// can resume with a fresh budget on the next call. `None` removes the
3001    /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
3002    /// short-script semantics.
3003    pub fn set_instr_budget(&mut self, budget: Option<i64>) {
3004        self.instr_budget = budget;
3005    }
3006
3007    /// Remaining instruction budget (None when unbounded).
3008    pub fn instr_budget_remaining(&self) -> Option<i64> {
3009        self.instr_budget
3010    }
3011
3012    /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
3013    /// **must** disable JIT when relying on `instr_budget` — see the
3014    /// `jit_enabled` field doc for the rationale.
3015    pub fn set_jit_enabled(&mut self, enabled: bool) {
3016        self.jit.enabled = enabled;
3017    }
3018
3019    /// Current JIT enable state.
3020    pub fn jit_enabled(&self) -> bool {
3021        self.jit.enabled
3022    }
3023
3024    /// Toggle the trace JIT (P12). Off by default while the sprint
3025    /// develops. When enabled, hot back-edges are counted on
3026    /// `Proto.trace_hot_count`; once the counter passes
3027    /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
3028    /// mode at the back-edge target. Stays a no-op until S2's
3029    /// trace lowerer and S3's dispatcher land.
3030    pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
3031        self.jit.trace_enabled = enabled;
3032    }
3033
3034    /// P16-A — opt-in flag for the self-link cycle catch. See field
3035    /// docs for the correctness blocker. Default `false`.
3036    pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
3037        self.jit.p16_self_link_enabled = enabled;
3038    }
3039
3040    /// Current state of the P16-A self-link cycle catch.
3041    pub fn p16_self_link_enabled(&self) -> bool {
3042        self.jit.p16_self_link_enabled
3043    }
3044
3045    /// Current trace-JIT enable state.
3046    pub fn trace_jit_enabled(&self) -> bool {
3047        self.jit.trace_enabled
3048    }
3049
3050    /// Number of traces that have closed cleanly (looped back to the
3051    /// head PC) since this Vm was constructed. Cumulative; used by
3052    /// tests + tuning. Will become the dominant signal once S2's
3053    /// compile + cache lands.
3054    pub fn trace_closed_count(&self) -> u64 {
3055        self.jit.counters.closed
3056    }
3057
3058    /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
3059    /// hit an un-recordable op — the latter lands at S2).
3060    pub fn trace_aborted_count(&self) -> u64 {
3061        self.jit.counters.aborted
3062    }
3063
3064    /// P13-S13-G v2 — number of compiled traces whose close shape
3065    /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
3066    /// pin `dispatchable=false` because the dispatcher can't
3067    /// resume at a depth>0 PC without the matching CallFrames.
3068    /// S4-step4b's frame-mat helper could synthesise those, but
3069    /// the InlineAbort emit path isn't wired up yet — fresh
3070    /// pickup work for S13-G v2-full.
3071    pub fn trace_inline_abort_count(&self) -> u64 {
3072        self.jit.counters.inline_abort
3073    }
3074
3075    /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
3076    pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
3077        &self.jit.counters.dispatch_off_reasons
3078    }
3079
3080    /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
3081    pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
3082        &self.jit.counters.compile_failed_reasons
3083    }
3084
3085    /// P13-S13-H — see `JitCounters::closed_lens`. Returns
3086    /// `(is_call_triggered, ops_len)` for every trace that closed.
3087    pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
3088        &self.jit.counters.closed_lens
3089    }
3090
3091    /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
3092    /// Per-reason close-cause counts (recorder-side abort/discard +
3093    /// lowerer-side dispatch_off labels) keyed by `&'static str`.
3094    pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
3095        &self.jit.counters.close_cause_counts
3096    }
3097
3098    /// v2.0 Track-R R3b — number of compiled traces whose
3099    /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
3100    /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
3101    /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
3102    /// hot loop with p16-on. R3b keeps `dispatchable = false` even
3103    /// when this count bumps; R3d will lift it.
3104    pub fn trace_downrec_link_compiled_count(&self) -> u64 {
3105        self.jit.counters.downrec_link_compiled
3106    }
3107
3108    /// v2.0 Track-R R3c — see
3109    /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
3110    /// of times the dispatcher's `is_downrec_sentinel` arm fired and
3111    /// classified the return as a caller-pc-guard HIT.
3112    pub fn trace_downrec_dispatched_count(&self) -> u64 {
3113        self.jit.counters.downrec_dispatched
3114    }
3115
3116    /// v2.0 Track-R R3c — see
3117    /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
3118    /// times the dispatcher entered a `downrec_link`-bearing trace and
3119    /// the trace returned via the lowerer's deopt block (caller-pc
3120    /// guard MISS), or the dispatcher itself force-deopted via the
3121    /// stitch-cycle checkpoint.
3122    pub fn trace_downrec_deopt_count(&self) -> u64 {
3123        self.jit.counters.downrec_deopt
3124    }
3125
3126    /// v2.0 Track-R R3d — see
3127    /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3128    /// Number of compiled traces whose lowerer emitted a multi-way
3129    /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3130    /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3131    pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3132        self.jit.counters.multi_way_guard_emitted
3133    }
3134
3135    /// P12-S2.C — number of closed traces the lowerer compiled and
3136    /// parked on `Proto.traces`. Re-records of the same head_pc are
3137    /// deduped (the second close finds the head_pc already cached
3138    /// and skips compile), so this never exceeds `trace_closed_count`.
3139    pub fn trace_compiled_count(&self) -> u64 {
3140        self.jit.counters.compiled
3141    }
3142
3143    /// v2.1 Phase 1I.B — number of times the recorder captured a
3144    /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3145    /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3146    /// by the Phase 1I.B opt-in fire test to verify the env gate
3147    /// wiring round-trips end-to-end (env -> recorder -> snapshot
3148    /// -> counter -> getter -> assertion).
3149    pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3150        self.jit.counters.field_ic_snapshot_captured
3151    }
3152
3153    /// P12-S2.C — number of closed traces the lowerer rejected
3154    /// (any of the bail conditions in
3155    /// `crate::jit::trace::try_compile_trace`).
3156    pub fn trace_compile_failed_count(&self) -> u64 {
3157        self.jit.counters.compile_failed
3158    }
3159
3160    /// P12-S3 — number of times the dispatcher jumped into a
3161    /// compiled trace. Bumps on every entry; `trace_deopt_count`
3162    /// counts the subset where the trace returned with a parked
3163    /// `jit_pending_err`.
3164    pub fn trace_dispatched_count(&self) -> u64 {
3165        self.jit.counters.dispatched
3166    }
3167
3168    /// P12-S3 — number of trace entries that came back with
3169    /// `jit_pending_err` set (typically a metatable shadowed an
3170    /// index inside a helper, forcing the dispatcher to fall back
3171    /// to the interpreter without committing the trace's result).
3172    pub fn trace_deopt_count(&self) -> u64 {
3173        self.jit.counters.deopt
3174    }
3175
3176    /// P15-A v1 — number of times the dispatcher started a side
3177    /// trace recording (an `exit_hit_counts` slot crossed
3178    /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3179    /// was None and trace JIT was enabled). Each unit is exactly one
3180    /// `start_side_trace` call; the actual compile success counts
3181    /// under [`Self::trace_compiled_count`] like any other trace.
3182    /// Probe use: distinguishes the "side-trace pipeline fired"
3183    /// signal from the "primary back-edge / call-trigger fired"
3184    /// signal so v0-v3 architectural progress is visible without
3185    /// reading per-counter histograms.
3186    pub fn trace_side_trace_started_count(&self) -> u64 {
3187        self.jit.counters.side_trace_started
3188    }
3189
3190    /// P15-A v2-A — number of side-trace recordings that closed,
3191    /// compiled successfully, AND patched their parent's
3192    /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3193    /// dispatch through these ptrs yet (v2-B/C job), but the
3194    /// counter + ptr write proves the compile + link pipeline is
3195    /// complete end-to-end.
3196    pub fn trace_side_trace_compiled_count(&self) -> u64 {
3197        self.jit.counters.side_trace_compiled
3198    }
3199
3200    /// P15-A v2-C-A5-C — number of side traces that compiled
3201    /// successfully but were SHEDDED by the close-handler shape-
3202    /// match gate (`exit_tags_match_entry_tags`). High ratios
3203    /// vs. `trace_side_trace_compiled_count` indicate the
3204    /// architecture is shedding lots of would-be side traces;
3205    /// useful as a tuning probe for future relaxation of the
3206    /// gate or for child-IR re-specialisation against parent's
3207    /// exit shape.
3208    pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3209        self.jit.counters.side_trace_shape_mismatch
3210    }
3211
3212    /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3213    /// classified as `crate::jit::trace::EscapeState::Sinkable`
3214    /// across every successfully compiled trace on this Vm. The
3215    /// count is post-demotion: sites pre-emit drops back to Escaped
3216    /// for not meeting v1 sunk-emit criteria are NOT counted.
3217    /// `trace_sunk_alloc_count` matches one-for-one today (every
3218    /// surviving Sinkable site goes through sunk emit).
3219    pub fn trace_sinkable_seen_count(&self) -> u64 {
3220        self.jit.counters.sinkable_seen
3221    }
3222
3223    /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3224    pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3225        self.jit.counters.accum_bufferable_seen
3226    }
3227
3228    /// P15-prep — total dispatch hits across all known traces,
3229    /// broken into hot-exit telemetry (max single-exit count,
3230    /// total dispatches, exit count). Used by probes to identify
3231    /// hot side-exits as side-trace candidates.
3232    ///
3233    /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3234    /// recursively, so inner functions' traces are reported.
3235    pub fn trace_exit_hit_summary(
3236        &self,
3237        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3238    ) -> Vec<(u32, Vec<u32>)> {
3239        fn walk(
3240            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3241            out: &mut Vec<(u32, Vec<u32>)>,
3242        ) {
3243            for ct in proto.traces.borrow().iter() {
3244                let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3245                out.push((ct.head_pc, counts));
3246            }
3247            for inner in proto.protos.iter() {
3248                walk(*inner, out);
3249            }
3250        }
3251        let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3252        walk(cl.proto, &mut out);
3253        out
3254    }
3255
3256    /// P15-A v0 — surface every side-exit slot whose hit count is
3257    /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3258    /// `cl.proto` (recursively walking `proto.protos`). Returned
3259    /// entries are side-trace candidates: each carries the parent
3260    /// trace's `(head_proto, head_pc)`, the exit's index in the
3261    /// parent's `exit_hit_counts`, and the side trace's natural
3262    /// entry shape (`cont_pc` + `exit_tags`).
3263    ///
3264    /// Layout of `exit_hit_counts` (mirrored by the iter):
3265    /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3266    ///   window-sized exit_tags).
3267    /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3268    ///   → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3269    /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3270    ///   exit_tags = `ct.exit_tags`).
3271    pub fn hot_exit_iter(
3272        &self,
3273        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3274    ) -> Vec<crate::jit::trace::HotExitInfo> {
3275        use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3276        fn walk(
3277            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3278            out: &mut Vec<HotExitInfo>,
3279        ) {
3280            for ct in proto.traces.borrow().iter() {
3281                let inline_n = ct.per_exit_inline.len();
3282                let tags_n = ct.per_exit_tags.len();
3283                debug_assert_eq!(
3284                    ct.exit_hit_counts.len(),
3285                    inline_n + tags_n + 1,
3286                    "exit_hit_counts layout invariant violated"
3287                );
3288                for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3289                    let hits = cell.get();
3290                    if hits < HOTEXIT_THRESHOLD {
3291                        continue;
3292                    }
3293                    let (cont_pc, exit_tags) = if idx < inline_n {
3294                        let ent = &ct.per_exit_inline[idx];
3295                        (ent.cont_pc, ent.exit_tags.clone())
3296                    } else if idx < inline_n + tags_n {
3297                        let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3298                        (*pc, tags.clone())
3299                    } else {
3300                        (ct.head_pc, ct.exit_tags.clone())
3301                    };
3302                    out.push(HotExitInfo {
3303                        head_proto: proto,
3304                        head_pc: ct.head_pc,
3305                        exit_idx: idx,
3306                        hits,
3307                        cont_pc,
3308                        exit_tags,
3309                    });
3310                }
3311            }
3312            for inner in proto.protos.iter() {
3313                walk(*inner, out);
3314            }
3315        }
3316        let mut out: Vec<HotExitInfo> = Vec::new();
3317        walk(cl.proto, &mut out);
3318        out
3319    }
3320
3321    /// P12-S5-B — sum of NewTable sites that actually took the
3322    /// sunk-emit path across every successfully compiled trace on
3323    /// this Vm. Each counted site skips its heap `Gc<Table>`
3324    /// allocation per dispatch; the array part lives as Cranelift
3325    /// `Variable`s for the duration of the trace.
3326    pub fn trace_sunk_alloc_count(&self) -> u64 {
3327        self.jit.counters.sunk_alloc
3328    }
3329
3330    /// P12-S5-C — sum of materialise-helper emit sites across every
3331    /// successfully compiled trace on this Vm. Each unit is a
3332    /// (site × cmp side-exit) pair whose IR reconstructs a heap
3333    /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3334    /// emit is wiring materialise into the right side-exits.
3335    pub fn trace_materialize_emit_count(&self) -> u64 {
3336        self.jit.counters.materialize_emit
3337    }
3338
3339    /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3340    /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3341    /// replaces a `Heap::new_closure_inline` call on the dispatch
3342    /// path; the count is static (one per matching op per compiled
3343    /// trace), summed at compile success.
3344    pub fn trace_closure_emit_count(&self) -> u64 {
3345        self.jit.counters.closure_emit
3346    }
3347
3348    /// v2.0 Stage 7 polish 6 fire experiment — see
3349    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3350    /// Number of compiled traces whose `per_exit_inline.len() > 0`
3351    /// (depth>0 inlined cmp side-exits emitted).
3352    pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3353        self.jit.counters.per_exit_inline_compiled
3354    }
3355
3356    /// v2.0 Stage 7 polish 6 fire experiment — see
3357    /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3358    /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3359    /// `dispatchable == true` — i.e. the count of compiled traces
3360    /// that would actually exercise the AOT polish 6 chain-reloc +
3361    /// deploy-resolver path.
3362    pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3363        self.jit.counters.per_exit_inline_dispatchable
3364    }
3365
3366    /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3367    /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3368    /// whether a self-recursive function actually walked the depth
3369    /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3370    /// across traces and Vm activations; reset only on `Vm::new`.
3371    pub fn trace_max_depth_seen(&self) -> u8 {
3372        self.jit.max_depth_seen
3373    }
3374
3375    /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3376    /// dispatch time). The frame-materialization helper reads `.base`
3377    /// to compute offsets for each inlined frame's window.
3378    #[doc(hidden)]
3379    pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3380        match self.frames.last() {
3381            Some(CallFrame::Lua(f)) => Some(*f),
3382            _ => None,
3383        }
3384    }
3385
3386    /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3387    /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3388    /// by `luna-tools` (`luna-profile`'s sampler walks this from
3389    /// inside a `Count` hook). Sibling-module scope: not part of the
3390    /// public embedder surface, but `inspect::frames_for_profile` is.
3391    #[doc(hidden)]
3392    pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3393        &self.frames
3394    }
3395
3396    /// P12-S4-step4b — ensure the value stack covers indices
3397    /// `[0..need)`. Extends with Nil if shorter. Called by the
3398    /// frame-materialization helper before pushing an inlined frame
3399    /// whose register window may exceed the current stack length.
3400    #[doc(hidden)]
3401    pub fn jit_ensure_stack(&mut self, need: usize) {
3402        if self.stack.len() < need {
3403            self.stack.resize(need, Value::Nil);
3404        }
3405    }
3406
3407    /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3408    /// `__close` handlers would run (any active tbc slot ≥ from
3409    /// holding a non-nil/false Value); if so, parks a deopt sentinel
3410    /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3411    /// IR branches to the deopt block. Otherwise performs the safe
3412    /// part of close — `close_from(from)` to close open upvals +
3413    /// drop any drained tbc entries ≥ from — and returns 0.
3414    ///
3415    /// Returns are i64-shaped so the cranelift import sig stays
3416    /// trivial (i64 → i64 mapping).
3417    #[doc(hidden)]
3418    pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3419        if self.jit.pending_err.is_some() {
3420            return 1;
3421        }
3422        let Some(f) = self.jit_last_lua_frame() else {
3423            self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3424            return 1;
3425        };
3426        let from = f.base + start_offset;
3427        let has_handler = self.tbc.iter().any(|&s| {
3428            s >= from && {
3429                let v = self.stack[s as usize];
3430                !matches!(v, Value::Nil | Value::Bool(false))
3431            }
3432        });
3433        if has_handler {
3434            self.jit.pending_err =
3435                Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3436            return 1;
3437        }
3438        self.close_from(from);
3439        // Drain any tbc entries ≥ from (they're nil/false stubs the
3440        // interpreter's drive_close would have skipped silently).
3441        while let Some(&s) = self.tbc.last() {
3442            if s < from {
3443                break;
3444            }
3445            self.tbc.pop();
3446        }
3447        0
3448    }
3449
3450    /// P12-S7-B — spill the trace's current value for a register to
3451    /// the underlying `vm.stack[base + slot_offset]`. Required before
3452    /// an `Op::Closure` whose inner proto has an `in_stack: true`
3453    /// upval at `slot_offset` — the helper's `find_or_create_upval`
3454    /// captures a live pointer to `vm.stack[base + slot_offset]`,
3455    /// which must hold the right value at call time (trace IR's
3456    /// Variable hasn't yet been written back).
3457    ///
3458    /// Parameters arrive as i64 from the IR: `slot_offset` is the
3459    /// caller-frame register index (`u32` in practice, depth=0
3460    /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3461    /// `crate::runtime::value::raw` byte for the slot's RegKind;
3462    /// `raw_bits` is the trace Variable's `use_var` payload
3463    /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3464    /// raw `Gc::as_ptr` cast).
3465    #[doc(hidden)]
3466    pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3467        let Some(f) = self.jit_last_lua_frame() else {
3468            self.jit.pending_err =
3469                Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3470            return;
3471        };
3472        let idx = (f.base as usize) + (slot_offset as usize);
3473        if self.stack.len() <= idx {
3474            self.stack.resize(idx + 1, Value::Nil);
3475        }
3476        // SAFETY: caller (trace JIT IR emit) provides matching
3477        // `(tag, raw_bits)` — same shape produced by Value::unpack.
3478        let v = unsafe {
3479            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3480        };
3481        self.stack[idx] = v;
3482    }
3483
3484    /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3485    /// Mirrors the interp arm (this file ~L5316): copies the
3486    /// generator/state/control triple from `R[A..=A+2]` to
3487    /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3488    /// the iterator function via `begin_call`. v2 only handles
3489    /// `Value::Native` iterators (the canonical `ipairs_iter` /
3490    /// `next` builtins) — a Lua-closure iterator would push a Lua
3491    /// frame mid-trace, breaking `recording_frame_base`, so we
3492    /// deopt by parking a `pending_err` and returning `-1`.
3493    ///
3494    /// `slot_offset` is the caller-frame register index (=
3495    /// `inst.a()` decoded from a u32-wide field). `nvars` is
3496    /// `inst.c() as i32` — the caller's expected return count.
3497    /// P12-S12-C v1 — refresh only the raw payload of
3498    /// `vm.stack[base + slot_offset]`, preserving its existing
3499    /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3500    /// uses this when the slot's `RegKind` is `Unset` (no compile-
3501    /// time tag info; commonly `Str` slots which the trace doesn't
3502    /// model). The interp's previous execution of the same op
3503    /// already populated the slot with the right tag — the trace
3504    /// only needs to swap in its current raw value.
3505    #[doc(hidden)]
3506    pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3507        let Some(f) = self.jit_last_lua_frame() else {
3508            return;
3509        };
3510        let idx = (f.base as usize) + (slot_offset as usize);
3511        if idx >= self.stack.len() {
3512            return;
3513        }
3514        let (tag, _) = self.stack[idx].unpack();
3515        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3516        self.stack[idx] = unsafe {
3517            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3518        };
3519    }
3520
3521    /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3522    ///
3523    /// Mirrors the interp arm (this file ~L5112): `self.top =
3524    /// base + a + n; concat_run(base + a)`. Result lands at
3525    /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3526    /// deopt (any error from `concat_run` OR detection that the
3527    /// metamethod path was taken — `concat_run` returns `Ok(())`
3528    /// after `begin_meta_call` which has pushed a Lua frame the
3529    /// trace can't safely continue past).
3530    ///
3531    /// The frame-push detection uses `pre/post frames.len()` and
3532    /// unwinds any pushed frames before deopting, so the
3533    /// dispatcher's existing deopt path sees a clean stack.
3534    #[doc(hidden)]
3535    pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3536        if self.jit.pending_err.is_some() {
3537            return -1;
3538        }
3539        let Some(f) = self.jit_last_lua_frame() else {
3540            self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3541            return -1;
3542        };
3543        let abs_a = f.base + slot_offset;
3544        self.top = abs_a + n as u32;
3545        let pre_frames = self.frames.len();
3546        let result = self.concat_run(abs_a);
3547        let post_frames = self.frames.len();
3548        // Frame-push = metamethod path taken (begin_meta_call pushed
3549        // a Lua frame). The trace can't continue past it; unwind +
3550        // deopt so interp redoes Op::Concat in the slow path.
3551        while self.frames.len() > pre_frames {
3552            frames_pop_sync(&mut self.frames, &mut self.frames_top);
3553        }
3554        if let Err(e) = result {
3555            self.jit.pending_err = Some(e);
3556            return -1;
3557        }
3558        if post_frames > pre_frames {
3559            self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3560            return -1;
3561        }
3562        0
3563    }
3564
3565    /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3566    /// accumulator buffer pool, returning a raw pointer. The trace
3567    /// fn's IR holds this pointer in a stack slot through the loop
3568    /// and calls `jit_str_buf_extend` per iter. If the pool is
3569    /// empty, allocate fresh.
3570    ///
3571    /// Safety: the returned pointer is valid until
3572    /// `jit_str_buf_release` is called or the Vm is dropped. The
3573    /// caller MUST not retain it across `enter_jit` boundaries.
3574    #[doc(hidden)]
3575    pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3576        let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3577        // Move into a Box so the pointer is stable until release.
3578        Box::into_raw(Box::new(buf))
3579    }
3580
3581    /// P14-S14-B v2 — return a previously-acquired buffer to the
3582    /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3583    /// buffer is `clear`ed (capacity retained) so the next acquire
3584    /// gets a ready-to-extend Vec.
3585    ///
3586    /// Safety: `buf` must have been returned by a prior
3587    /// `jit_str_buf_acquire` on the same Vm.
3588    #[doc(hidden)]
3589    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3590    pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3591        if buf.is_null() {
3592            return;
3593        }
3594        // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3595        let mut owned = unsafe { Box::from_raw(buf) };
3596        owned.clear();
3597        if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3598            self.jit.str_buf_pool.push(*owned);
3599        }
3600        // Else: drop the buffer.
3601    }
3602
3603    /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3604    /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3605    /// the piece slot) and passes it through; we treat it as a
3606    /// `*mut LuaStr` and append its bytes.
3607    ///
3608    /// Returns 0 on success, -1 if the piece isn't a Str (would
3609    /// trip __concat metamethod path → deopt to interp).
3610    ///
3611    /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3612    /// trace's piece slot raw bits.
3613    #[doc(hidden)]
3614    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3615    pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3616        if buf.is_null() || str_ptr == 0 {
3617            return -1;
3618        }
3619        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3620        let buf = unsafe { &mut *buf };
3621        let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3622        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3623        let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3624        buf.extend_from_slice(bytes);
3625        0
3626    }
3627
3628    /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3629    /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3630    /// the trace to write into the accumulator slot.
3631    ///
3632    /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3633    /// (the v2 hard cap; the trace deopts).
3634    ///
3635    /// Safety: `buf` from prior `acquire`. The buffer is left
3636    /// CLEAR (drained) ready for `release`.
3637    #[doc(hidden)]
3638    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3639    pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3640        if buf.is_null() {
3641            return 0;
3642        }
3643        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3644        let buf = unsafe { &mut *buf };
3645        let bytes = std::mem::take(buf);
3646        // v2 hard cap at 256KB per RFC Q3.
3647        if bytes.len() > 256 * 1024 {
3648            return 0;
3649        }
3650        let gc = self.heap.intern(&bytes);
3651        gc.as_ptr() as i64
3652    }
3653
    /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
    ///
    /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
    /// v3: ipairs `inext` fast path at the top — skip begin_call
    ///     when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
    ///     R[A+2]=Int.
    /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
    ///     caller-provided buffers + return R[A+4]'s tag byte. Lets
    ///     emit skip 3 separate `luna_jit_stack_load` calls and 1
    ///     `luna_jit_stack_tag` call by reading the buffer via
    ///     cranelift `stack_load` IR instead. Returns -1 on deopt.
    ///
    /// Parameters: `slot_offset` is the register index `A` relative to
    /// the top Lua frame's base; `nvars` is the caller's expected
    /// result count; `ctrl_out` / `key_out` / `val_out` receive the raw
    /// payloads of R[A+2] / R[A+4] / R[A+5] (`val_out` gets 0 when
    /// `nvars < 2`).
    ///
    /// Deopt (`-1`, with an error parked in `jit.pending_err` unless
    /// one was already pending): an error is already pending, no Lua
    /// frame is on top of the call stack, the iterator at R[A] is not
    /// a `Value::Native` on the slow path, or `begin_call` fails. The
    /// out-pointers are not written on deopt.
    ///
    /// Safety: the three out-pointers are written without a null
    /// check; each must be valid for one `i64` write.
    #[doc(hidden)]
    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
    pub fn jit_op_tforcall(
        &mut self,
        slot_offset: u32,
        nvars: i32,
        ctrl_out: *mut i64,
        key_out: *mut i64,
        val_out: *mut i64,
    ) -> i64 {
        if self.jit.pending_err.is_some() {
            return -1;
        }
        let Some(f) = self.jit_last_lua_frame() else {
            self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
            return -1;
        };
        let abs = f.base + slot_offset;
        // Make R[A]..=R[A+6] addressable before any slot is touched.
        let need = (abs + 7) as usize;
        if self.stack.len() < need {
            self.stack.resize(need, Value::Nil);
        }
        // v3 fast path.
        let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
            && std::ptr::fn_addr_eq(
                n.f,
                crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
            )
            && let Value::Table(t) = self.stack[(abs + 1) as usize]
            && t.metatable().is_none()
            && let Value::Int(i) = self.stack[(abs + 2) as usize]
        {
            let next_i = i.wrapping_add(1);
            let v = t.get_int(next_i);
            if v.is_nil() {
                // End of iteration: only the key slot is written.
                self.stack[(abs + 4) as usize] = Value::Nil;
            } else {
                self.stack[(abs + 4) as usize] = Value::Int(next_i);
                if (nvars as usize) >= 2 {
                    self.stack[(abs + 5) as usize] = v;
                }
                // Any loop variables beyond (key, value) are set to Nil,
                // skipping slots that lie past the end of the stack.
                for j in 2..nvars as usize {
                    let slot = abs + 4 + j as u32;
                    if (slot as usize) < self.stack.len() {
                        self.stack[slot as usize] = Value::Nil;
                    }
                }
            }
            true
        } else {
            false
        };
        if !took_fast_path {
            // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
            // route through begin_call. Lua-closure iters would push
            // a Lua frame mid-trace → deopt.
            self.stack[(abs + 4) as usize] = self.stack[abs as usize];
            self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
            self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
            if !matches!(self.stack[abs as usize], Value::Native(_)) {
                self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
                return -1;
            }
            if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
                self.jit.pending_err = Some(e);
                return -1;
            }
        }
        // v4 batched writeback — fill the caller's buffers with the
        // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
        // reload via cranelift `stack_load` instead of separate
        // `luna_jit_stack_load` helper calls.
        // SAFETY: reads the `zero` view of the raw payload returned by
        // `unpack`. NOTE(review): presumably `RawVal` is a union whose
        // `zero` field is valid to read for every tag — confirm in
        // value.rs.
        let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
        let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
        // SAFETY: same `zero`-view read as for `ctrl_raw` above.
        let key_raw = unsafe { key_rv.zero };
        let val_raw = if (nvars as usize) >= 2 {
            // SAFETY: same `zero`-view read as for `ctrl_raw` above.
            unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
        } else {
            0u64
        };
        // SAFETY: the caller guarantees the three out-pointers are
        // valid for a single `i64` write each (caller-stack buffers set
        // up by the Cranelift-emitted prologue, per the attribute note
        // on this function). They are not null-checked here.
        unsafe {
            ctrl_out.write(ctrl_raw as i64);
            key_out.write(key_raw as i64);
            val_out.write(val_raw as i64);
        }
        key_tag as i64
    }
3756
3757    /// P12-S12-B-v2 — load the raw `i64` payload of
3758    /// `vm.stack[base + slot_offset]` for the active trace's head
3759    /// Lua frame. Used to reload trace IR `Variable`s after a
3760    /// helper has written to `vm.stack` directly (e.g. TForCall's
3761    /// iter results land at `R[A+4..A+4+nvars]`).
3762    #[doc(hidden)]
3763    pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3764        let Some(f) = self.jit_last_lua_frame() else {
3765            return 0;
3766        };
3767        let idx = (f.base as usize) + (slot_offset as usize);
3768        if idx >= self.stack.len() {
3769            return 0;
3770        }
3771        let v = self.stack[idx];
3772        let (_, raw) = v.unpack();
3773        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3774        unsafe { raw.zero as i64 }
3775    }
3776
3777    /// P12-S12-B-v2 — read the tag byte of
3778    /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3779    /// to dispatch on the iterator's return-key tag at runtime
3780    /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3781    /// deopt for v2).
3782    #[doc(hidden)]
3783    pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3784        let Some(f) = self.jit_last_lua_frame() else {
3785            return crate::runtime::value::raw::NIL;
3786        };
3787        let idx = (f.base as usize) + (slot_offset as usize);
3788        if idx >= self.stack.len() {
3789            return crate::runtime::value::raw::NIL;
3790        }
3791        self.stack[idx].unpack().0
3792    }
3793
3794    /// P12-S4-step4b — push a Lua frame onto the call stack with
3795    /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3796    /// at trace side-exits to recreate the inlined call activations
3797    /// the lowerer compiled past. The contract (enforced by the
3798    /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3799    /// `nresults` is the caller's expected count (today always 1
3800    /// because the lowerer bails Op::Call C != 2), and the caller
3801    /// has already called `jit_ensure_stack` to cover
3802    /// `[0..base + cl.proto.max_stack)`.
3803    #[doc(hidden)]
3804    pub fn jit_push_inlined_frame(
3805        &mut self,
3806        cl: Gc<LuaClosure>,
3807        base: u32,
3808        pc: u32,
3809        nresults: i32,
3810    ) {
3811        frames_push_sync(
3812            &mut self.frames,
3813            &mut self.frames_top,
3814            CallFrame::Lua(Frame {
3815                closure: cl,
3816                base,
3817                pc,
3818                // Lua call ABI: callee R[0] sits at caller R[A+1], so
3819                // callee.base = caller.base + A + 1; func_slot is
3820                // caller.base + A = callee.base - 1.
3821                func_slot: base - 1,
3822                n_varargs: 0,
3823                nresults,
3824                hook_oldpc: u32::MAX,
3825                from_c: false,
3826                tm: None,
3827                is_hook: false,
3828                tailcalls: 0,
3829            }),
3830        );
3831    }
3832
3833    /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
3834    /// should set to `false` so `load`/`loadstring` reject bytecode input
3835    /// (which bypasses parser limits and could exploit verifier gaps).
3836    pub fn set_bytecode_loading(&mut self, enabled: bool) {
3837        self.bytecode_loading = enabled;
3838    }
3839
3840    /// Current bytecode-loading gate state.
3841    pub fn bytecode_loading(&self) -> bool {
3842        self.bytecode_loading
3843    }
3844
3845    /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
3846    /// bytecode is a strictly larger trust surface than luna's own dump
3847    /// format (third-party toolchain bugs, malformed chunks, unknown
3848    /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
3849    /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
3850    pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
3851        self.puc_bytecode_loading = enabled;
3852    }
3853
3854    /// Current PUC bytecode-loading gate state.
3855    pub fn puc_bytecode_loading(&self) -> bool {
3856        self.puc_bytecode_loading
3857    }
3858
3859    /// Default loader input budget — 256 MiB.
3860    ///
3861    /// `Vm::load` and the Lua-level `load(reader, ...)` both refuse
3862    /// sources whose byte length crosses this cap, returning the
3863    /// PUC-shaped `not enough memory` error rather than letting the
3864    /// host allocator try (and crash) to hold the next chunk.
3865    pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 * 1024 * 1024;
3866
3867    /// Set the loader input byte budget (see
3868    /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
3869    /// effectively disable. Smaller caps are honored verbatim — a 0
3870    /// cap rejects every non-empty source.
3871    pub fn set_loader_input_budget(&mut self, bytes: usize) {
3872        self.loader_input_budget = bytes;
3873    }
3874
3875    /// Current loader input byte budget.
3876    pub fn loader_input_budget(&self) -> usize {
3877        self.loader_input_budget
3878    }
3879
3880    /// Take the error traceback captured at the latest error point and
3881    /// reset it. Embedders should call this immediately after a failed
3882    /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3883    /// entry clears it. Returns `None` if no error was in flight.
3884    pub fn take_error_traceback(&mut self) -> Option<String> {
3885        self.error_traceback
3886            .take()
3887            .map(|b| String::from_utf8_lossy(&b).into_owned())
3888    }
3889
3890    /// Arm the soft memory cap (P09 embedding). The run loop checks the
3891    /// heap's tracked byte usage between dispatch turns; on overshoot it
3892    /// first runs a full collect, and if `bytes` still exceeds the cap it
3893    /// raises a catchable `"memory cap exceeded"` Lua error and disarms
3894    /// itself (fire-once: re-arm before the next `call_value` if reusing
3895    /// the Vm across requests). `None` removes the cap. The accounting is
3896    /// approximate — internal Vec/Box capacity overhead is not tracked,
3897    /// so embedders should size the cap with ~2× margin over the desired
3898    /// hard limit and additionally bound the Vm's lifetime (drop after
3899    /// each request).
3900    pub fn set_memory_cap(&mut self, cap: Option<usize>) {
3901        self.heap.mem_cap = cap;
3902    }
3903
3904    /// Approximate bytes the heap is currently holding. Object shells plus
3905    /// every table's internal array/hash boxes (tracked via
3906    /// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
3907    /// bytecode and closure upvalue slices still go uncounted — this is a
3908    /// lower bound, not a precise `malloc_stats`-style total.
3909    pub fn memory_used(&self) -> usize {
3910        self.heap.bytes()
3911    }
3912
3913    /// Read upvalue slot `i` of the native function currently on top of the
3914    /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3915    /// when no native is running. Public so the C ABI trampoline can fetch
3916    /// the host C function pointer it stashed there at registration time.
3917    pub fn running_native_upvalue(&self, i: usize) -> Value {
3918        match self.running_natives.last() {
3919            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3920            Some(nc) => unsafe {
3921                let upvals = &(*nc.as_ptr()).upvals;
3922                upvals.get(i).copied().unwrap_or(Value::Nil)
3923            },
3924            None => Value::Nil,
3925        }
3926    }
3927
3928    /// Register a table for finalization if its (just-set) metatable carries a
3929    /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3930    /// `__gc` to the metatable afterwards does not retroactively register).
3931    pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3932        if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3933            self.heap.register_finalizable(t);
3934        }
3935    }
3936
3937    /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3938    /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3939    /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3940    /// metatable that `newproxy` returned, which then needs to flow through.
3941    /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3942    /// would re-check at metatable-set time); luna's only userdata
3943    /// finalizables today come via `newproxy`, which registers itself.
3944    #[allow(dead_code)]
3945    pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3946        if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3947            self.heap.register_finalizable_userdata(u);
3948        }
3949    }
3950
3951    /// Run pending `__gc` finalizers (objects the collector resurrected for
3952    /// finalization). Finalizer errors are swallowed — PUC turns them into a
3953    /// warning; they must never propagate to the mutator. Reentrancy-guarded.
3954    fn run_finalizers(&mut self) {
3955        let _ = self.run_finalizers_or_err();
3956    }
3957
3958    fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
3959        if self.gc_finalizing {
3960            return Ok(());
3961        }
3962        let pending = self.heap.take_tobefnz();
3963        if pending.is_empty() {
3964            return Ok(());
3965        }
3966        self.gc_finalizing = true;
3967        let mut first_err: Option<LuaError> = None;
3968        for obj in pending {
3969            let gc = self.get_mm(obj, Mm::Gc);
3970            // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
3971            // schedule the object for finalization (`__gc = true` is the
3972            // canonical placeholder); only call it at finalize time when it
3973            // is actually a function. gc.lua 5.2 :412 wires up exactly this
3974            // sentinel and then expects no call.
3975            let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
3976            if callable {
3977                // PUC `GCTM` sets `CIST_FIN` on the new ci so
3978                // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
3979                // `name = "__gc"`. luna threads the same outcome through the
3980                // generic `pending_tm` slot: the Lua frame born from this
3981                // call consumes it in `push_frame`. Saved/restored around the
3982                // call in case the handler is a native (which never pops it).
3983                // Bare event name; `frame_name` / `c_frame_name` add the
3984                // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
3985                // the convention used by `__close`, `__index`, …
3986                let saved_tm = self.pending_tm.replace("gc");
3987                // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
3988                // pcall, so `getinfo(2).namewhat` inside the finalizer reads
3989                // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
3990                // luna mirrors by temporarily tagging the current top Lua
3991                // frame's `tm` to "__gc" for the duration of the call.
3992                let caller_tm_idx = self
3993                    .frames
3994                    .iter()
3995                    .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
3996                let saved_caller_tm = caller_tm_idx.and_then(|i| {
3997                    if let CallFrame::Lua(fr) = &mut self.frames[i] {
3998                        let prev = fr.tm;
3999                        fr.tm = Some("gc");
4000                        Some(prev)
4001                    } else {
4002                        None
4003                    }
4004                });
4005                if let Err(e) = self.call_value(gc, &[obj]) {
4006                    // PUC 5.1 GCTM raised the finalizer's error to the
4007                    // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
4008                    // baselines on `not pcall(collectgarbage)`). 5.2/5.3
4009                    // wrapped it in `error in __gc metamethod (msg)` first
4010                    // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
4011                    // introduced the warning system and switched to "warn
4012                    // then continue" — never re-raise, just route the
4013                    // wrapped message through `warn`. gc.lua 5.5 :378 wires
4014                    // up `_WARN` capture under the `if T then …` block to
4015                    // baseline on the same wrapped string.
4016                    if self.version >= LuaVersion::Lua54 {
4017                        let inner = self.error_text(&e);
4018                        let msg = format!("error in __gc metamethod ({inner})");
4019                        self.emit_warn(msg.as_bytes(), false);
4020                    } else if first_err.is_none() {
4021                        let wrapped = if self.version >= LuaVersion::Lua52 {
4022                            let inner = self.error_text(&e);
4023                            let msg = format!("error in __gc metamethod ({inner})");
4024                            let s = Value::Str(self.heap.intern(msg.as_bytes()));
4025                            LuaError(s)
4026                        } else {
4027                            e
4028                        };
4029                        first_err = Some(wrapped);
4030                    }
4031                }
4032                self.pending_tm = saved_tm;
4033                if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
4034                    && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
4035                {
4036                    fr.tm = prev; // prev is Option<&'static str>; restore exactly
4037                }
4038            }
4039        }
4040        self.gc_finalizing = false;
4041        match first_err {
4042            Some(e) => Err(e),
4043            None => Ok(()),
4044        }
4045    }
4046
4047    /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
4048    /// Crosses up to three phases per call:
4049    ///   1. Pause      → seed Propagate (`gc_start_propagate`)
4050    ///   2. Propagate  → drain gray up to `budget`; on exhaustion run atomic
4051    ///                   (`gc_finish_atomic` → tobefnz populated; finalizers
4052    ///                   run via `run_finalizers`) and enter Sweep
4053    ///   3. Sweep      → `gc_sweep_step` up to (residual) `budget`
4054    /// Returns true when this call completed the cycle's sweep (back to
4055    /// Pause). The budget is spent generously across phases — a large `n`
4056    /// can finish a whole cycle in one call (PUC stop-the-world step).
4057    pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
4058        // Re-entry guard: never recurse — `run_finalizers` calls Lua code
4059        // that may hit a safe point and try to step again. Re-entry was OK
4060        // under STW (collect_garbage had its own guard) but here the
4061        // intermediate phase state would corrupt.
4062        if self.gc_finalizing {
4063            return false;
4064        }
4065        if self.heap.gc_phase_is_pause() {
4066            let (roots, extra) = self.gc_roots();
4067            self.heap.gc_start_propagate(&roots, &extra);
4068        }
4069        if self.heap.gc_phase_is_propagate() {
4070            if !self.heap.gc_step_propagate(budget) {
4071                return false;
4072            }
4073            self.heap.gc_finish_atomic();
4074            // any __gc scheduled by atomic — run before sweep so a finalizer
4075            // re-registering `self` re-enters the next cycle, not this sweep
4076            self.run_finalizers();
4077        }
4078        // either we just transitioned, or we entered already in Sweep, or
4079        // a finalizer started a new cycle (gc_sweep_step is a no-op then)
4080        self.heap.gc_sweep_step(budget)
4081    }
4082
4083    // ---- frames & calls ----
4084
    /// Begin calling `stack[func_slot]` with `nargs` arguments (`None`: every
    /// slot from `func_slot + 1` up to `self.top`).
    ///
    /// The callee is resolved in a loop: a Lua closure either runs through
    /// the JIT fast path or gets a frame pushed, a native runs inline, and
    /// any other value is replaced by its `__call` metamethod — inserted at
    /// `func_slot`, so the original value becomes the first argument — before
    /// the loop retries.
    ///
    /// * `nresults` — expected result count; forwarded to `push_frame`,
    ///   `finish_results`, the JIT fast path and the `begin_*` continuations.
    /// * `from_c` — forwarded unchanged to `push_frame`.
    ///
    /// Returns `Ok(true)` if a Lua frame was pushed (the dispatch loop
    /// continues there), `Ok(false)` if the call completed inline (a native,
    /// or the JIT fast path). Calls to `pcall` / `xpcall` / `pairs` with a
    /// `__pairs` metamethod return whatever `begin_pcall` / `begin_xpcall` /
    /// `begin_pairs` return.
    ///
    /// # Errors
    ///
    /// * the value is not callable and has no `__call` metamethod;
    /// * the `__call` chain exceeds the per-version cap;
    /// * an async native is called while `async_mode` is off;
    /// * a call/return hook fails, the native itself fails, or the native
    ///   panics (surfaced as a `native panic: …` error);
    /// * `push_frame` fails;
    /// * an async native was dispatched: a sentinel `LuaError(Value::Nil)` is
    ///   returned after the future has been stashed for the executor.
    fn begin_call(
        &mut self,
        func_slot: u32,
        nargs: Option<u32>,
        nresults: i32,
        from_c: bool,
    ) -> Result<bool, LuaError> {
        let mut nargs = match nargs {
            Some(n) => n,
            None => self.top - (func_slot + 1),
        };
        // Consume `pending_tailcalls` at the boundary: a tail-call op sets it
        // only for the immediately-following Lua activation. Native dispatch
        // (or `__call` resolution) below must not let it leak to the next
        // begin_call's frame; restore it just before push_frame for the Lua
        // arm so its meaning is preserved across __call chaining.
        let tailcalls = std::mem::take(&mut self.pending_tailcalls);
        // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
        // is inserted before the value so it becomes the first argument, and a
        // chain of `__call` tables resolves down to a real function.
        let mut chain = 0u32;
        loop {
            match self.stack[func_slot as usize] {
                Value::Closure(cl) => {
                    // P11-S2c.B JIT fast path: if the Proto's body fits
                    // the int-arith whitelist, every arg is `Value::Int`,
                    // and the cached arity matches, skip frame setup and
                    // run the cached native fn in-place.
                    // NOTE(review): the taken value is put back on this path
                    // even though no frame is pushed to consume it — confirm
                    // the caller expects that.
                    if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
                        self.pending_tailcalls = tailcalls;
                        return Ok(false);
                    }
                    self.pending_tailcalls = tailcalls;
                    self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
                    // P12-S4-step0 — trace-on-call trigger. The frame
                    // we just pushed is the callee whose body the
                    // recorder will trace. Bump the per-Proto call
                    // counter; once it crosses `CALL_HOT_THRESHOLD`
                    // and no other trace is in flight, snapshot the
                    // callee's register window (R[0..max_stack]) and
                    // begin recording at `pc=0`. This is what unlocks
                    // tracing for functions whose body has no negative
                    // `Op::Jmp` back-edge (`fib`, recursive helpers).
                    //
                    // Gated on `self.jit.trace_enabled`, so the default
                    // dispatch pays a single not-taken branch.
                    if self.jit.trace_enabled {
                        let proto = cl.proto;
                        let c = proto.call_hot_count.get();
                        // Stop counting at half of u32::MAX so the counter
                        // can never overflow.
                        if c < u32::MAX / 2 {
                            proto.call_hot_count.set(c + 1);
                        }
                        // P13-S13-H — relaxed call-trigger:
                        // `c >= THRESHOLD` (was `c == THRESHOLD`) +
                        // `!already_cached` short-circuit. Lets a
                        // discarded short call-trigger close retry
                        // on the next call (fib(10/15/20/25)
                        // pathology — first capture is base-case
                        // [Lt,Jmp,Return1]; coverage-heuristic
                        // discards; next call gets to record at a
                        // potentially deeper recursion point).
                        // Without `already_cached`, the relaxed
                        // condition would re-record over a cached
                        // trace every call.
                        //
                        // P13-S13-K — additionally short-circuit on
                        // `proto.trace_gave_up`. The S13-I discard
                        // cap force-compiles a partial trace and
                        // flips this flag; subsequent calls into
                        // this Proto skip the RefCell borrow + Vec
                        // scan entirely.
                        if proto.trace_gave_up.get() {
                            return Ok(true);
                        }
                        // A trace whose head is pc 0 is a call-triggered
                        // trace for this Proto.
                        let call_already_cached =
                            proto.traces.borrow().iter().any(|t| t.head_pc == 0);
                        // Note: `c` is the count read BEFORE this call's
                        // increment.
                        if c >= crate::jit::trace::CALL_HOT_THRESHOLD
                            && self.jit.active_trace.is_none()
                            && !call_already_cached
                        {
                            // The new frame is on top: index in
                            // `self.frames` is `len() - 1`.
                            let frame_idx = self.frames.len() - 1;
                            // Snapshot R[0..max_stack] at the callee's
                            // base. `push_frame` resized `self.stack`
                            // to `base + max_stack`, so this window is
                            // guaranteed in-bounds.
                            let f = match &self.frames[frame_idx] {
                                CallFrame::Lua(f) => f,
                                _ => unreachable!("push_frame just pushed a Lua frame"),
                            };
                            let max_stack = cl.proto.max_stack as usize;
                            let base_us = f.base as usize;
                            // Only the type tag of each register is recorded;
                            // the payload half of `unpack()` is discarded.
                            let mut entry_tags = Vec::with_capacity(max_stack);
                            for i in 0..max_stack {
                                let (tag, _) = self.stack[base_us + i].unpack();
                                entry_tags.push(tag);
                            }
                            self.jit.active_trace =
                                Some(Box::new(crate::jit::trace::TraceRecord::start(
                                    cl.proto, 0, entry_tags, true,
                                )));
                            self.jit.recording_frame_base = frame_idx;
                        }
                    }
                    return Ok(true);
                }
                Value::Native(nc) => {
                    // v1.1 B10 Stage 2 — async-marked NativeClosure.
                    // Route through the cooperative-yield mechanism
                    // when async_mode is on; reject when called from
                    // a sync `eval`/`call_value` path (would have no
                    // executor to drive the returned future).
                    if nc.is_async {
                        if !self.async_mode {
                            let s = Value::Str(
                                self.heap.intern(b"async native called in sync context"),
                            );
                            self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
                            return Err(LuaError(s));
                        }
                        // Same root-up bookkeeping as the sync path:
                        // pin args + result-count expectation so a
                        // collection across the suspend boundary
                        // keeps the arg window live.
                        self.native_nresults = nresults;
                        self.gc_top = func_slot + nargs + 1;
                        // v1.3 Phase AS — fire the "call" hook BEFORE
                        // building the future. Mirrors the sync native
                        // path's `hook_call(true, nargs)` site
                        // (`exec.rs` further down) so embedders with a
                        // Rust debug hook installed see a Call event
                        // for async natives identical to the sync
                        // path. The matching "return" hook fires from
                        // `commit_async_native_result` in
                        // `async_drive.rs` after the future resolves.
                        // Placement follows audit §"Open questions"
                        // Q6: after the `native_nresults` / `gc_top`
                        // pin, before the future is constructed, so a
                        // hook body that triggers GC observes the
                        // correct pinned window. On hook error the
                        // sentinel never returns and
                        // `pending_async_native_*` remain `None` —
                        // the executor sees `DispatchOutcome::Error`
                        // (audit §A.1 edge cases).
                        self.hook_call(true, nargs)?;
                        // Transmute the stored NativeFn back to its
                        // real AsyncNativeFn shape. Sound because
                        // `set_async_native` / `create_async_native`
                        // installed an AsyncNativeFn through the
                        // identically-sized fn-pointer slot, and the
                        // `is_async` marker bit is what records that
                        // fact.
                        let async_fn: crate::vm::async_drive::AsyncNativeFn =
                            // SAFETY: same-size fn pointers; provenance
                            // preserved through `mem::transmute`. The
                            // `is_async` marker is the only safe-to-call
                            // gate, set exclusively by
                            // `Vm::create_async_native`.
                            unsafe { std::mem::transmute(nc.f) };
                        let vm_ptr: *mut Vm = self;
                        let fut = async_fn(vm_ptr, func_slot, nargs);
                        // Stash the future + post-call context for
                        // `drive_one` to surface to `EvalFuture::poll`.
                        self.pending_async_native_fut = Some(fut);
                        self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
                            func_slot,
                            nargs,
                            nresults,
                            gc_top: self.gc_top,
                        });
                        // Sentinel Err walked up to `drive_one` (same
                        // shape as `host_yield_pending`'s budget yield).
                        // Value::Nil — never seen by user code.
                        return Err(LuaError(Value::Nil));
                    }
                    // pcall/xpcall are yieldable: rather than calling the
                    // protected function through the Rust stack (which cannot be
                    // suspended), push a continuation frame and drive the call
                    // through the interpreter loop (PUC lua_pcallk). A yield
                    // inside it is preserved with the thread's saved frames.
                    // These natives are recognised by comparing function
                    // addresses.
                    use crate::runtime::value::NativeFn;
                    if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
                        return self.begin_pcall(func_slot, nargs, nresults);
                    }
                    if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
                        return self.begin_xpcall(func_slot, nargs, nresults);
                    }
                    // pairs(t) with a __pairs metamethod calls it yieldably (PUC
                    // luaB_pairs); without one, fall through to the plain native.
                    if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
                        let arg = self.stack[(func_slot + 1) as usize];
                        if !self.get_mm(arg, Mm::Pairs).is_nil() {
                            return self.begin_pairs(func_slot, nresults);
                        }
                    }
                    // a native that collects (e.g. `collectgarbage`) roots up to
                    // its own arguments — the caller's live registers all sit
                    // below `func_slot` and stay rooted.
                    self.native_nresults = nresults;
                    self.gc_top = func_slot + nargs + 1;
                    // Push the native onto the running-natives chain BEFORE
                    // firing the call hook so that `debug.getinfo(level)` and
                    // `arg_error` from inside the hook see this native as the
                    // currently-running C function (db.lua :344 reads
                    // `getinfo(2, "f").func` for the just-entered callee).
                    // Popped after the matching return hook fires — even on
                    // error, the pop must happen, so every exit path below
                    // pops both vectors explicitly.
                    self.running_natives.push(nc);
                    self.running_native_slots.push((func_slot, nargs));
                    // PUC C-call discipline: entering a C function sets
                    // L->top to func + 1 + nargs, so a collect triggered
                    // INSIDE the native (explicit `collectgarbage()`, or
                    // an allocation crossing the GC threshold) roots the
                    // whole caller window up to and including the
                    // arguments. Without this raise the cursor is stale —
                    // parked at some earlier, possibly much lower
                    // safe-point — and the collect frees register-held
                    // values of the native's own caller (UAF-C, v2.13
                    // Track WUC). Never lower it: a re-entrant chain
                    // (native → Lua → native) must keep the outermost
                    // window rooted.
                    // NOTE(review): `gc_top` was assigned exactly
                    // `func_slot + nargs + 1` a few lines above and nothing
                    // visible in between changes it, so this `max` looks
                    // like a no-op as written — and that earlier plain
                    // assignment can itself lower the cursor. Confirm which
                    // of the two behaviours is intended.
                    self.gc_top = self.gc_top.max(func_slot + 1 + nargs);
                    // PUC luaD_precall fires the "call" hook for C functions too.
                    // A yield inside the native (coroutine.yield) propagates an
                    // Err and the matching "return" hook fires on resume instead.
                    if let Err(e) = self.hook_call(true, nargs) {
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        return Err(e);
                    }
                    // P09: trap a Rust panic in the native and surface it as
                    // a Lua error rather than letting it unwind through the
                    // VM into the embedder. The VM's internal state may still
                    // be inconsistent after a panic (half-pushed args,
                    // dangling GC references), so embedders that catch this
                    // class of error should drop and re-create the Vm — but
                    // it's still better than tearing the host process down.
                    // `AssertUnwindSafe` is sound because the caller is the
                    // dispatch loop and any half-done state is fenced behind
                    // the immediate Err return below.
                    use std::panic::{AssertUnwindSafe, catch_unwind};
                    let result =
                        match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
                            Ok(r) => r,
                            Err(payload) => {
                                let msg = panic_payload_str(&payload);
                                let s = Value::Str(
                                    self.heap.intern(format!("native panic: {msg}").as_bytes()),
                                );
                                Err(LuaError(s))
                            }
                        };
                    let nret = match result {
                        Ok(n) => n,
                        Err(e) => {
                            // Stash the offending native's name BEFORE the
                            // pop so a dying coroutine's traceback snapshot
                            // can prepend `[C]: in function '<name>'`. Use
                            // pushglobalfuncname (PUC walks package.loaded
                            // to qualify); fall back to "?".
                            self.errored_native =
                                Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
                            self.running_natives.pop();
                            self.running_native_slots.pop();
                            return Err(e);
                        }
                    };
                    // PUC `luaD_poscall` fires the return hook BEFORE moving
                    // results into the function's slot — at that point args
                    // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
                    // results above them at `[func_slot + 1 + nargs, …)`.
                    // luna's `nat_return` has already written the results
                    // into `[func_slot, func_slot + nret)`, so we replay PUC's
                    // layout by copying the results up past the preserved
                    // args, firing the hook (with ftransfer = nargs + 1, so
                    // `getlocal(2, ftransfer..)` reads results), and then
                    // copying back for `finish_results`. db.lua :541 reads
                    // `getinfo("r").ftransfer` + `getlocal` to inspect a
                    // returning native's results this way.
                    if self.hook.ret
                        && !self.in_hook
                        && (self.hook.func.is_some() || self.hook.rust_func.is_some())
                    {
                        let res_dst = func_slot + nargs + 1;
                        let need = (res_dst + nret) as usize;
                        if self.stack.len() < need {
                            self.stack.resize(need, Value::Nil);
                        }
                        // Source and destination ranges can overlap and the
                        // destination is higher, so copy from the last
                        // element down.
                        for i in (0..nret).rev() {
                            self.stack[(res_dst + i) as usize] =
                                self.stack[(func_slot + i) as usize];
                        }
                        // widen the C-frame's argument window for getlocal
                        if let Some(slot) = self.running_native_slots.last_mut() {
                            slot.1 = nargs + nret;
                        }
                        // The hook's outcome is held in `hr` so the window
                        // can be narrowed, the results restored and the
                        // native popped before any error is propagated.
                        let hr = self.hook_return(true, nargs + 1, nret);
                        if let Some(slot) = self.running_native_slots.last_mut() {
                            slot.1 = nargs;
                        }
                        // restore results into the slot finish_results expects
                        // (destination is lower here, so copy forwards)
                        for i in 0..nret {
                            self.stack[(func_slot + i) as usize] =
                                self.stack[(res_dst + i) as usize];
                        }
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        hr?;
                    } else {
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                    }
                    self.finish_results(func_slot, nret, nresults);
                    // the native may have allocated; collect with the results as
                    // the live boundary (PUC checks GC after a call returns).
                    self.maybe_collect_garbage(self.top);
                    return Ok(false);
                }
                v => {
                    // Neither a Lua closure nor a native: look for a
                    // `__call` metamethod to call in its place.
                    let mm = self.get_mm(v, Mm::Call);
                    if mm.is_nil() {
                        return Err(self.call_err(v));
                    }
                    chain += 1;
                    // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
                    // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
                    // and the 5.5 test exercises the new tight bound directly
                    // (calls.lua :225 builds a 16-deep chain and expects the
                    // 16th to error). 5.4 calls.lua :194 instead builds a 20-
                    // deep chain and expects it to succeed.
                    let cap = if self.version >= crate::version::LuaVersion::Lua55 {
                        15
                    } else {
                        MAX_CCMT
                    };
                    if chain > cap {
                        return Err(self.rt_err("'__call' chain too long"));
                    }
                    // slots above shift by one; at a call site those are dead
                    // temps of the current frame
                    self.stack.insert(func_slot as usize, mm);
                    if self.top > func_slot {
                        self.top += 1;
                    }
                    // the original callee is now the handler's first argument
                    nargs += 1;
                }
            }
        }
    }
4439
4440    fn push_frame(
4441        &mut self,
4442        cl: Gc<LuaClosure>,
4443        func_slot: u32,
4444        nargs: u32,
4445        nresults: i32,
4446        from_c: bool,
4447    ) -> Result<(), LuaError> {
4448        if func_slot + 256 > MAX_LUA_STACK {
4449            // PUC `stackerror`: a stack overflow that surfaces while the
4450            // current activation is inside an xpcall message handler is
4451            // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
4452            // error handling". errors.lua :606 expects the inner pcall(loop)
4453            // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
4454            // message matching "error handling".
4455            let msg = if self.msgh_depth > 0 {
4456                "error in error handling"
4457            } else {
4458                "stack overflow"
4459            };
4460            return Err(self.rt_err(msg));
4461        }
4462        let proto = cl.proto;
4463        let nparams = proto.num_params as u32;
4464        // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
4465        // stack just below the new `base`, so a named vararg can be indexed
4466        // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
4467        // `[e1..em][p1..pn]` so the fixed params land at the new base.
4468        let n_varargs = if proto.is_vararg {
4469            nargs.saturating_sub(nparams)
4470        } else {
4471            0
4472        };
4473        if n_varargs > 0 {
4474            let s = (func_slot + 1) as usize;
4475            self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
4476        }
4477        let base = func_slot + 1 + n_varargs;
4478        let need = (base + proto.max_stack as u32) as usize;
4479        if self.stack.len() < need {
4480            self.stack.resize(need, Value::Nil);
4481        }
4482        // wipe the register window beyond the kept parameters (stale values —
4483        // required for GC-safety and codegen). The varargs below `base` survive.
4484        let kept = nargs.saturating_sub(n_varargs).min(nparams);
4485        // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
4486        // need` since `base + nparams <= base + max_stack = need` and `kept <=
4487        // nparams`. `slice::fill` lowers to a single memset on Copy types.
4488        unsafe {
4489            self.stack
4490                .get_unchecked_mut((base + kept) as usize..need)
4491                .fill(Value::Nil);
4492        }
4493        frames_push_sync(
4494            &mut self.frames,
4495            &mut self.frames_top,
4496            CallFrame::Lua(Frame {
4497                closure: cl,
4498                base,
4499                pc: 0,
4500                func_slot,
4501                nresults,
4502                hook_oldpc: u32::MAX,
4503                from_c,
4504                n_varargs,
4505                // single-shot consume: `close_slots` sets pending_tm before each
4506                // handler call; the next Lua frame born is that handler's.
4507                tm: self.pending_tm.take(),
4508                // `run_hook` sets `pending_is_hook` before dispatching the user
4509                // hook so its frame reports `namewhat = "hook"` via getinfo.
4510                is_hook: std::mem::take(&mut self.pending_is_hook),
4511                tailcalls: std::mem::take(&mut self.pending_tailcalls),
4512            }),
4513        );
4514        // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
4515        // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
4516        // the slot at `base + nparams`; the extras sit just below `base` from
4517        // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
4518        // hook; vararg.lua's contradictory expectations were already going to
4519        // fail either way (some asserts want `arg == nil`).
4520        if proto.has_compat_vararg_arg {
4521            let arg_slot = (base + nparams) as usize;
4522            let t = self.heap.new_table();
4523            {
4524                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4525                let tm = unsafe { t.as_mut() };
4526                for i in 0..n_varargs {
4527                    let v = self.stack[(base - n_varargs + i) as usize];
4528                    // bounded by `n_varargs` (≤ MAXUPVAL territory), well
4529                    // below `MAX_ASIZE`
4530                    let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
4531                }
4532                let nk = Value::Str(self.heap.intern(b"n"));
4533                tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
4534                    .expect("'n' key");
4535            }
4536            // once-per-table barrier mirrors SETLIST: t is born BLACK during
4537            // Propagate and the bulk `set_int`/`set` calls above don't barrier
4538            self.heap
4539                .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
4540            self.stack[arg_slot] = Value::Table(t);
4541        }
4542        // PUC luaD_precall fires the "call" hook with the new frame current, so
4543        // a hook calling debug.getinfo(2) sees the entered function. For a Lua
4544        // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
4545        // fixed params count — extras already live below `base`).
4546        // A frame born via OP_TailCall fires "tail call" instead (PUC
4547        // luaD_pretailcall) and skips the matching "return" hook on exit.
4548        let is_tail = self
4549            .frames
4550            .last()
4551            .and_then(|f| f.lua())
4552            .is_some_and(|f| f.tailcalls > 0);
4553        self.hook_call_with(false, nparams, is_tail)?;
4554        Ok(())
4555    }
4556
4557    /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4558    /// the protected call `f` through the interpreter loop. The protected
4559    /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4560    /// at `func_slot+1` lets its results land one slot above the continuation —
4561    /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4562    /// Always returns `Ok(true)`: a continuation is now on the stack to be
4563    /// resolved by the loop (even when `f` is a native that already ran inline).
4564    fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4565        if nargs == 0 {
4566            return Err(crate::vm::builtins::raise_str(
4567                self,
4568                "bad argument #1 to 'pcall' (value expected)",
4569            ));
4570        }
4571        if self.pcall_depth >= MAX_C_DEPTH {
4572            return Err(self.rt_err("C stack overflow"));
4573        }
4574        self.pcall_depth += 1;
4575        frames_push_sync(
4576            &mut self.frames,
4577            &mut self.frames_top,
4578            CallFrame::Cont(NativeCont {
4579                kind: ContKind::Pcall,
4580                func_slot,
4581                nresults,
4582            }),
4583        );
4584        // call f (slot func_slot+1) with the remaining args, asking for all
4585        // results; a yield or error inside propagates with the continuation kept
4586        // on the stack (caught by `unwind` / preserved across a yield).
4587        self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4588        Ok(true)
4589    }
4590
4591    /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4592    /// message handler is stashed in the continuation and the arguments are
4593    /// shifted down over the handler's slot so `f`'s args are contiguous.
4594    fn begin_xpcall(
4595        &mut self,
4596        func_slot: u32,
4597        nargs: u32,
4598        nresults: i32,
4599    ) -> Result<bool, LuaError> {
4600        if nargs < 2 {
4601            return Err(crate::vm::builtins::raise_str(
4602                self,
4603                "bad argument #2 to 'xpcall' (value expected)",
4604            ));
4605        }
4606        if self.pcall_depth >= MAX_C_DEPTH {
4607            return Err(self.rt_err("C stack overflow"));
4608        }
4609        self.pcall_depth += 1;
4610        // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4611        // close its gap so f's args become [f@+1, a1@+2, ...].
4612        let handler = self.stack[(func_slot + 2) as usize];
4613        let nfargs = nargs - 2;
4614        for i in 0..nfargs {
4615            self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4616        }
4617        self.top = func_slot + 2 + nfargs;
4618        frames_push_sync(
4619            &mut self.frames,
4620            &mut self.frames_top,
4621            CallFrame::Cont(NativeCont {
4622                kind: ContKind::Xpcall { handler },
4623                func_slot,
4624                nresults,
4625            }),
4626        );
4627        self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4628        Ok(true)
4629    }
4630
4631    /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4632    /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4633    /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4634    /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4635    /// results land exactly where `pairs`'s results belong.
4636    fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4637        let arg = self.stack[(func_slot + 1) as usize];
4638        let mm = self.get_mm(arg, Mm::Pairs);
4639        // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4640        self.stack[func_slot as usize] = mm;
4641        self.top = func_slot + 2;
4642        frames_push_sync(
4643            &mut self.frames,
4644            &mut self.frames_top,
4645            CallFrame::Cont(NativeCont {
4646                kind: ContKind::Pairs,
4647                func_slot,
4648                nresults,
4649            }),
4650        );
4651        self.begin_call(func_slot, Some(1), 4, true)?;
4652        Ok(true)
4653    }
4654
4655    /// The running (top) Lua frame. The interpreter only reads this while a Lua
4656    /// frame is on top — a continuation frame is never the running frame (it is
4657    /// consumed the instant the call it protects unwinds onto it).
4658    #[inline]
4659    fn top_frame(&self) -> &Frame {
4660        self.frames
4661            .last()
4662            .and_then(CallFrame::lua)
4663            .expect("running Lua frame")
4664    }
4665
4666    #[inline]
4667    fn top_frame_mut(&mut self) -> &mut Frame {
4668        self.frames
4669            .last_mut()
4670            .and_then(CallFrame::lua_mut)
4671            .expect("running Lua frame")
4672    }
4673
4674    /// Pad/announce results sitting at func_slot.
4675    pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4676        // v2.3 P1B-A: capture the call's high-water-mark before
4677        // setting the new top so we can Nil-clear slots that the
4678        // call temporarily wrote but no longer holds — matching
4679        // PUC's `L->top` discipline (slots past L->top are "free"
4680        // and the next push overwrites them). Without this clear,
4681        // a stale `Value::Closure` (e.g. the called function
4682        // itself, when wanted = 0) sits at `func_slot` and a
4683        // later GC with wider `gc_top` traces it after the
4684        // closure has been freed by a previous narrow safe-point
4685        // GC → heap-buffer-overflow in `Marker::header` (UAF-A
4686        // sort.lua AA case).
4687        let prev_top = self.top as usize;
4688        if wanted < 0 {
4689            self.top = func_slot + nret;
4690        } else {
4691            let wanted = wanted as u32;
4692            let need = (func_slot + wanted) as usize;
4693            if self.stack.len() < need {
4694                self.stack.resize(need, Value::Nil);
4695            }
4696            for i in nret..wanted {
4697                self.stack[(func_slot + i) as usize] = Value::Nil;
4698            }
4699            self.top = func_slot + wanted;
4700        }
4701        let new_top = self.top as usize;
4702        let clear_end = prev_top.min(self.stack.len());
4703        if new_top < clear_end {
4704            for slot in &mut self.stack[new_top..clear_end] {
4705                *slot = Value::Nil;
4706            }
4707        }
4708    }
4709
4710    /// v1.1 B10 Stage 1 — current Lua call-frame depth (read-only).
4711    /// Used by `EvalFuture` on the bootstrap poll to compute the
4712    /// `entry_depth` it will pass to subsequent resume slices.
4713    pub(crate) fn frame_count(&self) -> usize {
4714        self.frames.len()
4715    }
4716
4717    fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4718        let nret = self.top - func_slot;
4719        let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4720        self.stack.truncate(func_slot as usize);
4721        self.top = func_slot;
4722        out
4723    }
4724
4725    // ---- open upvalues ----
4726
4727    #[doc(hidden)]
4728    pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4729        match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4730            Ok(i) => self.open_upvals[i].1,
4731            Err(i) => {
4732                let uv = self.heap.new_upvalue(UpvalState::Open {
4733                    slot,
4734                    thread: self.current,
4735                });
4736                self.open_upvals.insert(i, (slot, uv));
4737                uv
4738            }
4739        }
4740    }
4741
4742    pub(crate) fn close_from(&mut self, slot: u32) {
4743        while let Some(&(s, uv)) = self.open_upvals.last() {
4744            if s < slot {
4745                break;
4746            }
4747            let v = self.stack[s as usize];
4748            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4749            unsafe { uv.as_mut() }.set_closed(v);
4750            self.heap
4751                .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4752            self.open_upvals.pop();
4753        }
4754    }
4755
4756    /// Register a to-be-closed slot (TBC op / generic-for closing value).
4757    fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4758        let v = self.stack[slot as usize];
4759        if matches!(v, Value::Nil | Value::Bool(false)) {
4760            return Ok(()); // nil and false are silently ignored
4761        }
4762        if self.get_mm(v, Mm::Close).is_nil() {
4763            // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4764            // (a <type> value)"; the local's name comes from the running
4765            // frame's locvars at this pc.
4766            let tn = v.type_name();
4767            let f = self.top_frame();
4768            let reg = slot - f.base;
4769            let pc = (f.pc as usize).saturating_sub(1);
4770            let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4771                Some(n) => format!("variable '{n}'"),
4772                None => "to-be-closed slot".to_string(),
4773            };
4774            return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4775        }
4776        debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4777        self.tbc.push(slot);
4778        Ok(())
4779    }
4780
4781    /// Close upvalues and run `__close` handlers for slots ≥ `from`
4782    /// (handlers in reverse registration order; PUC luaF_close).
4783    fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
4784        self.close_from(from);
4785        // PUC: handlers run in reverse declaration order; an error raised by a
4786        // handler becomes the error object passed to the remaining ones, and
4787        // the rest are still closed. The last raised error propagates.
4788        let mut pending = err;
4789        let mut result = Ok(());
4790        let saved_err = self.closing_err;
4791        // On a normal close the handler runs within the closing function's
4792        // activation (debug parent = that function); during error unwinding the
4793        // function's frame is already gone, so the handler sits at the C
4794        // boundary instead (PUC: luaF_close runs after the ci is restored).
4795        let error_close = err.is_some();
4796        while let Some(&s) = self.tbc.last() {
4797            if s < from {
4798                break;
4799            }
4800            self.tbc.pop();
4801            let v = self.stack[s as usize];
4802            if matches!(v, Value::Nil | Value::Bool(false)) {
4803                continue;
4804            }
4805            let mm = self.get_mm(v, Mm::Close);
4806            if mm.is_nil() {
4807                // PUC `prepclosingmethod`: the __close metamethod was present
4808                // at OP_TBC (else we would have errored there) but has since
4809                // been removed/replaced. Treat as a non-callable target.
4810                let tn = self.obj_typename(v);
4811                let e = self.rt_err(&format!(
4812                    "attempt to call a {tn} value (metamethod 'close')"
4813                ));
4814                pending = Some(e.0);
4815                result = Err(e);
4816                continue;
4817            }
4818            // root the pending error: a handler may trigger a collection
4819            self.closing_err = pending;
4820            // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
4821            // getinfo report the handler as "in metamethod 'close'". Saved/
4822            // restored around the call to cover the path where `mm` is a
4823            // native (`push_frame` never consumes it) or it raises before
4824            // reaching push_frame.
4825            let saved_tm = self.pending_tm.replace("close");
4826            // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
4827            // is nil on a normal close (5.4 locals.lua :875's
4828            // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
4829            // through the yield). PUC 5.5 dropped the trailing nil: a clean
4830            // close passes only `obj`, the error case still passes both
4831            // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
4832            // normal-close arms, n=2 for the error arm).
4833            let call = match pending {
4834                Some(e) => self.call_value_impl(mm, &[v, e], error_close),
4835                None => {
4836                    if self.version >= LuaVersion::Lua55 {
4837                        self.call_value_impl(mm, &[v], error_close)
4838                    } else {
4839                        self.call_value_impl(mm, &[v, Value::Nil], error_close)
4840                    }
4841                }
4842            };
4843            self.pending_tm = saved_tm;
4844            if let Err(e) = call {
4845                pending = Some(e.0);
4846                result = Err(e);
4847            }
4848        }
4849        self.closing_err = saved_err;
4850        result
4851    }
4852
4853    /// Yieldable variant of `close_slots`: drive the chain of `__close`
4854    /// handlers for slots ≥ `from` through the interpreter loop with a
4855    /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
4856    /// suspends cleanly (the close iteration's state rides on the thread's
4857    /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
4858    /// applied to `luaF_close`. `after` runs when every slot is closed; if
4859    /// `after` is `Return` and we've returned past `entry_depth`,
4860    /// `Ok(Some(vals))` carries the result up to the host caller.
4861    fn begin_close(
4862        &mut self,
4863        from: u32,
4864        err: Option<Value>,
4865        after: AfterClose,
4866        entry_depth: usize,
4867    ) -> Result<Option<Vec<Value>>, LuaError> {
4868        self.close_from(from);
4869        self.drive_close(from, err, after, entry_depth)
4870    }
4871
4872    /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
4873    /// non-callable-mm error for an `__close` that was reset to a bad value
4874    /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
4875    /// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
4876    /// the interpreter then drives the handler and re-enters this driver via
4877    /// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
4878    /// the threaded error (if any) propagates or `after` fires.
4879    fn drive_close(
4880        &mut self,
4881        from: u32,
4882        mut pending: Option<Value>,
4883        after: AfterClose,
4884        entry_depth: usize,
4885    ) -> Result<Option<Vec<Value>>, LuaError> {
4886        loop {
4887            let drained = match self.tbc.last() {
4888                None => true,
4889                Some(&s) => s < from,
4890            };
4891            if drained {
4892                return self.finish_close_after(after, pending, entry_depth);
4893            }
4894            let s = self.tbc.pop().expect("tbc non-empty");
4895            let v = self.stack[s as usize];
4896            if matches!(v, Value::Nil | Value::Bool(false)) {
4897                continue;
4898            }
4899            let mm = self.get_mm(v, Mm::Close);
4900            if mm.is_nil() {
4901                let tn = self.obj_typename(v);
4902                let e = self.rt_err(&format!(
4903                    "attempt to call a {tn} value (metamethod 'close')"
4904                ));
4905                pending = Some(e.0);
4906                continue;
4907            }
4908            // A real handler: stage [mm, v, (err?)] above the current top,
4909            // record the close iteration state in a Cont::Close, and let the
4910            // interpreter dispatch the handler. On return the run() head
4911            // re-enters this driver via the Cont::Close consumer.
4912            let func_slot = self.top;
4913            let error_close = pending.is_some();
4914            let need = (func_slot + 3) as usize;
4915            if self.stack.len() < need {
4916                self.stack.resize(need, Value::Nil);
4917            }
4918            self.stack[func_slot as usize] = mm;
4919            self.stack[func_slot as usize + 1] = v;
4920            // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
4921            // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
4922            let nargs = match pending {
4923                Some(e) => {
4924                    self.stack[func_slot as usize + 2] = e;
4925                    2u32
4926                }
4927                None => {
4928                    if self.version >= LuaVersion::Lua55 {
4929                        1u32
4930                    } else {
4931                        self.stack[func_slot as usize + 2] = Value::Nil;
4932                        2u32
4933                    }
4934                }
4935            };
4936            self.top = func_slot + 1 + nargs;
4937            // Root the pending error during the call (a handler may collect).
4938            let saved_err = self.closing_err;
4939            self.closing_err = pending;
4940            // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
4941            // for traceback / getinfo.
4942            let saved_tm = self.pending_tm.replace("close");
4943            frames_push_sync(
4944                &mut self.frames,
4945                &mut self.frames_top,
4946                CallFrame::Cont(NativeCont {
4947                    kind: ContKind::Close(CloseCont {
4948                        from,
4949                        pending,
4950                        after,
4951                    }),
4952                    func_slot,
4953                    nresults: 0,
4954                }),
4955            );
4956            // PUC luaF_close runs a normal close *within* the closing
4957            // function's activation (debug parent = that function); during an
4958            // error unwind the function's frame is already gone and the
4959            // handler sits at the C boundary instead.
4960            let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
4961            self.pending_tm = saved_tm;
4962            self.closing_err = saved_err;
4963            r?;
4964            return Ok(None);
4965        }
4966    }
4967
4968    /// Fire `after` once every `__close` handler has run. `Block` propagates
4969    /// any remaining error or simply continues; `Return` performs OP_Return's
4970    /// tail (hook + frame pop + result delivery) and may surface results to
4971    /// the host when the function whose return triggered the close was the
4972    /// entry activation, but only on a clean drain — a pending error skips
4973    /// the return tail and propagates instead. `ResumeUnwind` pops the
4974    /// deferred Lua frame and re-raises, letting a handler's own error win
4975    /// over the original propagating one (PUC luaF_close).
4976    fn finish_close_after(
4977        &mut self,
4978        after: AfterClose,
4979        pending: Option<Value>,
4980        entry_depth: usize,
4981    ) -> Result<Option<Vec<Value>>, LuaError> {
4982        match after {
4983            AfterClose::Block => match pending {
4984                Some(e) => Err(LuaError(e)),
4985                None => Ok(None),
4986            },
4987            AfterClose::Return {
4988                abs_a,
4989                nret,
4990                from_native,
4991            } => match pending {
4992                Some(e) => Err(LuaError(e)),
4993                None => self.complete_return(abs_a, nret, from_native, entry_depth),
4994            },
4995            AfterClose::ResumeUnwind { func_slot, err } => {
4996                // The aborting Lua frame was popped before `begin_close`;
4997                // restore the catcher's stack window down to `func_slot` and
4998                // re-raise — preferring a handler-raised error over the
4999                // original (PUC luaF_close).
5000                self.stack.truncate(func_slot as usize);
5001                self.top = func_slot;
5002                self.tbc.retain(|&s| s < func_slot);
5003                Err(LuaError(pending.unwrap_or(err)))
5004            }
5005        }
5006    }
5007
5008    /// OP_Return's post-close tail: fire the "return" hook (frame still
5009    /// current), pop the Lua frame, slide results into `func_slot`, then
5010    /// either hand them to the host (`Ok(Some(vals))` when we've returned
5011    /// past `entry_depth`), leave them contiguous for an exposed
5012    /// pcall/xpcall continuation, or finish into the caller's expected
5013    /// result slot. Mirrors the synchronous OP_Return tail so both paths
5014    /// share semantics — the `from_native` flag selects the right "return"
5015    /// hook context for `hook_return`.
5016    fn complete_return(
5017        &mut self,
5018        abs_a: u32,
5019        nret: u32,
5020        from_native: bool,
5021        entry_depth: usize,
5022    ) -> Result<Option<Vec<Value>>, LuaError> {
5023        // ftransfer is the local index (1-based) of the first result, as
5024        // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
5025        // exposes locals starting at `frame.base` (= func_slot + 1 +
5026        // n_varargs for a vararg call), so the conversion is the absolute
5027        // result slot minus base, plus one to make it 1-based. db.lua 5.4
5028        // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
5029        // shape end-to-end.
5030        let ftransfer = self
5031            .frames
5032            .last()
5033            .and_then(CallFrame::lua)
5034            .map(|fr| {
5035                let raw = abs_a.saturating_sub(fr.base) + 1;
5036                // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
5037                // local injected at index `numparams + 1`, so getlocal
5038                // numbering shifts results past it (5.5 db.lua :539
5039                // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
5040                if fr.closure.proto.has_vararg_table_pseudo {
5041                    raw + 1
5042                } else {
5043                    raw
5044                }
5045            })
5046            .unwrap_or(1);
5047        // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
5048        // per tail call that collapsed into this activation, *after* its
5049        // own "return". `tailcalls` tracks that count exactly (PUC
5050        // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
5051        // "return" hook fires once even when the activation absorbed
5052        // multiple tail calls — only `istailcall` on getinfo surfaces the
5053        // collapse. 5.1 db.lua :366 pins the event ordering.
5054        let tailcalls = if self.version <= LuaVersion::Lua51 {
5055            self.frames
5056                .last()
5057                .and_then(|f| f.lua())
5058                .map(|f| f.tailcalls)
5059                .unwrap_or(0)
5060        } else {
5061            0
5062        };
5063        self.hook_return(from_native, ftransfer, nret)?;
5064        for _ in 0..tailcalls {
5065            self.hook_tail_return()?;
5066        }
5067        let CallFrame::Lua(fr) =
5068            frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
5069        else {
5070            unreachable!("returning from a non-Lua frame")
5071        };
5072        for i in 0..nret {
5073            self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
5074        }
5075        if self.frames.len() < entry_depth {
5076            self.top = fr.func_slot + nret;
5077            return Ok(Some(self.take_results(fr.func_slot)));
5078        } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
5079            self.top = fr.func_slot + nret;
5080        } else {
5081            self.finish_results(fr.func_slot, nret, fr.nresults);
5082        }
5083        Ok(None)
5084    }
5085
5086    #[doc(hidden)]
5087    pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
5088        match cl.upvals()[idx as usize].state() {
5089            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
5090            UpvalState::Closed(v) => v,
5091        }
5092    }
5093
5094    fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
5095        let uv = cl.upvals()[idx as usize];
5096        match uv.state() {
5097            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
5098            UpvalState::Closed(_) => {
5099                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5100                unsafe { uv.as_mut() }.set_closed(v);
5101                // forward barrier: a closed upvalue is single-slot, so the
5102                // forward variant is cheaper than barrier_back (PUC uses
5103                // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
5104                // tables / threads).
5105                self.heap
5106                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
5107            }
5108        }
5109    }
5110
5111    // ---- register / error helpers ----
5112
5113    #[inline(always)]
5114    fn r(&self, base: u32, i: u32) -> Value {
5115        // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
5116        // at frame entry (`push_frame` sizes the stack up to base + max_stack),
5117        // and every bytecode-generated reference falls within `[0, max_stack)`.
5118        // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
5119        // reason. The bounds check would re-validate this invariant on every
5120        // op — the dispatch hot path can't afford it.
5121        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5122        unsafe { *self.stack.get_unchecked((base + i) as usize) }
5123    }
5124
5125    #[inline(always)]
5126    fn set_r(&mut self, base: u32, i: u32, v: Value) {
5127        // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
5128        // frame-entry contract.
5129        unsafe {
5130            *self.stack.get_unchecked_mut((base + i) as usize) = v;
5131        }
5132    }
5133
5134    #[doc(hidden)]
5135    pub fn rt_err(&mut self, msg: &str) -> LuaError {
5136        let text = match self.position_prefix() {
5137            Some(p) => format!("{p}{msg}"),
5138            None => msg.to_string(),
5139        };
5140        LuaError(Value::Str(self.heap.intern(text.as_bytes())))
5141    }
5142
    /// Error without the `chunk:line:` position prefix: `msg` is interned
    /// as-is and wrapped in a `LuaError`. PUC's `resume_error` (ldo.c)
    /// pushes its message as a bare literal, so `cannot resume dead
    /// coroutine` etc. must not be prefixed the way `rt_err` messages are.
    pub(crate) fn plain_err(&mut self, msg: &str) -> LuaError {
        LuaError(Value::Str(self.heap.intern(msg.as_bytes())))
    }
5149
5150    pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
5151        let extra = self.subject_varinfo(v);
5152        let tn = self.obj_typename(v);
5153        self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
5154    }
5155
5156    /// Name the offending operand of the current instruction (PUC varinfo) for
5157    /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
5158    /// to the instruction's subject register(s); a native-raised error whose
5159    /// current instruction doesn't hold `bad` simply yields "".
5160    fn subject_varinfo(&self, bad: Value) -> String {
5161        use crate::vm::isa::Op;
5162        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5163            return String::new();
5164        };
5165        let proto = f.closure.proto;
5166        let p: &crate::runtime::Proto = &proto;
5167        let pc = f.pc as usize;
5168        if pc == 0 || pc > p.code.len() {
5169            return String::new();
5170        }
5171        let instr = p.code[pc - 1];
5172        let mut cands: Vec<u32> = Vec::new();
5173        match instr.op() {
5174            // indexed reads / length / method: the table/object is in B
5175            Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5176                cands.push(instr.b());
5177            }
5178            // indexed writes / calls: the table/function is in A
5179            Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5180                cands.push(instr.a());
5181            }
5182            // arithmetic/bitwise: a register operand (B, and C unless constant)
5183            Op::Add
5184            | Op::Sub
5185            | Op::Mul
5186            | Op::Div
5187            | Op::Mod
5188            | Op::Pow
5189            | Op::IDiv
5190            | Op::BAnd
5191            | Op::BOr
5192            | Op::BXor
5193            | Op::Shl
5194            | Op::Shr => {
5195                cands.push(instr.b());
5196                if !instr.k() {
5197                    cands.push(instr.c());
5198                }
5199            }
5200            Op::Unm | Op::BNot => cands.push(instr.b()),
5201            Op::Concat => {
5202                let a = instr.a();
5203                for r in a..a + instr.b() {
5204                    cands.push(r);
5205                }
5206            }
5207            _ => {}
5208        }
5209        for reg in cands {
5210            if self.r(f.base, reg).raw_eq(bad) {
5211                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5212                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5213                    None => String::new(),
5214                };
5215            }
5216        }
5217        String::new()
5218    }
5219
5220    /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5221    /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5222    /// 'add')" when the call is a metamethod dispatched by the current opcode.
5223    fn call_err(&mut self, v: Value) -> LuaError {
5224        let extra = self.call_target_varinfo(v);
5225        let tn = self.obj_typename(v);
5226        self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5227    }
5228
5229    /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5230    /// frame before the call, so the opcode that triggered it lives in the
5231    /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5232    /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5233    fn call_target_varinfo(&self, bad: Value) -> String {
5234        use crate::vm::isa::Op;
5235        let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5236            return String::new();
5237        };
5238        let proto = f.closure.proto;
5239        let p: &crate::runtime::Proto = &proto;
5240        let pc = f.pc as usize;
5241        if pc == 0 || pc > p.code.len() {
5242            return String::new();
5243        }
5244        let instr = p.code[pc - 1];
5245        match instr.op() {
5246            Op::Call | Op::TailCall => {
5247                let reg = instr.a();
5248                if self.r(f.base, reg).raw_eq(bad) {
5249                    match crate::vm::objname::getobjname(p, pc - 1, reg) {
5250                        Some((kind, name)) => format!(" ({kind} '{name}')"),
5251                        None => String::new(),
5252                    }
5253                } else {
5254                    String::new()
5255                }
5256            }
5257            op => match mm_event_name(op) {
5258                Some(ev) => format!(" (metamethod '{ev}')"),
5259                None => String::new(),
5260            },
5261        }
5262    }
5263
5264    /// "number has no integer representation", enriched (PUC luaG_tointerror)
5265    /// with a "(field 'x')"-style suffix naming the offending operand of the
5266    /// current arithmetic instruction when it can be recovered from bytecode.
5267    fn no_int_rep_err(&mut self) -> LuaError {
5268        let extra = self.bad_operand_varinfo();
5269        self.rt_err(&format!("number{extra} has no integer representation"))
5270    }
5271
5272    /// Inspect the current frame's faulting instruction: find the register
5273    /// operand holding a float with no integer representation and name it.
5274    fn bad_operand_varinfo(&self) -> String {
5275        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5276            return String::new();
5277        };
5278        let proto = f.closure.proto;
5279        let p: &crate::runtime::Proto = &proto;
5280        let pc = f.pc as usize;
5281        if pc == 0 || pc > p.code.len() {
5282            return String::new();
5283        }
5284        let instr = p.code[pc - 1];
5285        let mut regs = vec![instr.b()];
5286        if !instr.k() {
5287            regs.push(instr.c());
5288        }
5289        for reg in regs {
5290            let v = self.r(f.base, reg);
5291            if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5292                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5293                    Some((kind, name)) => format!(" ({kind} '{name}')"),
5294                    None => String::new(),
5295                };
5296            }
5297        }
5298        String::new()
5299    }
5300
5301    /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5302    /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5303    /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5304    /// native call), PUC pushes no prefix — match that by looking only at the
5305    /// topmost frame directly and bailing if it is anything but a Lua frame.
5306    pub(crate) fn position_prefix(&self) -> Option<String> {
5307        let f = self.frames.last().and_then(CallFrame::lua)?;
5308        let proto = f.closure.proto;
5309        if proto.source.as_bytes().is_empty() {
5310            return Some(self.stripped_prefix());
5311        }
5312        if proto.lines.is_empty() {
5313            return None;
5314        }
5315        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5316        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5317        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5318        let display = crate::vm::lib_debug::chunk_id(raw);
5319        let src = String::from_utf8_lossy(&display).into_owned();
5320        Some(format!("{src}:{line}: "))
5321    }
5322
5323    /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5324    /// for the source and renders the line as "?" (so the prefix reads
5325    /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5326    /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5327    /// matches `^%?:%-1:`).
5328    fn stripped_prefix(&self) -> String {
5329        if self.version >= crate::version::LuaVersion::Lua55 {
5330            "?:?: ".to_string()
5331        } else {
5332            "?:-1: ".to_string()
5333        }
5334    }
5335
5336    /// Position prefix of the Lua frame `level` steps up from the running C
5337    /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5338    /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5339    /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5340    /// caller's frame is reported even across pcall/xpcall continuations.
5341    /// `luaL_where(level)` for `error()`: unlike `dbg_frame` (whose 5.2+
5342    /// level numbering skips Cont activations to match db.lua's getinfo
5343    /// shape), PUC counts EVERY CallInfo — a C caller occupies a level of
5344    /// its own. `pcall(pcall, error, "msg")` must therefore resolve
5345    /// level 1 to the inner pcall (a C activation, no line info → no
5346    /// prefix), not tunnel through to the Lua frame below (v2.13
5347    /// CORPUS-IV fixture 239).
5348    pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5349        if level < 1 {
5350            return None;
5351        }
5352        let v51 = self.version <= LuaVersion::Lua51;
5353        let mut lvl = level;
5354        let mut found: Option<usize> = None;
5355        'walk: for fi in (0..self.frames.len()).rev() {
5356            match &self.frames[fi] {
5357                CallFrame::Lua(f) => {
5358                    lvl -= 1;
5359                    if lvl == 0 {
5360                        found = Some(fi);
5361                        break 'walk;
5362                    }
5363                    if v51 {
5364                        for _ in 0..f.tailcalls {
5365                            lvl -= 1;
5366                            if lvl == 0 {
5367                                return None; // synthetic tail level: no line info
5368                            }
5369                        }
5370                    }
5371                    if f.from_c {
5372                        lvl -= 1;
5373                        if lvl == 0 {
5374                            return None; // C activation: no line info
5375                        }
5376                    }
5377                }
5378                CallFrame::Cont(_) => {
5379                    // A continuation-driven native (pcall/xpcall/close)
5380                    // is a C activation — it takes a level and has no
5381                    // line info.
5382                    lvl -= 1;
5383                    if lvl == 0 {
5384                        return None;
5385                    }
5386                }
5387            }
5388        }
5389        let fi = found?;
5390        let f = self.frames[fi].lua()?;
5391        let proto = f.closure.proto;
5392        // PUC luaG_addinfo: a stripped chunk has no source — see
5393        // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5394        if proto.source.as_bytes().is_empty() {
5395            return Some(self.stripped_prefix());
5396        }
5397        // a stripped chunk carries no per-instruction line info
5398        if proto.lines.is_empty() {
5399            return None;
5400        }
5401        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5402        // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5403        // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5404        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5405        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5406        let display = crate::vm::lib_debug::chunk_id(raw);
5407        let src = String::from_utf8_lossy(&display).into_owned();
5408        Some(format!("{src}:{line}: "))
5409    }
5410
5411    // ---- the interpreter ----
5412
5413    fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5414        let entry_depth = self.frames.len();
5415        self.exec_with(entry_depth)
5416    }
5417
    /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
    /// `entry_depth` (captured pre-yield by `drive_one`). Called by
    /// `EvalFuture::poll` on every poll after the first to walk the
    /// existing call frames until the next `BudgetExhausted` or
    /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
    /// embedder reaches it through `Vm::eval_async`.
    ///
    /// This is a direct delegation to `exec_with`, which runs from the
    /// current top frame down to (but not past) `entry_depth` frames.
    /// Coroutine driving passes `entry_depth = 1` so the whole thread
    /// runs to completion or a yield.
    pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
        self.exec_with(entry_depth)
    }
5430
5431    fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5432        loop {
5433            let r = self.run(entry_depth);
5434            if r.is_err()
5435                && (self.yielding.is_some()
5436                    || self.terminating.is_some()
5437                    || self.host_yield_pending
5438                    || self.pending_async_native_fut.is_some())
5439            {
5440                // a `coroutine.yield` is in flight: keep the frames intact (they
5441                // are the suspended coroutine's saved state) and propagate to
5442                // resume. A self-close termination propagates the same way, so a
5443                // protecting pcall on the way out cannot catch (unwind) it.
5444                // v1.1 B10 — `host_yield_pending` is the async-mode
5445                // analogue: the sentinel must reach `drive_one` without
5446                // a protecting `pcall` swallowing it.
5447                return r;
5448            }
5449            match r {
5450                Ok(vals) => return Ok(vals),
5451                // unwind toward `entry_depth`. A protecting pcall/xpcall
5452                // continuation caught along the way turns the error into
5453                // `false, msg` and the loop resumes running its caller; an
5454                // uncaught error propagates out.
5455                Err(e) => match self.unwind(e.0, entry_depth) {
5456                    Unwound::Caught => continue,
5457                    Unwound::CaughtReturn(vals) => return Ok(vals),
5458                    Unwound::Propagated(err) => return Err(err),
5459                },
5460            }
5461        }
5462    }
5463
    /// Unwind the call stack from the error point toward `entry_depth`, running
    /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
    /// continuation frame at/above `entry_depth` (the error is *caught*: its
    /// slot receives `false, msg`); if none is reached, the error propagates.
    ///
    /// Outcomes:
    /// * `Unwound::Caught` — a pcall/xpcall continuation caught the error, or
    ///   a close drive returned `Ok(None)`; the caller re-enters the run loop.
    /// * `Unwound::CaughtReturn(vals)` — the catch left fewer than
    ///   `entry_depth` frames, so the `false, msg` results are returned
    ///   directly.
    /// * `Unwound::Propagated(err)` — every frame down to `entry_depth` was
    ///   popped without a catch.
    fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
        // PUC 5.5 `luaG_errormsg` substitutes "<no error object>" when the
        // error object is nil — so `pcall(function() error(nil) end)` returns
        // that string instead of nil, and `assert(nil, nil)` (whose path
        // throws nil via `lua_settop(L, 1)`) also surfaces a string. Earlier
        // dialects (5.4 and below) keep the nil — 5.4 errors.lua :49 asserts
        // `doit("error()") == nil` and luna would fail that if it always
        // substituted. luna's native `error()` still does its own conversion
        // for direct callers.
        if matches!(err, Value::Nil) && self.version >= crate::version::LuaVersion::Lua55 {
            err = Value::Str(self.heap.intern(b"<no error object>"));
        }
        // The protected call runs in-place among the caller frames' registers,
        // so truncating the failed frames here cuts into caller windows below
        // the catcher. Snapshot the live length: at the error point the stack
        // already spans every surviving frame's window, so restoring it after a
        // catch reinstates them all (the reclaimed slots above are dead temps).
        // PUC handles overflow recovery via a separate EXTRA_STACK reserve;
        // we instead clamp the restore to the catcher's caller window when the
        // error point was at the stack limit (cause: the next `call_value_impl`
        // picks `func_slot = stack.len()` which would otherwise re-overflow).
        let saved_len = self.stack.len();
        // Snapshot the traceback at the error point — before any frame is
        // popped — so an `xpcall` msgh (which runs after the failed frames are
        // gone) can still describe the error site. The handler frame about to
        // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
        // visible here; once popped, `debug.traceback` would miss it.
        // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
        // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
        // recovery — locals.lua:659) re-overflows. Capture-once propagates
        // through nested unwinds (inner→outer) without re-running msgh.
        if self.error_traceback.is_none() {
            self.error_traceback = Some(self.traceback_bytes(1));
        }
        while self.frames.len() >= entry_depth {
            match *self.frames.last().expect("frame") {
                // a yieldable-metamethod continuation does not catch: discard the
                // abandoned instruction and keep unwinding (PUC drops the partial
                // op on error).
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Meta(mc),
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = mc.saved_top.min(func_slot);
                    self.tbc.retain(|&s| s < func_slot);
                }
                // a __pairs continuation does not catch either: an error inside
                // the metamethod propagates past `pairs`.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Pairs,
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = func_slot;
                    self.tbc.retain(|&s| s < func_slot);
                }
                // a __close continuation does not catch: drop the half-run
                // handler's window, then continue the close yieldably with
                // the new error threaded as `pending`. Preserve `cc.after`
                // verbatim — `Return`/`Block` originating from an aborting
                // OP_Return/OP_Close will be short-circuited by
                // `finish_close_after` (pending propagates as Err); a
                // `ResumeUnwind` originated by our own Lua-frame handler
                // must keep its deferred frame-pop semantics so that frame
                // is not orphaned. If a fresh handler yields, `drive_close`
                // pushes another `Cont::Close` and we return `Caught` so
                // `exec_with` re-enters the run loop.
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Close(cc),
                    func_slot,
                    ..
                }) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.stack.truncate(func_slot as usize);
                    self.top = func_slot;
                    self.tbc.retain(|&s| s < func_slot);
                    match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
                        Ok(Some(_)) => {
                            unreachable!(
                                "Block / Return / ResumeUnwind never return host values mid-unwind"
                            )
                        }
                        Ok(None) => return Unwound::Caught,
                        Err(e) => {
                            // the close re-raised (possibly a new error):
                            // keep unwinding with it.
                            err = e.0;
                            continue;
                        }
                    }
                }
                // the remaining continuation kinds (pcall / xpcall) catch.
                CallFrame::Cont(nc) => {
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    self.pcall_depth -= 1;
                    let result = match nc.kind {
                        ContKind::Pcall => err,
                        ContKind::Xpcall { handler } => {
                            // PUC keeps `L->errfunc` set across the handler's
                            // call: `luaG_errormsg` re-fires the handler when
                            // it raises (so `xpcall(error, err, 170)` lets the
                            // chain bottom out at err(0) → "END"). luna mirrors
                            // that by looping until the handler returns or
                            // luna's `iters` cap forces termination.
                            //
                            // The cap models PUC's nCcalls soft window
                            // (MAXCCALLS/10*11): once tripped, `stackerror`
                            // raises "C stack overflow" via `luaG_runerror`
                            // which itself re-enters `luaG_errormsg`, so the
                            // handler runs once more with that string and
                            // naturally returns it (errors.lua :637 at N=300).
                            // We count iterations per Cont::Xpcall rather than
                            // a global counter — nested xpcalls each get their
                            // own budget, matching the way PUC's stack frames
                            // accumulate per dispatch path.
                            const MSGH_CAP: u32 = MAX_C_DEPTH;
                            let mut cur_err = err;
                            let mut iters: u32 = 0;
                            let mut capped = false;
                            loop {
                                if iters >= MSGH_CAP && !capped {
                                    cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
                                    capped = true;
                                }
                                iters += 1;
                                self.msgh_depth += 1;
                                let r = self.call_value(handler, &[cur_err]);
                                self.msgh_depth -= 1;
                                match r {
                                    Ok(hr) => {
                                        // the handler's first result (or nil)
                                        // becomes the caught message.
                                        break hr.first().copied().unwrap_or(Value::Nil);
                                    }
                                    Err(_) if capped => {
                                        // the handler still errored on the
                                        // synthesized "C stack overflow"; fall
                                        // back to PUC's LUA_ERRERR string.
                                        break Value::Str(
                                            self.heap.intern(b"error in error handling"),
                                        );
                                    }
                                    Err(e) => {
                                        // the handler raised: feed its error
                                        // back into the handler.
                                        cur_err = e.0;
                                    }
                                }
                            }
                        }
                        ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
                            unreachable!("Meta/Pairs/Close cont handled above")
                        }
                    };
                    // the error has been caught (pcall/xpcall): the captured
                    // traceback was for that error and is no longer in flight.
                    self.error_traceback = None;
                    // write `false, result` into the catcher's slot pair,
                    // growing the stack first if the unwind truncated below it.
                    let fs = nc.func_slot as usize;
                    if self.stack.len() < fs + 2 {
                        self.stack.resize(fs + 2, Value::Nil);
                    }
                    self.stack[fs] = Value::Bool(false);
                    self.stack[fs + 1] = result;
                    self.top = nc.func_slot + 2;
                    self.tbc.retain(|&s| s < nc.func_slot);
                    if self.frames.len() < entry_depth {
                        return Unwound::CaughtReturn(self.take_results(nc.func_slot));
                    }
                    self.finish_results(nc.func_slot, 2, nc.nresults);
                    // Reinstate the caller's full register window: opcodes
                    // index it directly. The target length is the nearest
                    // remaining Lua frame's `base + max_stack` plus a
                    // 256-slot reserve, falling back to `saved_len` when no
                    // Lua frame remains. The stack is resized to exactly that
                    // length, which covers both directions:
                    //  * shrink — after an overflow-recovery catch the stack
                    //    must not stay near `MAX_LUA_STACK`, or the next
                    //    `call_value_impl` (e.g. a `__close` in the catcher's
                    //    errorh, locals.lua:659) would pick
                    //    `func_slot = stack.len()` above the limit and
                    //    re-overflow;
                    //  * grow — a yieldable-unwind re-entry can leave the
                    //    stack *below* the window (a prior `ResumeUnwind`
                    //    truncated).
                    let restore = self
                        .frames
                        .iter()
                        .rev()
                        .find_map(CallFrame::lua)
                        .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
                        .unwrap_or(saved_len);
                    if self.stack.len() < restore {
                        self.stack.resize(restore, Value::Nil);
                    } else if self.stack.len() > restore {
                        self.stack.truncate(restore);
                    }
                    // v2.5 P1B-2B: clear slots vacated by the popped
                    // frames the unwind walked over. finish_results
                    // above clears `[nc.func_slot + nresults ..
                    // nc.func_slot + 2)`, which only covers the
                    // pcall's own result region — the unwind-popped
                    // frames' locals in `[nc.func_slot + 2 .. restore)`
                    // are still in place with whatever Gc-bearing
                    // Values they last held. Without this clear, a
                    // later GC marks the stale pointers (UAF-A family
                    // analog of the v2.3 Op::Return finish_results
                    // path). PUC's `luaD_pcall` similarly truncates
                    // L->top to the catcher's level — luna's
                    // truncate above resizes the Vec but doesn't
                    // touch slots [func_slot+2..restore) that were
                    // already present.
                    let clear_lo = (nc.func_slot as usize + 2).min(self.stack.len());
                    let clear_hi = restore.min(self.stack.len());
                    if clear_lo < clear_hi {
                        for slot in &mut self.stack[clear_lo..clear_hi] {
                            *slot = Value::Nil;
                        }
                    }
                    return Unwound::Caught;
                }
                CallFrame::Lua(f) => {
                    // Yieldable error-unwind close, PUC luaG_errormsg shape:
                    // (1) pop the Lua frame immediately so each `__close`
                    // handler runs at the C boundary above — `debug.getinfo`
                    // sees the next outer Lua frame's call site (typically
                    // `pcall`), not this aborting function (locals.lua:480).
                    // (2) drive the close yieldably with
                    // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
                    // it truncates to `func_slot` and re-raises (letting a
                    // handler-raised error win over `err`). If a handler
                    // yields, `drive_close` pushes `Cont::Close` and we
                    // return `Caught` so `exec_with` re-enters the run loop;
                    // a synchronous drain returns Err exactly as the old
                    // path did.
                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
                    let after = AfterClose::ResumeUnwind {
                        func_slot: f.func_slot,
                        err,
                    };
                    match self.begin_close(f.base, Some(err), after, entry_depth) {
                        Ok(Some(_)) => {
                            unreachable!("ResumeUnwind never returns host values")
                        }
                        Ok(None) => return Unwound::Caught,
                        Err(e) => {
                            // synchronous drain: continue unwinding with the
                            // re-raised error.
                            err = e.0;
                            continue;
                        }
                    }
                }
            }
        }
        // no catcher at or above `entry_depth`: hand the error to the caller.
        Unwound::Propagated(LuaError(err))
    }
5724
5725    fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5726        loop {
5727            // Fast-path slow-check gate: most embedders run with both
5728            // `instr_budget` and `mem_cap` as None, so a single combined
5729            // is_some test lets the hot loop skip both branches with one
5730            // load + branch instead of two.
5731            if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5732                if let Some(b) = self.instr_budget.as_mut() {
5733                    *b -= 1;
5734                    if *b <= 0 {
5735                        self.instr_budget = None;
5736                        // v1.1 B10 Stage 1 — async-mode cooperative
5737                        // yield. Set a sentinel flag so `exec_with`
5738                        // propagates the Err without `unwind` running
5739                        // (mirroring the `yielding.is_some()` path),
5740                        // and `call_value_impl` preserves the call
5741                        // frames for the next `poll`. Translation back
5742                        // to `DispatchOutcome::BudgetExhausted` happens
5743                        // in `drive_one`. The Err value itself is
5744                        // `Value::Nil` — a pure sentinel, never seen by
5745                        // user code.
5746                        if self.async_mode {
5747                            self.host_yield_pending = true;
5748                            return Err(LuaError(Value::Nil));
5749                        }
5750                        // B6: classify the trip so embedders can
5751                        // distinguish budget exhaustion from a
5752                        // generic Runtime error and retry / give up
5753                        // accordingly.
5754                        self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5755                        let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5756                        return Err(LuaError(s));
5757                    }
5758                }
5759                if let Some(cap) = self.heap.mem_cap
5760                    && self.heap.bytes() > cap
5761                {
5762                    // First try a full collect — embedders set tight caps
5763                    // and the overshoot may be reclaimable (closures kept
5764                    // by short-lived frames, intermediate strings). Only
5765                    // disarm + raise if the cap is still breached after
5766                    // collection. PUC's `LUA_GCEMERGENCY` path matches.
5767                    //
5768                    // v2.6 A.2: tighten mem-cap-fire over-root from
5769                    // entire `self.stack.len()` (whole heap) to the
5770                    // deepest Lua frame's `base + max_stack` window
5771                    // (covers register operands the current opcode
5772                    // might reference). The cap fires during table
5773                    // mutation in a tight `a[i] = i` loop where `a`
5774                    // lives at a frame-register slot past `self.top`
5775                    // (OP_NEWINDEX doesn't advance top); the deepest
5776                    // frame's max_stack window provably covers it
5777                    // since `a` is a register of the executing proto.
5778                    //
5779                    // Still over-roots caller frames' dead regs
5780                    // (slots between caller.base and the callee
5781                    // func_slot are live; slots past callee
5782                    // func_slot in caller's frame are dead until
5783                    // caller resumes). For fire-once cap path this
5784                    // residual over-root is acceptable; full
5785                    // per-frame walk was canceled per
5786                    // `.dev/rfcs/v2.6-plan-state.md` amendments log
5787                    // (charter §2.1's strong/weak pass split is
5788                    // semantically impossible — weak pass depends on
5789                    // strong-pass marks).
5790                    let cap_root_top = self
5791                        .frames
5792                        .iter()
5793                        .rev()
5794                        .find_map(CallFrame::lua)
5795                        .map(|f| f.base + f.closure.proto.max_stack as u32)
5796                        .unwrap_or(self.top);
5797                    self.gc_top = cap_root_top.max(self.top);
5798                    self.collect_garbage();
5799                    if self.heap.bytes() > cap {
5800                        self.heap.mem_cap = None;
5801                        let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5802                        return Err(LuaError(s));
5803                    }
5804                }
5805            }
5806            // Single combined frame fetch: continuation arm OR Lua arm. Saves
5807            // a second `self.frames.last()` slice access vs the prior split
5808            // form (LLVM doesn't always CSE these across the cont branch).
5809            // A continuation frame on top means the call it protected just
5810            // delivered its results — wrap as `true, results…` and hand to
5811            // the pcall/xpcall caller. The error path is handled by `unwind`;
5812            // this branch is only reached on success/resume completion.
5813            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5814            let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5815            if let &CallFrame::Cont(nc) = frame_peek {
5816                // a yieldable metamethod returned: complete the interrupted
5817                // instruction (PUC luaV_finishOp) and resume the running frame.
5818                if let ContKind::Meta(mc) = nc.kind {
5819                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5820                    let result = if self.top > nc.func_slot {
5821                        self.stack[nc.func_slot as usize]
5822                    } else {
5823                        Value::Nil
5824                    };
5825                    self.stack.truncate(nc.func_slot as usize);
5826                    self.top = mc.saved_top;
5827                    self.finish_meta(mc.action, result)?;
5828                    continue;
5829                }
5830                // a __close handler returned successfully: discard its
5831                // results, restore `top` to the slot the handler was called
5832                // at (the surrounding frame's register window above this slot
5833                // must stay alloc'd — never truncate the underlying stack),
5834                // then continue the close chain (next slot, or fire
5835                // AfterClose). When the close ends an entry activation,
5836                // drive_close hands the results up to exec_with directly.
5837                if let ContKind::Close(cc) = nc.kind {
5838                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5839                    self.top = nc.func_slot;
5840                    if let Some(vals) =
5841                        self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5842                    {
5843                        return Ok(vals);
5844                    }
5845                    continue;
5846                }
5847                // __pairs returned: normalize its results to exactly four
5848                // (iterator, state, control, closing) at pairs's slot, where
5849                // the metamethod was called, and hand them to pairs's caller.
5850                if let ContKind::Pairs = nc.kind {
5851                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5852                    let total = 4u32;
5853                    let need = (nc.func_slot + total) as usize;
5854                    if self.stack.len() < need {
5855                        self.stack.resize(need, Value::Nil);
5856                    }
5857                    for s in self.top..(nc.func_slot + total) {
5858                        self.stack[s as usize] = Value::Nil;
5859                    }
5860                    self.top = nc.func_slot + total;
5861                    if self.frames.len() < entry_depth {
5862                        return Ok(self.take_results(nc.func_slot));
5863                    }
5864                    self.finish_results(nc.func_slot, total, nc.nresults);
5865                    continue;
5866                }
5867                frames_pop_sync(&mut self.frames, &mut self.frames_top);
5868                self.pcall_depth -= 1;
5869                // f's results sit at nc.func_slot+1.. (f was called one slot
5870                // above the continuation), so writing `true` at the slot makes
5871                // `true, results…` already contiguous.
5872                let nret = self.top - (nc.func_slot + 1);
5873                self.stack[nc.func_slot as usize] = Value::Bool(true);
5874                let total = 1 + nret;
5875                self.top = nc.func_slot + total;
5876                if self.frames.len() < entry_depth {
5877                    return Ok(self.take_results(nc.func_slot));
5878                }
5879                self.finish_results(nc.func_slot, total, nc.nresults);
5880                continue;
5881            }
5882            // GC runs only at the allocation safe points below (PUC's
5883            // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5884            // no longer collects, so a stale full-window `gc_top` cannot leak in.
5885            //
5886            // Hot-path frame fetch: the Cont arm above continues the loop,
5887            // so reaching here means `frame_peek` is the Lua frame. Reuse it
5888            // rather than re-fetching `self.frames.last()`.
5889            let f = match frame_peek {
5890                CallFrame::Lua(f) => f,
5891                _ => unreachable!("Cont frame survived the dispatch loop head"),
5892            };
5893            let cl = f.closure;
5894            let base = f.base;
5895            let func_slot = f.func_slot;
5896            let n_varargs = f.n_varargs;
5897            let pc = f.pc;
5898            let oldpc = f.hook_oldpc;
5899
5900            // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5901            // — every branch / call op only sets `pc` to a valid index, and
5902            // function entry initialises pc=0 with a non-empty body. PUC's
5903            // `vmfetch` uses the equivalent unchecked load.
5904            let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5905
5906            // P12-S1.C/D — trace recording append + close detection.
5907            // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5908            // so default dispatch keeps a single not-taken branch.
5909            //
5910            // - At the head PC with a non-empty record, the trace has
5911            //   looped back to its start: mark `closed = true` and
5912            //   take the record (S2 will compile + cache).
5913            // - Otherwise, capture the op. If the record overflows
5914            //   MAX_TRACE_LEN, abort by dropping it.
5915            if self.jit.trace_enabled
5916                && let Some(_rec) = self.jit.active_trace.as_mut()
5917            {
5918                // P12-S4 — depth tracking. The trace head's frame is
5919                // at index `recording_frame_base`; every Op::Call that
5920                // pushes a new frame bumps the live depth, every
5921                // Op::Return that pops one decrements it.
5922                //
5923                // **Three clean-close conditions** (P12-S4-step4a):
5924                // - `at_head`: cur_depth == 0 AND about-to-execute the
5925                //   trace's head_pc on its head_proto (loop closed back
5926                //   to start). Same for loop-triggered and call-triggered
5927                //   traces — step4a unified the gating so call-triggered
5928                //   no longer closes on the first re-entry (that left
5929                //   fib's body at 7 depth=0 ops; step4a lets it inline
5930                //   up to MAX_INLINE_DEPTH levels before any close).
5931                // - `returned_past_head`: trace head's frame is gone
5932                //   (callee returned past it, or the call-trigger
5933                //   started a recording inside a callee that has now
5934                //   returned). Whatever ops were recorded form the
5935                //   trace body; the lowerer treats the partial trace
5936                //   the same as InlineAbort (dispatchable=false until
5937                //   step4b's frame materialization lands).
5938                // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5939                //   Recording any deeper would just bloat the IR; close
5940                //   with the body we have. Lowerer's existing length
5941                //   gate + InlineAbort path handles short bodies.
5942                let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5943                let cur_depth = if returned_past_head {
5944                    0
5945                } else {
5946                    self.frames.len() - 1 - self.jit.recording_frame_base
5947                };
5948                let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5949                let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5950                let at_head_loop = cur_depth == 0
5951                    && !rec.ops.is_empty()
5952                    && !returned_past_head
5953                    && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5954                    && pc == rec.head_pc;
5955                // P16-A — self-link cycle catch (mirrors LuaJIT's
5956                // `check_call_unroll` at `lj_record.c:1869`). Trips when:
5957                //   1. We're about to execute the head_pc on head_proto
5958                //      at depth > 0 (we're re-entering the trace head
5959                //      from inside an inlined recursion level — UpRec).
5960                //   2. The count of ancestor frames in the recording
5961                //      window that share `head_proto` exceeds
5962                //      [`RECUNROLL_THRESHOLD`] (default 2).
5963                // For fib(N): head_pc=0, head_proto=fib. After 2 inline
5964                // recursion levels are captured, the recorder enters
5965                // the 3rd nested fib frame, sees cur_depth=3 > 2, and
5966                // trips this catch — closing with `SelfRecKind::UpRec`.
5967                // The lowerer's `TraceEnd::SelfLink` tail emits the
5968                // bump-base + branch-to-self loop body.
5969                //
5970                // TailRec vs UpRec: LJ distinguishes via
5971                // `framedepth + retdepth == 0`. luna doesn't track
5972                // retdepth separately; cur_depth == 0 with a non-empty
5973                // call chain in tail position is rare (would require
5974                // explicit Lua TCO). We use cur_depth > 0 as the UpRec
5975                // condition (fib's case); cur_depth == 0 with positive
5976                // ancestor count would route to TailRec, but luna's
5977                // recorder doesn't currently produce that shape because
5978                // tail-call elision pops the caller frame and we'd
5979                // hit `at_head_loop` instead.
5980                let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
5981                    if self.jit.p16_self_link_enabled
5982                        && !returned_past_head
5983                        && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5984                        && pc == rec.head_pc
5985                        && cur_depth > 0
5986                    {
5987                        // Count ancestor frames sharing head_proto.
5988                        // self.frames[recording_frame_base..] currently
5989                        // includes the just-pushed frame at the top
5990                        // (the one about to execute head_pc). Ancestors
5991                        // = the slice excluding the top frame.
5992                        let head_proto_ptr = rec.head_proto.as_ptr();
5993                        let last_idx = self.frames.len() - 1;
5994                        let mut count = 0usize;
5995                        for i in self.jit.recording_frame_base..last_idx {
5996                            if let CallFrame::Lua(f) = &self.frames[i]
5997                                && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
5998                            {
5999                                count += 1;
6000                            }
6001                        }
6002                        if count > crate::jit::trace::RECUNROLL_THRESHOLD {
6003                            // cur_depth > 0 → UpRec (fib pattern).
6004                            // cur_depth == 0 wouldn't reach this arm.
6005                            Some(crate::jit::trace::SelfRecKind::UpRec)
6006                        } else {
6007                            None
6008                        }
6009                    } else {
6010                        None
6011                    }
6012                };
6013                if let Some(kind) = self_link_trip {
6014                    // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
6015                    // self-recursive patterns at frame depth >= 2.
6016                    //
6017                    // Pre sub-0: a SelfLink trip at the head_pc re-entry
6018                    // unconditionally stamped `self_link_kind`. The
6019                    // R3a `downrec_close` marker can only fire from the
6020                    // depth>0 Op::Return path (`rec.retfs` chain),
6021                    // which never reaches the recorder for fib(28)-like
6022                    // shapes that hit the SelfLink cycle catch BEFORE
6023                    // any base-case Return — leaving `downrec_close`
6024                    // None and routing the trace through R1's safe
6025                    // `dispatchable=false` `"self-link-retf-r1"` path
6026                    // (audit measured `trace_dispatched = 0`).
6027                    //
6028                    // Sub-0 lift: when the SelfLink trip fires AND
6029                    // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
6030                    // gate already requires this — kept explicit as a
6031                    // safety floor), route the close through `downrec_
6032                    // close` INSTEAD of `self_link_kind`. The recorder
6033                    // synthesises the close marker from the most
6034                    // recent Op::Call at depth `cur_depth - 1`:
6035                    //   - `return_pc` = `call.pc + 1` (caller's resume
6036                    //     PC after the recursive call returns; mirror
6037                    //     of R3a's `caller_pc` derivation at the
6038                    //     depth>0 Op::Return capture path below).
6039                    //   - `target_proto` = `call.proto` (caller's
6040                    //     proto; equals `rec.head_proto` for self-
6041                    //     recursion).
6042                    //   - `depth_delta` = `1` (today's recorder always
6043                    //     unrolls one level; R3a uses the same
6044                    //     constant).
6045                    //
6046                    // The lowerer's `end_idx` picker (`trace.rs:3729`)
6047                    // routes through `TraceEnd::DownRec` ahead of the
6048                    // `self_link_kind` arm; the R3b/R3d lowerer arm
6049                    // emits the stitch-sentinel + caller-pc-guard
6050                    // scaffold. Single-candidate guard chain (sub-0's
6051                    // recorder produces 1 caller_pc candidate because
6052                    // `rec.retfs` is empty) keeps `dispatchable=false`
6053                    // + `"downrec-stitch-pending"` label (per R3d's
6054                    // `multi_way_candidate_count >= 2` gate at
6055                    // `trace.rs:7385`). Net behaviour: trace compiles
6056                    // under DownRec routing; interp runs the
6057                    // recursion naturally → result 317811.
6058                    //
6059                    // The `cur_depth >= 2` gate is automatically
6060                    // satisfied by the count > RECUNROLL_THRESHOLD=2
6061                    // trip condition (3 ancestor frames sharing
6062                    // head_proto implies cur_depth >= 3), kept
6063                    // explicit so a future RECUNROLL_THRESHOLD tweak
6064                    // doesn't silently flip shallow-recursion
6065                    // shapes (cur_depth == 1) onto the DownRec arm.
6066                    //
6067                    // R3.3+ sub-1/2/3/4 will replace the depth-baked
6068                    // op_offsets[] addressing with runtime base_var
6069                    // threading so the trace's recorded body is
6070                    // depth-relative and the DownRec dispatch
6071                    // becomes wall-clock-positive. Sub-0 is the
6072                    // routing scaffold; it does not aim for gain.
6073                    let _ = kind;
6074                    let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
6075                        let caller_depth_u8 = (cur_depth - 1) as u8;
6076                        if let Some(call_op) = rec.ops.iter().rev().find(|r| {
6077                            r.inline_depth == caller_depth_u8
6078                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6079                        }) {
6080                            rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6081                                return_pc: call_op.pc + 1,
6082                                target_proto: call_op.proto,
6083                                depth_delta: 1,
6084                            });
6085                            true
6086                        } else {
6087                            false
6088                        }
6089                    };
6090                    if relaxed_to_downrec {
6091                        // R2 close-cause taxonomy: tag the lift so
6092                        // probes can tally the fire rate. Mirrors
6093                        // R3a's `"downrec-restart"` bump for the
6094                        // depth>0 Op::Return path (different trip
6095                        // origin, same downstream routing). The
6096                        // existing `"self-link-retf-r1"` label still
6097                        // fires for trips that DON'T relax (no
6098                        // candidate Op::Call ancestor in rec.ops, or
6099                        // cur_depth < 2) via the lowerer's
6100                        // dispatch_off_reason mirror at the close
6101                        // handler — kept as a regression safety net.
6102                        self.jit
6103                            .counters
6104                            .bump_close_cause("selflink-yields-to-downrec");
6105                    } else {
6106                        rec.self_link_kind = Some(kind);
6107                    }
6108                }
6109                let should_close =
6110                    at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
6111                if should_close {
6112                    // P13-S13-H — long-trace bias: a call-triggered
6113                    // recording that closed with a very short body
6114                    // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
6115                    // binary_trees `make(0)`: 4 ops) is pathological.
6116                    // Compiling + caching it pins `Proto.traces` to a
6117                    // trace that the length gate will refuse to
6118                    // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
6119                    // = 40`), AND blocks the back-edge / longer-call
6120                    // path from re-recording the same head_pc (the
6121                    // dedup `already_cached` check below short-
6122                    // circuits). The fix: discard the short call-
6123                    // triggered recording WITHOUT caching, and bias
6124                    // the proto's `call_hot_count` back to
6125                    // `THRESHOLD - HOT_RETRY_WINDOW` so the next
6126                    // sequence of calls retries the trigger at a
6127                    // different (hopefully deeper) recursion point.
6128                    //
6129                    // Back-edge triggered traces are exempt — a
6130                    // tight numeric-for loop's body is legitimately
6131                    // 3 ops (`Add`, ForLoop) and DOES dispatch
6132                    // usefully when re-entered many times.
6133                    // P13-S13-H — coverage heuristic to detect
6134                    // pathologically partial call-triggered traces:
6135                    // for self-recursive / branchy protos like
6136                    // `fib` (~17 bytecode ops) or
6137                    // `binary_trees.make` (~26 ops), the recorder
6138                    // can fire at a BASE-case entry (`fib(0)` or
6139                    // `make(0)`) producing a 3–4 op trace that
6140                    // covers a tiny fraction of the proto's code.
6141                    // That trace is doomed by the length gate
6142                    // post-compile AND blocks any longer follow-up
6143                    // (the dedup `already_cached` check below). The
6144                    // fix: discard call-triggered closes where
6145                    // `rec.ops.len() * 2 < head_proto.code.len()`
6146                    // (less than half the proto's bytecode), so the
6147                    // back-edge / longer call path can take over.
6148                    //
6149                    // Why coverage > raw length: protos with
6150                    // intrinsically short bodies (closure
6151                    // factories: `Closure + Return1` = 2 ops,
6152                    // simple wrappers: `LoadI + Return1` = 2 ops)
6153                    // record 100% coverage even at length 2 — those
6154                    // ARE legitimately short and the closure /
6155                    // sunk-emit lowering paths (S7-A / S9-C) make
6156                    // them worth compiling. The heuristic admits
6157                    // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
6158                    // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
6159                    // ~26) get discarded.
6160                    //
6161                    // Back-edge triggered traces are unaffected —
6162                    // a tight numeric-for body legitimately covers
6163                    // 3 of ~3 proto ops it can dispatch from
6164                    // (`Add + ForLoop`) and the recorder fires on
6165                    // the back-edge, not call entry.
6166                    //
6167                    // `call_hot_count` is intentionally NOT reset
6168                    // (an earlier draft tried `THRESHOLD - 32` but
6169                    // caused active_trace contention with the
6170                    // outer back-edge trigger — see
6171                    // setlist_b_zero_with_call_c_zero_sunk_emits).
6172                    // We give up on dispatching the pathological
6173                    // shape on the same proto; the back-edge or a
6174                    // longer call path on a deeper recursion point
6175                    // can still record + cache a real trace.
6176                    let proto_code_len = rec.head_proto.code.len();
6177                    let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
6178                    // P13-S13-I — per-Proto discard cap. The S13-H
6179                    // relaxed trigger condition (`c >= THRESHOLD &&
6180                    // !already_cached`) means a Proto whose every
6181                    // recording is partial-coverage will re-fire the
6182                    // trigger every call indefinitely (1500+ in
6183                    // `binary_trees`-pattern test). The cap stops
6184                    // discarding after `MAX_DISCARDS_PER_PROTO` —
6185                    // the next close falls through to compile (even
6186                    // if partial), caches the trace, and the
6187                    // `already_cached` short-circuit kills the
6188                    // storm. Dispatch may still be refused
6189                    // post-compile (length gate), but the recorder
6190                    // stops churning.
6191                    const MAX_DISCARDS_PER_PROTO: u32 = 5;
6192                    let prior_discards = rec.head_proto.trace_discard_count.get();
6193                    let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
6194                    // P13-S13-K — flip the `gave_up` flag the
6195                    // moment cap is reached (BEFORE the close-
6196                    // dispatching branch below). The trigger gates
6197                    // short-circuit on this flag, skipping the
6198                    // RefCell + linear `already_cached` scan on
6199                    // every subsequent call to this Proto. Useful
6200                    // for `binary_trees_pattern`-class loads where
6201                    // a single Proto sees ~20k calls post-cap.
6202                    if cap_reached
6203                        && rec.is_call_triggered
6204                        && is_partial_coverage
6205                        && !rec.head_proto.trace_gave_up.get()
6206                    {
6207                        rec.head_proto.trace_gave_up.set(true);
6208                    }
6209                    if rec.is_call_triggered && is_partial_coverage && !cap_reached {
6210                        // Tally as closed (for visibility) but DROP
6211                        // without compile/cache. Use the existing
6212                        // closed-lens accumulator so probes can
6213                        // observe the discarded shape.
6214                        // P13-S13-I — bump discard count BEFORE
6215                        // dropping the recording so the next
6216                        // close sees the updated counter.
6217                        rec.head_proto.trace_discard_count.set(prior_discards + 1);
6218                        self.jit.counters.closed += 1;
6219                        self.jit
6220                            .counters
6221                            .closed_lens
6222                            .push((rec.is_call_triggered, rec.ops.len()));
6223                        // v2.0 Track-R R2 — partial-coverage discard
6224                        // close path. Pre-R2 this site bumped `closed`
6225                        // + `closed_lens` (visibility) but no per-
6226                        // reason label, so probes couldn't separate a
6227                        // real successful close from a discard tally.
6228                        // Tag explicitly to make the recorder-side
6229                        // close-cause taxonomy single-source.
6230                        self.jit
6231                            .counters
6232                            .bump_close_cause("partial-coverage-discard");
6233                        self.jit.active_trace = None;
6234                        // Continue with interp loop — don't
6235                        // fall through to compile path.
6236                        // The op at `pc` hasn't dispatched yet;
6237                        // the outer loop iteration handles it.
6238                    } else {
6239                        rec.closed = true;
6240                        // P12-S2.C — detach the closed record, then try
6241                        // to compile it. Dedup by `head_pc`: a Proto
6242                        // already carrying a CompiledTrace for this PC
6243                        // skips recompile (the hot counter caps
6244                        // re-recording at `u32::MAX / 2` anyway, but
6245                        // explicit dedup keeps `Proto.traces` short
6246                        // for the S3 dispatcher's linear scan).
6247                        //
6248                        // No `Vm::run` change for failure: we just bump
6249                        // the failed counter and drop the record. S3
6250                        // will read `Proto.traces` to decide whether to
6251                        // dispatch — until then, this is bookkeeping.
6252                        let head_pc_val = rec.head_pc;
6253                        let closed_record = self
6254                            .jit
6255                            .active_trace
6256                            .take()
6257                            .expect("active_trace was Some this branch");
6258                        self.jit.counters.closed += 1;
6259                        self.jit
6260                            .counters
6261                            .closed_lens
6262                            .push((closed_record.is_call_triggered, closed_record.ops.len()));
6263                        // P12-S5-B fix: cache the trace on the
6264                        // recorder's *head proto*, not the current
6265                        // closure's proto. For non-recursive
6266                        // call-triggered traces, close fires after
6267                        // `Return1` pops the callee frame — `cl` at
6268                        // that point is the CALLER's closure, while
6269                        // `closed_record.head_proto` is the CALLEE's
6270                        // proto (the one we actually want the trace
6271                        // to be discoverable from on the next call).
6272                        // Self-recursive fib closed via depth-cap
6273                        // mid-recursion so `cl.proto == head_proto`
6274                        // happened to coincide — this fix makes that
6275                        // accidental coincidence intentional.
6276                        let head_proto = closed_record.head_proto;
6277                        let already_cached = head_proto
6278                            .traces
6279                            .borrow()
6280                            .iter()
6281                            .any(|t| t.head_pc == head_pc_val);
6282                        if !already_cached {
6283                            // Internal-loop = true: the trace runs in
6284                            // a native loop until a cmp side-exits, so
6285                            // the dispatcher's per-entry marshal cost
6286                            // amortizes across the whole run of
6287                            // iterations the loop's recorded direction
6288                            // stays valid. The lowerer auto-downgrades
6289                            // to one-shot for cmp-less or Call-truncating
6290                            // traces.
6291                            // P15-A v2-C-A6-5 — side traces MUST NOT
6292                            // internal-loop. The parent's recorded prefix
6293                            // (ops at PCs < side trace's head_pc) defines
6294                            // values for registers the child's body reads
6295                            // without re-writing each iter — e.g. for
6296                            // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6297                            // + R[11]` sets R[12], and the child trace
6298                            // (head_pc=24) re-runs `pc=20 Move R[1] =
6299                            // R[12]` each iter via its outer ForLoop
6300                            // internal-loop, ALWAYS reading the stale
6301                            // entry-time R[12]. The parent's Add never
6302                            // re-runs during child's loop, so R[1] gets
6303                            // pinned to one stale value. Force one-shot
6304                            // for side traces: each parent-exit round-
6305                            // trips through dispatcher → parent's Add
6306                            // runs → side trace runs ONE iter → return.
6307                            let opts = crate::jit::trace::CompileOptions {
6308                                internal_loop: closed_record.side_trace_parent.is_none(),
6309                                pre53: self.version() <= LuaVersion::Lua53,
6310                                aot: false,
6311                            };
6312                            // v1.1 A1 Session A — route through trace_compiler.
6313                            // v2.0 Track J sub-step J-B — split-borrow JitState
6314                            // so the trait method can take `&mut dyn JitStorage`.
6315                            let result = {
6316                                let jit = &mut self.jit;
6317                                let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6318                                jit.trace_compiler
6319                                    .try_compile_trace(storage, &closed_record, opts)
6320                            };
6321                            match result {
6322                                Some(mut ct) => {
6323                                    // P12-S5-A/B/C — tally Sinkable sites
6324                                    // + actually-sunk-emit sites + materialise
6325                                    // emit sites before moving `ct` into
6326                                    // Proto.traces.
6327                                    self.jit.counters.sinkable_seen +=
6328                                        ct.sinkable_sites_seen as u64;
6329                                    self.jit.counters.accum_bufferable_seen +=
6330                                        ct.accum_bufferable_seen as u64;
6331                                    self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6332                                    self.jit.counters.materialize_emit +=
6333                                        ct.materialize_emit_count as u64;
6334                                    self.jit.counters.closure_emit += ct.closure_seen as u64;
6335                                    if ct.is_inline_abort_close {
6336                                        self.jit.counters.inline_abort += 1;
6337                                    }
6338                                    // v2.0 Stage 7 polish 6 fire
6339                                    // experiment — split tally so a
6340                                    // probe can answer the AOT
6341                                    // `accepted_with_per_exit_inline`
6342                                    // gate's question at the JIT
6343                                    // surface too: how many compiled
6344                                    // traces emitted depth>0 cmp
6345                                    // side-exits, and how many of
6346                                    // those survived all the
6347                                    // `dispatchable = false` pins
6348                                    // (`InlineAbort-gate`,
6349                                    // `self-link-retf-r1`,
6350                                    // `downrec-stitch-pending`, etc.).
6351                                    if !ct.per_exit_inline.is_empty() {
6352                                        self.jit.counters.per_exit_inline_compiled += 1;
6353                                        if ct.dispatchable {
6354                                            self.jit.counters.per_exit_inline_dispatchable += 1;
6355                                        }
6356                                    }
6357                                    if let Some(reason) = ct.dispatch_off_reason {
6358                                        self.jit.counters.dispatch_off_reasons.push(reason);
6359                                        // v2.0 Track-R R2 — mirror
6360                                        // the ordered Vec push into
6361                                        // the per-reason HashMap so
6362                                        // probes can answer "how many
6363                                        // of each dispatch_off label
6364                                        // fired" in O(1) without
6365                                        // walking the Vec. Same
6366                                        // bucket as the recorder-side
6367                                        // abort/discard tags above.
6368                                        self.jit.counters.bump_close_cause(reason);
6369                                    }
6370                                    // v2.0 Track-R R3b — count
6371                                    // compiled traces that carry a
6372                                    // down-recursion stitch link.
6373                                    // Bumped here (not at the lowerer
6374                                    // emit site) because the Vm's
6375                                    // JitCounters live on the Vm,
6376                                    // and the lowerer doesn't have a
6377                                    // Vm handle. R3b's regression
6378                                    // pin reads this via
6379                                    // `Vm::trace_downrec_link_compiled_count`.
6380                                    if ct.downrec_link.is_some() {
6381                                        self.jit.counters.downrec_link_compiled += 1;
6382                                    }
6383                                    // v2.0 Track-R R3d — multi-way
6384                                    // guard emit counter. Bumped when
6385                                    // the lowerer's R3d arm collected
6386                                    // >= 2 distinct caller_pc candidates
6387                                    // and lifted `dispatchable=true`.
6388                                    // R3c's single-CMP shape stores
6389                                    // `1` here without bumping; non-
6390                                    // DownRec closes store `0`.
6391                                    if ct.downrec_multi_way_count >= 2 {
6392                                        self.jit.counters.multi_way_guard_emitted += 1;
6393                                    }
6394                                    // P15-A v2-A — side-trace finalisation.
6395                                    // Pin `dispatchable=false` so the
6396                                    // primary lookup `traces.find(|t|
6397                                    // t.head_pc == pc && t.dispatchable)`
6398                                    // never matches this entry — the
6399                                    // side trace is meant to be entered
6400                                    // ONLY through the parent's exit
6401                                    // indirection (v2-B/C IR), not the
6402                                    // back-edge / call-trigger paths.
6403                                    // Then write the entry fn ptr into
6404                                    // the parent's `exit_side_trace_ptrs`
6405                                    // slot so v2-B/C IR can read it.
6406                                    if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6407                                        closed_record.side_trace_parent
6408                                    {
6409                                        ct.dispatchable = false;
6410                                        let entry_ptr = ct.entry as *const () as *const u8;
6411                                        let _side_trace_head_pc = closed_record.head_pc;
6412                                        let parent_traces = parent_proto.traces.borrow();
6413                                        if let Some(parent_ct) = parent_traces
6414                                            .iter()
6415                                            .find(|t| t.head_pc == parent_head_pc)
6416                                        {
6417                                            // P15-A v2-C-A5-C — shape-match
6418                                            // gate. Find the parent's per-exit
6419                                            // tag snapshot at the wired exit
6420                                            // (inline / tag / global) and
6421                                            // check the child's entry_tags
6422                                            // match. If not, leave the cell
6423                                            // null + skip cache populate so
6424                                            // the future v2-C-A2 IR's
6425                                            // `call_indirect` stays inert at
6426                                            // this exit (the child's
6427                                            // shape-specialised IR would
6428                                            // mis-interpret raw bits the
6429                                            // parent writes to reg_state).
6430                                            let inline_n = parent_ct.per_exit_inline.len();
6431                                            let tags_n = parent_ct.per_exit_tags.len();
6432                                            let parent_exit_tags_slice: &[
6433                                            crate::jit::trace::ExitTag
6434                                        ] = if parent_exit_idx < inline_n {
6435                                            &parent_ct.per_exit_inline
6436                                                [parent_exit_idx]
6437                                                .exit_tags
6438                                        } else if parent_exit_idx
6439                                            < inline_n + tags_n
6440                                        {
6441                                            &parent_ct.per_exit_tags
6442                                                [parent_exit_idx - inline_n]
6443                                                .1
6444                                        } else {
6445                                            &parent_ct.exit_tags
6446                                        };
6447                                            let shape_ok =
6448                                                crate::jit::trace::exit_tags_match_entry_tags(
6449                                                    &ct.entry_tags,
6450                                                    parent_exit_tags_slice,
6451                                                    &parent_ct.entry_tags,
6452                                                );
6453                                            if !shape_ok {
6454                                                self.jit.counters.side_trace_shape_mismatch += 1;
6455                                            }
6456                                            // P15-A v2-C-A4 — write the child's
6457                                            // entry fn ptr to BOTH the legacy
6458                                            // v2-A `exit_side_trace_ptrs[idx]`
6459                                            // cell (kept so v2-A's
6460                                            // walk_any_side_ptr_non_null tests
6461                                            // stay green) AND the per-kind cell
6462                                            // whose heap address the parent's
6463                                            // IR baked (v2-C-A2). The IR-baked
6464                                            // cell is what the call_indirect
6465                                            // gate actually reads. Only write
6466                                            // when A5-C shape gate passes.
6467                                            if shape_ok {
6468                                                if let Some(cell) = parent_ct
6469                                                    .exit_side_trace_ptrs
6470                                                    .get(parent_exit_idx)
6471                                                {
6472                                                    cell.set(entry_ptr);
6473                                                }
6474                                                // Compute (kind, local) for the
6475                                                // IR-baked cell. Layout follows
6476                                                // exit_hit_counts: inline first,
6477                                                // then per_exit_tags, then the
6478                                                // global tail slot.
6479                                                let (sent_kind, sent_local) = if parent_exit_idx
6480                                                    < inline_n
6481                                                {
6482                                                    parent_ct.per_exit_inline[parent_exit_idx]
6483                                                        .side_trace_ptr
6484                                                        .set(entry_ptr);
6485                                                    (
6486                                                        crate::jit::trace::SIDE_SENT_KIND_INLINE,
6487                                                        parent_exit_idx as u32,
6488                                                    )
6489                                                } else if parent_exit_idx < inline_n + tags_n {
6490                                                    let local = parent_exit_idx - inline_n;
6491                                                    if let Some(b) =
6492                                                        parent_ct.tags_side_trace_ptrs.get(local)
6493                                                    {
6494                                                        b.set(entry_ptr);
6495                                                    }
6496                                                    (
6497                                                        crate::jit::trace::SIDE_SENT_KIND_TAG,
6498                                                        local as u32,
6499                                                    )
6500                                                } else {
6501                                                    parent_ct.global_side_trace_ptr.set(entry_ptr);
6502                                                    (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6503                                                };
6504                                                self.jit.counters.side_trace_compiled += 1;
6505                                                // P15-A v2-D-A8 — flip the
6506                                                // parent's fast-path hint so
6507                                                // the dispatcher knows to do
6508                                                // the tentative decode + cell
6509                                                // check on subsequent
6510                                                // dispatches. Set once and
6511                                                // stays true (we never unwire
6512                                                // a side trace today).
6513                                                parent_ct.has_any_side_wired.set(true);
6514
6515                                                // P15-A v2-C-A1/A4 — populate
6516                                                // the O(1) lookup cache the
6517                                                // dispatcher consults on
6518                                                // sentinel-bit-set returns.
6519                                                // Key is the encoded sentinel
6520                                                // (same encoding the IR ORs
6521                                                // into bits 56..=62 of the
6522                                                // child's i64 return).
6523                                                let sentinel =
6524                                                    crate::jit::trace::encode_side_sentinel(
6525                                                        sent_kind, sent_local,
6526                                                    );
6527                                                let predicted_idx = if std::ptr::eq(
6528                                                    parent_proto.as_ptr(),
6529                                                    head_proto.as_ptr(),
6530                                                ) {
6531                                                    parent_traces.len() as u32
6532                                                } else {
6533                                                    head_proto.traces.borrow().len() as u32
6534                                                };
6535                                                parent_ct
6536                                                    .side_trace_cache
6537                                                    .borrow_mut()
6538                                                    .insert(sentinel, predicted_idx);
6539                                            }
6540                                        }
6541                                        drop(parent_traces);
6542                                    }
6543                                    head_proto.traces.borrow_mut().push(TArc::new(ct));
6544                                    self.jit.counters.compiled += 1;
6545                                }
6546                                None => {
6547                                    self.jit.counters.compile_failed += 1;
6548                                    self.jit
6549                                        .counters
6550                                        .compile_failed_reasons
6551                                        .push(self.jit.trace_compiler.last_compile_checkpoint());
6552                                }
6553                            }
6554                        }
6555                    } // P13-S13-H — close the long-trace-bias else branch
6556                } else {
6557                    // P12-S4-step1 + step4a — depth-aware push at the
6558                    // current `cur_depth`. The `depth_cap_hit` /
6559                    // `returned_past_head` early-exit is handled by
6560                    // the `should_close` branch above; reaching here
6561                    // means `cur_depth <= MAX_INLINE_DEPTH` and the
6562                    // trace head's frame is still live.
6563                    let depth_u8 = cur_depth as u8;
6564                    if depth_u8 > self.jit.max_depth_seen {
6565                        self.jit.max_depth_seen = depth_u8;
6566                    }
6567                    // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6568                    // return / variable return count). Recorder pushed
6569                    // it with var_count=None before the call dispatched;
6570                    // now that the call has returned and we're about to
6571                    // push the next op, top reflects the actual return
6572                    // count. Snapshot top - (caller.base + call.a).
6573                    if let Some(last) = rec.ops.last_mut()
6574                        && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6575                        && last.inst.c() == 0
6576                        && last.var_count.is_none()
6577                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6578                    {
6579                        let from = f.base + last.inst.a();
6580                        if self.top >= from {
6581                            last.var_count = Some(self.top - from);
6582                        }
6583                    }
6584                    // P12-S9-A/C — for SetList B=0, snapshot the source
6585                    // count = top - A - 1 (mirrors Lua's `n = top - ra
6586                    // - 1` from lvm.c OP_SETLIST). Sources are
6587                    // R[A+1..top), exclusive top. For Call C=0's
6588                    // var_count (the return count = top - A inclusive),
6589                    // see the prior-op fix-up above; here we
6590                    // initialise the current Call op to None and let
6591                    // the fix-up on the next op's push populate it.
6592                    let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6593                        && inst.b() == 0
6594                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6595                    {
6596                        let from = f.base + inst.a();
6597                        if self.top > from {
6598                            Some(self.top - from - 1)
6599                        } else {
6600                            None
6601                        }
6602                    } else {
6603                        None
6604                    };
6605                    let op = crate::jit::trace::RecordedOp {
6606                        proto: cl.proto,
6607                        pc,
6608                        inst,
6609                        inline_depth: depth_u8,
6610                        var_count,
6611                    };
6612                    // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6613                    // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6614                    // Captured as a side-channel `RetfRecord` parallel to
6615                    // `ops` when `p16_self_link_enabled` is on. R3's
6616                    // down-rec stitch consumes these to guard side-trace
6617                    // inlined-frame topology against the recorded shape.
6618                    // Gated on the same flag as the cycle catch so the
6619                    // ship-default path (p16 off) sees zero behavior
6620                    // change. `caller_pc` is the recorded enclosing Call's
6621                    // pc + 1 — interp's resume point after the inlined
6622                    // frame pops.
6623                    if self.jit.p16_self_link_enabled
6624                        && depth_u8 > 0
6625                        && matches!(
6626                            inst.op(),
6627                            crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6628                        )
6629                    {
6630                        let results: u8 = match inst.op() {
6631                            crate::vm::isa::Op::Return0 => 0,
6632                            crate::vm::isa::Op::Return1 => 1,
6633                            _ => 0,
6634                        };
6635                        // Most recent Op::Call recorded at the caller's
6636                        // depth (`depth_u8 - 1`) is the frame this Return
6637                        // is unwinding from. Reverse scan stops at the
6638                        // first match.
6639                        let caller_depth = depth_u8 - 1;
6640                        let caller_call = rec.ops.iter().rev().find(|r| {
6641                            r.inline_depth == caller_depth
6642                                && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6643                        });
6644                        let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6645                        // v2.0 Track-R R3a — capture the caller's proto
6646                        // for the RetfRecord. LuaJIT `IR_RETF.op1`
6647                        // equivalent. For fib(28) the caller's proto
6648                        // equals the trace head; for future mutual
6649                        // recursion the recorded Op::Call's proto is the
6650                        // right target. Fallback to head_proto when no
6651                        // enclosing Call op was captured (mirrors
6652                        // `caller_pc`'s fallback to the Return's own pc).
6653                        let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6654                        rec.retfs.push(crate::jit::trace::RetfRecord {
6655                            from_depth: depth_u8,
6656                            to_depth: caller_depth,
6657                            results,
6658                            caller_pc,
6659                            proto: caller_proto,
6660                        });
6661                        // v2.0 Track-R R3a — DownRec close trigger:
6662                        // count RetfRecords on this recording whose
6663                        // `proto` matches `caller_proto` (LuaJIT
6664                        // `check_downrec_unroll` chain filter
6665                        // `op1 == ptref`). Threshold mirrors
6666                        // RECUNROLL_THRESHOLD; first trip stamps the
6667                        // `downrec_close` marker, subsequent retfs
6668                        // keep the marker without overwrite. The
6669                        // lowerer's end_idx picker routes through
6670                        // TraceEnd::DownRec when the marker is set;
6671                        // R3a's tail emit still falls through to R1's
6672                        // safe deopt path so fib(28) result stays
6673                        // 317_811. R3b lifts.
6674                        if rec.downrec_close.is_none() {
6675                            let caller_proto_ptr = caller_proto.as_ptr();
6676                            let prior_match_count = rec
6677                                .retfs
6678                                .iter()
6679                                .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6680                                .count();
6681                            // Strictly-greater-than threshold matches
6682                            // LuaJIT `count + J->tailcalled > recunroll`.
6683                            // The newly-pushed retf is already counted.
6684                            if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6685                                rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6686                                    return_pc: caller_pc,
6687                                    target_proto: caller_proto,
6688                                    depth_delta: 1,
6689                                });
6690                                // R2 close-cause taxonomy: tag the
6691                                // restart with `"downrec-restart"`. R3b
6692                                // adds `"downrec-stitch-failed"` when
6693                                // the lifted back-edge falls back to
6694                                // deopt.
6695                                self.jit.counters.bump_close_cause("downrec-restart");
6696                            }
6697                        }
6698                    }
6699                    // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6700                    // FIRST eligible Op::GetField site under env-gate
6701                    // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6702                    //   - R[B] is Value::Table with metatable.is_none()
6703                    //   - K[C] is Value::Str
6704                    //   - The string key actually occupies a hash slot
6705                    //     (so the IC's slot_idx is a real index, not
6706                    //     a probe sentinel).
6707                    // Once captured, subsequent GetFields skip this
6708                    // logic (rec.field_ic_snapshot.is_some() short-
6709                    // circuits). Env-OFF short-circuits on the cached
6710                    // atomic check inside field_ic_enabled().
6711                    if rec.field_ic_snapshot.is_none()
6712                        && matches!(inst.op(), crate::vm::isa::Op::GetField)
6713                        && crate::jit::trace_types::field_ic_enabled()
6714                    {
6715                        let b = inst.b();
6716                        let c_idx = inst.c() as usize;
6717                        let r_b = self.stack[(base + b) as usize];
6718                        if let Value::Table(g) = r_b
6719                            && g.metatable().is_none()
6720                            && c_idx < cl.proto.consts.len()
6721                            && let Value::Str(s) = cl.proto.consts[c_idx]
6722                        {
6723                            let key = Value::Str(s);
6724                            let tbl_ref = &*g;
6725                            if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6726                                && let Some(val) = tbl_ref.node_val_at(slot_idx)
6727                            {
6728                                let op_idx = rec.ops.len() as u32;
6729                                rec.field_ic_snapshot =
6730                                    Some(crate::jit::trace_types::FieldIcSnapshot {
6731                                        op_idx,
6732                                        nodes_len: tbl_ref.nodes_capacity() as u64,
6733                                        slot_idx: slot_idx as u64,
6734                                        key_ptr_bits: s.as_ptr() as u64,
6735                                        cached_val_tag: val.tag_byte(),
6736                                    });
6737                                self.jit.counters.field_ic_snapshot_captured += 1;
6738                            }
6739                        }
6740                    }
6741                    if !rec.push(op) {
6742                        // v2.0 Track-R R2 — recorder overflow
6743                        // (MAX_TRACE_LEN). Pre-R2 this site bumped
6744                        // `aborted` with no reason label, leaving the
6745                        // overflow indistinguishable from any other
6746                        // abort cause that might be added later.
6747                        // Tag it explicitly under the close-cause
6748                        // bucket so probes can tally overflow vs
6749                        // other abort causes in O(1).
6750                        self.jit.active_trace = None;
6751                        self.jit.counters.aborted += 1;
6752                        self.jit.counters.bump_close_cause("trace-overflow");
6753                    }
6754                }
6755            }
6756
6757            // P12-S3 — trace JIT dispatcher.
6758            //
6759            // When the dispatch loop is about to execute the op at
6760            // `pc` and there's a `numeric_only` CompiledTrace cached
6761            // for that `head_pc`, marshal the live regs into an
6762            // i64 buffer, jump into the trace, and resume the
6763            // interpreter at the returned continuation PC.
6764            //
6765            // Skipped (zero overhead) when `trace_jit_enabled` is
6766            // false; the lookup is a borrow + scan over
6767            // `cl.proto.traces`, which is a `Vec` whose size is at
6768            // most one entry per back-edge per Proto in practice.
6769            //
6770            // Marshalling contract — the reg_state ABI is `*mut i64`
6771            // with no tag info, so only slots whose tag the marshal-in
6772            // loop below accepts survive the round-trip. Any other tag
6773            // (or an entry-tag mismatch) forces a skip; interp takes
6774            // over for one op and the back-edge brings us back to try
6775            // again next pass (slots that held a rejected tag at one
6776            // moment can settle by the time the next back-edge fires).
6777            //
6778            // A trace that comes back with `vm.jit.pending_err`
6779            // parked is treated as a deopt: clear the err, leave
6780            // the stack as the trace wrote it, and let the
6781            // interpreter run from the same `pc`. The trace itself
6782            // is left cached — a future entry might find no
6783            // metatable in the way and succeed.
6784            // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6785            // of 6 per-field Rc clones. proto.traces is now
6786            // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6787            // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6788            // operations per dispatch × 434k = ~2.2M Rc refcount ops
6789            // (~1-2% gain measured separately).
6790            // v2.0 Track-R R3c — one-shot consume of the
6791            // `suppress_downrec_admit_once` flag. Set by the R3c
6792            // downrec post-invoke arm below when it force-deopts the
6793            // trace (caller-pc guard miss OR cycle-budget exhausted)
6794            // so the NEXT interpreter loop iteration skips the
6795            // downrec admit, lets interp run the op at `head_pc`,
6796            // advances `pc` past `head_pc`, and breaks the otherwise-
6797            // infinite admit loop. Reading + clearing here means a
6798            // single dispatch tick consumes the suppression — the
6799            // following tick re-admits naturally (with the budget
6800            // also reset by the deopt site).
6801            let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6802            self.jit.suppress_downrec_admit_once = false;
6803            if self.jit.trace_enabled
6804                && let Some(ct) = {
6805                    let traces = cl.proto.traces.borrow();
6806                    traces
6807                        .iter()
6808                        .find(|t| {
6809                            if t.head_pc != pc {
6810                                return false;
6811                            }
6812                            let is_downrec = t.downrec_link.is_some();
6813                            // v2.0 Track-R R3c — the one-shot suppress
6814                            // flag blocks any admit (primary or fallback)
6815                            // for `downrec_link`-bearing traces so the
6816                            // next interp iter can run the natural op
6817                            // at `head_pc` and advance past it. R3d's
6818                            // `dispatchable=true` lift means the suppress
6819                            // must also cover the primary `t.dispatchable`
6820                            // arm — otherwise the lifted lookup would
6821                            // immediately re-admit after a force-deopt
6822                            // and the infinite loop returns.
6823                            if is_downrec && downrec_admit_blocked {
6824                                return false;
6825                            }
6826                            // Primary arm: `dispatchable=true` traces
6827                            // (R3d-lifted DownRec or normal traces).
6828                            // Fallback arm: R3c-shape `dispatchable=false`
6829                            // DownRec traces (single-CMP guard kept
6830                            // pinned because the 90% miss-rate would
6831                            // make blind admit perf-negative).
6832                            t.dispatchable || is_downrec
6833                        })
6834                        .cloned()
6835                }
6836            {
6837                // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6838                // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6839                // across the entire dispatch block so the fields outlive
6840                // all consumers. Saves 5 Rc::clone per dispatch.
6841                let entry_fn = ct.entry;
6842                let head_pc_val = ct.head_pc;
6843                let window_size = ct.window_size;
6844                let exit_tags = &ct.exit_tags;
6845                let per_exit_tags = &ct.per_exit_tags;
6846                let per_exit_inline = &ct.per_exit_inline;
6847                let compile_entry_tags = &ct.entry_tags;
6848                let global_tag_res_kind = ct.global_tag_res_kind;
6849                let exit_hit_counts = &ct.exit_hit_counts;
6850                let max_stack = cl.proto.max_stack as usize;
6851                let window_size_us = window_size as usize;
6852                let base_us = base as usize;
6853                // P12-S4-step3a — `reg_state` sized to the trace's
6854                // `window_size`, which today equals max_stack but
6855                // S4-step3b will expand for inlined frames.
6856                // Marshal-in still only writes [0..max_stack); slots
6857                // [max_stack..window_size) are zero-initialised and
6858                // filled by the trace's own GetUpval / arith.
6859                // P13-S13-D — reuse the Vm's amortised buffers
6860                // instead of allocating fresh Vecs each dispatch.
6861                // mem::take leaves an empty placeholder we restore
6862                // at the end of the dispatch block (success +
6863                // deopt paths both fall through to the restore).
6864                let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6865                entry_tags.clear();
6866                entry_tags.reserve(max_stack);
6867                // v2.0 Track-R R3c — this trace was admitted via the
6868                // `downrec_link.is_some()` arm rather than the normal
6869                // `dispatchable=true` arm. The pre-invoke path
6870                // populates a reserved saved-PC slot just past the
6871                // normal register window so R3b's lowerer guard load
6872                // (`reg_state[window_size]`) compares the runtime
6873                // saved caller PC against the recorded `dr_return_pc`.
6874                //
6875                // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6876                // gate. After R3d lifts `dispatchable = true` for
6877                // multi-way guards, the trace's body still emits the
6878                // R3b/R3d sentinel shape on return — the saved-PC slot
6879                // and post-invoke classifier must keep firing.
6880                // `downrec_link.is_some()` is the unique structural
6881                // signal that the trace closes via DownRec.
6882                let is_downrec_entry = ct.downrec_link.is_some();
6883                let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6884                reg_state.clear();
6885                // v2.0 Track-R R3c — when admitting a downrec trace,
6886                // size the buffer to `window_size + 1` so the lowerer
6887                // can `load(I64, ..., reg_state, window_size * 8)`
6888                // for the saved caller PC guard input. The extra slot
6889                // is the LAST element so cranelift's existing
6890                // `0..window_size` accesses are unaffected.
6891                let reg_state_len = if is_downrec_entry {
6892                    window_size_us + 1
6893                } else {
6894                    window_size_us
6895                };
6896                reg_state.resize(reg_state_len, 0i64);
6897                let mut dispatch_ok = true;
6898                for i in 0..max_stack {
6899                    let v = self.stack[base_us + i];
6900                    let (tag, raw) = v.unpack();
6901                    entry_tags.push(tag);
6902                    // P12-S12-C v3 — entry tag guard. The trace's IR
6903                    // is specialised to the compile-time entry tags
6904                    // (via current_kinds propagation from
6905                    // from_entry_tag). A runtime tag mismatch means
6906                    // body ops would mis-interpret raw bits (e.g.
6907                    // treat a Str pointer as Int payload → garbage).
6908                    // Skip dispatch on mismatch so interp handles
6909                    // this entry shape; the trace stays cached for
6910                    // future entries that match.
6911                    if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6912                        dispatch_ok = false;
6913                        break;
6914                    }
6915                    match tag {
6916                        // Int / Float / Table / Closure / Native / Str /
6917                        // Nil all marshal to raw payload cleanly; the
6918                        // trace's IR treats the 8-byte slot as an i64
6919                        // (with f64 ops bitcasting around the boundary).
6920                        crate::runtime::value::raw::INT
6921                        | crate::runtime::value::raw::FLOAT
6922                        | crate::runtime::value::raw::TABLE
6923                        | crate::runtime::value::raw::CLOSURE
6924                        // P12-S12-B-v2 — Native iter slots (e.g.
6925                        // R[A] = ipairs_iter) are present in
6926                        // generic-for traces; the raw bits are a
6927                        // valid `*mut NativeClosure` and round-trip
6928                        // cleanly.
6929                        | crate::runtime::value::raw::NATIVE
6930                        // P12-S12-C v1 — Str slots show up in
6931                        // string-concat traces; raw bits = `*mut
6932                        // LuaStr` (interned, GC-managed). Round-
6933                        // trips cleanly as a heap pointer.
6934                        | crate::runtime::value::raw::STR
6935                        | crate::runtime::value::raw::NIL => {
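6936                            // SAFETY: `raw` is the payload returned by `v.unpack()`; this arm only runs for the tags listed above, and the payload is copied verbatim into the i64 slot without being dereferenced. For pointer tags the bits are a Gc<T> (NonNull<T>) into the single-threaded GC heap, live as long as it is reachable from active roots (see heap.rs:5-7). NOTE(review): assumes `zero` is a plain integer view that is initialised for every one of those tags — confirm against the definition of `raw`'s type.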
6937                            reg_state[i] = unsafe { raw.zero as i64 };
6938                        }
6939                        _ => {
6940                            dispatch_ok = false;
6941                            break;
6942                        }
6943                    }
6944                }
6945
6946                if dispatch_ok {
6947                    debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6948                    self.jit.pending_err = None;
6949                    // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6950                    // count. A cmp@d>0 side-exit calls the materialize
6951                    // helper which pushes inlined frames onto
6952                    // `vm.frames`; on deopt those frames must be popped
6953                    // before falling through to the interpreter, else
6954                    // the stack grows unboundedly per deopted dispatch.
6955                    let pre_frames = self.frames.len();
6956                    // v2.0 Track-R R3c — saved-PC slot population. The
6957                    // recorded `dr_return_pc` on the closing trace is
6958                    // the caller's resume PC captured at a depth>0
6959                    // Return push (recorder push site, see R3a verdict
6960                    // §3). The natural runtime analogue for self-
6961                    // stitch is the dispatching frame's PARENT frame's
6962                    // PC: the trace's head_pc sits inside a Lua frame,
6963                    // and the parent (caller) frame's `pc` is what
6964                    // luna would observe as `[base-8]` in the LJ
6965                    // `asm_retf` shape (`lj_asm_arm64.h:565`). When
6966                    // the parent isn't a Lua frame (top-level dispatch
6967                    // — first invocation through `call_value`), no
6968                    // saved PC exists; we write 0, which always
6969                    // mismatches the recorded `dr_return_pc != 0`
6970                    // invariant pinned by R3b
6971                    // (`crates/luna-jit/src/jit_backend/trace.rs:7206
6972                    // debug_assert!(dr_return_pc != 0, ...)`).
6973                    if is_downrec_entry {
6974                        let saved_pc: i64 = if pre_frames >= 2 {
6975                            match &self.frames[pre_frames - 2] {
6976                                CallFrame::Lua(parent) => parent.pc as i64,
6977                                CallFrame::Cont(_) => 0,
6978                            }
6979                        } else {
6980                            0
6981                        };
6982                        reg_state[window_size_us] = saved_pc;
6983                    }
6984                    // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
6985                    // diagnostic hook. The probe fires only while
6986                    // `counters.dispatched == 0` (regardless of JIT vs AOT
6987                    // origin — both go through this arm), letting the AOT smoke
6988                    // test verify mcode actually executed. Guarded behind
6989                    // `OnceLock` so the env read is a one-time cost per process;
6990                    // the counter gate gives the smoke test a deterministic
6991                    // single-line `aot_trace_fired pc=N` for the first dispatch.
6992                    if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
6993                        eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
6994                    }
6995                    let continuation_pc = {
6996                        // v1.1 A1 Session A — chunk_compiler.enter
6997                        // (CraneliftBackend delegates to enter_jit;
6998                        // NullJitBackend returns an inert guard).
6999                        let vm_ptr: *mut Vm = self;
7000                        let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
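7001                        // SAFETY: `entry_fn` is `ct.entry`, and `ct` is held for the whole dispatch block; `reg_state` is a local buffer resized to `reg_state_len` slots above (`window_size`, plus the saved-PC slot for downrec entries) and is not reallocated while the call runs. Pointer-tagged slots are Gc<T> (NonNull<T>) into the single-threaded GC heap, live as long as they are reachable from active roots (see heap.rs:5-7). NOTE(review): assumes the compiled trace only accesses slots within `reg_state_len` — confirm against the lowerer.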
7002                        unsafe { entry_fn(reg_state.as_mut_ptr()) }
7003                    };
7004                    self.jit.counters.dispatched += 1;
7005
7006                    if self.jit.pending_err.is_some() {
7007                        self.jit.pending_err = None;
7008                        self.jit.counters.deopt += 1;
7009                        // P12-S4-step4b-C-2 — unwind any helper-pushed
7010                        // inlined frames before the interpreter resumes.
7011                        // Don't restore reg_state — the trace's partial
7012                        // writes are discarded; interp re-executes from
7013                        // the original `pc`.
7014                        while self.frames.len() > pre_frames {
7015                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
7016                        }
7017                        if is_downrec_entry {
7018                            // v2.0 Track-R R3c — pending_err observed
7019                            // mid-trace inside a downrec admit. Treat
7020                            // it as a guard miss: bump `downrec_deopt`
7021                            // and suppress the next downrec admit so
7022                            // interp can advance past `head_pc` and
7023                            // the same trace doesn't immediately re-
7024                            // fire on the next loop iteration.
7025                            self.jit.counters.downrec_deopt += 1;
7026                            self.jit.suppress_downrec_admit_once = true;
7027                        }
7028                    } else if is_downrec_entry && {
7029                        // v2.0 Track-R R3d — only enter the R3c/R3d
7030                        // downrec classifier for returns whose shape
7031                        // matches the lowerer's `downrec_idx_opt` tail
7032                        // emit: either the stitch_blk DOWNREC sentinel
7033                        // (HIT) or the deopt_blk GLOBAL-sentinel-with-
7034                        // body==head_pc (MISS via guard fail). Any
7035                        // other return from a downrec trace (intermediate
7036                        // body cmp side-exit, GetField inference fail,
7037                        // etc.) carries a different sentinel/body shape
7038                        // and means the body exited BEFORE reaching the
7039                        // downrec close — classify those through the
7040                        // normal decode path (else branch below) so
7041                        // reg_state restores + pc advances correctly.
7042                        // The pre-R3d behavior (R3c) classified them all
7043                        // as MISS and skipped the normal restore, which
7044                        // inflated `downrec_deopt` with non-downrec
7045                        // events and lost the trace's mid-flight writes.
7046                        let raw_ret = continuation_pc as u64;
7047                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
7048                        let sentinel_code = if from_side_trace {
7049                            ((raw_ret >> 56) & 0x7F) as u32
7050                        } else {
7051                            0
7052                        };
7053                        let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
7054                        let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
7055                            crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
7056                            0,
7057                        );
7058                        from_side_trace
7059                            && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
7060                                || (sentinel_code == global_deopt_code
7061                                    && raw_body == head_pc_val as u64))
7062                    } {
7063                        // R3d downrec event classifier.
7064                        let raw_ret = continuation_pc as u64;
7065                        let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
7066                        if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
7067                            // Guard HIT — saved_pc matched one of the
7068                            // baked candidates and the trace's
7069                            // `stitch_blk` arm returned the DOWNREC
7070                            // sentinel. Cycle-safety checkpoint:
7071                            // decrement budget; on underflow,
7072                            // reclassify as deopt + reset budget.
7073                            // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
7074                            // ~all natural HITs in a hot loop fire
7075                            // before reset pressure.
7076                            if self.jit.stitch_depth_remaining > 0 {
7077                                self.jit.stitch_depth_remaining -= 1;
7078                                self.jit.counters.downrec_dispatched += 1;
7079                            } else {
7080                                self.jit.counters.downrec_deopt += 1;
7081                                self.jit.stitch_depth_remaining =
7082                                    crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
7083                            }
7084                        } else {
7085                            // Guard MISS via the lowerer's deopt_blk
7086                            // arm (GLOBAL sentinel + body == head_pc).
7087                            // The deopt_blk emit performs the
7088                            // store-back via `emit_store_back_and_return_pc`,
7089                            // so the live stack already reflects the
7090                            // body's writes; no extra restore needed
7091                            // from the dispatcher side.
7092                            self.jit.counters.downrec_deopt += 1;
7093                        }
7094                        self.jit.suppress_downrec_admit_once = true;
7095                        // Pop helper-pushed inlined frames (defensive —
7096                        // R3d's emit shape doesn't push frames in the
7097                        // tail, but a body side-exit before reaching
7098                        // the tail may have via the materialize helper).
7099                        while self.frames.len() > pre_frames {
7100                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
7101                        }
7102                        self.jit.reg_state_buf = reg_state;
7103                        self.jit.entry_tags_buf = entry_tags;
7104                        continue;
7105                    } else {
7106                        // Restore each slot using the trace's
7107                        // exit-tag analysis (see ExitTag docs).
7108                        // P12-S4-step4b-C-2 — decode the IR's
7109                        // side-exit shape. Upper 32 bits = (site_idx
7110                        // + 1) for inline cmp side-exits, 0 for
7111                        // legacy clean-tail / non-inline exits.
7112                        // P15-A v2-C-A0 — decode lives in
7113                        // `crate::jit::trace::decode_exit_shape` so
7114                        // v2-C-A3 can reuse it with the SIDE TRACE's
7115                        // shape inputs when the sentinel bit
7116                        // (v2-C-A2) is set on `raw_ret`.
7117                        let raw_ret = continuation_pc as u64;
7118                        // P15-A v2-C-A3 — side-trace return decode.
7119                        // Bit 63 of `raw_ret` is the side-trace
7120                        // marker the parent's IR OR'd in when it
7121                        // tail-called into a wired child trace.
7122                        // Bits 56..=62 carry the sentinel code (the
7123                        // cache key into the parent's
7124                        // `side_trace_cache`); bits 0..=55 are the
7125                        // child's own return value (encoded site or
7126                        // plain cont_pc) which we MUST decode using
7127                        // the CHILD's per_exit_inline / per_exit_tags
7128                        // / exit_tags / exit_hit_counts — not the
7129                        // parent's. The dispatcher snapshot read
7130                        // above holds the parent's shapes; when bit
7131                        // 63 is set we re-fetch the child's via the
7132                        // sentinel-keyed cache.
7133                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
7134                        let (
7135                            decode_inline,
7136                            decode_tags,
7137                            decode_exit_tags,
7138                            decode_hit_counts,
7139                            decode_body,
7140                        ) = if from_side_trace {
7141                            let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
7142                            let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
7143                            let traces = cl.proto.traces.borrow();
7144                            let child_idx = traces
7145                                .iter()
7146                                .find(|t| t.head_pc == head_pc_val)
7147                                .and_then(|pct| {
7148                                    pct.side_trace_cache.borrow().get(&sentinel_code).copied()
7149                                });
7150                            if let Some(idx) = child_idx
7151                                && let Some(child) = traces.get(idx as usize)
7152                            {
7153                                if crate::jit::trace::v2c_probe_enabled() {
7154                                    eprintln!(
7155                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
7156                                        sentinel_code,
7157                                        body,
7158                                        idx,
7159                                        child.n_ops,
7160                                        child.head_pc,
7161                                        child.window_size,
7162                                        pc,
7163                                        window_size,
7164                                        child.dispatchable,
7165                                        child.is_inline_abort_close,
7166                                    );
7167                                }
7168                                (
7169                                    child.per_exit_inline.clone(),
7170                                    child.per_exit_tags.clone(),
7171                                    child.exit_tags.clone(),
7172                                    child.exit_hit_counts.clone(),
7173                                    body,
7174                                )
7175                            } else {
7176                                if crate::jit::trace::v2c_probe_enabled() {
7177                                    eprintln!(
7178                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
7179                                        sentinel_code, body,
7180                                    );
7181                                }
7182                                // Cache miss — fall back to parent
7183                                // shapes with the body bits. Best-
7184                                // effort; the trace_side_trace_
7185                                // shape_mismatch_count records this
7186                                // path indirectly (close-handler
7187                                // skips wiring on mismatch so we
7188                                // shouldn't reach here when shape
7189                                // gate held).
7190                                (
7191                                    per_exit_inline.clone(),
7192                                    per_exit_tags.clone(),
7193                                    exit_tags.clone(),
7194                                    exit_hit_counts.clone(),
7195                                    body,
7196                                )
7197                            }
7198                        } else {
7199                            // P15-A v2-D — dispatcher-level side-trace
7200                            // invocation. Replaces v2-C's universal IR
7201                            // gate (`load + icmp + brif` at every
7202                            // emit_store_back callsite, which A6/A7
7203                            // measured as a net perf regression).
7204                            // History (A8, since reverted): A8 added a
7205                            // fast-path that skipped the tentative decode +
7206                            // child lookup when `has_any_side_wired ==
7207                            // false` (the common case until the first side
7208                            // trace compiles for this parent), aiming to
7209                            // collapse the v2-D overhead to a single
7210                            // `Cell::get()` on the cold path for
7211                            // fib_10_x10k and other tight short-trace
7212                            // workloads whose parents never get a child.
7213                            // A8-revert: A8 had `parent_has_side` short-
7214                            // circuit + snapshot hoist; mini N=3 showed
7215                            // A8 lost the btrees_d8 1.02× win (dropped
7216                            // to 0.95×) WITHOUT helping fib_10 (same
7217                            // 0.86×). A8 is dropped — the code below
7218                            // always runs the v2-D path; the tentative decode
7219                            // + cell load is cheaper than the cost A8 added.
7220                            {
7221                                let tentative = crate::jit::trace::decode_exit_shape(
7222                                    raw_ret,
7223                                    per_exit_inline,
7224                                    per_exit_tags,
7225                                    exit_tags,
7226                                );
7227                                let tentative_exit_idx = tentative.exit_hit_idx;
7228                                let child_invoke = {
7229                                    let traces = cl.proto.traces.borrow();
7230                                    traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
7231                                        |pct| {
7232                                            let cell =
7233                                                pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
7234                                            let fn_ptr = cell.get();
7235                                            if fn_ptr.is_null() {
7236                                                return None;
7237                                            }
7238                                            traces
7239                                                .iter()
7240                                                .find(|t| {
7241                                                    t.entry as *const () as *const u8 == fn_ptr
7242                                                })
7243                                                .map(|child| {
7244                                                    (
7245                                                        child.entry,
7246                                                        child.per_exit_inline.clone(),
7247                                                        child.per_exit_tags.clone(),
7248                                                        child.exit_tags.clone(),
7249                                                        child.exit_hit_counts.clone(),
7250                                                    )
7251                                                })
7252                                        },
7253                                    )
7254                                };
7255                                if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
7256                                    let child_raw_ret = {
7257                                        // v1.1 A1 Session A — chunk_compiler.enter
7258                                        // (side-trace entry).
7259                                        let vm_ptr: *mut Vm = self;
7260                                        let _guard =
7261                                            self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
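7262                                        // SAFETY: `cent` is the `entry` fn pointer copied just above from a trace in `cl.proto.traces`, and it is handed the `reg_state` dispatch buffer. NOTE(review): presumably the trace table keeps the compiled code alive and `reg_state` is large enough for the child's register window — confirm at the wiring site.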
7263                                        unsafe { cent(reg_state.as_mut_ptr()) }
7264                                    };
7265                                    (cpi, cpt, cet, chc, child_raw_ret as u64)
7266                                } else {
7267                                    (
7268                                        per_exit_inline.clone(),
7269                                        per_exit_tags.clone(),
7270                                        exit_tags.clone(),
7271                                        exit_hit_counts.clone(),
7272                                        raw_ret,
7273                                    )
7274                                }
7275                            }
7276                        };
7277                        let decoded = crate::jit::trace::decode_exit_shape(
7278                            decode_body,
7279                            &decode_inline,
7280                            &decode_tags,
7281                            &decode_exit_tags,
7282                        );
7283                        let site_id = decoded.site_id;
7284                        let cont_pc = decoded.cont_pc;
7285                        let exit_hit_idx = decoded.exit_hit_idx;
7286                        let exit_tags_for_pc = decoded.exit_tags_for_pc;
7287                        // P15-A v2-C-A3 — for side-trace returns
7288                        // force using_global_exit_tags=false so the
7289                        // restore loop always takes the per-tag slow
7290                        // path (the child's global_tag_res_kind
7291                        // classification isn't plumbed through yet
7292                        // — TODO for a future polish step).
7293                        let using_global_exit_tags = if from_side_trace {
7294                            false
7295                        } else {
7296                            decoded.using_global_exit_tags
7297                        };
7298                        // P15-prep — increment the counter (saturate
7299                        // at u32::MAX to avoid wrap on long runs).
7300                        // P15-A v1 — track whether this increment is
7301                        // the one that crossed `HOTEXIT_THRESHOLD`
7302                        // (transition: previous v < threshold, new v
7303                        // == threshold). The side-trace start is
7304                        // deferred to just before `continue;` so
7305                        // vm.stack and frame.pc are fully restored
7306                        // (the snapshot reads post-restore values).
7307                        let mut side_trace_should_start = false;
7308                        // P15-A v2-C-A3 — for side-trace returns the
7309                        // counter to bump is the CHILD's (decoded
7310                        // shape lookup) — `exit_hit_idx` is into the
7311                        // decoded layout, so use the matching
7312                        // `decode_hit_counts`. For parent decode
7313                        // they're aliased (clone of the parent's
7314                        // own Rc).
7315                        if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7316                            let v = c.get();
7317                            if v < u32::MAX {
7318                                c.set(v + 1);
7319                            }
7320                            if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7321                                && self.jit.active_trace.is_none()
7322                                && self.jit.trace_enabled
7323                            {
7324                                side_trace_should_start = true;
7325                            }
7326                        }
7327                        // P12-S4-step4b-C-2 — at an inline cmp@d>0
7328                        // side-exit, the helper has pushed N frames on
7329                        // top of the trace head's frame and
7330                        // `exit_tags_for_pc.len()` covers the full
7331                        // window (caller + each inlined frame's
7332                        // window). Slots beyond `max_stack` belong to
7333                        // an inlined frame: their `Untouched` entries
7334                        // default to Nil (no entry-tag fallback —
7335                        // marshal-in only captured caller slots) and
7336                        // we write to interp stack at `base + i` which
7337                        // mirrors `op_offsets`-derived layout.
7338                        let slot_count = exit_tags_for_pc.len();
7339                        // P12-S4-step4b-C-2 — the helper only extends
7340                        // vm.stack up to the deepest pushed frame's
7341                        // window, but the exit_tags snapshot covers
7342                        // the trace's full `window_size` (which
7343                        // includes depth-N+1 scratch slots that the
7344                        // trace's IR may have written without a
7345                        // matching pushed frame). Extend with Nil so
7346                        // the write at the tail doesn't panic; these
7347                        // slots get overwritten by the writeback loop
7348                        // and won't leak meaningful data past the
7349                        // pushed frames' R[0..max_stack) windows.
7350                        if self.stack.len() < base_us + slot_count {
7351                            self.stack
7352                                .resize(base_us + slot_count, crate::runtime::Value::Nil);
7353                        }
7354                        // P13-S13-E — fast-path restore loop. When
7355                        // we landed on the global `exit_tags`,
7356                        // dispatch on the compile-time
7357                        // classification: skip the loop entirely
7358                        // for `AllUntouched`, do a tag-free
7359                        // `Value::Int(...)` write per slot for
7360                        // `AllInt`, otherwise fall through to the
7361                        // general match-arm loop. site_id > 0
7362                        // (inline frame mat) and per_exit_tags
7363                        // hits always take the general path —
7364                        // their per-side-exit shapes aren't
7365                        // pre-classified yet.
7366                        let fast_path_taken = if using_global_exit_tags {
7367                            match global_tag_res_kind {
7368                                crate::jit::trace::TagResKind::AllUntouched => {
7369                                    // No-op: vm.stack already
7370                                    // matches the trace's post-
7371                                    // entry state for these
7372                                    // slots (entry values not
7373                                    // overridden, or already
7374                                    // spilled by helpers).
7375                                    true
7376                                }
7377                                crate::jit::trace::TagResKind::AllInt => {
7378                                    for i in 0..slot_count {
7379                                        self.stack[base_us + i] =
7380                                            crate::runtime::Value::Int(reg_state[i]);
7381                                    }
7382                                    true
7383                                }
7384                                crate::jit::trace::TagResKind::Mixed => false,
7385                            }
7386                        } else {
7387                            false
7388                        };
7389                        if !fast_path_taken {
7390                            for i in 0..slot_count {
7391                                let tag = match exit_tags_for_pc[i] {
7392                                    crate::jit::trace::ExitTag::Untouched => {
7393                                        if i < max_stack {
7394                                            entry_tags[i]
7395                                        } else {
7396                                            crate::runtime::value::raw::NIL
7397                                        }
7398                                    }
7399                                    crate::jit::trace::ExitTag::Int => {
7400                                        crate::runtime::value::raw::INT
7401                                    }
7402                                    crate::jit::trace::ExitTag::Float => {
7403                                        crate::runtime::value::raw::FLOAT
7404                                    }
7405                                    crate::jit::trace::ExitTag::Table => {
7406                                        crate::runtime::value::raw::TABLE
7407                                    }
7408                                    crate::jit::trace::ExitTag::Closure => {
7409                                        crate::runtime::value::raw::CLOSURE
7410                                    }
7411                                    // P12-S6-A1 — trace actively wrote Nil
7412                                    // to this slot (e.g. via Op::LoadNil).
7413                                    // Restore as Nil regardless of the entry
7414                                    // tag, since the i64 payload is 0 and
7415                                    // packing as the entry tag (e.g. INT)
7416                                    // would mis-type the slot.
7417                                    crate::jit::trace::ExitTag::Nil => {
7418                                        crate::runtime::value::raw::NIL
7419                                    }
7420                                    // P12-S12-C v2 — trace wrote a Str ptr
7421                                    // to this slot (LoadK Str / Move from
7422                                    // Str / Concat result). Restore as
7423                                    // Value::Str with raw bits round-
7424                                    // tripped.
7425                                    crate::jit::trace::ExitTag::Str => {
7426                                        crate::runtime::value::raw::STR
7427                                    }
7428                                };
7429                                // SAFETY: tag is either the entry tag of a
7430                                // verified slot (entry validated above) or the
7431                                // raw tag chosen by the ExitTag match just
7432                                // above (INT / FLOAT / TABLE / CLOSURE / NIL /
7433                                // STR). The raw payload sits in reg_state[i].
7434                                // Stack was extended by the materialize helper for inline frames and resized to `base_us + slot_count` before this loop.
7435                                // For pointer-carrying tags: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7436                                self.stack[base_us + i] = unsafe {
7437                                    Value::pack(
7438                                        tag,
7439                                        crate::runtime::value::RawVal {
7440                                            zero: reg_state[i] as u64,
7441                                        },
7442                                    )
7443                                };
7444                            }
7445                        }
7446                        // P12-S4-step4b-C-2 — for non-inline exits the
7447                        // helper was never called (no metas chain for
7448                        // this cont_pc), so `frames.last()` is the
7449                        // trace head's frame and we set its pc to
7450                        // cont_pc as before. For inline exits the
7451                        // helper baked the side-exit PC into the
7452                        // innermost frame's `pc` at push time
7453                        // (chain.last().pc was overridden at emit),
7454                        // so this assignment to `frames.last_mut().pc
7455                        // = cont_pc` is a redundant-but-correct
7456                        // confirmation.
7457                        let _ = &per_exit_inline; // hold the Rc alive across dispatch
7458                        // P12-S4-step4b-C-2 — for inline side-exits the
7459                        // helper has pushed N frames on top. The trace
7460                        // head frame is at `pre_frames - 1`; set its
7461                        // pc to `head_resume_pc` so when the chain
7462                        // eventually pops back to it, interp resumes
7463                        // PAST the trace's depth-0 Op::Call instead of
7464                        // restarting from `head_pc` and re-triggering
7465                        // dispatch (infinite loop). The innermost
7466                        // (helper-pushed) frame already has its pc
7467                        // baked in at compile time, but we still
7468                        // assign `cont_pc` below for parity with the
7469                        // non-inline path (no-op).
7470                        if site_id > 0 {
7471                            let idx = (site_id - 1) as usize;
7472                            let head_resume_pc = decode_inline[idx].head_resume_pc;
7473                            if pre_frames > 0 {
7474                                if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7475                                    f.pc = head_resume_pc;
7476                                }
7477                            }
7478                        }
7479                        let frames_len_now = self.frames.len();
7480                        // SAFETY: `unwrap_unchecked` requires `self.frames` to be non-empty. NOTE(review): presumably guaranteed because the trace was dispatched from a running Lua frame and side exits only push frames on top of it — confirm.
7481                        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7482                            CallFrame::Lua(fmut) => {
7483                                if crate::jit::trace::v2c_probe_enabled() {
7484                                    eprintln!(
7485                                        "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
7486                                        from_side_trace,
7487                                        raw_ret,
7488                                        fmut.pc,
7489                                        cont_pc,
7490                                        site_id,
7491                                        frames_len_now,
7492                                        pre_frames,
7493                                        max_stack,
7494                                    );
7495                                }
7496                                fmut.pc = cont_pc;
7497                            }
7498                            _ => unreachable!("Cont frame at trace dispatch"),
7499                        }
7500                        // P15-A v1 — deferred side-trace start. The
7501                        // increment block above flagged this exit's
7502                        // hit count crossing HOTEXIT_THRESHOLD; now
7503                        // that vm.stack is restored and frame.pc is
7504                        // settled, snapshot entry_tags from the
7505                        // resume frame's window and create the
7506                        // recorder. The recorder's first push fires
7507                        // on the next interp iteration at cont_pc.
7508                        //
7509                        // `head_proto` for the side trace = cl.proto
7510                        // (trace JIT only inlines self-recursive
7511                        // calls today, so cont_pc always lands in
7512                        // the same proto as the parent). Frame base
7513                        // is the resume frame (top of `self.frames`
7514                        // — inline-pushed frames moved this).
7515                        if side_trace_should_start {
7516                            let (resume_base, resume_proto) = match self.frames.last() {
7517                                Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7518                                _ => (base_us, cl.proto),
7519                            };
7520                            let resume_max_stack = resume_proto.max_stack as usize;
7521                            let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7522                            // Extend stack if cont_pc's frame window
7523                            // overhangs the current stack len (rare,
7524                            // but inline-pushed frame stack writes
7525                            // only covered the trace's writeback).
7526                            if self.stack.len() < resume_base + resume_max_stack {
7527                                self.stack.resize(
7528                                    resume_base + resume_max_stack,
7529                                    crate::runtime::Value::Nil,
7530                                );
7531                            }
7532                            for i in 0..resume_max_stack {
7533                                let (tag, _) = self.stack[resume_base + i].unpack();
7534                                side_entry_tags.push(tag);
7535                            }
7536                            self.jit.active_trace =
7537                                Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7538                                    resume_proto,
7539                                    cont_pc,
7540                                    side_entry_tags,
7541                                    cl.proto,
7542                                    head_pc_val,
7543                                    exit_hit_idx,
7544                                )));
7545                            self.jit.recording_frame_base = self.frames.len() - 1;
7546                            self.jit.counters.side_trace_started += 1;
7547                        }
7548                        // P13-S13-D — put the dispatch buffers back
7549                        // before the `continue;` so the next
7550                        // dispatch picks up the same allocation.
7551                        self.jit.reg_state_buf = reg_state;
7552                        self.jit.entry_tags_buf = entry_tags;
7553                        continue;
7554                    }
7555                }
7556                // P13-S13-D — !dispatch_ok / deopt path / non-cont
7557                // exit also restore the buffers before falling
7558                // through to the interp.
7559                self.jit.reg_state_buf = reg_state;
7560                self.jit.entry_tags_buf = entry_tags;
7561            }
7562
7563            // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
7564            // hook code that consults `currentpc = savedpc - 1` lands on the
7565            // instruction now executing. luna mirrors that by advancing
7566            // `f.pc` to `pc + 1` before the hook block — local_at /
7567            // getinfo / line attribution all read f.pc, and the existing
7568            // `pc - 1` convention in those helpers then yields the current
7569            // instruction's pc (db.lua :696: local `A` visible at the
7570            // chunk's return line once OP_CLOSURE has advanced pc).
7571            //
7572            // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
7573            // (cont frames drained above) so the and_then/Option layers are
7574            // dead weight.
7575            // SAFETY: `unwrap_unchecked` requires `self.frames` to be non-empty. NOTE(review): presumably guaranteed because the loop is about to execute an instruction of the top frame — confirm.
7576            match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7577                CallFrame::Lua(fmut) => fmut.pc = pc + 1,
7578                _ => unreachable!("Cont frame at pc bump"),
7579            }
7580
7581            // count + line hooks (PUC traceexec): before executing the
7582            // instruction. Skipped while the hook itself runs.
7583            // (Parens here are load-bearing — without them `&&` binds tighter
7584            // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7585            // letting a Lua line hook recurse into itself → stack overflow
7586            // on db.lua line-hook assertions. Matches the `hook_call_with` /
7587            // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7588            if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7589                let lines = &cl.proto.lines;
7590                let cur_line = if lines.is_empty() {
7591                    None
7592                } else {
7593                    Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7594                };
7595                // count hook: fire every `count_base` instructions
7596                if self.hook.count {
7597                    self.hook.count_left -= 1;
7598                    if self.hook.count_left <= 0 {
7599                        self.hook.count_left = self.hook.count_base;
7600                        // hooked function is the running Lua frame: its frame
7601                        // is on the stack, so no synthetic C level is needed.
7602                        self.run_hook(b"count", cur_line, false)?;
7603                    }
7604                }
7605                // line hook: fire on a fresh frame, a backward jump (loop), or a
7606                // change of source line.
7607                if self.hook.line {
7608                    if lines.is_empty() {
7609                        // PUC: a stripped chunk has no line info, so
7610                        // `getfuncline` returns -1. The line hook still fires
7611                        // on the first instruction of the new frame (where
7612                        // `npci <= oldpc` holds at oldpc=0), with the line
7613                        // pushed as `nil` instead of an integer (db.lua :1030
7614                        // "hook called without debug info for 1st instruction").
7615                        if oldpc == u32::MAX {
7616                            self.run_hook(b"line", None, false)?;
7617                            self.top_frame_mut().hook_oldpc = pc;
7618                        }
7619                    } else {
7620                        let newline = lines[(pc as usize).min(lines.len() - 1)];
7621                        // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7622                        // on a backward jump (`pc < oldpc` — strict; an equal pc
7623                        // would re-fire the install-site after `oldpc = pc`),
7624                        // or when the source line changes.
7625                        let fire = oldpc == u32::MAX
7626                            || pc < oldpc
7627                            || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7628                        if fire {
7629                            self.run_hook(b"line", Some(newline as i64), false)?;
7630                        }
7631                        self.top_frame_mut().hook_oldpc = pc;
7632                    }
7633                }
7634            }
7635
7636            match inst.op() {
7637                Op::Move => {
7638                    let v = self.r(base, inst.b());
7639                    self.set_r(base, inst.a(), v);
7640                }
7641                Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7642                Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7643                Op::LoadK => {
7644                    let v = cl.proto.consts[inst.bx() as usize];
7645                    self.set_r(base, inst.a(), v);
7646                }
7647                Op::LoadKx => {
7648                    let extra = cl.proto.code[self.pc_of_top() as usize];
7649                    self.bump_pc();
7650                    let v = cl.proto.consts[extra.ax() as usize];
7651                    self.set_r(base, inst.a(), v);
7652                }
7653                Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7654                Op::LFalseSkip => {
7655                    self.set_r(base, inst.a(), Value::Bool(false));
7656                    self.bump_pc();
7657                }
7658                Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7659                Op::LoadNil => {
7660                    let a = inst.a();
7661                    for i in 0..=inst.b() {
7662                        self.set_r(base, a + i, Value::Nil);
7663                    }
7664                }
7665                Op::GetUpval => {
7666                    let v = self.upval_get(cl, inst.b());
7667                    self.set_r(base, inst.a(), v);
7668                }
7669                Op::SetUpval => {
7670                    let v = self.r(base, inst.a());
7671                    self.upval_set(cl, inst.b(), v);
7672                }
7673                Op::GetTabUp => {
7674                    let t = self.upval_get(cl, inst.b());
7675                    let key = cl.proto.consts[inst.c() as usize];
7676                    self.op_index(t, key, base + inst.a())?;
7677                }
7678                Op::GetTable => {
7679                    let t = self.r(base, inst.b());
7680                    let key = self.r(base, inst.c());
7681                    self.op_index(t, key, base + inst.a())?;
7682                }
7683                Op::GetI => {
7684                    let t = self.r(base, inst.b());
7685                    self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7686                }
7687                Op::GetField => {
7688                    let t = self.r(base, inst.b());
7689                    let key = cl.proto.consts[inst.c() as usize];
7690                    // v1.2 D4 A1 — fast path: known-Str const key + no
7691                    // metatable on the table → skip `op_index` /
7692                    // `index_step`'s MAX_TAG_LOOP setup and the outer
7693                    // `Value` match. Falls through to the slow path
7694                    // unchanged when either invariant breaks (so
7695                    // `__index` metamethods, non-Table receivers, and
7696                    // non-Str keys behave exactly as before).
7697                    if let Value::Table(tb) = t
7698                        && tb.metatable().is_none()
7699                        && let Value::Str(s) = key
7700                    {
7701                        let v = tb.get_str(s);
7702                        self.stack[(base + inst.a()) as usize] = v;
7703                    } else {
7704                        self.op_index(t, key, base + inst.a())?;
7705                    }
7706                }
7707                Op::SetTabUp => {
7708                    let t = self.upval_get(cl, inst.a());
7709                    let key = cl.proto.consts[inst.b() as usize];
7710                    let v = self.r(base, inst.c());
7711                    self.op_newindex(t, key, v)?;
7712                }
7713                Op::SetTable => {
7714                    let t = self.r(base, inst.a());
7715                    let key = self.r(base, inst.b());
7716                    let v = self.r(base, inst.c());
7717                    self.op_newindex(t, key, v)?;
7718                }
7719                Op::SetI => {
7720                    let t = self.r(base, inst.a());
7721                    let v = self.r(base, inst.c());
7722                    self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7723                }
7724                Op::SetField => {
7725                    let t = self.r(base, inst.a());
7726                    let key = cl.proto.consts[inst.b() as usize];
7727                    let v = self.r(base, inst.c());
7728                    self.op_newindex(t, key, v)?;
7729                }
7730                Op::NewTable => {
7731                    let t = self.heap.new_table();
7732                    self.set_r(base, inst.a(), Value::Table(t));
7733                    self.maybe_collect_garbage(base + inst.a() + 1);
7734                }
7735                Op::SetList => {
7736                    let a = inst.a();
7737                    let abs_a = base + a;
7738                    let n = if inst.b() == 0 {
7739                        self.top - (abs_a + 1)
7740                    } else {
7741                        inst.b()
7742                    };
7743                    let offset = if inst.k() {
7744                        let extra = cl.proto.code[self.pc_of_top() as usize];
7745                        self.bump_pc();
7746                        extra.ax() as i64
7747                    } else {
7748                        inst.c() as i64
7749                    };
7750                    let Value::Table(t) = self.r(base, a) else {
7751                        unreachable!("SETLIST on non-table");
7752                    };
7753                    for i in 1..=n {
7754                        let v = self.r(base, a + i);
7755                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7756                        if let Err(TableError::Overflow) =
7757                            unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7758                        {
7759                            return Err(self.rt_err("table overflow"));
7760                        }
7761                    }
7762                    // one barrier_back covers every store this op did — PUC's
7763                    // `luaC_barrierback_` once-per-table optimisation
7764                    self.heap
7765                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7766                    // the element temps above the table are now consumed
7767                    self.maybe_collect_garbage(base + a + 1);
7768                }
7769                Op::SelfOp => {
7770                    let o = self.r(base, inst.b());
7771                    self.set_r(base, inst.a() + 1, o);
7772                    // PUC OP_SELF's C is a constant index when the k-flag is
7773                    // set; otherwise it points to a register that holds the
7774                    // (constant-loaded) key. luna's compiler falls back to the
7775                    // register form when the constant index exceeds OP_SELF's
7776                    // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7777                    // a table with 250+ string keys, where "findfield" lands
7778                    // past const #255). The exec must honour the same split.
7779                    let key = if inst.k() {
7780                        cl.proto.consts[inst.c() as usize]
7781                    } else {
7782                        self.r(base, inst.c())
7783                    };
7784                    self.op_index(o, key, base + inst.a())?;
7785                }
7786                Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7787                Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7788                Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7789                Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7790                Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7791                Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7792                Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7793                Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7794                Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7795                Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7796                Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7797                Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7798                Op::Unm => {
7799                    let v = self.r(base, inst.b());
7800                    match coerce_num(v) {
7801                        Some(Num::Int(i)) => {
7802                            self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7803                        }
7804                        Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7805                        None => {
7806                            let mm = self.get_mm(v, Mm::Unm);
7807                            if mm.is_nil() {
7808                                return Err(self.type_err("perform arithmetic on", v));
7809                            }
7810                            let dst = base + inst.a();
7811                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7812                        }
7813                    }
7814                }
7815                Op::BNot => {
7816                    let v = self.r(base, inst.b());
7817                    match coerce_num(v) {
7818                        Some(n) => {
7819                            let i = self.int_from_num(n)?;
7820                            self.set_r(base, inst.a(), Value::Int(!i));
7821                        }
7822                        None => {
7823                            let mm = self.get_mm(v, Mm::BNot);
7824                            if mm.is_nil() {
7825                                return Err(self.type_err("perform bitwise operation on", v));
7826                            }
7827                            let dst = base + inst.a();
7828                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7829                        }
7830                    }
7831                }
7832                Op::Not => {
7833                    let v = self.r(base, inst.b());
7834                    self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7835                }
7836                Op::Len => {
7837                    let v = self.r(base, inst.b());
7838                    match self.len_step(v)? {
7839                        MmOut::Done(r) => self.set_r(base, inst.a(), r),
7840                        MmOut::Mm { func, recv } => {
7841                            let dst = base + inst.a();
7842                            self.begin_meta_call(
7843                                func,
7844                                &[recv, recv],
7845                                MetaAction::Store { dst },
7846                                "len",
7847                            )?;
7848                        }
7849                        MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7850                    }
7851                }
7852                Op::Concat => {
7853                    // right-associative fold over operands at base+a .. base+a+n,
7854                    // in place on the stack so a yielding __concat can suspend.
7855                    let a = inst.a();
7856                    let n = inst.b();
7857                    self.top = base + a + n;
7858                    self.concat_run(base + a)?;
7859                }
7860                Op::Close => {
7861                    // Yieldable: drive __close handlers through the
7862                    // interpreter loop so a coroutine.yield() inside a
7863                    // handler suspends cleanly (locals.lua block-end yield).
7864                    // `drive_close` parks the handler call at `self.top`, so
7865                    // raise `top` past this frame's full register window
7866                    // first — a goto out of a nested for-loop can fire
7867                    // OP_Close while `self.top` still sits at the inner
7868                    // body's working top, which would let `push_frame`'s
7869                    // wipe clobber the outer tbc slot before it could be
7870                    // closed (locals.lua:1219 nested-for goto regression).
7871                    self.top = self.top.max(base + cl.proto.max_stack as u32);
7872                    let _ =
7873                        self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7874                }
7875                Op::Tbc => {
7876                    self.register_tbc(base + inst.a())?;
7877                }
7878                Op::Jmp => {
7879                    let off = inst.sj();
7880                    // P12-S1.B — trace JIT back-edge counter. A negative
7881                    // jump offset is a loop back-edge (the only canonical
7882                    // backward jumps the compiler emits — `while`, `for`,
7883                    // `repeat`). Tick the per-Proto counter and, once it
7884                    // exceeds the threshold, log a stub promotion that
7885                    // S1.C will turn into actual trace recording. The
7886                    // whole block is gated on `self.jit.trace_enabled` so
7887                    // existing benches see one branch-not-taken and no
7888                    // counter writes.
7889                    if self.jit.trace_enabled && off < 0 {
7890                        let proto = cl.proto;
7891                        let c = proto.trace_hot_count.get();
7892                        if c < u32::MAX / 2 {
7893                            proto.trace_hot_count.set(c + 1);
7894                        }
7895                        // P13-S13-H — relaxed back-edge trigger:
7896                        // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7897                        // a missed crossing (active_trace busy with
7898                        // a call-trigger, or the recorder slot
7899                        // happened to be in use) doesn't permanently
7900                        // lock this back-edge target out. The
7901                        // `already_cached` short-circuit prevents
7902                        // duplicate recordings: once a trace is
7903                        // cached for this target, subsequent
7904                        // crossings skip the start. This pairs with
7905                        // S13-H's discard-on-partial-coverage close
7906                        // handling — when a short call-trigger is
7907                        // discarded, the back-edge can still find an
7908                        // open slot at the next iteration.
7909                        let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7910                        // P13-S13-K — gave-up short-circuit. Skip
7911                        // the RefCell borrow + scan when the
7912                        // S13-I cap force-compiled a partial
7913                        // trace on this Proto.
7914                        let back_edge_already_cached = if proto.trace_gave_up.get() {
7915                            true
7916                        } else {
7917                            proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7918                        };
7919                        if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7920                            && self.jit.active_trace.is_none()
7921                            && !back_edge_already_cached
7922                        {
7923                            // Back-edge target = pc after `add_pc(off)`,
7924                            // i.e. current `pc + 1 + off` (the dispatch
7925                            // loop has already advanced f.pc to pc+1).
7926                            let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7927                            // Snapshot per-slot Value tag at trace
7928                            // entry so the lowerer's kind tracker
7929                            // knows which arith path to lower
7930                            // (iadd vs fadd, etc.).
7931                            let max_stack = cl.proto.max_stack as usize;
7932                            let base_us = base as usize;
7933                            let mut entry_tags = Vec::with_capacity(max_stack);
7934                            for i in 0..max_stack {
7935                                let (tag, _) = self.stack[base_us + i].unpack();
7936                                entry_tags.push(tag);
7937                            }
7938                            self.jit.active_trace =
7939                                Some(Box::new(crate::jit::trace::TraceRecord::start(
7940                                    cl.proto, target, entry_tags, false,
7941                                )));
7942                            // P12-S4 — record the frame the trace
7943                            // started in. `self.frames.len() - 1`
7944                            // since we're inside the currently-running
7945                            // Lua frame's dispatch.
7946                            self.jit.recording_frame_base = self.frames.len() - 1;
7947                        }
7948                    }
7949                    self.add_pc(off);
7950                }
7951                Op::Eq => {
7952                    let l = self.r(base, inst.a());
7953                    let r = self.r(base, inst.b());
7954                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7955                        if (a == b) != inst.k() {
7956                            self.bump_pc();
7957                        }
7958                    } else {
7959                        let step = self.eq_step(l, r);
7960                        self.op_compare(step, l, r, inst.k(), "eq")?;
7961                    }
7962                }
7963                Op::EqK => {
7964                    let l = self.r(base, inst.a());
7965                    let r = cl.proto.consts[inst.b() as usize];
7966                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7967                        if (a == b) != inst.k() {
7968                            self.bump_pc();
7969                        }
7970                    } else {
7971                        let step = self.eq_step(l, r);
7972                        self.op_compare(step, l, r, inst.k(), "eq")?;
7973                    }
7974                }
7975                Op::Lt => {
7976                    let l = self.r(base, inst.a());
7977                    let r = self.r(base, inst.b());
7978                    // hot path: Int < Int — drops the MmOut + op_compare match
7979                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7980                        if (a < b) != inst.k() {
7981                            self.bump_pc();
7982                        }
7983                    } else {
7984                        let step = self.less_step(l, r, false)?;
7985                        self.op_compare(step, l, r, inst.k(), "lt")?;
7986                    }
7987                }
7988                Op::Le => {
7989                    let l = self.r(base, inst.a());
7990                    let r = self.r(base, inst.b());
7991                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7992                        if (a <= b) != inst.k() {
7993                            self.bump_pc();
7994                        }
7995                    } else {
7996                        let step = self.less_step(l, r, true)?;
7997                        self.op_compare(step, l, r, inst.k(), "le")?;
7998                    }
7999                }
8000                Op::Test => {
8001                    let cond = self.r(base, inst.a()).truthy();
8002                    self.cond_skip(cond, inst.k());
8003                }
8004                Op::TestSet => {
8005                    let v = self.r(base, inst.b());
8006                    if v.truthy() == inst.k() {
8007                        self.set_r(base, inst.a(), v);
8008                    } else {
8009                        self.bump_pc();
8010                    }
8011                }
8012                Op::Call => {
8013                    let abs = base + inst.a();
8014                    let nargs = if inst.b() == 0 {
8015                        None
8016                    } else {
8017                        Some(inst.b() - 1)
8018                    };
8019                    let wanted = inst.c() as i32 - 1;
8020                    self.begin_call(abs, nargs, wanted, false)?;
8021                }
8022                Op::TailCall => {
8023                    let fr = *self.top_frame();
8024                    let abs = base + inst.a();
8025                    let mut nargs = if inst.b() == 0 {
8026                        self.top - (abs + 1)
8027                    } else {
8028                        inst.b() - 1
8029                    };
8030                    // A tail call pops this frame before begin_call, so a
8031                    // non-callable target would lose its name/position. Report
8032                    // it now (PUC reads funcname from the still-current ci),
8033                    // while the frame is intact, for "(field 'x')"-style info.
8034                    let mut func = self.stack[abs as usize];
8035                    if !matches!(func, Value::Closure(_) | Value::Native(_))
8036                        && self.get_mm(func, Mm::Call).is_nil()
8037                    {
8038                        return Err(self.call_err(func));
8039                    }
8040                    // PUC `luaD_pretailcall` resolves a chain of `__call`
8041                    // metamethods *in place* before deciding whether to
8042                    // collapse this frame. Without that, each __call hop
8043                    // would push a fresh Lua frame and a 10000-deep
8044                    // tail-recursion through a 100-deep __call chain
8045                    // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
8046                    // shift args right, install the handler at `abs`, retry.
8047                    // Chain depth limit matches the call-site `begin_call`
8048                    // version cap (5.5 calls.lua :223 — 15 max, then "too
8049                    // long"; 16th wrap fails the call). An infinite
8050                    // self-referential `__call` would otherwise spin.
8051                    let chain_cap = if self.version >= LuaVersion::Lua55 {
8052                        15
8053                    } else {
8054                        MAX_CCMT
8055                    };
8056                    let mut chain = 0u32;
8057                    while !matches!(func, Value::Closure(_) | Value::Native(_)) {
8058                        let mm = self.get_mm(func, Mm::Call);
8059                        if mm.is_nil() {
8060                            return Err(self.call_err(func));
8061                        }
8062                        chain += 1;
8063                        if chain > chain_cap {
8064                            return Err(self.rt_err("'__call' chain too long"));
8065                        }
8066                        let end = (abs + 1 + nargs) as usize;
8067                        if self.stack.len() < end + 1 {
8068                            self.stack.resize(end + 1, Value::Nil);
8069                        }
8070                        for i in (0..=nargs).rev() {
8071                            self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
8072                        }
8073                        self.stack[abs as usize] = mm;
8074                        nargs += 1;
8075                        self.top = abs + 1 + nargs;
8076                        func = mm;
8077                    }
8078                    // PUC's tail-call collapse is Lua→Lua only. A tail call to
8079                    // a C function runs the C function under the *current* Lua
8080                    // activation (no frame fold — a C frame has nothing to
8081                    // collapse into); after the C function returns, the
8082                    // calling Lua function returns those results normally.
8083                    // Mirror that: keep our Lua frame on the stack, call the
8084                    // target through `begin_call(abs, …)` as a regular call,
8085                    // and let the fallback `Op::Return` that the compiler
8086                    // emits right after `Op::TailCall` forward the results.
8087                    // 5.1 closure.lua :177's `return getfenv()` from inside
8088                    // foo needs level 1 to resolve to foo, not to the
8089                    // thread's globals fallback that happens when no Lua
8090                    // frame is on the stack.
8091                    let lua_target = matches!(func, Value::Closure(_));
8092                    if lua_target {
8093                        self.close_slots(fr.base, None)?;
8094                        for i in 0..=nargs {
8095                            self.stack[(fr.func_slot + i) as usize] =
8096                                self.stack[(abs + i) as usize];
8097                        }
8098                        // v2.5 P1B-2A: clear the slot range that's now
8099                        // stranded by the tail-call collapse. The args
8100                        // were copied to `[fr.func_slot..fr.func_slot+
8101                        // nargs+1)`; the source slots `[abs..abs+
8102                        // nargs+1)` still hold the same `Value::Closure
8103                        // / Value::Str / ...` entries, but they're past
8104                        // the new call's window. Without this clear, a
8105                        // later GC with wider gc_top would mark stale
8106                        // pointers there (same UAF-A family the v2.3
8107                        // finish_results slot-clear closed for the
8108                        // Op::Return path).
8109                        let new_top_lower_bound = fr.func_slot + nargs + 1;
8110                        let prev_top = (self.top as usize).min(self.stack.len());
8111                        if (new_top_lower_bound as usize) < prev_top {
8112                            for slot in &mut self.stack[new_top_lower_bound as usize..prev_top] {
8113                                *slot = Value::Nil;
8114                            }
8115                        }
8116                        // PUC `CIST_TAIL`: the new Lua activation inherits
8117                        // the popped frame's tailcalls count plus one for
8118                        // this collapse. 5.1 db.lua :372 hammers 30000
8119                        // recursive tail calls and expects to see the
8120                        // synthetic tail level for every one of them.
8121                        self.pending_tailcalls = fr.tailcalls.saturating_add(1);
8122                        frames_pop_sync(&mut self.frames, &mut self.frames_top);
8123                        if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
8124                            && self.frames.len() < entry_depth
8125                        {
8126                            // a native completed what was this function's result
8127                            return Ok(self.take_results(fr.func_slot));
8128                        }
8129                    } else {
8130                        // Native (or __call-bearing) target: regular call. The
8131                        // results land at `abs..self.top` and the next op (the
8132                        // fallback `Op::Return`) forwards them. `wanted = -1`
8133                        // because the caller will multret them through Return.
8134                        self.begin_call(abs, Some(nargs), -1, false)?;
8135                    }
8136                }
8137                Op::Return | Op::Return0 | Op::Return1 => {
8138                    let (abs_a, nret) = match inst.op() {
8139                        Op::Return0 => (base, 0),
8140                        Op::Return1 => (base + inst.a(), 1),
8141                        _ => {
8142                            let abs_a = base + inst.a();
8143                            let nret = if inst.b() == 0 {
8144                                self.top - abs_a
8145                            } else {
8146                                inst.b() - 1
8147                            };
8148                            (abs_a, nret)
8149                        }
8150                    };
8151                    // close before moving results: __close handlers run above
8152                    // the stack top, so the result region [abs_a..abs_a+nret)
8153                    // stays intact across any yields the close performs.
8154                    // Fixed-count returns may leave `self.top` below the last
8155                    // result slot (the compiler does not always re-bump it);
8156                    // raise it past the result region so `drive_close` parks
8157                    // the handler call *above* — landing at `self.top` would
8158                    // otherwise clobber a result with the handler closure.
8159                    self.top = self.top.max(abs_a + nret);
8160                    if let Some(vals) = self.begin_close(
8161                        base,
8162                        None,
8163                        AfterClose::Return {
8164                            abs_a,
8165                            nret,
8166                            from_native: false,
8167                        },
8168                        entry_depth,
8169                    )? {
8170                        return Ok(vals);
8171                    }
8172                }
8173                Op::ForPrep => self.for_prep(inst, base)?,
8174                Op::ForLoop => {
8175                    // P12 — trace JIT back-edge counter on the
8176                    // numeric-for back-edge. ForLoop is always at
8177                    // a back-edge position (when it continues);
8178                    // for the trace recorder we treat it as the
8179                    // close-detection equivalent of `Op::Jmp` with
8180                    // negative offset. Counter only ticks when the
8181                    // back-edge will actually fire (count > 0 in
8182                    // the 5.4+ Int form, comparable predicates in
8183                    // 5.3-and-earlier / Float). The cheap check up front
8184                    // matches the for_loop helper's branch.
8185                    if self.jit.trace_enabled {
8186                        let a = inst.a();
8187                        let pre53 = self.version() <= LuaVersion::Lua53;
8188                        let take_back_edge =
8189                            match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
8190                                (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
8191                                    count > 0
8192                                }
8193                                (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
8194                                    let next = cur.wrapping_add(st);
8195                                    if st > 0 { next <= lim } else { next >= lim }
8196                                }
8197                                (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
8198                                    let next = cur + st;
8199                                    if st > 0.0 { next <= lim } else { next >= lim }
8200                                }
8201                                _ => false,
8202                            };
8203                        if take_back_edge {
8204                            let proto = cl.proto;
8205                            let c = proto.trace_hot_count.get();
8206                            if c < u32::MAX / 2 {
8207                                proto.trace_hot_count.set(c + 1);
8208                            }
8209                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8210                                && self.jit.active_trace.is_none()
8211                            {
8212                                // ForLoop's back-edge target = pc
8213                                // after `add_pc(-bx)` runs from the
8214                                // already-bumped f.pc (= pc + 1).
8215                                // So target = (pc + 1) - bx.
8216                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8217                                let max_stack = cl.proto.max_stack as usize;
8218                                let base_us = base as usize;
8219                                let mut entry_tags = Vec::with_capacity(max_stack);
8220                                for i in 0..max_stack {
8221                                    let (tag, _) = self.stack[base_us + i].unpack();
8222                                    entry_tags.push(tag);
8223                                }
8224                                self.jit.active_trace =
8225                                    Some(Box::new(crate::jit::trace::TraceRecord::start(
8226                                        cl.proto, target, entry_tags, false,
8227                                    )));
8228                                // P12-S4 — record the frame the trace
8229                                // started in. The currently-running
8230                                // Lua frame is at len() - 1.
8231                                self.jit.recording_frame_base = self.frames.len() - 1;
8232                            }
8233                        }
8234                    }
8235                    self.for_loop(inst, base);
8236                }
8237                Op::TForPrep => {
8238                    // the 4th control slot is the iterator's closing value
8239                    self.register_tbc(base + inst.a() + 3)?;
8240                    self.add_pc(inst.bx() as i32);
8241                }
8242                Op::TForCall => {
8243                    let abs = base + inst.a();
8244                    let need = (abs + 7) as usize;
8245                    if self.stack.len() < need {
8246                        self.stack.resize(need, Value::Nil);
8247                    }
8248                    self.stack[(abs + 4) as usize] = self.stack[abs as usize];
8249                    self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
8250                    self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
8251                    let nvars = inst.c() as i32;
8252                    self.begin_call(abs + 4, Some(2), nvars, false)?;
8253                }
8254                Op::TForLoop => {
8255                    let a = inst.a();
8256                    let ctrl = self.r(base, a + 4);
8257                    if !ctrl.is_nil() {
8258                        // P12-S12-B v1 — trace JIT back-edge counter on
8259                        // generic-for back-edge. TForLoop sits at the
8260                        // tail of `for k,v in expr do ... end`; recorder
8261                        // treats it as the close-detection equivalent of
8262                        // a negative Op::Jmp. Gate on `take_back_edge`
8263                        // (= `ctrl != nil`) so empty-iter loops don't
8264                        // pollute hot_count. v1 only adds the trigger;
8265                        // whitelist + helper + emit live in v2.
8266                        if self.jit.trace_enabled {
8267                            let proto = cl.proto;
8268                            let c = proto.trace_hot_count.get();
8269                            if c < u32::MAX / 2 {
8270                                proto.trace_hot_count.set(c + 1);
8271                            }
8272                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8273                                && self.jit.active_trace.is_none()
8274                            {
8275                                // TForLoop back-edge target = pc after
8276                                // `add_pc(-bx)` runs from the already-
8277                                // bumped f.pc (= pc + 1). So target =
8278                                // (pc + 1) - bx, normally landing on
8279                                // body_top (the op right after TForPrep).
8280                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8281                                let max_stack = cl.proto.max_stack as usize;
8282                                let base_us = base as usize;
8283                                let mut entry_tags = Vec::with_capacity(max_stack);
8284                                for i in 0..max_stack {
8285                                    let (tag, _) = self.stack[base_us + i].unpack();
8286                                    entry_tags.push(tag);
8287                                }
8288                                // P12-S12-B-v5 — snapshot the iter
8289                                // fn's address if Native, so the
8290                                // lowerer can specialise ipairs into
8291                                // inline Table aget IR.
8292                                let iter_ptr =
8293                                    if let Value::Native(n) = self.stack[base_us + a as usize] {
8294                                        Some(n.f as usize)
8295                                    } else {
8296                                        None
8297                                    };
8298                                // P12-S12-C v3 — snapshot R[A+5]'s
8299                                // tag (= current iter's val from
8300                                // the just-fired TForCall). The v5
8301                                // inline aget fast_blk emits a
8302                                // runtime guard against this tag;
8303                                // mixed-tag arrays deopt rather
8304                                // than producing garbage pointers
8305                                // through the v2 spill path.
8306                                let val_slot = base_us + (a as usize) + 5;
8307                                let val_tag = if val_slot < self.stack.len() {
8308                                    Some(self.stack[val_slot].unpack().0)
8309                                } else {
8310                                    None
8311                                };
8312                                let mut rec = crate::jit::trace::TraceRecord::start(
8313                                    cl.proto, target, entry_tags, false,
8314                                );
8315                                rec.tfor_iter_ptr = iter_ptr;
8316                                rec.tfor_val_tag = val_tag;
8317                                self.jit.active_trace = Some(Box::new(rec));
8318                                self.jit.recording_frame_base = self.frames.len() - 1;
8319                            }
8320                        }
8321                        self.set_r(base, a + 2, ctrl);
8322                        self.add_pc(-(inst.bx() as i32));
8323                    }
8324                }
8325                Op::Closure => {
8326                    let proto = cl.proto.protos[inst.bx() as usize];
8327                    let n_ups = proto.upvals.len();
8328                    // P11-S5d.M — build upvals on the stack for small
8329                    // closures, skipping the per-call Vec/Box alloc
8330                    // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8331                    // = 2 covers most Lua source (1 captured local, or
8332                    // _ENV + a single capture). Beyond that, fall back
8333                    // to a heap Vec.
8334                    use crate::runtime::function::INLINE_UPVALS_N;
8335                    let mut stack_buf: [std::mem::MaybeUninit<
8336                        Gc<crate::runtime::function::Upvalue>,
8337                    >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8338                    let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8339                    let use_inline = n_ups <= INLINE_UPVALS_N;
8340                    if !use_inline {
8341                        heap_buf.reserve_exact(n_ups);
8342                    }
8343                    for (i, d) in proto.upvals.iter().enumerate() {
8344                        let uv = if d.in_stack {
8345                            self.find_or_create_upval(base + d.index as u32)
8346                        } else {
8347                            cl.upvals()[d.index as usize]
8348                        };
8349                        if use_inline {
8350                            stack_buf[i] = std::mem::MaybeUninit::new(uv);
8351                        } else {
8352                            heap_buf.push(uv);
8353                        }
8354                    }
8355                    // Tiny shim around the two paths so the 5.1 _ENV
8356                    // clone + cache check below see one uniform
8357                    // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8358                    // into the local frame (still valid through the
8359                    // rest of this Op::Closure handler).
8360                    let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8361                        // SAFETY: the first n_ups slots of stack_buf
8362                        // were initialised above; we hand out a slice
8363                        // covering exactly them.
8364                        unsafe {
8365                            std::slice::from_raw_parts_mut(
8366                                stack_buf.as_mut_ptr()
8367                                    as *mut Gc<crate::runtime::function::Upvalue>,
8368                                n_ups,
8369                            )
8370                        }
8371                    } else {
8372                        &mut heap_buf[..]
8373                    };
8374                    // PUC 5.1 had per-function environments: every Lua
8375                    // function carried its own `env` slot, snapshotted from
8376                    // the creating function's env at closure time, so a
8377                    // `setfenv` on one closure never bled into a sibling.
8378                    // luna models that by giving the 5.1 closure a *fresh*
8379                    // closed upvalue for whichever cell holds `_ENV`, seeded
8380                    // from the parent's current env value. Only that cell is
8381                    // cloned — every other upvalue keeps its open/shared
8382                    // identity (so e.g. `local function range(...) ...
8383                    // range(...) ... end` still sees its self-reference). 5.2+
8384                    // keeps the shared-upval model (and the proto cache that
8385                    // depends on it).
8386                    let v51 = self.version() <= LuaVersion::Lua51;
8387                    if v51 && proto.env_upval_idx != u8::MAX {
8388                        let i = proto.env_upval_idx as usize;
8389                        let cur = match ups[i].state() {
8390                            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8391                            UpvalState::Closed(v) => v,
8392                        };
8393                        ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8394                    }
8395                    let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8396                    // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8397                    // and reuses it when every fresh-upvalue binding still
8398                    // points to the same Upvalue object as the cached one.
8399                    // That keeps `function() return outer end` repeated in a
8400                    // loop comparing equal across iterations (the captured
8401                    // outer is a shared open upvalue), while `function()
8402                    // return loop_var end` gets a fresh closure each round
8403                    // because the loop var is re-created per iteration. PUC
8404                    // 5.1 predated the cache, and the per-closure `_ENV`
8405                    // clone above would defeat it anyway, so skip it.
8406                    let nc = if v51 {
8407                        self.heap.new_closure_inline(proto, ups_slice)
8408                    } else {
8409                        let cached = proto.cache.get().filter(|c| {
8410                            c.upvals().len() == ups_slice.len()
8411                                && c.upvals()
8412                                    .iter()
8413                                    .zip(ups_slice.iter())
8414                                    .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8415                        });
8416                        match cached {
8417                            Some(c) => c,
8418                            None => {
8419                                let n = self.heap.new_closure_inline(proto, ups_slice);
8420                                proto.cache.set(Some(n));
8421                                n
8422                            }
8423                        }
8424                    };
8425                    self.set_r(base, inst.a(), Value::Closure(nc));
8426                    self.maybe_collect_garbage(base + inst.a() + 1);
8427                }
8428                Op::Vararg => {
8429                    let abs_a = base + inst.a();
8430                    let wanted = inst.c() as i32 - 1;
8431                    // A materialized named vararg lives in func_slot (its writes
8432                    // must be visible to `...`); otherwise spread the extra args
8433                    // straight off the stack at func_slot+1 .. +n_varargs.
8434                    let vt = match self.stack[func_slot as usize] {
8435                        Value::Table(t) => Some(t),
8436                        _ => None,
8437                    };
8438                    let n = match vt {
8439                        Some(t) => {
8440                            let n_key = Value::Str(self.heap.intern(b"n"));
8441                            // PUC getnumargs: a named vararg `t.n` set out of the
8442                            // integer range [0, INT_MAX/2] is rejected here
8443                            match t.get(n_key) {
8444                                Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8445                                _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8446                            }
8447                        }
8448                        None => n_varargs,
8449                    };
8450                    let count = if wanted < 0 { n } else { wanted as u32 };
8451                    let need = (abs_a + count) as usize;
8452                    if self.stack.len() < need {
8453                        self.stack.resize(need, Value::Nil);
8454                    }
8455                    for i in 0..count {
8456                        let v = if i >= n {
8457                            Value::Nil
8458                        } else if let Some(t) = vt {
8459                            t.get_int(i as i64 + 1)
8460                        } else {
8461                            self.stack[(func_slot + 1 + i) as usize]
8462                        };
8463                        self.stack[(abs_a + i) as usize] = v;
8464                    }
8465                    if wanted < 0 {
8466                        self.top = abs_a + count;
8467                    }
8468                }
8469                Op::GetVarg => {
8470                    // materialize the vararg table (PUC table.pack shape) from the
8471                    // stack varargs — used when the named vararg is written /
8472                    // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8473                    // sees later writes) and in the local register R[A].
8474                    let n = n_varargs;
8475                    let t = self.heap.new_table();
8476                    {
8477                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8478                        let tm = unsafe { t.as_mut() };
8479                        for i in 0..n {
8480                            let _ = tm.set_int(
8481                                &mut self.heap,
8482                                i as i64 + 1,
8483                                self.stack[(func_slot + 1 + i) as usize],
8484                            );
8485                        }
8486                    }
8487                    let n_key = Value::Str(self.heap.intern(b"n"));
8488                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8489                    unsafe { t.as_mut() }
8490                        .set(&mut self.heap, n_key, Value::Int(n as i64))
8491                        .expect("'n' is a valid key");
8492                    // once-per-table barrier (mirror SETLIST): t is born BLACK
8493                    // during Propagate; the bulk inserts above don't barrier.
8494                    self.heap
8495                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8496                    self.stack[func_slot as usize] = Value::Table(t);
8497                    self.set_r(base, inst.a(), Value::Table(t));
8498                }
8499                Op::VargIdx => {
8500                    // R[A] := vararg[R[C]] without allocating: integer key in
8501                    // [1,n] → that vararg, "n" → the count, else nil.
8502                    let key = self.r(base, inst.c());
8503                    let n = n_varargs;
8504                    let v = match key {
8505                        Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8506                            self.stack[(func_slot + k as u32) as usize]
8507                        }
8508                        Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8509                            self.stack[(func_slot + f as u32) as usize]
8510                        }
8511                        Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8512                        _ => Value::Nil,
8513                    };
8514                    self.set_r(base, inst.a(), v);
8515                }
8516                Op::ErrNNil => {
8517                    let v = self.r(base, inst.a());
8518                    if !matches!(v, Value::Nil) {
8519                        let bx = inst.bx();
8520                        let name = if bx == 0 {
8521                            "?".to_string()
8522                        } else {
8523                            match cl.proto.consts[(bx - 1) as usize] {
8524                                Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8525                                _ => "?".to_string(),
8526                            }
8527                        };
8528                        return Err(self.rt_err(&format!("global '{name}' already defined")));
8529                    }
8530                }
8531                Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8532            }
8533        }
8534    }
8535
8536    #[inline(always)]
8537    fn pc_of_top(&self) -> u32 {
8538        self.top_frame().pc
8539    }
8540
8541    #[inline(always)]
8542    fn bump_pc(&mut self) {
8543        // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8544        // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8545        // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8546        // up over `fib_28`'s ~500k jumps.
8547        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8548        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8549            CallFrame::Lua(f) => f.pc += 1,
8550            _ => unreachable!("Cont frame at bump_pc"),
8551        }
8552    }
8553
8554    #[inline(always)]
8555    fn add_pc(&mut self, d: i32) {
8556        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8557        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8558            CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8559            _ => unreachable!("Cont frame at add_pc"),
8560        }
8561    }
8562
8563    /// PUC conditional-skip convention: the JMP that follows is executed when
8564    /// `cond == k`; otherwise it is skipped.
8565    #[inline(always)]
8566    fn cond_skip(&mut self, cond: bool, k: bool) {
8567        if cond != k {
8568            self.bump_pc();
8569        }
8570    }
8571
8572    // ---- indexing (with __index/__newindex chains) ----
8573
8574    /// The `#` length operation: string byte length, `__len` if present, else
8575    /// the raw table border. Returns the raw length value (may be non-integer
8576    /// when `__len` is exotic).
8577    pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8578        match self.len_step(v)? {
8579            MmOut::Done(n) => Ok(n),
8580            // PUC calls unary metamethods with the operand twice
8581            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8582            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8583        }
8584    }
8585
8586    /// Length fast path: a string's byte count or a table's raw border when no
8587    /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8588    /// called with the operand twice. Errors for a non-table with no `__len`.
8589    fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8590        match v {
8591            Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8592            Value::Table(t) => {
8593                let mm = self.get_mm(v, Mm::Len);
8594                if mm.is_nil() {
8595                    Ok(MmOut::Done(Value::Int(t.len())))
8596                } else {
8597                    Ok(MmOut::Mm { func: mm, recv: v })
8598                }
8599            }
8600            _ => {
8601                let mm = self.get_mm(v, Mm::Len);
8602                if mm.is_nil() {
8603                    Err(self.type_err("get length of", v))
8604                } else {
8605                    Ok(MmOut::Mm { func: mm, recv: v })
8606                }
8607            }
8608        }
8609    }
8610
8611    /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8612    /// value with no integer representation.
8613    pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8614        match self.len_value(v)? {
8615            Value::Int(i) => Ok(i),
8616            Value::Float(f) => crate::runtime::value::f2i_exact(f)
8617                .ok_or_else(|| self.rt_err("object length is not an integer")),
8618            _ => Err(self.rt_err("object length is not an integer")),
8619        }
8620    }
8621
8622    pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8623        match self.index_step(t, key)? {
8624            MmOut::Done(v) => Ok(v),
8625            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8626            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8627        }
8628    }
8629
8630    /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8631    /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8632    /// are followed inline (no yield possible); only a function link can yield.
8633    fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8634        let mut cur = t;
8635        for _ in 0..MAX_TAG_LOOP {
8636            let mm = match cur {
8637                Value::Table(tb) => {
8638                    let v = tb.get(key);
8639                    if !v.is_nil() {
8640                        return Ok(MmOut::Done(v));
8641                    }
8642                    let mm = self.get_mm(cur, Mm::Index);
8643                    if mm.is_nil() {
8644                        return Ok(MmOut::Done(Value::Nil));
8645                    }
8646                    mm
8647                }
8648                v => {
8649                    let mm = self.get_mm(v, Mm::Index);
8650                    if mm.is_nil() {
8651                        return Err(self.type_err("index", v));
8652                    }
8653                    mm
8654                }
8655            };
8656            match mm {
8657                Value::Closure(_) | Value::Native(_) => {
8658                    return Ok(MmOut::Mm {
8659                        func: mm,
8660                        recv: cur,
8661                    });
8662                }
8663                next => cur = next,
8664            }
8665        }
8666        Err(self.rt_err("'__index' chain too long; possible loop"))
8667    }
8668
8669    pub(crate) fn newindex_value(
8670        &mut self,
8671        t: Value,
8672        key: Value,
8673        v: Value,
8674    ) -> Result<(), LuaError> {
8675        match self.newindex_step(t, key, v)? {
8676            MmOut::Done(_) => Ok(()),
8677            MmOut::Mm { func, recv } => {
8678                self.call_value(func, &[recv, key, v])?;
8679                Ok(())
8680            }
8681            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8682        }
8683    }
8684
8685    /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
8686    /// performed inline (returning `Done`); only a function metamethod (`Mm`)
8687    /// needs an actual call — which the caller may run yieldably.
8688    fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
8689        // v2.13 WUC read-time probe (gc-verify): a dead query key at a
8690        // WRITE site, attributed to the instruction that produced it.
8691        #[cfg(feature = "gc-verify")]
8692        if let Some(p) = (match key {
8693            Value::Str(s) => Some(s.as_ptr() as usize),
8694            Value::Table(t2) => Some(t2.as_ptr() as usize),
8695            _ => None,
8696        }) {
8697            if crate::runtime::gc_verify_probe::is_freed(p) {
8698                let detail = match self.frames.last() {
8699                    Some(CallFrame::Lua(f)) => {
8700                        let pc = f.pc as usize;
8701                        let mut w = String::new();
8702                        for q in pc.saturating_sub(6)..(pc + 2) {
8703                            if let Some(inst) = f.closure.proto.code.get(q) {
8704                                w.push_str(&format!(
8705                                    "\n  [{q}] {:?} a={} b={} c={} k={}",
8706                                    inst.op(),
8707                                    inst.a(),
8708                                    inst.b(),
8709                                    inst.c(),
8710                                    inst.k()
8711                                ));
8712                            }
8713                        }
8714                        format!("pc={pc} base={} gc_top={} window:{w}", f.base, self.gc_top)
8715                    }
8716                    _ => "non-Lua frame".into(),
8717                };
8718                panic!("[gc-verify] newindex_step QUERY key {p:#x} freed. {detail}");
8719            }
8720        }
8721        let mut cur = t;
8722        for _ in 0..MAX_TAG_LOOP {
8723            let mm = match cur {
8724                Value::Table(tb) => {
8725                    // PI-A3 single-walk collapse — Table::try_set_existing
8726                    // fuses the prior `tb.get(key).is_nil()` gate and
8727                    // `raw_set` walk into one chain traversal when the
8728                    // key is already present with a non-nil value. The
8729                    // __newindex chain semantics are preserved by the
8730                    // identity (slot_nil ⇔ fire_newindex); see
8731                    // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
8732                    //
8733                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
8734                    // heap is single-threaded and the pointer is live as
8735                    // long as it is reachable from active roots (see
8736                    // heap.rs:5-7). Mirrors the raw_set wrapper below.
8737                    if unsafe { tb.as_mut() }.try_set_existing(key, v) {
8738                        self.heap
8739                            .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
8740                        return Ok(MmOut::Done(Value::Nil));
8741                    }
8742                    let mm = self.get_mm(cur, Mm::NewIndex);
8743                    if mm.is_nil() {
8744                        self.raw_set(tb, key, v)?;
8745                        return Ok(MmOut::Done(Value::Nil));
8746                    }
8747                    mm
8748                }
8749                bad => {
8750                    let mm = self.get_mm(bad, Mm::NewIndex);
8751                    if mm.is_nil() {
8752                        return Err(self.type_err("index", bad));
8753                    }
8754                    mm
8755                }
8756            };
8757            match mm {
8758                Value::Closure(_) | Value::Native(_) => {
8759                    return Ok(MmOut::Mm {
8760                        func: mm,
8761                        recv: cur,
8762                    });
8763                }
8764                next => cur = next,
8765            }
8766        }
8767        Err(self.rt_err("'__newindex' chain too long; possible loop"))
8768    }
8769
8770    fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8771        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8772        match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8773            Ok(()) => {
8774                self.heap
8775                    .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8776                Ok(())
8777            }
8778            Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8779            Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8780            Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8781            Err(TableError::InvalidNext) => unreachable!(),
8782        }
8783    }
8784
8785    /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8786    /// the boolean result; `Mm` (when raw equality fails and both are tables
8787    /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8788    fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8789        if l.raw_eq(r) {
8790            return MmOut::Done(Value::Bool(true));
8791        }
8792        if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8793            (l, r)
8794        {
8795            // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8796            // (and earlier) required the two operands' metatables to expose a
8797            // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8798            // metatable falls straight back to raw inequality. events.lua 5.1
8799            // :262 bakes this in.
8800            let mm = if self.version() <= LuaVersion::Lua51 {
8801                self.get_comp_mm(l, r, Mm::Eq)
8802            } else {
8803                let mut m = self.get_mm(l, Mm::Eq);
8804                if m.is_nil() {
8805                    m = self.get_mm(r, Mm::Eq);
8806                }
8807                m
8808            };
8809            if !mm.is_nil() {
8810                return MmOut::Mm { func: mm, recv: l };
8811            }
8812        }
8813        MmOut::Done(Value::Bool(false))
8814    }
8815
8816    // ---- arithmetic ----
8817
8818    #[inline(always)]
8819    fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8820        let l = self.r(base, inst.b());
8821        let r = self.r(base, inst.c());
8822        // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8823        // binary_trees all hammer these. Skipping coerce_num + the big
8824        // arith_fast match shaves several conditional moves per op.
8825        if let (Value::Int(a), Value::Int(b)) = (l, r) {
8826            let fast = match op {
8827                ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8828                ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8829                ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8830                _ => None,
8831            };
8832            if let Some(v) = fast {
8833                self.set_r(base, inst.a(), v);
8834                return Ok(());
8835            }
8836        }
8837        // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8838        // and any numeric workload with non-integer accumulators benefits.
8839        if let (Value::Float(a), Value::Float(b)) = (l, r) {
8840            let fast = match op {
8841                ArithOp::Add => Some(Value::Float(a + b)),
8842                ArithOp::Sub => Some(Value::Float(a - b)),
8843                ArithOp::Mul => Some(Value::Float(a * b)),
8844                ArithOp::Div => Some(Value::Float(a / b)),
8845                _ => None,
8846            };
8847            if let Some(v) = fast {
8848                self.set_r(base, inst.a(), v);
8849                return Ok(());
8850            }
8851        }
8852        match self.arith_fast(op, l, r)? {
8853            Some(v) => self.set_r(base, inst.a(), v),
8854            None => {
8855                let mm = self.arith_mm_func(op, l, r)?;
8856                let dst = base + inst.a();
8857                self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8858            }
8859        }
8860        Ok(())
8861    }
8862
8863    /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8864    /// directly, `Ok(None)` when a metamethod is required (the caller decides
8865    /// whether to call it synchronously or yieldably).
8866    fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8867        use ArithOp::*;
8868        match op {
8869            BAnd | BOr | BXor | Shl | Shr => {
8870                // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8871                match (coerce_num(l), coerce_num(r)) {
8872                    (Some(a), Some(b)) => {
8873                        let to_int = |n: Num| match n {
8874                            Num::Int(i) => Some(i),
8875                            Num::Float(f) => crate::runtime::value::f2i_exact(f),
8876                        };
8877                        let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8878                            // PUC luaG_tointerror: name the offending operand
8879                            return Err(self.no_int_rep_err());
8880                        };
8881                        let v = match op {
8882                            BAnd => a & b,
8883                            BOr => a | b,
8884                            BXor => a ^ b,
8885                            Shl => shift_left(a, b),
8886                            Shr => shift_left(a, b.wrapping_neg()),
8887                            _ => unreachable!(),
8888                        };
8889                        return Ok(Some(Value::Int(v)));
8890                    }
8891                    _ => return Ok(None),
8892                }
8893            }
8894            _ => {}
8895        }
8896        let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8897            (Some(a), Some(b)) => (a, b),
8898            _ => return Ok(None),
8899        };
8900        let v = match (op, ln, rn) {
8901            (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8902            (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8903            (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8904            (IDiv, Num::Int(a), Num::Int(b)) => {
8905                if b == 0 {
8906                    return Err(self.rt_err("attempt to divide by zero"));
8907                }
8908                let mut q = a.wrapping_div(b);
8909                if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8910                    q -= 1;
8911                }
8912                Value::Int(q)
8913            }
8914            (Mod, Num::Int(a), Num::Int(b)) => {
8915                if b == 0 {
8916                    return Err(self.rt_err("attempt to perform 'n%0'"));
8917                }
8918                let mut m = a.wrapping_rem(b);
8919                if m != 0 && (m ^ b) < 0 {
8920                    m += b;
8921                }
8922                Value::Int(m)
8923            }
8924            (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8925            (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8926            (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8927            (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8928            (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8929            (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8930            (Mod, a, b) => {
8931                let (x, y) = (a.as_f64(), b.as_f64());
8932                // PUC luai_nummod: correct fmod's sign without the `m*y`
8933                // product, which underflows to 0 for tiny denormals
8934                let mut m = x % y;
8935                if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8936                    m += y;
8937                }
8938                Value::Float(m)
8939            }
8940            _ => unreachable!(),
8941        };
8942        Ok(Some(v))
8943    }
8944
8945    pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8946        match v {
8947            Value::Int(i) => Ok(i),
8948            Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8949                Some(i) => Ok(i),
8950                None => Err(self.rt_err("number has no integer representation")),
8951            },
8952            v => Err(self.type_err(what, v)),
8953        }
8954    }
8955
8956    fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
8957        match n {
8958            Num::Int(i) => Ok(i),
8959            Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
8960                Some(i) => Ok(i),
8961                None => Err(self.rt_err("number has no integer representation")),
8962            },
8963        }
8964    }
8965
8966    /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
8967    /// PUC type error when neither operand provides one.
8968    fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
8969        use ArithOp::*;
8970        let event = match op {
8971            Add => Mm::Add,
8972            Sub => Mm::Sub,
8973            Mul => Mm::Mul,
8974            Div => Mm::Div,
8975            Mod => Mm::Mod,
8976            Pow => Mm::Pow,
8977            IDiv => Mm::IDiv,
8978            BAnd => Mm::BAnd,
8979            BOr => Mm::BOr,
8980            BXor => Mm::BXor,
8981            Shl => Mm::Shl,
8982            Shr => Mm::Shr,
8983        };
8984        let mut mm = self.get_mm(l, event);
8985        if mm.is_nil() {
8986            mm = self.get_mm(r, event);
8987        }
8988        if mm.is_nil() {
8989            let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
8990                "perform bitwise operation on"
8991            } else {
8992                "perform arithmetic on"
8993            };
8994            let bad = if coerce_num(l).is_none() { l } else { r };
8995            return Err(self.type_err(what, bad));
8996        }
8997        Ok(mm)
8998    }
8999
9000    // ---- comparison ----
9001
9002    pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
9003        match self.less_step(l, r, or_eq)? {
9004            MmOut::Done(v) => Ok(v.truthy()),
9005            MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
9006            MmOut::CompareSynth { func } => {
9007                // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
9008                // by library code (sort comparator etc.) — no yield expected
9009                // here (a yield would have hit `call_noyield`'s C boundary).
9010                Ok(!self.call_mm1(func, &[r, l])?.truthy())
9011            }
9012        }
9013    }
9014
9015    /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
9016    /// carries the boolean result; `Mm` (for non-number/string operands) carries
9017    /// the metamethod — called with `(l, r)`; raises the PUC compare error when
9018    /// neither operand provides one.
9019    fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
9020        let b = match (l, r) {
9021            (Value::Int(a), Value::Int(b)) => {
9022                if or_eq {
9023                    a <= b
9024                } else {
9025                    a < b
9026                }
9027            }
9028            (Value::Float(a), Value::Float(b)) => {
9029                if or_eq {
9030                    a <= b
9031                } else {
9032                    a < b
9033                }
9034            }
9035            (Value::Int(a), Value::Float(b)) => {
9036                if or_eq {
9037                    int_le_float(a, b)
9038                } else {
9039                    int_lt_float(a, b)
9040                }
9041            }
9042            (Value::Float(a), Value::Int(b)) => {
9043                if a.is_nan() {
9044                    false
9045                } else if or_eq {
9046                    !int_lt_float(b, a)
9047                } else {
9048                    !int_le_float(b, a)
9049                }
9050            }
9051            (Value::Str(a), Value::Str(b)) => {
9052                let (a, b) = (a.as_bytes(), b.as_bytes());
9053                if or_eq { a <= b } else { a < b }
9054            }
9055            (l, r) => {
9056                let event = if or_eq { Mm::Le } else { Mm::Lt };
9057                // PUC 5.1's `get_compTM` rule applies to ordered comparisons
9058                // too: both operands' metatables must expose the same
9059                // implementation for `__lt` / `__le` to fire. events.lua 5.1
9060                // :262 expects `c < d` (where `d` has no metatable) to error
9061                // with the default "attempt to compare two table values"
9062                // rather than running c's `__lt` blindly.
9063                let mm = if self.version() <= LuaVersion::Lua51 {
9064                    self.get_comp_mm(l, r, event)
9065                } else {
9066                    let mut m = self.get_mm(l, event);
9067                    if m.is_nil() {
9068                        m = self.get_mm(r, event);
9069                    }
9070                    m
9071                };
9072                // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
9073                // operand carries `__le`. 5.4 dropped the synthesis (now
9074                // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
9075                // on the synthesis — its metatable defines only `__lt`.
9076                // The fallback calls `__lt(r, l)` synchronously (the suite's
9077                // `__lt` doesn't yield) and negates the result; the yieldable
9078                // `__lt` path stays reserved for the explicit `<` operator.
9079                if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
9080                    let lt = Mm::Lt;
9081                    let mut mm_lt = self.get_mm(l, lt);
9082                    if mm_lt.is_nil() {
9083                        mm_lt = self.get_mm(r, lt);
9084                    }
9085                    if !mm_lt.is_nil() {
9086                        return Ok(MmOut::CompareSynth { func: mm_lt });
9087                    }
9088                }
9089                if mm.is_nil() {
9090                    // PUC luaG_ordererror: "two X values" when the operand
9091                    // types match, "X with Y" otherwise (objtypename-aware).
9092                    let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
9093                    return Err(self.rt_err(&if t1 == t2 {
9094                        format!("attempt to compare two {t1} values")
9095                    } else {
9096                        format!("attempt to compare {t1} with {t2}")
9097                    }));
9098                }
9099                return Ok(MmOut::Mm { func: mm, recv: l });
9100            }
9101        };
9102        Ok(MmOut::Done(Value::Bool(b)))
9103    }
9104
9105    // ---- numeric for ----
9106
9107    fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
9108        let a = inst.a();
9109        let init = self.r(base, a);
9110        let limit = self.r(base, a + 1);
9111        let step = self.r(base, a + 2);
9112        let (Some(init_n), Some(limit_n), Some(step_n)) =
9113            (as_num(init), as_num(limit), as_num(step))
9114        else {
9115            // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
9116            // PUC checks limit, then step, then initial value.
9117            let (what, bad) = if as_num(limit).is_none() {
9118                ("limit", limit)
9119            } else if as_num(step).is_none() {
9120                ("step", step)
9121            } else {
9122                ("initial value", init)
9123            };
9124            let tn = self.obj_typename(bad);
9125            return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
9126        };
9127        // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
9128        // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
9129        // first test, so each successful iteration emits a backward `OP_FORLOOP`
9130        // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
9131        // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
9132        // distance in luna's encoding is `loop_pc - prep_pc`; firing
9133        // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
9134        let pre53 = self.version() <= LuaVersion::Lua53;
9135        match (init_n, step_n) {
9136            (Num::Int(i0), Num::Int(st)) => {
9137                if st == 0 {
9138                    return Err(self.rt_err("'for' step is zero"));
9139                }
9140                if pre53 {
9141                    // PUC 5.3 `forlimit`: int limit passes through; float limit
9142                    // gets clamped to MIN/MAX with a `stopnow` flag set only
9143                    // when the clamp is unreachable (positive float with a
9144                    // negative step → limit=MAX, stopnow; negative float with
9145                    // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
9146                    // `init = 0` so OP_FORLOOP's first test against the
9147                    // unreachable clamp fails cleanly. An ordinary in-range
9148                    // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
9149                    // lets OP_FORLOOP's natural test reject the first step.
9150                    let (lim, stopnow) = match limit_n {
9151                        Num::Int(l) => (l, false),
9152                        Num::Float(f) => {
9153                            if f.is_nan() {
9154                                (0, true)
9155                            } else if f >= i64::MAX as f64 + 1.0 {
9156                                // beyond +MAX: unreachable for a decreasing loop
9157                                (i64::MAX, st < 0)
9158                            } else if f <= i64::MIN as f64 {
9159                                // beyond -MIN: unreachable for an increasing loop
9160                                (i64::MIN, st >= 0)
9161                            } else if st > 0 {
9162                                (f.floor() as i64, false)
9163                            } else {
9164                                (f.ceil() as i64, false)
9165                            }
9166                        }
9167                    };
9168                    let initv = if stopnow { 0 } else { i0 };
9169                    let pre = initv.wrapping_sub(st);
9170                    self.set_r(base, a, Value::Int(pre));
9171                    self.set_r(base, a + 1, Value::Int(lim));
9172                    self.set_r(base, a + 2, Value::Int(st));
9173                    self.add_pc(inst.bx() as i32 - 1);
9174                    return Ok(());
9175                }
9176                let (lim, empty) = int_for_limit(limit_n, i0, st);
9177                if empty {
9178                    self.add_pc(inst.bx() as i32);
9179                    return Ok(());
9180                }
9181                let count = if st > 0 {
9182                    (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
9183                } else {
9184                    (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
9185                };
9186                self.set_r(base, a, Value::Int(i0));
9187                self.set_r(base, a + 1, Value::Int(count as i64));
9188                self.set_r(base, a + 2, Value::Int(st));
9189                self.set_r(base, a + 3, Value::Int(i0));
9190            }
9191            _ => {
9192                let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
9193                if st == 0.0 {
9194                    return Err(self.rt_err("'for' step is zero"));
9195                }
9196                if pre53 {
9197                    let pre = x0 - st;
9198                    self.set_r(base, a, Value::Float(pre));
9199                    self.set_r(base, a + 1, Value::Float(lim));
9200                    self.set_r(base, a + 2, Value::Float(st));
9201                    self.add_pc(inst.bx() as i32 - 1);
9202                    return Ok(());
9203                }
9204                let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
9205                if !runs {
9206                    self.add_pc(inst.bx() as i32);
9207                    return Ok(());
9208                }
9209                self.set_r(base, a, Value::Float(x0));
9210                self.set_r(base, a + 1, Value::Float(lim));
9211                self.set_r(base, a + 2, Value::Float(st));
9212                self.set_r(base, a + 3, Value::Float(x0));
9213            }
9214        }
9215        Ok(())
9216    }
9217
9218    #[inline(always)]
9219    fn for_loop(&mut self, inst: Inst, base: u32) {
9220        let a = inst.a();
9221        // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
9222        // directly (R[a+1] holds the limit, *not* a remaining-count) so the
9223        // first iteration's test fires through the same backward-jump path as
9224        // every later iteration. 5.4+ switched to the count-based form luna
9225        // already uses for `Int`; the float branch was already PUC-3.x-style.
9226        let pre53 = self.version() <= LuaVersion::Lua53;
9227        match self.r(base, a) {
9228            Value::Int(cur) if pre53 => {
9229                let Value::Int(lim) = self.r(base, a + 1) else {
9230                    unreachable!()
9231                };
9232                let Value::Int(st) = self.r(base, a + 2) else {
9233                    unreachable!()
9234                };
9235                let next = cur.wrapping_add(st);
9236                let cont = if st > 0 { next <= lim } else { next >= lim };
9237                if cont {
9238                    self.set_r(base, a, Value::Int(next));
9239                    self.set_r(base, a + 3, Value::Int(next));
9240                    self.add_pc(-(inst.bx() as i32));
9241                }
9242            }
9243            Value::Int(cur) => {
9244                let Value::Int(count) = self.r(base, a + 1) else {
9245                    unreachable!()
9246                };
9247                if count > 0 {
9248                    let Value::Int(st) = self.r(base, a + 2) else {
9249                        unreachable!()
9250                    };
9251                    let next = cur.wrapping_add(st);
9252                    self.set_r(base, a, Value::Int(next));
9253                    self.set_r(base, a + 1, Value::Int(count - 1));
9254                    self.set_r(base, a + 3, Value::Int(next));
9255                    self.add_pc(-(inst.bx() as i32));
9256                }
9257            }
9258            Value::Float(cur) => {
9259                let Value::Float(lim) = self.r(base, a + 1) else {
9260                    unreachable!()
9261                };
9262                let Value::Float(st) = self.r(base, a + 2) else {
9263                    unreachable!()
9264                };
9265                let next = cur + st;
9266                let cont = if st > 0.0 { next <= lim } else { next >= lim };
9267                if cont {
9268                    self.set_r(base, a, Value::Float(next));
9269                    self.set_r(base, a + 3, Value::Float(next));
9270                    self.add_pc(-(inst.bx() as i32));
9271                }
9272            }
9273            _ => unreachable!("corrupt for-loop state"),
9274        }
9275    }
9276
9277    // ---- native helpers (used by builtins) ----
9278
9279    /// A native function's own captured upvalue (self lives at func_slot).
9280    ///
9281    /// Public so `native_typed` trampolines and embedders authoring
9282    /// stateful natives via `native_with(...)` can read their upvals.
9283    pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
9284        let Value::Native(nc) = self.stack[func_slot as usize] else {
9285            unreachable!("native frame without native closure");
9286        };
9287        nc.upvals[i]
9288    }
9289
9290    /// Number of upvalues captured by the native at `func_slot` (variadic
9291    /// captures such as the `io.lines` format list).
9292    pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
9293        let Value::Native(nc) = self.stack[func_slot as usize] else {
9294            unreachable!("native frame without native closure");
9295        };
9296        nc.upvals.len()
9297    }
9298
9299    /// Write a native function's own upvalue (stateful iterators).
9300    pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
9301        let Value::Native(nc) = self.stack[func_slot as usize] else {
9302            unreachable!("native frame without native closure");
9303        };
9304        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9305        unsafe { nc.as_mut() }.upvals[i] = v;
9306        // NativeClosure.upvals is traced as part of its Trace; a long-lived
9307        // stateful iterator closure (e.g. string.gmatch) sees many writes —
9308        // barrier_back once-and-done is cheaper than per-child forward.
9309        self.heap
9310            .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9311    }
9312
9313    /// Read the i-th positional argument inside a `NativeFn` body
9314    /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9315    /// matching PUC's "missing arg is nil" contract. Public so embedders
9316    /// can author their own natives.
9317    pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9318        if i < nargs {
9319            self.stack[(func_slot + 1 + i) as usize]
9320        } else {
9321            Value::Nil
9322        }
9323    }
9324
9325    /// Push the return values of a `NativeFn` and return their count
9326    /// (analogous to pushing N values then `return N` from a C function).
9327    /// Public so embedders can author their own natives.
9328    pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9329        let need = func_slot as usize + vals.len();
9330        if self.stack.len() < need {
9331            self.stack.resize(need, Value::Nil);
9332        }
9333        for (i, &v) in vals.iter().enumerate() {
9334            self.stack[func_slot as usize + i] = v;
9335        }
9336        vals.len() as u32
9337    }
9338
9339    /// Fast string concatenation of an adjacent pair, or `None` when a
9340    /// `__concat` metamethod is required.
9341    fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9342        let legacy = self.version <= crate::version::LuaVersion::Lua52;
9343        // Length-check fast paths for both string operands BEFORE the
9344        // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9345        // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9346        // overflow on the first pair that would exceed `INT_MAX` instead
9347        // of allocating multi-GB intermediates first.
9348        let max_str = i32::MAX as usize;
9349        if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9350            let a_len = ls.as_bytes().len();
9351            let b_len = rs.as_bytes().len();
9352            let new_len = a_len.checked_add(b_len);
9353            if new_len.is_none() || new_len.unwrap() > max_str {
9354                return Err(self.rt_err("string length overflow"));
9355            }
9356        }
9357        match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9358            (Some(a), Some(b)) => {
9359                // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9360                // concat past it raises "string length overflow"
9361                // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9362                // exactly this wall).
9363                let new_len = a.len().checked_add(b.len());
9364                if new_len.is_none() || new_len.unwrap() > max_str {
9365                    return Err(self.rt_err("string length overflow"));
9366                }
9367                let mut combined = a;
9368                combined.extend_from_slice(&b);
9369                Ok(Some(Value::Str(self.heap.intern(&combined))))
9370            }
9371            _ => Ok(None),
9372        }
9373    }
9374
9375    /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9376    /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9377    /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9378    /// call — its `Meta` continuation re-enters here on the metamethod's return.
9379    fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9380        // Sum the lengths of all all-Str operands BEFORE starting the
9381        // right-associative fold so a 129-operand `a..a..…` chain
9382        // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9383        // not after dozens of multi-GB intermediate intern+hash rounds.
9384        // A non-Str operand falls through to the per-pair check.
9385        let max_str = i32::MAX as usize;
9386        let mut total: usize = 0;
9387        let mut all_str = true;
9388        for slot in base_a..self.top {
9389            match self.stack[slot as usize] {
9390                Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9391                    Some(t) if t <= max_str => total = t,
9392                    _ => return Err(self.rt_err("string length overflow")),
9393                },
9394                _ => {
9395                    all_str = false;
9396                    break;
9397                }
9398            }
9399        }
9400        let _ = all_str; // discrimination already captured by early returns above
9401        while self.top.saturating_sub(base_a) >= 2 {
9402            let i = self.top - 1; // rightmost operand
9403            let x = self.stack[(i - 1) as usize];
9404            let y = self.stack[i as usize];
9405            match self.concat_pair(x, y)? {
9406                Some(s) => {
9407                    self.stack[(i - 1) as usize] = s;
9408                    self.top = i; // consumed y
9409                }
9410                None => {
9411                    let mut mm = self.get_mm(x, Mm::Concat);
9412                    if mm.is_nil() {
9413                        mm = self.get_mm(y, Mm::Concat);
9414                    }
9415                    if mm.is_nil() {
9416                        let legacy = self.version <= crate::version::LuaVersion::Lua52;
9417                        let bad = if concat_piece(x, legacy).is_none() {
9418                            x
9419                        } else {
9420                            y
9421                        };
9422                        return Err(self.type_err("concatenate", bad));
9423                    }
9424                    // result lands at i-1, dropping y (top→i); resume continues.
9425                    let dst = i - 1;
9426                    self.begin_meta_call(
9427                        mm,
9428                        &[x, y],
9429                        MetaAction::Concat { dst, base_a },
9430                        "concat",
9431                    )?;
9432                    return Ok(());
9433                }
9434            }
9435        }
9436        self.maybe_collect_garbage(base_a + 1);
9437        Ok(())
9438    }
9439
9440    /// tostring with __tostring / __name support.
9441    pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9442        let mm = self.get_mm(v, Mm::ToString);
9443        if !mm.is_nil() {
9444            return match self.call_mm1(mm, &[v])? {
9445                Value::Str(s) => Ok(s.as_bytes().to_vec()),
9446                _ => Err(self.rt_err("'__tostring' must return a string")),
9447            };
9448        }
9449        if let Value::Table(t) = v
9450            && let Value::Str(name) = self.get_mm(v, Mm::Name)
9451        {
9452            let mut out = name.as_bytes().to_vec();
9453            out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9454            return Ok(out);
9455        }
9456        Ok(self.tostring_basic(v))
9457    }
9458
9459    /// Basic tostring (no metamethods).
9460    pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9461        match v {
9462            Value::Nil => b"nil".to_vec(),
9463            Value::Bool(true) => b"true".to_vec(),
9464            Value::Bool(false) => b"false".to_vec(),
9465            Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9466            // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9467            // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9468            // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9469            // concatenating these renderings.
9470            Value::Float(f) => {
9471                let legacy = self.version <= crate::version::LuaVersion::Lua52;
9472                numeric::num_to_string_for(Num::Float(f), legacy).into_bytes()
9473            }
9474            Value::Str(s) => s.as_bytes().to_vec(),
9475            Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9476            Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9477            Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9478            Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9479            // PUC names file handles `file (0x…)`; a bare userdata is
9480            // `userdata: 0x…`. The io library overrides this via __tostring.
9481            Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9482            // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9483            // (the "light" qualifier only appears in `luaL_typeerror`).
9484            Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9485        }
9486    }
9487}
9488
/// Binary operators handled by the arithmetic/bitwise execution paths.
/// Each variant is annotated with the metamethod event it maps to.
#[derive(Copy, Clone, Eq, PartialEq)]
enum ArithOp {
    /// `__add`
    Add,
    /// `__sub`
    Sub,
    /// `__mul`
    Mul,
    /// `__mod`
    Mod,
    /// `__pow`
    Pow,
    /// `__div`
    Div,
    /// `__idiv`
    IDiv,
    /// `__band`
    BAnd,
    /// `__bor`
    BOr,
    /// `__bxor`
    BXor,
    /// `__shl`
    Shl,
    /// `__shr`
    Shr,
}
9504
9505impl ArithOp {
9506    /// PUC metamethod event name (`__add` → "add" etc.) used by
9507    /// `debug.getinfo(level, "n")` inside a metamethod handler.
9508    fn mm_name(self) -> &'static str {
9509        match self {
9510            ArithOp::Add => "add",
9511            ArithOp::Sub => "sub",
9512            ArithOp::Mul => "mul",
9513            ArithOp::Mod => "mod",
9514            ArithOp::Pow => "pow",
9515            ArithOp::Div => "div",
9516            ArithOp::IDiv => "idiv",
9517            ArithOp::BAnd => "band",
9518            ArithOp::BOr => "bor",
9519            ArithOp::BXor => "bxor",
9520            ArithOp::Shl => "shl",
9521            ArithOp::Shr => "shr",
9522        }
9523    }
9524}
9525
9526fn as_num(v: Value) -> Option<Num> {
9527    match v {
9528        Value::Int(i) => Some(Num::Int(i)),
9529        Value::Float(f) => Some(Num::Float(f)),
9530        // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9531        Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9532        _ => None,
9533    }
9534}
9535
9536/// A concatenable operand's byte form (string, or a number coerced to its
9537/// string), or `None` when only a `__concat` metamethod can handle it.
9538/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9539/// suffix on integer-valued floats) — see `num_to_string_for`.
9540fn concat_piece(v: Value, legacy_float: bool) -> Option<Vec<u8>> {
9541    match v {
9542        Value::Str(s) => Some(s.as_bytes().to_vec()),
9543        Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9544        Value::Float(x) => {
9545            Some(numeric::num_to_string_for(Num::Float(x), legacy_float).into_bytes())
9546        }
9547        _ => None,
9548    }
9549}
9550
9551/// Index into the per-basic-type metatable table for a non-table value
9552/// (None for tables, which carry their own metatable).
9553fn type_mt_slot(v: Value) -> Option<usize> {
9554    match v {
9555        Value::Nil => Some(0),
9556        Value::Bool(_) => Some(1),
9557        Value::Int(_) | Value::Float(_) => Some(2),
9558        Value::Str(_) => Some(3),
9559        Value::Closure(_) | Value::Native(_) => Some(4),
9560        // tables and full userdata carry their own metatable; threads and
9561        // light userdata have none (PUC keeps a shared per-type mt slot for
9562        // light, but luna doesn't expose it — no test gates on it yet).
9563        Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9564    }
9565}
9566
9567/// Number, or string coerced to number (5.5 default string-arith coercion).
9568fn coerce_num(v: Value) -> Option<Num> {
9569    match v {
9570        Value::Int(i) => Some(Num::Int(i)),
9571        Value::Float(f) => Some(Num::Float(f)),
9572        Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9573        _ => None,
9574    }
9575}
9576
/// Lua left shift of `a` by `b`: a logical shift over the 64-bit pattern.
/// A negative `b` shifts right by `|b|` instead, and any `|b|` ≥ 64 yields 0.
fn shift_left(a: i64, b: i64) -> i64 {
    // `unsigned_abs` gives the magnitude without overflowing on i64::MIN.
    let amount = b.unsigned_abs();
    if amount >= 64 {
        return 0;
    }
    let bits = a as u64;
    let shifted = if b < 0 {
        bits >> amount
    } else {
        bits << amount
    };
    shifted as i64
}
9592
/// Exact `i < f` across the integer/float divide (PUC LTintfloat shape):
/// the float is floored into integer range instead of rounding `i` to a float.
fn int_lt_float(i: i64, f: f64) -> bool {
    /// 2^63, the first float above every `i64`.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;

    // Above every integer: always greater. (NaN fails this comparison.)
    if f >= TWO_POW_63 {
        return true;
    }
    // NaN compares false; below every integer: never greater.
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    let floored = f.floor();
    let whole = floored as i64;
    if floored == f {
        // `f` is integral: plain integer comparison.
        i < whole
    } else {
        // `f` has a fraction: `i < f` exactly when `i <= floor(f)`.
        i <= whole
    }
}
9608
/// Exact `i <= f` across the integer/float divide: for an in-range float
/// this is `i <= floor(f)`.
fn int_le_float(i: i64, f: f64) -> bool {
    /// 2^63, the first float above every `i64`.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;

    // Above every integer: always satisfied. (NaN fails this comparison.)
    if f >= TWO_POW_63 {
        return true;
    }
    // NaN compares false; below every integer: never satisfied.
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    let whole = f.floor() as i64;
    i <= whole
}
9622
9623/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9624/// (clipped limit, loop-is-empty).
9625fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9626    match limit {
9627        Num::Int(l) => {
9628            let empty = if step > 0 { init > l } else { init < l };
9629            (l, empty)
9630        }
9631        Num::Float(f) => {
9632            if f.is_nan() {
9633                return (0, true);
9634            }
9635            if step > 0 {
9636                if f >= 9_223_372_036_854_775_808.0 {
9637                    (i64::MAX, false)
9638                } else {
9639                    let l = f.floor();
9640                    if l < -9_223_372_036_854_775_808.0 {
9641                        (i64::MIN, true)
9642                    } else {
9643                        let li = l as i64;
9644                        (li, init > li)
9645                    }
9646                }
9647            } else if f <= -9_223_372_036_854_775_808.0 {
9648                (i64::MIN, false)
9649            } else {
9650                let l = f.ceil();
9651                if l >= 9_223_372_036_854_775_808.0 {
9652                    // PUC forlimit: a positive limit beyond the integer range
9653                    // is unreachable for a decreasing loop — empty.
9654                    (i64::MAX, true)
9655                } else {
9656                    let li = l as i64;
9657                    (li, init < li)
9658                }
9659            }
9660        }
9661    }
9662}
9663
9664/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9665/// `@file` / `=name` markers in `source`).
9666fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9667    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9668    let b = unsafe { crate::runtime::string::bytes_of(p) };
9669    match b.first() {
9670        Some(b'@') | Some(b'=') => &b[1..],
9671        _ => b,
9672    }
9673}
9674
9675impl Vm {
9676    /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9677    /// that called the current native. Returns (closure, current line,
9678    /// extra vararg count).
9679    /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9680    /// function running at `level`, recovered from the caller's call
9681    /// instruction (PUC funcnamefromcode). None for the main chunk or a
9682    /// tail/anonymous call with no recoverable name.
9683    /// A debug-level position: either a real Lua frame (by index) or a synthetic
9684    /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9685    /// coroutine body), which `debug.getinfo` and traceback report as "C".
9686    /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9687    /// frame at `level`'s current pc, as (name, value). Locals are visited in
9688    /// registration order (start pc, then register) to match luaF_getlocalname.
9689    pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9690        if n == 0 {
9691            return None;
9692        }
9693        let fi = match self.dbg_frame(level)? {
9694            DbgKind::Lua(fi) => fi,
9695            // Tail-call placeholder has no real frame backing it — no locals
9696            // exist to read or write here. PUC `findlocal` returns NULL on
9697            // a CIST_TAIL activation.
9698            DbgKind::Tail(_) => return None,
9699            // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9700            // for slot `n` inside the argument window (db.lua :408-:413, and
9701            // the call/return hook reads of math.sin / select args via
9702            // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9703            // meaningful for a C frame here.
9704            DbgKind::C(fi) => {
9705                if n < 1 {
9706                    return None;
9707                }
9708                let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9709                if (n as u32) > nargs {
9710                    return None;
9711                }
9712                let slot = (func_slot + n as u32) as usize;
9713                let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9714                return Some((self.temporary_locvar_name().to_string(), val));
9715            }
9716        };
9717        let f = self.frames[fi].lua()?;
9718        // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9719        // is the first extra arg passed to the function (`...[1]`), `-2` the
9720        // second, etc. The 5.5 stack layout parks varargs in
9721        // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9722        if n < 0 {
9723            let i = (-n) as u32;
9724            if i == 0 || i > f.n_varargs {
9725                return None;
9726            }
9727            let val = self
9728                .stack
9729                .get((f.func_slot + i) as usize)
9730                .copied()
9731                .unwrap_or(Value::Nil);
9732            return Some((self.vararg_locvar_name().to_string(), val));
9733        }
9734        let proto = f.closure.proto;
9735        // PUC's parser injects a hidden `(vararg table)` locvar for an
9736        // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9737        // right after the fixed parameters (`numparams + 1`). Main chunks
9738        // and `(...t)` named-vararg funcs do NOT get one — gate on the
9739        // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9740        // their declared registers (no shadow slot allocated), so we expose
9741        // that hidden index purely in this debug view.
9742        let num_params = proto.num_params as i64;
9743        let vararg_slot = if proto.has_vararg_table_pseudo {
9744            Some(num_params + 1)
9745        } else {
9746            None
9747        };
9748        if vararg_slot == Some(n) {
9749            return Some(("(vararg table)".to_string(), Value::Nil));
9750        }
9751        let pc = (f.pc as usize).saturating_sub(1);
9752        let mut active: Vec<&crate::runtime::LocVar> = proto
9753            .locvars
9754            .iter()
9755            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9756            .collect();
9757        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9758        let mut idx: i64 = n - 1;
9759        if let Some(vs) = vararg_slot
9760            && n > vs
9761        {
9762            idx -= 1;
9763        }
9764        let idx = idx as usize;
9765        if let Some(lv) = active.get(idx) {
9766            let val = self
9767                .stack
9768                .get((f.base + lv.reg) as usize)
9769                .copied()
9770                .unwrap_or(Value::Nil);
9771            return Some((lv.name.to_string(), val));
9772        }
9773        // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9774        // still inside the frame's live register window — report a
9775        // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9776        // the next frame's func slot (`ci->next->func.p`) so the
9777        // temporary window stops where the callee's frame begins
9778        // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9779        // an out-of-range slot).
9780        let limit = self
9781            .frames
9782            .get(fi + 1)
9783            .and_then(|cf| cf.lua())
9784            .map(|nf| nf.func_slot)
9785            .unwrap_or_else(|| self.top.max(f.base));
9786        let temp_reg = idx as u32;
9787        if f.base + temp_reg < limit {
9788            let val = self
9789                .stack
9790                .get((f.base + temp_reg) as usize)
9791                .copied()
9792                .unwrap_or(Value::Nil);
9793            return Some((self.lua_temporary_locvar_name().to_string(), val));
9794        }
9795        None
9796    }
9797
9798    /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9799    /// the local / vararg name on success, `None` when the slot does not
9800    /// resolve. Mirrors `local_at`'s indexing exactly.
9801    pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9802        if n == 0 {
9803            return None;
9804        }
9805        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9806            return None;
9807        };
9808        let f = self.frames[fi].lua()?;
9809        if n < 0 {
9810            let i = (-n) as u32;
9811            if i == 0 || i > f.n_varargs {
9812                return None;
9813            }
9814            let slot = (f.func_slot + i) as usize;
9815            if let Some(s) = self.stack.get_mut(slot) {
9816                *s = v;
9817            }
9818            return Some(self.vararg_locvar_name().to_string());
9819        }
9820        let proto = f.closure.proto;
9821        let num_params = proto.num_params as i64;
9822        let vararg_slot = if proto.has_vararg_table_pseudo {
9823            Some(num_params + 1)
9824        } else {
9825            None
9826        };
9827        if vararg_slot == Some(n) {
9828            // hidden (vararg table) slot has no real storage — accept the
9829            // write as a no-op for PUC parity (db.lua doesn't write to it).
9830            return Some("(vararg table)".to_string());
9831        }
9832        let pc = (f.pc as usize).saturating_sub(1);
9833        let mut active: Vec<&crate::runtime::LocVar> = proto
9834            .locvars
9835            .iter()
9836            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9837            .collect();
9838        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9839        let mut idx: i64 = n - 1;
9840        if let Some(vs) = vararg_slot
9841            && n > vs
9842        {
9843            idx -= 1;
9844        }
9845        let idx = idx as usize;
9846        let (name, reg) = if let Some(lv) = active.get(idx) {
9847            (lv.name.to_string(), lv.reg)
9848        } else {
9849            // PUC `luaG_findlocal` fallback into the temporary window —
9850            // bounded by the next frame's func slot (see local_at).
9851            let limit = self
9852                .frames
9853                .get(fi + 1)
9854                .and_then(|cf| cf.lua())
9855                .map(|nf| nf.func_slot)
9856                .unwrap_or_else(|| self.top.max(f.base));
9857            let temp_reg = idx as u32;
9858            if f.base + temp_reg >= limit {
9859                return None;
9860            }
9861            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9862        };
9863        let slot = (f.base + reg) as usize;
9864        if let Some(s) = self.stack.get_mut(slot) {
9865            *s = v;
9866        }
9867        Some(name)
9868    }
9869
9870    /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9871    /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9872    /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9873    /// vararg indexing past `n_varargs`, or for a register past the live
9874    /// window. Naming follows the same priority as `local_at`: named locals,
9875    /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9876    /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9877    /// window.
9878    pub(crate) fn local_at_coro(
9879        &self,
9880        co: Gc<crate::runtime::Coro>,
9881        level: i64,
9882        n: i64,
9883    ) -> Option<(String, Value)> {
9884        if level < 1 || n == 0 {
9885            return None;
9886        }
9887        let frames = &co.frames;
9888        // Logical level: iterate Lua frames from the top.
9889        let lua_indices: Vec<usize> = (0..frames.len())
9890            .rev()
9891            .filter(|&i| frames[i].lua().is_some())
9892            .collect();
9893        let fi = *lua_indices.get((level - 1) as usize)?;
9894        let f = frames[fi].lua()?;
9895        if n < 0 {
9896            let i = (-n) as u32;
9897            if i == 0 || i > f.n_varargs {
9898                return None;
9899            }
9900            let val = co
9901                .stack
9902                .get((f.func_slot + i) as usize)
9903                .copied()
9904                .unwrap_or(Value::Nil);
9905            return Some((self.vararg_locvar_name().to_string(), val));
9906        }
9907        let proto = f.closure.proto;
9908        let num_params = proto.num_params as i64;
9909        let vararg_slot = if proto.has_vararg_table_pseudo {
9910            Some(num_params + 1)
9911        } else {
9912            None
9913        };
9914        if vararg_slot == Some(n) {
9915            return Some(("(vararg table)".to_string(), Value::Nil));
9916        }
9917        let pc = (f.pc as usize).saturating_sub(1);
9918        let mut active: Vec<&crate::runtime::LocVar> = proto
9919            .locvars
9920            .iter()
9921            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9922            .collect();
9923        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9924        let mut idx: i64 = n - 1;
9925        if let Some(vs) = vararg_slot
9926            && n > vs
9927        {
9928            idx -= 1;
9929        }
9930        let idx = idx as usize;
9931        if let Some(lv) = active.get(idx) {
9932            let val = co
9933                .stack
9934                .get((f.base + lv.reg) as usize)
9935                .copied()
9936                .unwrap_or(Value::Nil);
9937            return Some((lv.name.to_string(), val));
9938        }
9939        let limit = frames
9940            .get(fi + 1)
9941            .and_then(|cf| cf.lua())
9942            .map(|nf| nf.func_slot)
9943            .unwrap_or(co.top.max(f.base));
9944        let temp_reg = idx as u32;
9945        if f.base + temp_reg < limit {
9946            let val = co
9947                .stack
9948                .get((f.base + temp_reg) as usize)
9949                .copied()
9950                .unwrap_or(Value::Nil);
9951            return Some((self.lua_temporary_locvar_name().to_string(), val));
9952        }
9953        None
9954    }
9955
9956    /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
9957    /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
9958    pub(crate) fn local_set_coro(
9959        &mut self,
9960        co: Gc<crate::runtime::Coro>,
9961        level: i64,
9962        n: i64,
9963        v: Value,
9964    ) -> Option<String> {
9965        if level < 1 || n == 0 {
9966            return None;
9967        }
9968        let lua_indices: Vec<usize> = (0..co.frames.len())
9969            .rev()
9970            .filter(|&i| co.frames[i].lua().is_some())
9971            .collect();
9972        let fi = *lua_indices.get((level - 1) as usize)?;
9973        let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
9974            let f = co.frames[fi].lua()?;
9975            (
9976                f.func_slot,
9977                f.n_varargs,
9978                f.base,
9979                f.closure.proto,
9980                co.top.max(f.base),
9981                co.frames
9982                    .get(fi + 1)
9983                    .and_then(|cf| cf.lua())
9984                    .map(|nf| nf.func_slot),
9985            )
9986        };
9987        if n < 0 {
9988            let i = (-n) as u32;
9989            if i == 0 || i > n_varargs {
9990                return None;
9991            }
9992            let slot = (func_slot + i) as usize;
9993            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9994            let stack = unsafe { &mut co.as_mut().stack };
9995            if let Some(s) = stack.get_mut(slot) {
9996                *s = v;
9997            }
9998            // co.stack values are traced — once-per-call barrier so propagate
9999            // sees the new value if co was already BLACK this cycle.
10000            self.heap
10001                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
10002            return Some(self.vararg_locvar_name().to_string());
10003        }
10004        let num_params = proto.num_params as i64;
10005        let vararg_slot = if proto.has_vararg_table_pseudo {
10006            Some(num_params + 1)
10007        } else {
10008            None
10009        };
10010        if vararg_slot == Some(n) {
10011            return Some("(vararg table)".to_string());
10012        }
10013        let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
10014        let mut active: Vec<&crate::runtime::LocVar> = proto
10015            .locvars
10016            .iter()
10017            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
10018            .collect();
10019        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
10020        let mut idx: i64 = n - 1;
10021        if let Some(vs) = vararg_slot
10022            && n > vs
10023        {
10024            idx -= 1;
10025        }
10026        let idx = idx as usize;
10027        let (name, reg) = if let Some(lv) = active.get(idx) {
10028            (lv.name.to_string(), lv.reg)
10029        } else {
10030            let limit = next_func_slot.unwrap_or(top_for_temp);
10031            let temp_reg = idx as u32;
10032            if base + temp_reg >= limit {
10033                return None;
10034            }
10035            (self.lua_temporary_locvar_name().to_string(), temp_reg)
10036        };
10037        let slot = (base + reg) as usize;
10038        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10039        let stack = unsafe { &mut co.as_mut().stack };
10040        if let Some(s) = stack.get_mut(slot) {
10041            *s = v;
10042        }
10043        // co.stack values are traced — once-per-call barrier so propagate
10044        // sees the new value if co was already BLACK this cycle.
10045        self.heap
10046            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
10047        Some(name)
10048    }
10049
10050    /// Frame info for a level on a suspended coroutine (PUC
10051    /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
10052    /// Returns the closure + currentline + extraargs + istailcall for the
10053    /// level-th Lua activation in `co.frames`. None if level overshoots.
10054    pub(crate) fn coro_frame_info(
10055        &self,
10056        co: Gc<crate::runtime::Coro>,
10057        level: i64,
10058    ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
10059        if level < 1 {
10060            return None;
10061        }
10062        let lua_indices: Vec<usize> = (0..co.frames.len())
10063            .rev()
10064            .filter(|&i| co.frames[i].lua().is_some())
10065            .collect();
10066        let fi = *lua_indices.get((level - 1) as usize)?;
10067        let f = co.frames[fi].lua()?;
10068        let proto = f.closure.proto;
10069        let pc = (f.pc as usize)
10070            .saturating_sub(1)
10071            .min(proto.lines.len().saturating_sub(1));
10072        let line = proto.lines.get(pc).copied().unwrap_or(0);
10073        Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
10074    }
10075
10076    /// Whether `level` resolves to any live activation (PUC lua_getstack).
10077    pub(crate) fn level_in_range(&self, level: i64) -> bool {
10078        self.dbg_frame(level).is_some()
10079    }
10080
10081    /// PUC's debug-API placeholder for an unnamed vararg slot returned by
10082    /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
10083    /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
10084    /// 5.3 :195 / 5.4 :286 baseline on their respective form.
10085    pub(crate) fn vararg_locvar_name(&self) -> &'static str {
10086        if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
10087            "(*vararg)"
10088        } else {
10089            "(vararg)"
10090        }
10091    }
10092
10093    /// PUC's debug-API placeholder for an unnamed temporary on a C
10094    /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
10095    /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
10096    /// their spelling.
10097    pub(crate) fn temporary_locvar_name(&self) -> &'static str {
10098        if matches!(
10099            self.version,
10100            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
10101        ) {
10102            // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
10103            // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
10104            // to `(C temporary)`.
10105            "(*temporary)"
10106        } else {
10107            "(C temporary)"
10108        }
10109    }
10110
10111    /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
10112    /// (an arithmetic intermediate sitting past the last named local on a
10113    /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
10114    /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
10115    /// spelling.
10116    pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
10117        if matches!(
10118            self.version,
10119            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
10120        ) {
10121            "(*temporary)"
10122        } else {
10123            "(temporary)"
10124        }
10125    }
10126
10127    /// The Lua closure running at `level` on the current thread, or `None`
10128    /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
10129    /// need this to reach the function whose env they read or rewrite.
10130    pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
10131        // `DbgKind::Tail` also falls into the else branch — a tail-call
10132        // placeholder has no closure of its own, so PUC's `lua_getstack` +
10133        // `getfunc` for that level returns no function, and `getfenv(level)`
10134        // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
10135        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
10136            return None;
10137        };
10138        Some(self.frames[fi].lua()?.closure)
10139    }
10140
10141    pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
10142        if level < 1 {
10143            return false;
10144        }
10145        let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
10146        (level as usize) <= count
10147    }
10148
10149    pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
10150        if level < 1 {
10151            return None;
10152        }
10153        // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
10154        // activation counts as a level, and each Lua activation's
10155        // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
10156        // dropped the synthetic shape: `istailcall` becomes a flag on the
10157        // real frame and Cont activations no longer count separately.
10158        // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
10159        // `getinfo(2).func == g1` pins the 5.2+ shape.
10160        let v51 = self.version <= LuaVersion::Lua51;
10161        let mut lvl = level;
10162        for fi in (0..self.frames.len()).rev() {
10163            match &self.frames[fi] {
10164                CallFrame::Lua(f) => {
10165                    lvl -= 1;
10166                    if lvl == 0 {
10167                        return Some(DbgKind::Lua(fi));
10168                    }
10169                    if v51 {
10170                        // 5.1 reports one synthetic CIST_TAIL level per
10171                        // collapsed tail call (PUC `lua_getstack` subtracts
10172                        // `ci->u.l.tailcalls` from the remaining level).
10173                        for _ in 0..f.tailcalls {
10174                            lvl -= 1;
10175                            if lvl == 0 {
10176                                return Some(DbgKind::Tail(fi));
10177                            }
10178                        }
10179                    }
10180                    if f.from_c {
10181                        lvl -= 1;
10182                        if lvl == 0 {
10183                            return Some(DbgKind::C(fi));
10184                        }
10185                    }
10186                }
10187                CallFrame::Cont(_) => {
10188                    if !v51 {
10189                        continue;
10190                    }
10191                    lvl -= 1;
10192                    if lvl == 0 {
10193                        let parent = (0..fi)
10194                            .rev()
10195                            .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
10196                        return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
10197                    }
10198                }
10199            }
10200        }
10201        None
10202    }
10203
10204    pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
10205        let f = self.frames[fi].lua()?;
10206        // metamethod handler frames carry the event tag (e.g. "close" for
10207        // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
10208        if f.is_hook {
10209            return Some(("hook", "?".to_string()));
10210        }
10211        if let Some(tm) = f.tm {
10212            return Some(("metamethod", tm_debug_name(self.version, tm)));
10213        }
10214        // a frame entered across a C boundary has no naming call instruction
10215        if fi == 0 || f.from_c {
10216            return None;
10217        }
10218        // the caller's call instruction names this frame; a continuation frame
10219        // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
10220        // already short-circuits those.
10221        let caller = self.frames[fi - 1].lua()?;
10222        let caller_proto = caller.closure.proto;
10223        let p: &crate::runtime::Proto = &caller_proto;
10224        let call_pc = (caller.pc as usize).checked_sub(1)?;
10225        let instr = *p.code.get(call_pc)?;
10226        match instr.op() {
10227            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10228            Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10229            _ => None,
10230        }
10231    }
10232
10233    /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
10234    /// (PUC names a C function from the call instruction that invoked it). The
10235    /// native was called by the nearest Lua frame below `fi` (skipping pcall/
10236    /// xpcall continuations); that frame's call instruction names it.
10237    pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
10238        // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
10239        // the synthetic C edge between the __gc finalizer (top Lua frame, has
10240        // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
10241        // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
10242        // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
10243        // sets the tag on the handler frame only, so level 2 there still
10244        // names the calling Lua frame's call instruction (5.5 locals.lua
10245        // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
10246        if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
10247            && fr.tm == Some("gc")
10248        {
10249            let name = tm_debug_name(self.version, "gc");
10250            return Some(("metamethod", name));
10251        }
10252        let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
10253        let caller = self.frames[caller_fi].lua()?;
10254        let p = &caller.closure.proto;
10255        let call_pc = (caller.pc as usize).checked_sub(1)?;
10256        let instr = *p.code.get(call_pc)?;
10257        match instr.op() {
10258            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10259            _ => None,
10260        }
10261    }
10262
10263    /// Native value currently sitting on the synthetic C edge identified by
10264    /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
10265    /// above `fi` (each one corresponds to one native pushing the hook) and
10266    /// indexes into `running_natives` from the top, also skipping the caller
10267    /// of `getinfo` itself (the native that is currently asking).
10268    /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
10269    /// expects the just-entered C function.
10270    pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
10271        let idx = self.c_frame_native_idx(fi)?;
10272        Some(Value::Native(self.running_natives[idx]))
10273    }
10274
10275    /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
10276    /// so `local_at` can index the native's argument window like PUC's
10277    /// `(C temporary)` path. Returns `None` when no matching native exists
10278    /// (e.g. the C edge corresponds to a non-native boundary).
10279    pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
10280        let idx = self.c_frame_native_idx(fi)?;
10281        self.running_native_slots.get(idx).copied()
10282    }
10283
10284    fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
10285        let n_above = self.frames[fi..]
10286            .iter()
10287            .filter_map(CallFrame::lua)
10288            .filter(|f| f.from_c)
10289            .count();
10290        if n_above == 0 {
10291            return None;
10292        }
10293        // running_natives.last() is the native currently executing (the one
10294        // that called getinfo). Pop it conceptually, then take the n_above-th
10295        // entry from the top of what remains.
10296        let nr = self.running_natives.len().checked_sub(1)?;
10297        nr.checked_sub(n_above)
10298    }
10299
10300    /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
10301    /// native whose function pointer matches `target`, and return its qualified
10302    /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
10303    /// `None` if no match is found. Used by `arg_error` when the running native
10304    /// was invoked from another native (PUC `ar.name == NULL` at level 0).
10305    pub(crate) fn pushglobalfuncname(
10306        &mut self,
10307        target: crate::runtime::value::NativeFn,
10308    ) -> Option<String> {
10309        let pkg_k = Value::Str(self.heap.intern(b"package"));
10310        let pkg = match self.globals().get(pkg_k) {
10311            Value::Table(t) => t,
10312            _ => return None,
10313        };
10314        let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10315        let loaded = match pkg.get(loaded_k) {
10316            Value::Table(t) => t,
10317            _ => return None,
10318        };
10319        let matches = |v: Value| -> bool {
10320            matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10321        };
10322        let mut k = Value::Nil;
10323        while let Ok(Some((nk, nv))) = loaded.next(k) {
10324            k = nk;
10325            let Value::Str(outer) = nk else { continue };
10326            let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10327            if matches(nv) {
10328                return Some(if outer == "_G" { String::new() } else { outer });
10329            }
10330            if let Value::Table(inner_t) = nv {
10331                let mut k2 = Value::Nil;
10332                while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10333                    k2 = nk2;
10334                    if matches(nv2)
10335                        && let Value::Str(inner) = nk2
10336                    {
10337                        let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10338                        return Some(if outer == "_G" {
10339                            inner
10340                        } else {
10341                            format!("{outer}.{inner}")
10342                        });
10343                    }
10344                }
10345            }
10346        }
10347        None
10348    }
10349
10350    /// Name and namewhat of the native currently running on behalf of the top
10351    /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10352    /// `luaL_argerror` rewrite a method call's self-argument error.
10353    pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10354        let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10355        let p = &caller.closure.proto;
10356        let call_pc = (caller.pc as usize).checked_sub(1)?;
10357        let instr = *p.code.get(call_pc)?;
10358        match instr.op() {
10359            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10360            _ => None,
10361        }
10362    }
10363
10364    pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10365        let f = self.frames[fi].lua().expect("Lua frame");
10366        let proto = f.closure.proto;
10367        let pc = (f.pc as usize)
10368            .saturating_sub(1)
10369            .min(proto.lines.len().saturating_sub(1));
10370        let line = proto.lines.get(pc).copied().unwrap_or(0);
10371        // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10372        // (independent of any later write to a materialized vararg table's `n`).
10373        // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10374        // any nonzero `tailcalls` count flips it true.
10375        (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10376    }
10377
10378    /// Read an upvalue cell of a closure (debug.getupvalue).
10379    pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10380        match cl.upvals()[idx].state() {
10381            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10382            UpvalState::Closed(v) => v,
10383        }
10384    }
10385
10386    /// Write an upvalue cell of a closure (debug.setupvalue).
10387    pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10388        let uv = cl.upvals()[idx];
10389        match uv.state() {
10390            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10391            UpvalState::Closed(_) => {
10392                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10393                unsafe { uv.as_mut() }.set_closed(v);
10394                self.heap
10395                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10396            }
10397        }
10398    }
10399
10400    /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10401    /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10402    /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10403    /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10404    /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10405    /// else `"function <src:line_defined>"` for an anonymous Lua function.
10406    /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10407    /// Walks the coroutine's saved frames and prepends a synthetic C-level
10408    /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10409    /// (its `resume_at` marker is set). `level` skips entries from the top
10410    /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10411    /// frame; etc.). db.lua :764-:768 sample several levels.
10412    pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10413        use crate::runtime::CoroStatus;
10414        const LEVELS1: usize = 10;
10415        const LEVELS2: usize = 11;
10416        #[derive(Clone, Copy)]
10417        enum VFrame<'a> {
10418            Lua(&'a crate::runtime::function::Frame),
10419            CPcall,
10420            CXpcall,
10421            CYield,
10422            /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10423            /// call collapsed into the next Lua frame down the chain.
10424            Tail,
10425        }
10426        let v51 = self.version <= LuaVersion::Lua51;
10427        let mut visible: Vec<VFrame<'_>> = Vec::new();
10428        // PUC's level 0 entry on a suspended coroutine is the C call where it
10429        // paused — `coroutine.yield` for a yielded thread.
10430        if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10431            visible.push(VFrame::CYield);
10432        }
10433        for cf in co.frames.iter().rev() {
10434            match cf {
10435                CallFrame::Lua(f) => {
10436                    visible.push(VFrame::Lua(f));
10437                    if v51 {
10438                        for _ in 0..f.tailcalls {
10439                            visible.push(VFrame::Tail);
10440                        }
10441                    }
10442                }
10443                CallFrame::Cont(nc) => match nc.kind {
10444                    ContKind::Pcall => visible.push(VFrame::CPcall),
10445                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10446                    _ => {}
10447                },
10448            }
10449        }
10450        if level < 0 {
10451            level = 0;
10452        }
10453        if (level as usize) >= visible.len() {
10454            return Vec::new();
10455        }
10456        let visible = &visible[level as usize..];
10457        let total = visible.len();
10458        let mut out = Vec::new();
10459        // To name a Lua frame, PUC consults the caller's OP_CALL via
10460        // getobjname: find the index `fi` of the current frame in co.frames,
10461        // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10462        let coro_frame_name = |frames: &[CallFrame],
10463                               target: &crate::runtime::function::Frame|
10464         -> Option<(&'static str, String)> {
10465            let fi = frames
10466                .iter()
10467                .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10468            if fi == 0 || target.from_c {
10469                return None;
10470            }
10471            let caller = frames[fi - 1].lua()?;
10472            let p = &caller.closure.proto;
10473            let call_pc = (caller.pc as usize).checked_sub(1)?;
10474            let instr = *p.code.get(call_pc)?;
10475            match instr.op() {
10476                Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10477                Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10478                _ => None,
10479            }
10480        };
10481        let frames = &co.frames;
10482        let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10483            VFrame::Lua(f) => {
10484                let proto = f.closure.proto;
10485                let src = chunk_display_name(proto.source.as_ptr());
10486                let pc = (f.pc as usize)
10487                    .saturating_sub(1)
10488                    .min(proto.lines.len().saturating_sub(1));
10489                let line = proto.lines.get(pc).copied().unwrap_or(0);
10490                out.extend_from_slice(b"\n\t");
10491                out.extend_from_slice(src);
10492                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10493                if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10494                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10495                } else if proto.line_defined == 0 {
10496                    out.extend_from_slice(b"main chunk");
10497                } else {
10498                    out.extend_from_slice(
10499                        format!(
10500                            "function <{}:{}>",
10501                            String::from_utf8_lossy(src),
10502                            proto.line_defined
10503                        )
10504                        .as_bytes(),
10505                    );
10506                }
10507            }
10508            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10509            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10510            VFrame::CYield => {
10511                // PUC `pushglobalfuncname` reports `yield` as
10512                // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10513                // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10514                // `'yield'` (5.5 :841).
10515                let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10516                if qualified {
10517                    out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10518                } else {
10519                    out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10520                }
10521            }
10522            VFrame::Tail => {
10523                // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10524                // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10525                // :403 asserts these appear once per collapsed tail call.
10526                out.extend_from_slice(b"\n\t(...tail calls...)");
10527            }
10528        };
10529        if total <= LEVELS1 + LEVELS2 {
10530            for &v in visible {
10531                emit(&mut out, v);
10532            }
10533        } else {
10534            for &v in &visible[..LEVELS1] {
10535                emit(&mut out, v);
10536            }
10537            let skip = total - LEVELS1 - LEVELS2;
10538            out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10539            for &v in &visible[total - LEVELS2..] {
10540                emit(&mut out, v);
10541            }
10542        }
10543        out
10544    }
10545
10546    pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10547        // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10548        // (11) bottom frames; if there are more, the middle is collapsed into
10549        // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10550        // overflow traceback would balloon to tens of megabytes (errors.lua's
10551        // stack-overflow test ran string.gmatch over the resulting buffer).
10552        const LEVELS1: usize = 10;
10553        const LEVELS2: usize = 11;
10554        // Collect visible frames in top-down order (deepest first). Both Lua
10555        // activations and pcall/xpcall continuations (which stand in for a
10556        // C-level pcall on the stack) are visible; PUC's traceback enumerates
10557        // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10558        #[derive(Clone, Copy)]
10559        enum VFrame {
10560            Lua(usize),
10561            CPcall,
10562            CXpcall,
10563        }
10564        let mut visible: Vec<VFrame> = Vec::new();
10565        for (fi, cf) in self.frames.iter().enumerate().rev() {
10566            match cf {
10567                CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10568                CallFrame::Cont(nc) => match nc.kind {
10569                    ContKind::Pcall => visible.push(VFrame::CPcall),
10570                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10571                    _ => {}
10572                },
10573            }
10574        }
10575        // PUC `luaL_traceback` starts enumerating at the given `level` (in
10576        // terms of L1's CallInfo chain). For the running-thread case the C
10577        // frame for debug.traceback itself is level 0 and luna's `visible`
10578        // doesn't include it — so level=1 (PUC default) means "emit from the
10579        // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10580        // the top. level<=0 emits nothing extra here (d_traceback handles the
10581        // "[C]: in function 'traceback'" prefix for level==0 separately).
10582        let skip = (level - 1).max(0) as usize;
10583        if skip >= visible.len() {
10584            return Vec::new();
10585        }
10586        let visible = &visible[skip..];
10587        let total = visible.len();
10588        let mut out = Vec::new();
10589        let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10590            VFrame::Lua(fi) => {
10591                let f = this.frames[fi].lua().expect("Lua frame");
10592                let proto = f.closure.proto;
10593                let src = chunk_display_name(proto.source.as_ptr());
10594                let pc = (f.pc as usize)
10595                    .saturating_sub(1)
10596                    .min(proto.lines.len().saturating_sub(1));
10597                let line = proto.lines.get(pc).copied().unwrap_or(0);
10598                out.extend_from_slice(b"\n\t");
10599                out.extend_from_slice(src);
10600                out.extend_from_slice(format!(":{line}: in ").as_bytes());
10601                if let Some((namewhat, name)) = this.frame_name(fi) {
10602                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10603                } else if proto.line_defined == 0 {
10604                    out.extend_from_slice(b"main chunk");
10605                } else {
10606                    out.extend_from_slice(
10607                        format!(
10608                            "function <{}:{}>",
10609                            String::from_utf8_lossy(src),
10610                            proto.line_defined
10611                        )
10612                        .as_bytes(),
10613                    );
10614                }
10615            }
10616            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10617            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10618        };
10619        if total <= LEVELS1 + LEVELS2 {
10620            for &v in visible {
10621                emit_frame(&mut out, v, self);
10622            }
10623        } else {
10624            for &v in &visible[..LEVELS1] {
10625                emit_frame(&mut out, v, self);
10626            }
10627            let dropped = total - LEVELS1 - LEVELS2;
10628            out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10629            for &v in &visible[total - LEVELS2..] {
10630                emit_frame(&mut out, v, self);
10631            }
10632        }
10633        out
10634    }
10635}
10636
10637// ────────────────────────────────────────────────────────────────────
10638// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10639//
10640// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10641// trace-meta section after `vm.load`, resolves each entry's
10642// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10643// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10644// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10645// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10646// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10647// mcode without further plumbing — same code path the runtime JIT
10648// uses.
10649//
// Why a separate impl block: keeps the AOT API surface
// (`install_aot_trace` and `collect_proto_hashes`) easy to locate when
// grep'ing, without wading through the 8500-line `impl Vm` block above.
10653// ────────────────────────────────────────────────────────────────────
10654
10655impl Vm {
10656    /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10657    /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10658    /// fires it at the trace's `head_pc`. This is the runtime install
10659    /// API the deploy-side `luna-runtime-helpers` resolver calls once
10660    /// per AOT-emitted trace meta entry, after looking up `proto` by
10661    /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10662    ///
10663    /// # What this does
10664    ///
10665    /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10666    /// The trace's `entry` fn ptr must already point at runnable
10667    /// machine code (the AOT linker resolved the symbol at link time;
10668    /// the deploy resolver passes the address verbatim).
10669    ///
10670    /// # What this does NOT do
10671    ///
10672    /// - **No deduplication.** Calling twice with the same `head_pc`
10673    ///   pushes two entries; the dispatcher's `find` will pick the
10674    ///   first match. The deploy resolver is responsible for not
10675    ///   double-installing.
10676    /// - **No invalidation of the runtime JIT cache.** If the runtime
10677    ///   JIT later records + compiles a trace for the same
10678    ///   `(proto, head_pc)`, both coexist on `proto.traces` and the
10679    ///   dispatcher's `find` picks whichever appears first. AOT
10680    ///   traces install before any runtime recording is possible
10681    ///   (resolver runs before `vm.load` returns its first closure),
10682    ///   so AOT traces win the race for the same site.
10683    /// - **No coverage gating.** AOT traces are trusted by
10684    ///   construction — they were validated at compile time. Setting
10685    ///   `dispatchable: false` on the input would silently disable
10686    ///   dispatch; the caller controls that flag.
10687    ///
10688    /// # Safety / soundness
10689    ///
10690    /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10691    /// machine code). Soundness contract:
10692    ///
10693    /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10694    ///   In the AOT-binary deploy shape this is trivially satisfied —
10695    ///   the fn lives in the binary's `.text`.
10696    /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10697    ///   what the trace's IR actually compiled against; the dispatcher
10698    ///   uses them to marshal `reg_state` in and out without further
10699    ///   validation. A mismatch corrupts vm.stack.
10700    ///
10701    /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10702    /// invariants hold; this fn is a plain push — no validation that
10703    /// would slow the dispatcher's hot path either.
10704    pub fn install_aot_trace(
10705        &mut self,
10706        proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10707        trace: crate::jit::trace::CompiledTrace,
10708    ) {
10709        let _ = self; // resolver passes &mut Vm for symmetry with future
10710        // pending-install + hash-walk variants; nothing on `self` to
10711        // mutate today because the install target lives on the Proto.
10712        proto.traces.borrow_mut().push(TArc::new(trace));
10713    }
10714
10715    /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10716    /// reachable from `root` and return `(proto, stable_hash)` pairs
10717    /// for every Proto found. Used by the deploy-side resolver to
10718    /// match AOT-emitted `proto_hash` keys against the freshly
10719    /// `undump`'d chunk's protos.
10720    ///
10721    /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10722    /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10723    /// multiple nested closures (rare; the cache field would catch
10724    /// the closure-side dedup, not the Proto side) is reported once.
10725    ///
10726    /// # Why on `&Vm` and not a free fn
10727    ///
10728    /// Keeps the AOT install API discoverable on the Vm surface —
10729    /// `vm.collect_proto_hashes(root)` reads naturally next to
10730    /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10731    /// any Vm field, so `&self` (read-only) is enough.
10732    pub fn collect_proto_hashes(
10733        &self,
10734        root: crate::runtime::Gc<crate::runtime::function::Proto>,
10735    ) -> Vec<(
10736        crate::runtime::Gc<crate::runtime::function::Proto>,
10737        [u8; 16],
10738    )> {
10739        let _ = self;
10740        let mut out = Vec::new();
10741        let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10742            std::collections::HashSet::new();
10743        let mut queue: std::collections::VecDeque<
10744            crate::runtime::Gc<crate::runtime::function::Proto>,
10745        > = std::collections::VecDeque::new();
10746        queue.push_back(root);
10747        while let Some(p) = queue.pop_front() {
10748            let key = p.as_ptr() as *const _;
10749            if !seen.insert(key) {
10750                continue;
10751            }
10752            out.push((p, p.stable_hash()));
10753            for &child in p.protos.iter() {
10754                queue.push_back(child);
10755            }
10756        }
10757        out
10758    }
10759}