Skip to main content

luna_core/vm/
exec.rs

1//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
2//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
3//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
4//!
5//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
6//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
7//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
8//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::numeric::{self, Num};
13use crate::runtime::heap::GcHeader;
14use crate::runtime::{
15    AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
16    MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
17};
18use crate::version::LuaVersion;
19use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
20use crate::vm::error::LuaError;
21use crate::vm::isa::{Inst, Op};
22
23/// A Lua virtual machine: one OS thread's worth of Lua state.
24///
25/// # Threading model
26///
27/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
28/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
29/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
30/// design. Embedders that want concurrency spawn one `Vm` per OS
31/// thread (or per single-thread Tokio worker) and exchange data via
32/// channels. See [`docs/threading.md`](../../docs/threading.md) for
33/// canonical embedding patterns including Tokio `current_thread`,
34/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
35///
36/// The constraint is enforced at compile time:
37///
38/// ```compile_fail
39/// fn must_be_send<T: Send>() {}
40/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
41/// ```
42///
43/// A future `feature = "send"` (post-v1.1 sprint) will gate an
44/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
45/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
46pub struct Vm {
47    /// The GC heap owned by this VM. Embedders normally interact via the
48    /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
49    /// the heap directly.
50    pub heap: Heap,
51    stack: Vec<Value>, // value stack of the thread whose context is live (see `current`)
52    frames: Vec<CallFrame>, // call frames of that same thread; `frames_top` shadows its length
53    /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
54    /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
55    /// helpers (debug-asserted on use). NOT consumed by readers yet;
56    /// week 1 is pure scaffold. Week 2-N migrations replace readers
57    /// one slice at a time, then remove `frames: Vec<CallFrame>` in
58    /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
59    frames_top: u32,
60    /// open upvalues, sorted ascending by stack slot
61    open_upvals: Vec<(u32, Gc<Upvalue>)>,
62    /// to-be-closed slots, ascending
63    tbc: Vec<u32>,
64    /// logical stack top for multi-result sequences
65    pub(crate) top: u32,
66    globals: Gc<Table>,
67    /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
68    /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
69    /// own. Slot 3 is the shared metatable for all strings (populated by the
70    /// string lib, P04). Settable via debug.setmetatable.
71    type_mt: [Option<Gc<Table>>; 5],
72    /// pre-interned metamethod event names, indexed by `Mm`
73    mm_names: Vec<Gc<crate::runtime::LuaStr>>,
74    /// native↔Lua nesting depth (PUC C-stack guard analogue)
75    c_depth: u32,
76    /// number of live pcall/xpcall continuation frames on the running thread
77    /// (PUC counts these against nCcalls). Bounds protected-call recursion the
78    /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
79    /// with the coroutine context, since continuations survive a yield.
80    pcall_depth: u32,
81    /// number of non-yieldable C calls in flight on the running thread (PUC's
82    /// `L->nny`). A library callback that runs via synchronous Rust recursion
83    /// (sort comparator, gsub replacement) cannot be continued across a yield,
84    /// so it bumps this for its duration; `coroutine.yield` inside hits the
85    /// C-call boundary and errors. Always 0 at a suspend point (a yield can
86    /// never cross such a call), so it needs no per-thread save/restore.
87    nny: u32,
88    /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
89    /// stack-overflow that surfaces *inside* the handler is reported as PUC's
90    /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
91    /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
92    /// loop)` then matches. PUC tracks this via the soft-cap window
93    /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
94    /// scope explicitly.
95    msgh_depth: u32,
96    /// set by a coroutine closing itself (`coroutine.close()` on the running
97    /// thread): the to-be-closed handlers have already run; the thread must now
98    /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
99    /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
100    /// unwind, so a protecting pcall cannot catch it) the termination.
101    terminating: Option<Option<Value>>,
102    /// xoshiro256** state (math.random)
103    rng: [u64; 4],
104    /// VM creation time (os.clock)
105    started: std::time::Instant,
106    version: LuaVersion,
107    /// error object being threaded through a chain of __close handlers; a GC
108    /// root for the duration (a handler may trigger collection)
109    closing_err: Option<Value>,
110    /// the coroutine whose context is currently live in the fields above;
111    /// `None` while the main thread runs (P05)
112    current: Option<Gc<crate::runtime::Coro>>,
113    /// the main thread's saved execution context while a coroutine runs
114    main_ctx: Option<SavedCtx>,
115    /// set by `coroutine.yield` to suspend the running coroutine: the yielded
116    /// values plus the slot/result-count needed to finish the yielding call on
117    /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
118    yielding: Option<(Vec<Value>, u32, i32)>,
119    /// results expected by the in-flight native call (so `yield` knows how many
120    /// values its call site wants when it suspends)
121    native_nresults: i32,
122    /// identity object for the main thread, returned by `coroutine.running`
123    /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
124    main_coro: Option<Gc<Coro>>,
125    /// `collectgarbage` mode name ("incremental"/"generational"). The collector
126    /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
127    /// switches report the previous one, as PUC does.
128    gc_mode: &'static str,
129    /// the live-register boundary of the running thread for GC rooting (PUC's
130    /// `L->top`): set precisely at each GC safe point so freed temporary
131    /// registers above it are not rooted. Without this the collector roots the
132    /// whole stack window, pinning weak-table values stranded in stale temps
133    /// (e.g. closure.lua's `while x[1]` GC-detection loop).
134    pub(crate) gc_top: u32,
135    /// `collectgarbage("param", name [,value])` pacing parameters. The collector
136    /// is still stop-the-world, so these are stored/returned for API fidelity
137    /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
138    /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
139    gc_pause: i64,
140    gc_stepmul: i64,
141    gc_stepsize: i64,
142    /// true while `__gc` finalizers are being run, so a finalizer that calls
143    /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
144    /// `collectgarbage` returns fail).
145    gc_finalizing: bool,
146    /// C ABI scratch (`capi` module): the host-visible value stack that C
147    /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
148    /// Kept here (instead of in a separate `LuaState` wrapper) so the
149    /// trampoline that bridges to a `LuaCFunction` can safely cast the
150    /// Vm pointer it already holds to the public `*mut LuaState` type
151    /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
152    pub capi_stack: Vec<crate::runtime::Value>,
153    /// Pinned CString backing the pointer last returned by `lua_tostring`;
154    /// valid until the next `lua_tostring` on the same Vm.
155    pub capi_cstr_pin: Option<std::ffi::CString>,
156    /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
157    /// concatenate across continuation calls until a non-`tocont` call
158    /// flushes; the default warnf recognises `@on`/`@off` control messages
159    /// and starts disabled. luna's `emit_warn` mirrors the default warnf
160    /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
161    /// keep the older raise semantics).
162    pub(crate) warn_state: WarnState,
163    pub(crate) warn_buf: Vec<u8>, // NOTE(review): presumably the not-yet-flushed message pieces — confirm in `emit_warn`
164    /// P09 embedding cooperative budget: a per-Vm tick counter that the run
165    /// loop decrements once per dispatch turn. When it hits zero the loop
166    /// raises a catchable "instruction budget exceeded" error so the embedder
167    /// can yield control back to its caller (short-script eval, game
168    /// frame budgets). `None` = unbounded; reset on each call via
169    /// `set_instr_budget`.
170    pub(crate) instr_budget: Option<i64>,
171    // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
172    // `self.jit` below + `crate::vm::jit_state` for field docs.
173    // (Was: jit_enabled here.)
174    // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
175    // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
176    // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
177    // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
178    // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
179    // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
180    // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
181    // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
182    // materialize_emit,closure_emit}_count — all moved to JitState.
183    /// Bytecode-loading gate. Default `true`. Sandbox embedders should
184    /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
185    /// precompiled chunks (which bypass the parser's depth / opcode
186    /// limits). When `false`, the loader rejects any source whose first
187    /// byte is the bytecode signature `\27` ("`\27Lua`").
188    pub(crate) bytecode_loading: bool,
189    /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
190    /// a strictly larger trust surface than luna's own dump format
191    /// (third-party toolchain bugs, malformed chunks, unknown opcode
192    /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
193    /// through the per-dialect PUC translators in `crate::vm::dump::puc`
194    /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
195    /// Embedder toggles via `set_puc_bytecode_loading`.
196    pub(crate) puc_bytecode_loading: bool,
197    /// In-process log of fully-emitted warnings (each entry = one flushed
198    /// message, sans the "Lua warning: " prefix and trailing newline). Lets
199    /// tests assert what was warned without scraping stderr.
200    pub(crate) warn_log: Vec<Vec<u8>>,
201    /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
202    /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
203    /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
204    /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
205    /// shape stub for db.lua :328; if other registry-keyed APIs land later
206    /// they can share this table.
207    pub(crate) registry: Option<Gc<Table>>,
208    /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
209    /// registry entry); attached to every file userdata the io library makes
210    pub(crate) file_mt: Option<Gc<Table>>,
211    /// io library default input/output streams (PUC registry IO_INPUT/IO_OUTPUT)
212    pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
213    pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
214    /// the running thread's debug hook state (`debug.sethook`); per-thread,
215    /// swapped with the execution context on a coroutine resume/yield
216    pub(crate) hook: HookState,
217    /// true while the hook itself runs, so its own execution fires no events
218    /// (PUC clears the mask for the duration)
219    pub(crate) in_hook: bool,
220    /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
221    /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
222    /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
223    /// accumulate the count instead of capping at 1.
224    pub(crate) pending_tailcalls: u32,
225    /// Name of the C native that just propagated an error (captured before
226    /// the native is popped from `running_natives`). Lets a dying coroutine
227    /// preserve `[C]: in function '<name>'` at the top of its traceback
228    /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
229    /// luna's native frames are off-stack so we stash the name explicitly.
230    pub(crate) errored_native: Option<String>,
231    /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
232    /// (relative to the activation's func slot) and the number transferred.
233    /// Set just before firing a call/return hook, read by `getinfo("r")`.
234    pub(crate) hook_ftransfer: u16,
235    pub(crate) hook_ntransfer: u16,
236    /// metamethod event tag (e.g. "close") to attach to the next Lua frame
237    /// pushed by `push_frame`; `close_slots` sets this before calling a
238    /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
239    /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
240    pending_tm: Option<&'static str>,
241    /// `true` when the next `push_frame` is the user hook function itself,
242    /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
243    /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
244    pending_is_hook: bool,
245    /// traceback snapshot taken at the error point (the first `unwind` entry
246    /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
247    /// the failed frames are popped — can still see the error point's stack
248    /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
249    /// stack intact; we approximate by snapshotting the string and letting
250    /// `d_traceback` consume it. Cleared on Cont catch and at host-level
251    /// `call_value` entry (`public_call_depth == 0`).
252    pub(crate) error_traceback: Option<Vec<u8>>,
253    /// nesting depth of public `call_value` entries (host vs. internal). The
254    /// outermost entry (depth 0) resets per-error state (`error_traceback`);
255    /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
256    public_call_depth: u32,
257    /// stack of native (`Value::Native`) closures currently running on the
258    /// Rust call stack. `begin_call` pushes the closure before invoking
259    /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
260    /// native call (PUC `ar.name == NULL` at level 0 because the level-0
261    /// caller is C, not Lua) and qualify the running function's name via
262    /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
263    pub(crate) running_natives: Vec<Gc<NativeClosure>>,
264    /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
265    /// the native's argument-window head and width, so `debug.getlocal`
266    /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
267    pub(crate) running_native_slots: Vec<(u32, u32)>,
268    // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
269    // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
270    // trace_compiler — all moved to JitState. See `jit` below.
271    /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
272    /// when `chunk_compiler` / `trace_compiler` are
273    /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
274    ///
275    /// `#[doc(hidden)] pub` so the `luna` crate's
276    /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
277    /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
278    /// field). Not part of the embedder-facing API surface.
279    #[doc(hidden)]
280    pub jit: crate::vm::jit_state::JitState,
281
282    /// B12 host roots — vector of `HostRootSlot`s traced as an extra
283    /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
284    /// `LuaRoot`) hold indices into this vector so the underlying
285    /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
286    ///
287    /// v1.1 strategy was append-only with explicit `unpin_all` / new Vm,
288    /// with slot recycling deferred to Phase 3 alongside B8 LuaUserdata.
289    /// NOTE(review): `host_roots_free` (v1.3 Phase SR) now recycles slots,
290    /// so the append-only description looks stale — confirm and trim.
291    pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
292    /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
293    /// back if non-empty, else extends `host_roots`. Generation
294    /// overflow at `u32::MAX` retires the slot (NOT pushed here).
295    pub(crate) host_roots_free: Vec<u32>,
296
297    /// v1.3 Phase ML — MacroLua compile-time macro registry.
298    /// Pre-populated with built-in macros (`@quote` / `@unquote` /
299    /// `@if` / `@gensym`) at construction time when `version ==
300    /// LuaVersion::MacroLua`; embedders register custom macros via
301    /// [`Vm::define_macro`]. The expander runs once per `load()` call
302    /// between lexing and parsing (only when `is_macro_lua()`).
303    pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,
304
305    /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
306    /// by `TypeId::of::<T>()` for embedder types implementing
307    /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
308    /// [`Vm::register_userdata`]; metatables are pinned via
309    /// [`Vm::pin_host`] at registration time so the entry's
310    /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
311    pub(crate) userdata_metatables:
312        std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,
313
314    /// B6 — classification of the most recent error raised on this Vm.
315    /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
316    /// at well-known sites (syntax errors, instr-budget trips, native
317    /// callback errors, type errors).
318    pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,
319
320    /// B6 — `(source_name, line)` of the most recent error. Set by the
321    /// dispatcher / lexer / parser; cleared when a new call_value
322    /// enters cleanly.
323    pub(crate) last_error_source: Option<(String, u32)>,
324
325    /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
326    /// the dispatcher hot loop yields cooperatively (sets
327    /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
328    /// to `EvalFuture::poll`) instead of returning a real
329    /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
330    /// for the duration of the future; restored to `false` on
331    /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
332    /// leave it `false` so v1.0 behavior is preserved exactly.
333    pub(crate) async_mode: bool,
334
335    /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
336    /// before driving a slice. The dispatcher itself does not call it
337    /// (the future's poll loop does `wake_by_ref` after observing
338    /// `BudgetExhausted`), but storing the waker keeps the door open
339    /// for Stage 2 async natives to wake the host directly from a
340    /// helper future.
341    pub(crate) async_waker: Option<std::task::Waker>,
342
343    /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
344    /// `instr_budget` at the start of each `EvalFuture::poll` slice.
345    /// Default 10_000 (RFC §D5). Tunable via
346    /// [`Vm::set_async_slice`].
347    pub(crate) async_slice_size: i64,
348
349    /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
350    /// budget exhaustion fires; checked by `exec_with` (so the
351    /// sentinel propagates without `unwind` running, mirroring
352    /// `yielding.is_some()`) and by `call_value_impl` (so the call
353    /// frames survive for the next poll). Cleared by `drive_one`
354    /// after translating it to `DispatchOutcome::BudgetExhausted`.
355    pub(crate) host_yield_pending: bool,
356
357    /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
358    /// when an async-marked [`NativeClosure`] is invoked under
359    /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
360    /// mechanism as `host_yield_pending` — see `exec_with` +
361    /// `call_value_impl`), stashes the in-flight future +
362    /// post-completion context here, and surfaces them to
363    /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
364    /// once the future is moved out into a
365    /// `DispatchOutcome::AsyncNativeAwaiting`.
366    pub(crate) pending_async_native_fut:
367        Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,
368
369    /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
370    /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
371    /// commit the future's eventual `Ok(nret)` back into the calling
372    /// frame's expected result slots. Recorded by the dispatcher;
373    /// consumed by [`Vm::commit_async_native_result`] after the
374    /// future resolves.
375    pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
376}
377
378/// v1.1 B10 Stage 2 — call-site context an in-flight async native
379/// needs preserved across the cooperative-yield boundary.
380///
381/// The dispatcher records this when it routes a `NativeClosure` with
382/// `is_async == true` through the cooperative path; `EvalFuture::poll`
383/// hands it back to [`Vm::commit_async_native_result`] once the
384/// awaited future resolves so `finish_results` (and the post-call GC
385/// checkpoint) can run as if the native had completed synchronously.
386#[derive(Clone, Copy)]
387pub(crate) struct AsyncNativeCallCtx {
388    pub func_slot: u32, // NOTE(review): presumably the stack slot of the called native — confirm
389    /// Recorded for parity with the sync native-call path's
390    /// `native_nresults`/`gc_top` bookkeeping; reserved for Stage 3+
391    /// hook firing + traceback shaping. Not yet read in Stage 2.
392    #[allow(dead_code)]
393    pub nargs: u32,
394    pub nresults: i32, // NOTE(review): presumably the call site's expected result count, as in `Vm::native_nresults` — confirm
395    /// Recorded for Stage 3+ traceback + GC-root-window auditing.
396    /// Stage 2 reads `Vm.gc_top` directly post-resume, so this is
397    /// unread today; carried so a Stage 3 audit can confirm the
398    /// pre-suspend root window matches the post-resume one.
399    #[allow(dead_code)]
400    pub gc_top: u32,
401}
402
403/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/hookcount).
404/// `func` is the Lua hook, `rust_func` the Rust hook; the booleans are the PUC mask bits.
405#[derive(Clone, Copy, Default)]
406pub struct HookState {
407    /// the Lua hook function (`None` when no Lua hook is installed)
408    pub func: Option<Value>,
409    /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
410    /// (Rust first); both can be installed simultaneously, but most
411    /// embedders pick one.
412    pub rust_func: Option<RustDebugHook>,
413    /// LUA_MASKCALL — fire on function entry
414    pub call: bool,
415    /// LUA_MASKRET — fire on function return
416    pub ret: bool,
417    /// LUA_MASKLINE — fire on source-line change
418    pub line: bool,
419    /// LUA_MASKCOUNT — fire every `count_base` instructions
420    pub count: bool,
421    /// instruction count between count events (PUC basehookcount)
422    pub count_base: i64,
423    /// instructions left until the next count event (PUC hookcount)
424    pub count_left: i64,
425}
426
427/// Rust-side debug hook callback (B11): a plain `fn` pointer (capturing
428/// closures do not coerce) that receives the `Vm` plus a classified event.
429/// The callback runs synchronously in the dispatcher; the hook flag
430/// (`in_hook`) is set for its duration so hook recursion is suppressed.
431pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
432
433/// Classified debug event delivered to a [`RustDebugHook`].
434#[derive(Clone, Copy, Debug, PartialEq, Eq)]
435pub enum RustHookEvent {
436    /// Function entry (`hook_call` analogue).
437    Call,
438    /// Function return (`hook_return` analogue).
439    Return,
440    /// Tail call entry (PUC 5.2+ separates this from a plain Call).
441    TailCall,
442    /// Source-line change; the `u32` payload is the 1-based line number.
443    Line(u32),
444    /// Instruction count event (fires every [`HookState::count_base`] instructions).
445    Count,
446}
447
448/// Subscribe to function-entry events. The `HOOK_MASK_*` flags are the mask
449/// bits for [`Vm::set_rust_debug_hook`]; OR them to subscribe to several categories.
450pub const HOOK_MASK_CALL: u32 = 1;
451/// Subscribe to function-return events.
452pub const HOOK_MASK_RETURN: u32 = 2;
453/// Subscribe to line-change events.
454pub const HOOK_MASK_LINE: u32 = 4;
455/// Subscribe to instruction-count events.
456pub const HOOK_MASK_COUNT: u32 = 8;
457
458/// A thread's swapped-out execution context (PUC per-thread stack state); fields mirror `Vm`'s.
459struct SavedCtx {
460    stack: Vec<Value>,
461    frames: Vec<CallFrame>,
462    open_upvals: Vec<(u32, Gc<Upvalue>)>,
463    tbc: Vec<u32>,
464    top: u32,
465    pcall_depth: u32,
466    hook: HookState,
467    /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
468    /// the rest of the suspended state so each thread can keep its own
469    /// `setfenv(0, env)` rewire without the swap leaking into another
470    /// thread (5.1 closure.lua :177).
471    globals: Gc<Table>,
472}
473
474/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
475enum Unwound {
476    /// caught by a pcall/xpcall continuation; resume running its caller
477    Caught,
478    /// caught by a continuation that was the entry-level activation; the
479    /// payload holds the call's (wrapped) results
480    CaughtReturn(Vec<Value>),
481    /// no protecting continuation up to `entry_depth`; propagate this error
482    Propagated(LuaError),
483}
484
485/// A resolved debug stack level: a real Lua frame (by index into `frames`), a
486/// synthetic C frame for a call_value boundary, or a tail-call placeholder.
487pub(crate) enum DbgKind {
488    Lua(usize),
489    /// a synthetic C level; the index is the `from_c` Lua frame it sits below,
490    /// used to name the native via its invoking call instruction.
491    C(usize),
492    /// PUC `CIST_TAIL` placeholder — a Lua-to-Lua tail call collapsed the
493    /// caller's activation, so `debug.getinfo(level)` at this slot returns
494    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
495    /// `func = nil` and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
496    /// both shapes). The index points at the *tail-called* frame whose
497    /// `is_tail` flag induced this synthetic level.
498    Tail(#[allow(dead_code)] usize),
499}
500
501/// Outcome of an index/newindex/comparison fast path: a directly computed
502/// result, or a metamethod (plain, or the ≤5.3 `__le` synthesis) the caller
503/// must invoke — synchronously (C context) or yieldably (VM opcode).
504enum MmOut {
505    /// index → the looked-up value; newindex → done (raw set performed);
506    /// comparison → the boolean result already known
507    Done(Value),
508    /// a metamethod to call; `recv` is the chain element it was found on (the
509    /// extra args — key / value — are supplied by the caller)
510    Mm { func: Value, recv: Value },
511    /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
512    /// carries `__le` — `op_compare` swaps the args and negates the result.
513    /// Lives separate from `Mm` so the synth path can stay yieldable without
514    /// every other Mm caller learning a swap flag they would never set.
515    CompareSynth { func: Value },
516}
517
518/// Metamethod events; discriminants index `Vm::mm_names` (same order as `MM_NAMES`).
519#[derive(Clone, Copy, PartialEq, Eq)]
520#[repr(usize)]
521pub(crate) enum Mm {
522    Index,
523    NewIndex,
524    Call,
525    ToString,
526    Metatable,
527    Name,
528    Eq,
529    Lt,
530    Le,
531    Concat,
532    Len,
533    Add,
534    Sub,
535    Mul,
536    Div,
537    Mod,
538    Pow,
539    IDiv,
540    BAnd,
541    BOr,
542    BXor,
543    Shl,
544    Shr,
545    Unm,
546    BNot,
547    Close,
548    Gc,
549    Pairs,
550}
551
552const MM_NAMES: [&str; 28] = [ // one `__`-prefixed name per `Mm` variant, in discriminant order — keep in sync
553    "__index",
554    "__newindex",
555    "__call",
556    "__tostring",
557    "__metatable",
558    "__name",
559    "__eq",
560    "__lt",
561    "__le",
562    "__concat",
563    "__len",
564    "__add",
565    "__sub",
566    "__mul",
567    "__div",
568    "__mod",
569    "__pow",
570    "__idiv",
571    "__band",
572    "__bor",
573    "__bxor",
574    "__shl",
575    "__shr",
576    "__unm",
577    "__bnot",
578    "__close",
579    "__gc",
580    "__pairs",
581];
582
583/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
584/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
585///
586/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
587/// every event *except* `__gc` (`funcnamefromcall` returns the literal
588/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
589/// `getstr(tmname[tm]) + 2` to skip the `__`).
590fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
591    if version <= LuaVersion::Lua53 {
592        format!("__{tm}")
593    } else if tm == "gc" {
594        "__gc".to_string()
595    } else {
596        tm.to_string()
597    }
598}
599
600/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
601/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
602fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
603    use crate::vm::isa::Op;
604    Some(match op {
605        Op::Add => "add",
606        Op::Sub => "sub",
607        Op::Mul => "mul",
608        Op::Div => "div",
609        Op::Mod => "mod",
610        Op::Pow => "pow",
611        Op::IDiv => "idiv",
612        Op::BAnd => "band",
613        Op::BOr => "bor",
614        Op::BXor => "bxor",
615        Op::Shl => "shl",
616        Op::Shr => "shr",
617        Op::Unm => "unm",
618        Op::BNot => "bnot",
619        Op::Concat => "concat",
620        Op::Len => "len",
621        Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
622        Op::SetField | Op::SetTable | Op::SetI => "newindex",
623        Op::Eq | Op::EqK => "eq",
624        Op::Lt => "lt",
625        Op::Le => "le",
626        _ => return None,
627    })
628}
629
630/// PUC MAXTAGLOOP: bound on `__index`/`__newindex` chains.
631const MAX_TAG_LOOP: u32 = 2000;
632/// PUC `MAXCCMT`: bound on a `__call` metamethod chain (lvm.c). 200 chains
633/// is more than any reasonable program needs and matches PUC 5.4/5.5; the
634/// earlier `15` here was tight enough to fire on calls.lua :194 (N=20).
635const MAX_CCMT: u32 = 200;
636/// PUC LUAI_MAXCCALLS analogue: native↔Lua nesting bound.
637const MAX_C_DEPTH: u32 = 200;
638/// luna's engine-level VM stack cap (used by call-site overflow checks):
639/// 1_048_576 slots, slightly above PUC's `LUAI_MAXSTACK` (1_000_000) so engine
640/// internals have a little headroom above any single library push.
641const MAX_LUA_STACK: u32 = 1 << 20;
642/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults via
643/// `lua_checkstack` to refuse multi-value pushes (`table.unpack` returning
644/// N values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins
645/// this at one million — `for j in {lim-10, …}` expects every j ≥ lim-10
646/// to fail because the few slots already consumed in the coroutine push
647/// the effective cap below lim-10.
648const PUC_MAXSTACK: i64 = 1_000_000;
649
650/// PUC 5.4+ default warnf state. The base library's `warn` function flips
651/// between `Off` and `On` via the `@on` / `@off` control messages; any other
652/// `@<word>` control is silently ignored, mirroring `lauxlib.c::checkcontrol`.
///
/// `Vm::new_inner` initializes the Vm's `warn_state` field to `Off`.
653#[derive(Clone, Copy, PartialEq, Eq, Debug)]
654pub enum WarnState {
655    /// `warn` calls are silently dropped (default after `warn("@off")`).
656    Off,
657    /// `warn` calls are delivered to stderr (after `warn("@on")`).
658    On,
659}
660
/// Best-effort extraction of a textual message from a `catch_unwind` payload.
///
/// A panic raised with a plain literal (`panic!("msg")`) carries a
/// `&'static str`; a panic raised with format arguments
/// (`panic!("x = {x}")`) carries a `String`. Any other payload type (for
/// example one raised through `std::panic::panic_any`) degrades to
/// `"<non-string panic>"`. Used by the native-call catch_unwind to fold the
/// panic into a Lua error.
///
/// The parameter deliberately stays `&Box<dyn Any + Send>`: with a
/// `&dyn Any` parameter, a caller passing `&payload` would unsize-coerce the
/// `Box` itself into the trait object and every downcast below would miss.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        // Not a `String`: try the static-literal form next.
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|lit| (*lit).to_string())
        })
        // Neither textual form matched.
        .unwrap_or_else(|| "<non-string panic>".to_string())
}
674
675/// Combined error type returned by [`Vm::eval`] and friends — either the
676/// chunk failed to parse / compile, or it raised at runtime.
///
/// Both payload types convert into this enum through the `From` impls that
/// follow, so `?` works on either a `SyntaxError` or a `LuaError`.
677#[derive(Debug)]
678pub enum Error {
679    /// Parse or compile failure.
680    Syntax(SyntaxError),
681    /// Runtime error raised during execution.
682    Runtime(LuaError),
683}
684
/// Wraps a parse/compile failure as [`Error::Syntax`].
685impl From<SyntaxError> for Error {
686    fn from(e: SyntaxError) -> Error {
687        Error::Syntax(e)
688    }
689}
690
/// Wraps a runtime failure as [`Error::Runtime`].
691impl From<LuaError> for Error {
692    fn from(e: LuaError) -> Error {
693        Error::Runtime(e)
694    }
695}
696
/// Dropping a `Vm` queues every registered finalizer on the heap and then
/// runs them, in that order.
697impl Drop for Vm {
698    fn drop(&mut self) {
699        // state close: run `__gc` for every still-registered finalizable before
700        // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
701        // single pass — objects created by a closing finalizer are not
702        // re-finalized (they go to the heap's free list directly).
703        self.heap.queue_all_finalizers();
704        self.run_finalizers();
705    }
706}
707
708// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
709// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
710// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
711// as split borrows, allowing other `&mut self.field` reads inside the
712// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
713//
714// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
715// Week 2 begins migrating hot-path readers (materialize_frames helper)
716// to consume `frames_top` and a flat array in place of the Vec.
/// Push `cf` onto `frames`. Debug builds additionally increment the
/// `frames_top` shadow counter and assert that it equals `frames.len()`;
/// release builds leave `frames_top` untouched.
717#[inline(always)]
718fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
719    frames.push(cf);
720    // Shadow maintenance is debug-only: release builds skip the
721    // increment + assertion entirely. The shadow's purpose in Week 1
722    // is to VERIFY the assumed invariant (frames_top == frames.len())
723    // across all push/pop sites; once Week 2+ migrates readers to
724    // consume the shadow, release will run the increment unconditionally.
725    #[cfg(debug_assertions)]
726    {
727        *frames_top += 1;
728        debug_assert_eq!(
729            *frames_top as usize,
730            frames.len(),
731            "P17-D frames_top out of sync after push",
732        );
733    }
    // Release builds: touch the parameter so it is not reported as unused.
734    #[cfg(not(debug_assertions))]
735    let _ = frames_top;
736}
737
/// Pop the top frame off `frames` and return it (`None` when `frames` is
/// empty). Debug builds also decrement the `frames_top` shadow counter when a
/// frame was actually popped and assert that it equals `frames.len()`;
/// release builds leave `frames_top` untouched.
738#[inline(always)]
739fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
740    let r = frames.pop();
741    #[cfg(debug_assertions)]
742    {
        // Only count down for a real pop; `saturating_sub` keeps the counter
        // at 0 instead of wrapping if it is already 0.
743        if r.is_some() {
744            *frames_top = frames_top.saturating_sub(1);
745        }
746        debug_assert_eq!(
747            *frames_top as usize,
748            frames.len(),
749            "P17-D frames_top out of sync after pop",
750        );
751    }
    // Release builds: touch the parameter so it is not reported as unused.
752    #[cfg(not(debug_assertions))]
753    let _ = frames_top;
754    r
755}
756
/// v1.3 Phase AOT Stage 7 sub-piece 4 — one-time env-var read for
/// `LUNA_AOT_PROBE`. Returns `true` iff the variable is set to any
/// non-empty value, including a value that is not valid Unicode.
///
/// The environment is consulted on the first call only; the answer is cached
/// in a `OnceLock`, so each later call is a cached read and later changes to
/// the variable are not observed. Off by default — production deploys don't
/// bleed diagnostic prints.
fn jit_probe_enabled() -> bool {
    static PROBE_ON: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        // `var_os`, not `var`: `var` yields `Err` for a value that is not
        // valid Unicode, which would make a set, non-empty variable read as
        // "unset" and contradict the documented contract.
        std::env::var_os("LUNA_AOT_PROBE").is_some_and(|value| !value.is_empty())
    })
}
771
772impl Vm {
773    /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
774    /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
775    /// the Vec replacement to keep the shadow valid.
    ///
    /// The only statement is gated on `debug_assertions`, so the body is
    /// empty in release builds.
776    #[inline(always)]
777    fn frames_resync(&mut self) {
778        // Debug-only Week 1 — see `frames_push_sync` comment.
779        #[cfg(debug_assertions)]
780        {
781            self.frames_top = self.frames.len() as u32;
782        }
783    }
784
785    // ====================================================================
786    // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
787    //
788    // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
789    // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
790    // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
791    // sites to consume them. Phase 4 removes Vec<CallFrame>.
792    //
793    // Preconditions (debug-asserted):
794    // - base >= 2 (slots base-2 and base-1 must exist below the frame)
795    // - self.stack.len() > base + max_stack (caller has grown stack)
796    // - For Lua frames, stack[base-2] holds Value::Closure(cl)
797    // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
798    //
799    // No release-build cost when unused (LTO strips dead methods).
800    // ====================================================================
801
802    /// Write a Lua frame's closure pointer into `stack[base-2]`.
803    /// The caller must ensure `base >= 2` and the slot is within the
804    /// stack's allocated range.
    ///
    /// Both preconditions are checked with `debug_assert!` only; the store
    /// itself uses ordinary (bounds-checked) indexing.
805    #[inline]
806    #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
807    fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
808        debug_assert!(
809            base >= 2,
810            "frame closure slot needs base >= 2; got {}",
811            base
812        );
813        let idx = (base - 2) as usize;
814        debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
815        self.stack[idx] = Value::Closure(cl);
816    }
817
818    /// Read a Lua frame's closure pointer from `stack[base-2]`.
819    /// Returns `None` if the slot doesn't hold a closure (caller is
820    /// expected to treat that as a corrupt frame).
    /// Also returns `None` when the slot index is past the end of the stack.
821    ///
822    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
823    /// to avoid the enum-match cost on the hot path. Tag check via
824    /// 1-byte load + branch + `as_closure_unchecked` payload load.
825    #[inline]
826    #[allow(dead_code)]
827    fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
828        debug_assert!(base >= 2);
        // `get` is the checked lookup; `?` turns an out-of-range slot into `None`.
829        let v = self.stack.get((base - 2) as usize)?;
830        if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
831            // SAFETY: tag byte just verified == CLOSURE.
832            Some(unsafe { v.as_closure_unchecked() })
833        } else {
834            None
835        }
836    }
837
838    /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
839    /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
840    /// Stored as `Value::Int(marker.to_raw())` so it round-trips
841    /// cleanly through the value stack without losing bits.
    ///
    /// `base >= 1` and the slot being in range are checked with
    /// `debug_assert!` only; the store itself uses ordinary (bounds-checked)
    /// indexing.
842    #[inline]
843    #[allow(dead_code)]
844    fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
845        debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
846        let idx = (base - 1) as usize;
847        debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
848        self.stack[idx] = Value::Int(marker.to_raw());
849    }
850
851    /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
852    /// `None` if the slot isn't a `Value::Int` (caller treats as a
853    /// corrupt frame); the kind tag itself may still be invalid, in
854    /// which case [`FrameMarker::kind`] returns `None` on the result.
    /// Also returns `None` when the slot index is past the end of the stack.
855    ///
856    /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
857    /// for the tag check + `as_int_unchecked` for the payload load.
858    #[inline]
859    #[allow(dead_code)]
860    fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
861        debug_assert!(base >= 1);
        // `get` is the checked lookup; `?` turns an out-of-range slot into `None`.
862        let v = self.stack.get((base - 1) as usize)?;
863        if v.tag_byte() == crate::runtime::value::tag::INT {
864            // SAFETY: tag byte just verified == INT.
865            Some(crate::runtime::frame_marker::FrameMarker::from_raw(
866                unsafe { v.as_int_unchecked() },
867            ))
868        } else {
869            None
870        }
871    }
872
873    /// Build the raw `Vm` struct without main coroutine / RNG seed / library
874    /// setup. Private helper shared by `Vm::new` and `Vm::new_minimal`; the
875    /// caller is responsible for the rest of the bring-up.
    ///
    /// `version` is the only input. Besides being stored, it selects the two
    /// heap flags set below and whether the macro registry starts with the
    /// built-ins. Every other field gets the fixed initial value written out
    /// in the struct literal.
876    fn new_inner(version: LuaVersion) -> Vm {
877        let mut heap = Heap::new();
878        // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
879        // values strongly. gc.lua's "weak tables" section relies on that.
880        heap.no_ephemeron = version <= LuaVersion::Lua51;
881        // PUC 5.3 needs two GC cycles to finalize a table caught in a
882        // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
883        // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
884        heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
885        let globals = heap.new_table();
        // Intern every entry of `MM_NAMES` once, up front.
886        let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();
887
888        Vm {
889            heap,
890            stack: Vec::new(),
891            frames: Vec::new(),
892            frames_top: 0,
893            open_upvals: Vec::new(),
894            tbc: Vec::new(),
895            top: 0,
896            globals,
897            type_mt: [None; 5],
898            mm_names,
899            c_depth: 0,
900            pcall_depth: 0,
901            nny: 0,
902            msgh_depth: 0,
903            terminating: None,
            // All-zero here; `new_minimal` seeds the RNG afterwards.
904            rng: [0; 4],
905            started: std::time::Instant::now(),
906            version,
907            closing_err: None,
908            current: None,
909            main_ctx: None,
910            yielding: None,
911            native_nresults: -1,
            // Left `None` here; `new_minimal` creates the main coroutine.
912            main_coro: None,
913            gc_mode: "incremental",
914            gc_top: 0,
915            gc_pause: 200,
916            gc_stepmul: 100,
917            gc_stepsize: 13,
918            gc_finalizing: false,
919            capi_stack: Vec::new(),
920            capi_cstr_pin: None,
921            warn_state: WarnState::Off,
922            warn_buf: Vec::new(),
923            warn_log: Vec::new(),
924            instr_budget: None,
            // `load` refuses binary chunks when this flag is false.
925            bytecode_loading: true,
926            puc_bytecode_loading: false,
927            registry: None,
928            file_mt: None,
929            io_input: None,
930            io_output: None,
931            hook: HookState::default(),
932            in_hook: false,
933            pending_tailcalls: 0,
934            errored_native: None,
935            hook_ftransfer: 0,
936            hook_ntransfer: 0,
937            pending_tm: None,
938            pending_is_hook: false,
939            error_traceback: None,
940            public_call_depth: 0,
941            running_natives: Vec::new(),
942            running_native_slots: Vec::new(),
943            // v1.1 A2 — JIT-specific state factored into `JitState`
944            // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
945            // `install_jit_backend` / `luaL_newstate` swap in
946            // `CraneliftBackend` for callers that want JIT acceleration.
947            jit: crate::vm::jit_state::JitState::with_null_backend(),
948            // v1.1 B12 — host roots ticket pool for the `Lua` facade.
949            host_roots: Vec::new(),
950            // v1.3 Phase ML — MacroLua registry. Pre-populated with
951            // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
952            // when this Vm is constructed under `LuaVersion::MacroLua`.
953            macro_registry: if version == LuaVersion::MacroLua {
954                crate::frontend::macro_expander::MacroRegistry::with_builtins()
955            } else {
956                crate::frontend::macro_expander::MacroRegistry::new()
957            },
958            host_roots_free: Vec::new(),
959            // v1.2 Track B — LuaUserdata trait sugar's per-Vm
960            // metatable cache. Populated lazily by register_userdata.
961            userdata_metatables: std::collections::HashMap::new(),
962            // v1.1 B6 — error classification metadata. Defaults to
963            // Runtime; set at known sites (syntax / budget trip /
964            // native error / type error).
965            last_error_kind: crate::vm::error::LuaErrorKind::default(),
966            last_error_source: None,
967            // v1.1 B10 Stage 1 — async embedder fields. Defaults
968            // preserve sync behavior bit-for-bit (`async_mode = false`
969            // means the budget hot loop errors out exactly as v1.0).
970            async_mode: false,
971            async_waker: None,
972            async_slice_size: 10_000,
973            host_yield_pending: false,
974            // v1.1 B10 Stage 2 — pending async-native state. Empty by
975            // default; populated only by the dispatcher when an
976            // async-marked NativeClosure is invoked under async_mode.
977            pending_async_native_fut: None,
978            pending_async_native_ctx: None,
979        }
980    }
981
982    /// Build a fully-loaded Vm — the default for embedders that want PUC's
983    /// standard library surface. Equivalent to `Vm::new_minimal(version)`
984    /// followed by `vm.open_all_libs()`.
985    pub fn new(version: LuaVersion) -> Vm {
        // Exactly the two steps named in the doc comment, in that order.
986        let mut vm = Vm::new_minimal(version);
987        vm.open_all_libs();
988        vm
989    }
990
991    /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
992    /// that want a sandbox (Redis-style scripts, in-game scripting with
993    /// a curated API) call this and then `open_base` / `open_math` / etc.
994    /// selectively. The Vm is otherwise fully initialized (main coroutine,
995    /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
996    pub fn new_minimal(version: LuaVersion) -> Vm {
997        let mut vm = Vm::new_inner(version);
        // Create the main coroutine, mark it `Running`, and record it on the Vm.
998        let mc = vm.heap.new_coro(Value::Nil, vm.globals);
999        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1000        unsafe { mc.as_mut() }.status = CoroStatus::Running;
1001        vm.main_coro = Some(mc);
        // Seed the RNG from `rng_auto_seed`; the `i64` pair is cast to `u64`
        // because `rng_seed` takes unsigned words.
1002        let (a, b) = vm.rng_auto_seed();
1003        vm.rng_seed(a as u64, b as u64);
1004        vm
1005    }
1006
1007    /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
1008    /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
1009    /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
1010    /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
1011    /// closures already populated `Proto.jit: JitProtoState::Compiled`
1012    /// does NOT evict those cached entries — call right after
1013    /// construction for a clean swap.
1014    ///
1015    /// Naming: `install_jit_backend` (not `install_default_jit`)
1016    /// because the "default" in luna-core is `NullJitBackend`; the
1017    /// "default JIT" lives in the `luna` crate.
1018    pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
1019    where
1020        C: crate::jit::IntChunkCompiler + 'static,
1021        T: crate::jit::TraceCompiler + 'static,
1022    {
        // Each compiler is boxed and overwrites the one currently installed.
1023        self.jit.chunk_compiler = Box::new(chunk);
1024        self.jit.trace_compiler = Box::new(trace);
1025    }
1026
1027    /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
1028    /// reports "skipped" so every closure stays on the interpreter
1029    /// path, and the trace recorder's compile attempt always returns
1030    /// `None`. Intended for tests that want to verify the trait
1031    /// boundary works in a JIT-free configuration, and for the future
1032    /// `luna-core` build path that ships without Cranelift.
1033    ///
1034    /// Calling this on a Vm whose closures already populated
1035    /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
1036    /// cached entries — the dispatcher will still call into them. For
1037    /// a truly JIT-free run, call this immediately after construction.
1038    pub fn install_null_jit(&mut self) {
        // The same `NullJitBackend` type fills both the chunk-compiler and
        // the trace-compiler slot.
1039        self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
1040        self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
1041    }
1042
1043    /// Open the entire 5.5 standard library on a `new_minimal`-built Vm.
1044    /// `Vm::new` calls this; sandboxed embedders open libraries one at a
1045    /// time instead (`open_base`, `open_math`, `open_table`, …).
    ///
    /// Nine libraries are opened unconditionally, in the order written
    /// below; `bit32` is added only when the Vm's version is 5.2.
1046    pub fn open_all_libs(&mut self) {
1047        self.open_base();
1048        self.open_math();
1049        self.open_table();
1050        self.open_string();
1051        self.open_utf8();
1052        self.open_os_io();
1053        self.open_debug();
1054        self.open_coroutine();
1055        self.open_package();
1056        // PUC 5.2 introduced `bit32` and 5.3 retired it (the native bitwise
1057        // operators replace it on 64-bit integers). Only expose it under 5.2
1058        // so bitwise.lua's first line (`bit32.band(...)`) resolves without
1059        // leaking the global into newer dialects.
1060        if self.version == LuaVersion::Lua52 {
1061            self.open_bit32();
1062        }
1063    }
1064
1065    /// Install the base library (`print`, `type`, `pairs`, `tostring`,
1066    /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
1067    /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
1068    /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
1069    /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
1070    /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
1071    /// once per Vm.
    ///
    /// Delegates to `crate::vm::builtins::open_base`.
1072    pub fn open_base(&mut self) {
1073        crate::vm::builtins::open_base(self);
1074    }
1075    /// Install the `math` standard library.
    /// Delegates to `crate::vm::lib_math::open_math`.
1076    pub fn open_math(&mut self) {
1077        crate::vm::lib_math::open_math(self);
1078    }
1079    /// Install the `table` standard library.
    /// Delegates to `crate::vm::lib_table::open_table`.
1080    pub fn open_table(&mut self) {
1081        crate::vm::lib_table::open_table(self);
1082    }
1083    /// Install the `string` standard library (and the shared string metatable).
    /// Delegates to `crate::vm::lib_string::open_string`.
1084    pub fn open_string(&mut self) {
1085        crate::vm::lib_string::open_string(self);
1086    }
1087    /// Install the `utf8` standard library (5.3+).
    /// Delegates to `crate::vm::lib_utf8::open_utf8`; this wrapper itself
    /// performs no version check.
1088    pub fn open_utf8(&mut self) {
1089        crate::vm::lib_utf8::open_utf8(self);
1090    }
    /// Install the `os` and `io` standard libraries together.
    ///
1091    /// `os` and `io` are merged because file userdata shares state with both
1092    /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
1093    /// wraps `os.execute`'s shell).
    /// Delegates to `crate::vm::lib_os_io::open_os_io`.
1094    pub fn open_os_io(&mut self) {
1095        crate::vm::lib_os_io::open_os_io(self);
1096    }
1097    /// Install the `debug` standard library (introspection / hooks). Off by
1098    /// default for sandbox embedders.
    /// Delegates to `crate::vm::lib_debug::open_debug`.
1099    pub fn open_debug(&mut self) {
1100        crate::vm::lib_debug::open_debug(self);
1101    }
1102    /// Install the `coroutine` standard library.
    /// Delegates to `crate::vm::lib_coroutine::open_coroutine`.
1103    pub fn open_coroutine(&mut self) {
1104        crate::vm::lib_coroutine::open_coroutine(self);
1105    }
1106    /// `package` plus the 5.1-only `module` and `package.seeall` aliases.
    /// Delegates to `open_package` in the `lib_os_io` module (there is no
    /// separate package module on this path).
1107    pub fn open_package(&mut self) {
1108        crate::vm::lib_os_io::open_package(self);
1109    }
1110    /// 5.2-only `bit32` library (5.3+ retired in favour of native bitwise
1111    /// ops on 64-bit integers).
    /// Delegates to `crate::vm::lib_bit32::open_bit32`. This wrapper does not
    /// check the version itself; `open_all_libs` does that before calling it.
1112    pub fn open_bit32(&mut self) {
1113        crate::vm::lib_bit32::open_bit32(self);
1114    }
1115
1116    /// xoshiro256** next.
    ///
    /// Returns the next 64-bit output and advances the four-word state in
    /// `self.rng`.
1117    pub(crate) fn rng_next(&mut self) -> u64 {
1118        let s = &mut self.rng;
        // The output is derived from `s[1]` before any state word changes.
1119        let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1120        let t = s[1] << 17;
        // State transition. The order matters: each XOR reads the words the
        // previous lines have already updated.
1121        s[2] ^= s[0];
1122        s[3] ^= s[1];
1123        s[1] ^= s[2];
1124        s[0] ^= s[3];
1125        s[2] ^= t;
1126        s[3] = s[3].rotate_left(45);
1127        result
1128    }
1129
1130    /// Seed the RNG from two 64-bit words, PUC `setseed` style: fill the
    /// state directly, then discard 16 outputs. No splitmix64 step is
    /// performed here.
1131    pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1132        // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1133        // state), then 16 discards to spread the seed. Matches PUC's exact
1134        // sequence so the low-level conformance test passes.
1135        self.rng = [a, 0xff, b, 0];
1136        for _ in 0..16 {
1137            self.rng_next();
1138        }
1139    }
1140
1141    /// Wall-clock since VM creation (os.clock approximation).
    /// Measured as the time elapsed on the `Instant` stored in `self.started`.
1142    pub(crate) fn uptime(&self) -> std::time::Duration {
1143        self.started.elapsed()
1144    }
1145
1146    /// Entropy for math.randomseed() with no arguments.
    ///
    /// Returns `(time, address)`: the current time in nanoseconds since the
    /// Unix epoch, and the address of this Vm's `rng` field, both cast to
    /// `i64`.
1147    pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
        // `as_nanos` is `u128`; the cast keeps the low 64 bits. A clock that
        // reads before the epoch falls back to 0.
1148        let t = std::time::SystemTime::now()
1149            .duration_since(std::time::UNIX_EPOCH)
1150            .map(|d| d.as_nanos() as u64)
1151            .unwrap_or(0);
        // The field's address serves as the second seed word.
1152        let addr = &self.rng as *const _ as u64;
1153        (t as i64, addr as i64)
1154    }
1155
1156    /// Allocate a native function object (no upvalues): builtin registration.
    /// The closure is allocated on the heap with an empty upvalue slice and
    /// returned wrapped in `Value::Native`.
1157    pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1158        Value::Native(self.heap.new_native(f, Box::new([])))
1159    }
1160
1161    /// Allocate a native function object with captured upvalues.
    /// `upvals` is handed to the heap allocation as-is; the result is wrapped
    /// in `Value::Native`.
1162    pub fn native_with(
1163        &mut self,
1164        f: crate::runtime::value::NativeFn,
1165        upvals: Box<[Value]>,
1166    ) -> Value {
1167        Value::Native(self.heap.new_native(f, upvals))
1168    }
1169
1170    /// Install the shared string metatable (string library, P04).
    /// Passing `None` stores `None` in the slot.
1171    pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
        // NOTE(review): index 3 is presumably the string entry of the
        // five-slot `type_mt` array — confirm against the readers of `type_mt`.
1172        self.type_mt[3] = mt;
1173    }
1174
1175    /// The current globals table (`_G` / `_ENV` source for new chunks).
    /// Returns the handle by value; see `set_globals` for replacing it.
1176    pub fn globals(&self) -> Gc<Table> {
1177        self.globals
1178    }
1179
1180    /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1181    /// Library code that pushes a known number of fresh slots — e.g.
1182    /// `table.unpack` returning N values — consults this to refuse when
1183    /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1184    /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1185    /// coroutine's already-built table eats a few slots, so an unpack of
1186    /// ~lim values can't fit.
    ///
    /// Computed as `PUC_MAXSTACK - stack.len()` in `i64`, so the result is
    /// negative whenever the stack is longer than `PUC_MAXSTACK`.
1187    pub(crate) fn stack_room(&self) -> i64 {
1188        PUC_MAXSTACK - (self.stack.len() as i64)
1189    }
1190
1191    /// Repoint the thread's "global table" used by *future* `Vm::load` calls
1192    /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
1193    /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
1194    /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
    /// The method itself only overwrites the `globals` field.
1195    pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
1196        self.globals = env;
1197    }
1198
1199    /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
1200    /// 5.5). Determines numeric semantics, available standard libraries, and
1201    /// metamethod behavior.
    /// Returns the stored `version` field by value.
1202    pub fn version(&self) -> LuaVersion {
1203        self.version
1204    }
1205
1206    /// Set a global by name. `v` may be any `IntoValue`: a primitive
1207    /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1208    /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1209    /// `Gc<NativeClosure>` handle.
1210    ///
1211    /// Returns `Err(LuaError)` only if the globals table overflows
1212    /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1213    /// String interning + key construction cannot fail.
1214    ///
1215    /// ```
1216    /// # use luna_core::vm::Vm;
1217    /// # use luna_core::version::LuaVersion;
1218    /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1219    /// vm.set_global("answer", 42).unwrap();
1220    /// vm.set_global("ratio", 0.5_f64).unwrap();
1221    /// vm.set_global("hello", "world").unwrap();
1222    /// let r = vm.eval("return answer, ratio, hello").unwrap();
1223    /// assert_eq!(r.len(), 3);
1224    /// ```
1225    pub fn set_global<V: crate::vm::IntoValue>(
1226        &mut self,
1227        name: &str,
1228        v: V,
1229    ) -> Result<(), LuaError> {
        // Convert the value first, then intern the key as a Lua string.
1230        let v = v.into_value(self);
1231        let k = Value::Str(self.heap.intern(name.as_bytes()));
1232        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1233        unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
        // Write barrier on the globals table after the store; reached only
        // when the `set` above succeeded.
1234        self.heap
1235            .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1236        Ok(())
1237    }
1238
1239    /// Backward write barrier shorthand for native lib code: demote `t` from
1240    /// BLACK back to gray so the next propagate step re-traces its fields.
1241    /// No-op outside Propagate (parent is never BLACK at mutation time).
    /// Forwards the table pointer, cast to `*mut GcHeader`, to
    /// `Heap::barrier_back`.
1242    pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1243        self.heap
1244            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1245    }
1246
1247    /// Forward write barrier shorthand: a closed upvalue is a single-slot
1248    /// container — `barrier_forward` is cheaper than `barrier_back` here.
1249    /// No-op outside Propagate.
    /// Forwards the upvalue pointer, cast to `*mut GcHeader`, together with
    /// `child` to `Heap::barrier_forward`.
1250    pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1251        self.heap
1252            .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1253    }
1254
1255    /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
1256    /// under non-MacroLua dialects (the macro is stored but the load
1257    /// path only consults the registry when
1258    /// `self.version == LuaVersion::MacroLua`).
1259    ///
1260    /// `name` is stored without the leading `@` — source code writes
1261    /// `@double(x)` to invoke a macro registered as `"double"`.
    ///
    /// Forwards both arguments to the registry's `register`.
1262    pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
1263        self.macro_registry.register(name, m);
1264    }
1265
1266    /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom).
1267    /// Mostly useful for tests / dogfood resets.
    /// Calls `clear` on the per-Vm macro registry.
1268    pub fn clear_macros(&mut self) {
1269        self.macro_registry.clear();
1270    }
1271
1272    /// Parse + compile a chunk and close it over the globals table.
    ///
    /// `src` is either Lua source text or a precompiled binary chunk
    /// (detected with `is_binary_chunk`). `chunkname` is passed to the
    /// compiler on the source paths. Binary chunks are rejected with a
    /// `SyntaxError` (line 0) when `bytecode_loading` is off, and undump
    /// failures are likewise reported as a `SyntaxError` at line 0.
1273    pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1274        // a precompiled (binary) chunk is undumped; source is parsed + compiled
1275        let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1276        if is_bytecode && !self.bytecode_loading {
1277            return Err(SyntaxError {
1278                line: 0,
1279                msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1280            });
1281        }
        // Three ways to obtain the prototype: undump, MacroLua
        // (lex → expand → parse), or the plain parse + compile path.
1282        let proto = if is_bytecode {
1283            let allow_puc = self.puc_bytecode_loading;
1284            crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1285                |msg| SyntaxError {
1286                    line: 0,
1287                    msg: msg.into_bytes(),
1288                },
1289            )?
1290        } else if self.version.is_macro_lua() {
1291            // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1292            // token vec, run the macro expander pre-pass against the
1293            // per-Vm registry, then hand the rewritten stream to
1294            // `parse_tokens`. The AST + compiler are dialect-agnostic
1295            // because by this point all `@`/quote tokens are gone.
1296            let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1297            let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1298            loop {
1299                let t = lexer.next_token()?;
1300                let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1301                raw.push(t);
1302                if eof {
1303                    break;
1304                }
1305            }
1306            // Drop the trailing Eof — expander operates on the body and
1307            // `parse_tokens` reinserts Eof when it runs out of tokens.
1308            raw.pop();
1309            let expanded = self.macro_registry.expand(raw)?;
1310            let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1311            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1312        } else {
1313            let ast = parse(src, self.version)?;
1314            compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1315        };
1316        // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1317        // upvalue with the globals table when the closure has *exactly* one
1318        // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1319        // function with two-or-more upvalues keeps every cell at nil; the
1320        // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1321        // :293's `assert(x() == nil)` pins this contract.
1322        let n = proto.upvals.len();
1323        let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
        // NOTE(review): the `n == 0` and `n == 1` branches below build the
        // same single globals-seeded cell; they differ only in rationale and
        // could be merged into one `n <= 1` branch.
1324        if n == 0 {
1325            // synthetic main chunk has no declared upvalues, but the engine
1326            // still expects at least one cell so the host can probe via
1327            // `debug.upvalueid` etc. Match the historical luna shape.
1328            ups.push(
1329                self.heap
1330                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1331            );
1332        } else if n == 1 {
1333            ups.push(
1334                self.heap
1335                    .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1336            );
1337        } else {
1338            for _ in 0..n {
1339                ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1340            }
1341        }
1342        Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1343    }
1344
1345    /// Compile and run `src` as an anonymous chunk; return its results.
1346    /// Source name in the traceback is `"=eval"`. Syntax errors are
1347    /// surfaced as `LuaError` carrying the formatted PUC-style message
1348    /// (interned through the heap so the error value composes with
1349    /// `pcall` / `error_text` like any runtime error).
1350    pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
1351        self.eval_chunk(src, "=eval")
1352    }
1353
1354    /// Render an error value for messages/tests. Non-string errors —
1355    /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1356    /// (`"(error object is a table value)"`); embedders that need
1357    /// structured payloads should inspect `e.0` directly. Errors whose
1358    /// text starts with `"native panic:"` indicate a Rust panic
1359    /// crossed `catch_unwind` — the Vm may be inconsistent and should
1360    /// be dropped (do not reuse).
1361    pub fn error_text(&self, e: &LuaError) -> String {
1362        match e.0 {
1363            Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1364            v => format!("(error object is a {} value)", v.type_name()),
1365        }
1366    }
1367
1368    /// Call any callable value from the host (or from natives like pcall).
1369    pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1370        // host-level entry (no enclosing exec): drop any error state from a
1371        // prior call that propagated uncaught (`error_traceback` would
1372        // otherwise leak into the next debug.traceback call).
1373        if self.public_call_depth == 0 {
1374            self.error_traceback = None;
1375        }
1376        self.public_call_depth += 1;
1377        // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1378        // chunk whose body fits the S1 int-arith whitelist short-circuits
1379        // the whole interpreter dispatch and runs straight through the
1380        // mmap'd native code. The lookup is one Cell::get + one match —
1381        // the slow path (compile attempt on first reach) is paid once per
1382        // Proto.
1383        if args.is_empty()
1384            && let Value::Closure(cl) = f
1385            && let Some(vs) = self.try_jit_call(cl)
1386        {
1387            self.public_call_depth -= 1;
1388            return Ok(vs);
1389        }
1390        let r = self.call_value_impl(f, args, true);
1391        self.public_call_depth -= 1;
1392        r
1393    }
1394
1395    /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1396    /// `Some(values)` when the cached native fn is callable for a
1397    /// zero-arg call. (Non-zero-arg dispatch is handled by
1398    /// `try_jit_call_op` from inside `begin_call`.)
1399    fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1400        use crate::runtime::function::JitProtoState;
1401        if !self.jit.enabled {
1402            return None;
1403        }
1404        let proto = cl.proto;
1405        if let JitProtoState::Untried = proto.jit.get() {
1406            self.populate_jit_cache(proto);
1407        }
1408        match proto.jit.get() {
1409            JitProtoState::Compiled {
1410                entry,
1411                num_args: 0,
1412                returns_one,
1413                arg_float_mask: _,
1414                arg_table_mask: _,
1415                ret_is_float,
1416                ret_is_table,
1417            } => {
1418                // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1419                let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1420                // P11-S5c / S5d.J — install the active Vm + closure
1421                // for any Rust helper the JIT'd code may call (e.g.
1422                // `luna_jit_new_table`, `luna_jit_upval_get`) via
1423                // cranelift `Linkage::Import`. RAII clear on return.
1424                // Chunks with no upvalue reads don't touch the closure
1425                // slot, paying nothing.
1426                // v1.1 A1 Session A — route through chunk_compiler so
1427                // the NullJitBackend path stays inert. Raw-ptr arg
1428                // avoids the &mut self borrow conflict against the
1429                // shared self.jit.chunk_compiler read.
1430                let vm_ptr: *mut Vm = self;
1431                let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1432                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1433                let r = unsafe { f() };
1434                drop(_jit_vm_guard);
1435                // P11-S5d.E' — a JIT helper may have detected a metatable
1436                // on a table operand and parked a deopt request here.
1437                // Discard the sentinel value and return None so the caller
1438                // re-runs the call through the interpreter, which honours
1439                // __index/__newindex.
1440                if self.jit.pending_err.take().is_some() {
1441                    return None;
1442                }
1443                Some(if returns_one {
1444                    let v = if ret_is_float {
1445                        Value::Float(f64::from_bits(r as u64))
1446                    } else if ret_is_table {
1447                        Value::Table(crate::runtime::Gc::from_ptr(
1448                            r as *mut crate::runtime::Table,
1449                        ))
1450                    } else {
1451                        Value::Int(r)
1452                    };
1453                    vec![v]
1454                } else {
1455                    Vec::new()
1456                })
1457            }
1458            // Non-zero-arg Compiled state: call_value's empty-args
1459            // fast path can't drive it. Op::Call handles those.
1460            JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1461        }
1462    }
1463
1464    /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1465    /// `Compiled { … }` or `Failed`; idempotent on already-populated
1466    /// states (call sites guard with a get before invoking).
1467    ///
1468    /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1469    /// `proto.code`. Compiled artefacts live in the thread-local
1470    /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1471    /// `Vm`s loading the same source skip the cranelift compile step
1472    /// entirely.
1473    fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1474        use crate::runtime::function::JitProtoState;
1475        let version = self.version();
1476        let pre53 = version <= crate::version::LuaVersion::Lua53;
1477        // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1478        // are Float). The JIT's `GetUpval` ValueRead path uses this
1479        // to default-pin upvalue reads to Float without a tag check.
1480        let float_only = version <= crate::version::LuaVersion::Lua52;
1481        match self
1482            .jit
1483            .chunk_compiler
1484            .try_compile(proto, pre53, float_only)
1485        {
1486            crate::jit::CompileResult::Compiled {
1487                entry,
1488                num_args,
1489                returns_one,
1490                arg_float_mask,
1491                arg_table_mask,
1492                ret_is_float,
1493                ret_is_table,
1494            } => {
1495                proto.jit.set(JitProtoState::Compiled {
1496                    entry,
1497                    num_args,
1498                    returns_one,
1499                    arg_float_mask,
1500                    arg_table_mask,
1501                    ret_is_float,
1502                    ret_is_table,
1503                });
1504            }
1505            crate::jit::CompileResult::Skipped => {
1506                proto.jit.set(JitProtoState::Failed);
1507            }
1508        }
1509    }
1510
1511    /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
1512    /// before `push_frame`. Returns `true` when the call was handled
1513    /// in-place (no new Lua frame). Constraints: every arg slot must
1514    /// be `Value::Int`, the cached arity must match the call site's
1515    /// `nargs`, the host wanted-count `wanted` is honoured by
1516    /// `finish_results`. Also bails when a debug hook is armed —
1517    /// JIT'd code does not fire line / call / return hooks, so any
1518    /// active hook makes the interpreter the source of truth.
1519    fn try_jit_call_op(
1520        &mut self,
1521        cl: Gc<LuaClosure>,
1522        func_slot: u32,
1523        nargs: u32,
1524        wanted: i32,
1525    ) -> bool {
1526        use crate::runtime::function::JitProtoState;
1527        if !self.jit.enabled {
1528            return false;
1529        }
1530        // Any active debug hook means the interpreter has to run the
1531        // call so the hook gets the expected events.
1532        if self.hook.func.is_some() || self.hook.rust_func.is_some() {
1533            return false;
1534        }
1535        let proto = cl.proto;
1536        if let JitProtoState::Untried = proto.jit.get() {
1537            self.populate_jit_cache(proto);
1538        }
1539        let JitProtoState::Compiled {
1540            entry,
1541            num_args,
1542            returns_one,
1543            arg_float_mask,
1544            arg_table_mask,
1545            ret_is_float,
1546            ret_is_table,
1547        } = proto.jit.get()
1548        else {
1549            return false;
1550        };
1551        if num_args as u32 != nargs {
1552            return false;
1553        }
1554        // Pack args into i64 bit-patterns per the per-slot expected
1555        // kind. A Float-typed slot accepts Value::Float verbatim and
1556        // promotes Value::Int(x) via i64 → f64; a Table-typed slot
1557        // accepts only Value::Table and passes the raw Gc ptr; an
1558        // Int-typed slot accepts only Value::Int. Any other shape
1559        // bails to the interpreter so the call's actual dynamics
1560        // (metamethod dispatch / type-coerce) take over.
1561        let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
1562            [0; crate::jit::MAX_JIT_ARITY as usize];
1563        for i in 0..num_args as usize {
1564            let v = self.stack[(func_slot + 1) as usize + i];
1565            let want_float = (arg_float_mask >> i) & 1 == 1;
1566            let want_table = (arg_table_mask >> i) & 1 == 1;
1567            args[i] = match (want_table, want_float, v) {
1568                (true, _, Value::Table(t)) => t.as_ptr() as i64,
1569                (false, false, Value::Int(x)) => x,
1570                (false, true, Value::Float(f)) => f.to_bits() as i64,
1571                (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
1572                _ => return false,
1573            };
1574        }
1575        // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
1576        // matching guard in `try_jit_call`.
1577        // v1.1 A1 Session A — route through chunk_compiler.
1578        let vm_ptr: *mut Vm = self;
1579        let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1580        // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1581        let r = unsafe {
1582            match num_args {
1583                0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
1584                1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
1585                2 => {
1586                    (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
1587                }
1588                3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
1589                    args[0], args[1], args[2],
1590                ),
1591                4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
1592                    args[0], args[1], args[2], args[3],
1593                ),
1594                _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
1595            }
1596        };
1597        drop(_jit_vm_guard);
1598        // P11-S5d.E' — see matching path in `try_jit_call`. A helper
1599        // flagged a metatable on a table operand; bail to the interpreter
1600        // so `push_frame` runs the call from scratch.
1601        if self.jit.pending_err.take().is_some() {
1602            return false;
1603        }
1604        // Write result at func_slot, replacing the closure value, then
1605        // hand to finish_results to pad/truncate per the call site's
1606        // `wanted` count.
1607        if returns_one {
1608            let v = if ret_is_float {
1609                Value::Float(f64::from_bits(r as u64))
1610            } else if ret_is_table {
1611                Value::Table(crate::runtime::Gc::from_ptr(
1612                    r as *mut crate::runtime::Table,
1613                ))
1614            } else {
1615                Value::Int(r)
1616            };
1617            self.stack[func_slot as usize] = v;
1618            self.finish_results(func_slot, 1, wanted);
1619        } else {
1620            self.finish_results(func_slot, 0, wanted);
1621        }
1622        true
1623    }
1624
1625    /// `call_value` with control over the `from_c` debug boundary. A `__close`
1626    /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1627    /// invokes it inside that ci), so it is called with `from_c = false`: its
1628    /// debug parent is the closing function, not a synthetic C level.
1629    fn call_value_impl(
1630        &mut self,
1631        f: Value,
1632        args: &[Value],
1633        from_c: bool,
1634    ) -> Result<Vec<Value>, LuaError> {
1635        if self.c_depth >= MAX_C_DEPTH {
1636            return Err(self.rt_err("stack overflow"));
1637        }
1638        self.c_depth += 1;
1639        let func_slot = self.stack.len() as u32;
1640        self.stack.push(f);
1641        self.stack.extend_from_slice(args);
1642        self.top = self.stack.len() as u32;
1643        let r = self.call_at(func_slot, args.len() as u32, from_c);
1644        self.c_depth -= 1;
1645        if r.is_err()
1646            && self.yielding.is_none()
1647            && self.terminating.is_none()
1648            && !self.host_yield_pending
1649            && self.pending_async_native_fut.is_none()
1650        {
1651            // A `coroutine.yield` in flight raises a sentinel error to unwind the
1652            // Rust stack, but the suspended coroutine's frames/registers (which
1653            // sit at/above `func_slot`) must survive for the next resume — so we
1654            // only truncate on a real error. A self-close termination is in the
1655            // same boat: the dying thread's state is discarded wholesale.
1656            // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1657            // the same boat as `yielding`: the next `EvalFuture::poll`
1658            // resumes the same call, so the in-flight frames must
1659            // survive.
1660            self.stack.truncate(func_slot as usize);
1661            self.top = func_slot;
1662        }
1663        r
1664    }
1665
1666    /// Invoke `f` with the running thread marked non-yieldable for the duration
1667    /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1668    /// boundary and errors instead of suspending. Used by library callbacks
1669    /// (sort comparator, gsub replacement) that run via synchronous Rust
1670    /// recursion and so could not be re-entered after a yield.
1671    pub(crate) fn call_noyield(
1672        &mut self,
1673        f: Value,
1674        args: &[Value],
1675    ) -> Result<Vec<Value>, LuaError> {
1676        self.nny += 1;
1677        let r = self.call_value(f, args);
1678        self.nny -= 1;
1679        r
1680    }
1681
1682    // ---- coroutines (P05) ----
1683
1684    pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1685        // The new coroutine inherits the creating thread's current globals
1686        // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1687        // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1688        // it here picks the creator regardless of which coro is running.
1689        self.heap.new_coro(body, self.globals)
1690    }
1691
1692    /// Is `t` the thread whose context is currently live in the VM?
1693    pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1694        match (self.current, t) {
1695            (None, None) => true,
1696            (Some(a), Some(b)) => a.ptr_eq(b),
1697            _ => false,
1698        }
1699    }
1700
1701    /// Read an open-upvalue slot from its owning thread's stack (the live VM
1702    /// stack if that thread is current, else its saved context).
1703    #[doc(hidden)]
1704    pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1705        let s = slot as usize;
1706        if self.is_current_thread(thread) {
1707            self.stack[s]
1708        } else {
1709            match thread {
1710                Some(co) => co.stack[s],
1711                None => self.main_ctx.as_ref().expect("main context").stack[s],
1712            }
1713        }
1714    }
1715
1716    fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1717        let s = slot as usize;
1718        if self.is_current_thread(thread) {
1719            self.stack[s] = v;
1720        } else {
1721            match thread {
1722                Some(co) => {
1723                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1724                    unsafe { co.as_mut() }.stack[s] = v;
1725                    // co.stack is traced by Coro::trace; demote co back to
1726                    // gray so propagate re-traces this slot if it was
1727                    // already black.
1728                    self.heap
1729                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1730                }
1731                None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1732            }
1733        }
1734    }
1735
1736    /// Whether `co` is the main thread's identity object.
1737    pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1738        self.main_coro.is_some_and(|m| m.ptr_eq(co))
1739    }
1740
1741    /// The status of `co` from the caller's view. The main thread's identity
1742    /// object has no stored status — it is "running" when nothing else runs,
1743    /// else "normal" (it resumed the active coroutine).
1744    pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1745        if self.is_main_coro(co) {
1746            if self.current.is_none() {
1747                CoroStatus::Running
1748            } else {
1749                CoroStatus::Normal
1750            }
1751        } else {
1752            co.status
1753        }
1754    }
1755
1756    /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1757    /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1758    /// context. Handlers see the coroutine's death error (if it died by error)
1759    /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1760    /// with error `e` and no handler overrode it; `Err` means a handler raised.
1761    pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1762        // re-entrant close: a __close handler closed its own coroutine while the
1763        // outer close is mid-flight (its context is live). Report success and let
1764        // the outer close finish — re-entering the swap would corrupt the stack.
1765        if self.current.is_some_and(|c| c.ptr_eq(co)) {
1766            return Ok(None);
1767        }
1768        // A chain of coroutines whose `__close` handlers each close the previous
1769        // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1770        // The calling handler's `call_value` has already pushed `c_depth` to the
1771        // cap, so here it reads as full first — report PUC's "C stack overflow"
1772        // before the next handler call would surface the plainer "stack overflow".
1773        if self.c_depth >= MAX_C_DEPTH {
1774            return Err(self.rt_err("C stack overflow"));
1775        }
1776        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1777        let death_err = unsafe { co.as_mut() }.error_value.take();
1778        // swap the caller's live context out (into a GC-rooted home) and the
1779        // coroutine's in, mirroring resume_coro, so the __close handlers run on
1780        // the coroutine's stack while everything stays rooted.
1781        let resumer = self.current;
1782        let rctx = self.take_ctx();
1783        match resumer {
1784            Some(r) => {
1785                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1786                let m = unsafe { r.as_mut() };
1787                m.stack = rctx.stack;
1788                m.frames = rctx.frames;
1789                m.open_upvals = rctx.open_upvals;
1790                m.tbc = rctx.tbc;
1791                m.top = rctx.top;
1792                m.pcall_depth = rctx.pcall_depth;
1793            }
1794            None => self.main_ctx = Some(rctx),
1795        }
1796        self.load_coro_ctx(co);
1797        self.current = Some(co);
1798        let result = self.close_slots(0, death_err);
1799        // discard the (now-closed) coroutine context and restore the caller
1800        let _ = self.take_ctx();
1801        match resumer {
1802            Some(r) => {
1803                self.load_coro_ctx(r);
1804                self.current = Some(r);
1805            }
1806            None => {
1807                let m = self.main_ctx.take().expect("main context saved");
1808                self.put_ctx(m);
1809                self.current = None;
1810            }
1811        }
1812        {
1813            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1814            let m = unsafe { co.as_mut() };
1815            m.status = CoroStatus::Dead;
1816            m.stack = Vec::new();
1817            m.frames = Vec::new();
1818            m.open_upvals = Vec::new();
1819            m.tbc = Vec::new();
1820            m.top = 0;
1821            m.pcall_depth = 0;
1822            m.resume_at = None;
1823            m.error_value = None;
1824        }
1825        result.map(|()| death_err)
1826    }
1827
1828    /// `coroutine.running`: the running thread plus whether it is the main one.
1829    pub(crate) fn running_thread(&self) -> (Value, bool) {
1830        match self.current {
1831            Some(co) => (Value::Coro(co), false),
1832            None => (Value::Coro(self.main_coro.expect("main coro")), true),
1833        }
1834    }
1835
1836    /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1837    /// thread) can yield. The main thread never can; any other coroutine can
1838    /// unless it is dead.
1839    pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1840        match co {
1841            Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1842            // the running thread can yield only outside any non-yieldable C call
1843            None => self.current.is_some() && self.nny == 0,
1844        }
1845    }
1846
1847    /// Why `coroutine.yield` may not suspend the running thread right now, as a
1848    /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1849    /// from "inside an unyieldable C call" (sort/gsub callback).
1850    pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1851        if self.current.is_none() {
1852            Some("attempt to yield from outside a coroutine")
1853        } else if self.nny > 0 {
1854            Some("attempt to yield across a C-call boundary")
1855        } else {
1856            None
1857        }
1858    }
1859
1860    /// The coroutine whose context is currently live (`None` on the main thread).
1861    pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
1862        self.current
1863    }
1864
1865    /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1866    /// its pending `__close` handlers, then signal termination. The handlers run
1867    /// here, in place, with the thread still non-yieldable (a yield in one hits
1868    /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1869    /// way a yield does — `exec_with` propagates it past any protecting pcall
1870    /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1871    /// clean death (or, if a handler raised, the coroutine's error).
1872    pub(crate) fn close_running(&mut self) -> LuaError {
1873        let death = match self.close_slots(0, None) {
1874            Ok(()) => None,
1875            Err(e) => Some(e.0),
1876        };
1877        self.terminating = Some(death);
1878        LuaError(Value::Nil)
1879    }
1880
1881    /// `coroutine.status` as seen by the caller.
1882    pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1883        match self.effective_coro_status(co) {
1884            CoroStatus::Suspended => "suspended",
1885            CoroStatus::Running => "running",
1886            CoroStatus::Normal => "normal",
1887            CoroStatus::Dead => "dead",
1888        }
1889    }
1890
1891    fn take_ctx(&mut self) -> SavedCtx {
1892        let saved = SavedCtx {
1893            stack: std::mem::take(&mut self.stack),
1894            frames: std::mem::take(&mut self.frames),
1895            open_upvals: std::mem::take(&mut self.open_upvals),
1896            tbc: std::mem::take(&mut self.tbc),
1897            top: self.top,
1898            pcall_depth: self.pcall_depth,
1899            hook: self.hook,
1900            globals: self.globals,
1901        };
1902        self.frames_resync(); // P17-D Week 1 — frames now empty.
1903        saved
1904    }
1905
1906    fn put_ctx(&mut self, c: SavedCtx) {
1907        self.stack = c.stack;
1908        self.frames = c.frames;
1909        self.open_upvals = c.open_upvals;
1910        self.tbc = c.tbc;
1911        self.top = c.top;
1912        self.pcall_depth = c.pcall_depth;
1913        self.hook = c.hook;
1914        self.globals = c.globals;
1915        self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
1916    }
1917
1918    /// Move a coroutine's saved context into the live VM fields.
1919    fn load_coro_ctx(&mut self, co: Gc<Coro>) {
1920        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1921        let m = unsafe { co.as_mut() };
1922        self.stack = std::mem::take(&mut m.stack);
1923        self.frames = std::mem::take(&mut m.frames);
1924        self.open_upvals = std::mem::take(&mut m.open_upvals);
1925        self.tbc = std::mem::take(&mut m.tbc);
1926        self.top = m.top;
1927        self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
1928        self.pcall_depth = m.pcall_depth;
1929        self.hook = m.hook;
1930        self.globals = m.globals;
1931    }
1932
1933    /// Save the live VM context back into a coroutine object.
1934    fn store_coro_ctx(&mut self, co: Gc<Coro>) {
1935        let c = self.take_ctx();
1936        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1937        let m = unsafe { co.as_mut() };
1938        m.stack = c.stack;
1939        m.frames = c.frames;
1940        m.open_upvals = c.open_upvals;
1941        m.tbc = c.tbc;
1942        m.top = c.top;
1943        m.pcall_depth = c.pcall_depth;
1944        m.hook = c.hook;
1945        m.globals = c.globals;
1946        // bulk-overwrite of every collectable field traced by Coro::trace:
1947        // demote the coro back to gray so propagate re-traces its new state.
1948        self.heap
1949            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1950    }
1951
1952    /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
1953    /// or errors. Ok(values) carries yielded or returned values; Err carries an
1954    /// error raised inside the coroutine (the coroutine becomes dead).
1955    pub(crate) fn resume_coro(
1956        &mut self,
1957        co: Gc<Coro>,
1958        args: Vec<Value>,
1959    ) -> Result<Vec<Value>, LuaError> {
1960        match co.status {
1961            CoroStatus::Suspended => {}
1962            CoroStatus::Dead => return Err(self.rt_err("cannot resume dead coroutine")),
1963            _ => return Err(self.rt_err("cannot resume non-suspended coroutine")),
1964        }
1965        if self.c_depth >= MAX_C_DEPTH {
1966            return Err(self.rt_err("C stack overflow"));
1967        }
1968        self.c_depth += 1;
1969        let resumer = self.current;
1970        // save the resumer's live context away
1971        let rctx = self.take_ctx();
1972        match resumer {
1973            Some(r) => {
1974                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1975                let m = unsafe { r.as_mut() };
1976                m.stack = rctx.stack;
1977                m.frames = rctx.frames;
1978                m.open_upvals = rctx.open_upvals;
1979                m.tbc = rctx.tbc;
1980                m.top = rctx.top;
1981                m.pcall_depth = rctx.pcall_depth;
1982                m.globals = rctx.globals;
1983                m.status = CoroStatus::Normal;
1984                // bulk overwrite of every traced field on r — mirror
1985                // store_coro_ctx's barrier_back so propagate re-traces r.
1986                self.heap
1987                    .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
1988            }
1989            None => self.main_ctx = Some(rctx),
1990        }
1991        // swap the coroutine in
1992        self.load_coro_ctx(co);
1993        {
1994            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1995            let m = unsafe { co.as_mut() };
1996            m.status = CoroStatus::Running;
1997            m.resumer = resumer;
1998        }
1999        // co.resumer is a traced Gc field; barrier_back covers the new
2000        // resumer reference and any future field writes during this call.
2001        self.heap
2002            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2003        self.current = Some(co);
2004
2005        // drive it
2006        let drive = if co.started {
2007            self.coro_continue(&args)
2008        } else {
2009            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2010            unsafe { co.as_mut() }.started = true;
2011            self.coro_first(co.body, &args)
2012        };
2013
2014        // classify: a self-close termination or a pending yield each win over
2015        // the (sentinel) error they raised to unwind the Rust stack.
2016        let (outcome, status) = if let Some(death) = self.terminating.take() {
2017            // the coroutine closed itself: it dies now, cleanly or with the
2018            // error a `__close` handler raised.
2019            match death {
2020                Some(e) => {
2021                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2022                    unsafe { co.as_mut() }.error_value = Some(e);
2023                    self.heap
2024                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2025                    (Err(LuaError(e)), CoroStatus::Dead)
2026                }
2027                None => (Ok(Vec::new()), CoroStatus::Dead),
2028            }
2029        } else {
2030            match self.yielding.take() {
2031                Some((vals, fslot, nres)) => {
2032                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2033                    unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2034                    (Ok(vals), CoroStatus::Suspended)
2035                }
2036                None => {
2037                    // died: a return is clean, an error is remembered so a later
2038                    // `coroutine.close` can report it (PUC lua_closethread).
2039                    // Capture the error-point traceback (set by `unwind` before
2040                    // popping the failing frames) and prepend a synthetic
2041                    // top entry for the C native that initiated the error
2042                    // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2043                    // on the dead coroutine still shows the error site
2044                    // (db.lua :848 family).
2045                    if drive.is_err() {
2046                        let mut tb = self.error_traceback.take().unwrap_or_default();
2047                        if let Some(nm) = self.errored_native.take() {
2048                            let mut prefixed: Vec<u8> = Vec::new();
2049                            prefixed.extend_from_slice(
2050                                format!("\n\t[C]: in function '{nm}'").as_bytes(),
2051                            );
2052                            prefixed.extend(tb);
2053                            tb = prefixed;
2054                        }
2055                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2056                        unsafe { co.as_mut() }.error_traceback = Some(tb);
2057                    }
2058                    if let Err(e) = drive {
2059                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2060                        unsafe { co.as_mut() }.error_value = Some(e.0);
2061                        self.heap
2062                            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2063                    }
2064                    (drive, CoroStatus::Dead)
2065                }
2066            }
2067        };
2068
2069        // save the coroutine's context back and restore the resumer
2070        self.store_coro_ctx(co);
2071        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2072        unsafe { co.as_mut() }.status = status;
2073        match resumer {
2074            Some(r) => {
2075                self.load_coro_ctx(r);
2076                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2077                unsafe { r.as_mut() }.status = CoroStatus::Running;
2078                self.current = Some(r);
2079            }
2080            None => {
2081                let m = self.main_ctx.take().expect("main context saved");
2082                self.put_ctx(m);
2083                self.current = None;
2084            }
2085        }
2086        self.c_depth -= 1;
2087        outcome
2088    }
2089
2090    /// First resume: install the body function at slot 0 and run.
2091    fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2092        self.stack.clear();
2093        self.stack.push(body);
2094        self.stack.extend_from_slice(args);
2095        self.top = self.stack.len() as u32;
2096        match self.begin_call(0, Some(args.len() as u32), -1, true) {
2097            Ok(true) => self.exec_with(1),
2098            Ok(false) => Ok(self.take_results(0)),
2099            Err(e) => Err(e),
2100        }
2101    }
2102
2103    /// Resume after a yield: deliver `args` as the results of the call that
2104    /// yielded, then continue the suspended thread.
2105    fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2106        let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2107        let n = args.len() as u32;
2108        // Restore the full register window of the suspended top frame: a yield
2109        // that unwound through a native (call_value) may have left the stack
2110        // shorter than the frame needs. `base + max_stack` is what push_frame
2111        // allocates; `fslot + n` covers the delivered yield results.
2112        let frame_need = self
2113            .frames
2114            .last()
2115            .and_then(CallFrame::lua)
2116            .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2117            .unwrap_or(0);
2118        let need = frame_need.max((fslot + n) as usize);
2119        if self.stack.len() < need {
2120            self.stack.resize(need, Value::Nil);
2121        }
2122        for (i, &v) in args.iter().enumerate() {
2123            self.stack[fslot as usize + i] = v;
2124        }
2125        self.finish_results(fslot, n, nres);
2126        // the suspended `coroutine.yield` (a C call) now returns its resume
2127        // values: fire the matching "return" hook PUC defers until the resume.
2128        self.hook_return(true, 1, n)?;
2129        self.exec_with(1)
2130    }
2131
2132    /// `coroutine.yield`: suspend the running coroutine, recording where to
2133    /// resume. Errors if called outside a coroutine. Returns a sentinel error
2134    /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2135    pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2136        let nres = self.native_nresults;
2137        self.yielding = Some((vals, func_slot, nres));
2138        // value is irrelevant: resume_coro consults `self.yielding`, not this
2139        LuaError(Value::Nil)
2140    }
2141
2142    /// Install or clear the debug hook on the running thread (`debug.sethook`
2143    /// without a thread argument). Arms the calling frame's `oldpc` to the
2144    /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2145    /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2146    /// native return: the very next traceexec compares against the sethook
2147    /// CALL's line. When the install statement and the following statement are
2148    /// on different source lines (db.lua :322), `changedline` fires for that
2149    /// first statement; when they share a line (db.lua :25 wrapper), they do
2150    /// not, so the wrapper line is not re-fired.
2151    pub(crate) fn install_hook(&mut self, hook: HookState) {
2152        self.hook = hook;
2153        if self.hook.line
2154            && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2155        {
2156            f.hook_oldpc = f.pc.saturating_sub(1);
2157        }
2158    }
2159
2160    /// Install a hook on `target` (`None`/current thread → the live VM fields;
2161    /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2162    /// with an optional thread argument.
2163    pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2164        if self.is_current_thread(target) {
2165            self.install_hook(state);
2166        } else if let Some(co) = target {
2167            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2168            let m = unsafe { co.as_mut() };
2169            m.hook = state;
2170            if state.line
2171                && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2172            {
2173                f.hook_oldpc = u32::MAX;
2174            }
2175            // co.hook.func is a traced Value (Coro::trace covers it); demote
2176            // co back to gray so propagate sees the new hook function.
2177            self.heap
2178                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2179        }
2180    }
2181
2182    /// The hook state of `target` (`None`/current → the live VM state).
2183    pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2184        match target {
2185            t if self.is_current_thread(t) => self.hook,
2186            Some(co) => co.hook,
2187            None => self.hook,
2188        }
2189    }
2190
2191    /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
2192    /// hooks disabled (PUC clears the mask) and its results/stack growth are
2193    /// discarded so the interrupted frame's register window is untouched.
2194    /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
2195    fn run_hook(
2196        &mut self,
2197        event: &[u8],
2198        line: Option<i64>,
2199        from_native: bool,
2200    ) -> Result<(), LuaError> {
2201        // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
2202        // synchronous fn pointer call). Both Rust and Lua hooks may be
2203        // installed; both observe each event.
2204        if let Some(rh) = self.hook.rust_func {
2205            let evt = match event {
2206                b"call" => Some(RustHookEvent::Call),
2207                b"return" => Some(RustHookEvent::Return),
2208                b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
2209                b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
2210                b"count" => Some(RustHookEvent::Count),
2211                _ => None,
2212            };
2213            if let Some(evt) = evt {
2214                let was_in_hook = self.in_hook;
2215                self.in_hook = true;
2216                rh(self, evt);
2217                self.in_hook = was_in_hook;
2218            }
2219        }
2220        let Some(hook) = self.hook.func else {
2221            return Ok(());
2222        };
2223        let saved_top = self.top;
2224        let saved_len = self.stack.len();
2225        let name = Value::Str(self.heap.intern(event));
2226        let lv = line.map_or(Value::Nil, Value::Int);
2227        self.in_hook = true;
2228        // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
2229        // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
2230        // ci sat below `hookf` (the function being hooked). When that hooked
2231        // function is native, no Lua frame for it exists in luna's `frames`;
2232        // model it as a synthetic C level by pushing the hook with
2233        // `from_c = true` (then `c_frame_name` reads the caller's call
2234        // instruction → e.g. `name = "sethook"`). When the hooked function is
2235        // Lua (its frame is still on the stack), push with `from_c = false`
2236        // so the level descent lands on it directly. The hook's own frame
2237        // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
2238        // (PUC `CIST_HOOKED`).
2239        self.pending_is_hook = true;
2240        let r = self.call_value_impl(hook, &[name, lv], from_native);
2241        self.pending_is_hook = false;
2242        self.in_hook = false;
2243        self.stack.truncate(saved_len);
2244        self.top = saved_top;
2245        r.map(|_| ())
2246    }
2247
2248    /// Fire the "call" hook on entry to a function, if armed and not already in
2249    /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2250    /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2251    /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2252    /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2253    fn hook_call_with(
2254        &mut self,
2255        from_native: bool,
2256        nargs: u32,
2257        is_tail: bool,
2258    ) -> Result<(), LuaError> {
2259        if self.hook.call
2260            && !self.in_hook
2261            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2262        {
2263            self.hook_ftransfer = 1;
2264            self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2265            // PUC 5.1 didn't distinguish tail-call events — every call,
2266            // including tail-calls, fired plain `"call"`. 5.2 introduced
2267            // the separate `"tail call"` event (mask `"c"` covers both).
2268            // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2269            // "return","tail return","return","tail return"}`.
2270            let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2271                b"tail call"
2272            } else {
2273                b"call"
2274            };
2275            self.run_hook(event, None, from_native)?;
2276        }
2277        Ok(())
2278    }
2279
2280    pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2281        self.hook_call_with(from_native, nargs, false)
2282    }
2283
2284    /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2285    /// the first result slot relative to the activation's func slot, ntransfer
2286    /// the number of results.
2287    pub(crate) fn hook_return(
2288        &mut self,
2289        from_native: bool,
2290        ftransfer: u32,
2291        nresults: u32,
2292    ) -> Result<(), LuaError> {
2293        if self.hook.ret
2294            && !self.in_hook
2295            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2296        {
2297            self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2298            self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2299            self.run_hook(b"return", None, from_native)?;
2300        }
2301        Ok(())
2302    }
2303
2304    /// PUC "tail return" event — fires once per tail call that collapsed
2305    /// into the activation now returning, *after* its own "return" event.
2306    /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2307    fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2308        if self.hook.ret
2309            && !self.in_hook
2310            && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2311        {
2312            self.run_hook(b"tail return", None, false)?;
2313        }
2314        Ok(())
2315    }
2316
2317    /// Call a metamethod with a single expected result.
2318    fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2319        let mut r = self.call_value(f, args)?;
2320        Ok(if r.is_empty() {
2321            Value::Nil
2322        } else {
2323            r.swap_remove(0)
2324        })
2325    }
2326
2327    /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2328    /// driven through the interpreter loop with a `Meta` continuation, so a
2329    /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2330    /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2331    /// Returns to the caller with the call set up — the opcode arm must do no
2332    /// further work on the running frame and let the loop iterate. `tm` is
2333    /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2334    /// born from this call inherits it via `pending_tm`, so
2335    /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2336    /// (db.lua :878).
2337    fn begin_meta_call(
2338        &mut self,
2339        func: Value,
2340        args: &[Value],
2341        action: MetaAction,
2342        tm: &'static str,
2343    ) -> Result<(), LuaError> {
2344        let saved_top = self.top;
2345        let cont_slot = self.stack.len() as u32;
2346        self.stack.push(func);
2347        self.stack.extend_from_slice(args);
2348        self.top = self.stack.len() as u32;
2349        frames_push_sync(
2350            &mut self.frames,
2351            &mut self.frames_top,
2352            CallFrame::Cont(NativeCont {
2353                kind: ContKind::Meta(MetaCont { action, saved_top }),
2354                func_slot: cont_slot,
2355                nresults: 1,
2356            }),
2357        );
2358        let saved_tm = self.pending_tm.replace(tm);
2359        // begin_call drives a Lua metamethod through the loop (returns true) or
2360        // runs a native one inline (returns false, leaving results at cont_slot
2361        // for the loop head to pick up); either way the Meta cont resolves there.
2362        let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2363        // Native callees never consumed pending_tm (push_frame is only hit on
2364        // a Lua callee); restore so it doesn't leak to a later push_frame.
2365        self.pending_tm = saved_tm;
2366        r?;
2367        Ok(())
2368    }
2369
2370    /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2371    fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2372        match self.index_step(t, key)? {
2373            MmOut::Done(v) => self.stack[dst as usize] = v,
2374            MmOut::Mm { func, recv } => {
2375                self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2376            }
2377            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2378        }
2379        Ok(())
2380    }
2381
2382    /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2383    fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2384        match self.newindex_step(t, key, v)? {
2385            MmOut::Done(_) => {}
2386            MmOut::Mm { func, recv } => {
2387                self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2388            }
2389            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2390        }
2391        Ok(())
2392    }
2393
2394    /// Apply a comparison opcode's outcome: a known boolean drives the
2395    /// conditional skip directly; a metamethod is called yieldably, its
2396    /// truthiness driving the skip on return.
2397    fn op_compare(
2398        &mut self,
2399        step: MmOut,
2400        l: Value,
2401        r: Value,
2402        k: bool,
2403        tm: &'static str,
2404    ) -> Result<(), LuaError> {
2405        match step {
2406            MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2407            MmOut::Mm { func, .. } => {
2408                self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2409            }
2410            MmOut::CompareSynth { func } => {
2411                // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2412                // negation are driven through `MetaAction::Compare` so the
2413                // metamethod call can yield like any other compare.
2414                self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2415            }
2416        }
2417        Ok(())
2418    }
2419
2420    /// Complete a VM instruction whose metamethod just returned `result` (PUC
2421    /// `luaV_finishOp`). The running frame is already back on top.
2422    fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2423        match action {
2424            MetaAction::Store { dst } => self.stack[dst as usize] = result,
2425            MetaAction::Discard => {}
2426            MetaAction::Compare { k, negate } => {
2427                let t = if negate {
2428                    !result.truthy()
2429                } else {
2430                    result.truthy()
2431                };
2432                self.cond_skip(t, k);
2433            }
2434            MetaAction::Concat { dst, base_a } => {
2435                self.stack[dst as usize] = result;
2436                self.top = dst + 1;
2437                self.concat_run(base_a)?;
2438            }
2439        }
2440        Ok(())
2441    }
2442
2443    // ---- metatables ----
2444
2445    pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2446        match v {
2447            Value::Table(t) => t.metatable(),
2448            Value::Userdata(u) => u.metatable(),
2449            v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2450        }
2451    }
2452
2453    /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2454    /// non-table). No-op for tables (they carry their own).
2455    pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2456        if let Some(i) = type_mt_slot(v) {
2457            self.type_mt[i] = mt;
2458        }
2459    }
2460
2461    /// The metamethod of `v` for `mm`, or nil.
2462    pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2463        match self.metatable_of(v) {
2464            Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2465            None => Value::Nil,
2466        }
2467    }
2468
2469    /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2470    /// only fires when both operands carry a metatable that exposes the same
2471    /// implementation. Returns the metamethod to call, or `Nil` when no
2472    /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2473    /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2474    pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2475        let mt1 = self.metatable_of(l);
2476        let Some(mt1) = mt1 else { return Value::Nil };
2477        let key = Value::Str(self.mm_names[mm as usize]);
2478        let tm1 = mt1.get(key);
2479        if tm1.is_nil() {
2480            return Value::Nil;
2481        }
2482        let mt2 = self.metatable_of(r);
2483        let Some(mt2) = mt2 else { return Value::Nil };
2484        if mt1.as_ptr() == mt2.as_ptr() {
2485            return tm1;
2486        }
2487        let tm2 = mt2.get(key);
2488        if tm2.is_nil() {
2489            return Value::Nil;
2490        }
2491        if tm1.raw_eq(tm2) {
2492            return tm1;
2493        }
2494        Value::Nil
2495    }
2496
2497    /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2498    /// or full userdata whose metatable carries a string `__name` reports that
2499    /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2500    pub(crate) fn obj_typename(&self, v: Value) -> String {
2501        if matches!(v, Value::Table(_) | Value::Userdata(_))
2502            && let Value::Str(s) = self.get_mm(v, Mm::Name)
2503        {
2504            return String::from_utf8_lossy(s.as_bytes()).into_owned();
2505        }
2506        v.type_name().to_string()
2507    }
2508
2509    fn call_at(
2510        &mut self,
2511        func_slot: u32,
2512        nargs: u32,
2513        from_c: bool,
2514    ) -> Result<Vec<Value>, LuaError> {
2515        if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2516            self.exec()
2517        } else {
2518            // native completed inline; results at func_slot..top
2519            Ok(self.take_results(func_slot))
2520        }
2521    }
2522
2523    /// Switch the `collectgarbage` mode, returning the previous mode name.
2524    pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2525        std::mem::replace(&mut self.gc_mode, new)
2526    }
2527
2528    /// Whether the current `collectgarbage` mode is "generational" (where a
2529    /// "step" is a minor collection — a full atomic pass — rather than a paced
2530    /// incremental sweep).
2531    pub(crate) fn gc_mode_is_generational(&self) -> bool {
2532        self.gc_mode == "generational"
2533    }
2534
2535    /// Current `stepsize` pacing parameter (PUC: 0 means an unbounded step that
2536    /// completes a whole cycle at once).
2537    pub(crate) fn gc_stepsize(&self) -> i64 {
2538        self.gc_stepsize
2539    }
2540
2541    /// `collectgarbage("param", name [,value])`: read (or set, returning the
2542    /// previous value of) a pacing parameter. Returns `None` for an unknown
2543    /// name so the caller can raise PUC's `invalid parameter` error. The
2544    /// collector is stop-the-world, so these only round-trip for API fidelity.
2545    pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2546        let slot = match name {
2547            b"pause" => &mut self.gc_pause,
2548            b"stepmul" => &mut self.gc_stepmul,
2549            b"stepsize" => &mut self.gc_stepsize,
2550            _ => return None,
2551        };
2552        let prev = *slot;
2553        if let Some(v) = set {
2554            *slot = v;
2555        }
2556        Some(prev)
2557    }
2558
2559    /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2560    /// paced sweep via `Vm::gc_step`.
2561    ///
2562    /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2563    /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2564    /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2565    /// suspected UAF is structurally closed — born objects no longer become
2566    /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2567    ///
2568    /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2569    /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2570    /// O(SWEEP_DIVISOR) safe-points regardless of size.
2571    #[inline(always)]
2572    pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2573        if self.gc_finalizing {
2574            return;
2575        }
2576        if !self.heap.gc_due() {
2577            return;
2578        }
2579        self.gc_top = live_top;
2580        // PUC stepmul: % of allocation rate. Higher = more GC work per
2581        // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2582        // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2583        const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2584        const MIN_BUDGET: usize = 64_000;
2585        let stepmul = self.gc_stepmul.max(1) as usize;
2586        let divisor = (SWEEP_BASE / stepmul).max(1);
2587        let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2588        if self.gc_step(budget) {
2589            self.heap.rearm_gc_pause(self.gc_pause);
2590        }
2591    }
2592
2593    /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2594    /// roots (open upvalues, which are not first-class Values). Shared by the
2595    /// full collector and the incremental-sweep driver so both snapshot the
2596    /// exact same live set.
2597    fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2598        let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2599        roots.push(Value::Table(self.globals));
2600        for mt in self.type_mt.into_iter().flatten() {
2601            roots.push(Value::Table(mt));
2602        }
2603        for &n in &self.mm_names {
2604            roots.push(Value::Str(n));
2605        }
2606        // root only the running thread's live registers (PUC marks [stack, top)):
2607        // freed temporaries above `gc_top` are excluded so weak values stranded
2608        // there are not pinned. Suspended threads (main_ctx, other coroutines)
2609        // stay whole-rooted below — safe over-rooting, and they are not the
2610        // thread whose weak-table loop is under test.
2611        let live = (self.gc_top as usize).min(self.stack.len());
2612        roots.extend_from_slice(&self.stack[..live]);
2613        for cf in &self.frames {
2614            match cf {
2615                CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2616                CallFrame::Cont(NativeCont {
2617                    kind: ContKind::Xpcall { handler },
2618                    ..
2619                }) => roots.push(*handler),
2620                CallFrame::Cont(NativeCont {
2621                    kind: ContKind::Close(cc),
2622                    ..
2623                }) => {
2624                    // Root the error threaded through this close chain so a
2625                    // `collectgarbage()` inside a sibling `__close` handler
2626                    // does not free it before the next handler is invoked
2627                    // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2628                    if let Some(e) = cc.pending {
2629                        roots.push(e);
2630                    }
2631                    if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2632                        roots.push(err);
2633                    }
2634                }
2635                CallFrame::Cont(_) => {}
2636            }
2637        }
2638        if let Some(e) = self.closing_err {
2639            roots.push(e);
2640        }
2641        // B12 host roots — Lua-facade handles keep their referenced
2642        // values alive across calls/yields. Trace the whole vector;
2643        // unused slots (post-`unpin_all`) carry Value::Nil which the
2644        // GC ignores.
2645        for slot in &self.host_roots {
2646            // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2647            roots.push(slot.value);
2648        }
2649        // the running thread's debug hook (suspended threads root theirs via
2650        // Coro::trace / the main_ctx sweep below)
2651        if let Some(h) = self.hook.func {
2652            roots.push(h);
2653        }
2654        // the running coroutine (its saved-context fields live in the VM, but
2655        // the object itself + its resumer chain must stay reachable)
2656        if let Some(co) = self.current {
2657            roots.push(Value::Coro(co));
2658        }
2659        if let Some(mc) = self.main_coro {
2660            roots.push(Value::Coro(mc));
2661        }
2662        // debug.getregistry() and io library state
2663        if let Some(r) = self.registry {
2664            roots.push(Value::Table(r));
2665        }
2666        if let Some(mt) = self.file_mt {
2667            roots.push(Value::Table(mt));
2668        }
2669        if let Some(f) = self.io_input {
2670            roots.push(Value::Userdata(f));
2671        }
2672        if let Some(f) = self.io_output {
2673            roots.push(Value::Userdata(f));
2674        }
2675        // the main thread's saved context while a coroutine runs
2676        if let Some(m) = &self.main_ctx {
2677            roots.extend_from_slice(&m.stack);
2678            if let Some(h) = m.hook.func {
2679                roots.push(h);
2680            }
2681            for cf in &m.frames {
2682                match cf {
2683                    CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2684                    CallFrame::Cont(NativeCont {
2685                        kind: ContKind::Xpcall { handler },
2686                        ..
2687                    }) => roots.push(*handler),
2688                    CallFrame::Cont(_) => {}
2689                }
2690            }
2691        }
2692        let mut extra: Vec<*mut GcHeader> = self
2693            .open_upvals
2694            .iter()
2695            .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2696            .collect();
2697        if let Some(m) = &self.main_ctx {
2698            extra.extend(
2699                m.open_upvals
2700                    .iter()
2701                    .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2702            );
2703        }
2704        (roots, extra)
2705    }
2706
2707    /// Run a full collection with the VM's roots, then run any `__gc`
2708    /// finalizers the collection scheduled. A no-op (returns 0) when already
2709    /// inside a finalizer — the collector is not reentrant (PUC).
2710    pub fn collect_garbage(&mut self) -> usize {
2711        if self.gc_finalizing {
2712            return 0;
2713        }
2714        let (roots, extra) = self.gc_roots();
2715        let freed = self.heap.collect_ex(&roots, &extra);
2716        self.run_finalizers();
2717        freed
2718    }
2719
2720    /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2721    /// threw; gc.lua's "errors during collection" probe relies on it. This
2722    /// variant runs the same cycle but propagates the captured finalizer
2723    /// error to the explicit caller.
2724    pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2725        if self.gc_finalizing {
2726            return Ok(0);
2727        }
2728        let (roots, extra) = self.gc_roots();
2729        let freed = self.heap.collect_ex(&roots, &extra);
2730        self.run_finalizers_or_err()?;
2731        Ok(freed)
2732    }
2733
2734    /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
2735    /// should report fail rather than collect).
2736    pub(crate) fn gc_is_finalizing(&self) -> bool {
2737        self.gc_finalizing
2738    }
2739
2740    /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2741    /// = true indicates more pieces follow (concatenated until the first
2742    /// `to_cont = false` call flushes the whole line). Mirrors
2743    /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2744    ///   * If the buffer is fresh, `to_cont` is false, and the message is
2745    ///     `@<word>`, treat as a control message — only `@on` / `@off` are
2746    ///     recognised; any other `@…` is silently ignored.
2747    ///   * Otherwise, while the state is `Off`, drop the piece; while `On`,
2748    ///     accumulate, and flush to stderr + `warn_log` on the
2749    ///     non-continuation call.
2750    pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
2751        if self.warn_buf.is_empty()
2752            && !to_cont
2753            && let Some(b'@') = msg.first().copied()
2754        {
2755            match &msg[1..] {
2756                b"on" => self.warn_state = WarnState::On,
2757                b"off" => self.warn_state = WarnState::Off,
2758                _ => {} // unknown control — silently ignored (PUC checkcontrol)
2759            }
2760            return;
2761        }
2762        if self.warn_state == WarnState::Off {
2763            // drop continuation pieces too — PUC `warnfoff` is the trampoline
2764            return;
2765        }
2766        self.warn_buf.extend_from_slice(msg);
2767        if !to_cont {
2768            let line = std::mem::take(&mut self.warn_buf);
2769            eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
2770            self.warn_log.push(line);
2771        }
2772    }
2773
2774    /// Drain the in-process warning log (one entry per emitted message, sans
2775    /// `"Lua warning: "` prefix and newline). For test harnesses that want to
2776    /// assert on warn output without scraping stderr.
2777    pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
2778        std::mem::take(&mut self.warn_log)
2779    }
2780
2781    /// Arm the cooperative instruction budget (P09 embedding). The run loop
2782    /// decrements this once per dispatch turn; on zero it raises a catchable
2783    /// `"instruction budget exceeded"` error and disarms itself so the host
2784    /// can resume with a fresh budget on the next call. `None` removes the
2785    /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
2786    /// short-script semantics.
2787    pub fn set_instr_budget(&mut self, budget: Option<i64>) {
2788        self.instr_budget = budget;
2789    }
2790
2791    /// Remaining instruction budget (None when unbounded).
2792    pub fn instr_budget_remaining(&self) -> Option<i64> {
2793        self.instr_budget
2794    }
2795
2796    /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
2797    /// **must** disable JIT when relying on `instr_budget` — see the
2798    /// `jit_enabled` field doc for the rationale.
2799    pub fn set_jit_enabled(&mut self, enabled: bool) {
2800        self.jit.enabled = enabled;
2801    }
2802
2803    /// Current JIT enable state.
2804    pub fn jit_enabled(&self) -> bool {
2805        self.jit.enabled
2806    }
2807
2808    /// Toggle the trace JIT (P12). Off by default while the sprint
2809    /// develops. When enabled, hot back-edges are counted on
2810    /// `Proto.trace_hot_count`; once the counter passes
2811    /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
2812    /// mode at the back-edge target. Stays a no-op until S2's
2813    /// trace lowerer and S3's dispatcher land.
2814    pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
2815        self.jit.trace_enabled = enabled;
2816    }
2817
2818    /// P16-A — opt-in flag for the self-link cycle catch. See field
2819    /// docs for the correctness blocker. Default `false`.
2820    pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
2821        self.jit.p16_self_link_enabled = enabled;
2822    }
2823
2824    /// Current state of the P16-A self-link cycle catch.
2825    pub fn p16_self_link_enabled(&self) -> bool {
2826        self.jit.p16_self_link_enabled
2827    }
2828
2829    /// Current trace-JIT enable state.
2830    pub fn trace_jit_enabled(&self) -> bool {
2831        self.jit.trace_enabled
2832    }
2833
2834    /// Number of traces that have closed cleanly (looped back to the
2835    /// head PC) since this Vm was constructed. Cumulative; used by
2836    /// tests + tuning. Will become the dominant signal once S2's
2837    /// compile + cache lands.
2838    pub fn trace_closed_count(&self) -> u64 {
2839        self.jit.counters.closed
2840    }
2841
2842    /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
2843    /// hit an un-recordable op — the latter lands at S2).
2844    pub fn trace_aborted_count(&self) -> u64 {
2845        self.jit.counters.aborted
2846    }
2847
2848    /// P13-S13-G v2 — number of compiled traces whose close shape
2849    /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
2850    /// pin `dispatchable=false` because the dispatcher can't
2851    /// resume at a depth>0 PC without the matching CallFrames.
2852    /// S4-step4b's frame-mat helper could synthesise those, but
2853    /// the InlineAbort emit path isn't wired up yet — fresh
2854    /// pickup work for S13-G v2-full.
2855    pub fn trace_inline_abort_count(&self) -> u64 {
2856        self.jit.counters.inline_abort
2857    }
2858
2859    /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
2860    pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
2861        &self.jit.counters.dispatch_off_reasons
2862    }
2863
2864    /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
2865    pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
2866        &self.jit.counters.compile_failed_reasons
2867    }
2868
2869    /// P13-S13-H — see `JitCounters::closed_lens`. Returns
2870    /// `(is_call_triggered, ops_len)` for every trace that closed.
2871    pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
2872        &self.jit.counters.closed_lens
2873    }
2874
2875    /// P12-S2.C — number of closed traces the lowerer compiled and
2876    /// parked on `Proto.traces`. Re-records of the same head_pc are
2877    /// deduped (the second close finds the head_pc already cached
2878    /// and skips compile), so this never exceeds `trace_closed_count`.
2879    pub fn trace_compiled_count(&self) -> u64 {
2880        self.jit.counters.compiled
2881    }
2882
2883    /// P12-S2.C — number of closed traces the lowerer rejected
2884    /// (any of the bail conditions in
2885    /// `crate::jit::trace::try_compile_trace`).
2886    pub fn trace_compile_failed_count(&self) -> u64 {
2887        self.jit.counters.compile_failed
2888    }
2889
2890    /// P12-S3 — number of times the dispatcher jumped into a
2891    /// compiled trace. Bumps on every entry; `trace_deopt_count`
2892    /// counts the subset where the trace returned with a parked
2893    /// `jit_pending_err`.
2894    pub fn trace_dispatched_count(&self) -> u64 {
2895        self.jit.counters.dispatched
2896    }
2897
2898    /// P12-S3 — number of trace entries that came back with
2899    /// `jit_pending_err` set (typically a metatable shadowed an
2900    /// index inside a helper, forcing the dispatcher to fall back
2901    /// to the interpreter without committing the trace's result).
2902    pub fn trace_deopt_count(&self) -> u64 {
2903        self.jit.counters.deopt
2904    }
2905
2906    /// P15-A v1 — number of times the dispatcher started a side
2907    /// trace recording (an `exit_hit_counts` slot crossed
2908    /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
2909    /// was None and trace JIT was enabled). Each unit is exactly one
2910    /// `start_side_trace` call; the actual compile success counts
2911    /// under [`Self::trace_compiled_count`] like any other trace.
2912    /// Probe use: distinguishes the "side-trace pipeline fired"
2913    /// signal from the "primary back-edge / call-trigger fired"
2914    /// signal so v0-v3 architectural progress is visible without
2915    /// reading per-counter histograms.
2916    pub fn trace_side_trace_started_count(&self) -> u64 {
2917        self.jit.counters.side_trace_started
2918    }
2919
2920    /// P15-A v2-A — number of side-trace recordings that closed,
2921    /// compiled successfully, AND patched their parent's
2922    /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
2923    /// dispatch through these ptrs yet (v2-B/C job), but the
2924    /// counter + ptr write proves the compile + link pipeline is
2925    /// complete end-to-end.
2926    pub fn trace_side_trace_compiled_count(&self) -> u64 {
2927        self.jit.counters.side_trace_compiled
2928    }
2929
2930    /// P15-A v2-C-A5-C — number of side traces that compiled
2931    /// successfully but were SHEDDED by the close-handler shape-
2932    /// match gate (`exit_tags_match_entry_tags`). High ratios
2933    /// vs. `trace_side_trace_compiled_count` indicate the
2934    /// architecture is shedding lots of would-be side traces;
2935    /// useful as a tuning probe for future relaxation of the
2936    /// gate or for child-IR re-specialisation against parent's
2937    /// exit shape.
2938    pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
2939        self.jit.counters.side_trace_shape_mismatch
2940    }
2941
2942    /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
2943    /// classified as `crate::jit::trace::EscapeState::Sinkable`
2944    /// across every successfully compiled trace on this Vm. The
2945    /// count is post-demotion: sites pre-emit drops back to Escaped
2946    /// for not meeting v1 sunk-emit criteria are NOT counted.
2947    /// `trace_sunk_alloc_count` matches one-for-one today (every
2948    /// surviving Sinkable site goes through sunk emit).
2949    pub fn trace_sinkable_seen_count(&self) -> u64 {
2950        self.jit.counters.sinkable_seen
2951    }
2952
2953    /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
2954    pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
2955        self.jit.counters.accum_bufferable_seen
2956    }
2957
2958    /// P15-prep — total dispatch hits across all known traces,
2959    /// broken into hot-exit telemetry (max single-exit count,
2960    /// total dispatches, exit count). Used by probes to identify
2961    /// hot side-exits as side-trace candidates.
2962    ///
2963    /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
2964    /// recursively, so inner functions' traces are reported.
2965    pub fn trace_exit_hit_summary(
2966        &self,
2967        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
2968    ) -> Vec<(u32, Vec<u32>)> {
2969        fn walk(
2970            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
2971            out: &mut Vec<(u32, Vec<u32>)>,
2972        ) {
2973            for ct in proto.traces.borrow().iter() {
2974                let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
2975                out.push((ct.head_pc, counts));
2976            }
2977            for inner in proto.protos.iter() {
2978                walk(*inner, out);
2979            }
2980        }
2981        let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
2982        walk(cl.proto, &mut out);
2983        out
2984    }
2985
2986    /// P15-A v0 — surface every side-exit slot whose hit count is
2987    /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
2988    /// `cl.proto` (recursively walking `proto.protos`). Returned
2989    /// entries are side-trace candidates: each carries the parent
2990    /// trace's `(head_proto, head_pc)`, the exit's index in the
2991    /// parent's `exit_hit_counts`, and the side trace's natural
2992    /// entry shape (`cont_pc` + `exit_tags`).
2993    ///
2994    /// Layout of `exit_hit_counts` (mirrored by the iter):
2995    /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
2996    ///   window-sized exit_tags).
2997    /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
2998    ///   → `per_exit_tags[i]` (per-cont_pc caller-window tags).
2999    /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3000    ///   exit_tags = `ct.exit_tags`).
3001    pub fn hot_exit_iter(
3002        &self,
3003        cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3004    ) -> Vec<crate::jit::trace::HotExitInfo> {
3005        use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3006        fn walk(
3007            proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3008            out: &mut Vec<HotExitInfo>,
3009        ) {
3010            for ct in proto.traces.borrow().iter() {
3011                let inline_n = ct.per_exit_inline.len();
3012                let tags_n = ct.per_exit_tags.len();
3013                debug_assert_eq!(
3014                    ct.exit_hit_counts.len(),
3015                    inline_n + tags_n + 1,
3016                    "exit_hit_counts layout invariant violated"
3017                );
3018                for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3019                    let hits = cell.get();
3020                    if hits < HOTEXIT_THRESHOLD {
3021                        continue;
3022                    }
3023                    let (cont_pc, exit_tags) = if idx < inline_n {
3024                        let ent = &ct.per_exit_inline[idx];
3025                        (ent.cont_pc, ent.exit_tags.clone())
3026                    } else if idx < inline_n + tags_n {
3027                        let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3028                        (*pc, tags.clone())
3029                    } else {
3030                        (ct.head_pc, ct.exit_tags.clone())
3031                    };
3032                    out.push(HotExitInfo {
3033                        head_proto: proto,
3034                        head_pc: ct.head_pc,
3035                        exit_idx: idx,
3036                        hits,
3037                        cont_pc,
3038                        exit_tags,
3039                    });
3040                }
3041            }
3042            for inner in proto.protos.iter() {
3043                walk(*inner, out);
3044            }
3045        }
3046        let mut out: Vec<HotExitInfo> = Vec::new();
3047        walk(cl.proto, &mut out);
3048        out
3049    }
3050
3051    /// P12-S5-B — sum of NewTable sites that actually took the
3052    /// sunk-emit path across every successfully compiled trace on
3053    /// this Vm. Each counted site skips its heap `Gc<Table>`
3054    /// allocation per dispatch; the array part lives as Cranelift
3055    /// `Variable`s for the duration of the trace.
3056    pub fn trace_sunk_alloc_count(&self) -> u64 {
3057        self.jit.counters.sunk_alloc
3058    }
3059
3060    /// P12-S5-C — sum of materialise-helper emit sites across every
3061    /// successfully compiled trace on this Vm. Each unit is a
3062    /// (site × cmp side-exit) pair whose IR reconstructs a heap
3063    /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3064    /// emit is wiring materialise into the right side-exits.
3065    pub fn trace_materialize_emit_count(&self) -> u64 {
3066        self.jit.counters.materialize_emit
3067    }
3068
3069    /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3070    /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3071    /// replaces a `Heap::new_closure_inline` call on the dispatch
3072    /// path; the count is static (one per matching op per compiled
3073    /// trace), summed at compile success.
3074    pub fn trace_closure_emit_count(&self) -> u64 {
3075        self.jit.counters.closure_emit
3076    }
3077
3078    /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3079    /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3080    /// whether a self-recursive function actually walked the depth
3081    /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3082    /// across traces and Vm activations; reset only on `Vm::new`.
3083    pub fn trace_max_depth_seen(&self) -> u8 {
3084        self.jit.max_depth_seen
3085    }
3086
3087    /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3088    /// dispatch time). The frame-materialization helper reads `.base`
3089    /// to compute offsets for each inlined frame's window.
3090    #[doc(hidden)]
3091    pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3092        match self.frames.last() {
3093            Some(CallFrame::Lua(f)) => Some(*f),
3094            _ => None,
3095        }
3096    }
3097
3098    /// P12-S4-step4b — ensure the value stack covers indices
3099    /// `[0..need)`. Extends with Nil if shorter. Called by the
3100    /// frame-materialization helper before pushing an inlined frame
3101    /// whose register window may exceed the current stack length.
3102    #[doc(hidden)]
3103    pub fn jit_ensure_stack(&mut self, need: usize) {
3104        if self.stack.len() < need {
3105            self.stack.resize(need, Value::Nil);
3106        }
3107    }
3108
3109    /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3110    /// `__close` handlers would run (any active tbc slot ≥ from
3111    /// holding a non-nil/false Value); if so, parks a deopt sentinel
3112    /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3113    /// IR branches to the deopt block. Otherwise performs the safe
3114    /// part of close — `close_from(from)` to close open upvals +
3115    /// drop any drained tbc entries ≥ from — and returns 0.
3116    ///
3117    /// Returns are i64-shaped so the cranelift import sig stays
3118    /// trivial (i64 → i64 mapping).
3119    #[doc(hidden)]
3120    pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3121        if self.jit.pending_err.is_some() {
3122            return 1;
3123        }
3124        let Some(f) = self.jit_last_lua_frame() else {
3125            self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3126            return 1;
3127        };
3128        let from = f.base + start_offset;
3129        let has_handler = self.tbc.iter().any(|&s| {
3130            s >= from && {
3131                let v = self.stack[s as usize];
3132                !matches!(v, Value::Nil | Value::Bool(false))
3133            }
3134        });
3135        if has_handler {
3136            self.jit.pending_err =
3137                Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3138            return 1;
3139        }
3140        self.close_from(from);
3141        // Drain any tbc entries ≥ from (they're nil/false stubs the
3142        // interpreter's drive_close would have skipped silently).
3143        while let Some(&s) = self.tbc.last() {
3144            if s < from {
3145                break;
3146            }
3147            self.tbc.pop();
3148        }
3149        0
3150    }
3151
3152    /// P12-S7-B — spill the trace's current value for a register to
3153    /// the underlying `vm.stack[base + slot_offset]`. Required before
3154    /// an `Op::Closure` whose inner proto has an `in_stack: true`
3155    /// upval at `slot_offset` — the helper's `find_or_create_upval`
3156    /// captures a live pointer to `vm.stack[base + slot_offset]`,
3157    /// which must hold the right value at call time (trace IR's
3158    /// Variable hasn't yet been written back).
3159    ///
3160    /// Parameters arrive as i64 from the IR: `slot_offset` is the
3161    /// caller-frame register index (`u32` in practice, depth=0
3162    /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3163    /// `crate::runtime::value::raw` byte for the slot's RegKind;
3164    /// `raw_bits` is the trace Variable's `use_var` payload
3165    /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3166    /// raw `Gc::as_ptr` cast).
3167    #[doc(hidden)]
3168    pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3169        let Some(f) = self.jit_last_lua_frame() else {
3170            self.jit.pending_err =
3171                Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3172            return;
3173        };
3174        let idx = (f.base as usize) + (slot_offset as usize);
3175        if self.stack.len() <= idx {
3176            self.stack.resize(idx + 1, Value::Nil);
3177        }
3178        // SAFETY: caller (trace JIT IR emit) provides matching
3179        // `(tag, raw_bits)` — same shape produced by Value::unpack.
3180        let v = unsafe {
3181            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3182        };
3183        self.stack[idx] = v;
3184    }
3185
3186    /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3187    /// Mirrors the interp arm (this file ~L5316): copies the
3188    /// generator/state/control triple from `R[A..=A+2]` to
3189    /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3190    /// the iterator function via `begin_call`. v2 only handles
3191    /// `Value::Native` iterators (the canonical `ipairs_iter` /
3192    /// `next` builtins) — a Lua-closure iterator would push a Lua
3193    /// frame mid-trace, breaking `recording_frame_base`, so we
3194    /// deopt by parking a `pending_err` and returning `-1`.
3195    ///
3196    /// `slot_offset` is the caller-frame register index (=
3197    /// `inst.a()` decoded from a u32-wide field). `nvars` is
3198    /// `inst.c() as i32` — the caller's expected return count.
3199    /// P12-S12-C v1 — refresh only the raw payload of
3200    /// `vm.stack[base + slot_offset]`, preserving its existing
3201    /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3202    /// uses this when the slot's `RegKind` is `Unset` (no compile-
3203    /// time tag info; commonly `Str` slots which the trace doesn't
3204    /// model). The interp's previous execution of the same op
3205    /// already populated the slot with the right tag — the trace
3206    /// only needs to swap in its current raw value.
3207    #[doc(hidden)]
3208    pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3209        let Some(f) = self.jit_last_lua_frame() else {
3210            return;
3211        };
3212        let idx = (f.base as usize) + (slot_offset as usize);
3213        if idx >= self.stack.len() {
3214            return;
3215        }
3216        let (tag, _) = self.stack[idx].unpack();
3217        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3218        self.stack[idx] = unsafe {
3219            crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3220        };
3221    }
3222
3223    /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3224    ///
3225    /// Mirrors the interp arm (this file ~L5112): `self.top =
3226    /// base + a + n; concat_run(base + a)`. Result lands at
3227    /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3228    /// deopt (any error from `concat_run` OR detection that the
3229    /// metamethod path was taken — `concat_run` returns `Ok(())`
3230    /// after `begin_meta_call` which has pushed a Lua frame the
3231    /// trace can't safely continue past).
3232    ///
3233    /// The frame-push detection uses `pre/post frames.len()` and
3234    /// unwinds any pushed frames before deopting, so the
3235    /// dispatcher's existing deopt path sees a clean stack.
3236    #[doc(hidden)]
3237    pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3238        if self.jit.pending_err.is_some() {
3239            return -1;
3240        }
3241        let Some(f) = self.jit_last_lua_frame() else {
3242            self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3243            return -1;
3244        };
3245        let abs_a = f.base + slot_offset;
3246        self.top = abs_a + n as u32;
3247        let pre_frames = self.frames.len();
3248        let result = self.concat_run(abs_a);
3249        let post_frames = self.frames.len();
3250        // Frame-push = metamethod path taken (begin_meta_call pushed
3251        // a Lua frame). The trace can't continue past it; unwind +
3252        // deopt so interp redoes Op::Concat in the slow path.
3253        while self.frames.len() > pre_frames {
3254            frames_pop_sync(&mut self.frames, &mut self.frames_top);
3255        }
3256        if let Err(e) = result {
3257            self.jit.pending_err = Some(e);
3258            return -1;
3259        }
3260        if post_frames > pre_frames {
3261            self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3262            return -1;
3263        }
3264        0
3265    }
3266
3267    /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3268    /// accumulator buffer pool, returning a raw pointer. The trace
3269    /// fn's IR holds this pointer in a stack slot through the loop
3270    /// and calls `jit_str_buf_extend` per iter. If the pool is
3271    /// empty, allocate fresh.
3272    ///
3273    /// Safety: the returned pointer is valid until
3274    /// `jit_str_buf_release` is called or the Vm is dropped. The
3275    /// caller MUST not retain it across `enter_jit` boundaries.
3276    #[doc(hidden)]
3277    pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3278        let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3279        // Move into a Box so the pointer is stable until release.
3280        Box::into_raw(Box::new(buf))
3281    }
3282
3283    /// P14-S14-B v2 — return a previously-acquired buffer to the
3284    /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3285    /// buffer is `clear`ed (capacity retained) so the next acquire
3286    /// gets a ready-to-extend Vec.
3287    ///
3288    /// Safety: `buf` must have been returned by a prior
3289    /// `jit_str_buf_acquire` on the same Vm.
3290    #[doc(hidden)]
3291    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3292    pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3293        if buf.is_null() {
3294            return;
3295        }
3296        // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3297        let mut owned = unsafe { Box::from_raw(buf) };
3298        owned.clear();
3299        if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3300            self.jit.str_buf_pool.push(*owned);
3301        }
3302        // Else: drop the buffer.
3303    }
3304
3305    /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3306    /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3307    /// the piece slot) and passes it through; we treat it as a
3308    /// `*mut LuaStr` and append its bytes.
3309    ///
3310    /// Returns 0 on success, -1 if the piece isn't a Str (would
3311    /// trip __concat metamethod path → deopt to interp).
3312    ///
3313    /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3314    /// trace's piece slot raw bits.
3315    #[doc(hidden)]
3316    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3317    pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3318        if buf.is_null() || str_ptr == 0 {
3319            return -1;
3320        }
3321        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3322        let buf = unsafe { &mut *buf };
3323        let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3324        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3325        let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3326        buf.extend_from_slice(bytes);
3327        0
3328    }
3329
3330    /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3331    /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3332    /// the trace to write into the accumulator slot.
3333    ///
3334    /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3335    /// (the v2 hard cap; the trace deopts).
3336    ///
3337    /// Safety: `buf` from prior `acquire`. The buffer is left
3338    /// CLEAR (drained) ready for `release`.
3339    #[doc(hidden)]
3340    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3341    pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3342        if buf.is_null() {
3343            return 0;
3344        }
3345        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3346        let buf = unsafe { &mut *buf };
3347        let bytes = std::mem::take(buf);
3348        // v2 hard cap at 256KB per RFC Q3.
3349        if bytes.len() > 256 * 1024 {
3350            return 0;
3351        }
3352        let gc = self.heap.intern(&bytes);
3353        gc.as_ptr() as i64
3354    }
3355
    /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
    ///
    /// Performs one generic-`for` iterator call for the trace's head Lua
    /// frame, leaving the iterator results in `R[A+4..]` of `vm.stack`.
    ///
    /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
    /// v3: ipairs `inext` fast path at the top — skip begin_call
    ///     when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
    ///     R[A+2]=Int.
    /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
    ///     caller-provided buffers + return R[A+4]'s tag byte. Lets
    ///     emit skip 3 separate `luna_jit_stack_load` calls and 1
    ///     `luna_jit_stack_tag` call by reading the buffer via
    ///     cranelift `stack_load` IR instead. Returns -1 on deopt.
    ///
    /// Parameters:
    /// - `slot_offset`: register `A`, relative to the frame's `base`.
    /// - `nvars`: number of loop variables; forwarded to `begin_call` as
    ///   the expected result count and used by the fast path to decide
    ///   which result slots to fill.
    /// - `ctrl_out` / `key_out` / `val_out`: out-pointers that receive the
    ///   raw payload bits of R[A+2], R[A+4] and R[A+5] (`val_out` gets 0
    ///   when `nvars` is 0 or 1).
    ///
    /// Returns R[A+4]'s tag byte widened to `i64`, or -1 when the trace
    /// must deopt: an error is already pending, no Lua frame is active,
    /// the iterator is not a native, or `begin_call` failed. On the -1
    /// paths the out-pointers are not written.
    ///
    /// Safety: the three out-pointers must be valid for a write of one
    /// `i64` each; they are not null-checked here.
    #[doc(hidden)]
    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
    pub fn jit_op_tforcall(
        &mut self,
        slot_offset: u32,
        nvars: i32,
        ctrl_out: *mut i64,
        key_out: *mut i64,
        val_out: *mut i64,
    ) -> i64 {
        // An earlier helper already failed; keep deopting without touching state.
        if self.jit.pending_err.is_some() {
            return -1;
        }
        let Some(f) = self.jit_last_lua_frame() else {
            self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
            return -1;
        };
        // Absolute stack index of R[A].
        let abs = f.base + slot_offset;
        // Make sure R[A]..=R[A+6] exist before they are indexed below.
        let need = (abs + 7) as usize;
        if self.stack.len() < need {
            self.stack.resize(need, Value::Nil);
        }
        // v3 fast path.
        let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
            && std::ptr::fn_addr_eq(
                n.f,
                crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
            )
            && let Value::Table(t) = self.stack[(abs + 1) as usize]
            && t.metatable().is_none()
            && let Value::Int(i) = self.stack[(abs + 2) as usize]
        {
            // Inline the ipairs step: look up t[i + 1] directly.
            let next_i = i.wrapping_add(1);
            let v = t.get_int(next_i);
            if v.is_nil() {
                // End of iteration: only the key slot is set (to nil).
                self.stack[(abs + 4) as usize] = Value::Nil;
            } else {
                self.stack[(abs + 4) as usize] = Value::Int(next_i);
                if (nvars as usize) >= 2 {
                    self.stack[(abs + 5) as usize] = v;
                }
                // Loop variables beyond the second get nil. Only slots up to
                // R[A+6] were guaranteed by the resize above, hence the
                // per-slot bounds check.
                for j in 2..nvars as usize {
                    let slot = abs + 4 + j as u32;
                    if (slot as usize) < self.stack.len() {
                        self.stack[slot as usize] = Value::Nil;
                    }
                }
            }
            true
        } else {
            false
        };
        if !took_fast_path {
            // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
            // route through begin_call. Lua-closure iters would push
            // a Lua frame mid-trace → deopt.
            self.stack[(abs + 4) as usize] = self.stack[abs as usize];
            self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
            self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
            // Note: the copy above has already happened when this check deopts.
            if !matches!(self.stack[abs as usize], Value::Native(_)) {
                self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
                return -1;
            }
            if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
                self.jit.pending_err = Some(e);
                return -1;
            }
        }
        // v4 batched writeback — fill the caller's buffers with the
        // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
        // reload via cranelift `stack_load` instead of separate
        // `luna_jit_stack_load` helper calls.
        // SAFETY: reads the `zero` field of the payload returned by `unpack()`.
        // NOTE(review): presumably a union view of the payload bits for which
        // any bit pattern is valid — confirm against the payload type.
        let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
        let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
        // SAFETY: same `zero`-field read as for `ctrl_raw` above.
        let key_raw = unsafe { key_rv.zero };
        let val_raw = if (nvars as usize) >= 2 {
            // SAFETY: same `zero`-field read as for `ctrl_raw` above.
            unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
        } else {
            0u64
        };
        // SAFETY: relies on the caller passing valid, writable `*mut i64`
        // out-pointers (see the function-level contract); nothing here checks
        // them.
        unsafe {
            ctrl_out.write(ctrl_raw as i64);
            key_out.write(key_raw as i64);
            val_out.write(val_raw as i64);
        }
        key_tag as i64
    }
3458
3459    /// P12-S12-B-v2 — load the raw `i64` payload of
3460    /// `vm.stack[base + slot_offset]` for the active trace's head
3461    /// Lua frame. Used to reload trace IR `Variable`s after a
3462    /// helper has written to `vm.stack` directly (e.g. TForCall's
3463    /// iter results land at `R[A+4..A+4+nvars]`).
3464    #[doc(hidden)]
3465    pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3466        let Some(f) = self.jit_last_lua_frame() else {
3467            return 0;
3468        };
3469        let idx = (f.base as usize) + (slot_offset as usize);
3470        if idx >= self.stack.len() {
3471            return 0;
3472        }
3473        let v = self.stack[idx];
3474        let (_, raw) = v.unpack();
3475        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3476        unsafe { raw.zero as i64 }
3477    }
3478
3479    /// P12-S12-B-v2 — read the tag byte of
3480    /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3481    /// to dispatch on the iterator's return-key tag at runtime
3482    /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3483    /// deopt for v2).
3484    #[doc(hidden)]
3485    pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3486        let Some(f) = self.jit_last_lua_frame() else {
3487            return crate::runtime::value::raw::NIL;
3488        };
3489        let idx = (f.base as usize) + (slot_offset as usize);
3490        if idx >= self.stack.len() {
3491            return crate::runtime::value::raw::NIL;
3492        }
3493        self.stack[idx].unpack().0
3494    }
3495
3496    /// P12-S4-step4b — push a Lua frame onto the call stack with
3497    /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3498    /// at trace side-exits to recreate the inlined call activations
3499    /// the lowerer compiled past. The contract (enforced by the
3500    /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3501    /// `nresults` is the caller's expected count (today always 1
3502    /// because the lowerer bails Op::Call C != 2), and the caller
3503    /// has already called `jit_ensure_stack` to cover
3504    /// `[0..base + cl.proto.max_stack)`.
3505    #[doc(hidden)]
3506    pub fn jit_push_inlined_frame(
3507        &mut self,
3508        cl: Gc<LuaClosure>,
3509        base: u32,
3510        pc: u32,
3511        nresults: i32,
3512    ) {
3513        frames_push_sync(
3514            &mut self.frames,
3515            &mut self.frames_top,
3516            CallFrame::Lua(Frame {
3517                closure: cl,
3518                base,
3519                pc,
3520                // Lua call ABI: callee R[0] sits at caller R[A+1], so
3521                // callee.base = caller.base + A + 1; func_slot is
3522                // caller.base + A = callee.base - 1.
3523                func_slot: base - 1,
3524                n_varargs: 0,
3525                nresults,
3526                hook_oldpc: u32::MAX,
3527                from_c: false,
3528                tm: None,
3529                is_hook: false,
3530                tailcalls: 0,
3531            }),
3532        );
3533    }
3534
3535    /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
3536    /// should set to `false` so `load`/`loadstring` reject bytecode input
3537    /// (which bypasses parser limits and could exploit verifier gaps).
3538    pub fn set_bytecode_loading(&mut self, enabled: bool) {
3539        self.bytecode_loading = enabled;
3540    }
3541
3542    /// Current bytecode-loading gate state.
3543    pub fn bytecode_loading(&self) -> bool {
3544        self.bytecode_loading
3545    }
3546
3547    /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
3548    /// bytecode is a strictly larger trust surface than luna's own dump
3549    /// format (third-party toolchain bugs, malformed chunks, unknown
3550    /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
3551    /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
3552    pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
3553        self.puc_bytecode_loading = enabled;
3554    }
3555
3556    /// Current PUC bytecode-loading gate state.
3557    pub fn puc_bytecode_loading(&self) -> bool {
3558        self.puc_bytecode_loading
3559    }
3560
3561    /// Take the error traceback captured at the latest error point and
3562    /// reset it. Embedders should call this immediately after a failed
3563    /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3564    /// entry clears it. Returns `None` if no error was in flight.
3565    pub fn take_error_traceback(&mut self) -> Option<String> {
3566        self.error_traceback
3567            .take()
3568            .map(|b| String::from_utf8_lossy(&b).into_owned())
3569    }
3570
3571    /// Arm the soft memory cap (P09 embedding). The run loop checks the
3572    /// heap's tracked byte usage between dispatch turns; on overshoot it
3573    /// first runs a full collect, and if `bytes` still exceeds the cap it
3574    /// raises a catchable `"memory cap exceeded"` Lua error and disarms
3575    /// itself (fire-once: re-arm before the next `call_value` if reusing
3576    /// the Vm across requests). `None` removes the cap. The accounting is
3577    /// approximate — internal Vec/Box capacity overhead is not tracked,
3578    /// so embedders should size the cap with ~2× margin over the desired
3579    /// hard limit and additionally bound the Vm's lifetime (drop after
3580    /// each request).
3581    pub fn set_memory_cap(&mut self, cap: Option<usize>) {
3582        self.heap.mem_cap = cap;
3583    }
3584
3585    /// Approximate bytes the heap is currently holding. Object shells plus
3586    /// every table's internal array/hash boxes (tracked via
3587    /// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
3588    /// bytecode and closure upvalue slices still go uncounted — this is a
3589    /// lower bound, not a precise `malloc_stats`-style total.
3590    pub fn memory_used(&self) -> usize {
3591        self.heap.bytes()
3592    }
3593
3594    /// Read upvalue slot `i` of the native function currently on top of the
3595    /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3596    /// when no native is running. Public so the C ABI trampoline can fetch
3597    /// the host C function pointer it stashed there at registration time.
3598    pub fn running_native_upvalue(&self, i: usize) -> Value {
3599        match self.running_natives.last() {
3600            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3601            Some(nc) => unsafe {
3602                let upvals = &(*nc.as_ptr()).upvals;
3603                upvals.get(i).copied().unwrap_or(Value::Nil)
3604            },
3605            None => Value::Nil,
3606        }
3607    }
3608
3609    /// Register a table for finalization if its (just-set) metatable carries a
3610    /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3611    /// `__gc` to the metatable afterwards does not retroactively register).
3612    pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3613        if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3614            self.heap.register_finalizable(t);
3615        }
3616    }
3617
3618    /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3619    /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3620    /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3621    /// metatable that `newproxy` returned, which then needs to flow through.
3622    /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3623    /// would re-check at metatable-set time); luna's only userdata
3624    /// finalizables today come via `newproxy`, which registers itself.
3625    #[allow(dead_code)]
3626    pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3627        if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3628            self.heap.register_finalizable_userdata(u);
3629        }
3630    }
3631
3632    /// Run pending `__gc` finalizers (objects the collector resurrected for
3633    /// finalization). Finalizer errors are swallowed — PUC turns them into a
3634    /// warning; they must never propagate to the mutator. Reentrancy-guarded.
3635    fn run_finalizers(&mut self) {
3636        let _ = self.run_finalizers_or_err();
3637    }
3638
    /// Run the `__gc` metamethod of every object the heap has queued for
    /// finalization (`Heap::take_tobefnz`), reporting failures according
    /// to the active Lua version.
    ///
    /// Returns `Ok(())` immediately when called re-entrantly (while
    /// `gc_finalizing` is set) or when nothing is pending. Otherwise every
    /// pending object is processed, even after a finalizer fails:
    /// - 5.4+: each error is routed through `emit_warn` as
    ///   `error in __gc metamethod (msg)` and the result is `Ok(())`.
    /// - 5.2/5.3: the first error is returned, wrapped in that same
    ///   message; later errors are dropped.
    /// - 5.1: the first error is returned unchanged; later errors are
    ///   dropped.
    fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
        // Reentrancy guard: a finalizer that triggers another collection
        // must not start a nested finalization pass.
        if self.gc_finalizing {
            return Ok(());
        }
        let pending = self.heap.take_tobefnz();
        if pending.is_empty() {
            return Ok(());
        }
        self.gc_finalizing = true;
        let mut first_err: Option<LuaError> = None;
        for obj in pending {
            let gc = self.get_mm(obj, Mm::Gc);
            // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
            // schedule the object for finalization (`__gc = true` is the
            // canonical placeholder); only call it at finalize time when it
            // is actually a function. gc.lua 5.2 :412 wires up exactly this
            // sentinel and then expects no call.
            let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
            if callable {
                // PUC `GCTM` sets `CIST_FIN` on the new ci so
                // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
                // `name = "__gc"`. luna threads the same outcome through the
                // generic `pending_tm` slot: the Lua frame born from this
                // call consumes it in `push_frame`. Saved/restored around the
                // call in case the handler is a native (which never pops it).
                // Bare event name; `frame_name` / `c_frame_name` add the
                // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
                // the convention used by `__close`, `__index`, …
                let saved_tm = self.pending_tm.replace("gc");
                // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
                // pcall, so `getinfo(2).namewhat` inside the finalizer reads
                // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
                // luna mirrors by temporarily tagging the current top Lua
                // frame's `tm` with the bare event name "gc" for the
                // duration of the call.
                let caller_tm_idx = self
                    .frames
                    .iter()
                    .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
                // `Some(prev)` only when a Lua frame was found and tagged;
                // `prev` is that frame's previous `tm` (itself optional).
                let saved_caller_tm = caller_tm_idx.and_then(|i| {
                    if let CallFrame::Lua(fr) = &mut self.frames[i] {
                        let prev = fr.tm;
                        fr.tm = Some("gc");
                        Some(prev)
                    } else {
                        None
                    }
                });
                if let Err(e) = self.call_value(gc, &[obj]) {
                    // PUC 5.1 GCTM raised the finalizer's error to the
                    // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
                    // baselines on `not pcall(collectgarbage)`). 5.2/5.3
                    // wrapped it in `error in __gc metamethod (msg)` first
                    // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
                    // introduced the warning system and switched to "warn
                    // then continue" — never re-raise, just route the
                    // wrapped message through `warn`. gc.lua 5.5 :378 wires
                    // up `_WARN` capture under the `if T then …` block to
                    // baseline on the same wrapped string.
                    if self.version >= LuaVersion::Lua54 {
                        let inner = self.error_text(&e);
                        let msg = format!("error in __gc metamethod ({inner})");
                        self.emit_warn(msg.as_bytes(), false);
                    } else if first_err.is_none() {
                        // Pre-5.4: remember only the first failure; the
                        // remaining finalizers still run.
                        let wrapped = if self.version >= LuaVersion::Lua52 {
                            let inner = self.error_text(&e);
                            let msg = format!("error in __gc metamethod ({inner})");
                            let s = Value::Str(self.heap.intern(msg.as_bytes()));
                            LuaError(s)
                        } else {
                            e
                        };
                        first_err = Some(wrapped);
                    }
                }
                // Undo both temporary tags. `get_mut` (rather than indexing)
                // tolerates the frame index no longer being present after the
                // call.
                self.pending_tm = saved_tm;
                if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
                    && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
                {
                    fr.tm = prev; // prev is Option<&'static str>; restore exactly
                }
            }
        }
        self.gc_finalizing = false;
        match first_err {
            Some(e) => Err(e),
            None => Ok(()),
        }
    }
3727
3728    /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
3729    /// Crosses up to three phases per call:
3730    ///   1. Pause      → seed Propagate (`gc_start_propagate`)
3731    ///   2. Propagate  → drain gray up to `budget`; on exhaustion run atomic
3732    ///                   (`gc_finish_atomic` → tobefnz populated; finalizers
3733    ///                   run via `run_finalizers`) and enter Sweep
3734    ///   3. Sweep      → `gc_sweep_step` up to (residual) `budget`
3735    /// Returns true when this call completed the cycle's sweep (back to
3736    /// Pause). The budget is spent generously across phases — a large `n`
3737    /// can finish a whole cycle in one call (PUC stop-the-world step).
3738    pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
3739        // Re-entry guard: never recurse — `run_finalizers` calls Lua code
3740        // that may hit a safe point and try to step again. Re-entry was OK
3741        // under STW (collect_garbage had its own guard) but here the
3742        // intermediate phase state would corrupt.
3743        if self.gc_finalizing {
3744            return false;
3745        }
3746        if self.heap.gc_phase_is_pause() {
3747            let (roots, extra) = self.gc_roots();
3748            self.heap.gc_start_propagate(&roots, &extra);
3749        }
3750        if self.heap.gc_phase_is_propagate() {
3751            if !self.heap.gc_step_propagate(budget) {
3752                return false;
3753            }
3754            self.heap.gc_finish_atomic();
3755            // any __gc scheduled by atomic — run before sweep so a finalizer
3756            // re-registering `self` re-enters the next cycle, not this sweep
3757            self.run_finalizers();
3758        }
3759        // either we just transitioned, or we entered already in Sweep, or
3760        // a finalizer started a new cycle (gc_sweep_step is a no-op then)
3761        self.heap.gc_sweep_step(budget)
3762    }
3763
3764    // ---- frames & calls ----
3765
3766    /// Begin calling stack[func_slot] with `nargs` (None: up to self.top).
3767    /// Returns true if a Lua frame was pushed (the dispatch loop continues
3768    /// there), false if a native completed inline.
3769    fn begin_call(
3770        &mut self,
3771        func_slot: u32,
3772        nargs: Option<u32>,
3773        nresults: i32,
3774        from_c: bool,
3775    ) -> Result<bool, LuaError> {
3776        let mut nargs = match nargs {
3777            Some(n) => n,
3778            None => self.top - (func_slot + 1),
3779        };
3780        // Consume `pending_is_tail` at the boundary: a tail-call op sets it
3781        // only for the immediately-following Lua activation. Native dispatch
3782        // (or `__call` resolution) below must not let it leak to the next
3783        // begin_call's frame; restore it just before push_frame for the Lua
3784        // arm so its meaning is preserved across __call chaining.
3785        let tailcalls = std::mem::take(&mut self.pending_tailcalls);
3786        // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
3787        // is inserted before the value so it becomes the first argument, and a
3788        // chain of `__call` tables resolves down to a real function.
3789        let mut chain = 0u32;
3790        loop {
3791            match self.stack[func_slot as usize] {
3792                Value::Closure(cl) => {
3793                    // P11-S2c.B JIT fast path: if the Proto's body fits
3794                    // the int-arith whitelist, every arg is `Value::Int`,
3795                    // and the cached arity matches, skip frame setup and
3796                    // run the cached native fn in-place.
3797                    if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
3798                        self.pending_tailcalls = tailcalls;
3799                        return Ok(false);
3800                    }
3801                    self.pending_tailcalls = tailcalls;
3802                    self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
3803                    // P12-S4-step0 — trace-on-call trigger. The frame
3804                    // we just pushed is the callee whose body the
3805                    // recorder will trace. Bump the per-Proto call
3806                    // counter; once it crosses `CALL_HOT_THRESHOLD`
3807                    // and no other trace is in flight, snapshot the
3808                    // callee's register window (R[0..max_stack]) and
3809                    // begin recording at `pc=0`. This is what unlocks
3810                    // tracing for functions whose body has no negative
3811                    // `Op::Jmp` back-edge (`fib`, recursive helpers).
3812                    //
3813                    // Gated on `trace_jit_enabled`, so the default
3814                    // dispatch pays a single not-taken branch.
3815                    if self.jit.trace_enabled {
3816                        let proto = cl.proto;
3817                        let c = proto.call_hot_count.get();
3818                        if c < u32::MAX / 2 {
3819                            proto.call_hot_count.set(c + 1);
3820                        }
3821                        // P13-S13-H — relaxed call-trigger:
3822                        // `c >= THRESHOLD` (was `c == THRESHOLD`) +
3823                        // `!already_cached` short-circuit. Lets a
3824                        // discarded short call-trigger close retry
3825                        // on the next call (fib(10/15/20/25)
3826                        // pathology — first capture is base-case
3827                        // [Lt,Jmp,Return1]; coverage-heuristic
3828                        // discards; next call gets to record at a
3829                        // potentially deeper recursion point).
3830                        // Without `already_cached`, the relaxed
3831                        // condition would re-record over a cached
3832                        // trace every call.
3833                        //
3834                        // P13-S13-K — additionally short-circuit on
3835                        // `proto.trace_gave_up`. The S13-I discard
3836                        // cap force-compiles a partial trace and
3837                        // flips this flag; subsequent calls into
3838                        // this Proto skip the RefCell borrow + Vec
3839                        // scan entirely.
3840                        if proto.trace_gave_up.get() {
3841                            return Ok(true);
3842                        }
3843                        let call_already_cached =
3844                            proto.traces.borrow().iter().any(|t| t.head_pc == 0);
3845                        if c >= crate::jit::trace::CALL_HOT_THRESHOLD
3846                            && self.jit.active_trace.is_none()
3847                            && !call_already_cached
3848                        {
3849                            // The new frame is on top: index in
3850                            // `self.frames` is `len() - 1`.
3851                            let frame_idx = self.frames.len() - 1;
3852                            // Snapshot R[0..max_stack] at the callee's
3853                            // base. `push_frame` resized `self.stack`
3854                            // to `base + max_stack`, so this window is
3855                            // guaranteed in-bounds.
3856                            let f = match &self.frames[frame_idx] {
3857                                CallFrame::Lua(f) => f,
3858                                _ => unreachable!("push_frame just pushed a Lua frame"),
3859                            };
3860                            let max_stack = cl.proto.max_stack as usize;
3861                            let base_us = f.base as usize;
3862                            let mut entry_tags = Vec::with_capacity(max_stack);
3863                            for i in 0..max_stack {
3864                                let (tag, _) = self.stack[base_us + i].unpack();
3865                                entry_tags.push(tag);
3866                            }
3867                            self.jit.active_trace =
3868                                Some(Box::new(crate::jit::trace::TraceRecord::start(
3869                                    cl.proto, 0, entry_tags, true,
3870                                )));
3871                            self.jit.recording_frame_base = frame_idx;
3872                        }
3873                    }
3874                    return Ok(true);
3875                }
3876                Value::Native(nc) => {
3877                    // v1.1 B10 Stage 2 — async-marked NativeClosure.
3878                    // Route through the cooperative-yield mechanism
3879                    // when async_mode is on; reject when called from
3880                    // a sync `eval`/`call_value` path (would have no
3881                    // executor to drive the returned future).
3882                    if nc.is_async {
3883                        if !self.async_mode {
3884                            let s = Value::Str(
3885                                self.heap.intern(b"async native called in sync context"),
3886                            );
3887                            self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
3888                            return Err(LuaError(s));
3889                        }
3890                        // Same root-up bookkeeping as the sync path:
3891                        // pin args + result-count expectation so a
3892                        // collection across the suspend boundary
3893                        // keeps the arg window live.
3894                        self.native_nresults = nresults;
3895                        self.gc_top = func_slot + nargs + 1;
3896                        // v1.3 Phase AS — fire the "call" hook BEFORE
3897                        // building the future. Mirrors the sync native
3898                        // path's `hook_call(true, nargs)` site
3899                        // (`exec.rs` further down) so embedders with a
3900                        // Rust debug hook installed see a Call event
3901                        // for async natives identical to the sync
3902                        // path. The matching "return" hook fires from
3903                        // `commit_async_native_result` in
3904                        // `async_drive.rs` after the future resolves.
3905                        // Placement follows audit §"Open questions"
3906                        // Q6: after the `native_nresults` / `gc_top`
3907                        // pin, before the future is constructed, so a
3908                        // hook body that triggers GC observes the
3909                        // correct pinned window. On hook error the
3910                        // sentinel never returns and
3911                        // `pending_async_native_*` remain `None` —
3912                        // the executor sees `DispatchOutcome::Error`
3913                        // (audit §A.1 edge cases).
3914                        self.hook_call(true, nargs)?;
3915                        // Transmute the stored NativeFn back to its
3916                        // real AsyncNativeFn shape. Sound because
3917                        // `set_async_native` / `create_async_native`
3918                        // installed an AsyncNativeFn through the
3919                        // identically-sized fn-pointer slot, and the
3920                        // `is_async` marker bit is what records that
3921                        // fact.
3922                        let async_fn: crate::vm::async_drive::AsyncNativeFn =
3923                            // SAFETY: same-size fn pointers; provenance
3924                            // preserved through `mem::transmute`. The
3925                            // `is_async` marker is the only safe-to-call
3926                            // gate, set exclusively by
3927                            // `Vm::create_async_native`.
3928                            unsafe { std::mem::transmute(nc.f) };
3929                        let vm_ptr: *mut Vm = self;
3930                        let fut = async_fn(vm_ptr, func_slot, nargs);
3931                        // Stash the future + post-call context for
3932                        // `drive_one` to surface to `EvalFuture::poll`.
3933                        self.pending_async_native_fut = Some(fut);
3934                        self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
3935                            func_slot,
3936                            nargs,
3937                            nresults,
3938                            gc_top: self.gc_top,
3939                        });
3940                        // Sentinel Err walked up to `drive_one` (same
3941                        // shape as `host_yield_pending`'s budget yield).
3942                        // Value::Nil — never seen by user code.
3943                        return Err(LuaError(Value::Nil));
3944                    }
3945                    // pcall/xpcall are yieldable: rather than calling the
3946                    // protected function through the Rust stack (which cannot be
3947                    // suspended), push a continuation frame and drive the call
3948                    // through the interpreter loop (PUC lua_pcallk). A yield
3949                    // inside it is preserved with the thread's saved frames.
3950                    use crate::runtime::value::NativeFn;
3951                    if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
3952                        return self.begin_pcall(func_slot, nargs, nresults);
3953                    }
3954                    if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
3955                        return self.begin_xpcall(func_slot, nargs, nresults);
3956                    }
3957                    // pairs(t) with a __pairs metamethod calls it yieldably (PUC
3958                    // luaB_pairs); without one, fall through to the plain native.
3959                    if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
3960                        let arg = self.stack[(func_slot + 1) as usize];
3961                        if !self.get_mm(arg, Mm::Pairs).is_nil() {
3962                            return self.begin_pairs(func_slot, nresults);
3963                        }
3964                    }
3965                    // a native that collects (e.g. `collectgarbage`) roots up to
3966                    // its own arguments — the caller's live registers all sit
3967                    // below `func_slot` and stay rooted.
3968                    self.native_nresults = nresults;
3969                    self.gc_top = func_slot + nargs + 1;
3970                    // Push the native onto the running-natives chain BEFORE
3971                    // firing the call hook so that `debug.getinfo(level)` and
3972                    // `arg_error` from inside the hook see this native as the
3973                    // currently-running C function (db.lua :344 reads
3974                    // `getinfo(2, "f").func` for the just-entered callee).
3975                    // Popped after the matching return hook fires — even on
3976                    // error, the pop must happen, so the body is bracketed
3977                    // through a scope guard.
3978                    self.running_natives.push(nc);
3979                    self.running_native_slots.push((func_slot, nargs));
3980                    // PUC luaD_precall fires the "call" hook for C functions too.
3981                    // A yield inside the native (coroutine.yield) propagates an
3982                    // Err and the matching "return" hook fires on resume instead.
3983                    if let Err(e) = self.hook_call(true, nargs) {
3984                        self.running_natives.pop();
3985                        self.running_native_slots.pop();
3986                        return Err(e);
3987                    }
3988                    // P09: trap a Rust panic in the native and surface it as
3989                    // a Lua error rather than letting it unwind through the
3990                    // VM into the embedder. The VM's internal state may still
3991                    // be inconsistent after a panic (half-pushed args,
3992                    // dangling GC references), so embedders that catch this
3993                    // class of error should drop and re-create the Vm — but
3994                    // it's still better than tearing the host process down.
3995                    // `AssertUnwindSafe` is sound because the caller is the
3996                    // dispatch loop and any half-done state is fenced behind
3997                    // the immediate Err return below.
3998                    use std::panic::{AssertUnwindSafe, catch_unwind};
3999                    let result =
4000                        match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
4001                            Ok(r) => r,
4002                            Err(payload) => {
4003                                let msg = panic_payload_str(&payload);
4004                                let s = Value::Str(
4005                                    self.heap.intern(format!("native panic: {msg}").as_bytes()),
4006                                );
4007                                Err(LuaError(s))
4008                            }
4009                        };
4010                    let nret = match result {
4011                        Ok(n) => n,
4012                        Err(e) => {
4013                            // Stash the offending native's name BEFORE the
4014                            // pop so a dying coroutine's traceback snapshot
4015                            // can prepend `[C]: in function '<name>'`. Use
4016                            // pushglobalfuncname (PUC walks package.loaded
4017                            // to qualify); fall back to "?".
4018                            self.errored_native =
4019                                Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
4020                            self.running_natives.pop();
4021                            self.running_native_slots.pop();
4022                            return Err(e);
4023                        }
4024                    };
4025                    // PUC `luaD_poscall` fires the return hook BEFORE moving
4026                    // results into the function's slot — at that point args
4027                    // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
4028                    // results above them at `[func_slot + 1 + nargs, …)`.
4029                    // luna's `nat_return` has already written the results
4030                    // into `[func_slot, func_slot + nret)`, so we replay PUC's
4031                    // layout by copying the results up past the preserved
4032                    // args, firing the hook (with ftransfer = nargs + 1, so
4033                    // `getlocal(2, ftransfer..)` reads results), and then
4034                    // copying back for `finish_results`. db.lua :541 reads
4035                    // `getinfo("r").ftransfer` + `getlocal` to inspect a
4036                    // returning native's results this way.
4037                    if self.hook.ret
4038                        && !self.in_hook
4039                        && (self.hook.func.is_some() || self.hook.rust_func.is_some())
4040                    {
4041                        let res_dst = func_slot + nargs + 1;
4042                        let need = (res_dst + nret) as usize;
4043                        if self.stack.len() < need {
4044                            self.stack.resize(need, Value::Nil);
4045                        }
4046                        for i in (0..nret).rev() {
4047                            self.stack[(res_dst + i) as usize] =
4048                                self.stack[(func_slot + i) as usize];
4049                        }
4050                        // widen the C-frame's argument window for getlocal
4051                        if let Some(slot) = self.running_native_slots.last_mut() {
4052                            slot.1 = nargs + nret;
4053                        }
4054                        let hr = self.hook_return(true, nargs + 1, nret);
4055                        if let Some(slot) = self.running_native_slots.last_mut() {
4056                            slot.1 = nargs;
4057                        }
4058                        // restore results into the slot finish_results expects
4059                        for i in 0..nret {
4060                            self.stack[(func_slot + i) as usize] =
4061                                self.stack[(res_dst + i) as usize];
4062                        }
4063                        self.running_natives.pop();
4064                        self.running_native_slots.pop();
4065                        hr?;
4066                    } else {
4067                        self.running_natives.pop();
4068                        self.running_native_slots.pop();
4069                    }
4070                    self.finish_results(func_slot, nret, nresults);
4071                    // the native may have allocated; collect with the results as
4072                    // the live boundary (PUC checks GC after a call returns).
4073                    self.maybe_collect_garbage(self.top);
4074                    return Ok(false);
4075                }
4076                v => {
4077                    let mm = self.get_mm(v, Mm::Call);
4078                    if mm.is_nil() {
4079                        return Err(self.call_err(v));
4080                    }
4081                    chain += 1;
4082                    // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
4083                    // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
4084                    // and the 5.5 test exercises the new tight bound directly
4085                    // (calls.lua :225 builds a 16-deep chain and expects the
4086                    // 16th to error). 5.4 calls.lua :194 instead builds a 20-
4087                    // deep chain and expects it to succeed.
4088                    let cap = if self.version >= crate::version::LuaVersion::Lua55 {
4089                        15
4090                    } else {
4091                        MAX_CCMT
4092                    };
4093                    if chain > cap {
4094                        return Err(self.rt_err("'__call' chain too long"));
4095                    }
4096                    // slots above shift by one; at a call site those are dead
4097                    // temps of the current frame
4098                    self.stack.insert(func_slot as usize, mm);
4099                    if self.top > func_slot {
4100                        self.top += 1;
4101                    }
4102                    nargs += 1;
4103                }
4104            }
4105        }
4106    }
4107
    /// Push a new Lua activation for closure `cl` and fire its "call" hook.
    ///
    /// * `func_slot` — stack slot of the callee; the `nargs` arguments occupy
    ///   the slots directly above it.
    /// * `nresults` — stored in the new frame unchanged.
    /// * `from_c` — stored in the new frame unchanged.
    ///
    /// In order, this: rejects the call when `func_slot + 256` exceeds
    /// `MAX_LUA_STACK`; rotates the surplus arguments of a vararg prototype
    /// below the new `base`; grows the stack to `base + max_stack` and
    /// nil-fills every register above the kept parameters; pushes the
    /// `CallFrame::Lua` record (consuming `pending_tm`, `pending_is_hook` and
    /// `pending_tailcalls`); builds the compat `arg` table when the prototype
    /// requests it; and finally fires the call hook.
    ///
    /// # Errors
    ///
    /// Returns a runtime error ("stack overflow", or "error in error handling"
    /// while `msgh_depth > 0`) when the stack limit check fails, and
    /// propagates any error from the call hook. The hook fires after
    /// `frames_push_sync` has already run.
    fn push_frame(
        &mut self,
        cl: Gc<LuaClosure>,
        func_slot: u32,
        nargs: u32,
        nresults: i32,
        from_c: bool,
    ) -> Result<(), LuaError> {
        if func_slot + 256 > MAX_LUA_STACK {
            // PUC `stackerror`: a stack overflow that surfaces while the
            // current activation is inside an xpcall message handler is
            // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
            // error handling". errors.lua :606 expects the inner pcall(loop)
            // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
            // message matching "error handling".
            let msg = if self.msgh_depth > 0 {
                "error in error handling"
            } else {
                "stack overflow"
            };
            return Err(self.rt_err(msg));
        }
        let proto = cl.proto;
        let nparams = proto.num_params as u32;
        // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
        // stack just below the new `base`, so a named vararg can be indexed
        // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
        // `[e1..em][p1..pn]` so the fixed params land at the new base.
        let n_varargs = if proto.is_vararg {
            nargs.saturating_sub(nparams)
        } else {
            0
        };
        if n_varargs > 0 {
            // `n_varargs > 0` implies `nargs > nparams`, so the rotation
            // amount never exceeds the length of the argument window.
            let s = (func_slot + 1) as usize;
            self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
        }
        // Registers start above the callee slot and any rotated-down extras.
        let base = func_slot + 1 + n_varargs;
        let need = (base + proto.max_stack as u32) as usize;
        if self.stack.len() < need {
            self.stack.resize(need, Value::Nil);
        }
        // wipe the register window beyond the kept parameters (stale values —
        // required for GC-safety and codegen). The varargs below `base` survive.
        let kept = nargs.saturating_sub(n_varargs).min(nparams);
        // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
        // need` since `base + nparams <= base + max_stack = need` and `kept <=
        // nparams`. `slice::fill` lowers to a single memset on Copy types.
        // NOTE(review): this relies on `max_stack >= num_params`, which is not
        // checked here — presumably a compiler invariant; confirm.
        unsafe {
            self.stack
                .get_unchecked_mut((base + kept) as usize..need)
                .fill(Value::Nil);
        }
        frames_push_sync(
            &mut self.frames,
            &mut self.frames_top,
            CallFrame::Lua(Frame {
                closure: cl,
                base,
                pc: 0,
                func_slot,
                nresults,
                hook_oldpc: u32::MAX,
                from_c,
                n_varargs,
                // single-shot consume: `close_slots` sets pending_tm before each
                // handler call; the next Lua frame born is that handler's.
                tm: self.pending_tm.take(),
                // `run_hook` sets `pending_is_hook` before dispatching the user
                // hook so its frame reports `namewhat = "hook"` via getinfo.
                is_hook: std::mem::take(&mut self.pending_is_hook),
                // also single-shot: reset to its default once recorded here.
                tailcalls: std::mem::take(&mut self.pending_tailcalls),
            }),
        );
        // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
        // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
        // the slot at `base + nparams`; the extras sit just below `base` from
        // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
        // hook; vararg.lua's contradictory expectations were already going to
        // fail either way (some asserts want `arg == nil`).
        if proto.has_compat_vararg_arg {
            let arg_slot = (base + nparams) as usize;
            let t = self.heap.new_table();
            {
                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
                let tm = unsafe { t.as_mut() };
                for i in 0..n_varargs {
                    let v = self.stack[(base - n_varargs + i) as usize];
                    // The `set_int` result is deliberately discarded: the key
                    // is bounded by `n_varargs` (≤ MAXUPVAL territory), well
                    // below `MAX_ASIZE`.
                    let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
                }
                let nk = Value::Str(self.heap.intern(b"n"));
                tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
                    .expect("'n' key");
            }
            // once-per-table barrier mirrors SETLIST: t is born BLACK during
            // Propagate and the bulk `set_int`/`set` calls above don't barrier
            self.heap
                .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
            self.stack[arg_slot] = Value::Table(t);
        }
        // PUC luaD_precall fires the "call" hook with the new frame current, so
        // a hook calling debug.getinfo(2) sees the entered function. For a Lua
        // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
        // fixed params count — extras already live below `base`).
        // A frame born via OP_TailCall fires "tail call" instead (PUC
        // luaD_pretailcall) and skips the matching "return" hook on exit.
        let is_tail = self
            .frames
            .last()
            .and_then(|f| f.lua())
            .is_some_and(|f| f.tailcalls > 0);
        self.hook_call_with(false, nparams, is_tail)?;
        Ok(())
    }
4224
4225    /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4226    /// the protected call `f` through the interpreter loop. The protected
4227    /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4228    /// at `func_slot+1` lets its results land one slot above the continuation —
4229    /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4230    /// Always returns `Ok(true)`: a continuation is now on the stack to be
4231    /// resolved by the loop (even when `f` is a native that already ran inline).
4232    fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4233        if nargs == 0 {
4234            return Err(crate::vm::builtins::raise_str(
4235                self,
4236                "bad argument #1 to 'pcall' (value expected)",
4237            ));
4238        }
4239        if self.pcall_depth >= MAX_C_DEPTH {
4240            return Err(self.rt_err("C stack overflow"));
4241        }
4242        self.pcall_depth += 1;
4243        frames_push_sync(
4244            &mut self.frames,
4245            &mut self.frames_top,
4246            CallFrame::Cont(NativeCont {
4247                kind: ContKind::Pcall,
4248                func_slot,
4249                nresults,
4250            }),
4251        );
4252        // call f (slot func_slot+1) with the remaining args, asking for all
4253        // results; a yield or error inside propagates with the continuation kept
4254        // on the stack (caught by `unwind` / preserved across a yield).
4255        self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4256        Ok(true)
4257    }
4258
4259    /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4260    /// message handler is stashed in the continuation and the arguments are
4261    /// shifted down over the handler's slot so `f`'s args are contiguous.
4262    fn begin_xpcall(
4263        &mut self,
4264        func_slot: u32,
4265        nargs: u32,
4266        nresults: i32,
4267    ) -> Result<bool, LuaError> {
4268        if nargs < 2 {
4269            return Err(crate::vm::builtins::raise_str(
4270                self,
4271                "bad argument #2 to 'xpcall' (value expected)",
4272            ));
4273        }
4274        if self.pcall_depth >= MAX_C_DEPTH {
4275            return Err(self.rt_err("C stack overflow"));
4276        }
4277        self.pcall_depth += 1;
4278        // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4279        // close its gap so f's args become [f@+1, a1@+2, ...].
4280        let handler = self.stack[(func_slot + 2) as usize];
4281        let nfargs = nargs - 2;
4282        for i in 0..nfargs {
4283            self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4284        }
4285        self.top = func_slot + 2 + nfargs;
4286        frames_push_sync(
4287            &mut self.frames,
4288            &mut self.frames_top,
4289            CallFrame::Cont(NativeCont {
4290                kind: ContKind::Xpcall { handler },
4291                func_slot,
4292                nresults,
4293            }),
4294        );
4295        self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4296        Ok(true)
4297    }
4298
4299    /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4300    /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4301    /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4302    /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4303    /// results land exactly where `pairs`'s results belong.
4304    fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4305        let arg = self.stack[(func_slot + 1) as usize];
4306        let mm = self.get_mm(arg, Mm::Pairs);
4307        // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4308        self.stack[func_slot as usize] = mm;
4309        self.top = func_slot + 2;
4310        frames_push_sync(
4311            &mut self.frames,
4312            &mut self.frames_top,
4313            CallFrame::Cont(NativeCont {
4314                kind: ContKind::Pairs,
4315                func_slot,
4316                nresults,
4317            }),
4318        );
4319        self.begin_call(func_slot, Some(1), 4, true)?;
4320        Ok(true)
4321    }
4322
4323    /// The running (top) Lua frame. The interpreter only reads this while a Lua
4324    /// frame is on top — a continuation frame is never the running frame (it is
4325    /// consumed the instant the call it protects unwinds onto it).
4326    #[inline]
4327    fn top_frame(&self) -> &Frame {
4328        self.frames
4329            .last()
4330            .and_then(CallFrame::lua)
4331            .expect("running Lua frame")
4332    }
4333
4334    #[inline]
4335    fn top_frame_mut(&mut self) -> &mut Frame {
4336        self.frames
4337            .last_mut()
4338            .and_then(CallFrame::lua_mut)
4339            .expect("running Lua frame")
4340    }
4341
4342    /// Pad/announce results sitting at func_slot.
4343    pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4344        if wanted < 0 {
4345            self.top = func_slot + nret;
4346        } else {
4347            let wanted = wanted as u32;
4348            let need = (func_slot + wanted) as usize;
4349            if self.stack.len() < need {
4350                self.stack.resize(need, Value::Nil);
4351            }
4352            for i in nret..wanted {
4353                self.stack[(func_slot + i) as usize] = Value::Nil;
4354            }
4355            self.top = func_slot + wanted;
4356        }
4357    }
4358
    /// v1.1 B10 Stage 1 — current Lua call-frame depth (read-only).
    /// Used by `EvalFuture` on the bootstrap poll to compute the
    /// `entry_depth` it will pass to subsequent resume slices.
    ///
    /// Returns the length of `self.frames` and does not modify any state.
    /// NOTE(review): `frames` also appears to hold `CallFrame::Cont`
    /// continuation entries, so this presumably counts those as well as Lua
    /// frames — confirm against `frames_push_sync`.
    pub(crate) fn frame_count(&self) -> usize {
        self.frames.len()
    }
4365
4366    fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4367        let nret = self.top - func_slot;
4368        let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4369        self.stack.truncate(func_slot as usize);
4370        self.top = func_slot;
4371        out
4372    }
4373
4374    // ---- open upvalues ----
4375
4376    #[doc(hidden)]
4377    pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4378        match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4379            Ok(i) => self.open_upvals[i].1,
4380            Err(i) => {
4381                let uv = self.heap.new_upvalue(UpvalState::Open {
4382                    slot,
4383                    thread: self.current,
4384                });
4385                self.open_upvals.insert(i, (slot, uv));
4386                uv
4387            }
4388        }
4389    }
4390
4391    pub(crate) fn close_from(&mut self, slot: u32) {
4392        while let Some(&(s, uv)) = self.open_upvals.last() {
4393            if s < slot {
4394                break;
4395            }
4396            let v = self.stack[s as usize];
4397            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4398            unsafe { uv.as_mut() }.set_closed(v);
4399            self.heap
4400                .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4401            self.open_upvals.pop();
4402        }
4403    }
4404
4405    /// Register a to-be-closed slot (TBC op / generic-for closing value).
4406    fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4407        let v = self.stack[slot as usize];
4408        if matches!(v, Value::Nil | Value::Bool(false)) {
4409            return Ok(()); // nil and false are silently ignored
4410        }
4411        if self.get_mm(v, Mm::Close).is_nil() {
4412            // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4413            // (a <type> value)"; the local's name comes from the running
4414            // frame's locvars at this pc.
4415            let tn = v.type_name();
4416            let f = self.top_frame();
4417            let reg = slot - f.base;
4418            let pc = (f.pc as usize).saturating_sub(1);
4419            let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4420                Some(n) => format!("variable '{n}'"),
4421                None => "to-be-closed slot".to_string(),
4422            };
4423            return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4424        }
4425        debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4426        self.tbc.push(slot);
4427        Ok(())
4428    }
4429
    /// Close upvalues and run `__close` handlers for slots ≥ `from`
    /// (handlers in reverse registration order; PUC luaF_close).
    ///
    /// * `from` — lowest stack slot to close.
    /// * `err` — the error object being unwound, or `None` on a normal close.
    ///   It is handed to the handlers as their second argument and also
    ///   selects the `error_close` flag passed to `call_value_impl`.
    ///
    /// Handlers are invoked through `call_value_impl`, i.e. not through the
    /// continuation-based path that `begin_close` / `drive_close` use.
    ///
    /// # Errors
    ///
    /// Returns the last error raised by a handler (or synthesized for a value
    /// whose `__close` has disappeared). All remaining slots are still closed
    /// after an error. The incoming `err` itself is never returned: if no
    /// handler fails the result is `Ok(())`.
    fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
        self.close_from(from);
        // PUC: handlers run in reverse declaration order; an error raised by a
        // handler becomes the error object passed to the remaining ones, and
        // the rest are still closed. The last raised error propagates.
        let mut pending = err;
        let mut result = Ok(());
        // `closing_err` is overwritten per handler below; remember the outer
        // value so it can be put back once every slot has been handled.
        let saved_err = self.closing_err;
        // On a normal close the handler runs within the closing function's
        // activation (debug parent = that function); during error unwinding the
        // function's frame is already gone, so the handler sits at the C
        // boundary instead (PUC: luaF_close runs after the ci is restored).
        let error_close = err.is_some();
        while let Some(&s) = self.tbc.last() {
            if s < from {
                break;
            }
            self.tbc.pop();
            let v = self.stack[s as usize];
            // A slot whose value is now nil/false has nothing to close.
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                // PUC `prepclosingmethod`: the __close metamethod was present
                // at OP_TBC (else we would have errored there) but has since
                // been removed/replaced. Treat as a non-callable target.
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                // The synthesized error becomes the error object seen by the
                // remaining handlers and the provisional return value.
                pending = Some(e.0);
                result = Err(e);
                continue;
            }
            // root the pending error: a handler may trigger a collection
            self.closing_err = pending;
            // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
            // getinfo report the handler as "in metamethod 'close'". Saved/
            // restored around the call to cover the path where `mm` is a
            // native (`push_frame` never consumes it) or it raises before
            // reaching push_frame.
            let saved_tm = self.pending_tm.replace("close");
            // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
            // is nil on a normal close (5.4 locals.lua :875's
            // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
            // through the yield). PUC 5.5 dropped the trailing nil: a clean
            // close passes only `obj`, the error case still passes both
            // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
            // normal-close arms, n=2 for the error arm).
            let call = match pending {
                Some(e) => self.call_value_impl(mm, &[v, e], error_close),
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        self.call_value_impl(mm, &[v], error_close)
                    } else {
                        self.call_value_impl(mm, &[v, Value::Nil], error_close)
                    }
                }
            };
            self.pending_tm = saved_tm;
            // A failing handler replaces the threaded error object; keep going
            // so the remaining slots are still closed.
            if let Err(e) = call {
                pending = Some(e.0);
                result = Err(e);
            }
        }
        self.closing_err = saved_err;
        result
    }
4501
    /// Yieldable variant of `close_slots`: drive the chain of `__close`
    /// handlers for slots ≥ `from` through the interpreter loop with a
    /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
    /// suspends cleanly (the close iteration's state rides on the thread's
    /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
    /// applied to `luaF_close`. `after` runs when every slot is closed; if
    /// `after` is `Return` and we've returned past `entry_depth`,
    /// `Ok(Some(vals))` carries the result up to the host caller.
    ///
    /// `err` is the error object being unwound, if any; it is forwarded to
    /// `drive_close` as the initial pending error. Open upvalues at or above
    /// `from` are closed here, before any handler is staged. The return value
    /// is whatever `drive_close` returns.
    fn begin_close(
        &mut self,
        from: u32,
        err: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        // Upvalues first, then the `__close` handler chain.
        self.close_from(from);
        self.drive_close(from, err, after, entry_depth)
    }
4520
4521    /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
4522    /// non-callable-mm error for an `__close` that was reset to a bad value
4523    /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
4524    /// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
4525    /// the interpreter then drives the handler and re-enters this driver via
4526    /// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
4527    /// the threaded error (if any) propagates or `after` fires.
4528    fn drive_close(
4529        &mut self,
4530        from: u32,
4531        mut pending: Option<Value>,
4532        after: AfterClose,
4533        entry_depth: usize,
4534    ) -> Result<Option<Vec<Value>>, LuaError> {
4535        loop {
4536            let drained = match self.tbc.last() {
4537                None => true,
4538                Some(&s) => s < from,
4539            };
4540            if drained {
4541                return self.finish_close_after(after, pending, entry_depth);
4542            }
4543            let s = self.tbc.pop().expect("tbc non-empty");
4544            let v = self.stack[s as usize];
4545            if matches!(v, Value::Nil | Value::Bool(false)) {
4546                continue;
4547            }
4548            let mm = self.get_mm(v, Mm::Close);
4549            if mm.is_nil() {
4550                let tn = self.obj_typename(v);
4551                let e = self.rt_err(&format!(
4552                    "attempt to call a {tn} value (metamethod 'close')"
4553                ));
4554                pending = Some(e.0);
4555                continue;
4556            }
4557            // A real handler: stage [mm, v, (err?)] above the current top,
4558            // record the close iteration state in a Cont::Close, and let the
4559            // interpreter dispatch the handler. On return the run() head
4560            // re-enters this driver via the Cont::Close consumer.
4561            let func_slot = self.top;
4562            let error_close = pending.is_some();
4563            let need = (func_slot + 3) as usize;
4564            if self.stack.len() < need {
4565                self.stack.resize(need, Value::Nil);
4566            }
4567            self.stack[func_slot as usize] = mm;
4568            self.stack[func_slot as usize + 1] = v;
4569            // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
4570            // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
4571            let nargs = match pending {
4572                Some(e) => {
4573                    self.stack[func_slot as usize + 2] = e;
4574                    2u32
4575                }
4576                None => {
4577                    if self.version >= LuaVersion::Lua55 {
4578                        1u32
4579                    } else {
4580                        self.stack[func_slot as usize + 2] = Value::Nil;
4581                        2u32
4582                    }
4583                }
4584            };
4585            self.top = func_slot + 1 + nargs;
4586            // Root the pending error during the call (a handler may collect).
4587            let saved_err = self.closing_err;
4588            self.closing_err = pending;
4589            // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
4590            // for traceback / getinfo.
4591            let saved_tm = self.pending_tm.replace("close");
4592            frames_push_sync(
4593                &mut self.frames,
4594                &mut self.frames_top,
4595                CallFrame::Cont(NativeCont {
4596                    kind: ContKind::Close(CloseCont {
4597                        from,
4598                        pending,
4599                        after,
4600                    }),
4601                    func_slot,
4602                    nresults: 0,
4603                }),
4604            );
4605            // PUC luaF_close runs a normal close *within* the closing
4606            // function's activation (debug parent = that function); during an
4607            // error unwind the function's frame is already gone and the
4608            // handler sits at the C boundary instead.
4609            let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
4610            self.pending_tm = saved_tm;
4611            self.closing_err = saved_err;
4612            r?;
4613            return Ok(None);
4614        }
4615    }
4616
4617    /// Fire `after` once every `__close` handler has run. `Block` propagates
4618    /// any remaining error or simply continues; `Return` performs OP_Return's
4619    /// tail (hook + frame pop + result delivery) and may surface results to
4620    /// the host when the function whose return triggered the close was the
4621    /// entry activation, but only on a clean drain — a pending error skips
4622    /// the return tail and propagates instead. `ResumeUnwind` pops the
4623    /// deferred Lua frame and re-raises, letting a handler's own error win
4624    /// over the original propagating one (PUC luaF_close).
4625    fn finish_close_after(
4626        &mut self,
4627        after: AfterClose,
4628        pending: Option<Value>,
4629        entry_depth: usize,
4630    ) -> Result<Option<Vec<Value>>, LuaError> {
4631        match after {
4632            AfterClose::Block => match pending {
4633                Some(e) => Err(LuaError(e)),
4634                None => Ok(None),
4635            },
4636            AfterClose::Return {
4637                abs_a,
4638                nret,
4639                from_native,
4640            } => match pending {
4641                Some(e) => Err(LuaError(e)),
4642                None => self.complete_return(abs_a, nret, from_native, entry_depth),
4643            },
4644            AfterClose::ResumeUnwind { func_slot, err } => {
4645                // The aborting Lua frame was popped before `begin_close`;
4646                // restore the catcher's stack window down to `func_slot` and
4647                // re-raise — preferring a handler-raised error over the
4648                // original (PUC luaF_close).
4649                self.stack.truncate(func_slot as usize);
4650                self.top = func_slot;
4651                self.tbc.retain(|&s| s < func_slot);
4652                Err(LuaError(pending.unwrap_or(err)))
4653            }
4654        }
4655    }
4656
4657    /// OP_Return's post-close tail: fire the "return" hook (frame still
4658    /// current), pop the Lua frame, slide results into `func_slot`, then
4659    /// either hand them to the host (`Ok(Some(vals))` when we've returned
4660    /// past `entry_depth`), leave them contiguous for an exposed
4661    /// pcall/xpcall continuation, or finish into the caller's expected
4662    /// result slot. Mirrors the synchronous OP_Return tail so both paths
4663    /// share semantics — the `from_native` flag selects the right "return"
4664    /// hook context for `hook_return`.
4665    fn complete_return(
4666        &mut self,
4667        abs_a: u32,
4668        nret: u32,
4669        from_native: bool,
4670        entry_depth: usize,
4671    ) -> Result<Option<Vec<Value>>, LuaError> {
4672        // ftransfer is the local index (1-based) of the first result, as
4673        // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
4674        // exposes locals starting at `frame.base` (= func_slot + 1 +
4675        // n_varargs for a vararg call), so the conversion is the absolute
4676        // result slot minus base, plus one to make it 1-based. db.lua 5.4
4677        // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
4678        // shape end-to-end.
4679        let ftransfer = self
4680            .frames
4681            .last()
4682            .and_then(CallFrame::lua)
4683            .map(|fr| {
4684                let raw = abs_a.saturating_sub(fr.base) + 1;
4685                // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
4686                // local injected at index `numparams + 1`, so getlocal
4687                // numbering shifts results past it (5.5 db.lua :539
4688                // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
4689                if fr.closure.proto.has_vararg_table_pseudo {
4690                    raw + 1
4691                } else {
4692                    raw
4693                }
4694            })
4695            .unwrap_or(1);
4696        // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
4697        // per tail call that collapsed into this activation, *after* its
4698        // own "return". `tailcalls` tracks that count exactly (PUC
4699        // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
4700        // "return" hook fires once even when the activation absorbed
4701        // multiple tail calls — only `istailcall` on getinfo surfaces the
4702        // collapse. 5.1 db.lua :366 pins the event ordering.
4703        let tailcalls = if self.version <= LuaVersion::Lua51 {
4704            self.frames
4705                .last()
4706                .and_then(|f| f.lua())
4707                .map(|f| f.tailcalls)
4708                .unwrap_or(0)
4709        } else {
4710            0
4711        };
4712        self.hook_return(from_native, ftransfer, nret)?;
4713        for _ in 0..tailcalls {
4714            self.hook_tail_return()?;
4715        }
4716        let CallFrame::Lua(fr) =
4717            frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
4718        else {
4719            unreachable!("returning from a non-Lua frame")
4720        };
4721        for i in 0..nret {
4722            self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
4723        }
4724        if self.frames.len() < entry_depth {
4725            self.top = fr.func_slot + nret;
4726            return Ok(Some(self.take_results(fr.func_slot)));
4727        } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
4728            self.top = fr.func_slot + nret;
4729        } else {
4730            self.finish_results(fr.func_slot, nret, fr.nresults);
4731        }
4732        Ok(None)
4733    }
4734
4735    #[doc(hidden)]
4736    pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
4737        match cl.upvals()[idx as usize].state() {
4738            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
4739            UpvalState::Closed(v) => v,
4740        }
4741    }
4742
4743    fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
4744        let uv = cl.upvals()[idx as usize];
4745        match uv.state() {
4746            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
4747            UpvalState::Closed(_) => {
4748                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4749                unsafe { uv.as_mut() }.set_closed(v);
4750                // forward barrier: a closed upvalue is single-slot, so the
4751                // forward variant is cheaper than barrier_back (PUC uses
4752                // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
4753                // tables / threads).
4754                self.heap
4755                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4756            }
4757        }
4758    }
4759
4760    // ---- register / error helpers ----
4761
4762    #[inline(always)]
4763    fn r(&self, base: u32, i: u32) -> Value {
4764        // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
4765        // at frame entry (`push_frame` sizes the stack up to base + max_stack),
4766        // and every bytecode-generated reference falls within `[0, max_stack)`.
4767        // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
4768        // reason. The bounds check would re-validate this invariant on every
4769        // op — the dispatch hot path can't afford it.
4770        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4771        unsafe { *self.stack.get_unchecked((base + i) as usize) }
4772    }
4773
4774    #[inline(always)]
4775    fn set_r(&mut self, base: u32, i: u32, v: Value) {
4776        // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
4777        // frame-entry contract.
4778        unsafe {
4779            *self.stack.get_unchecked_mut((base + i) as usize) = v;
4780        }
4781    }
4782
4783    #[doc(hidden)]
4784    pub fn rt_err(&mut self, msg: &str) -> LuaError {
4785        let text = match self.position_prefix() {
4786            Some(p) => format!("{p}{msg}"),
4787            None => msg.to_string(),
4788        };
4789        LuaError(Value::Str(self.heap.intern(text.as_bytes())))
4790    }
4791
4792    pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
4793        let extra = self.subject_varinfo(v);
4794        let tn = self.obj_typename(v);
4795        self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
4796    }
4797
4798    /// Name the offending operand of the current instruction (PUC varinfo) for
4799    /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
4800    /// to the instruction's subject register(s); a native-raised error whose
4801    /// current instruction doesn't hold `bad` simply yields "".
4802    fn subject_varinfo(&self, bad: Value) -> String {
4803        use crate::vm::isa::Op;
4804        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
4805            return String::new();
4806        };
4807        let proto = f.closure.proto;
4808        let p: &crate::runtime::Proto = &proto;
4809        let pc = f.pc as usize;
4810        if pc == 0 || pc > p.code.len() {
4811            return String::new();
4812        }
4813        let instr = p.code[pc - 1];
4814        let mut cands: Vec<u32> = Vec::new();
4815        match instr.op() {
4816            // indexed reads / length / method: the table/object is in B
4817            Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
4818                cands.push(instr.b());
4819            }
4820            // indexed writes / calls: the table/function is in A
4821            Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
4822                cands.push(instr.a());
4823            }
4824            // arithmetic/bitwise: a register operand (B, and C unless constant)
4825            Op::Add
4826            | Op::Sub
4827            | Op::Mul
4828            | Op::Div
4829            | Op::Mod
4830            | Op::Pow
4831            | Op::IDiv
4832            | Op::BAnd
4833            | Op::BOr
4834            | Op::BXor
4835            | Op::Shl
4836            | Op::Shr => {
4837                cands.push(instr.b());
4838                if !instr.k() {
4839                    cands.push(instr.c());
4840                }
4841            }
4842            Op::Unm | Op::BNot => cands.push(instr.b()),
4843            Op::Concat => {
4844                let a = instr.a();
4845                for r in a..a + instr.b() {
4846                    cands.push(r);
4847                }
4848            }
4849            _ => {}
4850        }
4851        for reg in cands {
4852            if self.r(f.base, reg).raw_eq(bad) {
4853                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
4854                    Some((kind, name)) => format!(" ({kind} '{name}')"),
4855                    None => String::new(),
4856                };
4857            }
4858        }
4859        String::new()
4860    }
4861
4862    /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
4863    /// for the call target: "(global 'f')" for a direct call, or "(metamethod
4864    /// 'add')" when the call is a metamethod dispatched by the current opcode.
4865    fn call_err(&mut self, v: Value) -> LuaError {
4866        let extra = self.call_target_varinfo(v);
4867        let tn = self.obj_typename(v);
4868        self.rt_err(&format!("attempt to call a {tn} value{extra}"))
4869    }
4870
4871    /// Name the offending call target. A metamethod dispatch pushes a `Cont`
4872    /// frame before the call, so the opcode that triggered it lives in the
4873    /// nearest *Lua* frame — read that instruction: OP_CALL names the function
4874    /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
4875    fn call_target_varinfo(&self, bad: Value) -> String {
4876        use crate::vm::isa::Op;
4877        let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
4878            return String::new();
4879        };
4880        let proto = f.closure.proto;
4881        let p: &crate::runtime::Proto = &proto;
4882        let pc = f.pc as usize;
4883        if pc == 0 || pc > p.code.len() {
4884            return String::new();
4885        }
4886        let instr = p.code[pc - 1];
4887        match instr.op() {
4888            Op::Call | Op::TailCall => {
4889                let reg = instr.a();
4890                if self.r(f.base, reg).raw_eq(bad) {
4891                    match crate::vm::objname::getobjname(p, pc - 1, reg) {
4892                        Some((kind, name)) => format!(" ({kind} '{name}')"),
4893                        None => String::new(),
4894                    }
4895                } else {
4896                    String::new()
4897                }
4898            }
4899            op => match mm_event_name(op) {
4900                Some(ev) => format!(" (metamethod '{ev}')"),
4901                None => String::new(),
4902            },
4903        }
4904    }
4905
4906    /// "number has no integer representation", enriched (PUC luaG_tointerror)
4907    /// with a "(field 'x')"-style suffix naming the offending operand of the
4908    /// current arithmetic instruction when it can be recovered from bytecode.
4909    fn no_int_rep_err(&mut self) -> LuaError {
4910        let extra = self.bad_operand_varinfo();
4911        self.rt_err(&format!("number{extra} has no integer representation"))
4912    }
4913
4914    /// Inspect the current frame's faulting instruction: find the register
4915    /// operand holding a float with no integer representation and name it.
4916    fn bad_operand_varinfo(&self) -> String {
4917        let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
4918            return String::new();
4919        };
4920        let proto = f.closure.proto;
4921        let p: &crate::runtime::Proto = &proto;
4922        let pc = f.pc as usize;
4923        if pc == 0 || pc > p.code.len() {
4924            return String::new();
4925        }
4926        let instr = p.code[pc - 1];
4927        let mut regs = vec![instr.b()];
4928        if !instr.k() {
4929            regs.push(instr.c());
4930        }
4931        for reg in regs {
4932            let v = self.r(f.base, reg);
4933            if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
4934                return match crate::vm::objname::getobjname(p, pc - 1, reg) {
4935                    Some((kind, name)) => format!(" ({kind} '{name}')"),
4936                    None => String::new(),
4937                };
4938            }
4939        }
4940        String::new()
4941    }
4942
4943    /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
4944    /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
4945    /// frame is a C function (e.g. a pcall Cont parked above `require`'s
4946    /// native call), PUC pushes no prefix — match that by looking only at the
4947    /// topmost frame directly and bailing if it is anything but a Lua frame.
4948    pub(crate) fn position_prefix(&self) -> Option<String> {
4949        let f = self.frames.last().and_then(CallFrame::lua)?;
4950        let proto = f.closure.proto;
4951        if proto.source.as_bytes().is_empty() {
4952            return Some(self.stripped_prefix());
4953        }
4954        if proto.lines.is_empty() {
4955            return None;
4956        }
4957        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
4958        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4959        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
4960        let display = crate::vm::lib_debug::chunk_id(raw);
4961        let src = String::from_utf8_lossy(&display).into_owned();
4962        Some(format!("{src}:{line}: "))
4963    }
4964
4965    /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
4966    /// for the source and renders the line as "?" (so the prefix reads
4967    /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
4968    /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
4969    /// matches `^%?:%-1:`).
4970    fn stripped_prefix(&self) -> String {
4971        if self.version >= crate::version::LuaVersion::Lua55 {
4972            "?:?: ".to_string()
4973        } else {
4974            "?:-1: ".to_string()
4975        }
4976    }
4977
4978    /// Position prefix of the Lua frame `level` steps up from the running C
4979    /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
4980    /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
4981    /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
4982    /// caller's frame is reported even across pcall/xpcall continuations.
4983    pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
4984        let fi = match self.dbg_frame(level)? {
4985            DbgKind::Lua(fi) => fi,
4986            DbgKind::C(_) | DbgKind::Tail(_) => return None,
4987        };
4988        let f = self.frames[fi].lua()?;
4989        let proto = f.closure.proto;
4990        // PUC luaG_addinfo: a stripped chunk has no source — see
4991        // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
4992        if proto.source.as_bytes().is_empty() {
4993            return Some(self.stripped_prefix());
4994        }
4995        // a stripped chunk carries no per-instruction line info
4996        if proto.lines.is_empty() {
4997            return None;
4998        }
4999        let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5000        // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5001        // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5002        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5003        let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5004        let display = crate::vm::lib_debug::chunk_id(raw);
5005        let src = String::from_utf8_lossy(&display).into_owned();
5006        Some(format!("{src}:{line}: "))
5007    }
5008
5009    // ---- the interpreter ----
5010
5011    fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5012        let entry_depth = self.frames.len();
5013        self.exec_with(entry_depth)
5014    }
5015
    /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
    /// `entry_depth` (captured pre-yield by `drive_one`). Called by
    /// `EvalFuture::poll` on every poll after the first to walk the
    /// existing call frames until the next `BudgetExhausted` or
    /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
    /// embedder reaches it through `Vm::eval_async`.
    ///
    /// This is a thin crate-visible wrapper over `exec_with`, which runs
    /// from the current top frame down to (but not past) `entry_depth`
    /// frames. Coroutine driving passes `entry_depth = 1` to `exec_with` so
    /// the whole thread runs to completion or a yield.
    pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
        self.exec_with(entry_depth)
    }
5028
5029    fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5030        loop {
5031            let r = self.run(entry_depth);
5032            if r.is_err()
5033                && (self.yielding.is_some()
5034                    || self.terminating.is_some()
5035                    || self.host_yield_pending
5036                    || self.pending_async_native_fut.is_some())
5037            {
5038                // a `coroutine.yield` is in flight: keep the frames intact (they
5039                // are the suspended coroutine's saved state) and propagate to
5040                // resume. A self-close termination propagates the same way, so a
5041                // protecting pcall on the way out cannot catch (unwind) it.
5042                // v1.1 B10 — `host_yield_pending` is the async-mode
5043                // analogue: the sentinel must reach `drive_one` without
5044                // a protecting `pcall` swallowing it.
5045                return r;
5046            }
5047            match r {
5048                Ok(vals) => return Ok(vals),
5049                // unwind toward `entry_depth`. A protecting pcall/xpcall
5050                // continuation caught along the way turns the error into
5051                // `false, msg` and the loop resumes running its caller; an
5052                // uncaught error propagates out.
5053                Err(e) => match self.unwind(e.0, entry_depth) {
5054                    Unwound::Caught => continue,
5055                    Unwound::CaughtReturn(vals) => return Ok(vals),
5056                    Unwound::Propagated(err) => return Err(err),
5057                },
5058            }
5059        }
5060    }
5061
5062    /// Unwind the call stack from the error point toward `entry_depth`, running
5063    /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
5064    /// continuation frame at/above `entry_depth` (the error is *caught*: its
5065    /// slot receives `false, msg`); if none is reached, the error propagates.
5066    fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
5067        // PUC 5.5 `luaG_errormsg` substitutes "<no error object>" when the
5068        // error object is nil — so `pcall(function() error(nil) end)` returns
5069        // that string instead of nil, and `assert(nil, nil)` (whose path
5070        // throws nil via `lua_settop(L, 1)`) also surfaces a string. Earlier
5071        // dialects (5.4 and below) keep the nil — 5.4 errors.lua :49 asserts
5072        // `doit("error()") == nil` and luna would fail that if it always
5073        // substituted. luna's native `error()` still does its own conversion
5074        // for direct callers.
5075        if matches!(err, Value::Nil) && self.version >= crate::version::LuaVersion::Lua55 {
5076            err = Value::Str(self.heap.intern(b"<no error object>"));
5077        }
5078        // The protected call runs in-place among the caller frames' registers,
5079        // so truncating the failed frames here cuts into caller windows below
5080        // the catcher. Snapshot the live length: at the error point the stack
5081        // already spans every surviving frame's window, so restoring it after a
5082        // catch reinstates them all (the reclaimed slots above are dead temps).
5083        // PUC handles overflow recovery via a separate EXTRA_STACK reserve;
5084        // we instead clamp the restore to the catcher's caller window when the
5085        // error point was at the stack limit (cause: the next `call_value_impl`
5086        // picks `func_slot = stack.len()` which would otherwise re-overflow).
5087        let saved_len = self.stack.len();
5088        // Snapshot the traceback at the error point — before any frame is
5089        // popped — so an `xpcall` msgh (which runs after the failed frames are
5090        // gone) can still describe the error site. The handler frame about to
5091        // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
5092        // visible here; once popped, `debug.traceback` would miss it.
5093        // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
5094        // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
5095        // recovery — locals.lua:659) re-overflows. Capture-once propagates
5096        // through nested unwinds (inner→outer) without re-running msgh.
5097        if self.error_traceback.is_none() {
5098            self.error_traceback = Some(self.traceback_bytes(1));
5099        }
5100        while self.frames.len() >= entry_depth {
5101            match *self.frames.last().expect("frame") {
5102                // a yieldable-metamethod continuation does not catch: discard the
5103                // abandoned instruction and keep unwinding (PUC drops the partial
5104                // op on error).
5105                CallFrame::Cont(NativeCont {
5106                    kind: ContKind::Meta(mc),
5107                    func_slot,
5108                    ..
5109                }) => {
5110                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5111                    self.stack.truncate(func_slot as usize);
5112                    self.top = mc.saved_top.min(func_slot);
5113                    self.tbc.retain(|&s| s < func_slot);
5114                }
5115                // a __pairs continuation does not catch either: an error inside
5116                // the metamethod propagates past `pairs`.
5117                CallFrame::Cont(NativeCont {
5118                    kind: ContKind::Pairs,
5119                    func_slot,
5120                    ..
5121                }) => {
5122                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5123                    self.stack.truncate(func_slot as usize);
5124                    self.top = func_slot;
5125                    self.tbc.retain(|&s| s < func_slot);
5126                }
5127                // a __close continuation does not catch: drop the half-run
5128                // handler's window, then continue the close yieldably with
5129                // the new error threaded as `pending`. Preserve `cc.after`
5130                // verbatim — `Return`/`Block` originating from an aborting
5131                // OP_Return/OP_Close will be short-circuited by
5132                // `finish_close_after` (pending propagates as Err); a
5133                // `ResumeUnwind` originated by our own Lua-frame handler
5134                // must keep its deferred frame-pop semantics so that frame
5135                // is not orphaned. If a fresh handler yields, `drive_close`
5136                // pushes another `Cont::Close` and we return `Caught` so
5137                // `exec_with` re-enters the run loop.
5138                CallFrame::Cont(NativeCont {
5139                    kind: ContKind::Close(cc),
5140                    func_slot,
5141                    ..
5142                }) => {
5143                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5144                    self.stack.truncate(func_slot as usize);
5145                    self.top = func_slot;
5146                    self.tbc.retain(|&s| s < func_slot);
5147                    match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
5148                        Ok(Some(_)) => {
5149                            unreachable!(
5150                                "Block / Return / ResumeUnwind never return host values mid-unwind"
5151                            )
5152                        }
5153                        Ok(None) => return Unwound::Caught,
5154                        Err(e) => {
5155                            err = e.0;
5156                            continue;
5157                        }
5158                    }
5159                }
5160                CallFrame::Cont(nc) => {
5161                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5162                    self.pcall_depth -= 1;
5163                    let result = match nc.kind {
5164                        ContKind::Pcall => err,
5165                        ContKind::Xpcall { handler } => {
5166                            // PUC keeps `L->errfunc` set across the handler's
5167                            // call: `luaG_errormsg` re-fires the handler when
5168                            // it raises (so `xpcall(error, err, 170)` lets the
5169                            // chain bottom out at err(0) → "END"). luna mirrors
5170                            // that by looping until the handler returns or
5171                            // luna's `iters` cap forces termination.
5172                            //
5173                            // The cap models PUC's nCcalls soft window
5174                            // (MAXCCALLS/10*11): once tripped, `stackerror`
5175                            // raises "C stack overflow" via `luaG_runerror`
5176                            // which itself re-enters `luaG_errormsg`, so the
5177                            // handler runs once more with that string and
5178                            // naturally returns it (errors.lua :637 at N=300).
5179                            // We count iterations per Cont::Xpcall rather than
5180                            // a global counter — nested xpcalls each get their
5181                            // own budget, matching the way PUC's stack frames
5182                            // accumulate per dispatch path.
5183                            const MSGH_CAP: u32 = MAX_C_DEPTH;
5184                            let mut cur_err = err;
5185                            let mut iters: u32 = 0;
5186                            let mut capped = false;
5187                            loop {
5188                                if iters >= MSGH_CAP && !capped {
5189                                    cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
5190                                    capped = true;
5191                                }
5192                                iters += 1;
5193                                self.msgh_depth += 1;
5194                                let r = self.call_value(handler, &[cur_err]);
5195                                self.msgh_depth -= 1;
5196                                match r {
5197                                    Ok(hr) => {
5198                                        break hr.first().copied().unwrap_or(Value::Nil);
5199                                    }
5200                                    Err(_) if capped => {
5201                                        // the handler still errored on the
5202                                        // synthesized "C stack overflow"; fall
5203                                        // back to PUC's LUA_ERRERR string.
5204                                        break Value::Str(
5205                                            self.heap.intern(b"error in error handling"),
5206                                        );
5207                                    }
5208                                    Err(e) => {
5209                                        cur_err = e.0;
5210                                    }
5211                                }
5212                            }
5213                        }
5214                        ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
5215                            unreachable!("Meta/Pairs/Close cont handled above")
5216                        }
5217                    };
5218                    // the error has been caught (pcall/xpcall): the captured
5219                    // traceback was for that error and is no longer in flight.
5220                    self.error_traceback = None;
5221                    let fs = nc.func_slot as usize;
5222                    if self.stack.len() < fs + 2 {
5223                        self.stack.resize(fs + 2, Value::Nil);
5224                    }
5225                    self.stack[fs] = Value::Bool(false);
5226                    self.stack[fs + 1] = result;
5227                    self.top = nc.func_slot + 2;
5228                    self.tbc.retain(|&s| s < nc.func_slot);
5229                    if self.frames.len() < entry_depth {
5230                        return Unwound::CaughtReturn(self.take_results(nc.func_slot));
5231                    }
5232                    self.finish_results(nc.func_slot, 2, nc.nresults);
5233                    // reinstate the caller windows the unwind truncated into,
5234                    // clamped to the catcher's caller window + a `MIN_STACK`
5235                    // reserve. The clamp is a no-op for normal pcall catches
5236                    // (saved_len lies within the caller's max_stack window),
5237                    // and prevents the stack from staying near `MAX_LUA_STACK`
5238                    // after an overflow-recovery catch — which would make the
5239                    // next `call_value_impl` (e.g. a `__close` in the catcher's
5240                    // errorh, locals.lua:659) pick `func_slot = stack.len()`
5241                    // above the limit and re-overflow.
5242                    // Restore the caller's full register window: opcodes
5243                    // index it directly. The cap covers caller's base +
5244                    // `max_stack` + a small reserve. We always resize to
5245                    // exactly this window — previously this clamped
5246                    // `saved_len` from above to prevent staying near
5247                    // `MAX_LUA_STACK` after an overflow-recovery catch, and
5248                    // a yieldable-unwind re-entry adds the dual case where
5249                    // `saved_len` is *below* the window (a prior
5250                    // `ResumeUnwind` truncated). Using the window directly
5251                    // covers both.
5252                    let restore = self
5253                        .frames
5254                        .iter()
5255                        .rev()
5256                        .find_map(CallFrame::lua)
5257                        .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
5258                        .unwrap_or(saved_len);
5259                    if self.stack.len() < restore {
5260                        self.stack.resize(restore, Value::Nil);
5261                    } else if self.stack.len() > restore {
5262                        self.stack.truncate(restore);
5263                    }
5264                    return Unwound::Caught;
5265                }
5266                CallFrame::Lua(f) => {
5267                    // Yieldable error-unwind close, PUC luaG_errormsg shape:
5268                    // (1) pop the Lua frame immediately so each `__close`
5269                    // handler runs at the C boundary above — `debug.getinfo`
5270                    // sees the next outer Lua frame's call site (typically
5271                    // `pcall`), not this aborting function (locals.lua:480).
5272                    // (2) drive the close yieldably with
5273                    // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
5274                    // it truncates to `func_slot` and re-raises (letting a
5275                    // handler-raised error win over `err`). If a handler
5276                    // yields, `drive_close` pushes `Cont::Close` and we
5277                    // return `Caught` so `exec_with` re-enters the run loop;
5278                    // a synchronous drain returns Err exactly as the old
5279                    // path did.
5280                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5281                    let after = AfterClose::ResumeUnwind {
5282                        func_slot: f.func_slot,
5283                        err,
5284                    };
5285                    match self.begin_close(f.base, Some(err), after, entry_depth) {
5286                        Ok(Some(_)) => {
5287                            unreachable!("ResumeUnwind never returns host values")
5288                        }
5289                        Ok(None) => return Unwound::Caught,
5290                        Err(e) => {
5291                            err = e.0;
5292                            continue;
5293                        }
5294                    }
5295                }
5296            }
5297        }
5298        Unwound::Propagated(LuaError(err))
5299    }
5300
5301    fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5302        loop {
5303            // Fast-path slow-check gate: most embedders run with both
5304            // `instr_budget` and `mem_cap` as None, so a single combined
5305            // is_some test lets the hot loop skip both branches with one
5306            // load + branch instead of two.
5307            if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5308                if let Some(b) = self.instr_budget.as_mut() {
5309                    *b -= 1;
5310                    if *b <= 0 {
5311                        self.instr_budget = None;
5312                        // v1.1 B10 Stage 1 — async-mode cooperative
5313                        // yield. Set a sentinel flag so `exec_with`
5314                        // propagates the Err without `unwind` running
5315                        // (mirroring the `yielding.is_some()` path),
5316                        // and `call_value_impl` preserves the call
5317                        // frames for the next `poll`. Translation back
5318                        // to `DispatchOutcome::BudgetExhausted` happens
5319                        // in `drive_one`. The Err value itself is
5320                        // `Value::Nil` — a pure sentinel, never seen by
5321                        // user code.
5322                        if self.async_mode {
5323                            self.host_yield_pending = true;
5324                            return Err(LuaError(Value::Nil));
5325                        }
5326                        // B6: classify the trip so embedders can
5327                        // distinguish budget exhaustion from a
5328                        // generic Runtime error and retry / give up
5329                        // accordingly.
5330                        self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5331                        let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5332                        return Err(LuaError(s));
5333                    }
5334                }
5335                if let Some(cap) = self.heap.mem_cap
5336                    && self.heap.bytes() > cap
5337                {
5338                    // First try a full collect — embedders set tight caps
5339                    // and the overshoot may be reclaimable (closures kept
5340                    // by short-lived frames, intermediate strings). Only
5341                    // disarm + raise if the cap is still breached after
5342                    // collection. PUC's `LUA_GCEMERGENCY` path matches.
5343                    // gc_top must include `self.top` so the running frame's
5344                    // live locals (e.g. a growing table) are not freed.
5345                    self.gc_top = self.top;
5346                    self.collect_garbage();
5347                    if self.heap.bytes() > cap {
5348                        self.heap.mem_cap = None;
5349                        let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5350                        return Err(LuaError(s));
5351                    }
5352                }
5353            }
5354            // Single combined frame fetch: continuation arm OR Lua arm. Saves
5355            // a second `self.frames.last()` slice access vs the prior split
5356            // form (LLVM doesn't always CSE these across the cont branch).
5357            // A continuation frame on top means the call it protected just
5358            // delivered its results — wrap as `true, results…` and hand to
5359            // the pcall/xpcall caller. The error path is handled by `unwind`;
5360            // this branch is only reached on success/resume completion.
5361            // SAFETY: `unwrap_unchecked` is sound only while `self.frames` is non-empty at the loop head. NOTE(review): no emptiness check is visible here — confirm that callers push the entry frame before `run` and that no path pops the last frame and then loops.
5362            let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5363            if let &CallFrame::Cont(nc) = frame_peek {
5364                // a yieldable metamethod returned: complete the interrupted
5365                // instruction (PUC luaV_finishOp) and resume the running frame.
5366                if let ContKind::Meta(mc) = nc.kind {
5367                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5368                    let result = if self.top > nc.func_slot {
5369                        self.stack[nc.func_slot as usize]
5370                    } else {
5371                        Value::Nil
5372                    };
5373                    self.stack.truncate(nc.func_slot as usize);
5374                    self.top = mc.saved_top;
5375                    self.finish_meta(mc.action, result)?;
5376                    continue;
5377                }
5378                // a __close handler returned successfully: discard its
5379                // results, restore `top` to the slot the handler was called
5380                // at (the surrounding frame's register window above this slot
5381                // must stay alloc'd — never truncate the underlying stack),
5382                // then continue the close chain (next slot, or fire
5383                // AfterClose). When the close ends an entry activation,
5384                // drive_close hands the results up to exec_with directly.
5385                if let ContKind::Close(cc) = nc.kind {
5386                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5387                    self.top = nc.func_slot;
5388                    if let Some(vals) =
5389                        self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5390                    {
5391                        return Ok(vals);
5392                    }
5393                    continue;
5394                }
5395                // __pairs returned: normalize its results to exactly four
5396                // (iterator, state, control, closing) at pairs's slot, where
5397                // the metamethod was called, and hand them to pairs's caller.
5398                if let ContKind::Pairs = nc.kind {
5399                    frames_pop_sync(&mut self.frames, &mut self.frames_top);
5400                    let total = 4u32;
5401                    let need = (nc.func_slot + total) as usize;
5402                    if self.stack.len() < need {
5403                        self.stack.resize(need, Value::Nil);
5404                    }
5405                    for s in self.top..(nc.func_slot + total) {
5406                        self.stack[s as usize] = Value::Nil;
5407                    }
5408                    self.top = nc.func_slot + total;
5409                    if self.frames.len() < entry_depth {
5410                        return Ok(self.take_results(nc.func_slot));
5411                    }
5412                    self.finish_results(nc.func_slot, total, nc.nresults);
5413                    continue;
5414                }
5415                frames_pop_sync(&mut self.frames, &mut self.frames_top);
5416                self.pcall_depth -= 1;
5417                // f's results sit at nc.func_slot+1.. (f was called one slot
5418                // above the continuation), so writing `true` at the slot makes
5419                // `true, results…` already contiguous.
5420                let nret = self.top - (nc.func_slot + 1);
5421                self.stack[nc.func_slot as usize] = Value::Bool(true);
5422                let total = 1 + nret;
5423                self.top = nc.func_slot + total;
5424                if self.frames.len() < entry_depth {
5425                    return Ok(self.take_results(nc.func_slot));
5426                }
5427                self.finish_results(nc.func_slot, total, nc.nresults);
5428                continue;
5429            }
5430            // GC runs only at the allocation safe points below (PUC's
5431            // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5432            // no longer collects, so a stale full-window `gc_top` cannot leak in.
5433            //
5434            // Hot-path frame fetch: the Cont arm above continues the loop,
5435            // so reaching here means `frame_peek` is the Lua frame. Reuse it
5436            // rather than re-fetching `self.frames.last()`.
5437            let f = match frame_peek {
5438                CallFrame::Lua(f) => f,
5439                _ => unreachable!("Cont frame survived the dispatch loop head"),
5440            };
5441            let cl = f.closure;
5442            let base = f.base;
5443            let func_slot = f.func_slot;
5444            let n_varargs = f.n_varargs;
5445            let pc = f.pc;
5446            let oldpc = f.hook_oldpc;
5447
5448            // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5449            // — every branch / call op only sets `pc` to a valid index, and
5450            // function entry initialises pc=0 with a non-empty body. PUC's
5451            // `vmfetch` uses the equivalent unchecked load.
5452            let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5453
5454            // P12-S1.C/D — trace recording append + close detection.
5455            // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5456            // so default dispatch keeps a single not-taken branch.
5457            //
5458            // - At the head PC with a non-empty record, the trace has
5459            //   looped back to its start: mark `closed = true` and
5460            //   take the record (S2 will compile + cache).
5461            // - Otherwise, capture the op. If the record overflows
5462            //   MAX_TRACE_LEN, abort by dropping it.
5463            if self.jit.trace_enabled
5464                && let Some(_rec) = self.jit.active_trace.as_mut()
5465            {
5466                // P12-S4 — depth tracking. The trace head's frame is
5467                // at index `recording_frame_base`; every Op::Call that
5468                // pushes a new frame bumps the live depth, every
5469                // Op::Return that pops one decrements it.
5470                //
5471                // **Three clean-close conditions** (P12-S4-step4a):
5472                // - `at_head`: cur_depth == 0 AND about-to-execute the
5473                //   trace's head_pc on its head_proto (loop closed back
5474                //   to start). Same for loop-triggered and call-triggered
5475                //   traces — step4a unified the gating so call-triggered
5476                //   no longer closes on the first re-entry (that left
5477                //   fib's body at 7 depth=0 ops; step4a lets it inline
5478                //   up to MAX_INLINE_DEPTH levels before any close).
5479                // - `returned_past_head`: trace head's frame is gone
5480                //   (callee returned past it, or the call-trigger
5481                //   started a recording inside a callee that has now
5482                //   returned). Whatever ops were recorded form the
5483                //   trace body; the lowerer treats the partial trace
5484                //   the same as InlineAbort (dispatchable=false until
5485                //   step4b's frame materialization lands).
5486                // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5487                //   Recording any deeper would just bloat the IR; close
5488                //   with the body we have. Lowerer's existing length
5489                //   gate + InlineAbort path handles short bodies.
5490                let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5491                let cur_depth = if returned_past_head {
5492                    0
5493                } else {
5494                    self.frames.len() - 1 - self.jit.recording_frame_base
5495                };
5496                let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5497                let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5498                let at_head_loop = cur_depth == 0
5499                    && !rec.ops.is_empty()
5500                    && !returned_past_head
5501                    && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5502                    && pc == rec.head_pc;
5503                // P16-A — self-link cycle catch (mirrors LuaJIT's
5504                // `check_call_unroll` at `lj_record.c:1869`). Trips when:
5505                //   1. We're about to execute the head_pc on head_proto
5506                //      at depth > 0 (we're re-entering the trace head
5507                //      from inside an inlined recursion level — UpRec).
5508                //   2. The count of ancestor frames in the recording
5509                //      window that share `head_proto` exceeds
5510                //      [`RECUNROLL_THRESHOLD`] (default 2).
5511                // For fib(N): head_pc=0, head_proto=fib. After 2 inline
5512                // recursion levels are captured, the recorder enters
5513                // the 3rd nested fib frame, sees cur_depth=3 > 2, and
5514                // trips this catch — closing with `SelfRecKind::UpRec`.
5515                // The lowerer's `TraceEnd::SelfLink` tail emits the
5516                // bump-base + branch-to-self loop body.
5517                //
5518                // TailRec vs UpRec: LJ distinguishes via
5519                // `framedepth + retdepth == 0`. luna doesn't track
5520                // retdepth separately; cur_depth == 0 with a non-empty
5521                // call chain in tail position is rare (would require
5522                // explicit Lua TCO). We use cur_depth > 0 as the UpRec
5523                // condition (fib's case); cur_depth == 0 with positive
5524                // ancestor count would route to TailRec, but luna's
5525                // recorder doesn't currently produce that shape because
5526                // tail-call elision pops the caller frame and we'd
5527                // hit `at_head_loop` instead.
5528                let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
5529                    if self.jit.p16_self_link_enabled
5530                        && !returned_past_head
5531                        && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5532                        && pc == rec.head_pc
5533                        && cur_depth > 0
5534                    {
5535                        // Count ancestor frames sharing head_proto.
5536                        // self.frames[recording_frame_base..] currently
5537                        // includes the just-pushed frame at the top
5538                        // (the one about to execute head_pc). Ancestors
5539                        // = the slice excluding the top frame.
5540                        let head_proto_ptr = rec.head_proto.as_ptr();
5541                        let last_idx = self.frames.len() - 1;
5542                        let mut count = 0usize;
5543                        for i in self.jit.recording_frame_base..last_idx {
5544                            if let CallFrame::Lua(f) = &self.frames[i]
5545                                && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
5546                            {
5547                                count += 1;
5548                            }
5549                        }
5550                        if count > crate::jit::trace::RECUNROLL_THRESHOLD {
5551                            // cur_depth > 0 → UpRec (fib pattern).
5552                            // cur_depth == 0 wouldn't reach this arm.
5553                            Some(crate::jit::trace::SelfRecKind::UpRec)
5554                        } else {
5555                            None
5556                        }
5557                    } else {
5558                        None
5559                    }
5560                };
5561                if let Some(kind) = self_link_trip {
5562                    rec.self_link_kind = Some(kind);
5563                }
5564                let should_close =
5565                    at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
5566                if should_close {
5567                    // P13-S13-H — long-trace bias: a call-triggered
5568                    // recording that closed with a very short body
5569                    // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
5570                    // binary_trees `make(0)`: 4 ops) is pathological.
5571                    // Compiling + caching it pins `Proto.traces` to a
5572                    // trace that the length gate will refuse to
5573                    // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
5574                    // = 40`), AND blocks the back-edge / longer-call
5575                    // path from re-recording the same head_pc (the
5576                    // dedup `already_cached` check below short-
5577                    // circuits). The fix: discard the short call-
5578                    // triggered recording WITHOUT caching, so that a
5579                    // later trigger can record from a different
5580                    // (hopefully deeper) recursion point. An earlier
5581                    // draft also biased `call_hot_count` back below
5582                    // THRESHOLD; that bias is no longer applied.
5583                    //
5584                    // Back-edge triggered traces are exempt — a
5585                    // tight numeric-for loop's body is legitimately
5586                    // 3 ops (`Add`, ForLoop) and DOES dispatch
5587                    // usefully when re-entered many times.
5588                    // P13-S13-H — coverage heuristic to detect
5589                    // pathologically partial call-triggered traces:
5590                    // for self-recursive / branchy protos like
5591                    // `fib` (~17 bytecode ops) or
5592                    // `binary_trees.make` (~26 ops), the recorder
5593                    // can fire at a BASE-case entry (`fib(0)` or
5594                    // `make(0)`) producing a 3–4 op trace that
5595                    // covers a tiny fraction of the proto's code.
5596                    // That trace is doomed by the length gate
5597                    // post-compile AND blocks any longer follow-up
5598                    // (the dedup `already_cached` check below). The
5599                    // fix: discard call-triggered closes where
5600                    // `rec.ops.len() * 2 < head_proto.code.len()`
5601                    // (less than half the proto's bytecode), so the
5602                    // back-edge / longer call path can take over.
5603                    //
5604                    // Why coverage > raw length: protos with
5605                    // intrinsically short bodies (closure
5606                    // factories: `Closure + Return1` = 2 ops,
5607                    // simple wrappers: `LoadI + Return1` = 2 ops)
5608                    // record 100% coverage even at length 2 — those
5609                    // ARE legitimately short and the closure /
5610                    // sunk-emit lowering paths (S7-A / S9-C) make
5611                    // them worth compiling. The heuristic admits
5612                    // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
5613                    // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
5614                    // ~26) get discarded.
5615                    //
5616                    // Back-edge triggered traces are unaffected —
5617                    // a tight numeric-for body legitimately covers
5618                    // 3 of ~3 proto ops it can dispatch from
5619                    // (`Add + ForLoop`) and the recorder fires on
5620                    // the back-edge, not call entry.
5621                    //
5622                    // `call_hot_count` is intentionally NOT reset
5623                    // (an earlier draft tried `THRESHOLD - 32` but
5624                    // caused active_trace contention with the
5625                    // outer back-edge trigger — see
5626                    // setlist_b_zero_with_call_c_zero_sunk_emits).
5627                    // We give up on dispatching the pathological
5628                    // shape on the same proto; the back-edge or a
5629                    // longer call path on a deeper recursion point
5630                    // can still record + cache a real trace.
5631                    let proto_code_len = rec.head_proto.code.len();
5632                    let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
5633                    // P13-S13-I — per-Proto discard cap. The S13-H
5634                    // relaxed trigger condition (`c >= THRESHOLD &&
5635                    // !already_cached`) means a Proto whose every
5636                    // recording is partial-coverage will re-fire the
5637                    // trigger every call indefinitely (1500+ in
5638                    // `binary_trees`-pattern test). The cap stops
5639                    // discarding after `MAX_DISCARDS_PER_PROTO` —
5640                    // the next close falls through to compile (even
5641                    // if partial), caches the trace, and the
5642                    // `already_cached` short-circuit kills the
5643                    // storm. Dispatch may still be refused
5644                    // post-compile (length gate), but the recorder
5645                    // stops churning.
5646                    const MAX_DISCARDS_PER_PROTO: u32 = 5;
5647                    let prior_discards = rec.head_proto.trace_discard_count.get();
5648                    let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
5649                    // P13-S13-K — flip the `gave_up` flag the
5650                    // moment cap is reached (BEFORE the close-
5651                    // dispatching branch below). The trigger gates
5652                    // short-circuit on this flag, skipping the
5653                    // RefCell + linear `already_cached` scan on
5654                    // every subsequent call to this Proto. Useful
5655                    // for `binary_trees_pattern`-class loads where
5656                    // a single Proto sees ~20k calls post-cap.
5657                    if cap_reached
5658                        && rec.is_call_triggered
5659                        && is_partial_coverage
5660                        && !rec.head_proto.trace_gave_up.get()
5661                    {
5662                        rec.head_proto.trace_gave_up.set(true);
5663                    }
5664                    if rec.is_call_triggered && is_partial_coverage && !cap_reached {
5665                        // Tally as closed (for visibility) but DROP
5666                        // without compile/cache. Use the existing
5667                        // closed-lens accumulator so probes can
5668                        // observe the discarded shape.
5669                        // P13-S13-I — bump discard count BEFORE
5670                        // dropping the recording so the next
5671                        // close sees the updated counter.
5672                        rec.head_proto.trace_discard_count.set(prior_discards + 1);
5673                        self.jit.counters.closed += 1;
5674                        self.jit
5675                            .counters
5676                            .closed_lens
5677                            .push((rec.is_call_triggered, rec.ops.len()));
5678                        self.jit.active_trace = None;
5679                        // Continue with interp loop — don't
5680                        // fall through to compile path.
5681                        // The op at `pc` hasn't dispatched yet;
5682                        // the outer loop iteration handles it.
5683                    } else {
5684                        rec.closed = true;
5685                        // P12-S2.C — detach the closed record, then try
5686                        // to compile it. Dedup by `head_pc`: a Proto
5687                        // already carrying a CompiledTrace for this PC
5688                        // skips recompile (the hot counter caps
5689                        // re-recording at `u32::MAX / 2` anyway, but
5690                        // explicit dedup keeps `Proto.traces` short
5691                        // for the S3 dispatcher's linear scan).
5692                        //
5693                        // No `Vm::run` change for failure: we just bump
5694                        // the failed counter and drop the record. S3
5695                        // will read `Proto.traces` to decide whether to
5696                        // dispatch — until then, this is bookkeeping.
5697                        let head_pc_val = rec.head_pc;
5698                        let closed_record = self
5699                            .jit
5700                            .active_trace
5701                            .take()
5702                            .expect("active_trace was Some this branch");
5703                        self.jit.counters.closed += 1;
5704                        self.jit
5705                            .counters
5706                            .closed_lens
5707                            .push((closed_record.is_call_triggered, closed_record.ops.len()));
5708                        // P12-S5-B fix: cache the trace on the
5709                        // recorder's *head proto*, not the current
5710                        // closure's proto. For non-recursive
5711                        // call-triggered traces, close fires after
5712                        // `Return1` pops the callee frame — `cl` at
5713                        // that point is the CALLER's closure, while
5714                        // `closed_record.head_proto` is the CALLEE's
5715                        // proto (the one we actually want the trace
5716                        // to be discoverable from on the next call).
5717                        // Self-recursive fib closed via depth-cap
5718                        // mid-recursion so `cl.proto == head_proto`
5719                        // happened to coincide — this fix makes that
5720                        // accidental coincidence intentional.
5721                        let head_proto = closed_record.head_proto;
5722                        let already_cached = head_proto
5723                            .traces
5724                            .borrow()
5725                            .iter()
5726                            .any(|t| t.head_pc == head_pc_val);
5727                        if !already_cached {
5728                            // Internal-loop = true: the trace runs in
5729                            // a native loop until a cmp side-exits, so
5730                            // the dispatcher's per-entry marshal cost
5731                            // amortizes across the whole run of
5732                            // iterations the loop's recorded direction
5733                            // stays valid. The lowerer auto-downgrades
5734                            // to one-shot for cmp-less or Call-truncating
5735                            // traces.
5736                            // P15-A v2-C-A6-5 — side traces MUST NOT
5737                            // internal-loop. The parent's recorded prefix
5738                            // (ops at PCs < side trace's head_pc) defines
5739                            // values for registers the child's body reads
5740                            // without re-writing each iter — e.g. for
5741                            // s12_step_b, parent's `pc=19 Add R[12] = R[1]
5742                            // + R[11]` sets R[12], and the child trace
5743                            // (head_pc=24) re-runs `pc=20 Move R[1] =
5744                            // R[12]` each iter via its outer ForLoop
5745                            // internal-loop, ALWAYS reading the stale
5746                            // entry-time R[12]. The parent's Add never
5747                            // re-runs during child's loop, so R[1] gets
5748                            // pinned to one stale value. Force one-shot
5749                            // for side traces: each parent-exit round-
5750                            // trips through dispatcher → parent's Add
5751                            // runs → side trace runs ONE iter → return.
5752                            let opts = crate::jit::trace::CompileOptions {
5753                                internal_loop: closed_record.side_trace_parent.is_none(),
5754                                pre53: self.version() <= LuaVersion::Lua53,
5755                                aot: false,
5756                            };
5757                            // v1.1 A1 Session A — route through trace_compiler.
5758                            match self
5759                                .jit
5760                                .trace_compiler
5761                                .try_compile_trace(&closed_record, opts)
5762                            {
5763                                Some(mut ct) => {
5764                                    // P12-S5-A/B/C — tally Sinkable sites
5765                                    // + actually-sunk-emit sites + materialise
5766                                    // emit sites before moving `ct` into
5767                                    // Proto.traces.
5768                                    self.jit.counters.sinkable_seen +=
5769                                        ct.sinkable_sites_seen as u64;
5770                                    self.jit.counters.accum_bufferable_seen +=
5771                                        ct.accum_bufferable_seen as u64;
5772                                    self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
5773                                    self.jit.counters.materialize_emit +=
5774                                        ct.materialize_emit_count as u64;
5775                                    self.jit.counters.closure_emit += ct.closure_seen as u64;
5776                                    if ct.is_inline_abort_close {
5777                                        self.jit.counters.inline_abort += 1;
5778                                    }
5779                                    if let Some(reason) = ct.dispatch_off_reason {
5780                                        self.jit.counters.dispatch_off_reasons.push(reason);
5781                                    }
5782                                    // P15-A v2-A — side-trace finalisation.
5783                                    // Pin `dispatchable=false` so the
5784                                    // primary lookup `traces.find(|t|
5785                                    // t.head_pc == pc && t.dispatchable)`
5786                                    // never matches this entry — the
5787                                    // side trace is meant to be entered
5788                                    // ONLY through the parent's exit
5789                                    // indirection (v2-B/C IR), not the
5790                                    // back-edge / call-trigger paths.
5791                                    // Then write the entry fn ptr into
5792                                    // the parent's `exit_side_trace_ptrs`
5793                                    // slot so v2-B/C IR can read it.
5794                                    if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
5795                                        closed_record.side_trace_parent
5796                                    {
5797                                        ct.dispatchable = false;
5798                                        let entry_ptr = ct.entry as *const () as *const u8;
5799                                        let _side_trace_head_pc = closed_record.head_pc;
5800                                        let parent_traces = parent_proto.traces.borrow();
5801                                        if let Some(parent_ct) = parent_traces
5802                                            .iter()
5803                                            .find(|t| t.head_pc == parent_head_pc)
5804                                        {
5805                                            // P15-A v2-C-A5-C — shape-match
5806                                            // gate. Find the parent's per-exit
5807                                            // tag snapshot at the wired exit
5808                                            // (inline / tag / global) and
5809                                            // check the child's entry_tags
5810                                            // match. If not, leave the cell
5811                                            // null + skip cache populate so
5812                                            // the future v2-C-A2 IR's
5813                                            // `call_indirect` stays inert at
5814                                            // this exit (the child's
5815                                            // shape-specialised IR would
5816                                            // mis-interpret raw bits the
5817                                            // parent writes to reg_state).
5818                                            let inline_n = parent_ct.per_exit_inline.len();
5819                                            let tags_n = parent_ct.per_exit_tags.len();
5820                                            let parent_exit_tags_slice: &[
5821                                            crate::jit::trace::ExitTag
5822                                        ] = if parent_exit_idx < inline_n {
5823                                            &parent_ct.per_exit_inline
5824                                                [parent_exit_idx]
5825                                                .exit_tags
5826                                        } else if parent_exit_idx
5827                                            < inline_n + tags_n
5828                                        {
5829                                            &parent_ct.per_exit_tags
5830                                                [parent_exit_idx - inline_n]
5831                                                .1
5832                                        } else {
5833                                            &parent_ct.exit_tags
5834                                        };
5835                                            let shape_ok =
5836                                                crate::jit::trace::exit_tags_match_entry_tags(
5837                                                    &ct.entry_tags,
5838                                                    parent_exit_tags_slice,
5839                                                    &parent_ct.entry_tags,
5840                                                );
5841                                            if !shape_ok {
5842                                                self.jit.counters.side_trace_shape_mismatch += 1;
5843                                            }
5844                                            // P15-A v2-C-A4 — write the child's
5845                                            // entry fn ptr to BOTH the legacy
5846                                            // v2-A `exit_side_trace_ptrs[idx]`
5847                                            // cell (kept so v2-A's
5848                                            // walk_any_side_ptr_non_null tests
5849                                            // stay green) AND the per-kind cell
5850                                            // whose heap address the parent's
5851                                            // IR baked (v2-C-A2). The IR-baked
5852                                            // cell is what the call_indirect
5853                                            // gate actually reads. Only write
5854                                            // when A5-C shape gate passes.
5855                                            if shape_ok {
5856                                                if let Some(cell) = parent_ct
5857                                                    .exit_side_trace_ptrs
5858                                                    .get(parent_exit_idx)
5859                                                {
5860                                                    cell.set(entry_ptr);
5861                                                }
5862                                                // Compute (kind, local) for the
5863                                                // IR-baked cell. Layout follows
5864                                                // exit_hit_counts: inline first,
5865                                                // then per_exit_tags, then the
5866                                                // global tail slot.
5867                                                let (sent_kind, sent_local) = if parent_exit_idx
5868                                                    < inline_n
5869                                                {
5870                                                    parent_ct.per_exit_inline[parent_exit_idx]
5871                                                        .side_trace_ptr
5872                                                        .set(entry_ptr);
5873                                                    (
5874                                                        crate::jit::trace::SIDE_SENT_KIND_INLINE,
5875                                                        parent_exit_idx as u32,
5876                                                    )
5877                                                } else if parent_exit_idx < inline_n + tags_n {
5878                                                    let local = parent_exit_idx - inline_n;
5879                                                    if let Some(b) =
5880                                                        parent_ct.tags_side_trace_ptrs.get(local)
5881                                                    {
5882                                                        b.set(entry_ptr);
5883                                                    }
5884                                                    (
5885                                                        crate::jit::trace::SIDE_SENT_KIND_TAG,
5886                                                        local as u32,
5887                                                    )
5888                                                } else {
5889                                                    parent_ct.global_side_trace_ptr.set(entry_ptr);
5890                                                    (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
5891                                                };
5892                                                self.jit.counters.side_trace_compiled += 1;
5893                                                // P15-A v2-D-A8 — flip the
5894                                                // parent's fast-path hint so
5895                                                // the dispatcher knows to do
5896                                                // the tentative decode + cell
5897                                                // check on subsequent
5898                                                // dispatches. Set once and
5899                                                // stays true (we never unwire
5900                                                // a side trace today).
5901                                                parent_ct.has_any_side_wired.set(true);
5902
5903                                                // P15-A v2-C-A1/A4 — populate
5904                                                // the O(1) lookup cache the
5905                                                // dispatcher consults on
5906                                                // sentinel-bit-set returns.
5907                                                // Key is the encoded sentinel
5908                                                // (same encoding the IR ORs
5909                                                // into bits 56..=62 of the
5910                                                // child's i64 return).
5911                                                let sentinel =
5912                                                    crate::jit::trace::encode_side_sentinel(
5913                                                        sent_kind, sent_local,
5914                                                    );
5915                                                let predicted_idx = if std::ptr::eq(
5916                                                    parent_proto.as_ptr(),
5917                                                    head_proto.as_ptr(),
5918                                                ) {
5919                                                    parent_traces.len() as u32
5920                                                } else {
5921                                                    head_proto.traces.borrow().len() as u32
5922                                                };
5923                                                parent_ct
5924                                                    .side_trace_cache
5925                                                    .borrow_mut()
5926                                                    .insert(sentinel, predicted_idx);
5927                                            }
5928                                        }
5929                                        drop(parent_traces);
5930                                    }
5931                                    head_proto.traces.borrow_mut().push(std::rc::Rc::new(ct));
5932                                    self.jit.counters.compiled += 1;
5933                                }
5934                                None => {
5935                                    self.jit.counters.compile_failed += 1;
5936                                    self.jit
5937                                        .counters
5938                                        .compile_failed_reasons
5939                                        .push(self.jit.trace_compiler.last_compile_checkpoint());
5940                                }
5941                            }
5942                        }
5943                    } // P13-S13-H — close the long-trace-bias else branch
5944                } else {
5945                    // P12-S4-step1 + step4a — depth-aware push at the
5946                    // current `cur_depth`. The `depth_cap_hit` /
5947                    // `returned_past_head` early-exit is handled by
5948                    // the `should_close` branch above; reaching here
5949                    // means `cur_depth <= MAX_INLINE_DEPTH` and the
5950                    // trace head's frame is still live.
5951                    let depth_u8 = cur_depth as u8;
5952                    if depth_u8 > self.jit.max_depth_seen {
5953                        self.jit.max_depth_seen = depth_u8;
5954                    }
5955                    // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
5956                    // return / variable return count). Recorder pushed
5957                    // it with var_count=None before the call dispatched;
5958                    // now that the call has returned and we're about to
5959                    // push the next op, top reflects the actual return
5960                    // count. Snapshot top - (caller.base + call.a).
5961                    if let Some(last) = rec.ops.last_mut()
5962                        && matches!(last.inst.op(), crate::vm::isa::Op::Call)
5963                        && last.inst.c() == 0
5964                        && last.var_count.is_none()
5965                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
5966                    {
5967                        let from = f.base + last.inst.a();
5968                        if self.top >= from {
5969                            last.var_count = Some(self.top - from);
5970                        }
5971                    }
5972                    // P12-S9-A/C — for SetList B=0, snapshot the source
5973                    // count = top - A - 1 (mirrors Lua's `n = top - ra
5974                    // - 1` from lvm.c OP_SETLIST). Sources are
5975                    // R[A+1..top), exclusive top. For Call C=0's
5976                    // var_count (the return count = top - A inclusive),
5977                    // see the prior-op fix-up above; here we
5978                    // initialise the current Call op to None and let
5979                    // the fix-up on the next op's push populate it.
5980                    let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
5981                        && inst.b() == 0
5982                        && let Some(f) = self.frames.last().and_then(CallFrame::lua)
5983                    {
5984                        let from = f.base + inst.a();
5985                        if self.top > from {
5986                            Some(self.top - from - 1)
5987                        } else {
5988                            None
5989                        }
5990                    } else {
5991                        None
5992                    };
5993                    let op = crate::jit::trace::RecordedOp {
5994                        proto: cl.proto,
5995                        pc,
5996                        inst,
5997                        inline_depth: depth_u8,
5998                        var_count,
5999                    };
6000                    if !rec.push(op) {
6001                        self.jit.active_trace = None;
6002                        self.jit.counters.aborted += 1;
6003                    }
6004                }
6005            }
6006
6007            // P12-S3 — trace JIT dispatcher.
6008            //
6009            // When the dispatch loop is about to execute the op at
6010            // `pc` and there's a `numeric_only` CompiledTrace cached
6011            // for that `head_pc`, marshal the live regs into an
6012            // i64 buffer, jump into the trace, and resume the
6013            // interpreter at the returned continuation PC.
6014            //
6015            // Skipped (zero overhead) when `trace_jit_enabled` is
6016            // false; the lookup is a borrow + scan over
6017            // `cl.proto.traces`, which is a `Vec` whose size is at
6018            // most one entry per back-edge per Proto in practice.
6019            //
6020            // Marshalling contract — the reg_state ABI is `*mut i64`
6021            // with no tag info, so each slot crosses as its raw 8-byte
6022            // payload. Int / Float / Table / Closure / Native / Str /
6023            // Nil slots marshal; any other tag, or a tag that differs
6024            // from the trace's compile-time entry tag, forces a skip.
6025            // Interp then takes over for one op and the back-edge
6026            // brings us back to try again on the next pass.
6027            //
6028            // A trace that comes back with `vm.jit.pending_err`
6029            // parked is treated as a deopt: clear the err, leave
6030            // the stack as the trace wrote it, and let the
6031            // interpreter run from the same `pc`. The trace itself
6032            // is left cached — a future entry might find no
6033            // metatable in the way and succeed.
6034            // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6035            // of 6 per-field Rc clones. proto.traces is now
6036            // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6037            // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6038            // operations per dispatch × 434k = ~2.2M Rc refcount bumps
6039            // (non-atomic — `Rc`, not `Arc`; ~1-2% gain measured separately).
6040            if self.jit.trace_enabled
6041                && let Some(ct) = {
6042                    let traces = cl.proto.traces.borrow();
6043                    traces
6044                        .iter()
6045                        .find(|t| t.head_pc == pc && t.dispatchable)
6046                        .cloned()
6047                }
6048            {
6049                // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6050                // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6051                // across the entire dispatch block so the fields outlive
6052                // all consumers. Saves 5 Rc::clone per dispatch.
6053                let entry_fn = ct.entry;
6054                let head_pc_val = ct.head_pc;
6055                let window_size = ct.window_size;
6056                let exit_tags = &ct.exit_tags;
6057                let per_exit_tags = &ct.per_exit_tags;
6058                let per_exit_inline = &ct.per_exit_inline;
6059                let compile_entry_tags = &ct.entry_tags;
6060                let global_tag_res_kind = ct.global_tag_res_kind;
6061                let exit_hit_counts = &ct.exit_hit_counts;
6062                let max_stack = cl.proto.max_stack as usize;
6063                let window_size_us = window_size as usize;
6064                let base_us = base as usize;
6065                // P12-S4-step3a — `reg_state` sized to the trace's
6066                // `window_size`, which today equals max_stack but
6067                // S4-step3b will expand for inlined frames.
6068                // Marshal-in still only writes [0..max_stack); slots
6069                // [max_stack..window_size) are zero-initialised and
6070                // filled by the trace's own GetUpval / arith.
6071                // P13-S13-D — reuse the Vm's amortised buffers
6072                // instead of allocating fresh Vecs each dispatch.
6073                // mem::take leaves an empty placeholder we restore
6074                // at the end of the dispatch block (success +
6075                // deopt paths both fall through to the restore).
6076                let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6077                entry_tags.clear();
6078                entry_tags.reserve(max_stack);
6079                let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6080                reg_state.clear();
6081                reg_state.resize(window_size_us, 0i64);
6082                let mut dispatch_ok = true;
6083                for i in 0..max_stack {
6084                    let v = self.stack[base_us + i];
6085                    let (tag, raw) = v.unpack();
6086                    entry_tags.push(tag);
6087                    // P12-S12-C v3 — entry tag guard. The trace's IR
6088                    // is specialised to the compile-time entry tags
6089                    // (via current_kinds propagation from
6090                    // from_entry_tag). A runtime tag mismatch means
6091                    // body ops would mis-interpret raw bits (e.g.
6092                    // treat a Str pointer as Int payload → garbage).
6093                    // Skip dispatch on mismatch so interp handles
6094                    // this entry shape; the trace stays cached for
6095                    // future entries that match.
6096                    if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6097                        dispatch_ok = false;
6098                        break;
6099                    }
6100                    match tag {
6101                        // Int / Float / Table / Nil all marshal
6102                        // to raw payload cleanly; the trace's IR
6103                        // treats the 8-byte slot as an i64 (with
6104                        // f64 ops bitcasting around the boundary).
6105                        crate::runtime::value::raw::INT
6106                        | crate::runtime::value::raw::FLOAT
6107                        | crate::runtime::value::raw::TABLE
6108                        | crate::runtime::value::raw::CLOSURE
6109                        // P12-S12-B-v2 — Native iter slots (e.g.
6110                        // R[A] = ipairs_iter) are present in
6111                        // generic-for traces; the raw bits are a
6112                        // valid `*mut NativeClosure` and round-trip
6113                        // cleanly.
6114                        | crate::runtime::value::raw::NATIVE
6115                        // P12-S12-C v1 — Str slots show up in
6116                        // string-concat traces; raw bits = `*mut
6117                        // LuaStr` (interned, GC-managed). Round-
6118                        // trips cleanly as a heap pointer.
6119                        | crate::runtime::value::raw::STR
6120                        | crate::runtime::value::raw::NIL => {
6121                            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6122                            reg_state[i] = unsafe { raw.zero as i64 };
6123                        }
6124                        _ => {
6125                            dispatch_ok = false;
6126                            break;
6127                        }
6128                    }
6129                }
6130
6131                if dispatch_ok {
6132                    debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6133                    self.jit.pending_err = None;
6134                    // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6135                    // count. A cmp@d>0 side-exit calls the materialize
6136                    // helper which pushes inlined frames onto
6137                    // `vm.frames`; on deopt those frames must be popped
6138                    // before falling through to the interpreter, else
6139                    // the stack grows unboundedly per deopted dispatch.
6140                    let pre_frames = self.frames.len();
6141                    // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
6142                    // diagnostic hook. The probe fires on the first trace
6143                    // dispatch only (regardless of JIT vs AOT origin — both go
6144                    // through this arm), letting the AOT smoke test verify mcode
6145                    // actually executed. Guarded behind `OnceLock` so the env
6146                    // read is a one-time cost per process; gated on
6147                    // `counters.dispatched == 0` so the smoke test gets a single
6148                    // deterministic `aot_trace_fired pc=N` line.
6149                    if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
6150                        eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
6151                    }
6152                    let continuation_pc = {
6153                        // v1.1 A1 Session A — chunk_compiler.enter
6154                        // (CraneliftBackend delegates to enter_jit;
6155                        // NullJitBackend returns an inert guard).
6156                        let vm_ptr: *mut Vm = self;
6157                        let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
6158                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6159                        unsafe { entry_fn(reg_state.as_mut_ptr()) }
6160                    };
6161                    self.jit.counters.dispatched += 1;
6162
6163                    if self.jit.pending_err.is_some() {
6164                        self.jit.pending_err = None;
6165                        self.jit.counters.deopt += 1;
6166                        // P12-S4-step4b-C-2 — unwind any helper-pushed
6167                        // inlined frames before the interpreter resumes.
6168                        // Don't restore reg_state — the trace's partial
6169                        // writes are discarded; interp re-executes from
6170                        // the original `pc`.
6171                        while self.frames.len() > pre_frames {
6172                            frames_pop_sync(&mut self.frames, &mut self.frames_top);
6173                        }
6174                    } else {
6175                        // Restore each slot using the trace's
6176                        // exit-tag analysis (see ExitTag docs).
6177                        // P12-S4-step4b-C-2 — decode the IR's
6178                        // side-exit shape. Upper 32 bits = (site_idx
6179                        // + 1) for inline cmp side-exits, 0 for
6180                        // legacy clean-tail / non-inline exits.
6181                        // P15-A v2-C-A0 — decode lives in
6182                        // `crate::jit::trace::decode_exit_shape` so
6183                        // v2-C-A3 can reuse it with the SIDE TRACE's
6184                        // shape inputs when the sentinel bit
6185                        // (v2-C-A2) is set on `raw_ret`.
6186                        let raw_ret = continuation_pc as u64;
6187                        // P15-A v2-C-A3 — side-trace return decode.
6188                        // Bit 63 of `raw_ret` is the side-trace
6189                        // marker the parent's IR OR'd in when it
6190                        // tail-called into a wired child trace.
6191                        // Bits 56..=62 carry the sentinel code (the
6192                        // cache key into the parent's
6193                        // `side_trace_cache`); bits 0..=55 are the
6194                        // child's own return value (encoded site or
6195                        // plain cont_pc) which we MUST decode using
6196                        // the CHILD's per_exit_inline / per_exit_tags
6197                        // / exit_tags / exit_hit_counts — not the
6198                        // parent's. The dispatcher snapshot read
6199                        // above holds the parent's shapes; when bit
6200                        // 63 is set we re-fetch the child's via the
6201                        // sentinel-keyed cache.
6202                        let from_side_trace = (raw_ret >> 63) & 1 == 1;
6203                        let (
6204                            decode_inline,
6205                            decode_tags,
6206                            decode_exit_tags,
6207                            decode_hit_counts,
6208                            decode_body,
6209                        ) = if from_side_trace {
6210                            let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6211                            let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6212                            let traces = cl.proto.traces.borrow();
6213                            let child_idx = traces
6214                                .iter()
6215                                .find(|t| t.head_pc == head_pc_val)
6216                                .and_then(|pct| {
6217                                    pct.side_trace_cache.borrow().get(&sentinel_code).copied()
6218                                });
6219                            if let Some(idx) = child_idx
6220                                && let Some(child) = traces.get(idx as usize)
6221                            {
6222                                if crate::jit::trace::v2c_probe_enabled() {
6223                                    eprintln!(
6224                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
6225                                        sentinel_code,
6226                                        body,
6227                                        idx,
6228                                        child.n_ops,
6229                                        child.head_pc,
6230                                        child.window_size,
6231                                        pc,
6232                                        window_size,
6233                                        child.dispatchable,
6234                                        child.is_inline_abort_close,
6235                                    );
6236                                }
6237                                (
6238                                    child.per_exit_inline.clone(),
6239                                    child.per_exit_tags.clone(),
6240                                    child.exit_tags.clone(),
6241                                    child.exit_hit_counts.clone(),
6242                                    body,
6243                                )
6244                            } else {
6245                                if crate::jit::trace::v2c_probe_enabled() {
6246                                    eprintln!(
6247                                        "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
6248                                        sentinel_code, body,
6249                                    );
6250                                }
6251                                // Cache miss — fall back to parent
6252                                // shapes with the body bits. Best-
6253                                // effort; the trace_side_trace_
6254                                // shape_mismatch_count records this
6255                                // path indirectly (close-handler
6256                                // skips wiring on mismatch so we
6257                                // shouldn't reach here when shape
6258                                // gate held).
6259                                (
6260                                    per_exit_inline.clone(),
6261                                    per_exit_tags.clone(),
6262                                    exit_tags.clone(),
6263                                    exit_hit_counts.clone(),
6264                                    body,
6265                                )
6266                            }
6267                        } else {
6268                            // P15-A v2-D — dispatcher-level side-trace
6269                            // invocation. Replaces v2-C's universal IR
6270                            // gate (`load + icmp + brif` at every
6271                            // emit_store_back callsite, which A6/A7
6272                            // measured as a net perf regression).
6273                            //
6274                            // History — A8 fast path (REVERTED; it is
6275                            // not present in the code below): A8 skipped
6276                            // the tentative decode + child lookup entirely
6277                            // when `has_any_side_wired == false` (the
6278                            // common case until the first side trace
6279                            // compiles for this parent). For fib_10_x10k and other tight short-trace
6280                            // workloads where most parent traces never get a wired child, that
6281                            // collapsed the v2-D overhead to a single `Cell::get()` on the cold path.
6282                            // A8 had a `parent_has_side` short-
6283                            // circuit + snapshot hoist; mini N=3 showed
6284                            // A8 lost the btrees_d8 1.02× win (dropped
6285                            // to 0.95×) WITHOUT helping fib_10 (same
6286                            // 0.86×). A8 was dropped — the always-run
6287                            // v2-D path is accepted; the tentative decode
6288                            // + cell load is cheaper than the cost A8 added.
6289                            {
6290                                let tentative = crate::jit::trace::decode_exit_shape(
6291                                    raw_ret,
6292                                    per_exit_inline,
6293                                    per_exit_tags,
6294                                    exit_tags,
6295                                );
6296                                let tentative_exit_idx = tentative.exit_hit_idx;
6297                                let child_invoke = {
6298                                    let traces = cl.proto.traces.borrow();
6299                                    traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
6300                                        |pct| {
6301                                            let cell =
6302                                                pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
6303                                            let fn_ptr = cell.get();
6304                                            if fn_ptr.is_null() {
6305                                                return None;
6306                                            }
6307                                            traces
6308                                                .iter()
6309                                                .find(|t| {
6310                                                    t.entry as *const () as *const u8 == fn_ptr
6311                                                })
6312                                                .map(|child| {
6313                                                    (
6314                                                        child.entry,
6315                                                        child.per_exit_inline.clone(),
6316                                                        child.per_exit_tags.clone(),
6317                                                        child.exit_tags.clone(),
6318                                                        child.exit_hit_counts.clone(),
6319                                                    )
6320                                                })
6321                                        },
6322                                    )
6323                                };
6324                                if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
6325                                    let child_raw_ret = {
6326                                        // v1.1 A1 Session A — chunk_compiler.enter
6327                                        // (side-trace entry).
6328                                        let vm_ptr: *mut Vm = self;
6329                                        let _guard =
6330                                            self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
6331                                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6332                                        unsafe { cent(reg_state.as_mut_ptr()) }
6333                                    };
6334                                    (cpi, cpt, cet, chc, child_raw_ret as u64)
6335                                } else {
6336                                    (
6337                                        per_exit_inline.clone(),
6338                                        per_exit_tags.clone(),
6339                                        exit_tags.clone(),
6340                                        exit_hit_counts.clone(),
6341                                        raw_ret,
6342                                    )
6343                                }
6344                            }
6345                        };
6346                        let decoded = crate::jit::trace::decode_exit_shape(
6347                            decode_body,
6348                            &decode_inline,
6349                            &decode_tags,
6350                            &decode_exit_tags,
6351                        );
6352                        let site_id = decoded.site_id;
6353                        let cont_pc = decoded.cont_pc;
6354                        let exit_hit_idx = decoded.exit_hit_idx;
6355                        let exit_tags_for_pc = decoded.exit_tags_for_pc;
6356                        // P15-A v2-C-A3 — for side-trace returns
6357                        // force using_global_exit_tags=false so the
6358                        // restore loop always takes the per-tag slow
6359                        // path (the child's global_tag_res_kind
6360                        // classification isn't plumbed through yet
6361                        // — TODO for a future polish step).
6362                        let using_global_exit_tags = if from_side_trace {
6363                            false
6364                        } else {
6365                            decoded.using_global_exit_tags
6366                        };
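6367                        // P15-prep — increment the counter (saturate at u32::MAX to avoid wrap on long runs).
6368                        // NOTE(review): the `v + 1` in the threshold test below is evaluated even when v == u32::MAX, where it overflows (a panic when overflow checks are on); `v.checked_add(1)` would avoid that.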
6369                        // P15-A v1 — track whether this increment is
6370                        // the one that crossed `HOTEXIT_THRESHOLD`
6371                        // (transition: previous v < threshold, new v
6372                        // == threshold). The side-trace start is
6373                        // deferred to just before `continue;` so
6374                        // vm.stack and frame.pc are fully restored
6375                        // (the snapshot reads post-restore values).
6376                        let mut side_trace_should_start = false;
6377                        // P15-A v2-C-A3 — for side-trace returns the
6378                        // counter to bump is the CHILD's (decoded
6379                        // shape lookup) — `exit_hit_idx` is into the
6380                        // decoded layout, so use the matching
6381                        // `decode_hit_counts`. For parent decode
6382                        // they're aliased (clone of the parent's
6383                        // own Rc).
6384                        if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
6385                            let v = c.get();
6386                            if v < u32::MAX {
6387                                c.set(v + 1);
6388                            }
6389                            if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
6390                                && self.jit.active_trace.is_none()
6391                                && self.jit.trace_enabled
6392                            {
6393                                side_trace_should_start = true;
6394                            }
6395                        }
6396                        // P12-S4-step4b-C-2 — at an inline cmp@d>0
6397                        // side-exit, the helper has pushed N frames on
6398                        // top of the trace head's frame and
6399                        // `exit_tags_for_pc.len()` covers the full
6400                        // window (caller + each inlined frame's
6401                        // window). Slots beyond `max_stack` belong to
6402                        // an inlined frame: their `Untouched` entries
6403                        // default to Nil (no entry-tag fallback —
6404                        // marshal-in only captured caller slots) and
6405                        // we write to interp stack at `base + i` which
6406                        // mirrors `op_offsets`-derived layout.
6407                        let slot_count = exit_tags_for_pc.len();
6408                        // P12-S4-step4b-C-2 — the helper only extends
6409                        // vm.stack up to the deepest pushed frame's
6410                        // window, but the exit_tags snapshot covers
6411                        // the trace's full `window_size` (which
6412                        // includes depth-N+1 scratch slots that the
6413                        // trace's IR may have written without a
6414                        // matching pushed frame). Extend with Nil so
6415                        // the write at the tail doesn't panic; these
6416                        // slots get overwritten by the writeback loop
6417                        // and won't leak meaningful data past the
6418                        // pushed frames' R[0..max_stack) windows.
6419                        if self.stack.len() < base_us + slot_count {
6420                            self.stack
6421                                .resize(base_us + slot_count, crate::runtime::Value::Nil);
6422                        }
6423                        // P13-S13-E — fast-path restore loop. When
6424                        // we landed on the global `exit_tags`,
6425                        // dispatch on the compile-time
6426                        // classification: skip the loop entirely
6427                        // for `AllUntouched`, do a tag-free
6428                        // `Value::Int(...)` write per slot for
6429                        // `AllInt`, otherwise fall through to the
6430                        // general match-arm loop. site_id > 0
6431                        // (inline frame mat) and per_exit_tags
6432                        // hits always take the general path —
6433                        // their per-side-exit shapes aren't
6434                        // pre-classified yet.
6435                        let fast_path_taken = if using_global_exit_tags {
6436                            match global_tag_res_kind {
6437                                crate::jit::trace::TagResKind::AllUntouched => {
6438                                    // No-op: vm.stack already
6439                                    // matches the trace's post-
6440                                    // entry state for these
6441                                    // slots (entry values not
6442                                    // overridden, or already
6443                                    // spilled by helpers).
6444                                    true
6445                                }
6446                                crate::jit::trace::TagResKind::AllInt => {
6447                                    for i in 0..slot_count {
6448                                        self.stack[base_us + i] =
6449                                            crate::runtime::Value::Int(reg_state[i]);
6450                                    }
6451                                    true
6452                                }
6453                                crate::jit::trace::TagResKind::Mixed => false,
6454                            }
6455                        } else {
6456                            false
6457                        };
6458                        if !fast_path_taken {
6459                            for i in 0..slot_count {
6460                                let tag = match exit_tags_for_pc[i] {
6461                                    crate::jit::trace::ExitTag::Untouched => {
6462                                        if i < max_stack {
6463                                            entry_tags[i]
6464                                        } else {
6465                                            crate::runtime::value::raw::NIL
6466                                        }
6467                                    }
6468                                    crate::jit::trace::ExitTag::Int => {
6469                                        crate::runtime::value::raw::INT
6470                                    }
6471                                    crate::jit::trace::ExitTag::Float => {
6472                                        crate::runtime::value::raw::FLOAT
6473                                    }
6474                                    crate::jit::trace::ExitTag::Table => {
6475                                        crate::runtime::value::raw::TABLE
6476                                    }
6477                                    crate::jit::trace::ExitTag::Closure => {
6478                                        crate::runtime::value::raw::CLOSURE
6479                                    }
6480                                    // P12-S6-A1 — trace actively wrote Nil
6481                                    // to this slot (e.g. via Op::LoadNil).
6482                                    // Restore as Nil regardless of the entry
6483                                    // tag, since the i64 payload is 0 and
6484                                    // packing as the entry tag (e.g. INT)
6485                                    // would mis-type the slot.
6486                                    crate::jit::trace::ExitTag::Nil => {
6487                                        crate::runtime::value::raw::NIL
6488                                    }
6489                                    // P12-S12-C v2 — trace wrote a Str ptr
6490                                    // to this slot (LoadK Str / Move from
6491                                    // Str / Concat result). Restore as
6492                                    // Value::Str with raw bits round-
6493                                    // tripped.
6494                                    crate::jit::trace::ExitTag::Str => {
6495                                        crate::runtime::value::raw::STR
6496                                    }
6497                                };
6498                                // SAFETY: `tag` is the slot's entry tag
6499                                // (entry validated above), NIL, or the tag
6500                                // the exit-tag analysis pinned for the slot
6501                                // (INT/FLOAT/TABLE/CLOSURE/NIL/STR — see the match above).
6502                                // The raw payload sits in reg_state[i]. Stack was extended by the
6503                                // materialize helper for inline frames and by the resize above.
6504                                // For pointer tags: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6505                                self.stack[base_us + i] = unsafe {
6506                                    Value::pack(
6507                                        tag,
6508                                        crate::runtime::value::RawVal {
6509                                            zero: reg_state[i] as u64,
6510                                        },
6511                                    )
6512                                };
6513                            }
6514                        }
6515                        // P12-S4-step4b-C-2 — for non-inline exits the
6516                        // helper was never called (no metas chain for
6517                        // this cont_pc), so `frames.last()` is the
6518                        // trace head's frame and we set its pc to
6519                        // cont_pc as before. For inline exits the
6520                        // helper baked the side-exit PC into the
6521                        // innermost frame's `pc` at push time
6522                        // (chain.last().pc was overridden at emit),
6523                        // so this assignment to `frames.last_mut().pc
6524                        // = cont_pc` is a redundant-but-correct
6525                        // confirmation.
6526                        let _ = &per_exit_inline; // hold the Rc alive across dispatch
6527                        // P12-S4-step4b-C-2 — for inline side-exits the
6528                        // helper has pushed N frames on top. The trace
6529                        // head frame is at `pre_frames - 1`; set its
6530                        // pc to `head_resume_pc` so when the chain
6531                        // eventually pops back to it, interp resumes
6532                        // PAST the trace's depth-0 Op::Call instead of
6533                        // restarting from `head_pc` and re-triggering
6534                        // dispatch (infinite loop). The innermost
6535                        // (helper-pushed) frame already has its pc
6536                        // baked in at compile time, but we still
6537                        // assign `cont_pc` below for parity with the
6538                        // non-inline path (no-op).
6539                        if site_id > 0 {
6540                            let idx = (site_id - 1) as usize;
6541                            let head_resume_pc = decode_inline[idx].head_resume_pc;
6542                            if pre_frames > 0 {
6543                                if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
6544                                    f.pc = head_resume_pc;
6545                                }
6546                            }
6547                        }
6548                        let frames_len_now = self.frames.len();
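6549                        // SAFETY: `unwrap_unchecked` is sound only if `self.frames` is non-empty. This is the non-deopt path, which pops no frames after the trace returns, so the trace head's frame (plus any helper-pushed inline frames) should still be on the stack. NOTE(review): nothing in this arm checks that — confirm against the dispatch preconditions.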
6550                        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
6551                            CallFrame::Lua(fmut) => {
6552                                if crate::jit::trace::v2c_probe_enabled() {
6553                                    eprintln!(
6554                                        "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
6555                                        from_side_trace,
6556                                        raw_ret,
6557                                        fmut.pc,
6558                                        cont_pc,
6559                                        site_id,
6560                                        frames_len_now,
6561                                        pre_frames,
6562                                        max_stack,
6563                                    );
6564                                }
6565                                fmut.pc = cont_pc;
6566                            }
6567                            _ => unreachable!("Cont frame at trace dispatch"),
6568                        }
6569                        // P15-A v1 — deferred side-trace start. The
6570                        // increment block above flagged this exit's
6571                        // hit count crossing HOTEXIT_THRESHOLD; now
6572                        // that vm.stack is restored and frame.pc is
6573                        // settled, snapshot entry_tags from the
6574                        // resume frame's window and create the
6575                        // recorder. The recorder's first push fires
6576                        // on the next interp iteration at cont_pc.
6577                        //
6578                        // `head_proto` for the side trace = cl.proto
6579                        // (trace JIT only inlines self-recursive
6580                        // calls today, so cont_pc always lands in
6581                        // the same proto as the parent). Frame base
6582                        // is the resume frame (top of `self.frames`
6583                        // — inline-pushed frames moved this).
6584                        if side_trace_should_start {
6585                            let (resume_base, resume_proto) = match self.frames.last() {
6586                                Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
6587                                _ => (base_us, cl.proto),
6588                            };
6589                            let resume_max_stack = resume_proto.max_stack as usize;
6590                            let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
6591                            // Extend stack if cont_pc's frame window
6592                            // overhangs the current stack len (rare,
6593                            // but inline-pushed frame stack writes
6594                            // only covered the trace's writeback).
6595                            if self.stack.len() < resume_base + resume_max_stack {
6596                                self.stack.resize(
6597                                    resume_base + resume_max_stack,
6598                                    crate::runtime::Value::Nil,
6599                                );
6600                            }
6601                            for i in 0..resume_max_stack {
6602                                let (tag, _) = self.stack[resume_base + i].unpack();
6603                                side_entry_tags.push(tag);
6604                            }
6605                            self.jit.active_trace =
6606                                Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
6607                                    resume_proto,
6608                                    cont_pc,
6609                                    side_entry_tags,
6610                                    cl.proto,
6611                                    head_pc_val,
6612                                    exit_hit_idx,
6613                                )));
6614                            self.jit.recording_frame_base = self.frames.len() - 1;
6615                            self.jit.counters.side_trace_started += 1;
6616                        }
6617                        // P13-S13-D — put the dispatch buffers back
6618                        // before the `continue;` so the next
6619                        // dispatch picks up the same allocation.
6620                        self.jit.reg_state_buf = reg_state;
6621                        self.jit.entry_tags_buf = entry_tags;
6622                        continue;
6623                    }
6624                }
6625                // P13-S13-D — !dispatch_ok / deopt path / non-cont
6626                // exit also restore the buffers before falling
6627                // through to the interp.
6628                self.jit.reg_state_buf = reg_state;
6629                self.jit.entry_tags_buf = entry_tags;
6630            }
6631
6632            // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
6633            // hook code that consults `currentpc = savedpc - 1` lands on the
6634            // instruction now executing. luna mirrors that by advancing
6635            // `f.pc` to `pc + 1` before the hook block — local_at /
6636            // getinfo / line attribution all read f.pc, and the existing
6637            // `pc - 1` convention in those helpers then yields the current
6638            // instruction's pc (db.lua :696: local `A` visible at the
6639            // chunk's return line once OP_CLOSURE has advanced pc).
6640            //
6641            // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
6642            // (cont frames drained above) so the and_then/Option layers are
6643            // dead weight.
6644            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6645            match unsafe { self.frames.last_mut().unwrap_unchecked() } {
6646                CallFrame::Lua(fmut) => fmut.pc = pc + 1,
6647                _ => unreachable!("Cont frame at pc bump"),
6648            }
6649
6650            // count + line hooks (PUC traceexec): before executing the
6651            // instruction. Skipped while the hook itself runs.
6652            // (Parens here are load-bearing — without them `&&` binds tighter
6653            // than `||` and the `!in_hook` guard only gates the rust-hook arm,
6654            // letting a Lua line hook recurse into itself → stack overflow
6655            // on db.lua line-hook assertions. Matches the `hook_call_with` /
6656            // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
6657            if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
6658                let lines = &cl.proto.lines;
6659                let cur_line = if lines.is_empty() {
6660                    None
6661                } else {
6662                    Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
6663                };
6664                // count hook: fire every `count_base` instructions
6665                if self.hook.count {
6666                    self.hook.count_left -= 1;
6667                    if self.hook.count_left <= 0 {
6668                        self.hook.count_left = self.hook.count_base;
6669                        // hooked function is the running Lua frame: its frame
6670                        // is on the stack, so no synthetic C level is needed.
6671                        self.run_hook(b"count", cur_line, false)?;
6672                    }
6673                }
6674                // line hook: fire on a fresh frame, a backward jump (loop), or a
6675                // change of source line.
6676                if self.hook.line {
6677                    if lines.is_empty() {
6678                        // PUC: a stripped chunk has no line info, so
6679                        // `getfuncline` returns -1. The line hook still fires
6680                        // on the first instruction of the new frame (where
6681                        // `npci <= oldpc` holds at oldpc=0), with the line
6682                        // pushed as `nil` instead of an integer (db.lua :1030
6683                        // "hook called without debug info for 1st instruction").
6684                        if oldpc == u32::MAX {
6685                            self.run_hook(b"line", None, false)?;
6686                            self.top_frame_mut().hook_oldpc = pc;
6687                        }
6688                    } else {
6689                        let newline = lines[(pc as usize).min(lines.len() - 1)];
6690                        // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
6691                        // on a backward jump (`pc < oldpc` — strict; an equal pc
6692                        // would re-fire the install-site after `oldpc = pc`),
6693                        // or when the source line changes.
6694                        let fire = oldpc == u32::MAX
6695                            || pc < oldpc
6696                            || newline != lines[(oldpc as usize).min(lines.len() - 1)];
6697                        if fire {
6698                            self.run_hook(b"line", Some(newline as i64), false)?;
6699                        }
6700                        self.top_frame_mut().hook_oldpc = pc;
6701                    }
6702                }
6703            }
6704
6705            match inst.op() {
6706                Op::Move => {
6707                    let v = self.r(base, inst.b());
6708                    self.set_r(base, inst.a(), v);
6709                }
6710                Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
6711                Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
6712                Op::LoadK => {
6713                    let v = cl.proto.consts[inst.bx() as usize];
6714                    self.set_r(base, inst.a(), v);
6715                }
6716                Op::LoadKx => {
6717                    let extra = cl.proto.code[self.pc_of_top() as usize];
6718                    self.bump_pc();
6719                    let v = cl.proto.consts[extra.ax() as usize];
6720                    self.set_r(base, inst.a(), v);
6721                }
6722                Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
6723                Op::LFalseSkip => {
6724                    self.set_r(base, inst.a(), Value::Bool(false));
6725                    self.bump_pc();
6726                }
6727                Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
6728                Op::LoadNil => {
6729                    let a = inst.a();
6730                    for i in 0..=inst.b() {
6731                        self.set_r(base, a + i, Value::Nil);
6732                    }
6733                }
6734                Op::GetUpval => {
6735                    let v = self.upval_get(cl, inst.b());
6736                    self.set_r(base, inst.a(), v);
6737                }
6738                Op::SetUpval => {
6739                    let v = self.r(base, inst.a());
6740                    self.upval_set(cl, inst.b(), v);
6741                }
6742                Op::GetTabUp => {
6743                    let t = self.upval_get(cl, inst.b());
6744                    let key = cl.proto.consts[inst.c() as usize];
6745                    self.op_index(t, key, base + inst.a())?;
6746                }
6747                Op::GetTable => {
6748                    let t = self.r(base, inst.b());
6749                    let key = self.r(base, inst.c());
6750                    self.op_index(t, key, base + inst.a())?;
6751                }
6752                Op::GetI => {
6753                    let t = self.r(base, inst.b());
6754                    self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
6755                }
6756                Op::GetField => {
6757                    let t = self.r(base, inst.b());
6758                    let key = cl.proto.consts[inst.c() as usize];
6759                    // v1.2 D4 A1 — fast path: known-Str const key + no
6760                    // metatable on the table → skip `op_index` /
6761                    // `index_step`'s MAX_TAG_LOOP setup and the outer
6762                    // `Value` match. Falls through to the slow path
6763                    // unchanged when either invariant breaks (so
6764                    // `__index` metamethods, non-Table receivers, and
6765                    // non-Str keys behave exactly as before).
6766                    if let Value::Table(tb) = t
6767                        && tb.metatable().is_none()
6768                        && let Value::Str(s) = key
6769                    {
6770                        let v = tb.get_str(s);
6771                        self.stack[(base + inst.a()) as usize] = v;
6772                    } else {
6773                        self.op_index(t, key, base + inst.a())?;
6774                    }
6775                }
6776                Op::SetTabUp => {
6777                    let t = self.upval_get(cl, inst.a());
6778                    let key = cl.proto.consts[inst.b() as usize];
6779                    let v = self.r(base, inst.c());
6780                    self.op_newindex(t, key, v)?;
6781                }
6782                Op::SetTable => {
6783                    let t = self.r(base, inst.a());
6784                    let key = self.r(base, inst.b());
6785                    let v = self.r(base, inst.c());
6786                    self.op_newindex(t, key, v)?;
6787                }
6788                Op::SetI => {
6789                    let t = self.r(base, inst.a());
6790                    let v = self.r(base, inst.c());
6791                    self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
6792                }
6793                Op::SetField => {
6794                    let t = self.r(base, inst.a());
6795                    let key = cl.proto.consts[inst.b() as usize];
6796                    let v = self.r(base, inst.c());
6797                    self.op_newindex(t, key, v)?;
6798                }
6799                Op::NewTable => {
6800                    let t = self.heap.new_table();
6801                    self.set_r(base, inst.a(), Value::Table(t));
6802                    self.maybe_collect_garbage(base + inst.a() + 1);
6803                }
6804                Op::SetList => {
6805                    let a = inst.a();
6806                    let abs_a = base + a;
6807                    let n = if inst.b() == 0 {
6808                        self.top - (abs_a + 1)
6809                    } else {
6810                        inst.b()
6811                    };
6812                    let offset = if inst.k() {
6813                        let extra = cl.proto.code[self.pc_of_top() as usize];
6814                        self.bump_pc();
6815                        extra.ax() as i64
6816                    } else {
6817                        inst.c() as i64
6818                    };
6819                    let Value::Table(t) = self.r(base, a) else {
6820                        unreachable!("SETLIST on non-table");
6821                    };
6822                    for i in 1..=n {
6823                        let v = self.r(base, a + i);
6824                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6825                        if let Err(TableError::Overflow) =
6826                            unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
6827                        {
6828                            return Err(self.rt_err("table overflow"));
6829                        }
6830                    }
6831                    // one barrier_back covers every store this op did — PUC's
6832                    // `luaC_barrierback_` once-per-table optimisation
6833                    self.heap
6834                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
6835                    // the element temps above the table are now consumed
6836                    self.maybe_collect_garbage(base + a + 1);
6837                }
6838                Op::SelfOp => {
6839                    let o = self.r(base, inst.b());
6840                    self.set_r(base, inst.a() + 1, o);
6841                    // PUC OP_SELF's C is a constant index when the k-flag is
6842                    // set; otherwise it points to a register that holds the
6843                    // (constant-loaded) key. luna's compiler falls back to the
6844                    // register form when the constant index exceeds OP_SELF's
6845                    // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
6846                    // a table with 250+ string keys, where "findfield" lands
6847                    // past const #255). The exec must honour the same split.
6848                    let key = if inst.k() {
6849                        cl.proto.consts[inst.c() as usize]
6850                    } else {
6851                        self.r(base, inst.c())
6852                    };
6853                    self.op_index(o, key, base + inst.a())?;
6854                }
6855                Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
6856                Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
6857                Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
6858                Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
6859                Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
6860                Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
6861                Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
6862                Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
6863                Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
6864                Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
6865                Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
6866                Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
6867                Op::Unm => {
6868                    let v = self.r(base, inst.b());
6869                    match coerce_num(v) {
6870                        Some(Num::Int(i)) => {
6871                            self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
6872                        }
6873                        Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
6874                        None => {
6875                            let mm = self.get_mm(v, Mm::Unm);
6876                            if mm.is_nil() {
6877                                return Err(self.type_err("perform arithmetic on", v));
6878                            }
6879                            let dst = base + inst.a();
6880                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
6881                        }
6882                    }
6883                }
6884                Op::BNot => {
6885                    let v = self.r(base, inst.b());
6886                    match coerce_num(v) {
6887                        Some(n) => {
6888                            let i = self.int_from_num(n)?;
6889                            self.set_r(base, inst.a(), Value::Int(!i));
6890                        }
6891                        None => {
6892                            let mm = self.get_mm(v, Mm::BNot);
6893                            if mm.is_nil() {
6894                                return Err(self.type_err("perform bitwise operation on", v));
6895                            }
6896                            let dst = base + inst.a();
6897                            self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
6898                        }
6899                    }
6900                }
6901                Op::Not => {
6902                    let v = self.r(base, inst.b());
6903                    self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
6904                }
6905                Op::Len => {
6906                    let v = self.r(base, inst.b());
6907                    match self.len_step(v)? {
6908                        MmOut::Done(r) => self.set_r(base, inst.a(), r),
6909                        MmOut::Mm { func, recv } => {
6910                            let dst = base + inst.a();
6911                            self.begin_meta_call(
6912                                func,
6913                                &[recv, recv],
6914                                MetaAction::Store { dst },
6915                                "len",
6916                            )?;
6917                        }
6918                        MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
6919                    }
6920                }
6921                Op::Concat => {
6922                    // right-associative fold over operands at base+a .. base+a+n,
6923                    // in place on the stack so a yielding __concat can suspend.
6924                    let a = inst.a();
6925                    let n = inst.b();
6926                    self.top = base + a + n;
6927                    self.concat_run(base + a)?;
6928                }
6929                Op::Close => {
6930                    // Yieldable: drive __close handlers through the
6931                    // interpreter loop so a coroutine.yield() inside a
6932                    // handler suspends cleanly (locals.lua block-end yield).
6933                    // `drive_close` parks the handler call at `self.top`, so
6934                    // raise `top` past this frame's full register window
6935                    // first — a goto out of a nested for-loop can fire
6936                    // OP_Close while `self.top` still sits at the inner
6937                    // body's working top, which would let `push_frame`'s
6938                    // wipe clobber the outer tbc slot before it could be
6939                    // closed (locals.lua:1219 nested-for goto regression).
6940                    self.top = self.top.max(base + cl.proto.max_stack as u32);
6941                    let _ =
6942                        self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
6943                }
6944                Op::Tbc => {
6945                    self.register_tbc(base + inst.a())?;
6946                }
6947                Op::Jmp => {
6948                    let off = inst.sj();
6949                    // P12-S1.B — trace JIT back-edge counter. A negative
6950                    // jump offset is a loop back-edge (the only canonical
6951                    // backward jumps the compiler emits — `while`, `for`,
6952                    // `repeat`). Tick the per-Proto counter and, once it
6953                    // reaches the threshold, start recording a trace at the
6954                    // back-edge target (unless one is active or cached). The
6955                    // whole block is gated on `self.jit.trace_enabled` so
6956                    // existing benches see one branch-not-taken and no
6957                    // counter writes.
6958                    if self.jit.trace_enabled && off < 0 {
6959                        let proto = cl.proto;
6960                        let c = proto.trace_hot_count.get();
6961                        if c < u32::MAX / 2 {
6962                            proto.trace_hot_count.set(c + 1);
6963                        }
6964                        // P13-S13-H — relaxed back-edge trigger:
6965                        // `c >= THRESHOLD` (was `c == THRESHOLD`) so
6966                        // a missed crossing (active_trace busy with
6967                        // a call-trigger, or the recorder slot
6968                        // happened to be in use) doesn't permanently
6969                        // lock this back-edge target out. The
6970                        // `already_cached` short-circuit prevents
6971                        // duplicate recordings: once a trace is
6972                        // cached for this target, subsequent
6973                        // crossings skip the start. This pairs with
6974                        // S13-H's discard-on-partial-coverage close
6975                        // handling — when a short call-trigger is
6976                        // discarded, the back-edge can still find an
6977                        // open slot at the next iteration.
6978                        let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
6979                        // P13-S13-K — gave-up short-circuit. Skip
6980                        // the RefCell borrow + scan when the
6981                        // S13-I cap force-compiled a partial
6982                        // trace on this Proto.
6983                        let back_edge_already_cached = if proto.trace_gave_up.get() {
6984                            true
6985                        } else {
6986                            proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
6987                        };
6988                        if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
6989                            && self.jit.active_trace.is_none()
6990                            && !back_edge_already_cached
6991                        {
6992                            // Back-edge target = pc after `add_pc(off)`,
6993                            // i.e. current `pc + 1 + off` (the dispatch
6994                            // loop has already advanced f.pc to pc+1).
6995                            let target = (pc as i32 + 1 + off as i32).max(0) as u32;
6996                            // Snapshot per-slot Value tag at trace
6997                            // entry so the lowerer's kind tracker
6998                            // knows which arith path to lower
6999                            // (iadd vs fadd, etc.).
7000                            let max_stack = cl.proto.max_stack as usize;
7001                            let base_us = base as usize;
7002                            let mut entry_tags = Vec::with_capacity(max_stack);
7003                            for i in 0..max_stack {
7004                                let (tag, _) = self.stack[base_us + i].unpack();
7005                                entry_tags.push(tag);
7006                            }
7007                            self.jit.active_trace =
7008                                Some(Box::new(crate::jit::trace::TraceRecord::start(
7009                                    cl.proto, target, entry_tags, false,
7010                                )));
7011                            // P12-S4 — record the frame the trace
7012                            // started in. `self.frames.len() - 1`
7013                            // since we're inside the currently-running
7014                            // Lua frame's dispatch.
7015                            self.jit.recording_frame_base = self.frames.len() - 1;
7016                        }
7017                    }
7018                    self.add_pc(off);
7019                }
7020                Op::Eq => {
7021                    let l = self.r(base, inst.a());
7022                    let r = self.r(base, inst.b());
7023                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7024                        if (a == b) != inst.k() {
7025                            self.bump_pc();
7026                        }
7027                    } else {
7028                        let step = self.eq_step(l, r);
7029                        self.op_compare(step, l, r, inst.k(), "eq")?;
7030                    }
7031                }
7032                Op::EqK => {
7033                    let l = self.r(base, inst.a());
7034                    let r = cl.proto.consts[inst.b() as usize];
7035                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7036                        if (a == b) != inst.k() {
7037                            self.bump_pc();
7038                        }
7039                    } else {
7040                        let step = self.eq_step(l, r);
7041                        self.op_compare(step, l, r, inst.k(), "eq")?;
7042                    }
7043                }
7044                Op::Lt => {
7045                    let l = self.r(base, inst.a());
7046                    let r = self.r(base, inst.b());
7047                    // hot path: Int < Int — drops the MmOut + op_compare match
7048                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7049                        if (a < b) != inst.k() {
7050                            self.bump_pc();
7051                        }
7052                    } else {
7053                        let step = self.less_step(l, r, false)?;
7054                        self.op_compare(step, l, r, inst.k(), "lt")?;
7055                    }
7056                }
7057                Op::Le => {
7058                    let l = self.r(base, inst.a());
7059                    let r = self.r(base, inst.b());
7060                    if let (Value::Int(a), Value::Int(b)) = (l, r) {
7061                        if (a <= b) != inst.k() {
7062                            self.bump_pc();
7063                        }
7064                    } else {
7065                        let step = self.less_step(l, r, true)?;
7066                        self.op_compare(step, l, r, inst.k(), "le")?;
7067                    }
7068                }
7069                Op::Test => {
7070                    let cond = self.r(base, inst.a()).truthy();
7071                    self.cond_skip(cond, inst.k());
7072                }
7073                Op::TestSet => {
7074                    let v = self.r(base, inst.b());
7075                    if v.truthy() == inst.k() {
7076                        self.set_r(base, inst.a(), v);
7077                    } else {
7078                        self.bump_pc();
7079                    }
7080                }
7081                Op::Call => {
7082                    let abs = base + inst.a();
7083                    let nargs = if inst.b() == 0 {
7084                        None
7085                    } else {
7086                        Some(inst.b() - 1)
7087                    };
7088                    let wanted = inst.c() as i32 - 1;
7089                    self.begin_call(abs, nargs, wanted, false)?;
7090                }
7091                Op::TailCall => {
7092                    let fr = *self.top_frame();
7093                    let abs = base + inst.a();
7094                    let mut nargs = if inst.b() == 0 {
7095                        self.top - (abs + 1)
7096                    } else {
7097                        inst.b() - 1
7098                    };
7099                    // A tail call pops this frame before begin_call, so a
7100                    // non-callable target would lose its name/position. Report
7101                    // it now (PUC reads funcname from the still-current ci),
7102                    // while the frame is intact, for "(field 'x')"-style info.
7103                    let mut func = self.stack[abs as usize];
7104                    if !matches!(func, Value::Closure(_) | Value::Native(_))
7105                        && self.get_mm(func, Mm::Call).is_nil()
7106                    {
7107                        return Err(self.call_err(func));
7108                    }
7109                    // PUC `luaD_pretailcall` resolves a chain of `__call`
7110                    // metamethods *in place* before deciding whether to
7111                    // collapse this frame. Without that, each __call hop
7112                    // would push a fresh Lua frame and a 10000-deep
7113                    // tail-recursion through a 100-deep __call chain
7114                    // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
7115                    // shift args right, install the handler at `abs`, retry.
7116                    // Chain depth limit matches the call-site `begin_call`
7117                    // version cap (5.5 calls.lua :223 — 15 max, then "too
7118                    // long"; 16th wrap fails the call). An infinite
7119                    // self-referential `__call` would otherwise spin.
7120                    let chain_cap = if self.version >= LuaVersion::Lua55 {
7121                        15
7122                    } else {
7123                        MAX_CCMT
7124                    };
7125                    let mut chain = 0u32;
7126                    while !matches!(func, Value::Closure(_) | Value::Native(_)) {
7127                        let mm = self.get_mm(func, Mm::Call);
7128                        if mm.is_nil() {
7129                            return Err(self.call_err(func));
7130                        }
7131                        chain += 1;
7132                        if chain > chain_cap {
7133                            return Err(self.rt_err("'__call' chain too long"));
7134                        }
7135                        let end = (abs + 1 + nargs) as usize;
7136                        if self.stack.len() < end + 1 {
7137                            self.stack.resize(end + 1, Value::Nil);
7138                        }
7139                        for i in (0..=nargs).rev() {
7140                            self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
7141                        }
7142                        self.stack[abs as usize] = mm;
7143                        nargs += 1;
7144                        self.top = abs + 1 + nargs;
7145                        func = mm;
7146                    }
7147                    // PUC's tail-call collapse is Lua→Lua only. A tail call to
7148                    // a C function runs the C function under the *current* Lua
7149                    // activation (no frame fold — a C frame has nothing to
7150                    // collapse into); after the C function returns, the
7151                    // calling Lua function returns those results normally.
7152                    // Mirror that: keep our Lua frame on the stack, call the
7153                    // target through `begin_call(abs, …)` as a regular call,
7154                    // and let the fallback `Op::Return` that the compiler
7155                    // emits right after `Op::TailCall` forward the results.
7156                    // 5.1 closure.lua :177's `return getfenv()` from inside
7157                    // foo needs level 1 to resolve to foo, not to the
7158                    // thread's globals fallback that happens when no Lua
7159                    // frame is on the stack.
7160                    let lua_target = matches!(func, Value::Closure(_));
7161                    if lua_target {
7162                        self.close_slots(fr.base, None)?;
7163                        for i in 0..=nargs {
7164                            self.stack[(fr.func_slot + i) as usize] =
7165                                self.stack[(abs + i) as usize];
7166                        }
7167                        // PUC `CIST_TAIL`: the new Lua activation inherits
7168                        // the popped frame's tailcalls count plus one for
7169                        // this collapse. 5.1 db.lua :372 hammers 30000
7170                        // recursive tail calls and expects to see the
7171                        // synthetic tail level for every one of them.
7172                        self.pending_tailcalls = fr.tailcalls.saturating_add(1);
7173                        frames_pop_sync(&mut self.frames, &mut self.frames_top);
7174                        if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
7175                            && self.frames.len() < entry_depth
7176                        {
7177                            // a native completed what was this function's result
7178                            return Ok(self.take_results(fr.func_slot));
7179                        }
7180                    } else {
7181                        // Native (or __call-bearing) target: regular call. The
7182                        // results land at `abs..self.top` and the next op (the
7183                        // fallback `Op::Return`) forwards them. `wanted = -1`
7184                        // because the caller will multret them through Return.
7185                        self.begin_call(abs, Some(nargs), -1, false)?;
7186                    }
7187                }
7188                Op::Return | Op::Return0 | Op::Return1 => {
7189                    let (abs_a, nret) = match inst.op() {
7190                        Op::Return0 => (base, 0),
7191                        Op::Return1 => (base + inst.a(), 1),
7192                        _ => {
7193                            let abs_a = base + inst.a();
7194                            let nret = if inst.b() == 0 {
7195                                self.top - abs_a
7196                            } else {
7197                                inst.b() - 1
7198                            };
7199                            (abs_a, nret)
7200                        }
7201                    };
7202                    // close before moving results: __close handlers run above
7203                    // the stack top, so the result region [abs_a..abs_a+nret)
7204                    // stays intact across any yields the close performs.
7205                    // Fixed-count returns may leave `self.top` below the last
7206                    // result slot (the compiler does not always re-bump it);
7207                    // raise it past the result region so `drive_close` parks
7208                    // the handler call *above* — landing at `self.top` would
7209                    // otherwise clobber a result with the handler closure.
7210                    self.top = self.top.max(abs_a + nret);
7211                    if let Some(vals) = self.begin_close(
7212                        base,
7213                        None,
7214                        AfterClose::Return {
7215                            abs_a,
7216                            nret,
7217                            from_native: false,
7218                        },
7219                        entry_depth,
7220                    )? {
7221                        return Ok(vals);
7222                    }
7223                }
7224                Op::ForPrep => self.for_prep(inst, base)?,
7225                Op::ForLoop => {
7226                    // P12 — trace JIT back-edge counter on the
7227                    // numeric-for back-edge. ForLoop is always at
7228                    // a back-edge position (when it continues);
7229                    // for the trace recorder we treat it as the
7230                    // close-detection equivalent of `Op::Jmp` with
7231                    // negative offset. Counter only ticks when the
7232                    // back-edge will actually fire (count > 0 in
7233                    // the 5.4+ Int form, comparable predicates in
7234                    // pre-5.3 / Float). The cheap check up front
7235                    // matches the for_loop helper's branch.
7236                    if self.jit.trace_enabled {
7237                        let a = inst.a();
7238                        let pre53 = self.version() <= LuaVersion::Lua53;
7239                        let take_back_edge =
7240                            match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
7241                                (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
7242                                    count > 0
7243                                }
7244                                (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
7245                                    let next = cur.wrapping_add(st);
7246                                    if st > 0 { next <= lim } else { next >= lim }
7247                                }
7248                                (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
7249                                    let next = cur + st;
7250                                    if st > 0.0 { next <= lim } else { next >= lim }
7251                                }
7252                                _ => false,
7253                            };
7254                        if take_back_edge {
7255                            let proto = cl.proto;
7256                            let c = proto.trace_hot_count.get();
7257                            if c < u32::MAX / 2 {
7258                                proto.trace_hot_count.set(c + 1);
7259                            }
7260                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
7261                                && self.jit.active_trace.is_none()
7262                            {
7263                                // ForLoop's back-edge target = pc
7264                                // after `add_pc(-bx)` runs from the
7265                                // already-bumped f.pc (= pc + 1).
7266                                // So target = (pc + 1) - bx.
7267                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
7268                                let max_stack = cl.proto.max_stack as usize;
7269                                let base_us = base as usize;
7270                                let mut entry_tags = Vec::with_capacity(max_stack);
7271                                for i in 0..max_stack {
7272                                    let (tag, _) = self.stack[base_us + i].unpack();
7273                                    entry_tags.push(tag);
7274                                }
7275                                self.jit.active_trace =
7276                                    Some(Box::new(crate::jit::trace::TraceRecord::start(
7277                                        cl.proto, target, entry_tags, false,
7278                                    )));
7279                                // P12-S4 — record the frame the trace
7280                                // started in. The currently-running
7281                                // Lua frame is at len() - 1.
7282                                self.jit.recording_frame_base = self.frames.len() - 1;
7283                            }
7284                        }
7285                    }
7286                    self.for_loop(inst, base);
7287                }
7288                Op::TForPrep => {
7289                    // the 4th control slot is the iterator's closing value
7290                    self.register_tbc(base + inst.a() + 3)?;
7291                    self.add_pc(inst.bx() as i32);
7292                }
7293                Op::TForCall => {
7294                    let abs = base + inst.a();
7295                    let need = (abs + 7) as usize;
7296                    if self.stack.len() < need {
7297                        self.stack.resize(need, Value::Nil);
7298                    }
7299                    self.stack[(abs + 4) as usize] = self.stack[abs as usize];
7300                    self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
7301                    self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
7302                    let nvars = inst.c() as i32;
7303                    self.begin_call(abs + 4, Some(2), nvars, false)?;
7304                }
7305                Op::TForLoop => {
7306                    let a = inst.a();
7307                    let ctrl = self.r(base, a + 4);
7308                    if !ctrl.is_nil() {
7309                        // P12-S12-B v1 — trace JIT back-edge counter on
7310                        // generic-for back-edge. TForLoop sits at the
7311                        // tail of `for k,v in expr do ... end`; recorder
7312                        // treats it as the close-detection equivalent of
7313                        // a negative Op::Jmp. Gate on `take_back_edge`
7314                        // (= `ctrl != nil`) so empty-iter loops don't
7315                        // pollute hot_count. v1 only adds the trigger;
7316                        // whitelist + helper + emit live in v2.
7317                        if self.jit.trace_enabled {
7318                            let proto = cl.proto;
7319                            let c = proto.trace_hot_count.get();
7320                            if c < u32::MAX / 2 {
7321                                proto.trace_hot_count.set(c + 1);
7322                            }
7323                            if c == crate::jit::trace::TRACE_HOT_THRESHOLD
7324                                && self.jit.active_trace.is_none()
7325                            {
7326                                // TForLoop back-edge target = pc after
7327                                // `add_pc(-bx)` runs from the already-
7328                                // bumped f.pc (= pc + 1). So target =
7329                                // (pc + 1) - bx, normally landing on
7330                                // body_top (the op right after TForPrep).
7331                                let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
7332                                let max_stack = cl.proto.max_stack as usize;
7333                                let base_us = base as usize;
7334                                let mut entry_tags = Vec::with_capacity(max_stack);
7335                                for i in 0..max_stack {
7336                                    let (tag, _) = self.stack[base_us + i].unpack();
7337                                    entry_tags.push(tag);
7338                                }
7339                                // P12-S12-B-v5 — snapshot the iter
7340                                // fn's address if Native, so the
7341                                // lowerer can specialise ipairs into
7342                                // inline Table aget IR.
7343                                let iter_ptr =
7344                                    if let Value::Native(n) = self.stack[base_us + a as usize] {
7345                                        Some(n.f as usize)
7346                                    } else {
7347                                        None
7348                                    };
7349                                // P12-S12-C v3 — snapshot R[A+5]'s
7350                                // tag (= current iter's val from
7351                                // the just-fired TForCall). The v5
7352                                // inline aget fast_blk emits a
7353                                // runtime guard against this tag;
7354                                // mixed-tag arrays deopt rather
7355                                // than producing garbage pointers
7356                                // through the v2 spill path.
7357                                let val_slot = base_us + (a as usize) + 5;
7358                                let val_tag = if val_slot < self.stack.len() {
7359                                    Some(self.stack[val_slot].unpack().0)
7360                                } else {
7361                                    None
7362                                };
7363                                let mut rec = crate::jit::trace::TraceRecord::start(
7364                                    cl.proto, target, entry_tags, false,
7365                                );
7366                                rec.tfor_iter_ptr = iter_ptr;
7367                                rec.tfor_val_tag = val_tag;
7368                                self.jit.active_trace = Some(Box::new(rec));
7369                                self.jit.recording_frame_base = self.frames.len() - 1;
7370                            }
7371                        }
7372                        self.set_r(base, a + 2, ctrl);
7373                        self.add_pc(-(inst.bx() as i32));
7374                    }
7375                }
7376                Op::Closure => {
7377                    let proto = cl.proto.protos[inst.bx() as usize];
7378                    let n_ups = proto.upvals.len();
7379                    // P11-S5d.M — build upvals on the stack for small
7380                    // closures, skipping the per-call Vec/Box alloc
7381                    // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
7382                    // = 2 covers most Lua source (1 captured local, or
7383                    // _ENV + a single capture). Beyond that, fall back
7384                    // to a heap Vec.
7385                    use crate::runtime::function::INLINE_UPVALS_N;
7386                    let mut stack_buf: [std::mem::MaybeUninit<
7387                        Gc<crate::runtime::function::Upvalue>,
7388                    >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
7389                    let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
7390                    let use_inline = n_ups <= INLINE_UPVALS_N;
7391                    if !use_inline {
7392                        heap_buf.reserve_exact(n_ups);
7393                    }
7394                    for (i, d) in proto.upvals.iter().enumerate() {
7395                        let uv = if d.in_stack {
7396                            self.find_or_create_upval(base + d.index as u32)
7397                        } else {
7398                            cl.upvals()[d.index as usize]
7399                        };
7400                        if use_inline {
7401                            stack_buf[i] = std::mem::MaybeUninit::new(uv);
7402                        } else {
7403                            heap_buf.push(uv);
7404                        }
7405                    }
7406                    // Tiny shim around the two paths so the 5.1 _ENV
7407                    // clone + cache check below see one uniform
7408                    // `&mut [Gc<Upvalue>]`. The stack_buf slice points
7409                    // into the local frame (still valid through the
7410                    // rest of this Op::Closure handler).
7411                    let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
7412                        // SAFETY: the first n_ups slots of stack_buf
7413                        // were initialised above; we hand out a slice
7414                        // covering exactly them.
7415                        unsafe {
7416                            std::slice::from_raw_parts_mut(
7417                                stack_buf.as_mut_ptr()
7418                                    as *mut Gc<crate::runtime::function::Upvalue>,
7419                                n_ups,
7420                            )
7421                        }
7422                    } else {
7423                        &mut heap_buf[..]
7424                    };
7425                    // PUC 5.1 had per-function environments: every Lua
7426                    // function carried its own `env` slot, snapshotted from
7427                    // the creating function's env at closure time, so a
7428                    // `setfenv` on one closure never bled into a sibling.
7429                    // luna models that by giving the 5.1 closure a *fresh*
7430                    // closed upvalue for whichever cell holds `_ENV`, seeded
7431                    // from the parent's current env value. Only that cell is
7432                    // cloned — every other upvalue keeps its open/shared
7433                    // identity (so e.g. `local function range(...) ...
7434                    // range(...) ... end` still sees its self-reference). 5.2+
7435                    // keeps the shared-upval model (and the proto cache that
7436                    // depends on it).
7437                    let v51 = self.version() <= LuaVersion::Lua51;
7438                    if v51 && proto.env_upval_idx != u8::MAX {
7439                        let i = proto.env_upval_idx as usize;
7440                        let cur = match ups[i].state() {
7441                            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
7442                            UpvalState::Closed(v) => v,
7443                        };
7444                        ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
7445                    }
7446                    let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
7447                    // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
7448                    // and reuses it when every fresh-upvalue binding still
7449                    // points to the same Upvalue object as the cached one.
7450                    // That keeps `function() return outer end` repeated in a
7451                    // loop comparing equal across iterations (the captured
7452                    // outer is a shared open upvalue), while `function()
7453                    // return loop_var end` gets a fresh closure each round
7454                    // because the loop var is re-created per iteration. PUC
7455                    // 5.1 predated the cache, and the per-closure `_ENV`
7456                    // clone above would defeat it anyway, so skip it.
7457                    let nc = if v51 {
7458                        self.heap.new_closure_inline(proto, ups_slice)
7459                    } else {
7460                        let cached = proto.cache.get().filter(|c| {
7461                            c.upvals().len() == ups_slice.len()
7462                                && c.upvals()
7463                                    .iter()
7464                                    .zip(ups_slice.iter())
7465                                    .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
7466                        });
7467                        match cached {
7468                            Some(c) => c,
7469                            None => {
7470                                let n = self.heap.new_closure_inline(proto, ups_slice);
7471                                proto.cache.set(Some(n));
7472                                n
7473                            }
7474                        }
7475                    };
7476                    self.set_r(base, inst.a(), Value::Closure(nc));
7477                    self.maybe_collect_garbage(base + inst.a() + 1);
7478                }
7479                Op::Vararg => {
7480                    let abs_a = base + inst.a();
7481                    let wanted = inst.c() as i32 - 1;
7482                    // A materialized named vararg lives in func_slot (its writes
7483                    // must be visible to `...`); otherwise spread the extra args
7484                    // straight off the stack at func_slot+1 .. +n_varargs.
7485                    let vt = match self.stack[func_slot as usize] {
7486                        Value::Table(t) => Some(t),
7487                        _ => None,
7488                    };
7489                    let n = match vt {
7490                        Some(t) => {
7491                            let n_key = Value::Str(self.heap.intern(b"n"));
7492                            // PUC getnumargs: a named vararg `t.n` set out of the
7493                            // integer range [0, INT_MAX/2] is rejected here
7494                            match t.get(n_key) {
7495                                Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
7496                                _ => return Err(self.rt_err("vararg table has no proper 'n'")),
7497                            }
7498                        }
7499                        None => n_varargs,
7500                    };
7501                    let count = if wanted < 0 { n } else { wanted as u32 };
7502                    let need = (abs_a + count) as usize;
7503                    if self.stack.len() < need {
7504                        self.stack.resize(need, Value::Nil);
7505                    }
7506                    for i in 0..count {
7507                        let v = if i >= n {
7508                            Value::Nil
7509                        } else if let Some(t) = vt {
7510                            t.get_int(i as i64 + 1)
7511                        } else {
7512                            self.stack[(func_slot + 1 + i) as usize]
7513                        };
7514                        self.stack[(abs_a + i) as usize] = v;
7515                    }
7516                    if wanted < 0 {
7517                        self.top = abs_a + count;
7518                    }
7519                }
7520                Op::GetVarg => {
7521                    // materialize the vararg table (PUC table.pack shape) from the
7522                    // stack varargs — used when the named vararg is written /
7523                    // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
7524                    // sees later writes) and in the local register R[A].
7525                    let n = n_varargs;
7526                    let t = self.heap.new_table();
7527                    {
7528                        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7529                        let tm = unsafe { t.as_mut() };
7530                        for i in 0..n {
7531                            let _ = tm.set_int(
7532                                &mut self.heap,
7533                                i as i64 + 1,
7534                                self.stack[(func_slot + 1 + i) as usize],
7535                            );
7536                        }
7537                    }
7538                    let n_key = Value::Str(self.heap.intern(b"n"));
7539                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7540                    unsafe { t.as_mut() }
7541                        .set(&mut self.heap, n_key, Value::Int(n as i64))
7542                        .expect("'n' is a valid key");
7543                    // once-per-table barrier (mirror SETLIST): t is born BLACK
7544                    // during Propagate; the bulk inserts above don't barrier.
7545                    self.heap
7546                        .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7547                    self.stack[func_slot as usize] = Value::Table(t);
7548                    self.set_r(base, inst.a(), Value::Table(t));
7549                }
7550                Op::VargIdx => {
7551                    // R[A] := vararg[R[C]] without allocating: integer key in
7552                    // [1,n] → that vararg, "n" → the count, else nil.
7553                    let key = self.r(base, inst.c());
7554                    let n = n_varargs;
7555                    let v = match key {
7556                        Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
7557                            self.stack[(func_slot + k as u32) as usize]
7558                        }
7559                        Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
7560                            self.stack[(func_slot + f as u32) as usize]
7561                        }
7562                        Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
7563                        _ => Value::Nil,
7564                    };
7565                    self.set_r(base, inst.a(), v);
7566                }
7567                Op::ErrNNil => {
7568                    let v = self.r(base, inst.a());
7569                    if !matches!(v, Value::Nil) {
7570                        let bx = inst.bx();
7571                        let name = if bx == 0 {
7572                            "?".to_string()
7573                        } else {
7574                            match cl.proto.consts[(bx - 1) as usize] {
7575                                Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
7576                                _ => "?".to_string(),
7577                            }
7578                        };
7579                        return Err(self.rt_err(&format!("global '{name}' already defined")));
7580                    }
7581                }
7582                Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
7583            }
7584        }
7585    }
7586
7587    #[inline(always)]
7588    fn pc_of_top(&self) -> u32 {
7589        self.top_frame().pc
7590    }
7591
7592    #[inline(always)]
7593    fn bump_pc(&mut self) {
7594        // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
7595        // drained at dispatch loop head). Avoids the and_then/lua_mut Option
7596        // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
7597        // up over `fib_28`'s ~500k jumps.
7598        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7599        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7600            CallFrame::Lua(f) => f.pc += 1,
7601            _ => unreachable!("Cont frame at bump_pc"),
7602        }
7603    }
7604
7605    #[inline(always)]
7606    fn add_pc(&mut self, d: i32) {
7607        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7608        match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7609            CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
7610            _ => unreachable!("Cont frame at add_pc"),
7611        }
7612    }
7613
7614    /// PUC conditional-skip convention: the JMP that follows is executed when
7615    /// `cond == k`; otherwise it is skipped.
7616    #[inline(always)]
7617    fn cond_skip(&mut self, cond: bool, k: bool) {
7618        if cond != k {
7619            self.bump_pc();
7620        }
7621    }
7622
7623    // ---- indexing (with __index/__newindex chains) ----
7624
7625    /// The `#` length operation: string byte length, `__len` if present, else
7626    /// the raw table border. Returns the raw length value (may be non-integer
7627    /// when `__len` is exotic).
7628    pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
7629        match self.len_step(v)? {
7630            MmOut::Done(n) => Ok(n),
7631            // PUC calls unary metamethods with the operand twice
7632            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
7633            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7634        }
7635    }
7636
7637    /// Length fast path: a string's byte count or a table's raw border when no
7638    /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
7639    /// called with the operand twice. Errors for a non-table with no `__len`.
7640    fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
7641        match v {
7642            Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
7643            Value::Table(t) => {
7644                let mm = self.get_mm(v, Mm::Len);
7645                if mm.is_nil() {
7646                    Ok(MmOut::Done(Value::Int(t.len())))
7647                } else {
7648                    Ok(MmOut::Mm { func: mm, recv: v })
7649                }
7650            }
7651            _ => {
7652                let mm = self.get_mm(v, Mm::Len);
7653                if mm.is_nil() {
7654                    Err(self.type_err("get length of", v))
7655                } else {
7656                    Ok(MmOut::Mm { func: mm, recv: v })
7657                }
7658            }
7659        }
7660    }
7661
7662    /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
7663    /// value with no integer representation.
7664    pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
7665        match self.len_value(v)? {
7666            Value::Int(i) => Ok(i),
7667            Value::Float(f) => crate::runtime::value::f2i_exact(f)
7668                .ok_or_else(|| self.rt_err("object length is not an integer")),
7669            _ => Err(self.rt_err("object length is not an integer")),
7670        }
7671    }
7672
7673    pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
7674        match self.index_step(t, key)? {
7675            MmOut::Done(v) => Ok(v),
7676            MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
7677            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
7678        }
7679    }
7680
7681    /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
7682    /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
7683    /// are followed inline (no yield possible); only a function link can yield.
7684    fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
7685        let mut cur = t;
7686        for _ in 0..MAX_TAG_LOOP {
7687            let mm = match cur {
7688                Value::Table(tb) => {
7689                    let v = tb.get(key);
7690                    if !v.is_nil() {
7691                        return Ok(MmOut::Done(v));
7692                    }
7693                    let mm = self.get_mm(cur, Mm::Index);
7694                    if mm.is_nil() {
7695                        return Ok(MmOut::Done(Value::Nil));
7696                    }
7697                    mm
7698                }
7699                v => {
7700                    let mm = self.get_mm(v, Mm::Index);
7701                    if mm.is_nil() {
7702                        return Err(self.type_err("index", v));
7703                    }
7704                    mm
7705                }
7706            };
7707            match mm {
7708                Value::Closure(_) | Value::Native(_) => {
7709                    return Ok(MmOut::Mm {
7710                        func: mm,
7711                        recv: cur,
7712                    });
7713                }
7714                next => cur = next,
7715            }
7716        }
7717        Err(self.rt_err("'__index' chain too long; possible loop"))
7718    }
7719
7720    pub(crate) fn newindex_value(
7721        &mut self,
7722        t: Value,
7723        key: Value,
7724        v: Value,
7725    ) -> Result<(), LuaError> {
7726        match self.newindex_step(t, key, v)? {
7727            MmOut::Done(_) => Ok(()),
7728            MmOut::Mm { func, recv } => {
7729                self.call_value(func, &[recv, key, v])?;
7730                Ok(())
7731            }
7732            MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
7733        }
7734    }
7735
7736    /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
7737    /// performed inline (returning `Done`); only a function metamethod (`Mm`)
7738    /// needs an actual call — which the caller may run yieldably.
7739    fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
7740        let mut cur = t;
7741        for _ in 0..MAX_TAG_LOOP {
7742            let mm = match cur {
7743                Value::Table(tb) => {
7744                    if !tb.get(key).is_nil() {
7745                        self.raw_set(tb, key, v)?;
7746                        return Ok(MmOut::Done(Value::Nil));
7747                    }
7748                    let mm = self.get_mm(cur, Mm::NewIndex);
7749                    if mm.is_nil() {
7750                        self.raw_set(tb, key, v)?;
7751                        return Ok(MmOut::Done(Value::Nil));
7752                    }
7753                    mm
7754                }
7755                bad => {
7756                    let mm = self.get_mm(bad, Mm::NewIndex);
7757                    if mm.is_nil() {
7758                        return Err(self.type_err("index", bad));
7759                    }
7760                    mm
7761                }
7762            };
7763            match mm {
7764                Value::Closure(_) | Value::Native(_) => {
7765                    return Ok(MmOut::Mm {
7766                        func: mm,
7767                        recv: cur,
7768                    });
7769                }
7770                next => cur = next,
7771            }
7772        }
7773        Err(self.rt_err("'__newindex' chain too long; possible loop"))
7774    }
7775
7776    fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
7777        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7778        match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
7779            Ok(()) => {
7780                self.heap
7781                    .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7782                Ok(())
7783            }
7784            Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
7785            Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
7786            Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
7787            Err(TableError::InvalidNext) => unreachable!(),
7788        }
7789    }
7790
7791    /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
7792    /// the boolean result; `Mm` (when raw equality fails and both are tables
7793    /// with an `__eq`) carries the metamethod — called with `(l, r)`.
7794    fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
7795        if l.raw_eq(r) {
7796            return MmOut::Done(Value::Bool(true));
7797        }
7798        if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
7799            (l, r)
7800        {
7801            // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
7802            // (and earlier) required the two operands' metatables to expose a
7803            // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
7804            // metatable falls straight back to raw inequality. events.lua 5.1
7805            // :262 bakes this in.
7806            let mm = if self.version() <= LuaVersion::Lua51 {
7807                self.get_comp_mm(l, r, Mm::Eq)
7808            } else {
7809                let mut m = self.get_mm(l, Mm::Eq);
7810                if m.is_nil() {
7811                    m = self.get_mm(r, Mm::Eq);
7812                }
7813                m
7814            };
7815            if !mm.is_nil() {
7816                return MmOut::Mm { func: mm, recv: l };
7817            }
7818        }
7819        MmOut::Done(Value::Bool(false))
7820    }
7821
7822    // ---- arithmetic ----
7823
7824    #[inline(always)]
7825    fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
7826        let l = self.r(base, inst.b());
7827        let r = self.r(base, inst.c());
7828        // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
7829        // binary_trees all hammer these. Skipping coerce_num + the big
7830        // arith_fast match shaves several conditional moves per op.
7831        if let (Value::Int(a), Value::Int(b)) = (l, r) {
7832            let fast = match op {
7833                ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
7834                ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
7835                ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
7836                _ => None,
7837            };
7838            if let Some(v) = fast {
7839                self.set_r(base, inst.a(), v);
7840                return Ok(());
7841            }
7842        }
7843        // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
7844        // and any numeric workload with non-integer accumulators benefits.
7845        if let (Value::Float(a), Value::Float(b)) = (l, r) {
7846            let fast = match op {
7847                ArithOp::Add => Some(Value::Float(a + b)),
7848                ArithOp::Sub => Some(Value::Float(a - b)),
7849                ArithOp::Mul => Some(Value::Float(a * b)),
7850                ArithOp::Div => Some(Value::Float(a / b)),
7851                _ => None,
7852            };
7853            if let Some(v) = fast {
7854                self.set_r(base, inst.a(), v);
7855                return Ok(());
7856            }
7857        }
7858        match self.arith_fast(op, l, r)? {
7859            Some(v) => self.set_r(base, inst.a(), v),
7860            None => {
7861                let mm = self.arith_mm_func(op, l, r)?;
7862                let dst = base + inst.a();
7863                self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
7864            }
7865        }
7866        Ok(())
7867    }
7868
7869    /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
7870    /// directly, `Ok(None)` when a metamethod is required (the caller decides
7871    /// whether to call it synchronously or yieldably).
7872    fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
7873        use ArithOp::*;
7874        match op {
7875            BAnd | BOr | BXor | Shl | Shr => {
7876                // strings coerce for bitwise too (PUC tointegerns via cvt2num)
7877                match (coerce_num(l), coerce_num(r)) {
7878                    (Some(a), Some(b)) => {
7879                        let to_int = |n: Num| match n {
7880                            Num::Int(i) => Some(i),
7881                            Num::Float(f) => crate::runtime::value::f2i_exact(f),
7882                        };
7883                        let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
7884                            // PUC luaG_tointerror: name the offending operand
7885                            return Err(self.no_int_rep_err());
7886                        };
7887                        let v = match op {
7888                            BAnd => a & b,
7889                            BOr => a | b,
7890                            BXor => a ^ b,
7891                            Shl => shift_left(a, b),
7892                            Shr => shift_left(a, b.wrapping_neg()),
7893                            _ => unreachable!(),
7894                        };
7895                        return Ok(Some(Value::Int(v)));
7896                    }
7897                    _ => return Ok(None),
7898                }
7899            }
7900            _ => {}
7901        }
7902        let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
7903            (Some(a), Some(b)) => (a, b),
7904            _ => return Ok(None),
7905        };
7906        let v = match (op, ln, rn) {
7907            (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
7908            (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
7909            (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
7910            (IDiv, Num::Int(a), Num::Int(b)) => {
7911                if b == 0 {
7912                    return Err(self.rt_err("attempt to divide by zero"));
7913                }
7914                let mut q = a.wrapping_div(b);
7915                if (a ^ b) < 0 && q.wrapping_mul(b) != a {
7916                    q -= 1;
7917                }
7918                Value::Int(q)
7919            }
7920            (Mod, Num::Int(a), Num::Int(b)) => {
7921                if b == 0 {
7922                    return Err(self.rt_err("attempt to perform 'n%0'"));
7923                }
7924                let mut m = a.wrapping_rem(b);
7925                if m != 0 && (m ^ b) < 0 {
7926                    m += b;
7927                }
7928                Value::Int(m)
7929            }
7930            (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
7931            (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
7932            (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
7933            (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
7934            (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
7935            (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
7936            (Mod, a, b) => {
7937                let (x, y) = (a.as_f64(), b.as_f64());
7938                // PUC luai_nummod: correct fmod's sign without the `m*y`
7939                // product, which underflows to 0 for tiny denormals
7940                let mut m = x % y;
7941                if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
7942                    m += y;
7943                }
7944                Value::Float(m)
7945            }
7946            _ => unreachable!(),
7947        };
7948        Ok(Some(v))
7949    }
7950
7951    pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
7952        match v {
7953            Value::Int(i) => Ok(i),
7954            Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
7955                Some(i) => Ok(i),
7956                None => Err(self.rt_err("number has no integer representation")),
7957            },
7958            v => Err(self.type_err(what, v)),
7959        }
7960    }
7961
7962    fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
7963        match n {
7964            Num::Int(i) => Ok(i),
7965            Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
7966                Some(i) => Ok(i),
7967                None => Err(self.rt_err("number has no integer representation")),
7968            },
7969        }
7970    }
7971
7972    /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
7973    /// PUC type error when neither operand provides one.
7974    fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
7975        use ArithOp::*;
7976        let event = match op {
7977            Add => Mm::Add,
7978            Sub => Mm::Sub,
7979            Mul => Mm::Mul,
7980            Div => Mm::Div,
7981            Mod => Mm::Mod,
7982            Pow => Mm::Pow,
7983            IDiv => Mm::IDiv,
7984            BAnd => Mm::BAnd,
7985            BOr => Mm::BOr,
7986            BXor => Mm::BXor,
7987            Shl => Mm::Shl,
7988            Shr => Mm::Shr,
7989        };
7990        let mut mm = self.get_mm(l, event);
7991        if mm.is_nil() {
7992            mm = self.get_mm(r, event);
7993        }
7994        if mm.is_nil() {
7995            let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
7996                "perform bitwise operation on"
7997            } else {
7998                "perform arithmetic on"
7999            };
8000            let bad = if coerce_num(l).is_none() { l } else { r };
8001            return Err(self.type_err(what, bad));
8002        }
8003        Ok(mm)
8004    }
8005
8006    // ---- comparison ----
8007
8008    pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
8009        match self.less_step(l, r, or_eq)? {
8010            MmOut::Done(v) => Ok(v.truthy()),
8011            MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
8012            MmOut::CompareSynth { func } => {
8013                // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
8014                // by library code (sort comparator etc.) — no yield expected
8015                // here (a yield would have hit `call_noyield`'s C boundary).
8016                Ok(!self.call_mm1(func, &[r, l])?.truthy())
8017            }
8018        }
8019    }
8020
8021    /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
8022    /// carries the boolean result; `Mm` (for non-number/string operands) carries
8023    /// the metamethod — called with `(l, r)`; raises the PUC compare error when
8024    /// neither operand provides one.
8025    fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
8026        let b = match (l, r) {
8027            (Value::Int(a), Value::Int(b)) => {
8028                if or_eq {
8029                    a <= b
8030                } else {
8031                    a < b
8032                }
8033            }
8034            (Value::Float(a), Value::Float(b)) => {
8035                if or_eq {
8036                    a <= b
8037                } else {
8038                    a < b
8039                }
8040            }
8041            (Value::Int(a), Value::Float(b)) => {
8042                if or_eq {
8043                    int_le_float(a, b)
8044                } else {
8045                    int_lt_float(a, b)
8046                }
8047            }
8048            (Value::Float(a), Value::Int(b)) => {
8049                if a.is_nan() {
8050                    false
8051                } else if or_eq {
8052                    !int_lt_float(b, a)
8053                } else {
8054                    !int_le_float(b, a)
8055                }
8056            }
8057            (Value::Str(a), Value::Str(b)) => {
8058                let (a, b) = (a.as_bytes(), b.as_bytes());
8059                if or_eq { a <= b } else { a < b }
8060            }
8061            (l, r) => {
8062                let event = if or_eq { Mm::Le } else { Mm::Lt };
8063                // PUC 5.1's `get_compTM` rule applies to ordered comparisons
8064                // too: both operands' metatables must expose the same
8065                // implementation for `__lt` / `__le` to fire. events.lua 5.1
8066                // :262 expects `c < d` (where `d` has no metatable) to error
8067                // with the default "attempt to compare two table values"
8068                // rather than running c's `__lt` blindly.
8069                let mm = if self.version() <= LuaVersion::Lua51 {
8070                    self.get_comp_mm(l, r, event)
8071                } else {
8072                    let mut m = self.get_mm(l, event);
8073                    if m.is_nil() {
8074                        m = self.get_mm(r, event);
8075                    }
8076                    m
8077                };
8078                // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
8079                // operand carries `__le`. 5.4 dropped the synthesis (now
8080                // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
8081                // on the synthesis — its metatable defines only `__lt`.
8082                // The fallback calls `__lt(r, l)` synchronously (the suite's
8083                // `__lt` doesn't yield) and negates the result; the yieldable
8084                // `__lt` path stays reserved for the explicit `<` operator.
8085                if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
8086                    let lt = Mm::Lt;
8087                    let mut mm_lt = self.get_mm(l, lt);
8088                    if mm_lt.is_nil() {
8089                        mm_lt = self.get_mm(r, lt);
8090                    }
8091                    if !mm_lt.is_nil() {
8092                        return Ok(MmOut::CompareSynth { func: mm_lt });
8093                    }
8094                }
8095                if mm.is_nil() {
8096                    // PUC luaG_ordererror: "two X values" when the operand
8097                    // types match, "X with Y" otherwise (objtypename-aware).
8098                    let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
8099                    return Err(self.rt_err(&if t1 == t2 {
8100                        format!("attempt to compare two {t1} values")
8101                    } else {
8102                        format!("attempt to compare {t1} with {t2}")
8103                    }));
8104                }
8105                return Ok(MmOut::Mm { func: mm, recv: l });
8106            }
8107        };
8108        Ok(MmOut::Done(Value::Bool(b)))
8109    }
8110
8111    // ---- numeric for ----
8112
8113    fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
8114        let a = inst.a();
8115        let init = self.r(base, a);
8116        let limit = self.r(base, a + 1);
8117        let step = self.r(base, a + 2);
8118        let (Some(init_n), Some(limit_n), Some(step_n)) =
8119            (as_num(init), as_num(limit), as_num(step))
8120        else {
8121            // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
8122            // PUC checks limit, then step, then initial value.
8123            let (what, bad) = if as_num(limit).is_none() {
8124                ("limit", limit)
8125            } else if as_num(step).is_none() {
8126                ("step", step)
8127            } else {
8128                ("initial value", init)
8129            };
8130            let tn = self.obj_typename(bad);
8131            return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
8132        };
8133        // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
8134        // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
8135        // first test, so each successful iteration emits a backward `OP_FORLOOP`
8136        // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
8137        // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
8138        // distance in luna's encoding is `loop_pc - prep_pc`; firing
8139        // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
8140        let pre53 = self.version() <= LuaVersion::Lua53;
8141        match (init_n, step_n) {
8142            (Num::Int(i0), Num::Int(st)) => {
8143                if st == 0 {
8144                    return Err(self.rt_err("'for' step is zero"));
8145                }
8146                if pre53 {
8147                    // PUC 5.3 `forlimit`: int limit passes through; float limit
8148                    // gets clamped to MIN/MAX with a `stopnow` flag set only
8149                    // when the clamp is unreachable (positive float with a
8150                    // negative step → limit=MAX, stopnow; negative float with
8151                    // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
8152                    // `init = 0` so OP_FORLOOP's first test against the
8153                    // unreachable clamp fails cleanly. An ordinary in-range
8154                    // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
8155                    // lets OP_FORLOOP's natural test reject the first step.
8156                    let (lim, stopnow) = match limit_n {
8157                        Num::Int(l) => (l, false),
8158                        Num::Float(f) => {
8159                            if f.is_nan() {
8160                                (0, true)
8161                            } else if f >= i64::MAX as f64 + 1.0 {
8162                                // beyond +MAX: unreachable for a decreasing loop
8163                                (i64::MAX, st < 0)
8164                            } else if f <= i64::MIN as f64 {
8165                                // beyond -MIN: unreachable for an increasing loop
8166                                (i64::MIN, st >= 0)
8167                            } else if st > 0 {
8168                                (f.floor() as i64, false)
8169                            } else {
8170                                (f.ceil() as i64, false)
8171                            }
8172                        }
8173                    };
8174                    let initv = if stopnow { 0 } else { i0 };
8175                    let pre = initv.wrapping_sub(st);
8176                    self.set_r(base, a, Value::Int(pre));
8177                    self.set_r(base, a + 1, Value::Int(lim));
8178                    self.set_r(base, a + 2, Value::Int(st));
8179                    self.add_pc(inst.bx() as i32 - 1);
8180                    return Ok(());
8181                }
8182                let (lim, empty) = int_for_limit(limit_n, i0, st);
8183                if empty {
8184                    self.add_pc(inst.bx() as i32);
8185                    return Ok(());
8186                }
8187                let count = if st > 0 {
8188                    (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
8189                } else {
8190                    (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
8191                };
8192                self.set_r(base, a, Value::Int(i0));
8193                self.set_r(base, a + 1, Value::Int(count as i64));
8194                self.set_r(base, a + 2, Value::Int(st));
8195                self.set_r(base, a + 3, Value::Int(i0));
8196            }
8197            _ => {
8198                let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
8199                if st == 0.0 {
8200                    return Err(self.rt_err("'for' step is zero"));
8201                }
8202                if pre53 {
8203                    let pre = x0 - st;
8204                    self.set_r(base, a, Value::Float(pre));
8205                    self.set_r(base, a + 1, Value::Float(lim));
8206                    self.set_r(base, a + 2, Value::Float(st));
8207                    self.add_pc(inst.bx() as i32 - 1);
8208                    return Ok(());
8209                }
8210                let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
8211                if !runs {
8212                    self.add_pc(inst.bx() as i32);
8213                    return Ok(());
8214                }
8215                self.set_r(base, a, Value::Float(x0));
8216                self.set_r(base, a + 1, Value::Float(lim));
8217                self.set_r(base, a + 2, Value::Float(st));
8218                self.set_r(base, a + 3, Value::Float(x0));
8219            }
8220        }
8221        Ok(())
8222    }
8223
8224    #[inline(always)]
8225    fn for_loop(&mut self, inst: Inst, base: u32) {
8226        let a = inst.a();
8227        // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
8228        // directly (R[a+1] holds the limit, *not* a remaining-count) so the
8229        // first iteration's test fires through the same backward-jump path as
8230        // every later iteration. 5.4+ switched to the count-based form luna
8231        // already uses for `Int`; the float branch was already PUC-3.x-style.
8232        let pre53 = self.version() <= LuaVersion::Lua53;
8233        match self.r(base, a) {
8234            Value::Int(cur) if pre53 => {
8235                let Value::Int(lim) = self.r(base, a + 1) else {
8236                    unreachable!()
8237                };
8238                let Value::Int(st) = self.r(base, a + 2) else {
8239                    unreachable!()
8240                };
8241                let next = cur.wrapping_add(st);
8242                let cont = if st > 0 { next <= lim } else { next >= lim };
8243                if cont {
8244                    self.set_r(base, a, Value::Int(next));
8245                    self.set_r(base, a + 3, Value::Int(next));
8246                    self.add_pc(-(inst.bx() as i32));
8247                }
8248            }
8249            Value::Int(cur) => {
8250                let Value::Int(count) = self.r(base, a + 1) else {
8251                    unreachable!()
8252                };
8253                if count > 0 {
8254                    let Value::Int(st) = self.r(base, a + 2) else {
8255                        unreachable!()
8256                    };
8257                    let next = cur.wrapping_add(st);
8258                    self.set_r(base, a, Value::Int(next));
8259                    self.set_r(base, a + 1, Value::Int(count - 1));
8260                    self.set_r(base, a + 3, Value::Int(next));
8261                    self.add_pc(-(inst.bx() as i32));
8262                }
8263            }
8264            Value::Float(cur) => {
8265                let Value::Float(lim) = self.r(base, a + 1) else {
8266                    unreachable!()
8267                };
8268                let Value::Float(st) = self.r(base, a + 2) else {
8269                    unreachable!()
8270                };
8271                let next = cur + st;
8272                let cont = if st > 0.0 { next <= lim } else { next >= lim };
8273                if cont {
8274                    self.set_r(base, a, Value::Float(next));
8275                    self.set_r(base, a + 3, Value::Float(next));
8276                    self.add_pc(-(inst.bx() as i32));
8277                }
8278            }
8279            _ => unreachable!("corrupt for-loop state"),
8280        }
8281    }
8282
8283    // ---- native helpers (used by builtins) ----
8284
8285    /// A native function's own captured upvalue (self lives at func_slot).
8286    ///
8287    /// Public so `native_typed` trampolines and embedders authoring
8288    /// stateful natives via `native_with(...)` can read their upvals.
8289    pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
8290        let Value::Native(nc) = self.stack[func_slot as usize] else {
8291            unreachable!("native frame without native closure");
8292        };
8293        nc.upvals[i]
8294    }
8295
8296    /// Number of upvalues captured by the native at `func_slot` (variadic
8297    /// captures such as the `io.lines` format list).
8298    pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
8299        let Value::Native(nc) = self.stack[func_slot as usize] else {
8300            unreachable!("native frame without native closure");
8301        };
8302        nc.upvals.len()
8303    }
8304
8305    /// Write a native function's own upvalue (stateful iterators).
8306    pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
8307        let Value::Native(nc) = self.stack[func_slot as usize] else {
8308            unreachable!("native frame without native closure");
8309        };
8310        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8311        unsafe { nc.as_mut() }.upvals[i] = v;
8312        // NativeClosure.upvals is traced as part of its Trace; a long-lived
8313        // stateful iterator closure (e.g. string.gmatch) sees many writes —
8314        // barrier_back once-and-done is cheaper than per-child forward.
8315        self.heap
8316            .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
8317    }
8318
8319    /// Read the i-th positional argument inside a `NativeFn` body
8320    /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
8321    /// matching PUC's "missing arg is nil" contract. Public so embedders
8322    /// can author their own natives.
8323    pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
8324        if i < nargs {
8325            self.stack[(func_slot + 1 + i) as usize]
8326        } else {
8327            Value::Nil
8328        }
8329    }
8330
8331    /// Push the return values of a `NativeFn` and return their count
8332    /// (analogous to pushing N values then `return N` from a C function).
8333    /// Public so embedders can author their own natives.
8334    pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
8335        let need = func_slot as usize + vals.len();
8336        if self.stack.len() < need {
8337            self.stack.resize(need, Value::Nil);
8338        }
8339        for (i, &v) in vals.iter().enumerate() {
8340            self.stack[func_slot as usize + i] = v;
8341        }
8342        vals.len() as u32
8343    }
8344
8345    /// Fast string concatenation of an adjacent pair, or `None` when a
8346    /// `__concat` metamethod is required.
8347    fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8348        let legacy = self.version <= crate::version::LuaVersion::Lua52;
8349        // Length-check fast paths for both string operands BEFORE the
8350        // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
8351        // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
8352        // overflow on the first pair that would exceed `INT_MAX` instead
8353        // of allocating multi-GB intermediates first.
8354        let max_str = i32::MAX as usize;
8355        if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
8356            let a_len = ls.as_bytes().len();
8357            let b_len = rs.as_bytes().len();
8358            let new_len = a_len.checked_add(b_len);
8359            if new_len.is_none() || new_len.unwrap() > max_str {
8360                return Err(self.rt_err("string length overflow"));
8361            }
8362        }
8363        match (concat_piece(l, legacy), concat_piece(r, legacy)) {
8364            (Some(a), Some(b)) => {
8365                // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
8366                // concat past it raises "string length overflow"
8367                // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
8368                // exactly this wall).
8369                let new_len = a.len().checked_add(b.len());
8370                if new_len.is_none() || new_len.unwrap() > max_str {
8371                    return Err(self.rt_err("string length overflow"));
8372                }
8373                let mut combined = a;
8374                combined.extend_from_slice(&b);
8375                Ok(Some(Value::Str(self.heap.intern(&combined))))
8376            }
8377            _ => Ok(None),
8378        }
8379    }
8380
8381    /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
8382    /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
8383    /// either finishing (result at `base_a`) or arming a yieldable `__concat`
8384    /// call — its `Meta` continuation re-enters here on the metamethod's return.
8385    fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
8386        // Sum the lengths of all all-Str operands BEFORE starting the
8387        // right-associative fold so a 129-operand `a..a..…` chain
8388        // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
8389        // not after dozens of multi-GB intermediate intern+hash rounds.
8390        // A non-Str operand falls through to the per-pair check.
8391        let max_str = i32::MAX as usize;
8392        let mut total: usize = 0;
8393        let mut all_str = true;
8394        for slot in base_a..self.top {
8395            match self.stack[slot as usize] {
8396                Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
8397                    Some(t) if t <= max_str => total = t,
8398                    _ => return Err(self.rt_err("string length overflow")),
8399                },
8400                _ => {
8401                    all_str = false;
8402                    break;
8403                }
8404            }
8405        }
8406        let _ = all_str; // discrimination already captured by early returns above
8407        while self.top.saturating_sub(base_a) >= 2 {
8408            let i = self.top - 1; // rightmost operand
8409            let x = self.stack[(i - 1) as usize];
8410            let y = self.stack[i as usize];
8411            match self.concat_pair(x, y)? {
8412                Some(s) => {
8413                    self.stack[(i - 1) as usize] = s;
8414                    self.top = i; // consumed y
8415                }
8416                None => {
8417                    let mut mm = self.get_mm(x, Mm::Concat);
8418                    if mm.is_nil() {
8419                        mm = self.get_mm(y, Mm::Concat);
8420                    }
8421                    if mm.is_nil() {
8422                        let legacy = self.version <= crate::version::LuaVersion::Lua52;
8423                        let bad = if concat_piece(x, legacy).is_none() {
8424                            x
8425                        } else {
8426                            y
8427                        };
8428                        return Err(self.type_err("concatenate", bad));
8429                    }
8430                    // result lands at i-1, dropping y (top→i); resume continues.
8431                    let dst = i - 1;
8432                    self.begin_meta_call(
8433                        mm,
8434                        &[x, y],
8435                        MetaAction::Concat { dst, base_a },
8436                        "concat",
8437                    )?;
8438                    return Ok(());
8439                }
8440            }
8441        }
8442        self.maybe_collect_garbage(base_a + 1);
8443        Ok(())
8444    }
8445
8446    /// tostring with __tostring / __name support.
8447    pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
8448        let mm = self.get_mm(v, Mm::ToString);
8449        if !mm.is_nil() {
8450            return match self.call_mm1(mm, &[v])? {
8451                Value::Str(s) => Ok(s.as_bytes().to_vec()),
8452                _ => Err(self.rt_err("'__tostring' must return a string")),
8453            };
8454        }
8455        if let Value::Table(t) = v
8456            && let Value::Str(name) = self.get_mm(v, Mm::Name)
8457        {
8458            let mut out = name.as_bytes().to_vec();
8459            out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
8460            return Ok(out);
8461        }
8462        Ok(self.tostring_basic(v))
8463    }
8464
8465    /// Basic tostring (no metamethods).
8466    pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
8467        match v {
8468            Value::Nil => b"nil".to_vec(),
8469            Value::Bool(true) => b"true".to_vec(),
8470            Value::Bool(false) => b"false".to_vec(),
8471            Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
8472            // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
8473            // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
8474            // distinguishable from `print(2)`. pm.lua :13 builds patterns by
8475            // concatenating these renderings.
8476            Value::Float(f) => {
8477                let legacy = self.version <= crate::version::LuaVersion::Lua52;
8478                numeric::num_to_string_for(Num::Float(f), legacy).into_bytes()
8479            }
8480            Value::Str(s) => s.as_bytes().to_vec(),
8481            Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
8482            Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
8483            Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
8484            Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
8485            // PUC names file handles `file (0x…)`; a bare userdata is
8486            // `userdata: 0x…`. The io library overrides this via __tostring.
8487            Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
8488            // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
8489            // (the "light" qualifier only appears in `luaL_typeerror`).
8490            Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
8491        }
8492    }
8493}
8494
/// Selector for a binary arithmetic or bitwise operator; see
/// [`ArithOp::mm_name`] for the metamethod event each variant maps to.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    Add,
    Sub,
    Mul,
    Mod,
    Pow,
    Div,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
}

impl ArithOp {
    /// Event name of the operator's metamethod without the leading
    /// underscores (`__add` → "add", …), as PUC reports it through
    /// `debug.getinfo(level, "n")` inside a metamethod handler.
    fn mm_name(self) -> &'static str {
        match self {
            Self::Add => "add",
            Self::Sub => "sub",
            Self::Mul => "mul",
            Self::Mod => "mod",
            Self::Pow => "pow",
            Self::Div => "div",
            Self::IDiv => "idiv",
            Self::BAnd => "band",
            Self::BOr => "bor",
            Self::BXor => "bxor",
            Self::Shl => "shl",
            Self::Shr => "shr",
        }
    }
}
8531
8532fn as_num(v: Value) -> Option<Num> {
8533    match v {
8534        Value::Int(i) => Some(Num::Int(i)),
8535        Value::Float(f) => Some(Num::Float(f)),
8536        // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
8537        Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
8538        _ => None,
8539    }
8540}
8541
8542/// A concatenable operand's byte form (string, or a number coerced to its
8543/// string), or `None` when only a `__concat` metamethod can handle it.
8544/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
8545/// suffix on integer-valued floats) — see `num_to_string_for`.
8546fn concat_piece(v: Value, legacy_float: bool) -> Option<Vec<u8>> {
8547    match v {
8548        Value::Str(s) => Some(s.as_bytes().to_vec()),
8549        Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
8550        Value::Float(x) => {
8551            Some(numeric::num_to_string_for(Num::Float(x), legacy_float).into_bytes())
8552        }
8553        _ => None,
8554    }
8555}
8556
8557/// Index into the per-basic-type metatable table for a non-table value
8558/// (None for tables, which carry their own metatable).
8559fn type_mt_slot(v: Value) -> Option<usize> {
8560    match v {
8561        Value::Nil => Some(0),
8562        Value::Bool(_) => Some(1),
8563        Value::Int(_) | Value::Float(_) => Some(2),
8564        Value::Str(_) => Some(3),
8565        Value::Closure(_) | Value::Native(_) => Some(4),
8566        // tables and full userdata carry their own metatable; threads and
8567        // light userdata have none (PUC keeps a shared per-type mt slot for
8568        // light, but luna doesn't expose it — no test gates on it yet).
8569        Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
8570    }
8571}
8572
8573/// Number, or string coerced to number (5.5 default string-arith coercion).
8574fn coerce_num(v: Value) -> Option<Num> {
8575    match v {
8576        Value::Int(i) => Some(Num::Int(i)),
8577        Value::Float(f) => Some(Num::Float(f)),
8578        Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
8579        _ => None,
8580    }
8581}
8582
/// Lua `a << b`: a logical (zero-filling) shift over 64 bits. A negative `b`
/// shifts right by `|b|` instead, and any `|b| >= 64` produces 0.
fn shift_left(a: i64, b: i64) -> i64 {
    let bits = a as u64;
    // `unsigned_abs` is total, so `i64::MIN` needs no special case.
    let amount = b.unsigned_abs();
    if amount >= 64 {
        return 0;
    }
    let shifted = if b < 0 {
        bits >> amount
    } else {
        bits << amount
    };
    shifted as i64
}
8598
/// Exact `i < f` between an integer and a float (PUC LTintfloat shape),
/// without rounding `i` to a float. NaN compares false.
fn int_lt_float(i: i64, f: f64) -> bool {
    /// 2^63: the first float above the `i64` range.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;

    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    if f >= TWO_POW_63 {
        return true;
    }
    // `f` is within the integer range here, so its floor converts exactly.
    let floor = f.floor();
    let bound = floor as i64;
    if floor == f {
        // Integral `f`: a strict comparison is required.
        i < bound
    } else {
        // Fractional `f`: `i < f` exactly when `i <= floor(f)`.
        i <= bound
    }
}
8614
/// Exact `i <= f` between an integer and a float, without rounding `i` to a
/// float. NaN compares false.
fn int_le_float(i: i64, f: f64) -> bool {
    /// 2^63: the first float above the `i64` range.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;

    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    if f >= TWO_POW_63 {
        return true;
    }
    // Within the integer range: `i <= f` exactly when `i <= floor(f)`.
    let bound = f.floor() as i64;
    i <= bound
}
8628
8629/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
8630/// (clipped limit, loop-is-empty).
8631fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
8632    match limit {
8633        Num::Int(l) => {
8634            let empty = if step > 0 { init > l } else { init < l };
8635            (l, empty)
8636        }
8637        Num::Float(f) => {
8638            if f.is_nan() {
8639                return (0, true);
8640            }
8641            if step > 0 {
8642                if f >= 9_223_372_036_854_775_808.0 {
8643                    (i64::MAX, false)
8644                } else {
8645                    let l = f.floor();
8646                    if l < -9_223_372_036_854_775_808.0 {
8647                        (i64::MIN, true)
8648                    } else {
8649                        let li = l as i64;
8650                        (li, init > li)
8651                    }
8652                }
8653            } else if f <= -9_223_372_036_854_775_808.0 {
8654                (i64::MIN, false)
8655            } else {
8656                let l = f.ceil();
8657                if l >= 9_223_372_036_854_775_808.0 {
8658                    // PUC forlimit: a positive limit beyond the integer range
8659                    // is unreachable for a decreasing loop — empty.
8660                    (i64::MAX, true)
8661                } else {
8662                    let li = l as i64;
8663                    (li, init < li)
8664                }
8665            }
8666        }
8667    }
8668}
8669
8670/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
8671/// `@file` / `=name` markers in `source`).
8672fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
8673    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8674    let b = unsafe { crate::runtime::string::bytes_of(p) };
8675    match b.first() {
8676        Some(b'@') | Some(b'=') => &b[1..],
8677        _ => b,
8678    }
8679}
8680
8681impl Vm {
8682    /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
8683    /// that called the current native. Returns (closure, current line,
8684    /// extra vararg count).
8685    /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
8686    /// function running at `level`, recovered from the caller's call
8687    /// instruction (PUC funcnamefromcode). None for the main chunk or a
8688    /// tail/anonymous call with no recoverable name.
8689    /// A debug-level position: either a real Lua frame (by index) or a synthetic
8690    /// C frame standing for a call_value boundary (metamethod / pcall / __close /
8691    /// coroutine body), which `debug.getinfo` and traceback report as "C".
8692    /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
8693    /// frame at `level`'s current pc, as (name, value). Locals are visited in
8694    /// registration order (start pc, then register) to match luaF_getlocalname.
8695    pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
8696        if n == 0 {
8697            return None;
8698        }
8699        let fi = match self.dbg_frame(level)? {
8700            DbgKind::Lua(fi) => fi,
8701            // Tail-call placeholder has no real frame backing it — no locals
8702            // exist to read or write here. PUC `findlocal` returns NULL on
8703            // a CIST_TAIL activation.
8704            DbgKind::Tail(_) => return None,
8705            // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
8706            // for slot `n` inside the argument window (db.lua :408-:413, and
8707            // the call/return hook reads of math.sin / select args via
8708            // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
8709            // meaningful for a C frame here.
8710            DbgKind::C(fi) => {
8711                if n < 1 {
8712                    return None;
8713                }
8714                let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
8715                if (n as u32) > nargs {
8716                    return None;
8717                }
8718                let slot = (func_slot + n as u32) as usize;
8719                let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
8720                return Some((self.temporary_locvar_name().to_string(), val));
8721            }
8722        };
8723        let f = self.frames[fi].lua()?;
8724        // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
8725        // is the first extra arg passed to the function (`...[1]`), `-2` the
8726        // second, etc. The 5.5 stack layout parks varargs in
8727        // [func_slot + 1, base), so the i-th is at `func_slot + i`.
8728        if n < 0 {
8729            let i = (-n) as u32;
8730            if i == 0 || i > f.n_varargs {
8731                return None;
8732            }
8733            let val = self
8734                .stack
8735                .get((f.func_slot + i) as usize)
8736                .copied()
8737                .unwrap_or(Value::Nil);
8738            return Some((self.vararg_locvar_name().to_string(), val));
8739        }
8740        let proto = f.closure.proto;
8741        // PUC's parser injects a hidden `(vararg table)` locvar for an
8742        // anonymous-vararg function (lparser.c new_localvarliteral), sitting
8743        // right after the fixed parameters (`numparams + 1`). Main chunks
8744        // and `(...t)` named-vararg funcs do NOT get one — gate on the
8745        // compiler-set flag, not on `is_vararg`. luna keeps user locals in
8746        // their declared registers (no shadow slot allocated), so we expose
8747        // that hidden index purely in this debug view.
8748        let num_params = proto.num_params as i64;
8749        let vararg_slot = if proto.has_vararg_table_pseudo {
8750            Some(num_params + 1)
8751        } else {
8752            None
8753        };
8754        if vararg_slot == Some(n) {
8755            return Some(("(vararg table)".to_string(), Value::Nil));
8756        }
8757        let pc = (f.pc as usize).saturating_sub(1);
8758        let mut active: Vec<&crate::runtime::LocVar> = proto
8759            .locvars
8760            .iter()
8761            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
8762            .collect();
8763        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
8764        let mut idx: i64 = n - 1;
8765        if let Some(vs) = vararg_slot
8766            && n > vs
8767        {
8768            idx -= 1;
8769        }
8770        let idx = idx as usize;
8771        if let Some(lv) = active.get(idx) {
8772            let val = self
8773                .stack
8774                .get((f.base + lv.reg) as usize)
8775                .copied()
8776                .unwrap_or(Value::Nil);
8777            return Some((lv.name.to_string(), val));
8778        }
8779        // PUC `luaG_findlocal` fallback: `n` is past the named locals but
8780        // still inside the frame's live register window — report a
8781        // "(temporary)" (e.g. an arithmetic intermediate). The limit is
8782        // the next frame's func slot (`ci->next->func.p`) so the
8783        // temporary window stops where the callee's frame begins
8784        // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
8785        // an out-of-range slot).
8786        let limit = self
8787            .frames
8788            .get(fi + 1)
8789            .and_then(|cf| cf.lua())
8790            .map(|nf| nf.func_slot)
8791            .unwrap_or_else(|| self.top.max(f.base));
8792        let temp_reg = idx as u32;
8793        if f.base + temp_reg < limit {
8794            let val = self
8795                .stack
8796                .get((f.base + temp_reg) as usize)
8797                .copied()
8798                .unwrap_or(Value::Nil);
8799            return Some((self.lua_temporary_locvar_name().to_string(), val));
8800        }
8801        None
8802    }
8803
8804    /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
8805    /// the local / vararg name on success, `None` when the slot does not
8806    /// resolve. Mirrors `local_at`'s indexing exactly.
8807    pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
8808        if n == 0 {
8809            return None;
8810        }
8811        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
8812            return None;
8813        };
8814        let f = self.frames[fi].lua()?;
8815        if n < 0 {
8816            let i = (-n) as u32;
8817            if i == 0 || i > f.n_varargs {
8818                return None;
8819            }
8820            let slot = (f.func_slot + i) as usize;
8821            if let Some(s) = self.stack.get_mut(slot) {
8822                *s = v;
8823            }
8824            return Some(self.vararg_locvar_name().to_string());
8825        }
8826        let proto = f.closure.proto;
8827        let num_params = proto.num_params as i64;
8828        let vararg_slot = if proto.has_vararg_table_pseudo {
8829            Some(num_params + 1)
8830        } else {
8831            None
8832        };
8833        if vararg_slot == Some(n) {
8834            // hidden (vararg table) slot has no real storage — accept the
8835            // write as a no-op for PUC parity (db.lua doesn't write to it).
8836            return Some("(vararg table)".to_string());
8837        }
8838        let pc = (f.pc as usize).saturating_sub(1);
8839        let mut active: Vec<&crate::runtime::LocVar> = proto
8840            .locvars
8841            .iter()
8842            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
8843            .collect();
8844        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
8845        let mut idx: i64 = n - 1;
8846        if let Some(vs) = vararg_slot
8847            && n > vs
8848        {
8849            idx -= 1;
8850        }
8851        let idx = idx as usize;
8852        let (name, reg) = if let Some(lv) = active.get(idx) {
8853            (lv.name.to_string(), lv.reg)
8854        } else {
8855            // PUC `luaG_findlocal` fallback into the temporary window —
8856            // bounded by the next frame's func slot (see local_at).
8857            let limit = self
8858                .frames
8859                .get(fi + 1)
8860                .and_then(|cf| cf.lua())
8861                .map(|nf| nf.func_slot)
8862                .unwrap_or_else(|| self.top.max(f.base));
8863            let temp_reg = idx as u32;
8864            if f.base + temp_reg >= limit {
8865                return None;
8866            }
8867            (self.lua_temporary_locvar_name().to_string(), temp_reg)
8868        };
8869        let slot = (f.base + reg) as usize;
8870        if let Some(s) = self.stack.get_mut(slot) {
8871            *s = v;
8872        }
8873        Some(name)
8874    }
8875
8876    /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
8877    /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
8878    /// reads from `co.stack`. Returns `None` for out-of-range, for negative
8879    /// vararg indexing past `n_varargs`, or for a register past the live
8880    /// window. Naming follows the same priority as `local_at`: named locals,
8881    /// then `(vararg)` for negative `n`, then `(vararg table)` for the
8882    /// explicit-`(...)` pseudo, else `(temporary)` in the live register
8883    /// window.
8884    pub(crate) fn local_at_coro(
8885        &self,
8886        co: Gc<crate::runtime::Coro>,
8887        level: i64,
8888        n: i64,
8889    ) -> Option<(String, Value)> {
8890        if level < 1 || n == 0 {
8891            return None;
8892        }
8893        let frames = &co.frames;
8894        // Logical level: iterate Lua frames from the top.
8895        let lua_indices: Vec<usize> = (0..frames.len())
8896            .rev()
8897            .filter(|&i| frames[i].lua().is_some())
8898            .collect();
8899        let fi = *lua_indices.get((level - 1) as usize)?;
8900        let f = frames[fi].lua()?;
8901        if n < 0 {
8902            let i = (-n) as u32;
8903            if i == 0 || i > f.n_varargs {
8904                return None;
8905            }
8906            let val = co
8907                .stack
8908                .get((f.func_slot + i) as usize)
8909                .copied()
8910                .unwrap_or(Value::Nil);
8911            return Some((self.vararg_locvar_name().to_string(), val));
8912        }
8913        let proto = f.closure.proto;
8914        let num_params = proto.num_params as i64;
8915        let vararg_slot = if proto.has_vararg_table_pseudo {
8916            Some(num_params + 1)
8917        } else {
8918            None
8919        };
8920        if vararg_slot == Some(n) {
8921            return Some(("(vararg table)".to_string(), Value::Nil));
8922        }
8923        let pc = (f.pc as usize).saturating_sub(1);
8924        let mut active: Vec<&crate::runtime::LocVar> = proto
8925            .locvars
8926            .iter()
8927            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
8928            .collect();
8929        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
8930        let mut idx: i64 = n - 1;
8931        if let Some(vs) = vararg_slot
8932            && n > vs
8933        {
8934            idx -= 1;
8935        }
8936        let idx = idx as usize;
8937        if let Some(lv) = active.get(idx) {
8938            let val = co
8939                .stack
8940                .get((f.base + lv.reg) as usize)
8941                .copied()
8942                .unwrap_or(Value::Nil);
8943            return Some((lv.name.to_string(), val));
8944        }
8945        let limit = frames
8946            .get(fi + 1)
8947            .and_then(|cf| cf.lua())
8948            .map(|nf| nf.func_slot)
8949            .unwrap_or(co.top.max(f.base));
8950        let temp_reg = idx as u32;
8951        if f.base + temp_reg < limit {
8952            let val = co
8953                .stack
8954                .get((f.base + temp_reg) as usize)
8955                .copied()
8956                .unwrap_or(Value::Nil);
8957            return Some((self.lua_temporary_locvar_name().to_string(), val));
8958        }
8959        None
8960    }
8961
8962    /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
8963    /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
8964    pub(crate) fn local_set_coro(
8965        &mut self,
8966        co: Gc<crate::runtime::Coro>,
8967        level: i64,
8968        n: i64,
8969        v: Value,
8970    ) -> Option<String> {
8971        if level < 1 || n == 0 {
8972            return None;
8973        }
8974        let lua_indices: Vec<usize> = (0..co.frames.len())
8975            .rev()
8976            .filter(|&i| co.frames[i].lua().is_some())
8977            .collect();
8978        let fi = *lua_indices.get((level - 1) as usize)?;
8979        let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
8980            let f = co.frames[fi].lua()?;
8981            (
8982                f.func_slot,
8983                f.n_varargs,
8984                f.base,
8985                f.closure.proto,
8986                co.top.max(f.base),
8987                co.frames
8988                    .get(fi + 1)
8989                    .and_then(|cf| cf.lua())
8990                    .map(|nf| nf.func_slot),
8991            )
8992        };
8993        if n < 0 {
8994            let i = (-n) as u32;
8995            if i == 0 || i > n_varargs {
8996                return None;
8997            }
8998            let slot = (func_slot + i) as usize;
8999            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9000            let stack = unsafe { &mut co.as_mut().stack };
9001            if let Some(s) = stack.get_mut(slot) {
9002                *s = v;
9003            }
9004            // co.stack values are traced — once-per-call barrier so propagate
9005            // sees the new value if co was already BLACK this cycle.
9006            self.heap
9007                .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9008            return Some(self.vararg_locvar_name().to_string());
9009        }
9010        let num_params = proto.num_params as i64;
9011        let vararg_slot = if proto.has_vararg_table_pseudo {
9012            Some(num_params + 1)
9013        } else {
9014            None
9015        };
9016        if vararg_slot == Some(n) {
9017            return Some("(vararg table)".to_string());
9018        }
9019        let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
9020        let mut active: Vec<&crate::runtime::LocVar> = proto
9021            .locvars
9022            .iter()
9023            .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9024            .collect();
9025        active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9026        let mut idx: i64 = n - 1;
9027        if let Some(vs) = vararg_slot
9028            && n > vs
9029        {
9030            idx -= 1;
9031        }
9032        let idx = idx as usize;
9033        let (name, reg) = if let Some(lv) = active.get(idx) {
9034            (lv.name.to_string(), lv.reg)
9035        } else {
9036            let limit = next_func_slot.unwrap_or(top_for_temp);
9037            let temp_reg = idx as u32;
9038            if base + temp_reg >= limit {
9039                return None;
9040            }
9041            (self.lua_temporary_locvar_name().to_string(), temp_reg)
9042        };
9043        let slot = (base + reg) as usize;
9044        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9045        let stack = unsafe { &mut co.as_mut().stack };
9046        if let Some(s) = stack.get_mut(slot) {
9047            *s = v;
9048        }
9049        // co.stack values are traced — once-per-call barrier so propagate
9050        // sees the new value if co was already BLACK this cycle.
9051        self.heap
9052            .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9053        Some(name)
9054    }
9055
9056    /// Frame info for a level on a suspended coroutine (PUC
9057    /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
9058    /// Returns the closure + currentline + extraargs + istailcall for the
9059    /// level-th Lua activation in `co.frames`. None if level overshoots.
9060    pub(crate) fn coro_frame_info(
9061        &self,
9062        co: Gc<crate::runtime::Coro>,
9063        level: i64,
9064    ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
9065        if level < 1 {
9066            return None;
9067        }
9068        let lua_indices: Vec<usize> = (0..co.frames.len())
9069            .rev()
9070            .filter(|&i| co.frames[i].lua().is_some())
9071            .collect();
9072        let fi = *lua_indices.get((level - 1) as usize)?;
9073        let f = co.frames[fi].lua()?;
9074        let proto = f.closure.proto;
9075        let pc = (f.pc as usize)
9076            .saturating_sub(1)
9077            .min(proto.lines.len().saturating_sub(1));
9078        let line = proto.lines.get(pc).copied().unwrap_or(0);
9079        Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
9080    }
9081
9082    /// Whether `level` resolves to any live activation (PUC lua_getstack).
9083    pub(crate) fn level_in_range(&self, level: i64) -> bool {
9084        self.dbg_frame(level).is_some()
9085    }
9086
9087    /// PUC's debug-API placeholder for an unnamed vararg slot returned by
9088    /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
9089    /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
9090    /// 5.3 :195 / 5.4 :286 baseline on their respective form.
9091    pub(crate) fn vararg_locvar_name(&self) -> &'static str {
9092        if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
9093            "(*vararg)"
9094        } else {
9095            "(vararg)"
9096        }
9097    }
9098
9099    /// PUC's debug-API placeholder for an unnamed temporary on a C
9100    /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
9101    /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
9102    /// their spelling.
9103    pub(crate) fn temporary_locvar_name(&self) -> &'static str {
9104        if matches!(
9105            self.version,
9106            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9107        ) {
9108            // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
9109            // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
9110            // to `(C temporary)`.
9111            "(*temporary)"
9112        } else {
9113            "(C temporary)"
9114        }
9115    }
9116
9117    /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
9118    /// (an arithmetic intermediate sitting past the last named local on a
9119    /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
9120    /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
9121    /// spelling.
9122    pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
9123        if matches!(
9124            self.version,
9125            LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9126        ) {
9127            "(*temporary)"
9128        } else {
9129            "(temporary)"
9130        }
9131    }
9132
9133    /// The Lua closure running at `level` on the current thread, or `None`
9134    /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
9135    /// need this to reach the function whose env they read or rewrite.
9136    pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
9137        // `DbgKind::Tail` also falls into the else branch — a tail-call
9138        // placeholder has no closure of its own, so PUC's `lua_getstack` +
9139        // `getfunc` for that level returns no function, and `getfenv(level)`
9140        // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
9141        let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9142            return None;
9143        };
9144        Some(self.frames[fi].lua()?.closure)
9145    }
9146
9147    pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
9148        if level < 1 {
9149            return false;
9150        }
9151        let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
9152        (level as usize) <= count
9153    }
9154
9155    pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
9156        if level < 1 {
9157            return None;
9158        }
9159        // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
9160        // activation counts as a level, and each Lua activation's
9161        // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
9162        // dropped the synthetic shape: `istailcall` becomes a flag on the
9163        // real frame and Cont activations no longer count separately.
9164        // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
9165        // `getinfo(2).func == g1` pins the 5.2+ shape.
9166        let v51 = self.version <= LuaVersion::Lua51;
9167        let mut lvl = level;
9168        for fi in (0..self.frames.len()).rev() {
9169            match &self.frames[fi] {
9170                CallFrame::Lua(f) => {
9171                    lvl -= 1;
9172                    if lvl == 0 {
9173                        return Some(DbgKind::Lua(fi));
9174                    }
9175                    if v51 {
9176                        // 5.1 reports one synthetic CIST_TAIL level per
9177                        // collapsed tail call (PUC `lua_getstack` subtracts
9178                        // `ci->u.l.tailcalls` from the remaining level).
9179                        for _ in 0..f.tailcalls {
9180                            lvl -= 1;
9181                            if lvl == 0 {
9182                                return Some(DbgKind::Tail(fi));
9183                            }
9184                        }
9185                    }
9186                    if f.from_c {
9187                        lvl -= 1;
9188                        if lvl == 0 {
9189                            return Some(DbgKind::C(fi));
9190                        }
9191                    }
9192                }
9193                CallFrame::Cont(_) => {
9194                    if !v51 {
9195                        continue;
9196                    }
9197                    lvl -= 1;
9198                    if lvl == 0 {
9199                        let parent = (0..fi)
9200                            .rev()
9201                            .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
9202                        return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
9203                    }
9204                }
9205            }
9206        }
9207        None
9208    }
9209
9210    pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9211        let f = self.frames[fi].lua()?;
9212        // metamethod handler frames carry the event tag (e.g. "close" for
9213        // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
9214        if f.is_hook {
9215            return Some(("hook", "?".to_string()));
9216        }
9217        if let Some(tm) = f.tm {
9218            return Some(("metamethod", tm_debug_name(self.version, tm)));
9219        }
9220        // a frame entered across a C boundary has no naming call instruction
9221        if fi == 0 || f.from_c {
9222            return None;
9223        }
9224        // the caller's call instruction names this frame; a continuation frame
9225        // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
9226        // already short-circuits those.
9227        let caller = self.frames[fi - 1].lua()?;
9228        let caller_proto = caller.closure.proto;
9229        let p: &crate::runtime::Proto = &caller_proto;
9230        let call_pc = (caller.pc as usize).checked_sub(1)?;
9231        let instr = *p.code.get(call_pc)?;
9232        match instr.op() {
9233            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9234            Op::TForCall => Some(("for iterator", "for iterator".to_string())),
9235            _ => None,
9236        }
9237    }
9238
9239    /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
9240    /// (PUC names a C function from the call instruction that invoked it). The
9241    /// native was called by the nearest Lua frame below `fi` (skipping pcall/
9242    /// xpcall continuations); that frame's call instruction names it.
9243    pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9244        // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
9245        // the synthetic C edge between the __gc finalizer (top Lua frame, has
9246        // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
9247        // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
9248        // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
9249        // sets the tag on the handler frame only, so level 2 there still
9250        // names the calling Lua frame's call instruction (5.5 locals.lua
9251        // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
9252        if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
9253            && fr.tm == Some("gc")
9254        {
9255            let name = tm_debug_name(self.version, "gc");
9256            return Some(("metamethod", name));
9257        }
9258        let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
9259        let caller = self.frames[caller_fi].lua()?;
9260        let p = &caller.closure.proto;
9261        let call_pc = (caller.pc as usize).checked_sub(1)?;
9262        let instr = *p.code.get(call_pc)?;
9263        match instr.op() {
9264            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9265            _ => None,
9266        }
9267    }
9268
9269    /// Native value currently sitting on the synthetic C edge identified by
9270    /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
9271    /// above `fi` (each one corresponds to one native pushing the hook) and
9272    /// indexes into `running_natives` from the top, also skipping the caller
9273    /// of `getinfo` itself (the native that is currently asking).
9274    /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
9275    /// expects the just-entered C function.
9276    pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
9277        let idx = self.c_frame_native_idx(fi)?;
9278        Some(Value::Native(self.running_natives[idx]))
9279    }
9280
9281    /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
9282    /// so `local_at` can index the native's argument window like PUC's
9283    /// `(C temporary)` path. Returns `None` when no matching native exists
9284    /// (e.g. the C edge corresponds to a non-native boundary).
9285    pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
9286        let idx = self.c_frame_native_idx(fi)?;
9287        self.running_native_slots.get(idx).copied()
9288    }
9289
9290    fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
9291        let n_above = self.frames[fi..]
9292            .iter()
9293            .filter_map(CallFrame::lua)
9294            .filter(|f| f.from_c)
9295            .count();
9296        if n_above == 0 {
9297            return None;
9298        }
9299        // running_natives.last() is the native currently executing (the one
9300        // that called getinfo). Pop it conceptually, then take the n_above-th
9301        // entry from the top of what remains.
9302        let nr = self.running_natives.len().checked_sub(1)?;
9303        nr.checked_sub(n_above)
9304    }
9305
9306    /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
9307    /// native whose function pointer matches `target`, and return its qualified
9308    /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
9309    /// `None` if no match is found. Used by `arg_error` when the running native
9310    /// was invoked from another native (PUC `ar.name == NULL` at level 0).
9311    pub(crate) fn pushglobalfuncname(
9312        &mut self,
9313        target: crate::runtime::value::NativeFn,
9314    ) -> Option<String> {
9315        let pkg_k = Value::Str(self.heap.intern(b"package"));
9316        let pkg = match self.globals().get(pkg_k) {
9317            Value::Table(t) => t,
9318            _ => return None,
9319        };
9320        let loaded_k = Value::Str(self.heap.intern(b"loaded"));
9321        let loaded = match pkg.get(loaded_k) {
9322            Value::Table(t) => t,
9323            _ => return None,
9324        };
9325        let matches = |v: Value| -> bool {
9326            matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
9327        };
9328        let mut k = Value::Nil;
9329        while let Ok(Some((nk, nv))) = loaded.next(k) {
9330            k = nk;
9331            let Value::Str(outer) = nk else { continue };
9332            let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
9333            if matches(nv) {
9334                return Some(if outer == "_G" { String::new() } else { outer });
9335            }
9336            if let Value::Table(inner_t) = nv {
9337                let mut k2 = Value::Nil;
9338                while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
9339                    k2 = nk2;
9340                    if matches(nv2)
9341                        && let Value::Str(inner) = nk2
9342                    {
9343                        let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
9344                        return Some(if outer == "_G" {
9345                            inner
9346                        } else {
9347                            format!("{outer}.{inner}")
9348                        });
9349                    }
9350                }
9351            }
9352        }
9353        None
9354    }
9355
9356    /// Name and namewhat of the native currently running on behalf of the top
9357    /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
9358    /// `luaL_argerror` rewrite a method call's self-argument error.
9359    pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
9360        let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
9361        let p = &caller.closure.proto;
9362        let call_pc = (caller.pc as usize).checked_sub(1)?;
9363        let instr = *p.code.get(call_pc)?;
9364        match instr.op() {
9365            Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9366            _ => None,
9367        }
9368    }
9369
9370    pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
9371        let f = self.frames[fi].lua().expect("Lua frame");
9372        let proto = f.closure.proto;
9373        let pc = (f.pc as usize)
9374            .saturating_sub(1)
9375            .min(proto.lines.len().saturating_sub(1));
9376        let line = proto.lines.get(pc).copied().unwrap_or(0);
9377        // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
9378        // (independent of any later write to a materialized vararg table's `n`).
9379        // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
9380        // any nonzero `tailcalls` count flips it true.
9381        (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
9382    }
9383
9384    /// Read an upvalue cell of a closure (debug.getupvalue).
9385    pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
9386        match cl.upvals()[idx].state() {
9387            UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
9388            UpvalState::Closed(v) => v,
9389        }
9390    }
9391
9392    /// Write an upvalue cell of a closure (debug.setupvalue).
9393    pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
9394        let uv = cl.upvals()[idx];
9395        match uv.state() {
9396            UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
9397            UpvalState::Closed(_) => {
9398                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9399                unsafe { uv.as_mut() }.set_closed(v);
9400                self.heap
9401                    .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
9402            }
9403        }
9404    }
9405
9406    /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
9407    /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
9408    /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
9409    /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
9410    /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
9411    /// else `"function <src:line_defined>"` for an anonymous Lua function.
9412    /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
9413    /// Walks the coroutine's saved frames and prepends a synthetic C-level
9414    /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
9415    /// (its `resume_at` marker is set). `level` skips entries from the top
9416    /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
9417    /// frame; etc.). db.lua :764-:768 sample several levels.
9418    pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
9419        use crate::runtime::CoroStatus;
9420        const LEVELS1: usize = 10;
9421        const LEVELS2: usize = 11;
9422        #[derive(Clone, Copy)]
9423        enum VFrame<'a> {
9424            Lua(&'a crate::runtime::function::Frame),
9425            CPcall,
9426            CXpcall,
9427            CYield,
9428            /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
9429            /// call collapsed into the next Lua frame down the chain.
9430            Tail,
9431        }
9432        let v51 = self.version <= LuaVersion::Lua51;
9433        let mut visible: Vec<VFrame<'_>> = Vec::new();
9434        // PUC's level 0 entry on a suspended coroutine is the C call where it
9435        // paused — `coroutine.yield` for a yielded thread.
9436        if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
9437            visible.push(VFrame::CYield);
9438        }
9439        for cf in co.frames.iter().rev() {
9440            match cf {
9441                CallFrame::Lua(f) => {
9442                    visible.push(VFrame::Lua(f));
9443                    if v51 {
9444                        for _ in 0..f.tailcalls {
9445                            visible.push(VFrame::Tail);
9446                        }
9447                    }
9448                }
9449                CallFrame::Cont(nc) => match nc.kind {
9450                    ContKind::Pcall => visible.push(VFrame::CPcall),
9451                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
9452                    _ => {}
9453                },
9454            }
9455        }
9456        if level < 0 {
9457            level = 0;
9458        }
9459        if (level as usize) >= visible.len() {
9460            return Vec::new();
9461        }
9462        let visible = &visible[level as usize..];
9463        let total = visible.len();
9464        let mut out = Vec::new();
9465        // To name a Lua frame, PUC consults the caller's OP_CALL via
9466        // getobjname: find the index `fi` of the current frame in co.frames,
9467        // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
9468        let coro_frame_name = |frames: &[CallFrame],
9469                               target: &crate::runtime::function::Frame|
9470         -> Option<(&'static str, String)> {
9471            let fi = frames
9472                .iter()
9473                .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
9474            if fi == 0 || target.from_c {
9475                return None;
9476            }
9477            let caller = frames[fi - 1].lua()?;
9478            let p = &caller.closure.proto;
9479            let call_pc = (caller.pc as usize).checked_sub(1)?;
9480            let instr = *p.code.get(call_pc)?;
9481            match instr.op() {
9482                Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9483                Op::TForCall => Some(("for iterator", "for iterator".to_string())),
9484                _ => None,
9485            }
9486        };
9487        let frames = &co.frames;
9488        let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
9489            VFrame::Lua(f) => {
9490                let proto = f.closure.proto;
9491                let src = chunk_display_name(proto.source.as_ptr());
9492                let pc = (f.pc as usize)
9493                    .saturating_sub(1)
9494                    .min(proto.lines.len().saturating_sub(1));
9495                let line = proto.lines.get(pc).copied().unwrap_or(0);
9496                out.extend_from_slice(b"\n\t");
9497                out.extend_from_slice(src);
9498                out.extend_from_slice(format!(":{line}: in ").as_bytes());
9499                if let Some((namewhat, name)) = coro_frame_name(frames, f) {
9500                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
9501                } else if proto.line_defined == 0 {
9502                    out.extend_from_slice(b"main chunk");
9503                } else {
9504                    out.extend_from_slice(
9505                        format!(
9506                            "function <{}:{}>",
9507                            String::from_utf8_lossy(src),
9508                            proto.line_defined
9509                        )
9510                        .as_bytes(),
9511                    );
9512                }
9513            }
9514            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
9515            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
9516            VFrame::CYield => {
9517                // PUC `pushglobalfuncname` reports `yield` as
9518                // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
9519                // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
9520                // `'yield'` (5.5 :841).
9521                let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
9522                if qualified {
9523                    out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
9524                } else {
9525                    out.extend_from_slice(b"\n\t[C]: in function 'yield'");
9526                }
9527            }
9528            VFrame::Tail => {
9529                // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
9530                // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
9531                // :403 asserts these appear once per collapsed tail call.
9532                out.extend_from_slice(b"\n\t(...tail calls...)");
9533            }
9534        };
9535        if total <= LEVELS1 + LEVELS2 {
9536            for &v in visible {
9537                emit(&mut out, v);
9538            }
9539        } else {
9540            for &v in &visible[..LEVELS1] {
9541                emit(&mut out, v);
9542            }
9543            let skip = total - LEVELS1 - LEVELS2;
9544            out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
9545            for &v in &visible[total - LEVELS2..] {
9546                emit(&mut out, v);
9547            }
9548        }
9549        out
9550    }
9551
9552    pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
9553        // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
9554        // (11) bottom frames; if there are more, the middle is collapsed into
9555        // a `"...\t(skipping N levels)"` marker. Without this, a stack-
9556        // overflow traceback would balloon to tens of megabytes (errors.lua's
9557        // stack-overflow test ran string.gmatch over the resulting buffer).
9558        const LEVELS1: usize = 10;
9559        const LEVELS2: usize = 11;
9560        // Collect visible frames in top-down order (deepest first). Both Lua
9561        // activations and pcall/xpcall continuations (which stand in for a
9562        // C-level pcall on the stack) are visible; PUC's traceback enumerates
9563        // both via lua_getstack. db.lua :715 expects "pcall" to appear.
9564        #[derive(Clone, Copy)]
9565        enum VFrame {
9566            Lua(usize),
9567            CPcall,
9568            CXpcall,
9569        }
9570        let mut visible: Vec<VFrame> = Vec::new();
9571        for (fi, cf) in self.frames.iter().enumerate().rev() {
9572            match cf {
9573                CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
9574                CallFrame::Cont(nc) => match nc.kind {
9575                    ContKind::Pcall => visible.push(VFrame::CPcall),
9576                    ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
9577                    _ => {}
9578                },
9579            }
9580        }
9581        // PUC `luaL_traceback` starts enumerating at the given `level` (in
9582        // terms of L1's CallInfo chain). For the running-thread case the C
9583        // frame for debug.traceback itself is level 0 and luna's `visible`
9584        // doesn't include it — so level=1 (PUC default) means "emit from the
9585        // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
9586        // the top. level<=0 emits nothing extra here (d_traceback handles the
9587        // "[C]: in function 'traceback'" prefix for level==0 separately).
9588        let skip = (level - 1).max(0) as usize;
9589        if skip >= visible.len() {
9590            return Vec::new();
9591        }
9592        let visible = &visible[skip..];
9593        let total = visible.len();
9594        let mut out = Vec::new();
9595        let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
9596            VFrame::Lua(fi) => {
9597                let f = this.frames[fi].lua().expect("Lua frame");
9598                let proto = f.closure.proto;
9599                let src = chunk_display_name(proto.source.as_ptr());
9600                let pc = (f.pc as usize)
9601                    .saturating_sub(1)
9602                    .min(proto.lines.len().saturating_sub(1));
9603                let line = proto.lines.get(pc).copied().unwrap_or(0);
9604                out.extend_from_slice(b"\n\t");
9605                out.extend_from_slice(src);
9606                out.extend_from_slice(format!(":{line}: in ").as_bytes());
9607                if let Some((namewhat, name)) = this.frame_name(fi) {
9608                    out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
9609                } else if proto.line_defined == 0 {
9610                    out.extend_from_slice(b"main chunk");
9611                } else {
9612                    out.extend_from_slice(
9613                        format!(
9614                            "function <{}:{}>",
9615                            String::from_utf8_lossy(src),
9616                            proto.line_defined
9617                        )
9618                        .as_bytes(),
9619                    );
9620                }
9621            }
9622            VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
9623            VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
9624        };
9625        if total <= LEVELS1 + LEVELS2 {
9626            for &v in visible {
9627                emit_frame(&mut out, v, self);
9628            }
9629        } else {
9630            for &v in &visible[..LEVELS1] {
9631                emit_frame(&mut out, v, self);
9632            }
9633            let dropped = total - LEVELS1 - LEVELS2;
9634            out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
9635            for &v in &visible[total - LEVELS2..] {
9636                emit_frame(&mut out, v, self);
9637            }
9638        }
9639        out
9640    }
9641}
9642
9643// ────────────────────────────────────────────────────────────────────
9644// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
9645//
9646// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
9647// trace-meta section after `vm.load`, resolves each entry's
9648// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
9649// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
9650// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
9651// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
9652// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
9653// mcode without further plumbing — same code path the runtime JIT
9654// uses.
9655//
// Why a separate impl block: keeps the AOT API surface
// (`install_aot_trace` and `collect_proto_hashes`) easy to locate when
// grep'ing, without having to scroll through the 8500-line `impl Vm`
// block above.
9659// ────────────────────────────────────────────────────────────────────
9660
9661impl Vm {
9662    /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
9663    /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
9664    /// fires it at the trace's `head_pc`. This is the runtime install
9665    /// API the deploy-side `luna-runtime-helpers` resolver calls once
9666    /// per AOT-emitted trace meta entry, after looking up `proto` by
9667    /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
9668    ///
9669    /// # What this does
9670    ///
9671    /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
9672    /// The trace's `entry` fn ptr must already point at runnable
9673    /// machine code (the AOT linker resolved the symbol at link time;
9674    /// the deploy resolver passes the address verbatim).
9675    ///
9676    /// # What this does NOT do
9677    ///
9678    /// - **No deduplication.** Calling twice with the same `head_pc`
9679    ///   pushes two entries; the dispatcher's `find` will pick the
9680    ///   first match. The deploy resolver is responsible for not
9681    ///   double-installing.
9682    /// - **No invalidation of the runtime JIT cache.** If the runtime
9683    ///   JIT later records + compiles a trace for the same
9684    ///   `(proto, head_pc)`, both coexist on `proto.traces` and the
9685    ///   dispatcher's `find` picks whichever appears first. AOT
9686    ///   traces install before any runtime recording is possible
9687    ///   (resolver runs before `vm.load` returns its first closure),
9688    ///   so AOT traces win the race for the same site.
9689    /// - **No coverage gating.** AOT traces are trusted by
9690    ///   construction — they were validated at compile time. Setting
9691    ///   `dispatchable: false` on the input would silently disable
9692    ///   dispatch; the caller controls that flag.
9693    ///
9694    /// # Safety / soundness
9695    ///
9696    /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
9697    /// machine code). Soundness contract:
9698    ///
9699    /// - The fn pointer must remain valid for the `Vm`'s lifetime.
9700    ///   In the AOT-binary deploy shape this is trivially satisfied —
9701    ///   the fn lives in the binary's `.text`.
9702    /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
9703    ///   what the trace's IR actually compiled against; the dispatcher
9704    ///   uses them to marshal `reg_state` in and out without further
9705    ///   validation. A mismatch corrupts vm.stack.
9706    ///
9707    /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
9708    /// invariants hold; this fn is a plain push — no validation that
9709    /// would slow the dispatcher's hot path either.
9710    pub fn install_aot_trace(
9711        &mut self,
9712        proto: crate::runtime::Gc<crate::runtime::function::Proto>,
9713        trace: crate::jit::trace::CompiledTrace,
9714    ) {
9715        let _ = self; // resolver passes &mut Vm for symmetry with future
9716        // pending-install + hash-walk variants; nothing on `self` to
9717        // mutate today because the install target lives on the Proto.
9718        proto.traces.borrow_mut().push(std::rc::Rc::new(trace));
9719    }
9720
9721    /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
9722    /// reachable from `root` and return `(proto, stable_hash)` pairs
9723    /// for every Proto found. Used by the deploy-side resolver to
9724    /// match AOT-emitted `proto_hash` keys against the freshly
9725    /// `undump`'d chunk's protos.
9726    ///
9727    /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
9728    /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
9729    /// multiple nested closures (rare; the cache field would catch
9730    /// the closure-side dedup, not the Proto side) is reported once.
9731    ///
9732    /// # Why on `&Vm` and not a free fn
9733    ///
9734    /// Keeps the AOT install API discoverable on the Vm surface —
9735    /// `vm.collect_proto_hashes(root)` reads naturally next to
9736    /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
9737    /// any Vm field, so `&self` (read-only) is enough.
9738    pub fn collect_proto_hashes(
9739        &self,
9740        root: crate::runtime::Gc<crate::runtime::function::Proto>,
9741    ) -> Vec<(
9742        crate::runtime::Gc<crate::runtime::function::Proto>,
9743        [u8; 16],
9744    )> {
9745        let _ = self;
9746        let mut out = Vec::new();
9747        let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
9748            std::collections::HashSet::new();
9749        let mut queue: std::collections::VecDeque<
9750            crate::runtime::Gc<crate::runtime::function::Proto>,
9751        > = std::collections::VecDeque::new();
9752        queue.push_back(root);
9753        while let Some(p) = queue.pop_front() {
9754            let key = p.as_ptr() as *const _;
9755            if !seen.insert(key) {
9756                continue;
9757            }
9758            out.push((p, p.stable_hash()));
9759            for &child in p.protos.iter() {
9760                queue.push_back(child);
9761            }
9762        }
9763        out
9764    }
9765}