luna_core/vm/exec.rs
1//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
2//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
3//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
4//!
5//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
6//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
7//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
8//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16 AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17 MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
/// A Lua virtual machine: one OS thread's worth of Lua state.
///
/// # Threading model
///
/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
/// design. Embedders that want concurrency spawn one `Vm` per OS
/// thread (or per single-thread Tokio worker) and exchange data via
/// channels. See [`docs/threading.md`](../../docs/threading.md) for
/// canonical embedding patterns including Tokio `current_thread`,
/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
///
/// The constraint is enforced at compile time:
///
/// ```compile_fail
/// fn must_be_send<T: Send>() {}
/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
/// ```
///
/// A future `feature = "send"` (post-v1.1 sprint) will gate an
/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
pub struct Vm {
    /// The GC heap owned by this VM. Embedders normally interact via the
    /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
    /// the heap directly.
    pub heap: Heap,
    /// value stack of the thread whose context is currently live (swapped
    /// out into a `SavedCtx` together with the fields below)
    stack: Vec<Value>,
    /// call frames of the thread whose context is currently live
    frames: Vec<CallFrame>,
    /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
    /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
    /// helpers (debug-asserted on use). NOT consumed by readers yet;
    /// week 1 is pure scaffold. Week 2-N migrations replace readers
    /// one slice at a time, then remove `frames: Vec<CallFrame>` in
    /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
    frames_top: u32,
    /// open upvalues, sorted ascending by stack slot
    open_upvals: Vec<(u32, Gc<Upvalue>)>,
    /// to-be-closed slots, ascending
    tbc: Vec<u32>,
    /// logical stack top for multi-result sequences
    pub(crate) top: u32,
    globals: Gc<Table>,
    /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
    /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
    /// own. Slot 3 is the shared metatable for all strings (populated by the
    /// string lib, P04). Settable via debug.setmetatable.
    type_mt: [Option<Gc<Table>>; 5],
    /// pre-interned metamethod event names, indexed by `Mm`
    mm_names: Vec<Gc<crate::runtime::LuaStr>>,
    /// native↔Lua nesting depth (PUC C-stack guard analogue)
    c_depth: u32,
    /// number of live pcall/xpcall continuation frames on the running thread
    /// (PUC counts these against nCcalls). Bounds protected-call recursion the
    /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
    /// with the coroutine context, since continuations survive a yield.
    pcall_depth: u32,
    /// number of non-yieldable C calls in flight on the running thread (PUC's
    /// `L->nny`). A library callback that runs via synchronous Rust recursion
    /// (sort comparator, gsub replacement) cannot be continued across a yield,
    /// so it bumps this for its duration; `coroutine.yield` inside hits the
    /// C-call boundary and errors. Always 0 at a suspend point (a yield can
    /// never cross such a call), so it needs no per-thread save/restore.
    nny: u32,
    /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
    /// stack-overflow that surfaces *inside* the handler is reported as PUC's
    /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
    /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
    /// loop)` then matches. PUC tracks this via the soft-cap window
    /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
    /// scope explicitly.
    msgh_depth: u32,
    /// set by a coroutine closing itself (`coroutine.close()` on the running
    /// thread): the to-be-closed handlers have already run; the thread must now
    /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
    /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
    /// unwind, so a protecting pcall cannot catch it) the termination.
    terminating: Option<Option<Value>>,
    /// xoshiro256** state (math.random)
    rng: [u64; 4],
    /// VM creation time (os.clock)
    started: std::time::Instant,
    /// Lua dialect this VM runs
    version: LuaVersion,
    /// error object being threaded through a chain of __close handlers; a GC
    /// root for the duration (a handler may trigger collection)
    closing_err: Option<Value>,
    /// the coroutine whose context is currently live in the fields above;
    /// `None` while the main thread runs (P05)
    current: Option<Gc<crate::runtime::Coro>>,
    /// the main thread's saved execution context while a coroutine runs
    main_ctx: Option<SavedCtx>,
    /// set by `coroutine.yield` to suspend the running coroutine: the yielded
    /// values plus the slot/result-count needed to finish the yielding call on
    /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
    yielding: Option<(Vec<Value>, u32, i32)>,
    /// results expected by the in-flight native call (so `yield` knows how many
    /// values its call site wants when it suspends)
    native_nresults: i32,
    /// identity object for the main thread, returned by `coroutine.running`
    /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
    main_coro: Option<Gc<Coro>>,
    /// `collectgarbage` mode name ("incremental"/"generational"). The collector
    /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
    /// switches report the previous one, as PUC does.
    gc_mode: &'static str,
    /// the live-register boundary of the running thread for GC rooting (PUC's
    /// `L->top`): set precisely at each GC safe point so freed temporary
    /// registers above it are not rooted. Without this the collector roots the
    /// whole stack window, pinning weak-table values stranded in stale temps
    /// (e.g. closure.lua's `while x[1]` GC-detection loop).
    pub(crate) gc_top: u32,
    /// `collectgarbage("param", name [,value])` pacing parameters. The collector
    /// is still stop-the-world, so these are stored/returned for API fidelity
    /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
    /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
    gc_pause: i64,
    /// pacing parameter `stepmul` (see `gc_pause`)
    gc_stepmul: i64,
    /// pacing parameter `stepsize` (see `gc_pause`)
    gc_stepsize: i64,
    /// true while `__gc` finalizers are being run, so a finalizer that calls
    /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
    /// `collectgarbage` yields fail).
    gc_finalizing: bool,
    /// C ABI scratch (`capi` module): the host-visible value stack that C
    /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
    /// Kept here (instead of in a separate `LuaState` wrapper) so the
    /// trampoline that bridges to a `LuaCFunction` can safely cast the
    /// Vm pointer it already holds to the public `*mut LuaState` type
    /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
    pub capi_stack: Vec<crate::runtime::Value>,
    /// Pinned CString backing the pointer last returned by `lua_tostring`;
    /// valid until the next `lua_tostring` on the same Vm.
    pub capi_cstr_pin: Option<std::ffi::CString>,
    /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
    /// concatenate across continuation calls until a non-`tocont` call
    /// flushes; the default warnf recognises `@on`/`@off` control messages
    /// and starts disabled. luna's `emit_warn` mirrors the default warnf
    /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
    /// keep the older raise semantics).
    pub(crate) warn_state: WarnState,
    /// NOTE(review): presumably the buffer accumulating a message across
    /// continuation calls until it is flushed — confirm against `emit_warn`.
    pub(crate) warn_buf: Vec<u8>,
    /// P09 embedding cooperative budget: a per-Vm tick counter that the run
    /// loop decrements once per dispatch turn. When it hits zero the loop
    /// raises a catchable "instruction budget exceeded" error so the embedder
    /// can yield control back to its caller (short-script eval, game
    /// frame budgets). `None` = unbounded; reset on each call via
    /// `set_instr_budget`.
    pub(crate) instr_budget: Option<i64>,
    // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
    // `self.jit` below + `crate::vm::jit_state` for field docs.
    // (Was: jit_enabled here.)
    // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
    // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
    // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
    // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
    // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
    // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
    // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
    // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
    // materialize_emit,closure_emit}_count — all moved to JitState.
    /// Bytecode-loading gate. Default `true`. Sandbox embedders should
    /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
    /// precompiled chunks (which bypass the parser's depth / opcode
    /// limits). When `false`, the loader rejects any source whose first
    /// byte is the bytecode signature `\27` ("`\27Lua`").
    pub(crate) bytecode_loading: bool,
    /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
    /// a strictly larger trust surface than luna's own dump format
    /// (third-party toolchain bugs, malformed chunks, unknown opcode
    /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
    /// through the per-dialect PUC translators in `crate::vm::dump::puc`
    /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
    /// Embedder toggles via `set_puc_bytecode_loading`.
    pub(crate) puc_bytecode_loading: bool,
    /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
    /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
    /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
    /// source exceeds this, the loader returns the PUC-shaped
    /// `not enough memory` error before the host allocator is asked to
    /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
    /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
    /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
    /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
    /// Embedders that genuinely need to load > 256 MiB sources widen the
    /// cap via [`Vm::set_loader_input_budget`].
    pub(crate) loader_input_budget: usize,
    /// In-process log of fully-emitted warnings (each entry = one flushed
    /// message, sans the "Lua warning: " prefix and trailing newline). Lets
    /// tests assert what was warned without scraping stderr.
    pub(crate) warn_log: Vec<Vec<u8>>,
    /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
    /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
    /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
    /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
    /// shape stub for db.lua :328; if other registry-keyed APIs land later
    /// they can share this table.
    pub(crate) registry: Option<Gc<Table>>,
    /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
    /// registry entry); attached to every file userdata the io library makes
    pub(crate) file_mt: Option<Gc<Table>>,
    /// io library default input/output streams (PUC registry IO_INPUT/IO_OUTPUT)
    pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
    /// default output stream (see `io_input`)
    pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
    /// the running thread's debug hook state (`debug.sethook`); per-thread,
    /// swapped with the execution context on a coroutine resume/yield
    pub(crate) hook: HookState,
    /// true while the hook itself runs, so its own execution fires no events
    /// (PUC clears the mask for the duration)
    pub(crate) in_hook: bool,
    /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
    /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
    /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
    /// accumulate the count instead of capping at 1.
    pub(crate) pending_tailcalls: u32,
    /// Name of the C native that just propagated an error (captured before
    /// the native is popped from `running_natives`). Lets a dying coroutine
    /// preserve `[C]: in function '<name>'` at the top of its traceback
    /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
    /// luna's native frames are off-stack so we stash the name explicitly.
    pub(crate) errored_native: Option<String>,
    /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
    /// (relative to the activation's func slot) and the number transferred.
    /// Set just before firing a call/return hook, read by `getinfo("r")`.
    pub(crate) hook_ftransfer: u16,
    /// number of values transferred (see `hook_ftransfer`)
    pub(crate) hook_ntransfer: u16,
    /// metamethod event tag (e.g. "close") to attach to the next Lua frame
    /// pushed by `push_frame`; `close_slots` sets this before calling a
    /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
    /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
    pending_tm: Option<&'static str>,
    /// `true` when the next `push_frame` is the user hook function itself,
    /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
    /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
    pending_is_hook: bool,
    /// traceback snapshot taken at the error point (the first `unwind` entry
    /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
    /// the failed frames are popped — can still see the error point's stack
    /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
    /// stack intact; we approximate by snapshotting the string and letting
    /// `d_traceback` consume it. Cleared on Cont catch and at host-level
    /// `call_value` entry (`public_call_depth == 0`).
    pub(crate) error_traceback: Option<Vec<u8>>,
    /// nesting depth of public `call_value` entries (host vs. internal). The
    /// outermost entry (depth 0) resets per-error state (`error_traceback`);
    /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
    public_call_depth: u32,
    /// stack of native (`Value::Native`) closures currently running on the
    /// Rust call stack. `begin_call` pushes the closure before invoking
    /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
    /// native call (PUC `ar.name == NULL` at level 0 because the level-0
    /// caller is C, not Lua) and qualify the running function's name via
    /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
    pub(crate) running_natives: Vec<Gc<NativeClosure>>,
    /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
    /// the native's argument-window head and width, so `debug.getlocal`
    /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
    pub(crate) running_native_slots: Vec<(u32, u32)>,
    // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
    // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
    // trace_compiler — all moved to JitState. See `jit` below.
    /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
    /// when `chunk_compiler` / `trace_compiler` are
    /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
    ///
    /// `#[doc(hidden)] pub` so the `luna` crate's
    /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
    /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
    /// field). Not part of the embedder-facing API surface.
    #[doc(hidden)]
    pub jit: crate::vm::jit_state::JitState,

    /// B12 host roots — append-only `Vec<Value>` traced as an extra
    /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
    /// `LuaRoot`) hold indices into this vector so the underlying
    /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
    ///
    /// v1.1 strategy: append-only with explicit `unpin_all` / new Vm.
    /// Slot recycling lands in Phase 3 alongside B8 LuaUserdata, when
    /// the trade-offs between `Drop` plumbing and append-only memory
    /// growth have a richer ergonomics envelope to live in.
    pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
    /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
    /// back if non-empty, else extends `host_roots`. Generation
    /// overflow at `u32::MAX` retires the slot (NOT pushed here).
    pub(crate) host_roots_free: Vec<u32>,

    /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
    /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
    /// callback). Each entry is one in-flight working buffer; `gc_roots`
    /// extends with every contained `Value` so a `collectgarbage()`
    /// inside the comparator cannot free strings/tables snapshotted
    /// here. Nested sorts push a new buffer on entry, pop on exit
    /// (sort.lua's `load(..)(); collectgarbage()` compare callback
    /// regression).
    pub(crate) sort_scratch: Vec<Vec<Value>>,

    /// v1.3 Phase ML — MacroLua compile-time macro registry.
    /// Pre-populated with built-in macros (`@quote` / `@unquote` /
    /// `@if` / `@gensym`) at construction time when `version ==
    /// LuaVersion::MacroLua`; embedders register custom macros via
    /// [`Vm::define_macro`]. The expander runs once per `load()` call
    /// between lexing and parsing (only when `is_macro_lua()`).
    pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,

    /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
    /// by `TypeId::of::<T>()` for embedder types implementing
    /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
    /// [`Vm::register_userdata`]; metatables are pinned via
    /// [`Vm::pin_host`] at registration time so the entry's
    /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
    pub(crate) userdata_metatables:
        std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,

    /// B6 — classification of the most recent error raised on this Vm.
    /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
    /// at well-known sites (syntax errors, instr-budget trips, native
    /// callback errors, type errors).
    pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,

    /// B6 — `(source_name, line)` of the most recent error. Set by the
    /// dispatcher / lexer / parser; cleared when a new call_value
    /// enters cleanly.
    pub(crate) last_error_source: Option<(String, u32)>,

    /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
    /// the dispatcher hot loop yields cooperatively (sets
    /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
    /// to `EvalFuture::poll`) instead of returning a real
    /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
    /// for the duration of the future; restored to `false` on
    /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
    /// leave it `false` so v1.0 behavior is preserved exactly.
    pub(crate) async_mode: bool,

    /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
    /// before driving a slice. The dispatcher itself does not call it
    /// (the future's poll loop does `wake_by_ref` after observing
    /// `BudgetExhausted`), but storing the waker keeps the door open
    /// for Stage 2 async natives to wake the host directly from a
    /// helper future.
    pub(crate) async_waker: Option<std::task::Waker>,

    /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
    /// `instr_budget` at the start of each `EvalFuture::poll` slice.
    /// Default 10_000 (RFC §D5). Tunable via
    /// [`Vm::set_async_slice`].
    pub(crate) async_slice_size: i64,

    /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
    /// budget exhaustion fires; checked by `exec_with` (so the
    /// sentinel propagates without `unwind` running, mirroring
    /// `yielding.is_some()`) and by `call_value_impl` (so the call
    /// frames survive for the next poll). Cleared by `drive_one`
    /// after translating it to `DispatchOutcome::BudgetExhausted`.
    pub(crate) host_yield_pending: bool,

    /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
    /// when an async-marked [`NativeClosure`] is invoked under
    /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
    /// mechanism as `host_yield_pending` — see `exec_with` +
    /// `call_value_impl`), stashes the in-flight future +
    /// post-completion context here, and surfaces them to
    /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
    /// once the future is moved out into a
    /// `DispatchOutcome::AsyncNativeAwaiting`.
    pub(crate) pending_async_native_fut:
        Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,

    /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
    /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
    /// commit the future's eventual `Ok(nret)` back into the calling
    /// frame's expected result slots. Recorded by the dispatcher;
    /// consumed by [`Vm::commit_async_native_result`] after the
    /// future resolves.
    pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
}
400
/// Call-site bookkeeping that must survive the cooperative-yield boundary
/// while an async native is in flight (v1.1 B10 Stage 2).
///
/// The dispatcher fills this in when a `NativeClosure` with
/// `is_async == true` takes the cooperative path. Once the awaited future
/// resolves, `EvalFuture::poll` passes it to
/// [`Vm::commit_async_native_result`], letting `finish_results` and the
/// post-call GC checkpoint run as though the native had returned
/// synchronously.
#[derive(Clone, Copy)]
pub(crate) struct AsyncNativeCallCtx {
    /// Stack slot of the function being called.
    pub func_slot: u32,
    /// Argument count. Kept for parity with the synchronous native-call
    /// path's `native_nresults`/`gc_top` bookkeeping and reserved for
    /// Stage 3+ hook firing and traceback shaping; Stage 2 never reads it.
    #[allow(dead_code)]
    pub nargs: u32,
    /// Number of results the call site expects.
    pub nresults: i32,
    /// GC root boundary at suspend time. Stage 2 reads `Vm.gc_top` directly
    /// after resuming, so this is currently unread; it is carried so a
    /// Stage 3 audit can check that the pre-suspend root window matches the
    /// post-resume one (also intended for Stage 3+ traceback work).
    #[allow(dead_code)]
    pub gc_top: u32,
}
425
426/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
427/// hookcount). `func` is the Lua hook; the booleans are the PUC mask bits.
428#[derive(Clone, Copy, Default)]
429pub struct HookState {
430 /// the hook function (`None` when no hook is installed)
431 pub func: Option<Value>,
432 /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
433 /// (Rust first); both can be installed simultaneously, but most
434 /// embedders pick one.
435 pub rust_func: Option<RustDebugHook>,
436 /// LUA_MASKCALL — fire on function entry
437 pub call: bool,
438 /// LUA_MASKRET — fire on function return
439 pub ret: bool,
440 /// LUA_MASKLINE — fire on source-line change
441 pub line: bool,
442 /// LUA_MASKCOUNT — fire every `count_base` instructions
443 pub count: bool,
444 /// instruction count between count events (PUC basehookcount)
445 pub count_base: i64,
446 /// instructions left until the next count event (PUC hookcount)
447 pub count_left: i64,
448}
449
450/// Rust-side debug hook callback (B11). Receives the `Vm` plus a
451/// classified event. The callback runs synchronously in the
452/// dispatcher; the hook flag (`in_hook`) is set for its duration so
453/// hook recursion is suppressed.
454pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
/// The kind of debug event handed to a [`RustDebugHook`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RustHookEvent {
    /// A function was entered (analogue of `hook_call`).
    Call,
    /// A function returned (analogue of `hook_return`).
    Return,
    /// A function was entered through a tail call; PUC 5.2+ reports this
    /// separately from a plain `Call`.
    TailCall,
    /// Execution moved to a different source line; the payload is the
    /// 1-based line number.
    Line(u32),
    /// The instruction counter elapsed (fires every `count_base`
    /// instructions).
    Count,
}
470
/// Subscribe to function-call events.
///
/// The `HOOK_MASK_*` constants are the mask flags accepted by
/// [`Vm::set_rust_debug_hook`]; OR several together to subscribe to
/// multiple event categories with a single hook installation.
pub const HOOK_MASK_CALL: u32 = 1 << 0;
/// Subscribe to function-return events.
pub const HOOK_MASK_RETURN: u32 = 1 << 1;
/// Subscribe to line-change events.
pub const HOOK_MASK_LINE: u32 = 1 << 2;
/// Subscribe to instruction-count events.
pub const HOOK_MASK_COUNT: u32 = 1 << 3;
480
481/// A thread's swapped-out execution context (PUC per-thread stack state).
482struct SavedCtx {
483 stack: Vec<Value>,
484 frames: Vec<CallFrame>,
485 open_upvals: Vec<(u32, Gc<Upvalue>)>,
486 tbc: Vec<u32>,
487 top: u32,
488 pcall_depth: u32,
489 hook: HookState,
490 /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
491 /// the rest of the suspended state so each thread can keep its own
492 /// `setfenv(0, env)` rewire without the swap leaking into another
493 /// thread (5.1 closure.lua :177).
494 globals: Gc<Table>,
495}
496
497/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
498enum Unwound {
499 /// caught by a pcall/xpcall continuation; resume running its caller
500 Caught,
501 /// caught by a continuation that was the entry-level activation; these are
502 /// the call's (wrapped) results
503 CaughtReturn(Vec<Value>),
504 /// no protecting continuation up to `entry_depth`; propagate the error
505 Propagated(LuaError),
506}
507
/// One resolved level of the debug stack: either a real Lua frame (an index
/// into `frames`) or a synthetic level standing in for a `call_value`
/// boundary or a collapsed tail call.
pub(crate) enum DbgKind {
    /// A real Lua frame; the payload indexes `frames`.
    Lua(usize),
    /// A synthetic C level. The payload is the index of the `from_c` Lua
    /// frame it sits below, which is used to name the native through the
    /// call instruction that invoked it.
    C(usize),
    /// Placeholder for PUC's `CIST_TAIL`: a Lua-to-Lua tail call collapsed
    /// the caller's activation, so `debug.getinfo(level)` at this level
    /// reports `what = "tail"` / `short_src = "(tail call)"` /
    /// `linedefined = -1` / `func = nil`, and `getfenv(level)` errors
    /// (both shapes are pinned by 5.1 db.lua :336/:341). The payload
    /// indexes the *tail-called* frame whose `is_tail` flag produced this
    /// synthetic level.
    Tail(#[allow(dead_code)] usize),
}
523
524/// Outcome of an index/newindex/comparison fast path: either a directly
525/// computed result, or a metamethod (with the receiver it resolved against) the
526/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
527enum MmOut {
528 /// index → the looked-up value; newindex → done (raw set performed);
529 /// comparison → the boolean result already known
530 Done(Value),
531 /// a metamethod to call; `recv` is the chain element it was found on (the
532 /// extra args — key / value — are supplied by the caller)
533 Mm { func: Value, recv: Value },
534 /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
535 /// carries `__le` — `op_compare` swaps the args and negates the result.
536 /// Lives separate from `Mm` so the synth path can stay yieldable without
537 /// every other Mm caller learning a swap flag they would never set.
538 CompareSynth { func: Value },
539}
540
/// Metamethod events. A variant's discriminant is its index into
/// `Vm::mm_names`, so the declaration order is significant; it matches the
/// order of the `MM_NAMES` table in this file.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
pub(crate) enum Mm {
    // table access, calls and introspection
    Index,
    NewIndex,
    Call,
    ToString,
    Metatable,
    Name,
    // comparison
    Eq,
    Lt,
    Le,
    // concatenation and length
    Concat,
    Len,
    // arithmetic
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Pow,
    IDiv,
    // bitwise
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
    // unary
    Unm,
    BNot,
    // lifecycle and iteration
    Close,
    Gc,
    Pairs,
}
574
/// Metamethod event names, `__`-prefixed, listed in the same order as the
/// `Mm` variants so that `Mm::X as usize` selects the name for `X`.
const MM_NAMES: [&str; 28] = [
    // table access, calls and introspection
    "__index",
    "__newindex",
    "__call",
    "__tostring",
    "__metatable",
    "__name",
    // comparison
    "__eq",
    "__lt",
    "__le",
    // concatenation and length
    "__concat",
    "__len",
    // arithmetic
    "__add",
    "__sub",
    "__mul",
    "__div",
    "__mod",
    "__pow",
    "__idiv",
    // bitwise
    "__band",
    "__bor",
    "__bxor",
    "__shl",
    "__shr",
    // unary
    "__unm",
    "__bnot",
    // lifecycle and iteration
    "__close",
    "__gc",
    "__pairs",
];
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614 if version <= LuaVersion::Lua53 {
615 format!("__{tm}")
616 } else if tm == "gc" {
617 "__gc".to_string()
618 } else {
619 tm.to_string()
620 }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626 use crate::vm::isa::Op;
627 Some(match op {
628 Op::Add => "add",
629 Op::Sub => "sub",
630 Op::Mul => "mul",
631 Op::Div => "div",
632 Op::Mod => "mod",
633 Op::Pow => "pow",
634 Op::IDiv => "idiv",
635 Op::BAnd => "band",
636 Op::BOr => "bor",
637 Op::BXor => "bxor",
638 Op::Shl => "shl",
639 Op::Shr => "shr",
640 Op::Unm => "unm",
641 Op::BNot => "bnot",
642 Op::Concat => "concat",
643 Op::Len => "len",
644 Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645 Op::SetField | Op::SetTable | Op::SetI => "newindex",
646 Op::Eq | Op::EqK => "eq",
647 Op::Lt => "lt",
648 Op::Le => "le",
649 _ => return None,
650 })
651}
652
/// Upper bound on the length of an `__index` / `__newindex` chain (PUC
/// `MAXTAGLOOP`).
const MAX_TAG_LOOP: u32 = 2_000;
/// Upper bound on a chain of `__call` metamethods (PUC `MAXCCMT`, lvm.c).
/// 200 is more than any reasonable program needs and matches PUC 5.4/5.5;
/// the earlier value of `15` was tight enough to fire on calls.lua :194
/// (N=20).
const MAX_CCMT: u32 = 200;
/// Bound on native↔Lua nesting (analogue of PUC `LUAI_MAXCCALLS`).
const MAX_C_DEPTH: u32 = 200;
/// Engine-level cap on the VM stack, consulted by call-site overflow
/// checks. It is slightly larger than PUC's `LUAI_MAXSTACK` so that engine
/// internals keep a little headroom above any single library push.
const MAX_LUA_STACK: u32 = 1 << 20;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code checks through
/// `lua_checkstack` before a multi-value push (`table.unpack` returning N
/// values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins it at
/// one million — `for j in {lim-10, …}` expects every j ≥ lim-10 to fail,
/// because the few slots the coroutine has already consumed push the
/// effective cap below lim-10.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// State of the PUC 5.4+ default warning function. The base library's
/// `warn` toggles it with the `@on` / `@off` control messages; any other
/// `@<word>` control is silently ignored, mirroring
/// `lauxlib.c::checkcontrol`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WarnState {
    /// Warnings are silently dropped (the state after `warn("@off")`).
    Off,
    /// Warnings are delivered to stderr (the state after `warn("@on")`).
    On,
}
683
/// Best-effort rendering of a `catch_unwind` payload as text.
///
/// A formatted `panic!("… {x}")` carries a `String`; a plain literal
/// `panic!("msg")` carries a `&'static str`. Any other payload type
/// degrades to `"<non-string panic>"`. Used by the native-call
/// `catch_unwind` to fold the panic into a Lua error.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|literal| (*literal).to_owned())
        })
        .unwrap_or_else(|| "<non-string panic>".to_owned())
}
697
698/// Combined error type returned by [`Vm::eval`] and friends — either the
699/// chunk failed to parse / compile, or it raised at runtime.
700#[derive(Debug)]
701pub enum Error {
702 /// Parse or compile failure.
703 Syntax(SyntaxError),
704 /// Runtime error raised during execution.
705 Runtime(LuaError),
706}
707
708impl From<SyntaxError> for Error {
709 fn from(e: SyntaxError) -> Error {
710 Error::Syntax(e)
711 }
712}
713
714impl From<LuaError> for Error {
715 fn from(e: LuaError) -> Error {
716 Error::Runtime(e)
717 }
718}
719
720impl Drop for Vm {
721 fn drop(&mut self) {
722 // state close: run `__gc` for every still-registered finalizable before
723 // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
724 // single pass — objects created by a closing finalizer are not
725 // re-finalized (they go to the heap's free list directly).
726 self.heap.queue_all_finalizers();
727 self.run_finalizers();
728 }
729}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742 frames.push(cf);
743 // Shadow maintenance is debug-only: release builds skip the
744 // increment + assertion entirely. The shadow's purpose in Week 1
745 // is to VERIFY the assumed invariant (frames_top == frames.len())
746 // across all push/pop sites; once Week 2+ migrates readers to
747 // consume the shadow, release will run the increment unconditionally.
748 #[cfg(debug_assertions)]
749 {
750 *frames_top += 1;
751 debug_assert_eq!(
752 *frames_top as usize,
753 frames.len(),
754 "P17-D frames_top out of sync after push",
755 );
756 }
757 #[cfg(not(debug_assertions))]
758 let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763 let r = frames.pop();
764 #[cfg(debug_assertions)]
765 {
766 if r.is_some() {
767 *frames_top = frames_top.saturating_sub(1);
768 }
769 debug_assert_eq!(
770 *frames_top as usize,
771 frames.len(),
772 "P17-D frames_top out of sync after pop",
773 );
774 }
775 #[cfg(not(debug_assertions))]
776 let _ = frames_top;
777 r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — one-time env-var read for
/// `LUNA_AOT_PROBE`.
///
/// Returns `true` iff the variable is set to any non-empty value. The
/// value is read with `var_os`, so a non-empty value that is not valid
/// Unicode still counts as "set" (`env::var` would report it as an error
/// and the probe would silently stay off).
///
/// The answer is cached in a `OnceLock`: the environment is consulted on
/// the first call only, and later changes to the variable are not seen.
/// Off by default — production deploys don't bleed diagnostic prints.
fn jit_probe_enabled() -> bool {
    static PROBE_ON: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        std::env::var_os("LUNA_AOT_PROBE").is_some_and(|value| !value.is_empty())
    })
}
794
795impl Vm {
796 /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797 /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798 /// the Vec replacement to keep the shadow valid.
799 #[inline(always)]
800 fn frames_resync(&mut self) {
801 // Debug-only Week 1 — see `frames_push_sync` comment.
802 #[cfg(debug_assertions)]
803 {
804 self.frames_top = self.frames.len() as u32;
805 }
806 }
807
808 // ====================================================================
809 // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810 //
811 // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812 // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813 // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814 // sites to consume them. Phase 4 removes Vec<CallFrame>.
815 //
816 // Preconditions (debug-asserted):
817 // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818 // - self.stack.len() > base + max_stack (caller has grown stack)
819 // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820 // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821 //
822 // No release-build cost when unused (LTO strips dead methods).
823 // ====================================================================
824
825 /// Write a Lua frame's closure pointer into `stack[base-2]`.
826 /// The caller must ensure `base >= 2` and the slot is within the
827 /// stack's allocated range.
828 #[inline]
829 #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830 fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831 debug_assert!(
832 base >= 2,
833 "frame closure slot needs base >= 2; got {}",
834 base
835 );
836 let idx = (base - 2) as usize;
837 debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838 self.stack[idx] = Value::Closure(cl);
839 }
840
841 /// Read a Lua frame's closure pointer from `stack[base-2]`.
842 /// Returns `None` if the slot doesn't hold a closure (caller is
843 /// expected to treat that as a corrupt frame).
844 ///
845 /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846 /// to avoid the enum-match cost on the hot path. Tag check via
847 /// 1-byte load + branch + `as_closure_unchecked` payload load.
848 #[inline]
849 #[allow(dead_code)]
850 fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851 debug_assert!(base >= 2);
852 let v = self.stack.get((base - 2) as usize)?;
853 if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854 // SAFETY: tag byte just verified == CLOSURE.
855 Some(unsafe { v.as_closure_unchecked() })
856 } else {
857 None
858 }
859 }
860
861 /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862 /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863 /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864 /// cleanly through the value stack without losing bits.
865 #[inline]
866 #[allow(dead_code)]
867 fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868 debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869 let idx = (base - 1) as usize;
870 debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871 self.stack[idx] = Value::Int(marker.to_raw());
872 }
873
874 /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875 /// `None` if the slot isn't a `Value::Int` (caller treats as a
876 /// corrupt frame); the kind tag itself may still be invalid, in
877 /// which case [`FrameMarker::kind`] returns `None` on the result.
878 ///
879 /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880 /// for the tag check + `as_int_unchecked` for the payload load.
881 #[inline]
882 #[allow(dead_code)]
883 fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884 debug_assert!(base >= 1);
885 let v = self.stack.get((base - 1) as usize)?;
886 if v.tag_byte() == crate::runtime::value::tag::INT {
887 // SAFETY: tag byte just verified == INT.
888 Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889 unsafe { v.as_int_unchecked() },
890 ))
891 } else {
892 None
893 }
894 }
895
896 /// Build the raw `Vm` struct without main coroutine / RNG seed / library
897 /// setup. Private helper shared by `Vm::new` and `Vm::new_minimal`; the
898 /// caller is responsible for the rest of the bring-up.
899 fn new_inner(version: LuaVersion) -> Vm {
900 let mut heap = Heap::new();
901 // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
902 // values strongly. gc.lua's "weak tables" section relies on that.
903 heap.no_ephemeron = version <= LuaVersion::Lua51;
904 // PUC 5.3 needs two GC cycles to finalize a table caught in a
905 // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
906 // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
907 heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
908 let globals = heap.new_table();
909 let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();
910
911 Vm {
912 heap,
913 stack: Vec::new(),
914 frames: Vec::new(),
915 frames_top: 0,
916 open_upvals: Vec::new(),
917 tbc: Vec::new(),
918 top: 0,
919 globals,
920 type_mt: [None; 5],
921 mm_names,
922 c_depth: 0,
923 pcall_depth: 0,
924 nny: 0,
925 msgh_depth: 0,
926 terminating: None,
927 rng: [0; 4],
928 started: std::time::Instant::now(),
929 version,
930 closing_err: None,
931 current: None,
932 main_ctx: None,
933 yielding: None,
934 native_nresults: -1,
935 main_coro: None,
936 gc_mode: "incremental",
937 gc_top: 0,
938 gc_pause: 200,
939 gc_stepmul: 100,
940 gc_stepsize: 13,
941 gc_finalizing: false,
942 capi_stack: Vec::new(),
943 capi_cstr_pin: None,
944 warn_state: WarnState::Off,
945 warn_buf: Vec::new(),
946 warn_log: Vec::new(),
947 instr_budget: None,
948 bytecode_loading: true,
949 puc_bytecode_loading: false,
950 loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
951 registry: None,
952 file_mt: None,
953 io_input: None,
954 io_output: None,
955 hook: HookState::default(),
956 in_hook: false,
957 pending_tailcalls: 0,
958 errored_native: None,
959 hook_ftransfer: 0,
960 hook_ntransfer: 0,
961 pending_tm: None,
962 pending_is_hook: false,
963 error_traceback: None,
964 public_call_depth: 0,
965 running_natives: Vec::new(),
966 running_native_slots: Vec::new(),
967 // v1.1 A2 — JIT-specific state factored into `JitState`
968 // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
969 // `install_jit_backend` / `luaL_newstate` swap in
970 // `CraneliftBackend` for callers that want JIT acceleration.
971 jit: crate::vm::jit_state::JitState::with_null_backend(),
972 // v1.1 B12 — host roots ticket pool for the `Lua` facade.
973 host_roots: Vec::new(),
974 // v1.3 Phase ML — MacroLua registry. Pre-populated with
975 // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
976 // when this Vm is constructed under `LuaVersion::MacroLua`.
977 macro_registry: if version == LuaVersion::MacroLua {
978 crate::frontend::macro_expander::MacroRegistry::with_builtins()
979 } else {
980 crate::frontend::macro_expander::MacroRegistry::new()
981 },
982 host_roots_free: Vec::new(),
983 sort_scratch: Vec::new(),
984 // v1.2 Track B — LuaUserdata trait sugar's per-Vm
985 // metatable cache. Populated lazily by register_userdata.
986 userdata_metatables: std::collections::HashMap::new(),
987 // v1.1 B6 — error classification metadata. Defaults to
988 // Runtime; set at known sites (syntax / budget trip /
989 // native error / type error).
990 last_error_kind: crate::vm::error::LuaErrorKind::default(),
991 last_error_source: None,
992 // v1.1 B10 Stage 1 — async embedder fields. Defaults
993 // preserve sync behavior bit-for-bit (`async_mode = false`
994 // means the budget hot loop errors out exactly as v1.0).
995 async_mode: false,
996 async_waker: None,
997 async_slice_size: 10_000,
998 host_yield_pending: false,
999 // v1.1 B10 Stage 2 — pending async-native state. Empty by
1000 // default; populated only by the dispatcher when an
1001 // async-marked NativeClosure is invoked under async_mode.
1002 pending_async_native_fut: None,
1003 pending_async_native_ctx: None,
1004 }
1005 }
1006
1007 /// Build a fully-loaded Vm — the default for embedders that want PUC's
1008 /// standard library surface. Equivalent to `Vm::new_minimal(version)`
1009 /// followed by `vm.open_all_libs()`.
1010 pub fn new(version: LuaVersion) -> Vm {
1011 let mut vm = Vm::new_minimal(version);
1012 vm.open_all_libs();
1013 vm
1014 }
1015
1016 /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1017 /// that want a sandbox (Redis-style scripts, in-game scripting with
1018 /// a curated API) call this and then `open_base` / `open_math` / etc.
1019 /// selectively. The Vm is otherwise fully initialized (main coroutine,
1020 /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1021 pub fn new_minimal(version: LuaVersion) -> Vm {
1022 let mut vm = Vm::new_inner(version);
1023 let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1024 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1025 unsafe { mc.as_mut() }.status = CoroStatus::Running;
1026 vm.main_coro = Some(mc);
1027 let (a, b) = vm.rng_auto_seed();
1028 vm.rng_seed(a as u64, b as u64);
1029 vm
1030 }
1031
1032 /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
1033 /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
1034 /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
1035 /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
1036 /// closures already populated `Proto.jit: JitProtoState::Compiled`
1037 /// does NOT evict those cached entries — call right after
1038 /// construction for a clean swap.
1039 ///
1040 /// Naming: `install_jit_backend` (not `install_default_jit`)
1041 /// because the "default" in luna-core is `NullJitBackend`; the
1042 /// "default JIT" lives in the `luna` crate.
1043 pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
1044 where
1045 C: crate::jit::IntChunkCompiler + 'static,
1046 T: crate::jit::TraceCompiler + 'static,
1047 {
1048 self.jit.chunk_compiler = Box::new(chunk);
1049 self.jit.trace_compiler = Box::new(trace);
1050 }
1051
1052 /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
1053 /// storage holder. Default is [`crate::jit::NullJitStorage`];
1054 /// the `luna_jit` crate's `install_default_jit` pairs this with
1055 /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
1056 /// also install a fresh `CraneliftJitStorage`. Storage holds
1057 /// the per-`Vm` JIT cache + handle collections that used to be
1058 /// `thread_local!`s in `luna_jit::jit_backend`.
1059 ///
1060 /// Idempotency: re-installing storage on a Vm that already
1061 /// holds compiled-trace pointers WILL evict their owners (the
1062 /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
1063 /// pages). Call right after construction for a clean swap.
1064 pub fn install_jit_storage<S>(&mut self, storage: S)
1065 where
1066 S: crate::jit::JitStorage + 'static,
1067 {
1068 self.jit.storage = Box::new(storage);
1069 }
1070
1071 /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
1072 /// reports "skipped" so every closure stays on the interpreter
1073 /// path, and the trace recorder's compile attempt always returns
1074 /// `None`. Intended for tests that want to verify the trait
1075 /// boundary works in a JIT-free configuration, and for the future
1076 /// `luna-core` build path that ships without Cranelift.
1077 ///
1078 /// Calling this on a Vm whose closures already populated
1079 /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
1080 /// cached entries — the dispatcher will still call into them. For
1081 /// a truly JIT-free run, call this immediately after construction.
1082 pub fn install_null_jit(&mut self) {
1083 self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
1084 self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
1085 }
1086
1087 /// Open the entire 5.5 standard library on a `new_minimal`-built Vm.
1088 /// `Vm::new` calls this; sandboxed embedders open libraries one at a
1089 /// time instead (`open_base`, `open_math`, `open_table`, …).
1090 pub fn open_all_libs(&mut self) {
1091 self.open_base();
1092 self.open_math();
1093 self.open_table();
1094 self.open_string();
1095 self.open_utf8();
1096 self.open_os_io();
1097 self.open_debug();
1098 self.open_coroutine();
1099 self.open_package();
1100 // PUC 5.2 introduced `bit32` and 5.3 retired it (the native bitwise
1101 // operators replace it on 64-bit integers). Only expose it under 5.2
1102 // so bitwise.lua's first line (`bit32.band(...)`) resolves without
1103 // leaking the global into newer dialects.
1104 if self.version == LuaVersion::Lua52 {
1105 self.open_bit32();
1106 }
1107 }
1108
1109 /// Install the base library (`print`, `type`, `pairs`, `tostring`,
1110 /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
1111 /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
1112 /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
1113 /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
1114 /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
1115 /// once per Vm.
1116 pub fn open_base(&mut self) {
1117 crate::vm::builtins::open_base(self);
1118 }
1119 /// Install the `math` standard library.
1120 pub fn open_math(&mut self) {
1121 crate::vm::lib_math::open_math(self);
1122 }
1123 /// Install the `table` standard library.
1124 pub fn open_table(&mut self) {
1125 crate::vm::lib_table::open_table(self);
1126 }
1127 /// Install the `string` standard library (and the shared string metatable).
1128 pub fn open_string(&mut self) {
1129 crate::vm::lib_string::open_string(self);
1130 }
1131 /// Install the `utf8` standard library (5.3+).
1132 pub fn open_utf8(&mut self) {
1133 crate::vm::lib_utf8::open_utf8(self);
1134 }
1135 /// `os` and `io` are merged because file userdata shares state with both
1136 /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
1137 /// wraps `os.execute`'s shell).
1138 pub fn open_os_io(&mut self) {
1139 crate::vm::lib_os_io::open_os_io(self);
1140 }
1141 /// Install the `debug` standard library (introspection / hooks). Off by
1142 /// default for sandbox embedders.
1143 pub fn open_debug(&mut self) {
1144 crate::vm::lib_debug::open_debug(self);
1145 }
1146 /// Install the `coroutine` standard library.
1147 pub fn open_coroutine(&mut self) {
1148 crate::vm::lib_coroutine::open_coroutine(self);
1149 }
1150 /// `package` plus the 5.1-only `module` and `package.seeall` aliases.
1151 pub fn open_package(&mut self) {
1152 crate::vm::lib_os_io::open_package(self);
1153 }
1154 /// 5.2-only `bit32` library (5.3+ retired in favour of native bitwise
1155 /// ops on 64-bit integers).
1156 pub fn open_bit32(&mut self) {
1157 crate::vm::lib_bit32::open_bit32(self);
1158 }
1159
1160 /// xoshiro256** next.
1161 pub(crate) fn rng_next(&mut self) -> u64 {
1162 let s = &mut self.rng;
1163 let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1164 let t = s[1] << 17;
1165 s[2] ^= s[0];
1166 s[3] ^= s[1];
1167 s[1] ^= s[2];
1168 s[0] ^= s[3];
1169 s[2] ^= t;
1170 s[3] = s[3].rotate_left(45);
1171 result
1172 }
1173
1174 /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1175 pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1176 // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1177 // state), then 16 discards to spread the seed. Matches PUC's exact
1178 // sequence so the low-level conformance test passes.
1179 self.rng = [a, 0xff, b, 0];
1180 for _ in 0..16 {
1181 self.rng_next();
1182 }
1183 }
1184
1185 /// Wall-clock since VM creation (os.clock approximation).
1186 pub(crate) fn uptime(&self) -> std::time::Duration {
1187 self.started.elapsed()
1188 }
1189
1190 /// Entropy for math.randomseed() with no arguments.
1191 pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1192 let t = std::time::SystemTime::now()
1193 .duration_since(std::time::UNIX_EPOCH)
1194 .map(|d| d.as_nanos() as u64)
1195 .unwrap_or(0);
1196 let addr = &self.rng as *const _ as u64;
1197 (t as i64, addr as i64)
1198 }
1199
1200 /// Allocate a native function object (no upvalues): builtin registration.
1201 pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1202 Value::Native(self.heap.new_native(f, Box::new([])))
1203 }
1204
1205 /// Allocate a native function object with captured upvalues.
1206 pub fn native_with(
1207 &mut self,
1208 f: crate::runtime::value::NativeFn,
1209 upvals: Box<[Value]>,
1210 ) -> Value {
1211 Value::Native(self.heap.new_native(f, upvals))
1212 }
1213
1214 /// Install the shared string metatable (string library, P04).
1215 pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
1216 self.type_mt[3] = mt;
1217 }
1218
1219 /// The current globals table (`_G` / `_ENV` source for new chunks).
1220 pub fn globals(&self) -> Gc<Table> {
1221 self.globals
1222 }
1223
1224 /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1225 /// Library code that pushes a known number of fresh slots — e.g.
1226 /// `table.unpack` returning N values — consults this to refuse when
1227 /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1228 /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1229 /// coroutine's already-built table eats a few slots, so an unpack of
1230 /// ~lim values can't fit.
1231 pub(crate) fn stack_room(&self) -> i64 {
1232 PUC_MAXSTACK - (self.stack.len() as i64)
1233 }
1234
1235 /// Repoint the thread's "global table" used by *future* `Vm::load` calls
1236 /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
1237 /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
1238 /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
1239 pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
1240 self.globals = env;
1241 }
1242
1243 /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
1244 /// 5.5). Determines numeric semantics, available standard libraries, and
1245 /// metamethod behavior.
1246 pub fn version(&self) -> LuaVersion {
1247 self.version
1248 }
1249
1250 /// Set a global by name. `v` may be any `IntoValue`: a primitive
1251 /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1252 /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1253 /// `Gc<NativeClosure>` handle.
1254 ///
1255 /// Returns `Err(LuaError)` only if the globals table overflows
1256 /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1257 /// String interning + key construction cannot fail.
1258 ///
1259 /// ```
1260 /// # use luna_core::vm::Vm;
1261 /// # use luna_core::version::LuaVersion;
1262 /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1263 /// vm.set_global("answer", 42).unwrap();
1264 /// vm.set_global("ratio", 0.5_f64).unwrap();
1265 /// vm.set_global("hello", "world").unwrap();
1266 /// let r = vm.eval("return answer, ratio, hello").unwrap();
1267 /// assert_eq!(r.len(), 3);
1268 /// ```
1269 pub fn set_global<V: crate::vm::IntoValue>(
1270 &mut self,
1271 name: &str,
1272 v: V,
1273 ) -> Result<(), LuaError> {
1274 let v = v.into_value(self);
1275 let k = Value::Str(self.heap.intern(name.as_bytes()));
1276 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1277 unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1278 self.heap
1279 .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1280 Ok(())
1281 }
1282
1283 /// Backward write barrier shorthand for native lib code: demote `t` from
1284 /// BLACK back to gray so the next propagate step re-traces its fields.
1285 /// No-op outside Propagate (parent is never BLACK at mutation time).
1286 pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1287 self.heap
1288 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1289 }
1290
1291 /// Forward write barrier shorthand: a closed upvalue is a single-slot
1292 /// container — `barrier_forward` is cheaper than `barrier_back` here.
1293 /// No-op outside Propagate.
1294 pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1295 self.heap
1296 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1297 }
1298
1299 /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
1300 /// under non-MacroLua dialects (the macro is stored but the load
1301 /// path only consults the registry when
1302 /// `self.version == LuaVersion::MacroLua`).
1303 ///
1304 /// `name` is stored without the leading `@` — source code writes
1305 /// `@double(x)` to invoke a macro registered as `"double"`.
1306 pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
1307 self.macro_registry.register(name, m);
1308 }
1309
1310 /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom).
1311 /// Mostly useful for tests / dogfood resets.
1312 pub fn clear_macros(&mut self) {
1313 self.macro_registry.clear();
1314 }
1315
1316 /// Parse + compile a chunk and close it over the globals table.
1317 pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1318 // Reject oversize input *before* handing the parser/lexer a
1319 // potentially multi-GB slice. The PUC-shaped `not enough memory`
1320 // message keeps `heavy.lua::loadrep` compatibility: that test
1321 // accepts either `string length overflow` or `not enough memory`
1322 // as the failure mode for a feeder loop that outruns the host
1323 // allocator. See `set_loader_input_budget`.
1324 if src.len() > self.loader_input_budget {
1325 return Err(SyntaxError {
1326 line: 0,
1327 msg: b"not enough memory".to_vec(),
1328 });
1329 }
1330 // a precompiled (binary) chunk is undumped; source is parsed + compiled
1331 let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1332 if is_bytecode && !self.bytecode_loading {
1333 return Err(SyntaxError {
1334 line: 0,
1335 msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1336 });
1337 }
1338 let proto = if is_bytecode {
1339 let allow_puc = self.puc_bytecode_loading;
1340 crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1341 |msg| SyntaxError {
1342 line: 0,
1343 msg: msg.into_bytes(),
1344 },
1345 )?
1346 } else if self.version.is_macro_lua() {
1347 // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1348 // token vec, run the macro expander pre-pass against the
1349 // per-Vm registry, then hand the rewritten stream to
1350 // `parse_tokens`. The AST + compiler are dialect-agnostic
1351 // because by this point all `@`/quote tokens are gone.
1352 let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1353 let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1354 loop {
1355 let t = lexer.next_token()?;
1356 let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1357 raw.push(t);
1358 if eof {
1359 break;
1360 }
1361 }
1362 // Drop the trailing Eof — expander operates on the body and
1363 // `parse_tokens` reinserts Eof when it runs out of tokens.
1364 raw.pop();
1365 let expanded = self.macro_registry.expand(raw)?;
1366 let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1367 compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1368 } else {
1369 let ast = parse(src, self.version)?;
1370 compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1371 };
1372 // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1373 // upvalue with the globals table when the closure has *exactly* one
1374 // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1375 // function with two-or-more upvalues keeps every cell at nil; the
1376 // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1377 // :293's `assert(x() == nil)` pins this contract.
1378 let n = proto.upvals.len();
1379 let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1380 if n == 0 {
1381 // synthetic main chunk has no declared upvalues, but the engine
1382 // still expects at least one cell so the host can probe via
1383 // `debug.upvalueid` etc. Match the historical luna shape.
1384 ups.push(
1385 self.heap
1386 .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1387 );
1388 } else if n == 1 {
1389 ups.push(
1390 self.heap
1391 .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1392 );
1393 } else {
1394 for _ in 0..n {
1395 ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1396 }
1397 }
1398 Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1399 }
1400
1401 /// Compile and run `src` as an anonymous chunk; return its results.
1402 /// Source name in the traceback is `"=eval"`. Syntax errors are
1403 /// surfaced as `LuaError` carrying the formatted PUC-style message
1404 /// (interned through the heap so the error value composes with
1405 /// `pcall` / `error_text` like any runtime error).
1406 pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
1407 self.eval_chunk(src, "=eval")
1408 }
1409
1410 /// Render an error value for messages/tests. Non-string errors —
1411 /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1412 /// (`"(error object is a table value)"`); embedders that need
1413 /// structured payloads should inspect `e.0` directly. Errors whose
1414 /// text starts with `"native panic:"` indicate a Rust panic
1415 /// crossed `catch_unwind` — the Vm may be inconsistent and should
1416 /// be dropped (do not reuse).
1417 pub fn error_text(&self, e: &LuaError) -> String {
1418 match e.0 {
1419 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1420 v => format!("(error object is a {} value)", v.type_name()),
1421 }
1422 }
1423
1424 /// Call any callable value from the host (or from natives like pcall).
1425 pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1426 // host-level entry (no enclosing exec): drop any error state from a
1427 // prior call that propagated uncaught (`error_traceback` would
1428 // otherwise leak into the next debug.traceback call).
1429 if self.public_call_depth == 0 {
1430 self.error_traceback = None;
1431 }
1432 self.public_call_depth += 1;
1433 // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1434 // chunk whose body fits the S1 int-arith whitelist short-circuits
1435 // the whole interpreter dispatch and runs straight through the
1436 // mmap'd native code. The lookup is one Cell::get + one match —
1437 // the slow path (compile attempt on first reach) is paid once per
1438 // Proto.
1439 if args.is_empty()
1440 && let Value::Closure(cl) = f
1441 && let Some(vs) = self.try_jit_call(cl)
1442 {
1443 self.public_call_depth -= 1;
1444 return Ok(vs);
1445 }
1446 let r = self.call_value_impl(f, args, true);
1447 self.public_call_depth -= 1;
1448 r
1449 }
1450
1451 /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1452 /// `Some(values)` when the cached native fn is callable for a
1453 /// zero-arg call. (Non-zero-arg dispatch is handled by
1454 /// `try_jit_call_op` from inside `begin_call`.)
1455 fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1456 use crate::runtime::function::JitProtoState;
1457 if !self.jit.enabled {
1458 return None;
1459 }
1460 let proto = cl.proto;
1461 if let JitProtoState::Untried = proto.jit.get() {
1462 self.populate_jit_cache(proto);
1463 }
1464 match proto.jit.get() {
1465 JitProtoState::Compiled {
1466 entry,
1467 num_args: 0,
1468 returns_one,
1469 arg_float_mask: _,
1470 arg_table_mask: _,
1471 ret_is_float,
1472 ret_is_table,
1473 } => {
1474 // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1475 let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1476 // P11-S5c / S5d.J — install the active Vm + closure
1477 // for any Rust helper the JIT'd code may call (e.g.
1478 // `luna_jit_new_table`, `luna_jit_upval_get`) via
1479 // cranelift `Linkage::Import`. RAII clear on return.
1480 // Chunks with no upvalue reads don't touch the closure
1481 // slot, paying nothing.
1482 // v1.1 A1 Session A — route through chunk_compiler so
1483 // the NullJitBackend path stays inert. Raw-ptr arg
1484 // avoids the &mut self borrow conflict against the
1485 // shared self.jit.chunk_compiler read.
1486 let vm_ptr: *mut Vm = self;
1487 let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1488 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1489 let r = unsafe { f() };
1490 drop(_jit_vm_guard);
1491 // P11-S5d.E' — a JIT helper may have detected a metatable
1492 // on a table operand and parked a deopt request here.
1493 // Discard the sentinel value and return None so the caller
1494 // re-runs the call through the interpreter, which honours
1495 // __index/__newindex.
1496 if self.jit.pending_err.take().is_some() {
1497 return None;
1498 }
1499 Some(if returns_one {
1500 let v = if ret_is_float {
1501 Value::Float(f64::from_bits(r as u64))
1502 } else if ret_is_table {
1503 Value::Table(crate::runtime::Gc::from_ptr(
1504 r as *mut crate::runtime::Table,
1505 ))
1506 } else {
1507 Value::Int(r)
1508 };
1509 vec![v]
1510 } else {
1511 Vec::new()
1512 })
1513 }
1514 // Non-zero-arg Compiled state: call_value's empty-args
1515 // fast path can't drive it. Op::Call handles those.
1516 JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1517 }
1518 }
1519
1520 /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1521 /// `Compiled { … }` or `Failed`; idempotent on already-populated
1522 /// states (call sites guard with a get before invoking).
1523 ///
1524 /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1525 /// `proto.code`. Compiled artefacts live in the thread-local
1526 /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1527 /// `Vm`s loading the same source skip the cranelift compile step
1528 /// entirely.
1529 fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1530 use crate::runtime::function::JitProtoState;
1531 let version = self.version();
1532 let pre53 = version <= crate::version::LuaVersion::Lua53;
1533 // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1534 // are Float). The JIT's `GetUpval` ValueRead path uses this
1535 // to default-pin upvalue reads to Float without a tag check.
1536 let float_only = version <= crate::version::LuaVersion::Lua52;
1537 // v2.0 Track J sub-step J-B — split-borrow JitState so the
1538 // trait method can take `&mut dyn JitStorage` without
1539 // double-borrowing self.jit.
1540 let jit = &mut self.jit;
1541 let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1542 match jit
1543 .chunk_compiler
1544 .try_compile(storage, proto, pre53, float_only)
1545 {
1546 crate::jit::CompileResult::Compiled {
1547 entry,
1548 num_args,
1549 returns_one,
1550 arg_float_mask,
1551 arg_table_mask,
1552 ret_is_float,
1553 ret_is_table,
1554 } => {
1555 proto.jit.set(JitProtoState::Compiled {
1556 entry,
1557 num_args,
1558 returns_one,
1559 arg_float_mask,
1560 arg_table_mask,
1561 ret_is_float,
1562 ret_is_table,
1563 });
1564 }
1565 crate::jit::CompileResult::Skipped => {
1566 proto.jit.set(JitProtoState::Failed);
1567 }
1568 }
1569 }
1570
1571 /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
1572 /// before `push_frame`. Returns `true` when the call was handled
1573 /// in-place (no new Lua frame). Constraints: every arg slot must
1574 /// be `Value::Int`, the cached arity must match the call site's
1575 /// `nargs`, the host wanted-count `wanted` is honoured by
1576 /// `finish_results`. Also bails when a debug hook is armed —
1577 /// JIT'd code does not fire line / call / return hooks, so any
1578 /// active hook makes the interpreter the source of truth.
1579 fn try_jit_call_op(
1580 &mut self,
1581 cl: Gc<LuaClosure>,
1582 func_slot: u32,
1583 nargs: u32,
1584 wanted: i32,
1585 ) -> bool {
1586 use crate::runtime::function::JitProtoState;
1587 if !self.jit.enabled {
1588 return false;
1589 }
1590 // Any active debug hook means the interpreter has to run the
1591 // call so the hook gets the expected events.
1592 if self.hook.func.is_some() || self.hook.rust_func.is_some() {
1593 return false;
1594 }
1595 let proto = cl.proto;
1596 if let JitProtoState::Untried = proto.jit.get() {
1597 self.populate_jit_cache(proto);
1598 }
1599 let JitProtoState::Compiled {
1600 entry,
1601 num_args,
1602 returns_one,
1603 arg_float_mask,
1604 arg_table_mask,
1605 ret_is_float,
1606 ret_is_table,
1607 } = proto.jit.get()
1608 else {
1609 return false;
1610 };
1611 if num_args as u32 != nargs {
1612 return false;
1613 }
1614 // Pack args into i64 bit-patterns per the per-slot expected
1615 // kind. A Float-typed slot accepts Value::Float verbatim and
1616 // promotes Value::Int(x) via i64 → f64; a Table-typed slot
1617 // accepts only Value::Table and passes the raw Gc ptr; an
1618 // Int-typed slot accepts only Value::Int. Any other shape
1619 // bails to the interpreter so the call's actual dynamics
1620 // (metamethod dispatch / type-coerce) take over.
1621 let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
1622 [0; crate::jit::MAX_JIT_ARITY as usize];
1623 for i in 0..num_args as usize {
1624 let v = self.stack[(func_slot + 1) as usize + i];
1625 let want_float = (arg_float_mask >> i) & 1 == 1;
1626 let want_table = (arg_table_mask >> i) & 1 == 1;
1627 args[i] = match (want_table, want_float, v) {
1628 (true, _, Value::Table(t)) => t.as_ptr() as i64,
1629 (false, false, Value::Int(x)) => x,
1630 (false, true, Value::Float(f)) => f.to_bits() as i64,
1631 (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
1632 _ => return false,
1633 };
1634 }
1635 // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
1636 // matching guard in `try_jit_call`.
1637 // v1.1 A1 Session A — route through chunk_compiler.
1638 let vm_ptr: *mut Vm = self;
1639 let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1640 // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1641 let r = unsafe {
1642 match num_args {
1643 0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
1644 1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
1645 2 => {
1646 (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
1647 }
1648 3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
1649 args[0], args[1], args[2],
1650 ),
1651 4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
1652 args[0], args[1], args[2], args[3],
1653 ),
1654 _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
1655 }
1656 };
1657 drop(_jit_vm_guard);
1658 // P11-S5d.E' — see matching path in `try_jit_call`. A helper
1659 // flagged a metatable on a table operand; bail to the interpreter
1660 // so `push_frame` runs the call from scratch.
1661 if self.jit.pending_err.take().is_some() {
1662 return false;
1663 }
1664 // Write result at func_slot, replacing the closure value, then
1665 // hand to finish_results to pad/truncate per the call site's
1666 // `wanted` count.
1667 if returns_one {
1668 let v = if ret_is_float {
1669 Value::Float(f64::from_bits(r as u64))
1670 } else if ret_is_table {
1671 Value::Table(crate::runtime::Gc::from_ptr(
1672 r as *mut crate::runtime::Table,
1673 ))
1674 } else {
1675 Value::Int(r)
1676 };
1677 self.stack[func_slot as usize] = v;
1678 self.finish_results(func_slot, 1, wanted);
1679 } else {
1680 self.finish_results(func_slot, 0, wanted);
1681 }
1682 true
1683 }
1684
1685 /// `call_value` with control over the `from_c` debug boundary. A `__close`
1686 /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1687 /// invokes it inside that ci), so it is called with `from_c = false`: its
1688 /// debug parent is the closing function, not a synthetic C level.
1689 fn call_value_impl(
1690 &mut self,
1691 f: Value,
1692 args: &[Value],
1693 from_c: bool,
1694 ) -> Result<Vec<Value>, LuaError> {
1695 if self.c_depth >= MAX_C_DEPTH {
1696 return Err(self.rt_err("stack overflow"));
1697 }
1698 self.c_depth += 1;
1699 let func_slot = self.stack.len() as u32;
1700 self.stack.push(f);
1701 self.stack.extend_from_slice(args);
1702 self.top = self.stack.len() as u32;
1703 let r = self.call_at(func_slot, args.len() as u32, from_c);
1704 self.c_depth -= 1;
1705 if r.is_err()
1706 && self.yielding.is_none()
1707 && self.terminating.is_none()
1708 && !self.host_yield_pending
1709 && self.pending_async_native_fut.is_none()
1710 {
1711 // A `coroutine.yield` in flight raises a sentinel error to unwind the
1712 // Rust stack, but the suspended coroutine's frames/registers (which
1713 // sit at/above `func_slot`) must survive for the next resume — so we
1714 // only truncate on a real error. A self-close termination is in the
1715 // same boat: the dying thread's state is discarded wholesale.
1716 // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1717 // the same boat as `yielding`: the next `EvalFuture::poll`
1718 // resumes the same call, so the in-flight frames must
1719 // survive.
1720 self.stack.truncate(func_slot as usize);
1721 self.top = func_slot;
1722 }
1723 r
1724 }
1725
1726 /// Invoke `f` with the running thread marked non-yieldable for the duration
1727 /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1728 /// boundary and errors instead of suspending. Used by library callbacks
1729 /// (sort comparator, gsub replacement) that run via synchronous Rust
1730 /// recursion and so could not be re-entered after a yield.
1731 pub(crate) fn call_noyield(
1732 &mut self,
1733 f: Value,
1734 args: &[Value],
1735 ) -> Result<Vec<Value>, LuaError> {
1736 self.nny += 1;
1737 let r = self.call_value(f, args);
1738 self.nny -= 1;
1739 r
1740 }
1741
1742 // ---- coroutines (P05) ----
1743
1744 pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1745 // The new coroutine inherits the creating thread's current globals
1746 // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1747 // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1748 // it here picks the creator regardless of which coro is running.
1749 self.heap.new_coro(body, self.globals)
1750 }
1751
1752 /// Is `t` the thread whose context is currently live in the VM?
1753 pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1754 match (self.current, t) {
1755 (None, None) => true,
1756 (Some(a), Some(b)) => a.ptr_eq(b),
1757 _ => false,
1758 }
1759 }
1760
1761 /// Read an open-upvalue slot from its owning thread's stack (the live VM
1762 /// stack if that thread is current, else its saved context).
1763 #[doc(hidden)]
1764 pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1765 let s = slot as usize;
1766 if self.is_current_thread(thread) {
1767 self.stack[s]
1768 } else {
1769 match thread {
1770 Some(co) => co.stack[s],
1771 None => self.main_ctx.as_ref().expect("main context").stack[s],
1772 }
1773 }
1774 }
1775
1776 fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1777 let s = slot as usize;
1778 if self.is_current_thread(thread) {
1779 self.stack[s] = v;
1780 } else {
1781 match thread {
1782 Some(co) => {
1783 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1784 unsafe { co.as_mut() }.stack[s] = v;
1785 // co.stack is traced by Coro::trace; demote co back to
1786 // gray so propagate re-traces this slot if it was
1787 // already black.
1788 self.heap
1789 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1790 }
1791 None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1792 }
1793 }
1794 }
1795
1796 /// Whether `co` is the main thread's identity object.
1797 pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1798 self.main_coro.is_some_and(|m| m.ptr_eq(co))
1799 }
1800
1801 /// The status of `co` from the caller's view. The main thread's identity
1802 /// object has no stored status — it is "running" when nothing else runs,
1803 /// else "normal" (it resumed the active coroutine).
1804 pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1805 if self.is_main_coro(co) {
1806 if self.current.is_none() {
1807 CoroStatus::Running
1808 } else {
1809 CoroStatus::Normal
1810 }
1811 } else {
1812 co.status
1813 }
1814 }
1815
1816 /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1817 /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1818 /// context. Handlers see the coroutine's death error (if it died by error)
1819 /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1820 /// with error `e` and no handler overrode it; `Err` means a handler raised.
1821 pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1822 // re-entrant close: a __close handler closed its own coroutine while the
1823 // outer close is mid-flight (its context is live). Report success and let
1824 // the outer close finish — re-entering the swap would corrupt the stack.
1825 if self.current.is_some_and(|c| c.ptr_eq(co)) {
1826 return Ok(None);
1827 }
1828 // A chain of coroutines whose `__close` handlers each close the previous
1829 // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1830 // The calling handler's `call_value` has already pushed `c_depth` to the
1831 // cap, so here it reads as full first — report PUC's "C stack overflow"
1832 // before the next handler call would surface the plainer "stack overflow".
1833 if self.c_depth >= MAX_C_DEPTH {
1834 return Err(self.rt_err("C stack overflow"));
1835 }
1836 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1837 let death_err = unsafe { co.as_mut() }.error_value.take();
1838 // swap the caller's live context out (into a GC-rooted home) and the
1839 // coroutine's in, mirroring resume_coro, so the __close handlers run on
1840 // the coroutine's stack while everything stays rooted.
1841 let resumer = self.current;
1842 let rctx = self.take_ctx();
1843 match resumer {
1844 Some(r) => {
1845 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1846 let m = unsafe { r.as_mut() };
1847 m.stack = rctx.stack;
1848 m.frames = rctx.frames;
1849 m.open_upvals = rctx.open_upvals;
1850 m.tbc = rctx.tbc;
1851 m.top = rctx.top;
1852 m.pcall_depth = rctx.pcall_depth;
1853 }
1854 None => self.main_ctx = Some(rctx),
1855 }
1856 self.load_coro_ctx(co);
1857 self.current = Some(co);
1858 let result = self.close_slots(0, death_err);
1859 // discard the (now-closed) coroutine context and restore the caller
1860 let _ = self.take_ctx();
1861 match resumer {
1862 Some(r) => {
1863 self.load_coro_ctx(r);
1864 self.current = Some(r);
1865 }
1866 None => {
1867 let m = self.main_ctx.take().expect("main context saved");
1868 self.put_ctx(m);
1869 self.current = None;
1870 }
1871 }
1872 {
1873 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1874 let m = unsafe { co.as_mut() };
1875 m.status = CoroStatus::Dead;
1876 m.stack = Vec::new();
1877 m.frames = Vec::new();
1878 m.open_upvals = Vec::new();
1879 m.tbc = Vec::new();
1880 m.top = 0;
1881 m.pcall_depth = 0;
1882 m.resume_at = None;
1883 m.error_value = None;
1884 }
1885 result.map(|()| death_err)
1886 }
1887
1888 /// `coroutine.running`: the running thread plus whether it is the main one.
1889 pub(crate) fn running_thread(&self) -> (Value, bool) {
1890 match self.current {
1891 Some(co) => (Value::Coro(co), false),
1892 None => (Value::Coro(self.main_coro.expect("main coro")), true),
1893 }
1894 }
1895
1896 /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1897 /// thread) can yield. The main thread never can; any other coroutine can
1898 /// unless it is dead.
1899 pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1900 match co {
1901 Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1902 // the running thread can yield only outside any non-yieldable C call
1903 None => self.current.is_some() && self.nny == 0,
1904 }
1905 }
1906
1907 /// Why `coroutine.yield` may not suspend the running thread right now, as a
1908 /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1909 /// from "inside an unyieldable C call" (sort/gsub callback).
1910 pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1911 if self.current.is_none() {
1912 Some("attempt to yield from outside a coroutine")
1913 } else if self.nny > 0 {
1914 Some("attempt to yield across a C-call boundary")
1915 } else {
1916 None
1917 }
1918 }
1919
    /// The coroutine whose context is currently live in the VM fields, or
    /// `None` when the main thread is the one running. Plain copy of
    /// `self.current`; no state is changed.
    pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
        self.current
    }
1924
1925 /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1926 /// its pending `__close` handlers, then signal termination. The handlers run
1927 /// here, in place, with the thread still non-yieldable (a yield in one hits
1928 /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1929 /// way a yield does — `exec_with` propagates it past any protecting pcall
1930 /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1931 /// clean death (or, if a handler raised, the coroutine's error).
1932 pub(crate) fn close_running(&mut self) -> LuaError {
1933 let death = match self.close_slots(0, None) {
1934 Ok(()) => None,
1935 Err(e) => Some(e.0),
1936 };
1937 self.terminating = Some(death);
1938 LuaError(Value::Nil)
1939 }
1940
1941 /// `coroutine.status` as seen by the caller.
1942 pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1943 match self.effective_coro_status(co) {
1944 CoroStatus::Suspended => "suspended",
1945 CoroStatus::Running => "running",
1946 CoroStatus::Normal => "normal",
1947 CoroStatus::Dead => "dead",
1948 }
1949 }
1950
1951 fn take_ctx(&mut self) -> SavedCtx {
1952 let saved = SavedCtx {
1953 stack: std::mem::take(&mut self.stack),
1954 frames: std::mem::take(&mut self.frames),
1955 open_upvals: std::mem::take(&mut self.open_upvals),
1956 tbc: std::mem::take(&mut self.tbc),
1957 top: self.top,
1958 pcall_depth: self.pcall_depth,
1959 hook: self.hook,
1960 globals: self.globals,
1961 };
1962 self.frames_resync(); // P17-D Week 1 — frames now empty.
1963 saved
1964 }
1965
1966 fn put_ctx(&mut self, c: SavedCtx) {
1967 self.stack = c.stack;
1968 self.frames = c.frames;
1969 self.open_upvals = c.open_upvals;
1970 self.tbc = c.tbc;
1971 self.top = c.top;
1972 self.pcall_depth = c.pcall_depth;
1973 self.hook = c.hook;
1974 self.globals = c.globals;
1975 self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
1976 }
1977
    /// Move a coroutine's saved context into the live VM fields.
    ///
    /// The stack, frames, open upvalues and tbc list are *taken* out of
    /// `co` (leaving empty defaults there); `top`, `pcall_depth`, `hook`
    /// and `globals` are copied. The previous live values are overwritten,
    /// so the caller must already have saved them somewhere.
    fn load_coro_ctx(&mut self, co: Gc<Coro>) {
        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
        let m = unsafe { co.as_mut() };
        self.stack = std::mem::take(&mut m.stack);
        self.frames = std::mem::take(&mut m.frames);
        self.open_upvals = std::mem::take(&mut m.open_upvals);
        self.tbc = std::mem::take(&mut m.tbc);
        self.top = m.top;
        self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
        // Scalar per-thread state: copied, not moved, so `co` keeps its values.
        self.pcall_depth = m.pcall_depth;
        self.hook = m.hook;
        self.globals = m.globals;
    }
1992
1993 /// Save the live VM context back into a coroutine object.
1994 fn store_coro_ctx(&mut self, co: Gc<Coro>) {
1995 let c = self.take_ctx();
1996 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1997 let m = unsafe { co.as_mut() };
1998 m.stack = c.stack;
1999 m.frames = c.frames;
2000 m.open_upvals = c.open_upvals;
2001 m.tbc = c.tbc;
2002 m.top = c.top;
2003 m.pcall_depth = c.pcall_depth;
2004 m.hook = c.hook;
2005 m.globals = c.globals;
2006 // bulk-overwrite of every collectable field traced by Coro::trace:
2007 // demote the coro back to gray so propagate re-traces its new state.
2008 self.heap
2009 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2010 }
2011
2012 /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
2013 /// or errors. Ok(values) carries yielded or returned values; Err carries an
2014 /// error raised inside the coroutine (the coroutine becomes dead).
2015 pub(crate) fn resume_coro(
2016 &mut self,
2017 co: Gc<Coro>,
2018 args: Vec<Value>,
2019 ) -> Result<Vec<Value>, LuaError> {
2020 match co.status {
2021 CoroStatus::Suspended => {}
2022 CoroStatus::Dead => return Err(self.rt_err("cannot resume dead coroutine")),
2023 _ => return Err(self.rt_err("cannot resume non-suspended coroutine")),
2024 }
2025 if self.c_depth >= MAX_C_DEPTH {
2026 return Err(self.rt_err("C stack overflow"));
2027 }
2028 self.c_depth += 1;
2029 let resumer = self.current;
2030 // save the resumer's live context away
2031 let rctx = self.take_ctx();
2032 match resumer {
2033 Some(r) => {
2034 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2035 let m = unsafe { r.as_mut() };
2036 m.stack = rctx.stack;
2037 m.frames = rctx.frames;
2038 m.open_upvals = rctx.open_upvals;
2039 m.tbc = rctx.tbc;
2040 m.top = rctx.top;
2041 m.pcall_depth = rctx.pcall_depth;
2042 m.globals = rctx.globals;
2043 m.status = CoroStatus::Normal;
2044 // bulk overwrite of every traced field on r — mirror
2045 // store_coro_ctx's barrier_back so propagate re-traces r.
2046 self.heap
2047 .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
2048 }
2049 None => self.main_ctx = Some(rctx),
2050 }
2051 // swap the coroutine in
2052 self.load_coro_ctx(co);
2053 {
2054 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2055 let m = unsafe { co.as_mut() };
2056 m.status = CoroStatus::Running;
2057 m.resumer = resumer;
2058 }
2059 // co.resumer is a traced Gc field; barrier_back covers the new
2060 // resumer reference and any future field writes during this call.
2061 self.heap
2062 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2063 self.current = Some(co);
2064
2065 // drive it
2066 let drive = if co.started {
2067 self.coro_continue(&args)
2068 } else {
2069 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2070 unsafe { co.as_mut() }.started = true;
2071 self.coro_first(co.body, &args)
2072 };
2073
2074 // classify: a self-close termination or a pending yield each win over
2075 // the (sentinel) error they raised to unwind the Rust stack.
2076 let (outcome, status) = if let Some(death) = self.terminating.take() {
2077 // the coroutine closed itself: it dies now, cleanly or with the
2078 // error a `__close` handler raised.
2079 match death {
2080 Some(e) => {
2081 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2082 unsafe { co.as_mut() }.error_value = Some(e);
2083 self.heap
2084 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2085 (Err(LuaError(e)), CoroStatus::Dead)
2086 }
2087 None => (Ok(Vec::new()), CoroStatus::Dead),
2088 }
2089 } else {
2090 match self.yielding.take() {
2091 Some((vals, fslot, nres)) => {
2092 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2093 unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2094 (Ok(vals), CoroStatus::Suspended)
2095 }
2096 None => {
2097 // died: a return is clean, an error is remembered so a later
2098 // `coroutine.close` can report it (PUC lua_closethread).
2099 // Capture the error-point traceback (set by `unwind` before
2100 // popping the failing frames) and prepend a synthetic
2101 // top entry for the C native that initiated the error
2102 // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2103 // on the dead coroutine still shows the error site
2104 // (db.lua :848 family).
2105 if drive.is_err() {
2106 let mut tb = self.error_traceback.take().unwrap_or_default();
2107 if let Some(nm) = self.errored_native.take() {
2108 let mut prefixed: Vec<u8> = Vec::new();
2109 prefixed.extend_from_slice(
2110 format!("\n\t[C]: in function '{nm}'").as_bytes(),
2111 );
2112 prefixed.extend(tb);
2113 tb = prefixed;
2114 }
2115 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2116 unsafe { co.as_mut() }.error_traceback = Some(tb);
2117 }
2118 if let Err(e) = drive {
2119 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2120 unsafe { co.as_mut() }.error_value = Some(e.0);
2121 self.heap
2122 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2123 }
2124 (drive, CoroStatus::Dead)
2125 }
2126 }
2127 };
2128
2129 // save the coroutine's context back and restore the resumer
2130 self.store_coro_ctx(co);
2131 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2132 unsafe { co.as_mut() }.status = status;
2133 match resumer {
2134 Some(r) => {
2135 self.load_coro_ctx(r);
2136 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2137 unsafe { r.as_mut() }.status = CoroStatus::Running;
2138 self.current = Some(r);
2139 }
2140 None => {
2141 let m = self.main_ctx.take().expect("main context saved");
2142 self.put_ctx(m);
2143 self.current = None;
2144 }
2145 }
2146 self.c_depth -= 1;
2147 outcome
2148 }
2149
2150 /// First resume: install the body function at slot 0 and run.
2151 fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2152 self.stack.clear();
2153 self.stack.push(body);
2154 self.stack.extend_from_slice(args);
2155 self.top = self.stack.len() as u32;
2156 match self.begin_call(0, Some(args.len() as u32), -1, true) {
2157 Ok(true) => self.exec_with(1),
2158 Ok(false) => Ok(self.take_results(0)),
2159 Err(e) => Err(e),
2160 }
2161 }
2162
2163 /// Resume after a yield: deliver `args` as the results of the call that
2164 /// yielded, then continue the suspended thread.
2165 fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2166 let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2167 let n = args.len() as u32;
2168 // Restore the full register window of the suspended top frame: a yield
2169 // that unwound through a native (call_value) may have left the stack
2170 // shorter than the frame needs. `base + max_stack` is what push_frame
2171 // allocates; `fslot + n` covers the delivered yield results.
2172 let frame_need = self
2173 .frames
2174 .last()
2175 .and_then(CallFrame::lua)
2176 .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2177 .unwrap_or(0);
2178 let need = frame_need.max((fslot + n) as usize);
2179 if self.stack.len() < need {
2180 self.stack.resize(need, Value::Nil);
2181 }
2182 for (i, &v) in args.iter().enumerate() {
2183 self.stack[fslot as usize + i] = v;
2184 }
2185 self.finish_results(fslot, n, nres);
2186 // the suspended `coroutine.yield` (a C call) now returns its resume
2187 // values: fire the matching "return" hook PUC defers until the resume.
2188 self.hook_return(true, 1, n)?;
2189 self.exec_with(1)
2190 }
2191
2192 /// `coroutine.yield`: suspend the running coroutine, recording where to
2193 /// resume. Errors if called outside a coroutine. Returns a sentinel error
2194 /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2195 pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2196 let nres = self.native_nresults;
2197 self.yielding = Some((vals, func_slot, nres));
2198 // value is irrelevant: resume_coro consults `self.yielding`, not this
2199 LuaError(Value::Nil)
2200 }
2201
2202 /// Install or clear the debug hook on the running thread (`debug.sethook`
2203 /// without a thread argument). Arms the calling frame's `oldpc` to the
2204 /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2205 /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2206 /// native return: the very next traceexec compares against the sethook
2207 /// CALL's line. When the install statement and the following statement are
2208 /// on different source lines (db.lua :322), `changedline` fires for that
2209 /// first statement; when they share a line (db.lua :25 wrapper), they do
2210 /// not, so the wrapper line is not re-fired.
2211 pub(crate) fn install_hook(&mut self, hook: HookState) {
2212 self.hook = hook;
2213 if self.hook.line
2214 && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2215 {
2216 f.hook_oldpc = f.pc.saturating_sub(1);
2217 }
2218 }
2219
2220 /// Install a hook on `target` (`None`/current thread → the live VM fields;
2221 /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2222 /// with an optional thread argument.
2223 ///
2224 /// `target == None` means "no explicit thread argument" — PUC binds that
2225 /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2226 /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2227 /// of whether that's the main thread or a currently-resumed coroutine
2228 /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2229 /// `store_coro_ctx`). So a `None` target should always route to
2230 /// `install_hook` on the live fields. The pre-fix predicate gate
2231 /// `is_current_thread(target)` returned `false` when running inside a
2232 /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2233 /// and silently dropped the hook on the floor — the install happened on
2234 /// no thread at all.
2235 pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2236 if target.is_none() || self.is_current_thread(target) {
2237 self.install_hook(state);
2238 } else if let Some(co) = target {
2239 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2240 let m = unsafe { co.as_mut() };
2241 m.hook = state;
2242 if state.line
2243 && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2244 {
2245 f.hook_oldpc = u32::MAX;
2246 }
2247 // co.hook.func is a traced Value (Coro::trace covers it); demote
2248 // co back to gray so propagate sees the new hook function.
2249 self.heap
2250 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2251 }
2252 }
2253
2254 /// The hook state of `target` (`None`/current → the live VM state).
2255 pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2256 match target {
2257 t if self.is_current_thread(t) => self.hook,
2258 Some(co) => co.hook,
2259 None => self.hook,
2260 }
2261 }
2262
/// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
/// hooks disabled (PUC clears the mask) and its results/stack growth are
/// discarded so the interrupted frame's register window is untouched.
/// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
///
/// Two hooks may be installed at once and both observe the event: the Rust
/// hook (`self.hook.rust_func`) is called first, then the Lua hook
/// (`self.hook.func`). `from_native` is forwarded to `call_value_impl` for
/// the Lua hook call.
///
/// # Errors
///
/// Returns whatever error the Lua hook call produced; the Rust hook has no
/// error channel here. `in_hook`, `pending_is_hook`, the stack length and
/// `top` are reset whether or not the Lua hook failed.
fn run_hook(
    &mut self,
    event: &[u8],
    line: Option<i64>,
    from_native: bool,
) -> Result<(), LuaError> {
    // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
    // synchronous fn pointer call). Both Rust and Lua hooks may be
    // installed; both observe each event.
    if let Some(rh) = self.hook.rust_func {
        // Map the event name onto the Rust-side enum; an unrecognised name
        // skips the Rust hook. Both tail events collapse to `TailCall`, and a
        // missing or negative line is reported as 0.
        let evt = match event {
            b"call" => Some(RustHookEvent::Call),
            b"return" => Some(RustHookEvent::Return),
            b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
            b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
            b"count" => Some(RustHookEvent::Count),
            _ => None,
        };
        if let Some(evt) = evt {
            let was_in_hook = self.in_hook;
            self.in_hook = true;
            rh(self, evt);
            self.in_hook = was_in_hook;
        }
    }
    // No Lua hook installed: nothing more to do.
    let Some(hook) = self.hook.func else {
        return Ok(());
    };
    // Snapshot the stack extent so whatever the hook pushes can be dropped.
    let saved_top = self.top;
    let saved_len = self.stack.len();
    let name = Value::Str(self.heap.intern(event));
    let lv = line.map_or(Value::Nil, Value::Int);
    self.in_hook = true;
    // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
    // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
    // ci sat below `hookf` (the function being hooked). When that hooked
    // function is native, no Lua frame for it exists in luna's `frames`;
    // model it as a synthetic C level by pushing the hook with
    // `from_c = true` (then `c_frame_name` reads the caller's call
    // instruction → e.g. `name = "sethook"`). When the hooked function is
    // Lua (its frame is still on the stack), push with `from_c = false`
    // so the level descent lands on it directly. The hook's own frame
    // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
    // (PUC `CIST_HOOKED`).
    self.pending_is_hook = true;
    let r = self.call_value_impl(hook, &[name, lv], from_native);
    self.pending_is_hook = false;
    // NOTE(review): unlike the Rust-hook path above, this clears `in_hook`
    // unconditionally instead of restoring the previous value. The callers
    // visible in this file only enter when `in_hook` is false, so the two
    // are equivalent there — confirm for any other caller.
    self.in_hook = false;
    // Discard the hook's results and any stack growth.
    self.stack.truncate(saved_len);
    self.top = saved_top;
    r.map(|_| ())
}
2319
2320 /// Fire the "call" hook on entry to a function, if armed and not already in
2321 /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2322 /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2323 /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2324 /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2325 fn hook_call_with(
2326 &mut self,
2327 from_native: bool,
2328 nargs: u32,
2329 is_tail: bool,
2330 ) -> Result<(), LuaError> {
2331 if self.hook.call
2332 && !self.in_hook
2333 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2334 {
2335 self.hook_ftransfer = 1;
2336 self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2337 // PUC 5.1 didn't distinguish tail-call events — every call,
2338 // including tail-calls, fired plain `"call"`. 5.2 introduced
2339 // the separate `"tail call"` event (mask `"c"` covers both).
2340 // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2341 // "return","tail return","return","tail return"}`.
2342 let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2343 b"tail call"
2344 } else {
2345 b"call"
2346 };
2347 self.run_hook(event, None, from_native)?;
2348 }
2349 Ok(())
2350 }
2351
/// Fire the "call" hook for an ordinary (non-tail) call: forwards to
/// `hook_call_with` with `is_tail = false`. `nargs` is the number of
/// arguments passed to the callee.
pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
    self.hook_call_with(from_native, nargs, false)
}
2355
2356 /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2357 /// the first result slot relative to the activation's func slot, ntransfer
2358 /// the number of results.
2359 pub(crate) fn hook_return(
2360 &mut self,
2361 from_native: bool,
2362 ftransfer: u32,
2363 nresults: u32,
2364 ) -> Result<(), LuaError> {
2365 if self.hook.ret
2366 && !self.in_hook
2367 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2368 {
2369 self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2370 self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2371 self.run_hook(b"return", None, from_native)?;
2372 }
2373 Ok(())
2374 }
2375
2376 /// PUC "tail return" event — fires once per tail call that collapsed
2377 /// into the activation now returning, *after* its own "return" event.
2378 /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2379 fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2380 if self.hook.ret
2381 && !self.in_hook
2382 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2383 {
2384 self.run_hook(b"tail return", None, false)?;
2385 }
2386 Ok(())
2387 }
2388
2389 /// Call a metamethod with a single expected result.
2390 fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2391 let mut r = self.call_value(f, args)?;
2392 Ok(if r.is_empty() {
2393 Value::Nil
2394 } else {
2395 r.swap_remove(0)
2396 })
2397 }
2398
/// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
/// driven through the interpreter loop with a `Meta` continuation, so a
/// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
/// On the metamethod's return the loop head runs `finish_meta(action, …)`.
/// Returns to the caller with the call set up — the opcode arm must do no
/// further work on the running frame and let the loop iterate. `tm` is
/// the metamethod event name (e.g. "index", "add"); a Lua handler frame
/// born from this call inherits it via `pending_tm`, so
/// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
/// (db.lua :878).
///
/// # Errors
///
/// Propagates any error from `begin_call`. `pending_tm` is restored before
/// the error is returned; the continuation frame pushed here is not popped
/// by this function.
fn begin_meta_call(
    &mut self,
    func: Value,
    args: &[Value],
    action: MetaAction,
    tm: &'static str,
) -> Result<(), LuaError> {
    // Remember the caller's `top`; it travels with the continuation.
    let saved_top = self.top;
    // The call is laid out at the current end of the stack: callee first,
    // then its arguments.
    let cont_slot = self.stack.len() as u32;
    self.stack.push(func);
    self.stack.extend_from_slice(args);
    self.top = self.stack.len() as u32;
    // The continuation expects exactly one result at `cont_slot`.
    frames_push_sync(
        &mut self.frames,
        &mut self.frames_top,
        CallFrame::Cont(NativeCont {
            kind: ContKind::Meta(MetaCont { action, saved_top }),
            func_slot: cont_slot,
            nresults: 1,
        }),
    );
    let saved_tm = self.pending_tm.replace(tm);
    // begin_call drives a Lua metamethod through the loop (returns true) or
    // runs a native one inline (returns false, leaving results at cont_slot
    // for the loop head to pick up); either way the Meta cont resolves there.
    let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
    // Native callees never consumed pending_tm (push_frame is only hit on
    // a Lua callee); restore so it doesn't leak to a later push_frame.
    self.pending_tm = saved_tm;
    // Surface a begin_call failure only after the restore above.
    r?;
    Ok(())
}
2441
2442 /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2443 fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2444 match self.index_step(t, key)? {
2445 MmOut::Done(v) => self.stack[dst as usize] = v,
2446 MmOut::Mm { func, recv } => {
2447 self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2448 }
2449 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2450 }
2451 Ok(())
2452 }
2453
2454 /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2455 fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2456 match self.newindex_step(t, key, v)? {
2457 MmOut::Done(_) => {}
2458 MmOut::Mm { func, recv } => {
2459 self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2460 }
2461 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2462 }
2463 Ok(())
2464 }
2465
2466 /// Apply a comparison opcode's outcome: a known boolean drives the
2467 /// conditional skip directly; a metamethod is called yieldably, its
2468 /// truthiness driving the skip on return.
2469 fn op_compare(
2470 &mut self,
2471 step: MmOut,
2472 l: Value,
2473 r: Value,
2474 k: bool,
2475 tm: &'static str,
2476 ) -> Result<(), LuaError> {
2477 match step {
2478 MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2479 MmOut::Mm { func, .. } => {
2480 self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2481 }
2482 MmOut::CompareSynth { func } => {
2483 // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2484 // negation are driven through `MetaAction::Compare` so the
2485 // metamethod call can yield like any other compare.
2486 self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2487 }
2488 }
2489 Ok(())
2490 }
2491
2492 /// Complete a VM instruction whose metamethod just returned `result` (PUC
2493 /// `luaV_finishOp`). The running frame is already back on top.
2494 fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2495 match action {
2496 MetaAction::Store { dst } => self.stack[dst as usize] = result,
2497 MetaAction::Discard => {}
2498 MetaAction::Compare { k, negate } => {
2499 let t = if negate {
2500 !result.truthy()
2501 } else {
2502 result.truthy()
2503 };
2504 self.cond_skip(t, k);
2505 }
2506 MetaAction::Concat { dst, base_a } => {
2507 self.stack[dst as usize] = result;
2508 self.top = dst + 1;
2509 self.concat_run(base_a)?;
2510 }
2511 }
2512 Ok(())
2513 }
2514
2515 // ---- metatables ----
2516
2517 pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2518 match v {
2519 Value::Table(t) => t.metatable(),
2520 Value::Userdata(u) => u.metatable(),
2521 v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2522 }
2523 }
2524
2525 /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2526 /// non-table). No-op for tables (they carry their own).
2527 pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2528 if let Some(i) = type_mt_slot(v) {
2529 self.type_mt[i] = mt;
2530 }
2531 }
2532
2533 /// The metamethod of `v` for `mm`, or nil.
2534 pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2535 match self.metatable_of(v) {
2536 Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2537 None => Value::Nil,
2538 }
2539 }
2540
2541 /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2542 /// only fires when both operands carry a metatable that exposes the same
2543 /// implementation. Returns the metamethod to call, or `Nil` when no
2544 /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2545 /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2546 pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2547 let mt1 = self.metatable_of(l);
2548 let Some(mt1) = mt1 else { return Value::Nil };
2549 let key = Value::Str(self.mm_names[mm as usize]);
2550 let tm1 = mt1.get(key);
2551 if tm1.is_nil() {
2552 return Value::Nil;
2553 }
2554 let mt2 = self.metatable_of(r);
2555 let Some(mt2) = mt2 else { return Value::Nil };
2556 if mt1.as_ptr() == mt2.as_ptr() {
2557 return tm1;
2558 }
2559 let tm2 = mt2.get(key);
2560 if tm2.is_nil() {
2561 return Value::Nil;
2562 }
2563 if tm1.raw_eq(tm2) {
2564 return tm1;
2565 }
2566 Value::Nil
2567 }
2568
2569 /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2570 /// or full userdata whose metatable carries a string `__name` reports that
2571 /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2572 pub(crate) fn obj_typename(&self, v: Value) -> String {
2573 if matches!(v, Value::Table(_) | Value::Userdata(_))
2574 && let Value::Str(s) = self.get_mm(v, Mm::Name)
2575 {
2576 return String::from_utf8_lossy(s.as_bytes()).into_owned();
2577 }
2578 v.type_name().to_string()
2579 }
2580
2581 fn call_at(
2582 &mut self,
2583 func_slot: u32,
2584 nargs: u32,
2585 from_c: bool,
2586 ) -> Result<Vec<Value>, LuaError> {
2587 if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2588 self.exec()
2589 } else {
2590 // native completed inline; results at func_slot..top
2591 Ok(self.take_results(func_slot))
2592 }
2593 }
2594
2595 /// Switch the `collectgarbage` mode, returning the previous mode name.
2596 pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2597 std::mem::replace(&mut self.gc_mode, new)
2598 }
2599
2600 /// Whether the current `collectgarbage` mode is "generational" (where a
2601 /// "step" is a minor collection — a full atomic pass — rather than a paced
2602 /// incremental sweep).
2603 pub(crate) fn gc_mode_is_generational(&self) -> bool {
2604 self.gc_mode == "generational"
2605 }
2606
/// Current value of the `stepsize` pacing parameter. Under PUC semantics 0
/// means an unbounded step that completes a whole cycle at once.
pub(crate) fn gc_stepsize(&self) -> i64 {
    self.gc_stepsize
}
2612
2613 /// `collectgarbage("param", name [,value])`: read (or set, returning the
2614 /// previous value of) a pacing parameter. Returns `None` for an unknown
2615 /// name so the caller can raise PUC's `invalid parameter` error. The
2616 /// collector is stop-the-world, so these only round-trip for API fidelity.
2617 pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2618 let slot = match name {
2619 b"pause" => &mut self.gc_pause,
2620 b"stepmul" => &mut self.gc_stepmul,
2621 b"stepsize" => &mut self.gc_stepsize,
2622 _ => return None,
2623 };
2624 let prev = *slot;
2625 if let Some(v) = set {
2626 *slot = v;
2627 }
2628 Some(prev)
2629 }
2630
2631 /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2632 /// paced sweep via `Vm::gc_step`.
2633 ///
2634 /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2635 /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2636 /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2637 /// suspected UAF is structurally closed — born objects no longer become
2638 /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2639 ///
2640 /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2641 /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2642 /// O(SWEEP_DIVISOR) safe-points regardless of size.
2643 #[inline(always)]
2644 pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2645 if self.gc_finalizing {
2646 return;
2647 }
2648 if !self.heap.gc_due() {
2649 return;
2650 }
2651 // v2.4 Phase Cleanup REVERTED — the v2.2.0
2652 // `gc_top = live_top.max(self.top)` workaround is **still
2653 // load-bearing** on Windows even after v2.3's
2654 // `finish_results` slot-clear. macOS + Docker linux/amd64
2655 // both pass with bare `live_top`, but Windows
2656 // STATUS_ACCESS_VIOLATION's on `Lua55/gc.lua`'s weak-table
2657 // + step-GC stress without the over-root. The wider
2658 // gc_top stays as the v2.4 production fix; tightening to
2659 // bare live_top is a v2.5+ follow-up that requires either
2660 // (a) per-frame `[base, base + max_stack)` gc_roots walk
2661 // (rejected in v2.2.1 plan-state amendment log — broke
2662 // db.lua) or (b) PUC L->top discipline migration through
2663 // every safe-point. Tracked in v2.4 plan-state amendments
2664 // log.
2665 self.gc_top = live_top.max(self.top);
2666 // PUC stepmul: % of allocation rate. Higher = more GC work per
2667 // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2668 // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2669 const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2670 const MIN_BUDGET: usize = 64_000;
2671 let stepmul = self.gc_stepmul.max(1) as usize;
2672 let divisor = (SWEEP_BASE / stepmul).max(1);
2673 let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2674 if self.gc_step(budget) {
2675 self.heap.rearm_gc_pause(self.gc_pause);
2676 }
2677 }
2678
2679 /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2680 /// roots (open upvalues, which are not first-class Values). Shared by the
2681 /// full collector and the incremental-sweep driver so both snapshot the
2682 /// exact same live set.
2683 fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2684 let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2685 roots.push(Value::Table(self.globals));
2686 for mt in self.type_mt.into_iter().flatten() {
2687 roots.push(Value::Table(mt));
2688 }
2689 for &n in &self.mm_names {
2690 roots.push(Value::Str(n));
2691 }
2692 // root only the running thread's live registers (PUC marks [stack, top)):
2693 // freed temporaries above `gc_top` are excluded so weak values stranded
2694 // there are not pinned. Suspended threads (main_ctx, other coroutines)
2695 // stay whole-rooted below — safe over-rooting, and they are not the
2696 // thread whose weak-table loop is under test.
2697 let live = (self.gc_top as usize).min(self.stack.len());
2698 roots.extend_from_slice(&self.stack[..live]);
2699 for cf in &self.frames {
2700 match cf {
2701 CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2702 CallFrame::Cont(NativeCont {
2703 kind: ContKind::Xpcall { handler },
2704 ..
2705 }) => roots.push(*handler),
2706 CallFrame::Cont(NativeCont {
2707 kind: ContKind::Close(cc),
2708 ..
2709 }) => {
2710 // Root the error threaded through this close chain so a
2711 // `collectgarbage()` inside a sibling `__close` handler
2712 // does not free it before the next handler is invoked
2713 // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2714 if let Some(e) = cc.pending {
2715 roots.push(e);
2716 }
2717 if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2718 roots.push(err);
2719 }
2720 }
2721 CallFrame::Cont(_) => {}
2722 }
2723 }
2724 if let Some(e) = self.closing_err {
2725 roots.push(e);
2726 }
2727 // B12 host roots — Lua-facade handles keep their referenced
2728 // values alive across calls/yields. Trace the whole vector;
2729 // unused slots (post-`unpin_all`) carry Value::Nil which the
2730 // GC ignores.
2731 for slot in &self.host_roots {
2732 // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2733 roots.push(slot.value);
2734 }
2735 // v2.1 — `table.sort` and similar builtins stash their working
2736 // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2737 // comparator callback doesn't free strings/tables snapshotted
2738 // off the live table (sort.lua's `load(..)(); collectgarbage()`
2739 // compare regression).
2740 for buf in &self.sort_scratch {
2741 roots.extend_from_slice(buf);
2742 }
2743 // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2744 // mid-execution. Without rooting them here, a `collectgarbage()`
2745 // invoked inside the running native (sort.lua AA `load(..)();
2746 // collectgarbage()` compare callback regression) sweeps the
2747 // closure that's actively executing, leaving `nc.upvals`
2748 // dangling and the Rust local `nc` pointing at recycled memory
2749 // — the SIGSEGV pops on the very next field access or pop.
2750 for &nc in &self.running_natives {
2751 roots.push(Value::Native(nc));
2752 }
2753 // the running thread's debug hook (suspended threads root theirs via
2754 // Coro::trace / the main_ctx sweep below)
2755 if let Some(h) = self.hook.func {
2756 roots.push(h);
2757 }
2758 // the running coroutine (its saved-context fields live in the VM, but
2759 // the object itself + its resumer chain must stay reachable)
2760 if let Some(co) = self.current {
2761 roots.push(Value::Coro(co));
2762 }
2763 if let Some(mc) = self.main_coro {
2764 roots.push(Value::Coro(mc));
2765 }
2766 // debug.getregistry() and io library state
2767 if let Some(r) = self.registry {
2768 roots.push(Value::Table(r));
2769 }
2770 if let Some(mt) = self.file_mt {
2771 roots.push(Value::Table(mt));
2772 }
2773 if let Some(f) = self.io_input {
2774 roots.push(Value::Userdata(f));
2775 }
2776 if let Some(f) = self.io_output {
2777 roots.push(Value::Userdata(f));
2778 }
2779 // the main thread's saved context while a coroutine runs
2780 if let Some(m) = &self.main_ctx {
2781 roots.extend_from_slice(&m.stack);
2782 if let Some(h) = m.hook.func {
2783 roots.push(h);
2784 }
2785 for cf in &m.frames {
2786 match cf {
2787 CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2788 CallFrame::Cont(NativeCont {
2789 kind: ContKind::Xpcall { handler },
2790 ..
2791 }) => roots.push(*handler),
2792 CallFrame::Cont(_) => {}
2793 }
2794 }
2795 }
2796 let mut extra: Vec<*mut GcHeader> = self
2797 .open_upvals
2798 .iter()
2799 .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2800 .collect();
2801 if let Some(m) = &self.main_ctx {
2802 extra.extend(
2803 m.open_upvals
2804 .iter()
2805 .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2806 );
2807 }
2808 (roots, extra)
2809 }
2810
2811 /// Run a full collection with the VM's roots, then run any `__gc`
2812 /// finalizers the collection scheduled. A no-op (returns 0) when already
2813 /// inside a finalizer — the collector is not reentrant (PUC).
2814 pub fn collect_garbage(&mut self) -> usize {
2815 if self.gc_finalizing {
2816 return 0;
2817 }
2818 let (roots, extra) = self.gc_roots();
2819 let freed = self.heap.collect_ex(&roots, &extra);
2820 self.run_finalizers();
2821 freed
2822 }
2823
2824 /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2825 /// threw; gc.lua's "errors during collection" probe relies on it. This
2826 /// variant runs the same cycle but propagates the captured finalizer
2827 /// error to the explicit caller.
2828 pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2829 if self.gc_finalizing {
2830 return Ok(0);
2831 }
2832 let (roots, extra) = self.gc_roots();
2833 let freed = self.heap.collect_ex(&roots, &extra);
2834 self.run_finalizers_or_err()?;
2835 Ok(freed)
2836 }
2837
/// Whether a `__gc` finalizer is currently running. While it is,
/// `collectgarbage` should report failure rather than collect; the
/// collection entry points in this file return early on the same flag.
pub(crate) fn gc_is_finalizing(&self) -> bool {
    self.gc_finalizing
}
2843
2844 /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2845 /// = true indicates more pieces follow (concatenated until the first
2846 /// `to_cont = false` call flushes the whole line). Mirrors
2847 /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2848 /// * If the buffer is fresh, `to_cont` is false, and the message is
2849 /// `@<word>`, treat as a control message — only `@on` / `@off` are
2850 /// recognised; any other `@…` is silently ignored.
2851 /// * Otherwise, while the state is `Off`, drop the piece; while `On`,
2852 /// accumulate, and flush to stderr + `warn_log` on the
2853 /// non-continuation call.
2854 pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
2855 if self.warn_buf.is_empty()
2856 && !to_cont
2857 && let Some(b'@') = msg.first().copied()
2858 {
2859 match &msg[1..] {
2860 b"on" => self.warn_state = WarnState::On,
2861 b"off" => self.warn_state = WarnState::Off,
2862 _ => {} // unknown control — silently ignored (PUC checkcontrol)
2863 }
2864 return;
2865 }
2866 if self.warn_state == WarnState::Off {
2867 // drop continuation pieces too — PUC `warnfoff` is the trampoline
2868 return;
2869 }
2870 self.warn_buf.extend_from_slice(msg);
2871 if !to_cont {
2872 let line = std::mem::take(&mut self.warn_buf);
2873 eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
2874 self.warn_log.push(line);
2875 }
2876 }
2877
2878 /// Drain the in-process warning log (one entry per emitted message, sans
2879 /// `"Lua warning: "` prefix and newline). For test harnesses that want to
2880 /// assert on warn output without scraping stderr.
2881 pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
2882 std::mem::take(&mut self.warn_log)
2883 }
2884
/// Arm the cooperative instruction budget (P09 embedding).
///
/// The run loop decrements the budget once per dispatch turn; on zero it
/// raises a catchable `"instruction budget exceeded"` error and disarms
/// itself, so the host can resume with a fresh budget on the next call.
/// `None` removes the cap. Pass `Some(n)` before `eval`/`call_value` for
/// the embedder's short-script semantics.
///
/// This setter only stores the value; the decrement and the error are
/// implemented by the run loop.
pub fn set_instr_budget(&mut self, budget: Option<i64>) {
    self.instr_budget = budget;
}
2894
/// Remaining instruction budget, or `None` when execution is unbounded
/// (no budget armed).
pub fn instr_budget_remaining(&self) -> Option<i64> {
    self.instr_budget
}
2899
2900 /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
2901 /// **must** disable JIT when relying on `instr_budget` — see the
2902 /// `jit_enabled` field doc for the rationale.
2903 pub fn set_jit_enabled(&mut self, enabled: bool) {
2904 self.jit.enabled = enabled;
2905 }
2906
2907 /// Current JIT enable state.
2908 pub fn jit_enabled(&self) -> bool {
2909 self.jit.enabled
2910 }
2911
2912 /// Toggle the trace JIT (P12). Off by default while the sprint
2913 /// develops. When enabled, hot back-edges are counted on
2914 /// `Proto.trace_hot_count`; once the counter passes
2915 /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
2916 /// mode at the back-edge target. Stays a no-op until S2's
2917 /// trace lowerer and S3's dispatcher land.
2918 pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
2919 self.jit.trace_enabled = enabled;
2920 }
2921
2922 /// P16-A — opt-in flag for the self-link cycle catch. See field
2923 /// docs for the correctness blocker. Default `false`.
2924 pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
2925 self.jit.p16_self_link_enabled = enabled;
2926 }
2927
2928 /// Current state of the P16-A self-link cycle catch.
2929 pub fn p16_self_link_enabled(&self) -> bool {
2930 self.jit.p16_self_link_enabled
2931 }
2932
2933 /// Current trace-JIT enable state.
2934 pub fn trace_jit_enabled(&self) -> bool {
2935 self.jit.trace_enabled
2936 }
2937
2938 /// Number of traces that have closed cleanly (looped back to the
2939 /// head PC) since this Vm was constructed. Cumulative; used by
2940 /// tests + tuning. Will become the dominant signal once S2's
2941 /// compile + cache lands.
2942 pub fn trace_closed_count(&self) -> u64 {
2943 self.jit.counters.closed
2944 }
2945
2946 /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
2947 /// hit an un-recordable op — the latter lands at S2).
2948 pub fn trace_aborted_count(&self) -> u64 {
2949 self.jit.counters.aborted
2950 }
2951
2952 /// P13-S13-G v2 — number of compiled traces whose close shape
2953 /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
2954 /// pin `dispatchable=false` because the dispatcher can't
2955 /// resume at a depth>0 PC without the matching CallFrames.
2956 /// S4-step4b's frame-mat helper could synthesise those, but
2957 /// the InlineAbort emit path isn't wired up yet — fresh
2958 /// pickup work for S13-G v2-full.
2959 pub fn trace_inline_abort_count(&self) -> u64 {
2960 self.jit.counters.inline_abort
2961 }
2962
2963 /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
2964 pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
2965 &self.jit.counters.dispatch_off_reasons
2966 }
2967
2968 /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
2969 pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
2970 &self.jit.counters.compile_failed_reasons
2971 }
2972
2973 /// P13-S13-H — see `JitCounters::closed_lens`. Returns
2974 /// `(is_call_triggered, ops_len)` for every trace that closed.
2975 pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
2976 &self.jit.counters.closed_lens
2977 }
2978
2979 /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
2980 /// Per-reason close-cause counts (recorder-side abort/discard +
2981 /// lowerer-side dispatch_off labels) keyed by `&'static str`.
2982 pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
2983 &self.jit.counters.close_cause_counts
2984 }
2985
2986 /// v2.0 Track-R R3b — number of compiled traces whose
2987 /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
2988 /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
2989 /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
2990 /// hot loop with p16-on. R3b keeps `dispatchable = false` even
2991 /// when this count bumps; R3d will lift it.
2992 pub fn trace_downrec_link_compiled_count(&self) -> u64 {
2993 self.jit.counters.downrec_link_compiled
2994 }
2995
2996 /// v2.0 Track-R R3c — see
2997 /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
2998 /// of times the dispatcher's `is_downrec_sentinel` arm fired and
2999 /// classified the return as a caller-pc-guard HIT.
3000 pub fn trace_downrec_dispatched_count(&self) -> u64 {
3001 self.jit.counters.downrec_dispatched
3002 }
3003
3004 /// v2.0 Track-R R3c — see
3005 /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
3006 /// times the dispatcher entered a `downrec_link`-bearing trace and
3007 /// the trace returned via the lowerer's deopt block (caller-pc
3008 /// guard MISS), or the dispatcher itself force-deopted via the
3009 /// stitch-cycle checkpoint.
3010 pub fn trace_downrec_deopt_count(&self) -> u64 {
3011 self.jit.counters.downrec_deopt
3012 }
3013
3014 /// v2.0 Track-R R3d — see
3015 /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3016 /// Number of compiled traces whose lowerer emitted a multi-way
3017 /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3018 /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3019 pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3020 self.jit.counters.multi_way_guard_emitted
3021 }
3022
3023 /// P12-S2.C — number of closed traces the lowerer compiled and
3024 /// parked on `Proto.traces`. Re-records of the same head_pc are
3025 /// deduped (the second close finds the head_pc already cached
3026 /// and skips compile), so this never exceeds `trace_closed_count`.
3027 pub fn trace_compiled_count(&self) -> u64 {
3028 self.jit.counters.compiled
3029 }
3030
3031 /// v2.1 Phase 1I.B — number of times the recorder captured a
3032 /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3033 /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3034 /// by the Phase 1I.B opt-in fire test to verify the env gate
3035 /// wiring round-trips end-to-end (env -> recorder -> snapshot
3036 /// -> counter -> getter -> assertion).
3037 pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3038 self.jit.counters.field_ic_snapshot_captured
3039 }
3040
3041 /// P12-S2.C — number of closed traces the lowerer rejected
3042 /// (any of the bail conditions in
3043 /// `crate::jit::trace::try_compile_trace`).
3044 pub fn trace_compile_failed_count(&self) -> u64 {
3045 self.jit.counters.compile_failed
3046 }
3047
3048 /// P12-S3 — number of times the dispatcher jumped into a
3049 /// compiled trace. Bumps on every entry; `trace_deopt_count`
3050 /// counts the subset where the trace returned with a parked
3051 /// `jit_pending_err`.
3052 pub fn trace_dispatched_count(&self) -> u64 {
3053 self.jit.counters.dispatched
3054 }
3055
3056 /// P12-S3 — number of trace entries that came back with
3057 /// `jit_pending_err` set (typically a metatable shadowed an
3058 /// index inside a helper, forcing the dispatcher to fall back
3059 /// to the interpreter without committing the trace's result).
3060 pub fn trace_deopt_count(&self) -> u64 {
3061 self.jit.counters.deopt
3062 }
3063
3064 /// P15-A v1 — number of times the dispatcher started a side
3065 /// trace recording (an `exit_hit_counts` slot crossed
3066 /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3067 /// was None and trace JIT was enabled). Each unit is exactly one
3068 /// `start_side_trace` call; the actual compile success counts
3069 /// under [`Self::trace_compiled_count`] like any other trace.
3070 /// Probe use: distinguishes the "side-trace pipeline fired"
3071 /// signal from the "primary back-edge / call-trigger fired"
3072 /// signal so v0-v3 architectural progress is visible without
3073 /// reading per-counter histograms.
3074 pub fn trace_side_trace_started_count(&self) -> u64 {
3075 self.jit.counters.side_trace_started
3076 }
3077
3078 /// P15-A v2-A — number of side-trace recordings that closed,
3079 /// compiled successfully, AND patched their parent's
3080 /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3081 /// dispatch through these ptrs yet (v2-B/C job), but the
3082 /// counter + ptr write proves the compile + link pipeline is
3083 /// complete end-to-end.
3084 pub fn trace_side_trace_compiled_count(&self) -> u64 {
3085 self.jit.counters.side_trace_compiled
3086 }
3087
3088 /// P15-A v2-C-A5-C — number of side traces that compiled
3089 /// successfully but were SHEDDED by the close-handler shape-
3090 /// match gate (`exit_tags_match_entry_tags`). High ratios
3091 /// vs. `trace_side_trace_compiled_count` indicate the
3092 /// architecture is shedding lots of would-be side traces;
3093 /// useful as a tuning probe for future relaxation of the
3094 /// gate or for child-IR re-specialisation against parent's
3095 /// exit shape.
3096 pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3097 self.jit.counters.side_trace_shape_mismatch
3098 }
3099
3100 /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3101 /// classified as `crate::jit::trace::EscapeState::Sinkable`
3102 /// across every successfully compiled trace on this Vm. The
3103 /// count is post-demotion: sites pre-emit drops back to Escaped
3104 /// for not meeting v1 sunk-emit criteria are NOT counted.
3105 /// `trace_sunk_alloc_count` matches one-for-one today (every
3106 /// surviving Sinkable site goes through sunk emit).
3107 pub fn trace_sinkable_seen_count(&self) -> u64 {
3108 self.jit.counters.sinkable_seen
3109 }
3110
3111 /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3112 pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3113 self.jit.counters.accum_bufferable_seen
3114 }
3115
3116 /// P15-prep — total dispatch hits across all known traces,
3117 /// broken into hot-exit telemetry (max single-exit count,
3118 /// total dispatches, exit count). Used by probes to identify
3119 /// hot side-exits as side-trace candidates.
3120 ///
3121 /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3122 /// recursively, so inner functions' traces are reported.
3123 pub fn trace_exit_hit_summary(
3124 &self,
3125 cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3126 ) -> Vec<(u32, Vec<u32>)> {
3127 fn walk(
3128 proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3129 out: &mut Vec<(u32, Vec<u32>)>,
3130 ) {
3131 for ct in proto.traces.borrow().iter() {
3132 let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3133 out.push((ct.head_pc, counts));
3134 }
3135 for inner in proto.protos.iter() {
3136 walk(*inner, out);
3137 }
3138 }
3139 let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3140 walk(cl.proto, &mut out);
3141 out
3142 }
3143
3144 /// P15-A v0 — surface every side-exit slot whose hit count is
3145 /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3146 /// `cl.proto` (recursively walking `proto.protos`). Returned
3147 /// entries are side-trace candidates: each carries the parent
3148 /// trace's `(head_proto, head_pc)`, the exit's index in the
3149 /// parent's `exit_hit_counts`, and the side trace's natural
3150 /// entry shape (`cont_pc` + `exit_tags`).
3151 ///
3152 /// Layout of `exit_hit_counts` (mirrored by the iter):
3153 /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3154 /// window-sized exit_tags).
3155 /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3156 /// → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3157 /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3158 /// exit_tags = `ct.exit_tags`).
3159 pub fn hot_exit_iter(
3160 &self,
3161 cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3162 ) -> Vec<crate::jit::trace::HotExitInfo> {
3163 use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3164 fn walk(
3165 proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3166 out: &mut Vec<HotExitInfo>,
3167 ) {
3168 for ct in proto.traces.borrow().iter() {
3169 let inline_n = ct.per_exit_inline.len();
3170 let tags_n = ct.per_exit_tags.len();
3171 debug_assert_eq!(
3172 ct.exit_hit_counts.len(),
3173 inline_n + tags_n + 1,
3174 "exit_hit_counts layout invariant violated"
3175 );
3176 for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3177 let hits = cell.get();
3178 if hits < HOTEXIT_THRESHOLD {
3179 continue;
3180 }
3181 let (cont_pc, exit_tags) = if idx < inline_n {
3182 let ent = &ct.per_exit_inline[idx];
3183 (ent.cont_pc, ent.exit_tags.clone())
3184 } else if idx < inline_n + tags_n {
3185 let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3186 (*pc, tags.clone())
3187 } else {
3188 (ct.head_pc, ct.exit_tags.clone())
3189 };
3190 out.push(HotExitInfo {
3191 head_proto: proto,
3192 head_pc: ct.head_pc,
3193 exit_idx: idx,
3194 hits,
3195 cont_pc,
3196 exit_tags,
3197 });
3198 }
3199 }
3200 for inner in proto.protos.iter() {
3201 walk(*inner, out);
3202 }
3203 }
3204 let mut out: Vec<HotExitInfo> = Vec::new();
3205 walk(cl.proto, &mut out);
3206 out
3207 }
3208
3209 /// P12-S5-B — sum of NewTable sites that actually took the
3210 /// sunk-emit path across every successfully compiled trace on
3211 /// this Vm. Each counted site skips its heap `Gc<Table>`
3212 /// allocation per dispatch; the array part lives as Cranelift
3213 /// `Variable`s for the duration of the trace.
3214 pub fn trace_sunk_alloc_count(&self) -> u64 {
3215 self.jit.counters.sunk_alloc
3216 }
3217
3218 /// P12-S5-C — sum of materialise-helper emit sites across every
3219 /// successfully compiled trace on this Vm. Each unit is a
3220 /// (site × cmp side-exit) pair whose IR reconstructs a heap
3221 /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3222 /// emit is wiring materialise into the right side-exits.
3223 pub fn trace_materialize_emit_count(&self) -> u64 {
3224 self.jit.counters.materialize_emit
3225 }
3226
3227 /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3228 /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3229 /// replaces a `Heap::new_closure_inline` call on the dispatch
3230 /// path; the count is static (one per matching op per compiled
3231 /// trace), summed at compile success.
3232 pub fn trace_closure_emit_count(&self) -> u64 {
3233 self.jit.counters.closure_emit
3234 }
3235
3236 /// v2.0 Stage 7 polish 6 fire experiment — see
3237 /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3238 /// Number of compiled traces whose `per_exit_inline.len() > 0`
3239 /// (depth>0 inlined cmp side-exits emitted).
3240 pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3241 self.jit.counters.per_exit_inline_compiled
3242 }
3243
3244 /// v2.0 Stage 7 polish 6 fire experiment — see
3245 /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3246 /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3247 /// `dispatchable == true` — i.e. the count of compiled traces
3248 /// that would actually exercise the AOT polish 6 chain-reloc +
3249 /// deploy-resolver path.
3250 pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3251 self.jit.counters.per_exit_inline_dispatchable
3252 }
3253
3254 /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3255 /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3256 /// whether a self-recursive function actually walked the depth
3257 /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3258 /// across traces and Vm activations; reset only on `Vm::new`.
3259 pub fn trace_max_depth_seen(&self) -> u8 {
3260 self.jit.max_depth_seen
3261 }
3262
3263 /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3264 /// dispatch time). The frame-materialization helper reads `.base`
3265 /// to compute offsets for each inlined frame's window.
3266 #[doc(hidden)]
3267 pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3268 match self.frames.last() {
3269 Some(CallFrame::Lua(f)) => Some(*f),
3270 _ => None,
3271 }
3272 }
3273
3274 /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3275 /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3276 /// by `luna-tools` (`luna-profile`'s sampler walks this from
3277 /// inside a `Count` hook). Sibling-module scope: not part of the
3278 /// public embedder surface, but `inspect::frames_for_profile` is.
3279 #[doc(hidden)]
3280 pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3281 &self.frames
3282 }
3283
3284 /// P12-S4-step4b — ensure the value stack covers indices
3285 /// `[0..need)`. Extends with Nil if shorter. Called by the
3286 /// frame-materialization helper before pushing an inlined frame
3287 /// whose register window may exceed the current stack length.
3288 #[doc(hidden)]
3289 pub fn jit_ensure_stack(&mut self, need: usize) {
3290 if self.stack.len() < need {
3291 self.stack.resize(need, Value::Nil);
3292 }
3293 }
3294
3295 /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3296 /// `__close` handlers would run (any active tbc slot ≥ from
3297 /// holding a non-nil/false Value); if so, parks a deopt sentinel
3298 /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3299 /// IR branches to the deopt block. Otherwise performs the safe
3300 /// part of close — `close_from(from)` to close open upvals +
3301 /// drop any drained tbc entries ≥ from — and returns 0.
3302 ///
3303 /// Returns are i64-shaped so the cranelift import sig stays
3304 /// trivial (i64 → i64 mapping).
3305 #[doc(hidden)]
3306 pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3307 if self.jit.pending_err.is_some() {
3308 return 1;
3309 }
3310 let Some(f) = self.jit_last_lua_frame() else {
3311 self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3312 return 1;
3313 };
3314 let from = f.base + start_offset;
3315 let has_handler = self.tbc.iter().any(|&s| {
3316 s >= from && {
3317 let v = self.stack[s as usize];
3318 !matches!(v, Value::Nil | Value::Bool(false))
3319 }
3320 });
3321 if has_handler {
3322 self.jit.pending_err =
3323 Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3324 return 1;
3325 }
3326 self.close_from(from);
3327 // Drain any tbc entries ≥ from (they're nil/false stubs the
3328 // interpreter's drive_close would have skipped silently).
3329 while let Some(&s) = self.tbc.last() {
3330 if s < from {
3331 break;
3332 }
3333 self.tbc.pop();
3334 }
3335 0
3336 }
3337
3338 /// P12-S7-B — spill the trace's current value for a register to
3339 /// the underlying `vm.stack[base + slot_offset]`. Required before
3340 /// an `Op::Closure` whose inner proto has an `in_stack: true`
3341 /// upval at `slot_offset` — the helper's `find_or_create_upval`
3342 /// captures a live pointer to `vm.stack[base + slot_offset]`,
3343 /// which must hold the right value at call time (trace IR's
3344 /// Variable hasn't yet been written back).
3345 ///
3346 /// Parameters arrive as i64 from the IR: `slot_offset` is the
3347 /// caller-frame register index (`u32` in practice, depth=0
3348 /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3349 /// `crate::runtime::value::raw` byte for the slot's RegKind;
3350 /// `raw_bits` is the trace Variable's `use_var` payload
3351 /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3352 /// raw `Gc::as_ptr` cast).
3353 #[doc(hidden)]
3354 pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3355 let Some(f) = self.jit_last_lua_frame() else {
3356 self.jit.pending_err =
3357 Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3358 return;
3359 };
3360 let idx = (f.base as usize) + (slot_offset as usize);
3361 if self.stack.len() <= idx {
3362 self.stack.resize(idx + 1, Value::Nil);
3363 }
3364 // SAFETY: caller (trace JIT IR emit) provides matching
3365 // `(tag, raw_bits)` — same shape produced by Value::unpack.
3366 let v = unsafe {
3367 crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3368 };
3369 self.stack[idx] = v;
3370 }
3371
3372 /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3373 /// Mirrors the interp arm (this file ~L5316): copies the
3374 /// generator/state/control triple from `R[A..=A+2]` to
3375 /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3376 /// the iterator function via `begin_call`. v2 only handles
3377 /// `Value::Native` iterators (the canonical `ipairs_iter` /
3378 /// `next` builtins) — a Lua-closure iterator would push a Lua
3379 /// frame mid-trace, breaking `recording_frame_base`, so we
3380 /// deopt by parking a `pending_err` and returning `-1`.
3381 ///
3382 /// `slot_offset` is the caller-frame register index (=
3383 /// `inst.a()` decoded from a u32-wide field). `nvars` is
3384 /// `inst.c() as i32` — the caller's expected return count.
3385 /// P12-S12-C v1 — refresh only the raw payload of
3386 /// `vm.stack[base + slot_offset]`, preserving its existing
3387 /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3388 /// uses this when the slot's `RegKind` is `Unset` (no compile-
3389 /// time tag info; commonly `Str` slots which the trace doesn't
3390 /// model). The interp's previous execution of the same op
3391 /// already populated the slot with the right tag — the trace
3392 /// only needs to swap in its current raw value.
3393 #[doc(hidden)]
3394 pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3395 let Some(f) = self.jit_last_lua_frame() else {
3396 return;
3397 };
3398 let idx = (f.base as usize) + (slot_offset as usize);
3399 if idx >= self.stack.len() {
3400 return;
3401 }
3402 let (tag, _) = self.stack[idx].unpack();
3403 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3404 self.stack[idx] = unsafe {
3405 crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3406 };
3407 }
3408
3409 /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3410 ///
3411 /// Mirrors the interp arm (this file ~L5112): `self.top =
3412 /// base + a + n; concat_run(base + a)`. Result lands at
3413 /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3414 /// deopt (any error from `concat_run` OR detection that the
3415 /// metamethod path was taken — `concat_run` returns `Ok(())`
3416 /// after `begin_meta_call` which has pushed a Lua frame the
3417 /// trace can't safely continue past).
3418 ///
3419 /// The frame-push detection uses `pre/post frames.len()` and
3420 /// unwinds any pushed frames before deopting, so the
3421 /// dispatcher's existing deopt path sees a clean stack.
3422 #[doc(hidden)]
3423 pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3424 if self.jit.pending_err.is_some() {
3425 return -1;
3426 }
3427 let Some(f) = self.jit_last_lua_frame() else {
3428 self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3429 return -1;
3430 };
3431 let abs_a = f.base + slot_offset;
3432 self.top = abs_a + n as u32;
3433 let pre_frames = self.frames.len();
3434 let result = self.concat_run(abs_a);
3435 let post_frames = self.frames.len();
3436 // Frame-push = metamethod path taken (begin_meta_call pushed
3437 // a Lua frame). The trace can't continue past it; unwind +
3438 // deopt so interp redoes Op::Concat in the slow path.
3439 while self.frames.len() > pre_frames {
3440 frames_pop_sync(&mut self.frames, &mut self.frames_top);
3441 }
3442 if let Err(e) = result {
3443 self.jit.pending_err = Some(e);
3444 return -1;
3445 }
3446 if post_frames > pre_frames {
3447 self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3448 return -1;
3449 }
3450 0
3451 }
3452
3453 /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3454 /// accumulator buffer pool, returning a raw pointer. The trace
3455 /// fn's IR holds this pointer in a stack slot through the loop
3456 /// and calls `jit_str_buf_extend` per iter. If the pool is
3457 /// empty, allocate fresh.
3458 ///
3459 /// Safety: the returned pointer is valid until
3460 /// `jit_str_buf_release` is called or the Vm is dropped. The
3461 /// caller MUST not retain it across `enter_jit` boundaries.
3462 #[doc(hidden)]
3463 pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3464 let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3465 // Move into a Box so the pointer is stable until release.
3466 Box::into_raw(Box::new(buf))
3467 }
3468
3469 /// P14-S14-B v2 — return a previously-acquired buffer to the
3470 /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3471 /// buffer is `clear`ed (capacity retained) so the next acquire
3472 /// gets a ready-to-extend Vec.
3473 ///
3474 /// Safety: `buf` must have been returned by a prior
3475 /// `jit_str_buf_acquire` on the same Vm.
3476 #[doc(hidden)]
3477 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3478 pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3479 if buf.is_null() {
3480 return;
3481 }
3482 // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3483 let mut owned = unsafe { Box::from_raw(buf) };
3484 owned.clear();
3485 if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3486 self.jit.str_buf_pool.push(*owned);
3487 }
3488 // Else: drop the buffer.
3489 }
3490
3491 /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3492 /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3493 /// the piece slot) and passes it through; we treat it as a
3494 /// `*mut LuaStr` and append its bytes.
3495 ///
3496 /// Returns 0 on success, -1 if the piece isn't a Str (would
3497 /// trip __concat metamethod path → deopt to interp).
3498 ///
3499 /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3500 /// trace's piece slot raw bits.
3501 #[doc(hidden)]
3502 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3503 pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3504 if buf.is_null() || str_ptr == 0 {
3505 return -1;
3506 }
3507 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3508 let buf = unsafe { &mut *buf };
3509 let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3510 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3511 let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3512 buf.extend_from_slice(bytes);
3513 0
3514 }
3515
3516 /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3517 /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3518 /// the trace to write into the accumulator slot.
3519 ///
3520 /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3521 /// (the v2 hard cap; the trace deopts).
3522 ///
3523 /// Safety: `buf` from prior `acquire`. The buffer is left
3524 /// CLEAR (drained) ready for `release`.
3525 #[doc(hidden)]
3526 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3527 pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3528 if buf.is_null() {
3529 return 0;
3530 }
3531 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3532 let buf = unsafe { &mut *buf };
3533 let bytes = std::mem::take(buf);
3534 // v2 hard cap at 256KB per RFC Q3.
3535 if bytes.len() > 256 * 1024 {
3536 return 0;
3537 }
3538 let gc = self.heap.intern(&bytes);
3539 gc.as_ptr() as i64
3540 }
3541
    /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
    ///
    /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
    /// v3: ipairs `inext` fast path at the top — skip begin_call
    ///     when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
    ///     R[A+2]=Int.
    /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
    ///     caller-provided buffers + return R[A+4]'s tag byte. Lets
    ///     emit skip 3 separate `luna_jit_stack_load` calls and 1
    ///     `luna_jit_stack_tag` call by reading the buffer via
    ///     cranelift `stack_load` IR instead. Returns -1 on deopt.
    ///
    /// Parameters:
    /// - `slot_offset` — register `A`, relative to the top Lua frame's
    ///   base.
    /// - `nvars` — number of loop variables / results requested (`C`).
    /// - `ctrl_out`, `key_out`, `val_out` — out-pointers that receive the
    ///   raw payloads of R[A+2], R[A+4] and R[A+5] respectively
    ///   (`val_out` gets 0 when `nvars < 2`). They are written on every
    ///   non-deopt return and are not null-checked.
    ///
    /// Returns the tag byte of R[A+4] widened to `i64`, or `-1` on deopt
    /// (error already pending, no Lua frame, non-Native iterator on the
    /// slow path, or `begin_call` failure). On deopt an error is parked in
    /// `jit.pending_err` and the out-pointers are left untouched.
    #[doc(hidden)]
    #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
    pub fn jit_op_tforcall(
        &mut self,
        slot_offset: u32,
        nvars: i32,
        ctrl_out: *mut i64,
        key_out: *mut i64,
        val_out: *mut i64,
    ) -> i64 {
        // A previously parked error wins; do not touch any state.
        if self.jit.pending_err.is_some() {
            return -1;
        }
        let Some(f) = self.jit_last_lua_frame() else {
            self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
            return -1;
        };
        let abs = f.base + slot_offset;
        // Make sure R[A]..=R[A+6] exist so the indexing below cannot go
        // out of bounds.
        let need = (abs + 7) as usize;
        if self.stack.len() < need {
            self.stack.resize(need, Value::Nil);
        }
        // v3 fast path.
        let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
            && std::ptr::fn_addr_eq(
                n.f,
                crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
            )
            && let Value::Table(t) = self.stack[(abs + 1) as usize]
            && t.metatable().is_none()
            && let Value::Int(i) = self.stack[(abs + 2) as usize]
        {
            let next_i = i.wrapping_add(1);
            let v = t.get_int(next_i);
            if v.is_nil() {
                // Iteration finished: only the key slot is set (to Nil).
                self.stack[(abs + 4) as usize] = Value::Nil;
            } else {
                self.stack[(abs + 4) as usize] = Value::Int(next_i);
                if (nvars as usize) >= 2 {
                    self.stack[(abs + 5) as usize] = v;
                }
                // Any loop variables beyond (key, value) are set to Nil.
                // Slots past the current stack end are skipped, since only
                // R[A+6] and below were guaranteed above.
                for j in 2..nvars as usize {
                    let slot = abs + 4 + j as u32;
                    if (slot as usize) < self.stack.len() {
                        self.stack[slot as usize] = Value::Nil;
                    }
                }
            }
            true
        } else {
            false
        };
        if !took_fast_path {
            // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
            // route through begin_call. Lua-closure iters would push
            // a Lua frame mid-trace → deopt.
            self.stack[(abs + 4) as usize] = self.stack[abs as usize];
            self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
            self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
            if !matches!(self.stack[abs as usize], Value::Native(_)) {
                self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
                return -1;
            }
            if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
                self.jit.pending_err = Some(e);
                return -1;
            }
        }
        // v4 batched writeback — fill the caller's buffers with the
        // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
        // reload via cranelift `stack_load` instead of separate
        // `luna_jit_stack_load` helper calls.
        // SAFETY: reads the `zero` member of the `RawVal` produced by
        // `unpack()`. NOTE(review): presumably `RawVal` is a union and
        // `zero` is its full-width integer view, valid for any payload —
        // confirm in `runtime::value`.
        let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
        let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
        // SAFETY: same `RawVal.zero` read as for `ctrl_raw` above.
        let key_raw = unsafe { key_rv.zero };
        let val_raw = if (nvars as usize) >= 2 {
            // SAFETY: same `RawVal.zero` read as for `ctrl_raw` above.
            unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
        } else {
            0u64
        };
        // SAFETY: relies on the caller passing valid, aligned, writable
        // `*mut i64` out-pointers (per the `#[allow]` note: stack buffers
        // set up by the Cranelift-emitted prologue). They are not
        // null-checked here.
        unsafe {
            ctrl_out.write(ctrl_raw as i64);
            key_out.write(key_raw as i64);
            val_out.write(val_raw as i64);
        }
        key_tag as i64
    }
3644
3645 /// P12-S12-B-v2 — load the raw `i64` payload of
3646 /// `vm.stack[base + slot_offset]` for the active trace's head
3647 /// Lua frame. Used to reload trace IR `Variable`s after a
3648 /// helper has written to `vm.stack` directly (e.g. TForCall's
3649 /// iter results land at `R[A+4..A+4+nvars]`).
3650 #[doc(hidden)]
3651 pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3652 let Some(f) = self.jit_last_lua_frame() else {
3653 return 0;
3654 };
3655 let idx = (f.base as usize) + (slot_offset as usize);
3656 if idx >= self.stack.len() {
3657 return 0;
3658 }
3659 let v = self.stack[idx];
3660 let (_, raw) = v.unpack();
3661 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3662 unsafe { raw.zero as i64 }
3663 }
3664
3665 /// P12-S12-B-v2 — read the tag byte of
3666 /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3667 /// to dispatch on the iterator's return-key tag at runtime
3668 /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3669 /// deopt for v2).
3670 #[doc(hidden)]
3671 pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3672 let Some(f) = self.jit_last_lua_frame() else {
3673 return crate::runtime::value::raw::NIL;
3674 };
3675 let idx = (f.base as usize) + (slot_offset as usize);
3676 if idx >= self.stack.len() {
3677 return crate::runtime::value::raw::NIL;
3678 }
3679 self.stack[idx].unpack().0
3680 }
3681
3682 /// P12-S4-step4b — push a Lua frame onto the call stack with
3683 /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3684 /// at trace side-exits to recreate the inlined call activations
3685 /// the lowerer compiled past. The contract (enforced by the
3686 /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3687 /// `nresults` is the caller's expected count (today always 1
3688 /// because the lowerer bails Op::Call C != 2), and the caller
3689 /// has already called `jit_ensure_stack` to cover
3690 /// `[0..base + cl.proto.max_stack)`.
3691 #[doc(hidden)]
3692 pub fn jit_push_inlined_frame(
3693 &mut self,
3694 cl: Gc<LuaClosure>,
3695 base: u32,
3696 pc: u32,
3697 nresults: i32,
3698 ) {
3699 frames_push_sync(
3700 &mut self.frames,
3701 &mut self.frames_top,
3702 CallFrame::Lua(Frame {
3703 closure: cl,
3704 base,
3705 pc,
3706 // Lua call ABI: callee R[0] sits at caller R[A+1], so
3707 // callee.base = caller.base + A + 1; func_slot is
3708 // caller.base + A = callee.base - 1.
3709 func_slot: base - 1,
3710 n_varargs: 0,
3711 nresults,
3712 hook_oldpc: u32::MAX,
3713 from_c: false,
3714 tm: None,
3715 is_hook: false,
3716 tailcalls: 0,
3717 }),
3718 );
3719 }
3720
3721 /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
3722 /// should set to `false` so `load`/`loadstring` reject bytecode input
3723 /// (which bypasses parser limits and could exploit verifier gaps).
3724 pub fn set_bytecode_loading(&mut self, enabled: bool) {
3725 self.bytecode_loading = enabled;
3726 }
3727
3728 /// Current bytecode-loading gate state.
3729 pub fn bytecode_loading(&self) -> bool {
3730 self.bytecode_loading
3731 }
3732
3733 /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
3734 /// bytecode is a strictly larger trust surface than luna's own dump
3735 /// format (third-party toolchain bugs, malformed chunks, unknown
3736 /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
3737 /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
3738 pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
3739 self.puc_bytecode_loading = enabled;
3740 }
3741
3742 /// Current PUC bytecode-loading gate state.
3743 pub fn puc_bytecode_loading(&self) -> bool {
3744 self.puc_bytecode_loading
3745 }
3746
3747 /// Default loader input budget — 256 MiB.
3748 ///
3749 /// `Vm::load` and the Lua-level `load(reader, ...)` both refuse
3750 /// sources whose byte length crosses this cap, returning the
3751 /// PUC-shaped `not enough memory` error rather than letting the
3752 /// host allocator try (and crash) to hold the next chunk.
3753 pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 * 1024 * 1024;
3754
3755 /// Set the loader input byte budget (see
3756 /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
3757 /// effectively disable. Smaller caps are honored verbatim — a 0
3758 /// cap rejects every non-empty source.
3759 pub fn set_loader_input_budget(&mut self, bytes: usize) {
3760 self.loader_input_budget = bytes;
3761 }
3762
3763 /// Current loader input byte budget.
3764 pub fn loader_input_budget(&self) -> usize {
3765 self.loader_input_budget
3766 }
3767
3768 /// Take the error traceback captured at the latest error point and
3769 /// reset it. Embedders should call this immediately after a failed
3770 /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3771 /// entry clears it. Returns `None` if no error was in flight.
3772 pub fn take_error_traceback(&mut self) -> Option<String> {
3773 self.error_traceback
3774 .take()
3775 .map(|b| String::from_utf8_lossy(&b).into_owned())
3776 }
3777
3778 /// Arm the soft memory cap (P09 embedding). The run loop checks the
3779 /// heap's tracked byte usage between dispatch turns; on overshoot it
3780 /// first runs a full collect, and if `bytes` still exceeds the cap it
3781 /// raises a catchable `"memory cap exceeded"` Lua error and disarms
3782 /// itself (fire-once: re-arm before the next `call_value` if reusing
3783 /// the Vm across requests). `None` removes the cap. The accounting is
3784 /// approximate — internal Vec/Box capacity overhead is not tracked,
3785 /// so embedders should size the cap with ~2× margin over the desired
3786 /// hard limit and additionally bound the Vm's lifetime (drop after
3787 /// each request).
3788 pub fn set_memory_cap(&mut self, cap: Option<usize>) {
3789 self.heap.mem_cap = cap;
3790 }
3791
3792 /// Approximate bytes the heap is currently holding. Object shells plus
3793 /// every table's internal array/hash boxes (tracked via
3794 /// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
3795 /// bytecode and closure upvalue slices still go uncounted — this is a
3796 /// lower bound, not a precise `malloc_stats`-style total.
3797 pub fn memory_used(&self) -> usize {
3798 self.heap.bytes()
3799 }
3800
3801 /// Read upvalue slot `i` of the native function currently on top of the
3802 /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3803 /// when no native is running. Public so the C ABI trampoline can fetch
3804 /// the host C function pointer it stashed there at registration time.
3805 pub fn running_native_upvalue(&self, i: usize) -> Value {
3806 match self.running_natives.last() {
3807 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3808 Some(nc) => unsafe {
3809 let upvals = &(*nc.as_ptr()).upvals;
3810 upvals.get(i).copied().unwrap_or(Value::Nil)
3811 },
3812 None => Value::Nil,
3813 }
3814 }
3815
3816 /// Register a table for finalization if its (just-set) metatable carries a
3817 /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3818 /// `__gc` to the metatable afterwards does not retroactively register).
3819 pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3820 if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3821 self.heap.register_finalizable(t);
3822 }
3823 }
3824
3825 /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3826 /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3827 /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3828 /// metatable that `newproxy` returned, which then needs to flow through.
3829 /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3830 /// would re-check at metatable-set time); luna's only userdata
3831 /// finalizables today come via `newproxy`, which registers itself.
3832 #[allow(dead_code)]
3833 pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3834 if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3835 self.heap.register_finalizable_userdata(u);
3836 }
3837 }
3838
3839 /// Run pending `__gc` finalizers (objects the collector resurrected for
3840 /// finalization). Finalizer errors are swallowed — PUC turns them into a
3841 /// warning; they must never propagate to the mutator. Reentrancy-guarded.
3842 fn run_finalizers(&mut self) {
3843 let _ = self.run_finalizers_or_err();
3844 }
3845
/// Run every finalizer the collector queued and report the first error for
/// the dialects that raise it.
///
/// Returns `Ok(())` straight away when a finalizer pass is already running
/// (`gc_finalizing`) or when nothing is pending. Otherwise each pending
/// object's `__gc` metamethod is looked up and, if it is a closure or a
/// native, called with the object as its only argument.
///
/// Error policy per finalizer failure:
/// - `LuaVersion::Lua54` and later: the message is wrapped as
///   `error in __gc metamethod (…)` and sent to `emit_warn`; it is never
///   returned.
/// - `LuaVersion::Lua52` up to (not including) 5.4: the first failure is
///   wrapped the same way and returned once the loop is done.
/// - Older versions: the first failure is returned unchanged.
///
/// The loop does not stop at a failure; the remaining finalizers still run.
fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
    // Reentrancy guard: a finalizer that triggers another pass is a no-op.
    if self.gc_finalizing {
        return Ok(());
    }
    let pending = self.heap.take_tobefnz();
    if pending.is_empty() {
        return Ok(());
    }
    self.gc_finalizing = true;
    // Only the first error is kept; later ones are dropped.
    let mut first_err: Option<LuaError> = None;
    for obj in pending {
        let gc = self.get_mm(obj, Mm::Gc);
        // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
        // schedule the object for finalization (`__gc = true` is the
        // canonical placeholder); only call it at finalize time when it
        // is actually a function. gc.lua 5.2 :412 wires up exactly this
        // sentinel and then expects no call.
        let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
        if callable {
            // PUC `GCTM` sets `CIST_FIN` on the new ci so
            // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
            // `name = "__gc"`. luna threads the same outcome through the
            // generic `pending_tm` slot: the Lua frame born from this
            // call consumes it in `push_frame`. Saved/restored around the
            // call in case the handler is a native (which never pops it).
            // Bare event name; `frame_name` / `c_frame_name` add the
            // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
            // the convention used by `__close`, `__index`, …
            let saved_tm = self.pending_tm.replace("gc");
            // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
            // pcall, so `getinfo(2).namewhat` inside the finalizer reads
            // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
            // luna mirrors by temporarily tagging the topmost Lua frame's
            // `tm` with the bare event name "gc" for the duration of the
            // call.
            let caller_tm_idx = self
                .frames
                .iter()
                .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
            // `Some(prev)` holds the frame's previous tag (itself an
            // Option); `None` means there was no Lua frame to tag.
            let saved_caller_tm = caller_tm_idx.and_then(|i| {
                if let CallFrame::Lua(fr) = &mut self.frames[i] {
                    let prev = fr.tm;
                    fr.tm = Some("gc");
                    Some(prev)
                } else {
                    None
                }
            });
            if let Err(e) = self.call_value(gc, &[obj]) {
                // PUC 5.1 GCTM raised the finalizer's error to the
                // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
                // baselines on `not pcall(collectgarbage)`). 5.2/5.3
                // wrapped it in `error in __gc metamethod (msg)` first
                // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
                // introduced the warning system and switched to "warn
                // then continue" — never re-raise, just route the
                // wrapped message through `warn`. gc.lua 5.5 :378 wires
                // up `_WARN` capture under the `if T then …` block to
                // baseline on the same wrapped string.
                if self.version >= LuaVersion::Lua54 {
                    let inner = self.error_text(&e);
                    let msg = format!("error in __gc metamethod ({inner})");
                    self.emit_warn(msg.as_bytes(), false);
                } else if first_err.is_none() {
                    let wrapped = if self.version >= LuaVersion::Lua52 {
                        let inner = self.error_text(&e);
                        let msg = format!("error in __gc metamethod ({inner})");
                        let s = Value::Str(self.heap.intern(msg.as_bytes()));
                        LuaError(s)
                    } else {
                        e
                    };
                    first_err = Some(wrapped);
                }
            }
            // Undo both tags whether the call succeeded or failed.
            self.pending_tm = saved_tm;
            if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
                && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
            {
                fr.tm = prev; // prev is Option<&'static str>; restore exactly
            }
        }
    }
    self.gc_finalizing = false;
    match first_err {
        Some(e) => Err(e),
        None => Ok(()),
    }
}
3934
3935 /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
3936 /// Crosses up to three phases per call:
3937 /// 1. Pause → seed Propagate (`gc_start_propagate`)
3938 /// 2. Propagate → drain gray up to `budget`; on exhaustion run atomic
3939 /// (`gc_finish_atomic` → tobefnz populated; finalizers
3940 /// run via `run_finalizers`) and enter Sweep
3941 /// 3. Sweep → `gc_sweep_step` up to (residual) `budget`
3942 /// Returns true when this call completed the cycle's sweep (back to
3943 /// Pause). The budget is spent generously across phases — a large `n`
3944 /// can finish a whole cycle in one call (PUC stop-the-world step).
3945 pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
3946 // Re-entry guard: never recurse — `run_finalizers` calls Lua code
3947 // that may hit a safe point and try to step again. Re-entry was OK
3948 // under STW (collect_garbage had its own guard) but here the
3949 // intermediate phase state would corrupt.
3950 if self.gc_finalizing {
3951 return false;
3952 }
3953 if self.heap.gc_phase_is_pause() {
3954 let (roots, extra) = self.gc_roots();
3955 self.heap.gc_start_propagate(&roots, &extra);
3956 }
3957 if self.heap.gc_phase_is_propagate() {
3958 if !self.heap.gc_step_propagate(budget) {
3959 return false;
3960 }
3961 self.heap.gc_finish_atomic();
3962 // any __gc scheduled by atomic — run before sweep so a finalizer
3963 // re-registering `self` re-enters the next cycle, not this sweep
3964 self.run_finalizers();
3965 }
3966 // either we just transitioned, or we entered already in Sweep, or
3967 // a finalizer started a new cycle (gc_sweep_step is a no-op then)
3968 self.heap.gc_sweep_step(budget)
3969 }
3970
3971 // ---- frames & calls ----
3972
/// Begin calling `stack[func_slot]` with `nargs` arguments (`None`: every
/// slot from `func_slot + 1` up to `self.top`).
///
/// `nresults` is the number of results the caller wants (`-1` is used by
/// the pcall helpers to ask for all of them); `from_c` is forwarded to the
/// new Lua frame.
///
/// The callee is resolved in a loop:
/// - Lua closure: run through the JIT fast path when it applies
///   (`Ok(false)`), otherwise push a frame (`Ok(true)`) and possibly start
///   recording a call-triggered trace.
/// - Native: async natives stash a future and return the sentinel
///   `Err(LuaError(Value::Nil))`; `pcall`/`xpcall`/`pairs`-with-`__pairs`
///   are redirected to their continuation-based helpers; every other native
///   runs inline with call/return hooks and panic trapping (`Ok(false)`).
/// - Anything else: its `__call` metamethod is inserted in front of the
///   value and the loop retries, up to a version-dependent chain cap.
///
/// Returns `Ok(true)` when the dispatch loop should continue in a newly
/// pushed frame or continuation, `Ok(false)` when the call completed inline.
fn begin_call(
    &mut self,
    func_slot: u32,
    nargs: Option<u32>,
    nresults: i32,
    from_c: bool,
) -> Result<bool, LuaError> {
    let mut nargs = match nargs {
        Some(n) => n,
        None => self.top - (func_slot + 1),
    };
    // Consume `pending_tailcalls` at the boundary: a tail-call op sets it
    // only for the immediately-following Lua activation. Native dispatch
    // (or `__call` resolution) below must not let it leak to the next
    // begin_call's frame; restore it just before push_frame for the Lua
    // arm so its meaning is preserved across __call chaining.
    let tailcalls = std::mem::take(&mut self.pending_tailcalls);
    // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
    // is inserted before the value so it becomes the first argument, and a
    // chain of `__call` tables resolves down to a real function.
    let mut chain = 0u32;
    loop {
        match self.stack[func_slot as usize] {
            Value::Closure(cl) => {
                // P11-S2c.B JIT fast path: if the Proto's body fits
                // the int-arith whitelist, every arg is `Value::Int`,
                // and the cached arity matches, skip frame setup and
                // run the cached native fn in-place.
                // NOTE(review): this path restores `pending_tailcalls`
                // although no frame is pushed to consume it, so a non-zero
                // count stays pending after the return. Confirm the caller
                // (or `try_jit_call_op`) accounts for that.
                if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
                    self.pending_tailcalls = tailcalls;
                    return Ok(false);
                }
                self.pending_tailcalls = tailcalls;
                self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
                // P12-S4-step0 — trace-on-call trigger. The frame
                // we just pushed is the callee whose body the
                // recorder will trace. Bump the per-Proto call
                // counter; once it crosses `CALL_HOT_THRESHOLD`
                // and no other trace is in flight, snapshot the
                // callee's register window (R[0..max_stack]) and
                // begin recording at `pc=0`. This is what unlocks
                // tracing for functions whose body has no negative
                // `Op::Jmp` back-edge (`fib`, recursive helpers).
                //
                // Gated on `self.jit.trace_enabled`, so the default
                // dispatch pays a single not-taken branch.
                if self.jit.trace_enabled {
                    let proto = cl.proto;
                    // `c` is the count BEFORE this call's increment; the
                    // counter stops growing at `u32::MAX / 2`.
                    let c = proto.call_hot_count.get();
                    if c < u32::MAX / 2 {
                        proto.call_hot_count.set(c + 1);
                    }
                    // P13-S13-H — relaxed call-trigger:
                    // `c >= THRESHOLD` (was `c == THRESHOLD`) +
                    // `!already_cached` short-circuit. Lets a
                    // discarded short call-trigger close retry
                    // on the next call (fib(10/15/20/25)
                    // pathology — first capture is base-case
                    // [Lt,Jmp,Return1]; coverage-heuristic
                    // discards; next call gets to record at a
                    // potentially deeper recursion point).
                    // Without `already_cached`, the relaxed
                    // condition would re-record over a cached
                    // trace every call.
                    //
                    // P13-S13-K — additionally short-circuit on
                    // `proto.trace_gave_up`. The S13-I discard
                    // cap force-compiles a partial trace and
                    // flips this flag; subsequent calls into
                    // this Proto skip the RefCell borrow + Vec
                    // scan entirely.
                    if proto.trace_gave_up.get() {
                        return Ok(true);
                    }
                    let call_already_cached =
                        proto.traces.borrow().iter().any(|t| t.head_pc == 0);
                    if c >= crate::jit::trace::CALL_HOT_THRESHOLD
                        && self.jit.active_trace.is_none()
                        && !call_already_cached
                    {
                        // The new frame is on top: index in
                        // `self.frames` is `len() - 1`.
                        let frame_idx = self.frames.len() - 1;
                        // Snapshot R[0..max_stack] at the callee's
                        // base. `push_frame` resized `self.stack`
                        // to `base + max_stack`, so this window is
                        // guaranteed in-bounds.
                        let f = match &self.frames[frame_idx] {
                            CallFrame::Lua(f) => f,
                            _ => unreachable!("push_frame just pushed a Lua frame"),
                        };
                        let max_stack = cl.proto.max_stack as usize;
                        let base_us = f.base as usize;
                        // One type tag per register, in register order.
                        let mut entry_tags = Vec::with_capacity(max_stack);
                        for i in 0..max_stack {
                            let (tag, _) = self.stack[base_us + i].unpack();
                            entry_tags.push(tag);
                        }
                        self.jit.active_trace =
                            Some(Box::new(crate::jit::trace::TraceRecord::start(
                                cl.proto, 0, entry_tags, true,
                            )));
                        self.jit.recording_frame_base = frame_idx;
                    }
                }
                return Ok(true);
            }
            Value::Native(nc) => {
                // v1.1 B10 Stage 2 — async-marked NativeClosure.
                // Route through the cooperative-yield mechanism
                // when async_mode is on; reject when called from
                // a sync `eval`/`call_value` path (would have no
                // executor to drive the returned future).
                if nc.is_async {
                    if !self.async_mode {
                        let s = Value::Str(
                            self.heap.intern(b"async native called in sync context"),
                        );
                        self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
                        return Err(LuaError(s));
                    }
                    // Same root-up bookkeeping as the sync path:
                    // pin args + result-count expectation so a
                    // collection across the suspend boundary
                    // keeps the arg window live.
                    self.native_nresults = nresults;
                    self.gc_top = func_slot + nargs + 1;
                    // v1.3 Phase AS — fire the "call" hook BEFORE
                    // building the future. Mirrors the sync native
                    // path's `hook_call(true, nargs)` site
                    // (`exec.rs` further down) so embedders with a
                    // Rust debug hook installed see a Call event
                    // for async natives identical to the sync
                    // path. The matching "return" hook fires from
                    // `commit_async_native_result` in
                    // `async_drive.rs` after the future resolves.
                    // Placement follows audit §"Open questions"
                    // Q6: after the `native_nresults` / `gc_top`
                    // pin, before the future is constructed, so a
                    // hook body that triggers GC observes the
                    // correct pinned window. On hook error the
                    // sentinel never returns and
                    // `pending_async_native_*` remain `None` —
                    // the executor sees `DispatchOutcome::Error`
                    // (audit §A.1 edge cases).
                    self.hook_call(true, nargs)?;
                    // Transmute the stored NativeFn back to its
                    // real AsyncNativeFn shape. Sound because
                    // `set_async_native` / `create_async_native`
                    // installed an AsyncNativeFn through the
                    // identically-sized fn-pointer slot, and the
                    // `is_async` marker bit is what records that
                    // fact.
                    let async_fn: crate::vm::async_drive::AsyncNativeFn =
                        // SAFETY: same-size fn pointers; provenance
                        // preserved through `mem::transmute`. The
                        // `is_async` marker is the only safe-to-call
                        // gate, set exclusively by
                        // `Vm::create_async_native`.
                        unsafe { std::mem::transmute(nc.f) };
                    let vm_ptr: *mut Vm = self;
                    let fut = async_fn(vm_ptr, func_slot, nargs);
                    // Stash the future + post-call context for
                    // `drive_one` to surface to `EvalFuture::poll`.
                    self.pending_async_native_fut = Some(fut);
                    self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
                        func_slot,
                        nargs,
                        nresults,
                        gc_top: self.gc_top,
                    });
                    // Sentinel Err walked up to `drive_one` (same
                    // shape as `host_yield_pending`'s budget yield).
                    // Value::Nil — never seen by user code.
                    return Err(LuaError(Value::Nil));
                }
                // pcall/xpcall are yieldable: rather than calling the
                // protected function through the Rust stack (which cannot be
                // suspended), push a continuation frame and drive the call
                // through the interpreter loop (PUC lua_pcallk). A yield
                // inside it is preserved with the thread's saved frames.
                use crate::runtime::value::NativeFn;
                if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
                    return self.begin_pcall(func_slot, nargs, nresults);
                }
                if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
                    return self.begin_xpcall(func_slot, nargs, nresults);
                }
                // pairs(t) with a __pairs metamethod calls it yieldably (PUC
                // luaB_pairs); without one, fall through to the plain native.
                if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
                    let arg = self.stack[(func_slot + 1) as usize];
                    if !self.get_mm(arg, Mm::Pairs).is_nil() {
                        return self.begin_pairs(func_slot, nresults);
                    }
                }
                // a native that collects (e.g. `collectgarbage`) roots up to
                // its own arguments — the caller's live registers all sit
                // below `func_slot` and stay rooted.
                self.native_nresults = nresults;
                self.gc_top = func_slot + nargs + 1;
                // Push the native onto the running-natives chain BEFORE
                // firing the call hook so that `debug.getinfo(level)` and
                // `arg_error` from inside the hook see this native as the
                // currently-running C function (db.lua :344 reads
                // `getinfo(2, "f").func` for the just-entered callee).
                // Popped after the matching return hook fires — even on
                // error, the pop must happen, so every exit path below
                // pops both stacks explicitly.
                self.running_natives.push(nc);
                self.running_native_slots.push((func_slot, nargs));
                // PUC luaD_precall fires the "call" hook for C functions too.
                // A yield inside the native (coroutine.yield) propagates an
                // Err and the matching "return" hook fires on resume instead.
                if let Err(e) = self.hook_call(true, nargs) {
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                    return Err(e);
                }
                // P09: trap a Rust panic in the native and surface it as
                // a Lua error rather than letting it unwind through the
                // VM into the embedder. The VM's internal state may still
                // be inconsistent after a panic (half-pushed args,
                // dangling GC references), so embedders that catch this
                // class of error should drop and re-create the Vm — but
                // it's still better than tearing the host process down.
                // `AssertUnwindSafe` is sound because the caller is the
                // dispatch loop and any half-done state is fenced behind
                // the immediate Err return below.
                use std::panic::{AssertUnwindSafe, catch_unwind};
                let result =
                    match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
                        Ok(r) => r,
                        Err(payload) => {
                            let msg = panic_payload_str(&payload);
                            let s = Value::Str(
                                self.heap.intern(format!("native panic: {msg}").as_bytes()),
                            );
                            Err(LuaError(s))
                        }
                    };
                let nret = match result {
                    Ok(n) => n,
                    Err(e) => {
                        // Stash the offending native's name BEFORE the
                        // pop so a dying coroutine's traceback snapshot
                        // can prepend `[C]: in function '<name>'`. Use
                        // pushglobalfuncname (PUC walks package.loaded
                        // to qualify); fall back to "?".
                        self.errored_native =
                            Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        return Err(e);
                    }
                };
                // PUC `luaD_poscall` fires the return hook BEFORE moving
                // results into the function's slot — at that point args
                // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
                // results above them at `[func_slot + 1 + nargs, …)`.
                // luna's `nat_return` has already written the results
                // into `[func_slot, func_slot + nret)`, so we replay PUC's
                // layout by copying the results up past the preserved
                // args, firing the hook (with ftransfer = nargs + 1, so
                // `getlocal(2, ftransfer..)` reads results), and then
                // copying back for `finish_results`. db.lua :541 reads
                // `getinfo("r").ftransfer` + `getlocal` to inspect a
                // returning native's results this way.
                if self.hook.ret
                    && !self.in_hook
                    && (self.hook.func.is_some() || self.hook.rust_func.is_some())
                {
                    let res_dst = func_slot + nargs + 1;
                    let need = (res_dst + nret) as usize;
                    if self.stack.len() < need {
                        self.stack.resize(need, Value::Nil);
                    }
                    // The destination lies above the source and the ranges
                    // may overlap, so copy from the last element down.
                    for i in (0..nret).rev() {
                        self.stack[(res_dst + i) as usize] =
                            self.stack[(func_slot + i) as usize];
                    }
                    // widen the C-frame's argument window for getlocal
                    if let Some(slot) = self.running_native_slots.last_mut() {
                        slot.1 = nargs + nret;
                    }
                    // Defer the hook's error until the stack and the
                    // running-native bookkeeping are restored.
                    let hr = self.hook_return(true, nargs + 1, nret);
                    if let Some(slot) = self.running_native_slots.last_mut() {
                        slot.1 = nargs;
                    }
                    // restore results into the slot finish_results expects
                    for i in 0..nret {
                        self.stack[(func_slot + i) as usize] =
                            self.stack[(res_dst + i) as usize];
                    }
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                    hr?;
                } else {
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                }
                self.finish_results(func_slot, nret, nresults);
                // the native may have allocated; collect with the results as
                // the live boundary (PUC checks GC after a call returns).
                self.maybe_collect_garbage(self.top);
                return Ok(false);
            }
            v => {
                // Not directly callable: fall back to `__call`.
                let mm = self.get_mm(v, Mm::Call);
                if mm.is_nil() {
                    return Err(self.call_err(v));
                }
                chain += 1;
                // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
                // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
                // and the 5.5 test exercises the new tight bound directly
                // (calls.lua :225 builds a 16-deep chain and expects the
                // 16th to error). 5.4 calls.lua :194 instead builds a 20-
                // deep chain and expects it to succeed.
                let cap = if self.version >= crate::version::LuaVersion::Lua55 {
                    15
                } else {
                    MAX_CCMT
                };
                if chain > cap {
                    return Err(self.rt_err("'__call' chain too long"));
                }
                // slots above shift by one; at a call site those are dead
                // temps of the current frame
                self.stack.insert(func_slot as usize, mm);
                if self.top > func_slot {
                    self.top += 1;
                }
                // The original callee is now the handler's first argument.
                nargs += 1;
            }
        }
    }
}
4314
/// Push a Lua activation record for closure `cl`, whose function value sits
/// at `func_slot` with `nargs` arguments directly above it.
///
/// Steps, in order: stack-depth check; vararg rotation so extra arguments
/// end up just below the new `base`; growing and nil-wiping the register
/// window; pushing the `Frame` (which consumes `pending_tm`,
/// `pending_is_hook` and `pending_tailcalls`); building the 5.1 compat `arg`
/// table when the proto asks for it; firing the "call" / "tail call" hook.
///
/// # Errors
/// - `"stack overflow"` (or `"error in error handling"` while
///   `msgh_depth > 0`) when `func_slot + 256` exceeds `MAX_LUA_STACK`.
/// - Whatever the call hook returns. The frame has already been pushed at
///   that point.
fn push_frame(
    &mut self,
    cl: Gc<LuaClosure>,
    func_slot: u32,
    nargs: u32,
    nresults: i32,
    from_c: bool,
) -> Result<(), LuaError> {
    if func_slot + 256 > MAX_LUA_STACK {
        // PUC `stackerror`: a stack overflow that surfaces while the
        // current activation is inside an xpcall message handler is
        // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
        // error handling". errors.lua :606 expects the inner pcall(loop)
        // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
        // message matching "error handling".
        let msg = if self.msgh_depth > 0 {
            "error in error handling"
        } else {
            "stack overflow"
        };
        return Err(self.rt_err(msg));
    }
    let proto = cl.proto;
    let nparams = proto.num_params as u32;
    // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
    // stack just below the new `base`, so a named vararg can be indexed
    // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
    // `[e1..em][p1..pn]` so the fixed params land at the new base.
    let n_varargs = if proto.is_vararg {
        nargs.saturating_sub(nparams)
    } else {
        0
    };
    if n_varargs > 0 {
        let s = (func_slot + 1) as usize;
        self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
    }
    let base = func_slot + 1 + n_varargs;
    let need = (base + proto.max_stack as u32) as usize;
    if self.stack.len() < need {
        self.stack.resize(need, Value::Nil);
    }
    // wipe the register window beyond the kept parameters (stale values —
    // required for GC-safety and codegen). The varargs below `base` survive.
    let kept = nargs.saturating_sub(n_varargs).min(nparams);
    // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
    // need` since `base + nparams <= base + max_stack = need` and `kept <=
    // nparams`. `slice::fill` lowers to a single memset on Copy types.
    // NOTE(review): this relies on `num_params <= max_stack` for every
    // proto — presumably guaranteed by the compiler; confirm.
    unsafe {
        self.stack
            .get_unchecked_mut((base + kept) as usize..need)
            .fill(Value::Nil);
    }
    frames_push_sync(
        &mut self.frames,
        &mut self.frames_top,
        CallFrame::Lua(Frame {
            closure: cl,
            base,
            pc: 0,
            func_slot,
            nresults,
            hook_oldpc: u32::MAX,
            from_c,
            n_varargs,
            // single-shot consume: `close_slots` sets pending_tm before each
            // handler call; the next Lua frame born is that handler's.
            tm: self.pending_tm.take(),
            // `run_hook` sets `pending_is_hook` before dispatching the user
            // hook so its frame reports `namewhat = "hook"` via getinfo.
            is_hook: std::mem::take(&mut self.pending_is_hook),
            // Also single-shot: the pending tail-call count moves into
            // this frame and the slot is reset.
            tailcalls: std::mem::take(&mut self.pending_tailcalls),
        }),
    );
    // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
    // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
    // the slot at `base + nparams`; the extras sit just below `base` from
    // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
    // hook; vararg.lua's contradictory expectations were already going to
    // fail either way (some asserts want `arg == nil`).
    if proto.has_compat_vararg_arg {
        let arg_slot = (base + nparams) as usize;
        let t = self.heap.new_table();
        {
            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
            let tm = unsafe { t.as_mut() };
            for i in 0..n_varargs {
                let v = self.stack[(base - n_varargs + i) as usize];
                // bounded by `n_varargs` (≤ MAXUPVAL territory), well
                // below `MAX_ASIZE`; the result is deliberately ignored
                let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
            }
            let nk = Value::Str(self.heap.intern(b"n"));
            tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
                .expect("'n' key");
        }
        // once-per-table barrier mirrors SETLIST: t is born BLACK during
        // Propagate and the bulk `set_int`/`set` calls above don't barrier
        self.heap
            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
        self.stack[arg_slot] = Value::Table(t);
    }
    // PUC luaD_precall fires the "call" hook with the new frame current, so
    // a hook calling debug.getinfo(2) sees the entered function. For a Lua
    // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
    // fixed params count — extras already live below `base`).
    // A frame born via OP_TailCall fires "tail call" instead (PUC
    // luaD_pretailcall) and skips the matching "return" hook on exit.
    let is_tail = self
        .frames
        .last()
        .and_then(|f| f.lua())
        .is_some_and(|f| f.tailcalls > 0);
    self.hook_call_with(false, nparams, is_tail)?;
    Ok(())
}
4431
4432 /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4433 /// the protected call `f` through the interpreter loop. The protected
4434 /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4435 /// at `func_slot+1` lets its results land one slot above the continuation —
4436 /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4437 /// Always returns `Ok(true)`: a continuation is now on the stack to be
4438 /// resolved by the loop (even when `f` is a native that already ran inline).
4439 fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4440 if nargs == 0 {
4441 return Err(crate::vm::builtins::raise_str(
4442 self,
4443 "bad argument #1 to 'pcall' (value expected)",
4444 ));
4445 }
4446 if self.pcall_depth >= MAX_C_DEPTH {
4447 return Err(self.rt_err("C stack overflow"));
4448 }
4449 self.pcall_depth += 1;
4450 frames_push_sync(
4451 &mut self.frames,
4452 &mut self.frames_top,
4453 CallFrame::Cont(NativeCont {
4454 kind: ContKind::Pcall,
4455 func_slot,
4456 nresults,
4457 }),
4458 );
4459 // call f (slot func_slot+1) with the remaining args, asking for all
4460 // results; a yield or error inside propagates with the continuation kept
4461 // on the stack (caught by `unwind` / preserved across a yield).
4462 self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4463 Ok(true)
4464 }
4465
4466 /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4467 /// message handler is stashed in the continuation and the arguments are
4468 /// shifted down over the handler's slot so `f`'s args are contiguous.
4469 fn begin_xpcall(
4470 &mut self,
4471 func_slot: u32,
4472 nargs: u32,
4473 nresults: i32,
4474 ) -> Result<bool, LuaError> {
4475 if nargs < 2 {
4476 return Err(crate::vm::builtins::raise_str(
4477 self,
4478 "bad argument #2 to 'xpcall' (value expected)",
4479 ));
4480 }
4481 if self.pcall_depth >= MAX_C_DEPTH {
4482 return Err(self.rt_err("C stack overflow"));
4483 }
4484 self.pcall_depth += 1;
4485 // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4486 // close its gap so f's args become [f@+1, a1@+2, ...].
4487 let handler = self.stack[(func_slot + 2) as usize];
4488 let nfargs = nargs - 2;
4489 for i in 0..nfargs {
4490 self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4491 }
4492 self.top = func_slot + 2 + nfargs;
4493 frames_push_sync(
4494 &mut self.frames,
4495 &mut self.frames_top,
4496 CallFrame::Cont(NativeCont {
4497 kind: ContKind::Xpcall { handler },
4498 func_slot,
4499 nresults,
4500 }),
4501 );
4502 self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4503 Ok(true)
4504 }
4505
4506 /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4507 /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4508 /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4509 /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4510 /// results land exactly where `pairs`'s results belong.
4511 fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4512 let arg = self.stack[(func_slot + 1) as usize];
4513 let mm = self.get_mm(arg, Mm::Pairs);
4514 // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4515 self.stack[func_slot as usize] = mm;
4516 self.top = func_slot + 2;
4517 frames_push_sync(
4518 &mut self.frames,
4519 &mut self.frames_top,
4520 CallFrame::Cont(NativeCont {
4521 kind: ContKind::Pairs,
4522 func_slot,
4523 nresults,
4524 }),
4525 );
4526 self.begin_call(func_slot, Some(1), 4, true)?;
4527 Ok(true)
4528 }
4529
4530 /// The running (top) Lua frame. The interpreter only reads this while a Lua
4531 /// frame is on top — a continuation frame is never the running frame (it is
4532 /// consumed the instant the call it protects unwinds onto it).
4533 #[inline]
4534 fn top_frame(&self) -> &Frame {
4535 self.frames
4536 .last()
4537 .and_then(CallFrame::lua)
4538 .expect("running Lua frame")
4539 }
4540
4541 #[inline]
4542 fn top_frame_mut(&mut self) -> &mut Frame {
4543 self.frames
4544 .last_mut()
4545 .and_then(CallFrame::lua_mut)
4546 .expect("running Lua frame")
4547 }
4548
4549 /// Pad/announce results sitting at func_slot.
4550 pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4551 // v2.3 P1B-A: capture the call's high-water-mark before
4552 // setting the new top so we can Nil-clear slots that the
4553 // call temporarily wrote but no longer holds — matching
4554 // PUC's `L->top` discipline (slots past L->top are "free"
4555 // and the next push overwrites them). Without this clear,
4556 // a stale `Value::Closure` (e.g. the called function
4557 // itself, when wanted = 0) sits at `func_slot` and a
4558 // later GC with wider `gc_top` traces it after the
4559 // closure has been freed by a previous narrow safe-point
4560 // GC → heap-buffer-overflow in `Marker::header` (UAF-A
4561 // sort.lua AA case).
4562 let prev_top = self.top as usize;
4563 if wanted < 0 {
4564 self.top = func_slot + nret;
4565 } else {
4566 let wanted = wanted as u32;
4567 let need = (func_slot + wanted) as usize;
4568 if self.stack.len() < need {
4569 self.stack.resize(need, Value::Nil);
4570 }
4571 for i in nret..wanted {
4572 self.stack[(func_slot + i) as usize] = Value::Nil;
4573 }
4574 self.top = func_slot + wanted;
4575 }
4576 let new_top = self.top as usize;
4577 let clear_end = prev_top.min(self.stack.len());
4578 if new_top < clear_end {
4579 for slot in &mut self.stack[new_top..clear_end] {
4580 *slot = Value::Nil;
4581 }
4582 }
4583 }
4584
    /// v1.1 B10 Stage 1 — current call-frame depth (read-only): the number of
    /// entries on the frame stack. Every entry counts, whether it is a Lua
    /// frame or a continuation frame.
    /// Used by `EvalFuture` on the bootstrap poll to compute the
    /// `entry_depth` it will pass to subsequent resume slices.
    pub(crate) fn frame_count(&self) -> usize {
        self.frames.len()
    }
4591
4592 fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4593 let nret = self.top - func_slot;
4594 let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4595 self.stack.truncate(func_slot as usize);
4596 self.top = func_slot;
4597 out
4598 }
4599
4600 // ---- open upvalues ----
4601
4602 #[doc(hidden)]
4603 pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4604 match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4605 Ok(i) => self.open_upvals[i].1,
4606 Err(i) => {
4607 let uv = self.heap.new_upvalue(UpvalState::Open {
4608 slot,
4609 thread: self.current,
4610 });
4611 self.open_upvals.insert(i, (slot, uv));
4612 uv
4613 }
4614 }
4615 }
4616
4617 pub(crate) fn close_from(&mut self, slot: u32) {
4618 while let Some(&(s, uv)) = self.open_upvals.last() {
4619 if s < slot {
4620 break;
4621 }
4622 let v = self.stack[s as usize];
4623 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4624 unsafe { uv.as_mut() }.set_closed(v);
4625 self.heap
4626 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4627 self.open_upvals.pop();
4628 }
4629 }
4630
4631 /// Register a to-be-closed slot (TBC op / generic-for closing value).
4632 fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4633 let v = self.stack[slot as usize];
4634 if matches!(v, Value::Nil | Value::Bool(false)) {
4635 return Ok(()); // nil and false are silently ignored
4636 }
4637 if self.get_mm(v, Mm::Close).is_nil() {
4638 // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4639 // (a <type> value)"; the local's name comes from the running
4640 // frame's locvars at this pc.
4641 let tn = v.type_name();
4642 let f = self.top_frame();
4643 let reg = slot - f.base;
4644 let pc = (f.pc as usize).saturating_sub(1);
4645 let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4646 Some(n) => format!("variable '{n}'"),
4647 None => "to-be-closed slot".to_string(),
4648 };
4649 return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4650 }
4651 debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4652 self.tbc.push(slot);
4653 Ok(())
4654 }
4655
    /// Close upvalues and run `__close` handlers for slots ≥ `from`
    /// (handlers in reverse registration order; PUC luaF_close).
    ///
    /// * `from` — lowest stack slot to close.
    /// * `err` — the error object being propagated, if this close is part of
    ///   error unwinding; `None` for a normal close.
    ///
    /// Returns `Ok(())` when no handler failed, otherwise the most recent
    /// error produced while closing. The incoming `err` itself is never
    /// returned: it is only handed to the handlers.
    fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
        self.close_from(from);
        // PUC: handlers run in reverse declaration order; an error raised by a
        // handler becomes the error object passed to the remaining ones, and
        // the rest are still closed. The last raised error propagates.
        let mut pending = err;
        let mut result = Ok(());
        // `closing_err` is overwritten per handler below; put the caller's
        // value back once the whole chain has run.
        let saved_err = self.closing_err;
        // On a normal close the handler runs within the closing function's
        // activation (debug parent = that function); during error unwinding the
        // function's frame is already gone, so the handler sits at the C
        // boundary instead (PUC: luaF_close runs after the ci is restored).
        // Note this is fixed from the incoming `err`, not from `pending`.
        let error_close = err.is_some();
        while let Some(&s) = self.tbc.last() {
            if s < from {
                break;
            }
            self.tbc.pop();
            let v = self.stack[s as usize];
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                // PUC `prepclosingmethod`: the __close metamethod was present
                // at OP_TBC (else we would have errored there) but has since
                // been removed/replaced. Treat as a non-callable target.
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                // The synthesized error becomes both the object threaded to
                // the remaining handlers and the result so far.
                pending = Some(e.0);
                result = Err(e);
                continue;
            }
            // root the pending error: a handler may trigger a collection
            self.closing_err = pending;
            // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
            // getinfo report the handler as "in metamethod 'close'". Saved/
            // restored around the call to cover the path where `mm` is a
            // native (`push_frame` never consumes it) or it raises before
            // reaching push_frame.
            let saved_tm = self.pending_tm.replace("close");
            // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
            // is nil on a normal close (5.4 locals.lua :875's
            // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
            // through the yield). PUC 5.5 dropped the trailing nil: a clean
            // close passes only `obj`, the error case still passes both
            // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
            // normal-close arms, n=2 for the error arm).
            let call = match pending {
                Some(e) => self.call_value_impl(mm, &[v, e], error_close),
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        self.call_value_impl(mm, &[v], error_close)
                    } else {
                        self.call_value_impl(mm, &[v, Value::Nil], error_close)
                    }
                }
            };
            self.pending_tm = saved_tm;
            // A failing handler replaces the pending error; the loop still
            // goes on to close the remaining slots.
            if let Err(e) = call {
                pending = Some(e.0);
                result = Err(e);
            }
        }
        self.closing_err = saved_err;
        result
    }
4727
    /// Yieldable variant of `close_slots`: drive the chain of `__close`
    /// handlers for slots ≥ `from` through the interpreter loop with a
    /// `ContKind::Close` continuation, so a `coroutine.yield()` inside any
    /// handler suspends cleanly (the close iteration's state rides on the
    /// thread's frame/stack like any other suspended call) — PUC's
    /// `lua_callk` pattern applied to `luaF_close`. `after` runs when every
    /// slot is closed; if `after` is `Return` and we've returned past
    /// `entry_depth`, `Ok(Some(vals))` carries the result up to the host
    /// caller.
    ///
    /// * `from` — lowest stack slot to close.
    /// * `err` — error object being propagated, or `None` on a normal close.
    /// * `after` — what to do once the chain is exhausted.
    /// * `entry_depth` — frame depth of the host entry, forwarded to
    ///   `drive_close`.
    fn begin_close(
        &mut self,
        from: u32,
        err: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        // Upvalues are closed up front, in one step; only the `__close`
        // handlers are driven through `drive_close`.
        self.close_from(from);
        self.drive_close(from, err, after, entry_depth)
    }
4746
    /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
    /// non-callable-mm error for an `__close` that was reset to a bad value
    /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
    /// handler pushes a `ContKind::Close` continuation + `begin_call` and
    /// returns `Ok(None)`; the interpreter then drives the handler and
    /// re-enters this driver via the `ContKind::Close` consumer in `run()`.
    /// When the chain is exhausted, the threaded error (if any) propagates or
    /// `after` fires.
    ///
    /// * `from` — lowest stack slot to close.
    /// * `pending` — error object threaded through the chain so far.
    /// * `after` — action handed to `finish_close_after` once drained.
    /// * `entry_depth` — forwarded to `finish_close_after`.
    fn drive_close(
        &mut self,
        from: u32,
        mut pending: Option<Value>,
        after: AfterClose,
        entry_depth: usize,
    ) -> Result<Option<Vec<Value>>, LuaError> {
        loop {
            // Nothing left at or above `from`: the chain is finished.
            let drained = match self.tbc.last() {
                None => true,
                Some(&s) => s < from,
            };
            if drained {
                return self.finish_close_after(after, pending, entry_depth);
            }
            let s = self.tbc.pop().expect("tbc non-empty");
            let v = self.stack[s as usize];
            if matches!(v, Value::Nil | Value::Bool(false)) {
                continue;
            }
            let mm = self.get_mm(v, Mm::Close);
            if mm.is_nil() {
                // `__close` vanished since registration: the synthesized
                // error replaces the pending one and the loop moves on.
                let tn = self.obj_typename(v);
                let e = self.rt_err(&format!(
                    "attempt to call a {tn} value (metamethod 'close')"
                ));
                pending = Some(e.0);
                continue;
            }
            // A real handler: stage [mm, v, (err?)] above the current top,
            // record the close iteration state in a ContKind::Close, and let
            // the interpreter dispatch the handler. On return the run() head
            // re-enters this driver via the ContKind::Close consumer.
            let func_slot = self.top;
            let error_close = pending.is_some();
            // Room for the handler plus at most two arguments.
            let need = (func_slot + 3) as usize;
            if self.stack.len() < need {
                self.stack.resize(need, Value::Nil);
            }
            self.stack[func_slot as usize] = mm;
            self.stack[func_slot as usize + 1] = v;
            // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
            // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
            let nargs = match pending {
                Some(e) => {
                    self.stack[func_slot as usize + 2] = e;
                    2u32
                }
                None => {
                    if self.version >= LuaVersion::Lua55 {
                        1u32
                    } else {
                        self.stack[func_slot as usize + 2] = Value::Nil;
                        2u32
                    }
                }
            };
            self.top = func_slot + 1 + nargs;
            // Root the pending error during the call (a handler may collect).
            let saved_err = self.closing_err;
            self.closing_err = pending;
            // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
            // for traceback / getinfo.
            let saved_tm = self.pending_tm.replace("close");
            frames_push_sync(
                &mut self.frames,
                &mut self.frames_top,
                CallFrame::Cont(NativeCont {
                    kind: ContKind::Close(CloseCont {
                        from,
                        pending,
                        after,
                    }),
                    func_slot,
                    nresults: 0,
                }),
            );
            // PUC luaF_close runs a normal close *within* the closing
            // function's activation (debug parent = that function); during an
            // error unwind the function's frame is already gone and the
            // handler sits at the C boundary instead.
            let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
            // Both saved fields are restored as soon as `begin_call` returns,
            // before its result is inspected, so the error path restores
            // them too.
            self.pending_tm = saved_tm;
            self.closing_err = saved_err;
            r?;
            return Ok(None);
        }
    }
4842
4843 /// Fire `after` once every `__close` handler has run. `Block` propagates
4844 /// any remaining error or simply continues; `Return` performs OP_Return's
4845 /// tail (hook + frame pop + result delivery) and may surface results to
4846 /// the host when the function whose return triggered the close was the
4847 /// entry activation, but only on a clean drain — a pending error skips
4848 /// the return tail and propagates instead. `ResumeUnwind` pops the
4849 /// deferred Lua frame and re-raises, letting a handler's own error win
4850 /// over the original propagating one (PUC luaF_close).
4851 fn finish_close_after(
4852 &mut self,
4853 after: AfterClose,
4854 pending: Option<Value>,
4855 entry_depth: usize,
4856 ) -> Result<Option<Vec<Value>>, LuaError> {
4857 match after {
4858 AfterClose::Block => match pending {
4859 Some(e) => Err(LuaError(e)),
4860 None => Ok(None),
4861 },
4862 AfterClose::Return {
4863 abs_a,
4864 nret,
4865 from_native,
4866 } => match pending {
4867 Some(e) => Err(LuaError(e)),
4868 None => self.complete_return(abs_a, nret, from_native, entry_depth),
4869 },
4870 AfterClose::ResumeUnwind { func_slot, err } => {
4871 // The aborting Lua frame was popped before `begin_close`;
4872 // restore the catcher's stack window down to `func_slot` and
4873 // re-raise — preferring a handler-raised error over the
4874 // original (PUC luaF_close).
4875 self.stack.truncate(func_slot as usize);
4876 self.top = func_slot;
4877 self.tbc.retain(|&s| s < func_slot);
4878 Err(LuaError(pending.unwrap_or(err)))
4879 }
4880 }
4881 }
4882
4883 /// OP_Return's post-close tail: fire the "return" hook (frame still
4884 /// current), pop the Lua frame, slide results into `func_slot`, then
4885 /// either hand them to the host (`Ok(Some(vals))` when we've returned
4886 /// past `entry_depth`), leave them contiguous for an exposed
4887 /// pcall/xpcall continuation, or finish into the caller's expected
4888 /// result slot. Mirrors the synchronous OP_Return tail so both paths
4889 /// share semantics — the `from_native` flag selects the right "return"
4890 /// hook context for `hook_return`.
4891 fn complete_return(
4892 &mut self,
4893 abs_a: u32,
4894 nret: u32,
4895 from_native: bool,
4896 entry_depth: usize,
4897 ) -> Result<Option<Vec<Value>>, LuaError> {
4898 // ftransfer is the local index (1-based) of the first result, as
4899 // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
4900 // exposes locals starting at `frame.base` (= func_slot + 1 +
4901 // n_varargs for a vararg call), so the conversion is the absolute
4902 // result slot minus base, plus one to make it 1-based. db.lua 5.4
4903 // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
4904 // shape end-to-end.
4905 let ftransfer = self
4906 .frames
4907 .last()
4908 .and_then(CallFrame::lua)
4909 .map(|fr| {
4910 let raw = abs_a.saturating_sub(fr.base) + 1;
4911 // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
4912 // local injected at index `numparams + 1`, so getlocal
4913 // numbering shifts results past it (5.5 db.lua :539
4914 // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
4915 if fr.closure.proto.has_vararg_table_pseudo {
4916 raw + 1
4917 } else {
4918 raw
4919 }
4920 })
4921 .unwrap_or(1);
4922 // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
4923 // per tail call that collapsed into this activation, *after* its
4924 // own "return". `tailcalls` tracks that count exactly (PUC
4925 // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
4926 // "return" hook fires once even when the activation absorbed
4927 // multiple tail calls — only `istailcall` on getinfo surfaces the
4928 // collapse. 5.1 db.lua :366 pins the event ordering.
4929 let tailcalls = if self.version <= LuaVersion::Lua51 {
4930 self.frames
4931 .last()
4932 .and_then(|f| f.lua())
4933 .map(|f| f.tailcalls)
4934 .unwrap_or(0)
4935 } else {
4936 0
4937 };
4938 self.hook_return(from_native, ftransfer, nret)?;
4939 for _ in 0..tailcalls {
4940 self.hook_tail_return()?;
4941 }
4942 let CallFrame::Lua(fr) =
4943 frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
4944 else {
4945 unreachable!("returning from a non-Lua frame")
4946 };
4947 for i in 0..nret {
4948 self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
4949 }
4950 if self.frames.len() < entry_depth {
4951 self.top = fr.func_slot + nret;
4952 return Ok(Some(self.take_results(fr.func_slot)));
4953 } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
4954 self.top = fr.func_slot + nret;
4955 } else {
4956 self.finish_results(fr.func_slot, nret, fr.nresults);
4957 }
4958 Ok(None)
4959 }
4960
4961 #[doc(hidden)]
4962 pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
4963 match cl.upvals()[idx as usize].state() {
4964 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
4965 UpvalState::Closed(v) => v,
4966 }
4967 }
4968
4969 fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
4970 let uv = cl.upvals()[idx as usize];
4971 match uv.state() {
4972 UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
4973 UpvalState::Closed(_) => {
4974 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4975 unsafe { uv.as_mut() }.set_closed(v);
4976 // forward barrier: a closed upvalue is single-slot, so the
4977 // forward variant is cheaper than barrier_back (PUC uses
4978 // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
4979 // tables / threads).
4980 self.heap
4981 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4982 }
4983 }
4984 }
4985
4986 // ---- register / error helpers ----
4987
4988 #[inline(always)]
4989 fn r(&self, base: u32, i: u32) -> Value {
4990 // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
4991 // at frame entry (`push_frame` sizes the stack up to base + max_stack),
4992 // and every bytecode-generated reference falls within `[0, max_stack)`.
4993 // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
4994 // reason. The bounds check would re-validate this invariant on every
4995 // op — the dispatch hot path can't afford it.
4996 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4997 unsafe { *self.stack.get_unchecked((base + i) as usize) }
4998 }
4999
5000 #[inline(always)]
5001 fn set_r(&mut self, base: u32, i: u32, v: Value) {
5002 // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
5003 // frame-entry contract.
5004 unsafe {
5005 *self.stack.get_unchecked_mut((base + i) as usize) = v;
5006 }
5007 }
5008
5009 #[doc(hidden)]
5010 pub fn rt_err(&mut self, msg: &str) -> LuaError {
5011 let text = match self.position_prefix() {
5012 Some(p) => format!("{p}{msg}"),
5013 None => msg.to_string(),
5014 };
5015 LuaError(Value::Str(self.heap.intern(text.as_bytes())))
5016 }
5017
5018 pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
5019 let extra = self.subject_varinfo(v);
5020 let tn = self.obj_typename(v);
5021 self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
5022 }
5023
5024 /// Name the offending operand of the current instruction (PUC varinfo) for
5025 /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
5026 /// to the instruction's subject register(s); a native-raised error whose
5027 /// current instruction doesn't hold `bad` simply yields "".
5028 fn subject_varinfo(&self, bad: Value) -> String {
5029 use crate::vm::isa::Op;
5030 let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5031 return String::new();
5032 };
5033 let proto = f.closure.proto;
5034 let p: &crate::runtime::Proto = &proto;
5035 let pc = f.pc as usize;
5036 if pc == 0 || pc > p.code.len() {
5037 return String::new();
5038 }
5039 let instr = p.code[pc - 1];
5040 let mut cands: Vec<u32> = Vec::new();
5041 match instr.op() {
5042 // indexed reads / length / method: the table/object is in B
5043 Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5044 cands.push(instr.b());
5045 }
5046 // indexed writes / calls: the table/function is in A
5047 Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5048 cands.push(instr.a());
5049 }
5050 // arithmetic/bitwise: a register operand (B, and C unless constant)
5051 Op::Add
5052 | Op::Sub
5053 | Op::Mul
5054 | Op::Div
5055 | Op::Mod
5056 | Op::Pow
5057 | Op::IDiv
5058 | Op::BAnd
5059 | Op::BOr
5060 | Op::BXor
5061 | Op::Shl
5062 | Op::Shr => {
5063 cands.push(instr.b());
5064 if !instr.k() {
5065 cands.push(instr.c());
5066 }
5067 }
5068 Op::Unm | Op::BNot => cands.push(instr.b()),
5069 Op::Concat => {
5070 let a = instr.a();
5071 for r in a..a + instr.b() {
5072 cands.push(r);
5073 }
5074 }
5075 _ => {}
5076 }
5077 for reg in cands {
5078 if self.r(f.base, reg).raw_eq(bad) {
5079 return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5080 Some((kind, name)) => format!(" ({kind} '{name}')"),
5081 None => String::new(),
5082 };
5083 }
5084 }
5085 String::new()
5086 }
5087
5088 /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5089 /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5090 /// 'add')" when the call is a metamethod dispatched by the current opcode.
5091 fn call_err(&mut self, v: Value) -> LuaError {
5092 let extra = self.call_target_varinfo(v);
5093 let tn = self.obj_typename(v);
5094 self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5095 }
5096
5097 /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5098 /// frame before the call, so the opcode that triggered it lives in the
5099 /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5100 /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5101 fn call_target_varinfo(&self, bad: Value) -> String {
5102 use crate::vm::isa::Op;
5103 let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5104 return String::new();
5105 };
5106 let proto = f.closure.proto;
5107 let p: &crate::runtime::Proto = &proto;
5108 let pc = f.pc as usize;
5109 if pc == 0 || pc > p.code.len() {
5110 return String::new();
5111 }
5112 let instr = p.code[pc - 1];
5113 match instr.op() {
5114 Op::Call | Op::TailCall => {
5115 let reg = instr.a();
5116 if self.r(f.base, reg).raw_eq(bad) {
5117 match crate::vm::objname::getobjname(p, pc - 1, reg) {
5118 Some((kind, name)) => format!(" ({kind} '{name}')"),
5119 None => String::new(),
5120 }
5121 } else {
5122 String::new()
5123 }
5124 }
5125 op => match mm_event_name(op) {
5126 Some(ev) => format!(" (metamethod '{ev}')"),
5127 None => String::new(),
5128 },
5129 }
5130 }
5131
5132 /// "number has no integer representation", enriched (PUC luaG_tointerror)
5133 /// with a "(field 'x')"-style suffix naming the offending operand of the
5134 /// current arithmetic instruction when it can be recovered from bytecode.
5135 fn no_int_rep_err(&mut self) -> LuaError {
5136 let extra = self.bad_operand_varinfo();
5137 self.rt_err(&format!("number{extra} has no integer representation"))
5138 }
5139
5140 /// Inspect the current frame's faulting instruction: find the register
5141 /// operand holding a float with no integer representation and name it.
5142 fn bad_operand_varinfo(&self) -> String {
5143 let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5144 return String::new();
5145 };
5146 let proto = f.closure.proto;
5147 let p: &crate::runtime::Proto = &proto;
5148 let pc = f.pc as usize;
5149 if pc == 0 || pc > p.code.len() {
5150 return String::new();
5151 }
5152 let instr = p.code[pc - 1];
5153 let mut regs = vec![instr.b()];
5154 if !instr.k() {
5155 regs.push(instr.c());
5156 }
5157 for reg in regs {
5158 let v = self.r(f.base, reg);
5159 if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5160 return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5161 Some((kind, name)) => format!(" ({kind} '{name}')"),
5162 None => String::new(),
5163 };
5164 }
5165 }
5166 String::new()
5167 }
5168
5169 /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5170 /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5171 /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5172 /// native call), PUC pushes no prefix — match that by looking only at the
5173 /// topmost frame directly and bailing if it is anything but a Lua frame.
5174 pub(crate) fn position_prefix(&self) -> Option<String> {
5175 let f = self.frames.last().and_then(CallFrame::lua)?;
5176 let proto = f.closure.proto;
5177 if proto.source.as_bytes().is_empty() {
5178 return Some(self.stripped_prefix());
5179 }
5180 if proto.lines.is_empty() {
5181 return None;
5182 }
5183 let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5184 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5185 let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5186 let display = crate::vm::lib_debug::chunk_id(raw);
5187 let src = String::from_utf8_lossy(&display).into_owned();
5188 Some(format!("{src}:{line}: "))
5189 }
5190
5191 /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5192 /// for the source and renders the line as "?" (so the prefix reads
5193 /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5194 /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5195 /// matches `^%?:%-1:`).
5196 fn stripped_prefix(&self) -> String {
5197 if self.version >= crate::version::LuaVersion::Lua55 {
5198 "?:?: ".to_string()
5199 } else {
5200 "?:-1: ".to_string()
5201 }
5202 }
5203
5204 /// Position prefix of the Lua frame `level` steps up from the running C
5205 /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5206 /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5207 /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5208 /// caller's frame is reported even across pcall/xpcall continuations.
5209 pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5210 let fi = match self.dbg_frame(level)? {
5211 DbgKind::Lua(fi) => fi,
5212 DbgKind::C(_) | DbgKind::Tail(_) => return None,
5213 };
5214 let f = self.frames[fi].lua()?;
5215 let proto = f.closure.proto;
5216 // PUC luaG_addinfo: a stripped chunk has no source — see
5217 // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5218 if proto.source.as_bytes().is_empty() {
5219 return Some(self.stripped_prefix());
5220 }
5221 // a stripped chunk carries no per-instruction line info
5222 if proto.lines.is_empty() {
5223 return None;
5224 }
5225 let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5226 // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5227 // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5228 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5229 let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5230 let display = crate::vm::lib_debug::chunk_id(raw);
5231 let src = String::from_utf8_lossy(&display).into_owned();
5232 Some(format!("{src}:{line}: "))
5233 }
5234
5235 // ---- the interpreter ----
5236
5237 fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5238 let entry_depth = self.frames.len();
5239 self.exec_with(entry_depth)
5240 }
5241
    /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
    /// `entry_depth` (captured pre-yield by `drive_one`). Called by
    /// `EvalFuture::poll` on every poll after the first to walk the
    /// existing call frames until the next `BudgetExhausted` or
    /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
    /// embedder reaches it through `Vm::eval_async`.
    ///
    /// This is a direct delegate to `exec_with`, which runs from the current
    /// top frame down to (but not past) `entry_depth` frames. Coroutine
    /// driving passes `entry_depth = 1` so the whole thread runs to
    /// completion or a yield.
    pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
        self.exec_with(entry_depth)
    }
5254
5255 fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5256 loop {
5257 let r = self.run(entry_depth);
5258 if r.is_err()
5259 && (self.yielding.is_some()
5260 || self.terminating.is_some()
5261 || self.host_yield_pending
5262 || self.pending_async_native_fut.is_some())
5263 {
5264 // a `coroutine.yield` is in flight: keep the frames intact (they
5265 // are the suspended coroutine's saved state) and propagate to
5266 // resume. A self-close termination propagates the same way, so a
5267 // protecting pcall on the way out cannot catch (unwind) it.
5268 // v1.1 B10 — `host_yield_pending` is the async-mode
5269 // analogue: the sentinel must reach `drive_one` without
5270 // a protecting `pcall` swallowing it.
5271 return r;
5272 }
5273 match r {
5274 Ok(vals) => return Ok(vals),
5275 // unwind toward `entry_depth`. A protecting pcall/xpcall
5276 // continuation caught along the way turns the error into
5277 // `false, msg` and the loop resumes running its caller; an
5278 // uncaught error propagates out.
5279 Err(e) => match self.unwind(e.0, entry_depth) {
5280 Unwound::Caught => continue,
5281 Unwound::CaughtReturn(vals) => return Ok(vals),
5282 Unwound::Propagated(err) => return Err(err),
5283 },
5284 }
5285 }
5286 }
5287
5288 /// Unwind the call stack from the error point toward `entry_depth`, running
5289 /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
5290 /// continuation frame at/above `entry_depth` (the error is *caught*: its
5291 /// slot receives `false, msg`); if none is reached, the error propagates.
/// Unwinds the call-frame stack after an error was raised, searching for a
/// `pcall`/`xpcall` continuation to catch it.
///
/// `err` is the error value in flight; when it is nil and the VM version is
/// Lua 5.5 or later it is first replaced by the string "<no error object>".
/// `entry_depth` bounds the unwind: frames are popped only while
/// `self.frames.len() >= entry_depth`.
///
/// Per frame kind popped:
/// - `ContKind::Meta` / `ContKind::Pairs` continuations do not catch; the
///   stack is truncated to the continuation's `func_slot` and unwinding
///   goes on.
/// - `ContKind::Close` continuations do not catch; the interrupted close is
///   resumed through `drive_close` with `err` as the pending error.
/// - `ContKind::Pcall` / `ContKind::Xpcall` continuations catch: `false,
///   result` is written at the continuation's `func_slot` (for xpcall,
///   `result` is the first value returned by the message handler).
/// - Lua frames are popped and closed through `begin_close` with
///   `AfterClose::ResumeUnwind`.
///
/// Returns `Unwound::Caught` when the error was caught and the results were
/// handed to a frame still on the stack, or when `drive_close` /
/// `begin_close` returned `Ok(None)`; `Unwound::CaughtReturn(values)` when
/// popping the catcher leaves fewer than `entry_depth` frames; and
/// `Unwound::Propagated(LuaError(err))` when no catcher was found.
///
/// Side effects visible here: captures `self.error_traceback` once per
/// in-flight error and clears it on a catch; decrements `self.pcall_depth`
/// for the popped catcher; brackets each message-handler call with
/// `self.msgh_depth += 1` / `-= 1`.
5292 fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
5293 // PUC 5.5 `luaG_errormsg` substitutes "<no error object>" when the
5294 // error object is nil — so `pcall(function() error(nil) end)` returns
5295 // that string instead of nil, and `assert(nil, nil)` (whose path
5296 // throws nil via `lua_settop(L, 1)`) also surfaces a string. Earlier
5297 // dialects (5.4 and below) keep the nil — 5.4 errors.lua :49 asserts
5298 // `doit("error()") == nil` and luna would fail that if it always
5299 // substituted. luna's native `error()` still does its own conversion
5300 // for direct callers.
5301 if matches!(err, Value::Nil) && self.version >= crate::version::LuaVersion::Lua55 {
5302 err = Value::Str(self.heap.intern(b"<no error object>"));
5303 }
5304 // The protected call runs in-place among the caller frames' registers,
5305 // so truncating the failed frames here cuts into caller windows below
5306 // the catcher. Snapshot the live length: at the error point the stack
5307 // already spans every surviving frame's window, so restoring it after a
5308 // catch reinstates them all (the reclaimed slots above are dead temps).
5309 // PUC handles overflow recovery via a separate EXTRA_STACK reserve;
5310 // we instead clamp the restore to the catcher's caller window when the
5311 // error point was at the stack limit (cause: the next `call_value_impl`
5312 // picks `func_slot = stack.len()` which would otherwise re-overflow).
 // NOTE: in the code below `saved_len` is only the fallback restore length,
 // used when no Lua frame remains on `self.frames` after the catch.
5313 let saved_len = self.stack.len();
5314 // Snapshot the traceback at the error point — before any frame is
5315 // popped — so an `xpcall` msgh (which runs after the failed frames are
5316 // gone) can still describe the error site. The handler frame about to
5317 // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
5318 // visible here; once popped, `debug.traceback` would miss it.
5319 // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
5320 // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
5321 // recovery — locals.lua:659) re-overflows. Capture-once propagates
5322 // through nested unwinds (inner→outer) without re-running msgh.
5323 if self.error_traceback.is_none() {
5324 self.error_traceback = Some(self.traceback_bytes(1));
5325 }
 // Pop frames one at a time until fewer than `entry_depth` remain or an
 // arm returns. The `expect` below relies on `frames` being non-empty
 // whenever the loop condition holds — presumably `entry_depth >= 1`;
 // TODO confirm against callers.
5326 while self.frames.len() >= entry_depth {
5327 match *self.frames.last().expect("frame") {
5328 // a yieldable-metamethod continuation does not catch: discard the
5329 // abandoned instruction and keep unwinding (PUC drops the partial
5330 // op on error).
5331 CallFrame::Cont(NativeCont {
5332 kind: ContKind::Meta(mc),
5333 func_slot,
5334 ..
5335 }) => {
5336 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5337 self.stack.truncate(func_slot as usize);
 // `top` goes back to the saved value, but never above the slot the
 // stack was just truncated to.
5338 self.top = mc.saved_top.min(func_slot);
 // drop to-be-closed registrations at or above the discarded window
5339 self.tbc.retain(|&s| s < func_slot);
5340 }
5341 // a __pairs continuation does not catch either: an error inside
5342 // the metamethod propagates past `pairs`.
5343 CallFrame::Cont(NativeCont {
5344 kind: ContKind::Pairs,
5345 func_slot,
5346 ..
5347 }) => {
5348 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5349 self.stack.truncate(func_slot as usize);
5350 self.top = func_slot;
5351 self.tbc.retain(|&s| s < func_slot);
5352 }
5353 // a __close continuation does not catch: drop the half-run
5354 // handler's window, then continue the close yieldably with
5355 // the new error threaded as `pending`. Preserve `cc.after`
5356 // verbatim — `Return`/`Block` originating from an aborting
5357 // OP_Return/OP_Close will be short-circuited by
5358 // `finish_close_after` (pending propagates as Err); a
5359 // `ResumeUnwind` originated by our own Lua-frame handler
5360 // must keep its deferred frame-pop semantics so that frame
5361 // is not orphaned. If a fresh handler yields, `drive_close`
5362 // pushes another `Cont::Close` and we return `Caught` so
5363 // `exec_with` re-enters the run loop.
5364 CallFrame::Cont(NativeCont {
5365 kind: ContKind::Close(cc),
5366 func_slot,
5367 ..
5368 }) => {
5369 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5370 self.stack.truncate(func_slot as usize);
5371 self.top = func_slot;
5372 self.tbc.retain(|&s| s < func_slot);
5373 match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
5374 Ok(Some(_)) => {
5375 unreachable!(
5376 "Block / Return / ResumeUnwind never return host values mid-unwind"
5377 )
5378 }
5379 Ok(None) => return Unwound::Caught,
 // the close chain finished (or failed) with an error: adopt it as
 // the error in flight and keep unwinding from the new top frame.
5380 Err(e) => {
5381 err = e.0;
5382 continue;
5383 }
5384 }
5385 }
 // Any other continuation kind is a catcher: Pcall or Xpcall (the
 // Meta/Pairs/Close kinds were matched by the arms above).
5386 CallFrame::Cont(nc) => {
5387 frames_pop_sync(&mut self.frames, &mut self.frames_top);
 // NOTE(review): presumably balances an increment made where the
 // protected call was entered (not visible in this chunk) — confirm.
5388 self.pcall_depth -= 1;
5389 let result = match nc.kind {
5390 ContKind::Pcall => err,
5391 ContKind::Xpcall { handler } => {
5392 // PUC keeps `L->errfunc` set across the handler's
5393 // call: `luaG_errormsg` re-fires the handler when
5394 // it raises (so `xpcall(error, err, 170)` lets the
5395 // chain bottom out at err(0) → "END"). luna mirrors
5396 // that by looping until the handler returns or
5397 // luna's `iters` cap forces termination.
5398 //
5399 // The cap models PUC's nCcalls soft window
5400 // (MAXCCALLS/10*11): once tripped, `stackerror`
5401 // raises "C stack overflow" via `luaG_runerror`
5402 // which itself re-enters `luaG_errormsg`, so the
5403 // handler runs once more with that string and
5404 // naturally returns it (errors.lua :637 at N=300).
5405 // We count iterations per Cont::Xpcall rather than
5406 // a global counter — nested xpcalls each get their
5407 // own budget, matching the way PUC's stack frames
5408 // accumulate per dispatch path.
5409 const MSGH_CAP: u32 = MAX_C_DEPTH;
5410 let mut cur_err = err;
5411 let mut iters: u32 = 0;
5412 let mut capped = false;
5413 loop {
 // first time the budget is exhausted: swap in the overflow message
 // and give the handler exactly one more attempt.
5414 if iters >= MSGH_CAP && !capped {
5415 cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
5416 capped = true;
5417 }
5418 iters += 1;
 // `msgh_depth` is raised only for the duration of the handler call.
5419 self.msgh_depth += 1;
5420 let r = self.call_value(handler, &[cur_err]);
5421 self.msgh_depth -= 1;
5422 match r {
 // handler returned: its first value (nil if none) is the result.
5423 Ok(hr) => {
5424 break hr.first().copied().unwrap_or(Value::Nil);
5425 }
5426 Err(_) if capped => {
5427 // the handler still errored on the
5428 // synthesized "C stack overflow"; fall
5429 // back to PUC's LUA_ERRERR string.
5430 break Value::Str(
5431 self.heap.intern(b"error in error handling"),
5432 );
5433 }
 // handler raised before the cap: feed its error back into the
 // handler on the next iteration.
5434 Err(e) => {
5435 cur_err = e.0;
5436 }
5437 }
5438 }
5439 }
5440 ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
5441 unreachable!("Meta/Pairs/Close cont handled above")
5442 }
5443 };
5444 // the error has been caught (pcall/xpcall): the captured
5445 // traceback was for that error and is no longer in flight.
5446 self.error_traceback = None;
 // Deliver `false, result` at the catcher's slot, growing the stack
 // first if the unwind truncated below it.
5447 let fs = nc.func_slot as usize;
5448 if self.stack.len() < fs + 2 {
5449 self.stack.resize(fs + 2, Value::Nil);
5450 }
5451 self.stack[fs] = Value::Bool(false);
5452 self.stack[fs + 1] = result;
5453 self.top = nc.func_slot + 2;
5454 self.tbc.retain(|&s| s < nc.func_slot);
 // Popping the catcher left fewer than `entry_depth` frames: return the
 // values directly instead of delivering them to a frame.
5455 if self.frames.len() < entry_depth {
5456 return Unwound::CaughtReturn(self.take_results(nc.func_slot));
5457 }
5458 self.finish_results(nc.func_slot, 2, nc.nresults);
 // NOTE(review): the two paragraphs below overlap. The first describes
 // the earlier scheme (restore `saved_len`, clamped from above); the
 // second describes the current code, which sizes the stack to the
 // nearest Lua frame's `base + max_stack + 256` and uses `saved_len`
 // only when no Lua frame remains. The reserve is the literal 256 here;
 // `MIN_STACK` is not referenced by this code.
5459 // reinstate the caller windows the unwind truncated into,
5460 // clamped to the catcher's caller window + a `MIN_STACK`
5461 // reserve. The clamp is a no-op for normal pcall catches
5462 // (saved_len lies within the caller's max_stack window),
5463 // and prevents the stack from staying near `MAX_LUA_STACK`
5464 // after an overflow-recovery catch — which would make the
5465 // next `call_value_impl` (e.g. a `__close` in the catcher's
5466 // errorh, locals.lua:659) pick `func_slot = stack.len()`
5467 // above the limit and re-overflow.
5468 // Restore the caller's full register window: opcodes
5469 // index it directly. The cap covers caller's base +
5470 // `max_stack` + a small reserve. We always resize to
5471 // exactly this window — previously this clamped
5472 // `saved_len` from above to prevent staying near
5473 // `MAX_LUA_STACK` after an overflow-recovery catch, and
5474 // a yieldable-unwind re-entry adds the dual case where
5475 // `saved_len` is *below* the window (a prior
5476 // `ResumeUnwind` truncated). Using the window directly
5477 // covers both.
5478 let restore = self
5479 .frames
5480 .iter()
5481 .rev()
5482 .find_map(CallFrame::lua)
5483 .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
5484 .unwrap_or(saved_len);
 // grow or shrink so the stack length is exactly `restore`
5485 if self.stack.len() < restore {
5486 self.stack.resize(restore, Value::Nil);
5487 } else if self.stack.len() > restore {
5488 self.stack.truncate(restore);
5489 }
5490 return Unwound::Caught;
5491 }
5492 CallFrame::Lua(f) => {
5493 // Yieldable error-unwind close, PUC luaG_errormsg shape:
5494 // (1) pop the Lua frame immediately so each `__close`
5495 // handler runs at the C boundary above — `debug.getinfo`
5496 // sees the next outer Lua frame's call site (typically
5497 // `pcall`), not this aborting function (locals.lua:480).
5498 // (2) drive the close yieldably with
5499 // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
5500 // it truncates to `func_slot` and re-raises (letting a
5501 // handler-raised error win over `err`). If a handler
5502 // yields, `drive_close` pushes `Cont::Close` and we
5503 // return `Caught` so `exec_with` re-enters the run loop;
5504 // a synchronous drain returns Err exactly as the old
5505 // path did.
5506 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5507 let after = AfterClose::ResumeUnwind {
5508 func_slot: f.func_slot,
5509 err,
5510 };
5511 match self.begin_close(f.base, Some(err), after, entry_depth) {
5512 Ok(Some(_)) => {
5513 unreachable!("ResumeUnwind never returns host values")
5514 }
5515 Ok(None) => return Unwound::Caught,
 // synchronous drain: continue unwinding with whatever error the
 // close produced.
5516 Err(e) => {
5517 err = e.0;
5518 continue;
5519 }
5520 }
5521 }
5522 }
5523 }
 // No catcher was found among the frames this call was allowed to pop:
 // hand the error in flight back to the caller.
5524 Unwound::Propagated(LuaError(err))
5525 }
5526
5527 fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5528 loop {
5529 // Fast-path slow-check gate: most embedders run with both
5530 // `instr_budget` and `mem_cap` as None, so a single combined
5531 // is_some test lets the hot loop skip both branches with one
5532 // load + branch instead of two.
5533 if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5534 if let Some(b) = self.instr_budget.as_mut() {
5535 *b -= 1;
5536 if *b <= 0 {
5537 self.instr_budget = None;
5538 // v1.1 B10 Stage 1 — async-mode cooperative
5539 // yield. Set a sentinel flag so `exec_with`
5540 // propagates the Err without `unwind` running
5541 // (mirroring the `yielding.is_some()` path),
5542 // and `call_value_impl` preserves the call
5543 // frames for the next `poll`. Translation back
5544 // to `DispatchOutcome::BudgetExhausted` happens
5545 // in `drive_one`. The Err value itself is
5546 // `Value::Nil` — a pure sentinel, never seen by
5547 // user code.
5548 if self.async_mode {
5549 self.host_yield_pending = true;
5550 return Err(LuaError(Value::Nil));
5551 }
5552 // B6: classify the trip so embedders can
5553 // distinguish budget exhaustion from a
5554 // generic Runtime error and retry / give up
5555 // accordingly.
5556 self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5557 let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5558 return Err(LuaError(s));
5559 }
5560 }
5561 if let Some(cap) = self.heap.mem_cap
5562 && self.heap.bytes() > cap
5563 {
5564 // First try a full collect — embedders set tight caps
5565 // and the overshoot may be reclaimable (closures kept
5566 // by short-lived frames, intermediate strings). Only
5567 // disarm + raise if the cap is still breached after
5568 // collection. PUC's `LUA_GCEMERGENCY` path matches.
5569 //
5570 // v2.4 Phase Cleanup REVERTED — the v2.2.0
5571 // `gc_top = self.stack.len()` workaround for
5572 // UAF-B is **still load-bearing** here even after
5573 // v2.3's `finish_results` slot-clear. The cap
5574 // fires during table mutation (`a[i] = i` inside
5575 // a tight loop) at a point that is NOT a
5576 // finish_results boundary — the table grows past
5577 // self.top but never goes through a CALL/RETURN,
5578 // so slot-clear never sees the growing region.
5579 // Docker linux/amd64 toomanyidx_memory_cap
5580 // SIGSEGV'd on the revert; the over-root stays
5581 // as the v2.4 production fix. Tracked in v2.4
5582 // plan-state amendments log.
5583 self.gc_top = self.stack.len() as u32;
5584 self.collect_garbage();
5585 if self.heap.bytes() > cap {
5586 self.heap.mem_cap = None;
5587 let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5588 return Err(LuaError(s));
5589 }
5590 }
5591 }
5592 // Single combined frame fetch: continuation arm OR Lua arm. Saves
5593 // a second `self.frames.last()` slice access vs the prior split
5594 // form (LLVM doesn't always CSE these across the cont branch).
5595 // A continuation frame on top means the call it protected just
5596 // delivered its results — wrap as `true, results…` and hand to
5597 // the pcall/xpcall caller. The error path is handled by `unwind`;
5598 // this branch is only reached on success/resume completion.
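5599 // SAFETY: `unwrap_unchecked` is sound only while `self.frames` is non-empty (NOTE(review): that invariant is established outside this chunk — confirm). Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).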
5600 let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5601 if let &CallFrame::Cont(nc) = frame_peek {
5602 // a yieldable metamethod returned: complete the interrupted
5603 // instruction (PUC luaV_finishOp) and resume the running frame.
5604 if let ContKind::Meta(mc) = nc.kind {
5605 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5606 let result = if self.top > nc.func_slot {
5607 self.stack[nc.func_slot as usize]
5608 } else {
5609 Value::Nil
5610 };
5611 self.stack.truncate(nc.func_slot as usize);
5612 self.top = mc.saved_top;
5613 self.finish_meta(mc.action, result)?;
5614 continue;
5615 }
5616 // a __close handler returned successfully: discard its
5617 // results, restore `top` to the slot the handler was called
5618 // at (the surrounding frame's register window above this slot
5619 // must stay alloc'd — never truncate the underlying stack),
5620 // then continue the close chain (next slot, or fire
5621 // AfterClose). When the close ends an entry activation,
5622 // drive_close hands the results up to exec_with directly.
5623 if let ContKind::Close(cc) = nc.kind {
5624 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5625 self.top = nc.func_slot;
5626 if let Some(vals) =
5627 self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5628 {
5629 return Ok(vals);
5630 }
5631 continue;
5632 }
5633 // __pairs returned: normalize its results to exactly four
5634 // (iterator, state, control, closing) at pairs's slot, where
5635 // the metamethod was called, and hand them to pairs's caller.
5636 if let ContKind::Pairs = nc.kind {
5637 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5638 let total = 4u32;
5639 let need = (nc.func_slot + total) as usize;
5640 if self.stack.len() < need {
5641 self.stack.resize(need, Value::Nil);
5642 }
5643 for s in self.top..(nc.func_slot + total) {
5644 self.stack[s as usize] = Value::Nil;
5645 }
5646 self.top = nc.func_slot + total;
5647 if self.frames.len() < entry_depth {
5648 return Ok(self.take_results(nc.func_slot));
5649 }
5650 self.finish_results(nc.func_slot, total, nc.nresults);
5651 continue;
5652 }
5653 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5654 self.pcall_depth -= 1;
5655 // f's results sit at nc.func_slot+1.. (f was called one slot
5656 // above the continuation), so writing `true` at the slot makes
5657 // `true, results…` already contiguous.
5658 let nret = self.top - (nc.func_slot + 1);
5659 self.stack[nc.func_slot as usize] = Value::Bool(true);
5660 let total = 1 + nret;
5661 self.top = nc.func_slot + total;
5662 if self.frames.len() < entry_depth {
5663 return Ok(self.take_results(nc.func_slot));
5664 }
5665 self.finish_results(nc.func_slot, total, nc.nresults);
5666 continue;
5667 }
5668 // GC runs only at the allocation safe points below (PUC's
5669 // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5670 // no longer collects, so a stale full-window `gc_top` cannot leak in.
5671 //
5672 // Hot-path frame fetch: the Cont arm above continues the loop,
5673 // so reaching here means `frame_peek` is the Lua frame. Reuse it
5674 // rather than re-fetching `self.frames.last()`.
5675 let f = match frame_peek {
5676 CallFrame::Lua(f) => f,
5677 _ => unreachable!("Cont frame survived the dispatch loop head"),
5678 };
5679 let cl = f.closure;
5680 let base = f.base;
5681 let func_slot = f.func_slot;
5682 let n_varargs = f.n_varargs;
5683 let pc = f.pc;
5684 let oldpc = f.hook_oldpc;
5685
5686 // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5687 // — every branch / call op only sets `pc` to a valid index, and
5688 // function entry initialises pc=0 with a non-empty body. PUC's
5689 // `vmfetch` uses the equivalent unchecked load.
5690 let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5691
5692 // P12-S1.C/D — trace recording append + close detection.
5693 // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5694 // so default dispatch keeps a single not-taken branch.
5695 //
5696 // - At the head PC with a non-empty record, the trace has
5697 // looped back to its start: mark `closed = true` and
5698 // take the record (S2 will compile + cache).
5699 // - Otherwise, capture the op. If the record overflows
5700 // MAX_TRACE_LEN, abort by dropping it.
5701 if self.jit.trace_enabled
5702 && let Some(_rec) = self.jit.active_trace.as_mut()
5703 {
5704 // P12-S4 — depth tracking. The trace head's frame is
5705 // at index `recording_frame_base`; every Op::Call that
5706 // pushes a new frame bumps the live depth, every
5707 // Op::Return that pops one decrements it.
5708 //
5709 // **Three clean-close conditions** (P12-S4-step4a):
5710 // - `at_head`: cur_depth == 0 AND about-to-execute the
5711 // trace's head_pc on its head_proto (loop closed back
5712 // to start). Same for loop-triggered and call-triggered
5713 // traces — step4a unified the gating so call-triggered
5714 // no longer closes on the first re-entry (that left
5715 // fib's body at 7 depth=0 ops; step4a lets it inline
5716 // up to MAX_INLINE_DEPTH levels before any close).
5717 // - `returned_past_head`: trace head's frame is gone
5718 // (callee returned past it, or the call-trigger
5719 // started a recording inside a callee that has now
5720 // returned). Whatever ops were recorded form the
5721 // trace body; the lowerer treats the partial trace
5722 // the same as InlineAbort (dispatchable=false until
5723 // step4b's frame materialization lands).
5724 // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5725 // Recording any deeper would just bloat the IR; close
5726 // with the body we have. Lowerer's existing length
5727 // gate + InlineAbort path handles short bodies.
5728 let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5729 let cur_depth = if returned_past_head {
5730 0
5731 } else {
5732 self.frames.len() - 1 - self.jit.recording_frame_base
5733 };
5734 let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5735 let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5736 let at_head_loop = cur_depth == 0
5737 && !rec.ops.is_empty()
5738 && !returned_past_head
5739 && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5740 && pc == rec.head_pc;
5741 // P16-A — self-link cycle catch (mirrors LuaJIT's
5742 // `check_call_unroll` at `lj_record.c:1869`). Trips when:
5743 // 1. We're about to execute the head_pc on head_proto
5744 // at depth > 0 (we're re-entering the trace head
5745 // from inside an inlined recursion level — UpRec).
5746 // 2. The count of ancestor frames in the recording
5747 // window that share `head_proto` exceeds
5748 // [`RECUNROLL_THRESHOLD`] (default 2).
5749 // For fib(N): head_pc=0, head_proto=fib. After 2 inline
5750 // recursion levels are captured, the recorder enters
5751 // the 3rd nested fib frame, sees cur_depth=3 > 2, and
5752 // trips this catch — closing with `SelfRecKind::UpRec`.
5753 // The lowerer's `TraceEnd::SelfLink` tail emits the
5754 // bump-base + branch-to-self loop body.
5755 //
5756 // TailRec vs UpRec: LJ distinguishes via
5757 // `framedepth + retdepth == 0`. luna doesn't track
5758 // retdepth separately; cur_depth == 0 with a non-empty
5759 // call chain in tail position is rare (would require
5760 // explicit Lua TCO). We use cur_depth > 0 as the UpRec
5761 // condition (fib's case); cur_depth == 0 with positive
5762 // ancestor count would route to TailRec, but luna's
5763 // recorder doesn't currently produce that shape because
5764 // tail-call elision pops the caller frame and we'd
5765 // hit `at_head_loop` instead.
5766 let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
5767 if self.jit.p16_self_link_enabled
5768 && !returned_past_head
5769 && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5770 && pc == rec.head_pc
5771 && cur_depth > 0
5772 {
5773 // Count ancestor frames sharing head_proto.
5774 // self.frames[recording_frame_base..] currently
5775 // includes the just-pushed frame at the top
5776 // (the one about to execute head_pc). Ancestors
5777 // = the slice excluding the top frame.
5778 let head_proto_ptr = rec.head_proto.as_ptr();
5779 let last_idx = self.frames.len() - 1;
5780 let mut count = 0usize;
5781 for i in self.jit.recording_frame_base..last_idx {
5782 if let CallFrame::Lua(f) = &self.frames[i]
5783 && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
5784 {
5785 count += 1;
5786 }
5787 }
5788 if count > crate::jit::trace::RECUNROLL_THRESHOLD {
5789 // cur_depth > 0 → UpRec (fib pattern).
5790 // cur_depth == 0 wouldn't reach this arm.
5791 Some(crate::jit::trace::SelfRecKind::UpRec)
5792 } else {
5793 None
5794 }
5795 } else {
5796 None
5797 }
5798 };
5799 if let Some(kind) = self_link_trip {
5800 // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
5801 // self-recursive patterns at frame depth >= 2.
5802 //
5803 // Pre sub-0: a SelfLink trip at the head_pc re-entry
5804 // unconditionally stamped `self_link_kind`. The
5805 // R3a `downrec_close` marker can only fire from the
5806 // depth>0 Op::Return path (`rec.retfs` chain),
5807 // which never reaches the recorder for fib(28)-like
5808 // shapes that hit the SelfLink cycle catch BEFORE
5809 // any base-case Return — leaving `downrec_close`
5810 // None and routing the trace through R1's safe
5811 // `dispatchable=false` `"self-link-retf-r1"` path
5812 // (audit measured `trace_dispatched = 0`).
5813 //
5814 // Sub-0 lift: when the SelfLink trip fires AND
5815 // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
5816 // gate already requires this — kept explicit as a
5817 // safety floor), route the close through `downrec_
5818 // close` INSTEAD of `self_link_kind`. The recorder
5819 // synthesises the close marker from the most
5820 // recent Op::Call at depth `cur_depth - 1`:
5821 // - `return_pc` = `call.pc + 1` (caller's resume
5822 // PC after the recursive call returns; mirror
5823 // of R3a's `caller_pc` derivation at the
5824 // depth>0 Op::Return capture path below).
5825 // - `target_proto` = `call.proto` (caller's
5826 // proto; equals `rec.head_proto` for self-
5827 // recursion).
5828 // - `depth_delta` = `1` (today's recorder always
5829 // unrolls one level; R3a uses the same
5830 // constant).
5831 //
5832 // The lowerer's `end_idx` picker (`trace.rs:3729`)
5833 // routes through `TraceEnd::DownRec` ahead of the
5834 // `self_link_kind` arm; the R3b/R3d lowerer arm
5835 // emits the stitch-sentinel + caller-pc-guard
5836 // scaffold. Single-candidate guard chain (sub-0's
5837 // recorder produces 1 caller_pc candidate because
5838 // `rec.retfs` is empty) keeps `dispatchable=false`
5839 // + `"downrec-stitch-pending"` label (per R3d's
5840 // `multi_way_candidate_count >= 2` gate at
5841 // `trace.rs:7385`). Net behaviour: trace compiles
5842 // under DownRec routing; interp runs the
5843 // recursion naturally → result 317811.
5844 //
5845 // The `cur_depth >= 2` gate is automatically
5846 // satisfied by the count > RECUNROLL_THRESHOLD=2
5847 // trip condition (3 ancestor frames sharing
5848 // head_proto implies cur_depth >= 3), kept
5849 // explicit so a future RECUNROLL_THRESHOLD tweak
5850 // doesn't silently flip shallow-recursion
5851 // shapes (cur_depth == 1) onto the DownRec arm.
5852 //
5853 // R3.3+ sub-1/2/3/4 will replace the depth-baked
5854 // op_offsets[] addressing with runtime base_var
5855 // threading so the trace's recorded body is
5856 // depth-relative and the DownRec dispatch
5857 // becomes wall-clock-positive. Sub-0 is the
5858 // routing scaffold; it does not aim for gain.
5859 let _ = kind;
5860 let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
5861 let caller_depth_u8 = (cur_depth - 1) as u8;
5862 if let Some(call_op) = rec.ops.iter().rev().find(|r| {
5863 r.inline_depth == caller_depth_u8
5864 && matches!(r.inst.op(), crate::vm::isa::Op::Call)
5865 }) {
5866 rec.downrec_close = Some(crate::jit::trace::DownRecClose {
5867 return_pc: call_op.pc + 1,
5868 target_proto: call_op.proto,
5869 depth_delta: 1,
5870 });
5871 true
5872 } else {
5873 false
5874 }
5875 };
5876 if relaxed_to_downrec {
5877 // R2 close-cause taxonomy: tag the lift so
5878 // probes can tally the fire rate. Mirrors
5879 // R3a's `"downrec-restart"` bump for the
5880 // depth>0 Op::Return path (different trip
5881 // origin, same downstream routing). The
5882 // existing `"self-link-retf-r1"` label still
5883 // fires for trips that DON'T relax (no
5884 // candidate Op::Call ancestor in rec.ops, or
5885 // cur_depth < 2) via the lowerer's
5886 // dispatch_off_reason mirror at the close
5887 // handler — kept as a regression safety net.
5888 self.jit
5889 .counters
5890 .bump_close_cause("selflink-yields-to-downrec");
5891 } else {
5892 rec.self_link_kind = Some(kind);
5893 }
5894 }
5895 let should_close =
5896 at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
5897 if should_close {
5898 // P13-S13-H — long-trace bias: a call-triggered
5899 // recording that closed with a very short body
5900 // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
5901 // binary_trees `make(0)`: 4 ops) is pathological.
5902 // Compiling + caching it pins `Proto.traces` to a
5903 // trace that the length gate will refuse to
5904 // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
5905 // = 40`), AND blocks the back-edge / longer-call
5906 // path from re-recording the same head_pc (the
5907 // dedup `already_cached` check below short-
5908 // circuits). The fix: discard the short call-
5909 // triggered recording WITHOUT caching, and bias
5910 // the proto's `call_hot_count` back to
5911 // `THRESHOLD - HOT_RETRY_WINDOW` so the next
5912 // sequence of calls retries the trigger at a
5913 // different (hopefully deeper) recursion point.
5914 //
5915 // Back-edge triggered traces are exempt — a
5916 // tight numeric-for loop's body is legitimately
5917 // 3 ops (`Add`, ForLoop) and DOES dispatch
5918 // usefully when re-entered many times.
5919 // P13-S13-H — coverage heuristic to detect
5920 // pathologically partial call-triggered traces:
5921 // for self-recursive / branchy protos like
5922 // `fib` (~17 bytecode ops) or
5923 // `binary_trees.make` (~26 ops), the recorder
5924 // can fire at a BASE-case entry (`fib(0)` or
5925 // `make(0)`) producing a 3–4 op trace that
5926 // covers a tiny fraction of the proto's code.
5927 // That trace is doomed by the length gate
5928 // post-compile AND blocks any longer follow-up
5929 // (the dedup `already_cached` check below). The
5930 // fix: discard call-triggered closes where
5931 // `rec.ops.len() * 2 < head_proto.code.len()`
5932 // (less than half the proto's bytecode), so the
5933 // back-edge / longer call path can take over.
5934 //
5935 // Why coverage > raw length: protos with
5936 // intrinsically short bodies (closure
5937 // factories: `Closure + Return1` = 2 ops,
5938 // simple wrappers: `LoadI + Return1` = 2 ops)
5939 // record 100% coverage even at length 2 — those
5940 // ARE legitimately short and the closure /
5941 // sunk-emit lowering paths (S7-A / S9-C) make
5942 // them worth compiling. The heuristic admits
5943 // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
5944 // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
5945 // ~26) get discarded.
5946 //
5947 // Back-edge triggered traces are unaffected —
5948 // a tight numeric-for body legitimately covers
5949 // 3 of ~3 proto ops it can dispatch from
5950 // (`Add + ForLoop`) and the recorder fires on
5951 // the back-edge, not call entry.
5952 //
5953 // `call_hot_count` is intentionally NOT reset
5954 // (an earlier draft tried `THRESHOLD - 32` but
5955 // caused active_trace contention with the
5956 // outer back-edge trigger — see
5957 // setlist_b_zero_with_call_c_zero_sunk_emits).
5958 // We give up on dispatching the pathological
5959 // shape on the same proto; the back-edge or a
5960 // longer call path on a deeper recursion point
5961 // can still record + cache a real trace.
5962 let proto_code_len = rec.head_proto.code.len();
5963 let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
5964 // P13-S13-I — per-Proto discard cap. The S13-H
5965 // relaxed trigger condition (`c >= THRESHOLD &&
5966 // !already_cached`) means a Proto whose every
5967 // recording is partial-coverage will re-fire the
5968 // trigger every call indefinitely (1500+ in
5969 // `binary_trees`-pattern test). The cap stops
5970 // discarding after `MAX_DISCARDS_PER_PROTO` —
5971 // the next close falls through to compile (even
5972 // if partial), caches the trace, and the
5973 // `already_cached` short-circuit kills the
5974 // storm. Dispatch may still be refused
5975 // post-compile (length gate), but the recorder
5976 // stops churning.
5977 const MAX_DISCARDS_PER_PROTO: u32 = 5;
5978 let prior_discards = rec.head_proto.trace_discard_count.get();
5979 let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
5980 // P13-S13-K — flip the `gave_up` flag the
5981 // moment cap is reached (BEFORE the close-
5982 // dispatching branch below). The trigger gates
5983 // short-circuit on this flag, skipping the
5984 // RefCell + linear `already_cached` scan on
5985 // every subsequent call to this Proto. Useful
5986 // for `binary_trees_pattern`-class loads where
5987 // a single Proto sees ~20k calls post-cap.
5988 if cap_reached
5989 && rec.is_call_triggered
5990 && is_partial_coverage
5991 && !rec.head_proto.trace_gave_up.get()
5992 {
5993 rec.head_proto.trace_gave_up.set(true);
5994 }
5995 if rec.is_call_triggered && is_partial_coverage && !cap_reached {
5996 // Tally as closed (for visibility) but DROP
5997 // without compile/cache. Use the existing
5998 // closed-lens accumulator so probes can
5999 // observe the discarded shape.
6000 // P13-S13-I — bump discard count BEFORE
6001 // dropping the recording so the next
6002 // close sees the updated counter.
6003 rec.head_proto.trace_discard_count.set(prior_discards + 1);
6004 self.jit.counters.closed += 1;
6005 self.jit
6006 .counters
6007 .closed_lens
6008 .push((rec.is_call_triggered, rec.ops.len()));
6009 // v2.0 Track-R R2 — partial-coverage discard
6010 // close path. Pre-R2 this site bumped `closed`
6011 // + `closed_lens` (visibility) but no per-
6012 // reason label, so probes couldn't separate a
6013 // real successful close from a discard tally.
6014 // Tag explicitly to make the recorder-side
6015 // close-cause taxonomy single-source.
6016 self.jit
6017 .counters
6018 .bump_close_cause("partial-coverage-discard");
6019 self.jit.active_trace = None;
6020 // Continue with interp loop — don't
6021 // fall through to compile path.
6022 // The op at `pc` hasn't dispatched yet;
6023 // the outer loop iteration handles it.
6024 } else {
6025 rec.closed = true;
6026 // P12-S2.C — detach the closed record, then try
6027 // to compile it. Dedup by `head_pc`: a Proto
6028 // already carrying a CompiledTrace for this PC
6029 // skips recompile (the hot counter caps
6030 // re-recording at `u32::MAX / 2` anyway, but
6031 // explicit dedup keeps `Proto.traces` short
6032 // for the S3 dispatcher's linear scan).
6033 //
6034 // No `Vm::run` change for failure: we just bump
6035 // the failed counter and drop the record. S3
6036 // will read `Proto.traces` to decide whether to
6037 // dispatch — until then, this is bookkeeping.
6038 let head_pc_val = rec.head_pc;
6039 let closed_record = self
6040 .jit
6041 .active_trace
6042 .take()
6043 .expect("active_trace was Some this branch");
6044 self.jit.counters.closed += 1;
6045 self.jit
6046 .counters
6047 .closed_lens
6048 .push((closed_record.is_call_triggered, closed_record.ops.len()));
6049 // P12-S5-B fix: cache the trace on the
6050 // recorder's *head proto*, not the current
6051 // closure's proto. For non-recursive
6052 // call-triggered traces, close fires after
6053 // `Return1` pops the callee frame — `cl` at
6054 // that point is the CALLER's closure, while
6055 // `closed_record.head_proto` is the CALLEE's
6056 // proto (the one we actually want the trace
6057 // to be discoverable from on the next call).
6058 // Self-recursive fib closed via depth-cap
6059 // mid-recursion so `cl.proto == head_proto`
6060 // happened to coincide — this fix makes that
6061 // accidental coincidence intentional.
6062 let head_proto = closed_record.head_proto;
6063 let already_cached = head_proto
6064 .traces
6065 .borrow()
6066 .iter()
6067 .any(|t| t.head_pc == head_pc_val);
6068 if !already_cached {
6069 // Internal-loop = true: the trace runs in
6070 // a native loop until a cmp side-exits, so
6071 // the dispatcher's per-entry marshal cost
6072 // amortizes across the whole run of
6073 // iterations the loop's recorded direction
6074 // stays valid. The lowerer auto-downgrades
6075 // to one-shot for cmp-less or Call-truncating
6076 // traces.
6077 // P15-A v2-C-A6-5 — side traces MUST NOT
6078 // internal-loop. The parent's recorded prefix
6079 // (ops at PCs < side trace's head_pc) defines
6080 // values for registers the child's body reads
6081 // without re-writing each iter — e.g. for
6082 // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6083 // + R[11]` sets R[12], and the child trace
6084 // (head_pc=24) re-runs `pc=20 Move R[1] =
6085 // R[12]` each iter via its outer ForLoop
6086 // internal-loop, ALWAYS reading the stale
6087 // entry-time R[12]. The parent's Add never
6088 // re-runs during child's loop, so R[1] gets
6089 // pinned to one stale value. Force one-shot
6090 // for side traces: each parent-exit round-
6091 // trips through dispatcher → parent's Add
6092 // runs → side trace runs ONE iter → return.
6093 let opts = crate::jit::trace::CompileOptions {
6094 internal_loop: closed_record.side_trace_parent.is_none(),
6095 pre53: self.version() <= LuaVersion::Lua53,
6096 aot: false,
6097 };
6098 // v1.1 A1 Session A — route through trace_compiler.
6099 // v2.0 Track J sub-step J-B — split-borrow JitState
6100 // so the trait method can take `&mut dyn JitStorage`.
6101 let result = {
6102 let jit = &mut self.jit;
6103 let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6104 jit.trace_compiler
6105 .try_compile_trace(storage, &closed_record, opts)
6106 };
6107 match result {
6108 Some(mut ct) => {
6109 // P12-S5-A/B/C — tally Sinkable sites
6110 // + actually-sunk-emit sites + materialise
6111 // emit sites before moving `ct` into
6112 // Proto.traces.
6113 self.jit.counters.sinkable_seen +=
6114 ct.sinkable_sites_seen as u64;
6115 self.jit.counters.accum_bufferable_seen +=
6116 ct.accum_bufferable_seen as u64;
6117 self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6118 self.jit.counters.materialize_emit +=
6119 ct.materialize_emit_count as u64;
6120 self.jit.counters.closure_emit += ct.closure_seen as u64;
6121 if ct.is_inline_abort_close {
6122 self.jit.counters.inline_abort += 1;
6123 }
6124 // v2.0 Stage 7 polish 6 fire
6125 // experiment — split tally so a
6126 // probe can answer the AOT
6127 // `accepted_with_per_exit_inline`
6128 // gate's question at the JIT
6129 // surface too: how many compiled
6130 // traces emitted depth>0 cmp
6131 // side-exits, and how many of
6132 // those survived all the
6133 // `dispatchable = false` pins
6134 // (`InlineAbort-gate`,
6135 // `self-link-retf-r1`,
6136 // `downrec-stitch-pending`, etc.).
6137 if !ct.per_exit_inline.is_empty() {
6138 self.jit.counters.per_exit_inline_compiled += 1;
6139 if ct.dispatchable {
6140 self.jit.counters.per_exit_inline_dispatchable += 1;
6141 }
6142 }
6143 if let Some(reason) = ct.dispatch_off_reason {
6144 self.jit.counters.dispatch_off_reasons.push(reason);
6145 // v2.0 Track-R R2 — mirror
6146 // the ordered Vec push into
6147 // the per-reason HashMap so
6148 // probes can answer "how many
6149 // of each dispatch_off label
6150 // fired" in O(1) without
6151 // walking the Vec. Same
6152 // bucket as the recorder-side
6153 // abort/discard tags above.
6154 self.jit.counters.bump_close_cause(reason);
6155 }
6156 // v2.0 Track-R R3b — count
6157 // compiled traces that carry a
6158 // down-recursion stitch link.
6159 // Bumped here (not at the lowerer
6160 // emit site) because the Vm's
6161 // JitCounters live on the Vm,
6162 // and the lowerer doesn't have a
6163 // Vm handle. R3b's regression
6164 // pin reads this via
6165 // `Vm::trace_downrec_link_compiled_count`.
6166 if ct.downrec_link.is_some() {
6167 self.jit.counters.downrec_link_compiled += 1;
6168 }
6169 // v2.0 Track-R R3d — multi-way
6170 // guard emit counter. Bumped when
6171 // the lowerer's R3d arm collected
6172 // >= 2 distinct caller_pc candidates
6173 // and lifted `dispatchable=true`.
6174 // R3c's single-CMP shape stores
6175 // `1` here without bumping; non-
6176 // DownRec closes store `0`.
6177 if ct.downrec_multi_way_count >= 2 {
6178 self.jit.counters.multi_way_guard_emitted += 1;
6179 }
6180 // P15-A v2-A — side-trace finalisation.
6181 // Pin `dispatchable=false` so the
6182 // primary lookup `traces.find(|t|
6183 // t.head_pc == pc && t.dispatchable)`
6184 // never matches this entry — the
6185 // side trace is meant to be entered
6186 // ONLY through the parent's exit
6187 // indirection (v2-B/C IR), not the
6188 // back-edge / call-trigger paths.
6189 // Then write the entry fn ptr into
6190 // the parent's `exit_side_trace_ptrs`
6191 // slot so v2-B/C IR can read it.
6192 if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6193 closed_record.side_trace_parent
6194 {
6195 ct.dispatchable = false;
6196 let entry_ptr = ct.entry as *const () as *const u8;
6197 let _side_trace_head_pc = closed_record.head_pc;
6198 let parent_traces = parent_proto.traces.borrow();
6199 if let Some(parent_ct) = parent_traces
6200 .iter()
6201 .find(|t| t.head_pc == parent_head_pc)
6202 {
6203 // P15-A v2-C-A5-C — shape-match
6204 // gate. Find the parent's per-exit
6205 // tag snapshot at the wired exit
6206 // (inline / tag / global) and
6207 // check the child's entry_tags
6208 // match. If not, leave the cell
6209 // null + skip cache populate so
6210 // the future v2-C-A2 IR's
6211 // `call_indirect` stays inert at
6212 // this exit (the child's
6213 // shape-specialised IR would
6214 // mis-interpret raw bits the
6215 // parent writes to reg_state).
6216 let inline_n = parent_ct.per_exit_inline.len();
6217 let tags_n = parent_ct.per_exit_tags.len();
6218 let parent_exit_tags_slice: &[
6219 crate::jit::trace::ExitTag
6220 ] = if parent_exit_idx < inline_n {
6221 &parent_ct.per_exit_inline
6222 [parent_exit_idx]
6223 .exit_tags
6224 } else if parent_exit_idx
6225 < inline_n + tags_n
6226 {
6227 &parent_ct.per_exit_tags
6228 [parent_exit_idx - inline_n]
6229 .1
6230 } else {
6231 &parent_ct.exit_tags
6232 };
6233 let shape_ok =
6234 crate::jit::trace::exit_tags_match_entry_tags(
6235 &ct.entry_tags,
6236 parent_exit_tags_slice,
6237 &parent_ct.entry_tags,
6238 );
6239 if !shape_ok {
6240 self.jit.counters.side_trace_shape_mismatch += 1;
6241 }
6242 // P15-A v2-C-A4 — write the child's
6243 // entry fn ptr to BOTH the legacy
6244 // v2-A `exit_side_trace_ptrs[idx]`
6245 // cell (kept so v2-A's
6246 // walk_any_side_ptr_non_null tests
6247 // stay green) AND the per-kind cell
6248 // whose heap address the parent's
6249 // IR baked (v2-C-A2). The IR-baked
6250 // cell is what the call_indirect
6251 // gate actually reads. Only write
6252 // when A5-C shape gate passes.
6253 if shape_ok {
6254 if let Some(cell) = parent_ct
6255 .exit_side_trace_ptrs
6256 .get(parent_exit_idx)
6257 {
6258 cell.set(entry_ptr);
6259 }
6260 // Compute (kind, local) for the
6261 // IR-baked cell. Layout follows
6262 // exit_hit_counts: inline first,
6263 // then per_exit_tags, then the
6264 // global tail slot.
6265 let (sent_kind, sent_local) = if parent_exit_idx
6266 < inline_n
6267 {
6268 parent_ct.per_exit_inline[parent_exit_idx]
6269 .side_trace_ptr
6270 .set(entry_ptr);
6271 (
6272 crate::jit::trace::SIDE_SENT_KIND_INLINE,
6273 parent_exit_idx as u32,
6274 )
6275 } else if parent_exit_idx < inline_n + tags_n {
6276 let local = parent_exit_idx - inline_n;
6277 if let Some(b) =
6278 parent_ct.tags_side_trace_ptrs.get(local)
6279 {
6280 b.set(entry_ptr);
6281 }
6282 (
6283 crate::jit::trace::SIDE_SENT_KIND_TAG,
6284 local as u32,
6285 )
6286 } else {
6287 parent_ct.global_side_trace_ptr.set(entry_ptr);
6288 (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6289 };
6290 self.jit.counters.side_trace_compiled += 1;
6291 // P15-A v2-D-A8 — flip the
6292 // parent's fast-path hint so
6293 // the dispatcher knows to do
6294 // the tentative decode + cell
6295 // check on subsequent
6296 // dispatches. Set once and
6297 // stays true (we never unwire
6298 // a side trace today).
6299 parent_ct.has_any_side_wired.set(true);
6300
6301 // P15-A v2-C-A1/A4 — populate
6302 // the O(1) lookup cache the
6303 // dispatcher consults on
6304 // sentinel-bit-set returns.
6305 // Key is the encoded sentinel
6306 // (same encoding the IR ORs
6307 // into bits 56..=62 of the
6308 // child's i64 return).
6309 let sentinel =
6310 crate::jit::trace::encode_side_sentinel(
6311 sent_kind, sent_local,
6312 );
6313 let predicted_idx = if std::ptr::eq(
6314 parent_proto.as_ptr(),
6315 head_proto.as_ptr(),
6316 ) {
6317 parent_traces.len() as u32
6318 } else {
6319 head_proto.traces.borrow().len() as u32
6320 };
6321 parent_ct
6322 .side_trace_cache
6323 .borrow_mut()
6324 .insert(sentinel, predicted_idx);
6325 }
6326 }
6327 drop(parent_traces);
6328 }
6329 head_proto.traces.borrow_mut().push(TArc::new(ct));
6330 self.jit.counters.compiled += 1;
6331 }
6332 None => {
6333 self.jit.counters.compile_failed += 1;
6334 self.jit
6335 .counters
6336 .compile_failed_reasons
6337 .push(self.jit.trace_compiler.last_compile_checkpoint());
6338 }
6339 }
6340 }
6341 } // P13-S13-H — close the long-trace-bias else branch
6342 } else {
6343 // P12-S4-step1 + step4a — depth-aware push at the
6344 // current `cur_depth`. The `depth_cap_hit` /
6345 // `returned_past_head` early-exit is handled by
6346 // the `should_close` branch above; reaching here
6347 // means `cur_depth <= MAX_INLINE_DEPTH` and the
6348 // trace head's frame is still live.
6349 let depth_u8 = cur_depth as u8;
6350 if depth_u8 > self.jit.max_depth_seen {
6351 self.jit.max_depth_seen = depth_u8;
6352 }
6353 // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6354 // return / variable return count). Recorder pushed
6355 // it with var_count=None before the call dispatched;
6356 // now that the call has returned and we're about to
6357 // push the next op, top reflects the actual return
6358 // count. Snapshot top - (caller.base + call.a).
6359 if let Some(last) = rec.ops.last_mut()
6360 && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6361 && last.inst.c() == 0
6362 && last.var_count.is_none()
6363 && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6364 {
6365 let from = f.base + last.inst.a();
6366 if self.top >= from {
6367 last.var_count = Some(self.top - from);
6368 }
6369 }
6370 // P12-S9-A/C — for SetList B=0, snapshot the source
6371 // count = top - A - 1 (mirrors Lua's `n = top - ra
6372 // - 1` from lvm.c OP_SETLIST). Sources are
6373 // R[A+1..top), exclusive top. For Call C=0's
6374 // var_count (the return count = top - A inclusive),
6375 // see the prior-op fix-up above; here we
6376 // initialise the current Call op to None and let
6377 // the fix-up on the next op's push populate it.
6378 let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6379 && inst.b() == 0
6380 && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6381 {
6382 let from = f.base + inst.a();
6383 if self.top > from {
6384 Some(self.top - from - 1)
6385 } else {
6386 None
6387 }
6388 } else {
6389 None
6390 };
6391 let op = crate::jit::trace::RecordedOp {
6392 proto: cl.proto,
6393 pc,
6394 inst,
6395 inline_depth: depth_u8,
6396 var_count,
6397 };
6398 // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6399 // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6400 // Captured as a side-channel `RetfRecord` parallel to
6401 // `ops` when `p16_self_link_enabled` is on. R3's
6402 // down-rec stitch consumes these to guard side-trace
6403 // inlined-frame topology against the recorded shape.
6404 // Gated on the same flag as the cycle catch so the
6405 // ship-default path (p16 off) sees zero behavior
6406 // change. `caller_pc` is the recorded enclosing Call's
6407 // pc + 1 — interp's resume point after the inlined
6408 // frame pops.
6409 if self.jit.p16_self_link_enabled
6410 && depth_u8 > 0
6411 && matches!(
6412 inst.op(),
6413 crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6414 )
6415 {
6416 let results: u8 = match inst.op() {
6417 crate::vm::isa::Op::Return0 => 0,
6418 crate::vm::isa::Op::Return1 => 1,
6419 _ => 0,
6420 };
6421 // Most recent Op::Call recorded at the caller's
6422 // depth (`depth_u8 - 1`) is the frame this Return
6423 // is unwinding from. Reverse scan stops at the
6424 // first match.
6425 let caller_depth = depth_u8 - 1;
6426 let caller_call = rec.ops.iter().rev().find(|r| {
6427 r.inline_depth == caller_depth
6428 && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6429 });
6430 let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6431 // v2.0 Track-R R3a — capture the caller's proto
6432 // for the RetfRecord. LuaJIT `IR_RETF.op1`
6433 // equivalent. For fib(28) the caller's proto
6434 // equals the trace head; for future mutual
6435 // recursion the recorded Op::Call's proto is the
6436 // right target. Fallback to head_proto when no
6437 // enclosing Call op was captured (mirrors
6438 // `caller_pc`'s fallback to the Return's own pc).
6439 let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6440 rec.retfs.push(crate::jit::trace::RetfRecord {
6441 from_depth: depth_u8,
6442 to_depth: caller_depth,
6443 results,
6444 caller_pc,
6445 proto: caller_proto,
6446 });
6447 // v2.0 Track-R R3a — DownRec close trigger:
6448 // count RetfRecords on this recording whose
6449 // `proto` matches `caller_proto` (LuaJIT
6450 // `check_downrec_unroll` chain filter
6451 // `op1 == ptref`). Threshold mirrors
6452 // RECUNROLL_THRESHOLD; first trip stamps the
6453 // `downrec_close` marker, subsequent retfs
6454 // keep the marker without overwrite. The
6455 // lowerer's end_idx picker routes through
6456 // TraceEnd::DownRec when the marker is set;
6457 // R3a's tail emit still falls through to R1's
6458 // safe deopt path so fib(28) result stays
6459 // 317_811. R3b lifts.
6460 if rec.downrec_close.is_none() {
6461 let caller_proto_ptr = caller_proto.as_ptr();
6462 let prior_match_count = rec
6463 .retfs
6464 .iter()
6465 .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6466 .count();
6467 // Strictly-greater-than threshold matches
6468 // LuaJIT `count + J->tailcalled > recunroll`.
6469 // The newly-pushed retf is already counted.
6470 if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6471 rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6472 return_pc: caller_pc,
6473 target_proto: caller_proto,
6474 depth_delta: 1,
6475 });
6476 // R2 close-cause taxonomy: tag the
6477 // restart with `"downrec-restart"`. R3b
6478 // adds `"downrec-stitch-failed"` when
6479 // the lifted back-edge falls back to
6480 // deopt.
6481 self.jit.counters.bump_close_cause("downrec-restart");
6482 }
6483 }
6484 }
6485 // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6486 // FIRST eligible Op::GetField site under env-gate
6487 // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6488 // - R[B] is Value::Table with metatable.is_none()
6489 // - K[C] is Value::Str
6490 // - The string key actually occupies a hash slot
6491 // (so the IC's slot_idx is a real index, not
6492 // a probe sentinel).
6493 // Once captured, subsequent GetFields skip this
6494 // logic (rec.field_ic_snapshot.is_some() short-
6495 // circuits). Env-OFF short-circuits on the cached
6496 // atomic check inside field_ic_enabled().
6497 if rec.field_ic_snapshot.is_none()
6498 && matches!(inst.op(), crate::vm::isa::Op::GetField)
6499 && crate::jit::trace_types::field_ic_enabled()
6500 {
6501 let b = inst.b();
6502 let c_idx = inst.c() as usize;
6503 let r_b = self.stack[(base + b) as usize];
6504 if let Value::Table(g) = r_b
6505 && g.metatable().is_none()
6506 && c_idx < cl.proto.consts.len()
6507 && let Value::Str(s) = cl.proto.consts[c_idx]
6508 {
6509 let key = Value::Str(s);
6510 let tbl_ref = &*g;
6511 if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6512 && let Some(val) = tbl_ref.node_val_at(slot_idx)
6513 {
6514 let op_idx = rec.ops.len() as u32;
6515 rec.field_ic_snapshot =
6516 Some(crate::jit::trace_types::FieldIcSnapshot {
6517 op_idx,
6518 nodes_len: tbl_ref.nodes_capacity() as u64,
6519 slot_idx: slot_idx as u64,
6520 key_ptr_bits: s.as_ptr() as u64,
6521 cached_val_tag: val.tag_byte(),
6522 });
6523 self.jit.counters.field_ic_snapshot_captured += 1;
6524 }
6525 }
6526 }
6527 if !rec.push(op) {
6528 // v2.0 Track-R R2 — recorder overflow
6529 // (MAX_TRACE_LEN). Pre-R2 this site bumped
6530 // `aborted` with no reason label, leaving the
6531 // overflow indistinguishable from any other
6532 // abort cause that might be added later.
6533 // Tag it explicitly under the close-cause
6534 // bucket so probes can tally overflow vs
6535 // other abort causes in O(1).
6536 self.jit.active_trace = None;
6537 self.jit.counters.aborted += 1;
6538 self.jit.counters.bump_close_cause("trace-overflow");
6539 }
6540 }
6541 }
6542
6543 // P12-S3 — trace JIT dispatcher.
6544 //
6545 // When the dispatch loop is about to execute the op at
6546 // `pc` and there's a `numeric_only` CompiledTrace cached
6547 // for that `head_pc`, marshal the live regs into an
6548 // i64 buffer, jump into the trace, and resume the
6549 // interpreter at the returned continuation PC.
6550 //
6551 // Skipped (zero overhead) when `trace_jit_enabled` is
6552 // false; the lookup is a borrow + scan over
6553 // `cl.proto.traces`, which is a `Vec` whose size is at
6554 // most one entry per back-edge per Proto in practice.
6555 //
6556 // Marshalling contract — the reg_state ABI is `*mut i64`
6557 // with no tag info, so each slot is marshalled as its raw
6558 // 8-byte payload. A slot whose tag is outside Int / Float /
6559 // Table / Closure / Native / Str / Nil, or differs from the
6560 // trace's compile-time entry tag, forces a skip; interp takes
6561 // over for one op and the back-edge brings us back to try
6562 // again next pass (slot tags can change between back-edges).
6563 //
6564 // A trace that comes back with `vm.jit.pending_err`
6565 // parked is treated as a deopt: clear the err, leave
6566 // the stack as the trace wrote it, and let the
6567 // interpreter run from the same `pc`. The trace itself
6568 // is left cached — a future entry might find no
6569 // metatable in the way and succeed.
6570 // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6571 // of 6 per-field Rc clones. proto.traces is now
6572 // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6573 // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6574 // operations per dispatch × 434k = ~2.2M Rc atomic ops
6575 // (~1-2% gain measured separately).
6576 // v2.0 Track-R R3c — one-shot consume of the
6577 // `suppress_downrec_admit_once` flag. Set by the R3c
6578 // downrec post-invoke arm below when it force-deopts the
6579 // trace (caller-pc guard miss OR cycle-budget exhausted)
6580 // so the NEXT interpreter loop iteration skips the
6581 // downrec admit, lets interp run the op at `head_pc`,
6582 // advances `pc` past `head_pc`, and breaks the otherwise-
6583 // infinite admit loop. Reading + clearing here means a
6584 // single dispatch tick consumes the suppression — the
6585 // following tick re-admits naturally (with the budget
6586 // also reset by the deopt site).
6587 let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6588 self.jit.suppress_downrec_admit_once = false;
6589 if self.jit.trace_enabled
6590 && let Some(ct) = {
6591 let traces = cl.proto.traces.borrow();
6592 traces
6593 .iter()
6594 .find(|t| {
6595 if t.head_pc != pc {
6596 return false;
6597 }
6598 let is_downrec = t.downrec_link.is_some();
6599 // v2.0 Track-R R3c — the one-shot suppress
6600 // flag blocks any admit (primary or fallback)
6601 // for `downrec_link`-bearing traces so the
6602 // next interp iter can run the natural op
6603 // at `head_pc` and advance past it. R3d's
6604 // `dispatchable=true` lift means the suppress
6605 // must also cover the primary `t.dispatchable`
6606 // arm — otherwise the lifted lookup would
6607 // immediately re-admit after a force-deopt
6608 // and the infinite loop returns.
6609 if is_downrec && downrec_admit_blocked {
6610 return false;
6611 }
6612 // Primary arm: `dispatchable=true` traces
6613 // (R3d-lifted DownRec or normal traces).
6614 // Fallback arm: R3c-shape `dispatchable=false`
6615 // DownRec traces (single-CMP guard kept
6616 // pinned because the 90% miss-rate would
6617 // make blind admit perf-negative).
6618 t.dispatchable || is_downrec
6619 })
6620 .cloned()
6621 }
6622 {
6623 // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6624 // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6625 // across the entire dispatch block so the fields outlive
6626 // all consumers. Saves 5 Rc::clone per dispatch.
6627 let entry_fn = ct.entry;
6628 let head_pc_val = ct.head_pc;
6629 let window_size = ct.window_size;
6630 let exit_tags = &ct.exit_tags;
6631 let per_exit_tags = &ct.per_exit_tags;
6632 let per_exit_inline = &ct.per_exit_inline;
6633 let compile_entry_tags = &ct.entry_tags;
6634 let global_tag_res_kind = ct.global_tag_res_kind;
6635 let exit_hit_counts = &ct.exit_hit_counts;
6636 let max_stack = cl.proto.max_stack as usize;
6637 let window_size_us = window_size as usize;
6638 let base_us = base as usize;
6639 // P12-S4-step3a — `reg_state` sized to the trace's
6640 // `window_size`, which today equals max_stack but
6641 // S4-step3b will expand for inlined frames.
6642 // Marshal-in still only writes [0..max_stack); slots
6643 // [max_stack..window_size) are zero-initialised and
6644 // filled by the trace's own GetUpval / arith.
6645 // P13-S13-D — reuse the Vm's amortised buffers
6646 // instead of allocating fresh Vecs each dispatch.
6647 // mem::take leaves an empty placeholder we restore
6648 // at the end of the dispatch block (success +
6649 // deopt paths both fall through to the restore).
6650 let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6651 entry_tags.clear();
6652 entry_tags.reserve(max_stack);
6653 // v2.0 Track-R R3c — this trace was admitted via the
6654 // `downrec_link.is_some()` arm rather than the normal
6655 // `dispatchable=true` arm. The pre-invoke path
6656 // populates a reserved saved-PC slot just past the
6657 // normal register window so R3b's lowerer guard load
6658 // (`reg_state[window_size]`) compares the runtime
6659 // saved caller PC against the recorded `dr_return_pc`.
6660 //
6661 // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6662 // gate. After R3d lifts `dispatchable = true` for
6663 // multi-way guards, the trace's body still emits the
6664 // R3b/R3d sentinel shape on return — the saved-PC slot
6665 // and post-invoke classifier must keep firing.
6666 // `downrec_link.is_some()` is the unique structural
6667 // signal that the trace closes via DownRec.
6668 let is_downrec_entry = ct.downrec_link.is_some();
6669 let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6670 reg_state.clear();
6671 // v2.0 Track-R R3c — when admitting a downrec trace,
6672 // size the buffer to `window_size + 1` so the lowerer
6673 // can `load(I64, ..., reg_state, window_size * 8)`
6674 // for the saved caller PC guard input. The extra slot
6675 // is the LAST element so cranelift's existing
6676 // `0..window_size` accesses are unaffected.
6677 let reg_state_len = if is_downrec_entry {
6678 window_size_us + 1
6679 } else {
6680 window_size_us
6681 };
6682 reg_state.resize(reg_state_len, 0i64);
6683 let mut dispatch_ok = true;
6684 for i in 0..max_stack {
6685 let v = self.stack[base_us + i];
6686 let (tag, raw) = v.unpack();
6687 entry_tags.push(tag);
6688 // P12-S12-C v3 — entry tag guard. The trace's IR
6689 // is specialised to the compile-time entry tags
6690 // (via current_kinds propagation from
6691 // from_entry_tag). A runtime tag mismatch means
6692 // body ops would mis-interpret raw bits (e.g.
6693 // treat a Str pointer as Int payload → garbage).
6694 // Skip dispatch on mismatch so interp handles
6695 // this entry shape; the trace stays cached for
6696 // future entries that match.
6697 if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6698 dispatch_ok = false;
6699 break;
6700 }
6701 match tag {
6702 // Int / Float / Table / Closure / Nil all
6703 // marshal to raw payload cleanly; the trace's IR
6704 // treats the 8-byte slot as an i64 (with
6705 // f64 ops bitcasting around the boundary).
6706 crate::runtime::value::raw::INT
6707 | crate::runtime::value::raw::FLOAT
6708 | crate::runtime::value::raw::TABLE
6709 | crate::runtime::value::raw::CLOSURE
6710 // P12-S12-B-v2 — Native iter slots (e.g.
6711 // R[A] = ipairs_iter) are present in
6712 // generic-for traces; the raw bits are a
6713 // valid `*mut NativeClosure` and round-trip
6714 // cleanly.
6715 | crate::runtime::value::raw::NATIVE
6716 // P12-S12-C v1 — Str slots show up in
6717 // string-concat traces; raw bits = `*mut
6718 // LuaStr` (interned, GC-managed). Round-
6719 // trips cleanly as a heap pointer.
6720 | crate::runtime::value::raw::STR
6721 | crate::runtime::value::raw::NIL => {
6722 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6723 reg_state[i] = unsafe { raw.zero as i64 };
6724 }
6725 _ => {
6726 dispatch_ok = false;
6727 break;
6728 }
6729 }
6730 }
6731
6732 if dispatch_ok {
6733 debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6734 self.jit.pending_err = None;
6735 // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6736 // count. A cmp@d>0 side-exit calls the materialize
6737 // helper which pushes inlined frames onto
6738 // `vm.frames`; on deopt those frames must be popped
6739 // before falling through to the interpreter, else
6740 // the stack grows unboundedly per deopted dispatch.
6741 let pre_frames = self.frames.len();
6742 // v2.0 Track-R R3c — saved-PC slot population. The
6743 // recorded `dr_return_pc` on the closing trace is
6744 // the caller's resume PC captured at a depth>0
6745 // Return push (recorder push site, see R3a verdict
6746 // §3). The natural runtime analogue for self-
6747 // stitch is the dispatching frame's PARENT frame's
6748 // PC: the trace's head_pc sits inside a Lua frame,
6749 // and the parent (caller) frame's `pc` is what
6750 // luna would observe as `[base-8]` in the LJ
6751 // `asm_retf` shape (`lj_asm_arm64.h:565`). When
6752 // the parent isn't a Lua frame (top-level dispatch
6753 // — first invocation through `call_value`), no
6754 // saved PC exists; we write 0, which always
6755 // mismatches the recorded `dr_return_pc != 0`
6756 // invariant pinned by R3b
6757 // (`crates/luna-jit/src/jit_backend/trace.rs:7206
6758 // debug_assert!(dr_return_pc != 0, ...)`).
6759 if is_downrec_entry {
6760 let saved_pc: i64 = if pre_frames >= 2 {
6761 match &self.frames[pre_frames - 2] {
6762 CallFrame::Lua(parent) => parent.pc as i64,
6763 CallFrame::Cont(_) => 0,
6764 }
6765 } else {
6766 0
6767 };
6768 reg_state[window_size_us] = saved_pc;
6769 }
6770 // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
6771 // diagnostic hook. The probe fires on the first trace dispatch
6772 // only (regardless of JIT vs AOT origin — both go through this
6773 // arm), letting the AOT smoke test verify mcode actually
6774 // executed. Guarded behind `OnceLock` so the env read is
6775 // a one-time cost per process; gated on `dispatched == 0`
6776 // so the smoke test gets a deterministic single-
6777 // line `aot_trace_fired pc=N` for the first dispatch.
6778 if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
6779 eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
6780 }
6781 let continuation_pc = {
6782 // v1.1 A1 Session A — chunk_compiler.enter
6783 // (CraneliftBackend delegates to enter_jit;
6784 // NullJitBackend returns an inert guard).
6785 let vm_ptr: *mut Vm = self;
6786 let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
6787 // SAFETY: `reg_state` was resized to `reg_state_len` initialised i64 slots above and is neither moved nor resized during the call. NOTE(review): presumably `entry_fn` only touches those slots and stays valid while `ct` is held — confirm against the lowerer.
6788 unsafe { entry_fn(reg_state.as_mut_ptr()) }
6789 };
6790 self.jit.counters.dispatched += 1;
6791
6792 if self.jit.pending_err.is_some() {
6793 self.jit.pending_err = None;
6794 self.jit.counters.deopt += 1;
6795 // P12-S4-step4b-C-2 — unwind any helper-pushed
6796 // inlined frames before the interpreter resumes.
6797 // Don't restore reg_state — the trace's partial
6798 // writes are discarded; interp re-executes from
6799 // the original `pc`.
6800 while self.frames.len() > pre_frames {
6801 frames_pop_sync(&mut self.frames, &mut self.frames_top);
6802 }
6803 if is_downrec_entry {
6804 // v2.0 Track-R R3c — pending_err observed
6805 // mid-trace inside a downrec admit. Treat
6806 // it as a guard miss: bump `downrec_deopt`
6807 // and suppress the next downrec admit so
6808 // interp can advance past `head_pc` and
6809 // the same trace doesn't immediately re-
6810 // fire on the next loop iteration.
6811 self.jit.counters.downrec_deopt += 1;
6812 self.jit.suppress_downrec_admit_once = true;
6813 }
6814 } else if is_downrec_entry && {
6815 // v2.0 Track-R R3d — only enter the R3c/R3d
6816 // downrec classifier for returns whose shape
6817 // matches the lowerer's `downrec_idx_opt` tail
6818 // emit: either the stitch_blk DOWNREC sentinel
6819 // (HIT) or the deopt_blk GLOBAL-sentinel-with-
6820 // body==head_pc (MISS via guard fail). Any
6821 // other return from a downrec trace (intermediate
6822 // body cmp side-exit, GetField inference fail,
6823 // etc.) carries a different sentinel/body shape
6824 // and means the body exited BEFORE reaching the
6825 // downrec close — classify those through the
6826 // normal decode path (else branch below) so
6827 // reg_state restores + pc advances correctly.
6828 // The pre-R3d behavior (R3c) classified them all
6829 // as MISS and skipped the normal restore, which
6830 // inflated `downrec_deopt` with non-downrec
6831 // events and lost the trace's mid-flight writes.
6832 let raw_ret = continuation_pc as u64;
6833 let from_side_trace = (raw_ret >> 63) & 1 == 1;
6834 let sentinel_code = if from_side_trace {
6835 ((raw_ret >> 56) & 0x7F) as u32
6836 } else {
6837 0
6838 };
6839 let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6840 let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
6841 crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
6842 0,
6843 );
6844 from_side_trace
6845 && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
6846 || (sentinel_code == global_deopt_code
6847 && raw_body == head_pc_val as u64))
6848 } {
6849 // R3d downrec event classifier.
6850 let raw_ret = continuation_pc as u64;
6851 let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6852 if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
6853 // Guard HIT — saved_pc matched one of the
6854 // baked candidates and the trace's
6855 // `stitch_blk` arm returned the DOWNREC
6856 // sentinel. Cycle-safety checkpoint:
6857 // decrement budget; on underflow,
6858 // reclassify as deopt + reset budget.
6859 // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
6860 // ~all natural HITs in a hot loop fire
6861 // before reset pressure.
6862 if self.jit.stitch_depth_remaining > 0 {
6863 self.jit.stitch_depth_remaining -= 1;
6864 self.jit.counters.downrec_dispatched += 1;
6865 } else {
6866 self.jit.counters.downrec_deopt += 1;
6867 self.jit.stitch_depth_remaining =
6868 crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
6869 }
6870 } else {
6871 // Guard MISS via the lowerer's deopt_blk
6872 // arm (GLOBAL sentinel + body == head_pc).
6873 // The deopt_blk emit performs the
6874 // store-back via `emit_store_back_and_return_pc`,
6875 // so the live stack already reflects the
6876 // body's writes; no extra restore needed
6877 // from the dispatcher side.
6878 self.jit.counters.downrec_deopt += 1;
6879 }
6880 self.jit.suppress_downrec_admit_once = true;
6881 // Pop helper-pushed inlined frames (defensive —
6882 // R3d's emit shape doesn't push frames in the
6883 // tail, but a body side-exit before reaching
6884 // the tail may have via the materialize helper).
6885 while self.frames.len() > pre_frames {
6886 frames_pop_sync(&mut self.frames, &mut self.frames_top);
6887 }
6888 self.jit.reg_state_buf = reg_state;
6889 self.jit.entry_tags_buf = entry_tags;
6890 continue;
6891 } else {
6892 // Restore each slot using the trace's
6893 // exit-tag analysis (see ExitTag docs).
6894 // P12-S4-step4b-C-2 — decode the IR's
6895 // side-exit shape. Upper 32 bits = (site_idx
6896 // + 1) for inline cmp side-exits, 0 for
6897 // legacy clean-tail / non-inline exits.
6898 // P15-A v2-C-A0 — decode lives in
6899 // `crate::jit::trace::decode_exit_shape` so
6900 // v2-C-A3 can reuse it with the SIDE TRACE's
6901 // shape inputs when the sentinel bit
6902 // (v2-C-A2) is set on `raw_ret`.
6903 let raw_ret = continuation_pc as u64;
6904 // P15-A v2-C-A3 — side-trace return decode.
6905 // Bit 63 of `raw_ret` is the side-trace
6906 // marker the parent's IR OR'd in when it
6907 // tail-called into a wired child trace.
6908 // Bits 56..=62 carry the sentinel code (the
6909 // cache key into the parent's
6910 // `side_trace_cache`); bits 0..=55 are the
6911 // child's own return value (encoded site or
6912 // plain cont_pc) which we MUST decode using
6913 // the CHILD's per_exit_inline / per_exit_tags
6914 // / exit_tags / exit_hit_counts — not the
6915 // parent's. The dispatcher snapshot read
6916 // above holds the parent's shapes; when bit
6917 // 63 is set we re-fetch the child's via the
6918 // sentinel-keyed cache.
6919 let from_side_trace = (raw_ret >> 63) & 1 == 1;
6920 let (
6921 decode_inline,
6922 decode_tags,
6923 decode_exit_tags,
6924 decode_hit_counts,
6925 decode_body,
6926 ) = if from_side_trace {
6927 let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6928 let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6929 let traces = cl.proto.traces.borrow();
6930 let child_idx = traces
6931 .iter()
6932 .find(|t| t.head_pc == head_pc_val)
6933 .and_then(|pct| {
6934 pct.side_trace_cache.borrow().get(&sentinel_code).copied()
6935 });
6936 if let Some(idx) = child_idx
6937 && let Some(child) = traces.get(idx as usize)
6938 {
6939 if crate::jit::trace::v2c_probe_enabled() {
6940 eprintln!(
6941 "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
6942 sentinel_code,
6943 body,
6944 idx,
6945 child.n_ops,
6946 child.head_pc,
6947 child.window_size,
6948 pc,
6949 window_size,
6950 child.dispatchable,
6951 child.is_inline_abort_close,
6952 );
6953 }
6954 (
6955 child.per_exit_inline.clone(),
6956 child.per_exit_tags.clone(),
6957 child.exit_tags.clone(),
6958 child.exit_hit_counts.clone(),
6959 body,
6960 )
6961 } else {
6962 if crate::jit::trace::v2c_probe_enabled() {
6963 eprintln!(
6964 "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
6965 sentinel_code, body,
6966 );
6967 }
6968 // Cache miss — fall back to parent
6969 // shapes with the body bits. Best-
6970 // effort; the trace_side_trace_
6971 // shape_mismatch_count records this
6972 // path indirectly (close-handler
6973 // skips wiring on mismatch so we
6974 // shouldn't reach here when shape
6975 // gate held).
6976 (
6977 per_exit_inline.clone(),
6978 per_exit_tags.clone(),
6979 exit_tags.clone(),
6980 exit_hit_counts.clone(),
6981 body,
6982 )
6983 }
6984 } else {
6985 // P15-A v2-D — dispatcher-level side-trace
6986 // invocation. Replaces v2-C's universal IR
6987 // gate (`load + icmp + brif` at every
6988 // emit_store_back callsite, which A6/A7
6989 // measured as a net perf regression).
6990 // A8 fast-path: skip the tentative decode +
6991 // child lookup entirely when `has_any_side
6992 // _wired == false` (the common case until
6993 // the first side trace compiles for this
6994 // parent). For fib_10_x10k and other tight
6995 // short-trace workloads where most parent
6996 // traces never get a wired child, this
6997 // collapses the v2-D overhead to a single
6998 // `Cell::get()` on the cold path.
6999 // A8-revert: A8 had `parent_has_side` short-
7000 // circuit + snapshot hoist; mini N=3 showed
7001 // A8 lost the btrees_d8 1.02× win (dropped
7002 // to 0.95×) WITHOUT helping fib_10 (same
7003 // 0.86×). Drop A8 — accept the always-run
7004 // v2-D path; the tentative decode + cell
7005 // load is cheaper than the cost A8 added.
7006 {
7007 let tentative = crate::jit::trace::decode_exit_shape(
7008 raw_ret,
7009 per_exit_inline,
7010 per_exit_tags,
7011 exit_tags,
7012 );
7013 let tentative_exit_idx = tentative.exit_hit_idx;
7014 let child_invoke = {
7015 let traces = cl.proto.traces.borrow();
7016 traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
7017 |pct| {
7018 let cell =
7019 pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
7020 let fn_ptr = cell.get();
7021 if fn_ptr.is_null() {
7022 return None;
7023 }
7024 traces
7025 .iter()
7026 .find(|t| {
7027 t.entry as *const () as *const u8 == fn_ptr
7028 })
7029 .map(|child| {
7030 (
7031 child.entry,
7032 child.per_exit_inline.clone(),
7033 child.per_exit_tags.clone(),
7034 child.exit_tags.clone(),
7035 child.exit_hit_counts.clone(),
7036 )
7037 })
7038 },
7039 )
7040 };
7041 if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
7042 let child_raw_ret = {
7043 // v1.1 A1 Session A — chunk_compiler.enter
7044 // (side-trace entry).
7045 let vm_ptr: *mut Vm = self;
7046 let _guard =
7047 self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
7048 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7049 unsafe { cent(reg_state.as_mut_ptr()) }
7050 };
7051 (cpi, cpt, cet, chc, child_raw_ret as u64)
7052 } else {
7053 (
7054 per_exit_inline.clone(),
7055 per_exit_tags.clone(),
7056 exit_tags.clone(),
7057 exit_hit_counts.clone(),
7058 raw_ret,
7059 )
7060 }
7061 }
7062 };
7063 let decoded = crate::jit::trace::decode_exit_shape(
7064 decode_body,
7065 &decode_inline,
7066 &decode_tags,
7067 &decode_exit_tags,
7068 );
7069 let site_id = decoded.site_id;
7070 let cont_pc = decoded.cont_pc;
7071 let exit_hit_idx = decoded.exit_hit_idx;
7072 let exit_tags_for_pc = decoded.exit_tags_for_pc;
7073 // P15-A v2-C-A3 — for side-trace returns
7074 // force using_global_exit_tags=false so the
7075 // restore loop always takes the per-tag slow
7076 // path (the child's global_tag_res_kind
7077 // classification isn't plumbed through yet
7078 // — TODO for a future polish step).
7079 let using_global_exit_tags = if from_side_trace {
7080 false
7081 } else {
7082 decoded.using_global_exit_tags
7083 };
7084 // P15-prep — increment the counter (saturate
7085 // at u32::MAX to avoid wrap on long runs).
7086 // P15-A v1 — track whether this increment is
7087 // the one that crossed `HOTEXIT_THRESHOLD`
7088 // (transition: previous v < threshold, new v
7089 // == threshold). The side-trace start is
7090 // deferred to just before `continue;` so
7091 // vm.stack and frame.pc are fully restored
7092 // (the snapshot reads post-restore values).
7093 let mut side_trace_should_start = false;
7094 // P15-A v2-C-A3 — for side-trace returns the
7095 // counter to bump is the CHILD's (decoded
7096 // shape lookup) — `exit_hit_idx` is into the
7097 // decoded layout, so use the matching
7098 // `decode_hit_counts`. For parent decode
7099 // they're aliased (clone of the parent's
7100 // own Rc).
7101 if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7102 let v = c.get();
7103 if v < u32::MAX {
7104 c.set(v + 1);
7105 }
7106 if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7107 && self.jit.active_trace.is_none()
7108 && self.jit.trace_enabled
7109 {
7110 side_trace_should_start = true;
7111 }
7112 }
7113 // P12-S4-step4b-C-2 — at an inline cmp@d>0
7114 // side-exit, the helper has pushed N frames on
7115 // top of the trace head's frame and
7116 // `exit_tags_for_pc.len()` covers the full
7117 // window (caller + each inlined frame's
7118 // window). Slots beyond `max_stack` belong to
7119 // an inlined frame: their `Untouched` entries
7120 // default to Nil (no entry-tag fallback —
7121 // marshal-in only captured caller slots) and
7122 // we write to interp stack at `base + i` which
7123 // mirrors `op_offsets`-derived layout.
7124 let slot_count = exit_tags_for_pc.len();
7125 // P12-S4-step4b-C-2 — the helper only extends
7126 // vm.stack up to the deepest pushed frame's
7127 // window, but the exit_tags snapshot covers
7128 // the trace's full `window_size` (which
7129 // includes depth-N+1 scratch slots that the
7130 // trace's IR may have written without a
7131 // matching pushed frame). Extend with Nil so
7132 // the write at the tail doesn't panic; these
7133 // slots get overwritten by the writeback loop
7134 // and won't leak meaningful data past the
7135 // pushed frames' R[0..max_stack) windows.
7136 if self.stack.len() < base_us + slot_count {
7137 self.stack
7138 .resize(base_us + slot_count, crate::runtime::Value::Nil);
7139 }
7140 // P13-S13-E — fast-path restore loop. When
7141 // we landed on the global `exit_tags`,
7142 // dispatch on the compile-time
7143 // classification: skip the loop entirely
7144 // for `AllUntouched`, do a tag-free
7145 // `Value::Int(...)` write per slot for
7146 // `AllInt`, otherwise fall through to the
7147 // general match-arm loop. site_id > 0
7148 // (inline frame mat) and per_exit_tags
7149 // hits always take the general path —
7150 // their per-side-exit shapes aren't
7151 // pre-classified yet.
7152 let fast_path_taken = if using_global_exit_tags {
7153 match global_tag_res_kind {
7154 crate::jit::trace::TagResKind::AllUntouched => {
7155 // No-op: vm.stack already
7156 // matches the trace's post-
7157 // entry state for these
7158 // slots (entry values not
7159 // overridden, or already
7160 // spilled by helpers).
7161 true
7162 }
7163 crate::jit::trace::TagResKind::AllInt => {
7164 for i in 0..slot_count {
7165 self.stack[base_us + i] =
7166 crate::runtime::Value::Int(reg_state[i]);
7167 }
7168 true
7169 }
7170 crate::jit::trace::TagResKind::Mixed => false,
7171 }
7172 } else {
7173 false
7174 };
7175 if !fast_path_taken {
7176 for i in 0..slot_count {
7177 let tag = match exit_tags_for_pc[i] {
7178 crate::jit::trace::ExitTag::Untouched => {
7179 if i < max_stack {
7180 entry_tags[i]
7181 } else {
7182 crate::runtime::value::raw::NIL
7183 }
7184 }
7185 crate::jit::trace::ExitTag::Int => {
7186 crate::runtime::value::raw::INT
7187 }
7188 crate::jit::trace::ExitTag::Float => {
7189 crate::runtime::value::raw::FLOAT
7190 }
7191 crate::jit::trace::ExitTag::Table => {
7192 crate::runtime::value::raw::TABLE
7193 }
7194 crate::jit::trace::ExitTag::Closure => {
7195 crate::runtime::value::raw::CLOSURE
7196 }
7197 // P12-S6-A1 — trace actively wrote Nil
7198 // to this slot (e.g. via Op::LoadNil).
7199 // Restore as Nil regardless of the entry
7200 // tag, since the i64 payload is 0 and
7201 // packing as the entry tag (e.g. INT)
7202 // would mis-type the slot.
7203 crate::jit::trace::ExitTag::Nil => {
7204 crate::runtime::value::raw::NIL
7205 }
7206 // P12-S12-C v2 — trace wrote a Str ptr
7207 // to this slot (LoadK Str / Move from
7208 // Str / Concat result). Restore as
7209 // Value::Str with raw bits round-
7210 // tripped.
7211 crate::jit::trace::ExitTag::Str => {
7212 crate::runtime::value::raw::STR
7213 }
7214 };
// SAFETY: `tag` comes from the match just above. For an `Untouched`
// slot it is the entry tag recorded for that caller slot
// (`i < max_stack`; presumably validated at trace entry — TODO
// confirm) or NIL for a slot beyond the caller window. For every
// other variant it is the tag the exit-tag analysis assigned:
// INT, FLOAT, TABLE, CLOSURE, NIL or STR.
// The raw payload sits in reg_state[i].
// `self.stack` was resized above to at least `base_us + slot_count`,
// so the indexed store is in bounds (inline frames may additionally
// have been extended by the materialize helper).
// NOTE(review): for TABLE / CLOSURE / STR this relies on reg_state[i]
// holding a pointer to a live GC object — confirm against the lowerer.
self.stack[base_us + i] = unsafe {
    Value::pack(
        tag,
        crate::runtime::value::RawVal {
            zero: reg_state[i] as u64,
        },
    )
};
7230 }
7231 }
7232 // P12-S4-step4b-C-2 — for non-inline exits the
7233 // helper was never called (no metas chain for
7234 // this cont_pc), so `frames.last()` is the
7235 // trace head's frame and we set its pc to
7236 // cont_pc as before. For inline exits the
7237 // helper baked the side-exit PC into the
7238 // innermost frame's `pc` at push time
7239 // (chain.last().pc was overridden at emit),
7240 // so this assignment to `frames.last_mut().pc
7241 // = cont_pc` is a redundant-but-correct
7242 // confirmation.
7243 let _ = &per_exit_inline; // hold the Rc alive across dispatch
7244 // P12-S4-step4b-C-2 — for inline side-exits the
7245 // helper has pushed N frames on top. The trace
7246 // head frame is at `pre_frames - 1`; set its
7247 // pc to `head_resume_pc` so when the chain
7248 // eventually pops back to it, interp resumes
7249 // PAST the trace's depth-0 Op::Call instead of
7250 // restarting from `head_pc` and re-triggering
7251 // dispatch (infinite loop). The innermost
7252 // (helper-pushed) frame already has its pc
7253 // baked in at compile time, but we still
7254 // assign `cont_pc` below for parity with the
7255 // non-inline path (no-op).
7256 if site_id > 0 {
7257 let idx = (site_id - 1) as usize;
7258 let head_resume_pc = decode_inline[idx].head_resume_pc;
7259 if pre_frames > 0 {
7260 if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7261 f.pc = head_resume_pc;
7262 }
7263 }
7264 }
7265 let frames_len_now = self.frames.len();
// Point the innermost frame at the decoded continuation pc.
// SAFETY: `unwrap_unchecked` is sound only if `self.frames` is
// non-empty at this point.
// NOTE(review): presumably guaranteed because this runs while
// dispatching a trace for the currently executing Lua frame (frames
// above `pre_frames` are only ever added by the inline helper) —
// confirm; nothing in this block checks it.
match unsafe { self.frames.last_mut().unwrap_unchecked() } {
    CallFrame::Lua(fmut) => {
        // Optional probe output: shows the pc being replaced and the
        // decode inputs that produced `cont_pc`.
        if crate::jit::trace::v2c_probe_enabled() {
            eprintln!(
                "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
                from_side_trace,
                raw_ret,
                fmut.pc,
                cont_pc,
                site_id,
                frames_len_now,
                pre_frames,
                max_stack,
            );
        }
        fmut.pc = cont_pc;
    }
    // A continuation frame on top here is treated as a broken
    // invariant rather than a recoverable state.
    _ => unreachable!("Cont frame at trace dispatch"),
}
7286 // P15-A v1 — deferred side-trace start. The
7287 // increment block above flagged this exit's
7288 // hit count crossing HOTEXIT_THRESHOLD; now
7289 // that vm.stack is restored and frame.pc is
7290 // settled, snapshot entry_tags from the
7291 // resume frame's window and create the
7292 // recorder. The recorder's first push fires
7293 // on the next interp iteration at cont_pc.
7294 //
7295 // `head_proto` for the side trace = cl.proto
7296 // (trace JIT only inlines self-recursive
7297 // calls today, so cont_pc always lands in
7298 // the same proto as the parent). Frame base
7299 // is the resume frame (top of `self.frames`
7300 // — inline-pushed frames moved this).
7301 if side_trace_should_start {
7302 let (resume_base, resume_proto) = match self.frames.last() {
7303 Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7304 _ => (base_us, cl.proto),
7305 };
7306 let resume_max_stack = resume_proto.max_stack as usize;
7307 let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7308 // Extend stack if cont_pc's frame window
7309 // overhangs the current stack len (rare,
7310 // but inline-pushed frame stack writes
7311 // only covered the trace's writeback).
7312 if self.stack.len() < resume_base + resume_max_stack {
7313 self.stack.resize(
7314 resume_base + resume_max_stack,
7315 crate::runtime::Value::Nil,
7316 );
7317 }
7318 for i in 0..resume_max_stack {
7319 let (tag, _) = self.stack[resume_base + i].unpack();
7320 side_entry_tags.push(tag);
7321 }
7322 self.jit.active_trace =
7323 Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7324 resume_proto,
7325 cont_pc,
7326 side_entry_tags,
7327 cl.proto,
7328 head_pc_val,
7329 exit_hit_idx,
7330 )));
7331 self.jit.recording_frame_base = self.frames.len() - 1;
7332 self.jit.counters.side_trace_started += 1;
7333 }
7334 // P13-S13-D — put the dispatch buffers back
7335 // before the `continue;` so the next
7336 // dispatch picks up the same allocation.
7337 self.jit.reg_state_buf = reg_state;
7338 self.jit.entry_tags_buf = entry_tags;
7339 continue;
7340 }
7341 }
7342 // P13-S13-D — !dispatch_ok / deopt path / non-cont
7343 // exit also restore the buffers before falling
7344 // through to the interp.
7345 self.jit.reg_state_buf = reg_state;
7346 self.jit.entry_tags_buf = entry_tags;
7347 }
7348
7349 // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
7350 // hook code that consults `currentpc = savedpc - 1` lands on the
7351 // instruction now executing. luna mirrors that by advancing
7352 // `f.pc` to `pc + 1` before the hook block — local_at /
7353 // getinfo / line attribution all read f.pc, and the existing
7354 // `pc - 1` convention in those helpers then yields the current
7355 // instruction's pc (db.lua :696: local `A` visible at the
7356 // chunk's return line once OP_CLOSURE has advanced pc).
7357 //
7358 // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
7359 // (cont frames drained above) so the and_then/Option layers are
7360 // dead weight.
// SAFETY: `unwrap_unchecked` is sound only if `self.frames` is
// non-empty. NOTE(review): presumably guaranteed because the loop is
// about to execute an instruction fetched from the top frame —
// confirm; it is not re-checked here.
match unsafe { self.frames.last_mut().unwrap_unchecked() } {
    // Advance the saved pc past the instruction being executed before
    // any hook runs.
    CallFrame::Lua(fmut) => fmut.pc = pc + 1,
    _ => unreachable!("Cont frame at pc bump"),
}
7366
7367 // count + line hooks (PUC traceexec): before executing the
7368 // instruction. Skipped while the hook itself runs.
7369 // (Parens here are load-bearing — without them `&&` binds tighter
7370 // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7371 // letting a Lua line hook recurse into itself → stack overflow
7372 // on db.lua line-hook assertions. Matches the `hook_call_with` /
7373 // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7374 if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7375 let lines = &cl.proto.lines;
7376 let cur_line = if lines.is_empty() {
7377 None
7378 } else {
7379 Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7380 };
7381 // count hook: fire every `count_base` instructions
7382 if self.hook.count {
7383 self.hook.count_left -= 1;
7384 if self.hook.count_left <= 0 {
7385 self.hook.count_left = self.hook.count_base;
7386 // hooked function is the running Lua frame: its frame
7387 // is on the stack, so no synthetic C level is needed.
7388 self.run_hook(b"count", cur_line, false)?;
7389 }
7390 }
7391 // line hook: fire on a fresh frame, a backward jump (loop), or a
7392 // change of source line.
7393 if self.hook.line {
7394 if lines.is_empty() {
7395 // PUC: a stripped chunk has no line info, so
7396 // `getfuncline` returns -1. The line hook still fires
7397 // on the first instruction of the new frame (where
7398 // `npci <= oldpc` holds at oldpc=0), with the line
7399 // pushed as `nil` instead of an integer (db.lua :1030
7400 // "hook called without debug info for 1st instruction").
7401 if oldpc == u32::MAX {
7402 self.run_hook(b"line", None, false)?;
7403 self.top_frame_mut().hook_oldpc = pc;
7404 }
7405 } else {
7406 let newline = lines[(pc as usize).min(lines.len() - 1)];
7407 // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7408 // on a backward jump (`pc < oldpc` — strict; an equal pc
7409 // would re-fire the install-site after `oldpc = pc`),
7410 // or when the source line changes.
7411 let fire = oldpc == u32::MAX
7412 || pc < oldpc
7413 || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7414 if fire {
7415 self.run_hook(b"line", Some(newline as i64), false)?;
7416 }
7417 self.top_frame_mut().hook_oldpc = pc;
7418 }
7419 }
7420 }
7421
7422 match inst.op() {
7423 Op::Move => {
7424 let v = self.r(base, inst.b());
7425 self.set_r(base, inst.a(), v);
7426 }
7427 Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7428 Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7429 Op::LoadK => {
7430 let v = cl.proto.consts[inst.bx() as usize];
7431 self.set_r(base, inst.a(), v);
7432 }
7433 Op::LoadKx => {
7434 let extra = cl.proto.code[self.pc_of_top() as usize];
7435 self.bump_pc();
7436 let v = cl.proto.consts[extra.ax() as usize];
7437 self.set_r(base, inst.a(), v);
7438 }
7439 Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7440 Op::LFalseSkip => {
7441 self.set_r(base, inst.a(), Value::Bool(false));
7442 self.bump_pc();
7443 }
7444 Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7445 Op::LoadNil => {
7446 let a = inst.a();
7447 for i in 0..=inst.b() {
7448 self.set_r(base, a + i, Value::Nil);
7449 }
7450 }
7451 Op::GetUpval => {
7452 let v = self.upval_get(cl, inst.b());
7453 self.set_r(base, inst.a(), v);
7454 }
7455 Op::SetUpval => {
7456 let v = self.r(base, inst.a());
7457 self.upval_set(cl, inst.b(), v);
7458 }
7459 Op::GetTabUp => {
7460 let t = self.upval_get(cl, inst.b());
7461 let key = cl.proto.consts[inst.c() as usize];
7462 self.op_index(t, key, base + inst.a())?;
7463 }
7464 Op::GetTable => {
7465 let t = self.r(base, inst.b());
7466 let key = self.r(base, inst.c());
7467 self.op_index(t, key, base + inst.a())?;
7468 }
7469 Op::GetI => {
7470 let t = self.r(base, inst.b());
7471 self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7472 }
7473 Op::GetField => {
7474 let t = self.r(base, inst.b());
7475 let key = cl.proto.consts[inst.c() as usize];
7476 // v1.2 D4 A1 — fast path: known-Str const key + no
7477 // metatable on the table → skip `op_index` /
7478 // `index_step`'s MAX_TAG_LOOP setup and the outer
7479 // `Value` match. Falls through to the slow path
7480 // unchanged when either invariant breaks (so
7481 // `__index` metamethods, non-Table receivers, and
7482 // non-Str keys behave exactly as before).
7483 if let Value::Table(tb) = t
7484 && tb.metatable().is_none()
7485 && let Value::Str(s) = key
7486 {
7487 let v = tb.get_str(s);
7488 self.stack[(base + inst.a()) as usize] = v;
7489 } else {
7490 self.op_index(t, key, base + inst.a())?;
7491 }
7492 }
7493 Op::SetTabUp => {
7494 let t = self.upval_get(cl, inst.a());
7495 let key = cl.proto.consts[inst.b() as usize];
7496 let v = self.r(base, inst.c());
7497 self.op_newindex(t, key, v)?;
7498 }
7499 Op::SetTable => {
7500 let t = self.r(base, inst.a());
7501 let key = self.r(base, inst.b());
7502 let v = self.r(base, inst.c());
7503 self.op_newindex(t, key, v)?;
7504 }
7505 Op::SetI => {
7506 let t = self.r(base, inst.a());
7507 let v = self.r(base, inst.c());
7508 self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7509 }
7510 Op::SetField => {
7511 let t = self.r(base, inst.a());
7512 let key = cl.proto.consts[inst.b() as usize];
7513 let v = self.r(base, inst.c());
7514 self.op_newindex(t, key, v)?;
7515 }
7516 Op::NewTable => {
7517 let t = self.heap.new_table();
7518 self.set_r(base, inst.a(), Value::Table(t));
7519 self.maybe_collect_garbage(base + inst.a() + 1);
7520 }
7521 Op::SetList => {
7522 let a = inst.a();
7523 let abs_a = base + a;
7524 let n = if inst.b() == 0 {
7525 self.top - (abs_a + 1)
7526 } else {
7527 inst.b()
7528 };
7529 let offset = if inst.k() {
7530 let extra = cl.proto.code[self.pc_of_top() as usize];
7531 self.bump_pc();
7532 extra.ax() as i64
7533 } else {
7534 inst.c() as i64
7535 };
7536 let Value::Table(t) = self.r(base, a) else {
7537 unreachable!("SETLIST on non-table");
7538 };
7539 for i in 1..=n {
7540 let v = self.r(base, a + i);
7541 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7542 if let Err(TableError::Overflow) =
7543 unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7544 {
7545 return Err(self.rt_err("table overflow"));
7546 }
7547 }
7548 // one barrier_back covers every store this op did — PUC's
7549 // `luaC_barrierback_` once-per-table optimisation
7550 self.heap
7551 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7552 // the element temps above the table are now consumed
7553 self.maybe_collect_garbage(base + a + 1);
7554 }
7555 Op::SelfOp => {
7556 let o = self.r(base, inst.b());
7557 self.set_r(base, inst.a() + 1, o);
7558 // PUC OP_SELF's C is a constant index when the k-flag is
7559 // set; otherwise it points to a register that holds the
7560 // (constant-loaded) key. luna's compiler falls back to the
7561 // register form when the constant index exceeds OP_SELF's
7562 // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7563 // a table with 250+ string keys, where "findfield" lands
7564 // past const #255). The exec must honour the same split.
7565 let key = if inst.k() {
7566 cl.proto.consts[inst.c() as usize]
7567 } else {
7568 self.r(base, inst.c())
7569 };
7570 self.op_index(o, key, base + inst.a())?;
7571 }
7572 Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7573 Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7574 Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7575 Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7576 Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7577 Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7578 Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7579 Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7580 Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7581 Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7582 Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7583 Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7584 Op::Unm => {
7585 let v = self.r(base, inst.b());
7586 match coerce_num(v) {
7587 Some(Num::Int(i)) => {
7588 self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7589 }
7590 Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7591 None => {
7592 let mm = self.get_mm(v, Mm::Unm);
7593 if mm.is_nil() {
7594 return Err(self.type_err("perform arithmetic on", v));
7595 }
7596 let dst = base + inst.a();
7597 self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7598 }
7599 }
7600 }
7601 Op::BNot => {
7602 let v = self.r(base, inst.b());
7603 match coerce_num(v) {
7604 Some(n) => {
7605 let i = self.int_from_num(n)?;
7606 self.set_r(base, inst.a(), Value::Int(!i));
7607 }
7608 None => {
7609 let mm = self.get_mm(v, Mm::BNot);
7610 if mm.is_nil() {
7611 return Err(self.type_err("perform bitwise operation on", v));
7612 }
7613 let dst = base + inst.a();
7614 self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7615 }
7616 }
7617 }
7618 Op::Not => {
7619 let v = self.r(base, inst.b());
7620 self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7621 }
7622 Op::Len => {
7623 let v = self.r(base, inst.b());
7624 match self.len_step(v)? {
7625 MmOut::Done(r) => self.set_r(base, inst.a(), r),
7626 MmOut::Mm { func, recv } => {
7627 let dst = base + inst.a();
7628 self.begin_meta_call(
7629 func,
7630 &[recv, recv],
7631 MetaAction::Store { dst },
7632 "len",
7633 )?;
7634 }
7635 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7636 }
7637 }
7638 Op::Concat => {
7639 // right-associative fold over operands at base+a .. base+a+n,
7640 // in place on the stack so a yielding __concat can suspend.
7641 let a = inst.a();
7642 let n = inst.b();
7643 self.top = base + a + n;
7644 self.concat_run(base + a)?;
7645 }
7646 Op::Close => {
7647 // Yieldable: drive __close handlers through the
7648 // interpreter loop so a coroutine.yield() inside a
7649 // handler suspends cleanly (locals.lua block-end yield).
7650 // `drive_close` parks the handler call at `self.top`, so
7651 // raise `top` past this frame's full register window
7652 // first — a goto out of a nested for-loop can fire
7653 // OP_Close while `self.top` still sits at the inner
7654 // body's working top, which would let `push_frame`'s
7655 // wipe clobber the outer tbc slot before it could be
7656 // closed (locals.lua:1219 nested-for goto regression).
7657 self.top = self.top.max(base + cl.proto.max_stack as u32);
7658 let _ =
7659 self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7660 }
7661 Op::Tbc => {
7662 self.register_tbc(base + inst.a())?;
7663 }
7664 Op::Jmp => {
7665 let off = inst.sj();
7666 // P12-S1.B — trace JIT back-edge counter. A negative
7667 // jump offset is a loop back-edge (the only canonical
7668 // backward jumps the compiler emits — `while`, `for`,
7669 // `repeat`). Tick the per-Proto counter and, once it
7670 // exceeds the threshold, log a stub promotion that
7671 // S1.C will turn into actual trace recording. The
7672 // whole block is gated on `trace_jit_enabled` so
7673 // existing benches see one branch-not-taken and no
7674 // counter writes.
7675 if self.jit.trace_enabled && off < 0 {
7676 let proto = cl.proto;
7677 let c = proto.trace_hot_count.get();
7678 if c < u32::MAX / 2 {
7679 proto.trace_hot_count.set(c + 1);
7680 }
7681 // P13-S13-H — relaxed back-edge trigger:
7682 // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7683 // a missed crossing (active_trace busy with
7684 // a call-trigger, or the recorder slot
7685 // happened to be in use) doesn't permanently
7686 // lock this back-edge target out. The
7687 // `already_cached` short-circuit prevents
7688 // duplicate recordings: once a trace is
7689 // cached for this target, subsequent
7690 // crossings skip the start. This pairs with
7691 // S13-H's discard-on-partial-coverage close
7692 // handling — when a short call-trigger is
7693 // discarded, the back-edge can still find an
7694 // open slot at the next iteration.
7695 let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7696 // P13-S13-K — gave-up short-circuit. Skip
7697 // the RefCell borrow + scan when the
7698 // S13-I cap force-compiled a partial
7699 // trace on this Proto.
7700 let back_edge_already_cached = if proto.trace_gave_up.get() {
7701 true
7702 } else {
7703 proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7704 };
7705 if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7706 && self.jit.active_trace.is_none()
7707 && !back_edge_already_cached
7708 {
7709 // Back-edge target = pc after `add_pc(off)`,
7710 // i.e. current `pc + 1 + off` (the dispatch
7711 // loop has already advanced f.pc to pc+1).
7712 let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7713 // Snapshot per-slot Value tag at trace
7714 // entry so the lowerer's kind tracker
7715 // knows which arith path to lower
7716 // (iadd vs fadd, etc.).
7717 let max_stack = cl.proto.max_stack as usize;
7718 let base_us = base as usize;
7719 let mut entry_tags = Vec::with_capacity(max_stack);
7720 for i in 0..max_stack {
7721 let (tag, _) = self.stack[base_us + i].unpack();
7722 entry_tags.push(tag);
7723 }
7724 self.jit.active_trace =
7725 Some(Box::new(crate::jit::trace::TraceRecord::start(
7726 cl.proto, target, entry_tags, false,
7727 )));
7728 // P12-S4 — record the frame the trace
7729 // started in. `self.frames.len() - 1`
7730 // since we're inside the currently-running
7731 // Lua frame's dispatch.
7732 self.jit.recording_frame_base = self.frames.len() - 1;
7733 }
7734 }
7735 self.add_pc(off);
7736 }
7737 Op::Eq => {
7738 let l = self.r(base, inst.a());
7739 let r = self.r(base, inst.b());
7740 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7741 if (a == b) != inst.k() {
7742 self.bump_pc();
7743 }
7744 } else {
7745 let step = self.eq_step(l, r);
7746 self.op_compare(step, l, r, inst.k(), "eq")?;
7747 }
7748 }
7749 Op::EqK => {
7750 let l = self.r(base, inst.a());
7751 let r = cl.proto.consts[inst.b() as usize];
7752 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7753 if (a == b) != inst.k() {
7754 self.bump_pc();
7755 }
7756 } else {
7757 let step = self.eq_step(l, r);
7758 self.op_compare(step, l, r, inst.k(), "eq")?;
7759 }
7760 }
7761 Op::Lt => {
7762 let l = self.r(base, inst.a());
7763 let r = self.r(base, inst.b());
7764 // hot path: Int < Int — drops the MmOut + op_compare match
7765 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7766 if (a < b) != inst.k() {
7767 self.bump_pc();
7768 }
7769 } else {
7770 let step = self.less_step(l, r, false)?;
7771 self.op_compare(step, l, r, inst.k(), "lt")?;
7772 }
7773 }
7774 Op::Le => {
7775 let l = self.r(base, inst.a());
7776 let r = self.r(base, inst.b());
7777 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7778 if (a <= b) != inst.k() {
7779 self.bump_pc();
7780 }
7781 } else {
7782 let step = self.less_step(l, r, true)?;
7783 self.op_compare(step, l, r, inst.k(), "le")?;
7784 }
7785 }
7786 Op::Test => {
7787 let cond = self.r(base, inst.a()).truthy();
7788 self.cond_skip(cond, inst.k());
7789 }
7790 Op::TestSet => {
7791 let v = self.r(base, inst.b());
7792 if v.truthy() == inst.k() {
7793 self.set_r(base, inst.a(), v);
7794 } else {
7795 self.bump_pc();
7796 }
7797 }
7798 Op::Call => {
7799 let abs = base + inst.a();
7800 let nargs = if inst.b() == 0 {
7801 None
7802 } else {
7803 Some(inst.b() - 1)
7804 };
7805 let wanted = inst.c() as i32 - 1;
7806 self.begin_call(abs, nargs, wanted, false)?;
7807 }
7808 Op::TailCall => {
7809 let fr = *self.top_frame();
7810 let abs = base + inst.a();
7811 let mut nargs = if inst.b() == 0 {
7812 self.top - (abs + 1)
7813 } else {
7814 inst.b() - 1
7815 };
7816 // A tail call pops this frame before begin_call, so a
7817 // non-callable target would lose its name/position. Report
7818 // it now (PUC reads funcname from the still-current ci),
7819 // while the frame is intact, for "(field 'x')"-style info.
7820 let mut func = self.stack[abs as usize];
7821 if !matches!(func, Value::Closure(_) | Value::Native(_))
7822 && self.get_mm(func, Mm::Call).is_nil()
7823 {
7824 return Err(self.call_err(func));
7825 }
7826 // PUC `luaD_pretailcall` resolves a chain of `__call`
7827 // metamethods *in place* before deciding whether to
7828 // collapse this frame. Without that, each __call hop
7829 // would push a fresh Lua frame and a 10000-deep
7830 // tail-recursion through a 100-deep __call chain
7831 // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
7832 // shift args right, install the handler at `abs`, retry.
7833 // Chain depth limit matches the call-site `begin_call`
7834 // version cap (5.5 calls.lua :223 — 15 max, then "too
7835 // long"; 16th wrap fails the call). An infinite
7836 // self-referential `__call` would otherwise spin.
7837 let chain_cap = if self.version >= LuaVersion::Lua55 {
7838 15
7839 } else {
7840 MAX_CCMT
7841 };
7842 let mut chain = 0u32;
7843 while !matches!(func, Value::Closure(_) | Value::Native(_)) {
7844 let mm = self.get_mm(func, Mm::Call);
7845 if mm.is_nil() {
7846 return Err(self.call_err(func));
7847 }
7848 chain += 1;
7849 if chain > chain_cap {
7850 return Err(self.rt_err("'__call' chain too long"));
7851 }
7852 let end = (abs + 1 + nargs) as usize;
7853 if self.stack.len() < end + 1 {
7854 self.stack.resize(end + 1, Value::Nil);
7855 }
7856 for i in (0..=nargs).rev() {
7857 self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
7858 }
7859 self.stack[abs as usize] = mm;
7860 nargs += 1;
7861 self.top = abs + 1 + nargs;
7862 func = mm;
7863 }
7864 // PUC's tail-call collapse is Lua→Lua only. A tail call to
7865 // a C function runs the C function under the *current* Lua
7866 // activation (no frame fold — a C frame has nothing to
7867 // collapse into); after the C function returns, the
7868 // calling Lua function returns those results normally.
7869 // Mirror that: keep our Lua frame on the stack, call the
7870 // target through `begin_call(abs, …)` as a regular call,
7871 // and let the fallback `Op::Return` that the compiler
7872 // emits right after `Op::TailCall` forward the results.
7873 // 5.1 closure.lua :177's `return getfenv()` from inside
7874 // foo needs level 1 to resolve to foo, not to the
7875 // thread's globals fallback that happens when no Lua
7876 // frame is on the stack.
7877 let lua_target = matches!(func, Value::Closure(_));
7878 if lua_target {
7879 self.close_slots(fr.base, None)?;
7880 for i in 0..=nargs {
7881 self.stack[(fr.func_slot + i) as usize] =
7882 self.stack[(abs + i) as usize];
7883 }
7884 // PUC `CIST_TAIL`: the new Lua activation inherits
7885 // the popped frame's tailcalls count plus one for
7886 // this collapse. 5.1 db.lua :372 hammers 30000
7887 // recursive tail calls and expects to see the
7888 // synthetic tail level for every one of them.
7889 self.pending_tailcalls = fr.tailcalls.saturating_add(1);
7890 frames_pop_sync(&mut self.frames, &mut self.frames_top);
7891 if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
7892 && self.frames.len() < entry_depth
7893 {
7894 // a native completed what was this function's result
7895 return Ok(self.take_results(fr.func_slot));
7896 }
7897 } else {
7898 // Native (or __call-bearing) target: regular call. The
7899 // results land at `abs..self.top` and the next op (the
7900 // fallback `Op::Return`) forwards them. `wanted = -1`
7901 // because the caller will multret them through Return.
7902 self.begin_call(abs, Some(nargs), -1, false)?;
7903 }
7904 }
7905 Op::Return | Op::Return0 | Op::Return1 => {
7906 let (abs_a, nret) = match inst.op() {
7907 Op::Return0 => (base, 0),
7908 Op::Return1 => (base + inst.a(), 1),
7909 _ => {
7910 let abs_a = base + inst.a();
7911 let nret = if inst.b() == 0 {
7912 self.top - abs_a
7913 } else {
7914 inst.b() - 1
7915 };
7916 (abs_a, nret)
7917 }
7918 };
7919 // close before moving results: __close handlers run above
7920 // the stack top, so the result region [abs_a..abs_a+nret)
7921 // stays intact across any yields the close performs.
7922 // Fixed-count returns may leave `self.top` below the last
7923 // result slot (the compiler does not always re-bump it);
7924 // raise it past the result region so `drive_close` parks
7925 // the handler call *above* — landing at `self.top` would
7926 // otherwise clobber a result with the handler closure.
7927 self.top = self.top.max(abs_a + nret);
7928 if let Some(vals) = self.begin_close(
7929 base,
7930 None,
7931 AfterClose::Return {
7932 abs_a,
7933 nret,
7934 from_native: false,
7935 },
7936 entry_depth,
7937 )? {
7938 return Ok(vals);
7939 }
7940 }
7941 Op::ForPrep => self.for_prep(inst, base)?,
7942 Op::ForLoop => {
7943 // P12 — trace JIT back-edge counter on the
7944 // numeric-for back-edge. ForLoop is always at
7945 // a back-edge position (when it continues);
7946 // for the trace recorder we treat it as the
7947 // close-detection equivalent of `Op::Jmp` with
7948 // negative offset. Counter only ticks when the
7949 // back-edge will actually fire (count > 0 in
7950 // the 5.4+ Int form, comparable predicates in
7951 // pre-5.3 / Float). The cheap check up front
7952 // matches the for_loop helper's branch.
7953 if self.jit.trace_enabled {
7954 let a = inst.a();
7955 let pre53 = self.version() <= LuaVersion::Lua53;
7956 let take_back_edge =
7957 match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
7958 (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
7959 count > 0
7960 }
7961 (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
7962 let next = cur.wrapping_add(st);
7963 if st > 0 { next <= lim } else { next >= lim }
7964 }
7965 (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
7966 let next = cur + st;
7967 if st > 0.0 { next <= lim } else { next >= lim }
7968 }
7969 _ => false,
7970 };
7971 if take_back_edge {
7972 let proto = cl.proto;
7973 let c = proto.trace_hot_count.get();
7974 if c < u32::MAX / 2 {
7975 proto.trace_hot_count.set(c + 1);
7976 }
7977 if c == crate::jit::trace::TRACE_HOT_THRESHOLD
7978 && self.jit.active_trace.is_none()
7979 {
7980 // ForLoop's back-edge target = pc
7981 // after `add_pc(-bx)` runs from the
7982 // already-bumped f.pc (= pc + 1).
7983 // So target = (pc + 1) - bx.
7984 let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
7985 let max_stack = cl.proto.max_stack as usize;
7986 let base_us = base as usize;
7987 let mut entry_tags = Vec::with_capacity(max_stack);
7988 for i in 0..max_stack {
7989 let (tag, _) = self.stack[base_us + i].unpack();
7990 entry_tags.push(tag);
7991 }
7992 self.jit.active_trace =
7993 Some(Box::new(crate::jit::trace::TraceRecord::start(
7994 cl.proto, target, entry_tags, false,
7995 )));
7996 // P12-S4 — record the frame the trace
7997 // started in. The currently-running
7998 // Lua frame is at len() - 1.
7999 self.jit.recording_frame_base = self.frames.len() - 1;
8000 }
8001 }
8002 }
8003 self.for_loop(inst, base);
8004 }
8005 Op::TForPrep => {
8006 // the 4th control slot is the iterator's closing value
8007 self.register_tbc(base + inst.a() + 3)?;
8008 self.add_pc(inst.bx() as i32);
8009 }
8010 Op::TForCall => {
8011 let abs = base + inst.a();
8012 let need = (abs + 7) as usize;
8013 if self.stack.len() < need {
8014 self.stack.resize(need, Value::Nil);
8015 }
8016 self.stack[(abs + 4) as usize] = self.stack[abs as usize];
8017 self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
8018 self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
8019 let nvars = inst.c() as i32;
8020 self.begin_call(abs + 4, Some(2), nvars, false)?;
8021 }
8022 Op::TForLoop => {
8023 let a = inst.a();
8024 let ctrl = self.r(base, a + 4);
8025 if !ctrl.is_nil() {
8026 // P12-S12-B v1 — trace JIT back-edge counter on
8027 // generic-for back-edge. TForLoop sits at the
8028 // tail of `for k,v in expr do ... end`; recorder
8029 // treats it as the close-detection equivalent of
8030 // a negative Op::Jmp. Gate on `take_back_edge`
8031 // (= `ctrl != nil`) so empty-iter loops don't
8032 // pollute hot_count. v1 only adds the trigger;
8033 // whitelist + helper + emit live in v2.
8034 if self.jit.trace_enabled {
8035 let proto = cl.proto;
8036 let c = proto.trace_hot_count.get();
8037 if c < u32::MAX / 2 {
8038 proto.trace_hot_count.set(c + 1);
8039 }
8040 if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8041 && self.jit.active_trace.is_none()
8042 {
8043 // TForLoop back-edge target = pc after
8044 // `add_pc(-bx)` runs from the already-
8045 // bumped f.pc (= pc + 1). So target =
8046 // (pc + 1) - bx, normally landing on
8047 // body_top (the op right after TForPrep).
8048 let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8049 let max_stack = cl.proto.max_stack as usize;
8050 let base_us = base as usize;
8051 let mut entry_tags = Vec::with_capacity(max_stack);
8052 for i in 0..max_stack {
8053 let (tag, _) = self.stack[base_us + i].unpack();
8054 entry_tags.push(tag);
8055 }
8056 // P12-S12-B-v5 — snapshot the iter
8057 // fn's address if Native, so the
8058 // lowerer can specialise ipairs into
8059 // inline Table aget IR.
8060 let iter_ptr =
8061 if let Value::Native(n) = self.stack[base_us + a as usize] {
8062 Some(n.f as usize)
8063 } else {
8064 None
8065 };
8066 // P12-S12-C v3 — snapshot R[A+5]'s
8067 // tag (= current iter's val from
8068 // the just-fired TForCall). The v5
8069 // inline aget fast_blk emits a
8070 // runtime guard against this tag;
8071 // mixed-tag arrays deopt rather
8072 // than producing garbage pointers
8073 // through the v2 spill path.
8074 let val_slot = base_us + (a as usize) + 5;
8075 let val_tag = if val_slot < self.stack.len() {
8076 Some(self.stack[val_slot].unpack().0)
8077 } else {
8078 None
8079 };
8080 let mut rec = crate::jit::trace::TraceRecord::start(
8081 cl.proto, target, entry_tags, false,
8082 );
8083 rec.tfor_iter_ptr = iter_ptr;
8084 rec.tfor_val_tag = val_tag;
8085 self.jit.active_trace = Some(Box::new(rec));
8086 self.jit.recording_frame_base = self.frames.len() - 1;
8087 }
8088 }
8089 self.set_r(base, a + 2, ctrl);
8090 self.add_pc(-(inst.bx() as i32));
8091 }
8092 }
8093 Op::Closure => {
8094 let proto = cl.proto.protos[inst.bx() as usize];
8095 let n_ups = proto.upvals.len();
8096 // P11-S5d.M — build upvals on the stack for small
8097 // closures, skipping the per-call Vec/Box alloc
8098 // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8099 // = 2 covers most Lua source (1 captured local, or
8100 // _ENV + a single capture). Beyond that, fall back
8101 // to a heap Vec.
8102 use crate::runtime::function::INLINE_UPVALS_N;
8103 let mut stack_buf: [std::mem::MaybeUninit<
8104 Gc<crate::runtime::function::Upvalue>,
8105 >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8106 let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8107 let use_inline = n_ups <= INLINE_UPVALS_N;
8108 if !use_inline {
8109 heap_buf.reserve_exact(n_ups);
8110 }
8111 for (i, d) in proto.upvals.iter().enumerate() {
8112 let uv = if d.in_stack {
8113 self.find_or_create_upval(base + d.index as u32)
8114 } else {
8115 cl.upvals()[d.index as usize]
8116 };
8117 if use_inline {
8118 stack_buf[i] = std::mem::MaybeUninit::new(uv);
8119 } else {
8120 heap_buf.push(uv);
8121 }
8122 }
8123 // Tiny shim around the two paths so the 5.1 _ENV
8124 // clone + cache check below see one uniform
8125 // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8126 // into the local frame (still valid through the
8127 // rest of this Op::Closure handler).
8128 let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8129 // SAFETY: the first n_ups slots of stack_buf
8130 // were initialised above; we hand out a slice
8131 // covering exactly them.
8132 unsafe {
8133 std::slice::from_raw_parts_mut(
8134 stack_buf.as_mut_ptr()
8135 as *mut Gc<crate::runtime::function::Upvalue>,
8136 n_ups,
8137 )
8138 }
8139 } else {
8140 &mut heap_buf[..]
8141 };
8142 // PUC 5.1 had per-function environments: every Lua
8143 // function carried its own `env` slot, snapshotted from
8144 // the creating function's env at closure time, so a
8145 // `setfenv` on one closure never bled into a sibling.
8146 // luna models that by giving the 5.1 closure a *fresh*
8147 // closed upvalue for whichever cell holds `_ENV`, seeded
8148 // from the parent's current env value. Only that cell is
8149 // cloned — every other upvalue keeps its open/shared
8150 // identity (so e.g. `local function range(...) ...
8151 // range(...) ... end` still sees its self-reference). 5.2+
8152 // keeps the shared-upval model (and the proto cache that
8153 // depends on it).
8154 let v51 = self.version() <= LuaVersion::Lua51;
8155 if v51 && proto.env_upval_idx != u8::MAX {
8156 let i = proto.env_upval_idx as usize;
8157 let cur = match ups[i].state() {
8158 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8159 UpvalState::Closed(v) => v,
8160 };
8161 ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8162 }
8163 let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8164 // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8165 // and reuses it when every fresh-upvalue binding still
8166 // points to the same Upvalue object as the cached one.
8167 // That keeps `function() return outer end` repeated in a
8168 // loop comparing equal across iterations (the captured
8169 // outer is a shared open upvalue), while `function()
8170 // return loop_var end` gets a fresh closure each round
8171 // because the loop var is re-created per iteration. PUC
8172 // 5.1 predated the cache, and the per-closure `_ENV`
8173 // clone above would defeat it anyway, so skip it.
8174 let nc = if v51 {
8175 self.heap.new_closure_inline(proto, ups_slice)
8176 } else {
8177 let cached = proto.cache.get().filter(|c| {
8178 c.upvals().len() == ups_slice.len()
8179 && c.upvals()
8180 .iter()
8181 .zip(ups_slice.iter())
8182 .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8183 });
8184 match cached {
8185 Some(c) => c,
8186 None => {
8187 let n = self.heap.new_closure_inline(proto, ups_slice);
8188 proto.cache.set(Some(n));
8189 n
8190 }
8191 }
8192 };
8193 self.set_r(base, inst.a(), Value::Closure(nc));
8194 self.maybe_collect_garbage(base + inst.a() + 1);
8195 }
8196 Op::Vararg => {
8197 let abs_a = base + inst.a();
8198 let wanted = inst.c() as i32 - 1;
8199 // A materialized named vararg lives in func_slot (its writes
8200 // must be visible to `...`); otherwise spread the extra args
8201 // straight off the stack at func_slot+1 .. +n_varargs.
8202 let vt = match self.stack[func_slot as usize] {
8203 Value::Table(t) => Some(t),
8204 _ => None,
8205 };
8206 let n = match vt {
8207 Some(t) => {
8208 let n_key = Value::Str(self.heap.intern(b"n"));
8209 // PUC getnumargs: a named vararg `t.n` set out of the
8210 // integer range [0, INT_MAX/2] is rejected here
8211 match t.get(n_key) {
8212 Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8213 _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8214 }
8215 }
8216 None => n_varargs,
8217 };
8218 let count = if wanted < 0 { n } else { wanted as u32 };
8219 let need = (abs_a + count) as usize;
8220 if self.stack.len() < need {
8221 self.stack.resize(need, Value::Nil);
8222 }
8223 for i in 0..count {
8224 let v = if i >= n {
8225 Value::Nil
8226 } else if let Some(t) = vt {
8227 t.get_int(i as i64 + 1)
8228 } else {
8229 self.stack[(func_slot + 1 + i) as usize]
8230 };
8231 self.stack[(abs_a + i) as usize] = v;
8232 }
8233 if wanted < 0 {
8234 self.top = abs_a + count;
8235 }
8236 }
8237 Op::GetVarg => {
8238 // materialize the vararg table (PUC table.pack shape) from the
8239 // stack varargs — used when the named vararg is written /
8240 // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8241 // sees later writes) and in the local register R[A].
8242 let n = n_varargs;
8243 let t = self.heap.new_table();
8244 {
8245 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8246 let tm = unsafe { t.as_mut() };
8247 for i in 0..n {
8248 let _ = tm.set_int(
8249 &mut self.heap,
8250 i as i64 + 1,
8251 self.stack[(func_slot + 1 + i) as usize],
8252 );
8253 }
8254 }
8255 let n_key = Value::Str(self.heap.intern(b"n"));
8256 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8257 unsafe { t.as_mut() }
8258 .set(&mut self.heap, n_key, Value::Int(n as i64))
8259 .expect("'n' is a valid key");
8260 // once-per-table barrier (mirror SETLIST): t is born BLACK
8261 // during Propagate; the bulk inserts above don't barrier.
8262 self.heap
8263 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8264 self.stack[func_slot as usize] = Value::Table(t);
8265 self.set_r(base, inst.a(), Value::Table(t));
8266 }
8267 Op::VargIdx => {
8268 // R[A] := vararg[R[C]] without allocating: integer key in
8269 // [1,n] → that vararg, "n" → the count, else nil.
8270 let key = self.r(base, inst.c());
8271 let n = n_varargs;
8272 let v = match key {
8273 Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8274 self.stack[(func_slot + k as u32) as usize]
8275 }
8276 Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8277 self.stack[(func_slot + f as u32) as usize]
8278 }
8279 Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8280 _ => Value::Nil,
8281 };
8282 self.set_r(base, inst.a(), v);
8283 }
8284 Op::ErrNNil => {
8285 let v = self.r(base, inst.a());
8286 if !matches!(v, Value::Nil) {
8287 let bx = inst.bx();
8288 let name = if bx == 0 {
8289 "?".to_string()
8290 } else {
8291 match cl.proto.consts[(bx - 1) as usize] {
8292 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8293 _ => "?".to_string(),
8294 }
8295 };
8296 return Err(self.rt_err(&format!("global '{name}' already defined")));
8297 }
8298 }
8299 Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8300 }
8301 }
8302 }
8303
8304 #[inline(always)]
8305 fn pc_of_top(&self) -> u32 {
8306 self.top_frame().pc
8307 }
8308
8309 #[inline(always)]
8310 fn bump_pc(&mut self) {
8311 // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8312 // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8313 // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8314 // up over `fib_28`'s ~500k jumps.
8315 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8316 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8317 CallFrame::Lua(f) => f.pc += 1,
8318 _ => unreachable!("Cont frame at bump_pc"),
8319 }
8320 }
8321
8322 #[inline(always)]
8323 fn add_pc(&mut self, d: i32) {
8324 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8325 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8326 CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8327 _ => unreachable!("Cont frame at add_pc"),
8328 }
8329 }
8330
8331 /// PUC conditional-skip convention: the JMP that follows is executed when
8332 /// `cond == k`; otherwise it is skipped.
8333 #[inline(always)]
8334 fn cond_skip(&mut self, cond: bool, k: bool) {
8335 if cond != k {
8336 self.bump_pc();
8337 }
8338 }
8339
8340 // ---- indexing (with __index/__newindex chains) ----
8341
8342 /// The `#` length operation: string byte length, `__len` if present, else
8343 /// the raw table border. Returns the raw length value (may be non-integer
8344 /// when `__len` is exotic).
8345 pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8346 match self.len_step(v)? {
8347 MmOut::Done(n) => Ok(n),
8348 // PUC calls unary metamethods with the operand twice
8349 MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8350 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8351 }
8352 }
8353
8354 /// Length fast path: a string's byte count or a table's raw border when no
8355 /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8356 /// called with the operand twice. Errors for a non-table with no `__len`.
8357 fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8358 match v {
8359 Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8360 Value::Table(t) => {
8361 let mm = self.get_mm(v, Mm::Len);
8362 if mm.is_nil() {
8363 Ok(MmOut::Done(Value::Int(t.len())))
8364 } else {
8365 Ok(MmOut::Mm { func: mm, recv: v })
8366 }
8367 }
8368 _ => {
8369 let mm = self.get_mm(v, Mm::Len);
8370 if mm.is_nil() {
8371 Err(self.type_err("get length of", v))
8372 } else {
8373 Ok(MmOut::Mm { func: mm, recv: v })
8374 }
8375 }
8376 }
8377 }
8378
8379 /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8380 /// value with no integer representation.
8381 pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8382 match self.len_value(v)? {
8383 Value::Int(i) => Ok(i),
8384 Value::Float(f) => crate::runtime::value::f2i_exact(f)
8385 .ok_or_else(|| self.rt_err("object length is not an integer")),
8386 _ => Err(self.rt_err("object length is not an integer")),
8387 }
8388 }
8389
8390 pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8391 match self.index_step(t, key)? {
8392 MmOut::Done(v) => Ok(v),
8393 MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8394 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8395 }
8396 }
8397
8398 /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8399 /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8400 /// are followed inline (no yield possible); only a function link can yield.
8401 fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8402 let mut cur = t;
8403 for _ in 0..MAX_TAG_LOOP {
8404 let mm = match cur {
8405 Value::Table(tb) => {
8406 let v = tb.get(key);
8407 if !v.is_nil() {
8408 return Ok(MmOut::Done(v));
8409 }
8410 let mm = self.get_mm(cur, Mm::Index);
8411 if mm.is_nil() {
8412 return Ok(MmOut::Done(Value::Nil));
8413 }
8414 mm
8415 }
8416 v => {
8417 let mm = self.get_mm(v, Mm::Index);
8418 if mm.is_nil() {
8419 return Err(self.type_err("index", v));
8420 }
8421 mm
8422 }
8423 };
8424 match mm {
8425 Value::Closure(_) | Value::Native(_) => {
8426 return Ok(MmOut::Mm {
8427 func: mm,
8428 recv: cur,
8429 });
8430 }
8431 next => cur = next,
8432 }
8433 }
8434 Err(self.rt_err("'__index' chain too long; possible loop"))
8435 }
8436
8437 pub(crate) fn newindex_value(
8438 &mut self,
8439 t: Value,
8440 key: Value,
8441 v: Value,
8442 ) -> Result<(), LuaError> {
8443 match self.newindex_step(t, key, v)? {
8444 MmOut::Done(_) => Ok(()),
8445 MmOut::Mm { func, recv } => {
8446 self.call_value(func, &[recv, key, v])?;
8447 Ok(())
8448 }
8449 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8450 }
8451 }
8452
8453 /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
8454 /// performed inline (returning `Done`); only a function metamethod (`Mm`)
8455 /// needs an actual call — which the caller may run yieldably.
8456 fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
8457 let mut cur = t;
8458 for _ in 0..MAX_TAG_LOOP {
8459 let mm = match cur {
8460 Value::Table(tb) => {
8461 // PI-A3 single-walk collapse — Table::try_set_existing
8462 // fuses the prior `tb.get(key).is_nil()` gate and
8463 // `raw_set` walk into one chain traversal when the
8464 // key is already present with a non-nil value. The
8465 // __newindex chain semantics are preserved by the
8466 // identity (slot_nil ⇔ fire_newindex); see
8467 // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
8468 //
8469 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
8470 // heap is single-threaded and the pointer is live as
8471 // long as it is reachable from active roots (see
8472 // heap.rs:5-7). Mirrors the raw_set wrapper below.
8473 if unsafe { tb.as_mut() }.try_set_existing(key, v) {
8474 self.heap
8475 .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
8476 return Ok(MmOut::Done(Value::Nil));
8477 }
8478 let mm = self.get_mm(cur, Mm::NewIndex);
8479 if mm.is_nil() {
8480 self.raw_set(tb, key, v)?;
8481 return Ok(MmOut::Done(Value::Nil));
8482 }
8483 mm
8484 }
8485 bad => {
8486 let mm = self.get_mm(bad, Mm::NewIndex);
8487 if mm.is_nil() {
8488 return Err(self.type_err("index", bad));
8489 }
8490 mm
8491 }
8492 };
8493 match mm {
8494 Value::Closure(_) | Value::Native(_) => {
8495 return Ok(MmOut::Mm {
8496 func: mm,
8497 recv: cur,
8498 });
8499 }
8500 next => cur = next,
8501 }
8502 }
8503 Err(self.rt_err("'__newindex' chain too long; possible loop"))
8504 }
8505
8506 fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8507 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8508 match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8509 Ok(()) => {
8510 self.heap
8511 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8512 Ok(())
8513 }
8514 Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8515 Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8516 Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8517 Err(TableError::InvalidNext) => unreachable!(),
8518 }
8519 }
8520
8521 /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8522 /// the boolean result; `Mm` (when raw equality fails and both are tables
8523 /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8524 fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8525 if l.raw_eq(r) {
8526 return MmOut::Done(Value::Bool(true));
8527 }
8528 if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8529 (l, r)
8530 {
8531 // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8532 // (and earlier) required the two operands' metatables to expose a
8533 // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8534 // metatable falls straight back to raw inequality. events.lua 5.1
8535 // :262 bakes this in.
8536 let mm = if self.version() <= LuaVersion::Lua51 {
8537 self.get_comp_mm(l, r, Mm::Eq)
8538 } else {
8539 let mut m = self.get_mm(l, Mm::Eq);
8540 if m.is_nil() {
8541 m = self.get_mm(r, Mm::Eq);
8542 }
8543 m
8544 };
8545 if !mm.is_nil() {
8546 return MmOut::Mm { func: mm, recv: l };
8547 }
8548 }
8549 MmOut::Done(Value::Bool(false))
8550 }
8551
8552 // ---- arithmetic ----
8553
8554 #[inline(always)]
8555 fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8556 let l = self.r(base, inst.b());
8557 let r = self.r(base, inst.c());
8558 // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8559 // binary_trees all hammer these. Skipping coerce_num + the big
8560 // arith_fast match shaves several conditional moves per op.
8561 if let (Value::Int(a), Value::Int(b)) = (l, r) {
8562 let fast = match op {
8563 ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8564 ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8565 ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8566 _ => None,
8567 };
8568 if let Some(v) = fast {
8569 self.set_r(base, inst.a(), v);
8570 return Ok(());
8571 }
8572 }
8573 // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8574 // and any numeric workload with non-integer accumulators benefits.
8575 if let (Value::Float(a), Value::Float(b)) = (l, r) {
8576 let fast = match op {
8577 ArithOp::Add => Some(Value::Float(a + b)),
8578 ArithOp::Sub => Some(Value::Float(a - b)),
8579 ArithOp::Mul => Some(Value::Float(a * b)),
8580 ArithOp::Div => Some(Value::Float(a / b)),
8581 _ => None,
8582 };
8583 if let Some(v) = fast {
8584 self.set_r(base, inst.a(), v);
8585 return Ok(());
8586 }
8587 }
8588 match self.arith_fast(op, l, r)? {
8589 Some(v) => self.set_r(base, inst.a(), v),
8590 None => {
8591 let mm = self.arith_mm_func(op, l, r)?;
8592 let dst = base + inst.a();
8593 self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8594 }
8595 }
8596 Ok(())
8597 }
8598
8599 /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8600 /// directly, `Ok(None)` when a metamethod is required (the caller decides
8601 /// whether to call it synchronously or yieldably).
8602 fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8603 use ArithOp::*;
8604 match op {
8605 BAnd | BOr | BXor | Shl | Shr => {
8606 // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8607 match (coerce_num(l), coerce_num(r)) {
8608 (Some(a), Some(b)) => {
8609 let to_int = |n: Num| match n {
8610 Num::Int(i) => Some(i),
8611 Num::Float(f) => crate::runtime::value::f2i_exact(f),
8612 };
8613 let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8614 // PUC luaG_tointerror: name the offending operand
8615 return Err(self.no_int_rep_err());
8616 };
8617 let v = match op {
8618 BAnd => a & b,
8619 BOr => a | b,
8620 BXor => a ^ b,
8621 Shl => shift_left(a, b),
8622 Shr => shift_left(a, b.wrapping_neg()),
8623 _ => unreachable!(),
8624 };
8625 return Ok(Some(Value::Int(v)));
8626 }
8627 _ => return Ok(None),
8628 }
8629 }
8630 _ => {}
8631 }
8632 let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8633 (Some(a), Some(b)) => (a, b),
8634 _ => return Ok(None),
8635 };
8636 let v = match (op, ln, rn) {
8637 (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8638 (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8639 (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8640 (IDiv, Num::Int(a), Num::Int(b)) => {
8641 if b == 0 {
8642 return Err(self.rt_err("attempt to divide by zero"));
8643 }
8644 let mut q = a.wrapping_div(b);
8645 if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8646 q -= 1;
8647 }
8648 Value::Int(q)
8649 }
8650 (Mod, Num::Int(a), Num::Int(b)) => {
8651 if b == 0 {
8652 return Err(self.rt_err("attempt to perform 'n%0'"));
8653 }
8654 let mut m = a.wrapping_rem(b);
8655 if m != 0 && (m ^ b) < 0 {
8656 m += b;
8657 }
8658 Value::Int(m)
8659 }
8660 (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8661 (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8662 (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8663 (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8664 (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8665 (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8666 (Mod, a, b) => {
8667 let (x, y) = (a.as_f64(), b.as_f64());
8668 // PUC luai_nummod: correct fmod's sign without the `m*y`
8669 // product, which underflows to 0 for tiny denormals
8670 let mut m = x % y;
8671 if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8672 m += y;
8673 }
8674 Value::Float(m)
8675 }
8676 _ => unreachable!(),
8677 };
8678 Ok(Some(v))
8679 }
8680
8681 pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8682 match v {
8683 Value::Int(i) => Ok(i),
8684 Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8685 Some(i) => Ok(i),
8686 None => Err(self.rt_err("number has no integer representation")),
8687 },
8688 v => Err(self.type_err(what, v)),
8689 }
8690 }
8691
8692 fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
8693 match n {
8694 Num::Int(i) => Ok(i),
8695 Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
8696 Some(i) => Ok(i),
8697 None => Err(self.rt_err("number has no integer representation")),
8698 },
8699 }
8700 }
8701
8702 /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
8703 /// PUC type error when neither operand provides one.
8704 fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
8705 use ArithOp::*;
8706 let event = match op {
8707 Add => Mm::Add,
8708 Sub => Mm::Sub,
8709 Mul => Mm::Mul,
8710 Div => Mm::Div,
8711 Mod => Mm::Mod,
8712 Pow => Mm::Pow,
8713 IDiv => Mm::IDiv,
8714 BAnd => Mm::BAnd,
8715 BOr => Mm::BOr,
8716 BXor => Mm::BXor,
8717 Shl => Mm::Shl,
8718 Shr => Mm::Shr,
8719 };
8720 let mut mm = self.get_mm(l, event);
8721 if mm.is_nil() {
8722 mm = self.get_mm(r, event);
8723 }
8724 if mm.is_nil() {
8725 let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
8726 "perform bitwise operation on"
8727 } else {
8728 "perform arithmetic on"
8729 };
8730 let bad = if coerce_num(l).is_none() { l } else { r };
8731 return Err(self.type_err(what, bad));
8732 }
8733 Ok(mm)
8734 }
8735
8736 // ---- comparison ----
8737
8738 pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
8739 match self.less_step(l, r, or_eq)? {
8740 MmOut::Done(v) => Ok(v.truthy()),
8741 MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
8742 MmOut::CompareSynth { func } => {
8743 // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
8744 // by library code (sort comparator etc.) — no yield expected
8745 // here (a yield would have hit `call_noyield`'s C boundary).
8746 Ok(!self.call_mm1(func, &[r, l])?.truthy())
8747 }
8748 }
8749 }
8750
8751 /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
8752 /// carries the boolean result; `Mm` (for non-number/string operands) carries
8753 /// the metamethod — called with `(l, r)`; raises the PUC compare error when
8754 /// neither operand provides one.
8755 fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
8756 let b = match (l, r) {
8757 (Value::Int(a), Value::Int(b)) => {
8758 if or_eq {
8759 a <= b
8760 } else {
8761 a < b
8762 }
8763 }
8764 (Value::Float(a), Value::Float(b)) => {
8765 if or_eq {
8766 a <= b
8767 } else {
8768 a < b
8769 }
8770 }
8771 (Value::Int(a), Value::Float(b)) => {
8772 if or_eq {
8773 int_le_float(a, b)
8774 } else {
8775 int_lt_float(a, b)
8776 }
8777 }
8778 (Value::Float(a), Value::Int(b)) => {
8779 if a.is_nan() {
8780 false
8781 } else if or_eq {
8782 !int_lt_float(b, a)
8783 } else {
8784 !int_le_float(b, a)
8785 }
8786 }
8787 (Value::Str(a), Value::Str(b)) => {
8788 let (a, b) = (a.as_bytes(), b.as_bytes());
8789 if or_eq { a <= b } else { a < b }
8790 }
8791 (l, r) => {
8792 let event = if or_eq { Mm::Le } else { Mm::Lt };
8793 // PUC 5.1's `get_compTM` rule applies to ordered comparisons
8794 // too: both operands' metatables must expose the same
8795 // implementation for `__lt` / `__le` to fire. events.lua 5.1
8796 // :262 expects `c < d` (where `d` has no metatable) to error
8797 // with the default "attempt to compare two table values"
8798 // rather than running c's `__lt` blindly.
8799 let mm = if self.version() <= LuaVersion::Lua51 {
8800 self.get_comp_mm(l, r, event)
8801 } else {
8802 let mut m = self.get_mm(l, event);
8803 if m.is_nil() {
8804 m = self.get_mm(r, event);
8805 }
8806 m
8807 };
8808 // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
8809 // operand carries `__le`. 5.4 dropped the synthesis (now
8810 // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
8811 // on the synthesis — its metatable defines only `__lt`.
8812 // The fallback calls `__lt(r, l)` synchronously (the suite's
8813 // `__lt` doesn't yield) and negates the result; the yieldable
8814 // `__lt` path stays reserved for the explicit `<` operator.
8815 if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
8816 let lt = Mm::Lt;
8817 let mut mm_lt = self.get_mm(l, lt);
8818 if mm_lt.is_nil() {
8819 mm_lt = self.get_mm(r, lt);
8820 }
8821 if !mm_lt.is_nil() {
8822 return Ok(MmOut::CompareSynth { func: mm_lt });
8823 }
8824 }
8825 if mm.is_nil() {
8826 // PUC luaG_ordererror: "two X values" when the operand
8827 // types match, "X with Y" otherwise (objtypename-aware).
8828 let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
8829 return Err(self.rt_err(&if t1 == t2 {
8830 format!("attempt to compare two {t1} values")
8831 } else {
8832 format!("attempt to compare {t1} with {t2}")
8833 }));
8834 }
8835 return Ok(MmOut::Mm { func: mm, recv: l });
8836 }
8837 };
8838 Ok(MmOut::Done(Value::Bool(b)))
8839 }
8840
8841 // ---- numeric for ----
8842
8843 fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
8844 let a = inst.a();
8845 let init = self.r(base, a);
8846 let limit = self.r(base, a + 1);
8847 let step = self.r(base, a + 2);
8848 let (Some(init_n), Some(limit_n), Some(step_n)) =
8849 (as_num(init), as_num(limit), as_num(step))
8850 else {
8851 // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
8852 // PUC checks limit, then step, then initial value.
8853 let (what, bad) = if as_num(limit).is_none() {
8854 ("limit", limit)
8855 } else if as_num(step).is_none() {
8856 ("step", step)
8857 } else {
8858 ("initial value", init)
8859 };
8860 let tn = self.obj_typename(bad);
8861 return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
8862 };
8863 // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
8864 // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
8865 // first test, so each successful iteration emits a backward `OP_FORLOOP`
8866 // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
8867 // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
8868 // distance in luna's encoding is `loop_pc - prep_pc`; firing
8869 // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
8870 let pre53 = self.version() <= LuaVersion::Lua53;
8871 match (init_n, step_n) {
8872 (Num::Int(i0), Num::Int(st)) => {
8873 if st == 0 {
8874 return Err(self.rt_err("'for' step is zero"));
8875 }
8876 if pre53 {
8877 // PUC 5.3 `forlimit`: int limit passes through; float limit
8878 // gets clamped to MIN/MAX with a `stopnow` flag set only
8879 // when the clamp is unreachable (positive float with a
8880 // negative step → limit=MAX, stopnow; negative float with
8881 // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
8882 // `init = 0` so OP_FORLOOP's first test against the
8883 // unreachable clamp fails cleanly. An ordinary in-range
8884 // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
8885 // lets OP_FORLOOP's natural test reject the first step.
8886 let (lim, stopnow) = match limit_n {
8887 Num::Int(l) => (l, false),
8888 Num::Float(f) => {
8889 if f.is_nan() {
8890 (0, true)
8891 } else if f >= i64::MAX as f64 + 1.0 {
8892 // beyond +MAX: unreachable for a decreasing loop
8893 (i64::MAX, st < 0)
8894 } else if f <= i64::MIN as f64 {
8895 // beyond -MIN: unreachable for an increasing loop
8896 (i64::MIN, st >= 0)
8897 } else if st > 0 {
8898 (f.floor() as i64, false)
8899 } else {
8900 (f.ceil() as i64, false)
8901 }
8902 }
8903 };
8904 let initv = if stopnow { 0 } else { i0 };
8905 let pre = initv.wrapping_sub(st);
8906 self.set_r(base, a, Value::Int(pre));
8907 self.set_r(base, a + 1, Value::Int(lim));
8908 self.set_r(base, a + 2, Value::Int(st));
8909 self.add_pc(inst.bx() as i32 - 1);
8910 return Ok(());
8911 }
8912 let (lim, empty) = int_for_limit(limit_n, i0, st);
8913 if empty {
8914 self.add_pc(inst.bx() as i32);
8915 return Ok(());
8916 }
8917 let count = if st > 0 {
8918 (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
8919 } else {
8920 (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
8921 };
8922 self.set_r(base, a, Value::Int(i0));
8923 self.set_r(base, a + 1, Value::Int(count as i64));
8924 self.set_r(base, a + 2, Value::Int(st));
8925 self.set_r(base, a + 3, Value::Int(i0));
8926 }
8927 _ => {
8928 let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
8929 if st == 0.0 {
8930 return Err(self.rt_err("'for' step is zero"));
8931 }
8932 if pre53 {
8933 let pre = x0 - st;
8934 self.set_r(base, a, Value::Float(pre));
8935 self.set_r(base, a + 1, Value::Float(lim));
8936 self.set_r(base, a + 2, Value::Float(st));
8937 self.add_pc(inst.bx() as i32 - 1);
8938 return Ok(());
8939 }
8940 let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
8941 if !runs {
8942 self.add_pc(inst.bx() as i32);
8943 return Ok(());
8944 }
8945 self.set_r(base, a, Value::Float(x0));
8946 self.set_r(base, a + 1, Value::Float(lim));
8947 self.set_r(base, a + 2, Value::Float(st));
8948 self.set_r(base, a + 3, Value::Float(x0));
8949 }
8950 }
8951 Ok(())
8952 }
8953
8954 #[inline(always)]
8955 fn for_loop(&mut self, inst: Inst, base: u32) {
8956 let a = inst.a();
8957 // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
8958 // directly (R[a+1] holds the limit, *not* a remaining-count) so the
8959 // first iteration's test fires through the same backward-jump path as
8960 // every later iteration. 5.4+ switched to the count-based form luna
8961 // already uses for `Int`; the float branch was already PUC-3.x-style.
8962 let pre53 = self.version() <= LuaVersion::Lua53;
8963 match self.r(base, a) {
8964 Value::Int(cur) if pre53 => {
8965 let Value::Int(lim) = self.r(base, a + 1) else {
8966 unreachable!()
8967 };
8968 let Value::Int(st) = self.r(base, a + 2) else {
8969 unreachable!()
8970 };
8971 let next = cur.wrapping_add(st);
8972 let cont = if st > 0 { next <= lim } else { next >= lim };
8973 if cont {
8974 self.set_r(base, a, Value::Int(next));
8975 self.set_r(base, a + 3, Value::Int(next));
8976 self.add_pc(-(inst.bx() as i32));
8977 }
8978 }
8979 Value::Int(cur) => {
8980 let Value::Int(count) = self.r(base, a + 1) else {
8981 unreachable!()
8982 };
8983 if count > 0 {
8984 let Value::Int(st) = self.r(base, a + 2) else {
8985 unreachable!()
8986 };
8987 let next = cur.wrapping_add(st);
8988 self.set_r(base, a, Value::Int(next));
8989 self.set_r(base, a + 1, Value::Int(count - 1));
8990 self.set_r(base, a + 3, Value::Int(next));
8991 self.add_pc(-(inst.bx() as i32));
8992 }
8993 }
8994 Value::Float(cur) => {
8995 let Value::Float(lim) = self.r(base, a + 1) else {
8996 unreachable!()
8997 };
8998 let Value::Float(st) = self.r(base, a + 2) else {
8999 unreachable!()
9000 };
9001 let next = cur + st;
9002 let cont = if st > 0.0 { next <= lim } else { next >= lim };
9003 if cont {
9004 self.set_r(base, a, Value::Float(next));
9005 self.set_r(base, a + 3, Value::Float(next));
9006 self.add_pc(-(inst.bx() as i32));
9007 }
9008 }
9009 _ => unreachable!("corrupt for-loop state"),
9010 }
9011 }
9012
9013 // ---- native helpers (used by builtins) ----
9014
9015 /// A native function's own captured upvalue (self lives at func_slot).
9016 ///
9017 /// Public so `native_typed` trampolines and embedders authoring
9018 /// stateful natives via `native_with(...)` can read their upvals.
9019 pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
9020 let Value::Native(nc) = self.stack[func_slot as usize] else {
9021 unreachable!("native frame without native closure");
9022 };
9023 nc.upvals[i]
9024 }
9025
9026 /// Number of upvalues captured by the native at `func_slot` (variadic
9027 /// captures such as the `io.lines` format list).
9028 pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
9029 let Value::Native(nc) = self.stack[func_slot as usize] else {
9030 unreachable!("native frame without native closure");
9031 };
9032 nc.upvals.len()
9033 }
9034
9035 /// Write a native function's own upvalue (stateful iterators).
9036 pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
9037 let Value::Native(nc) = self.stack[func_slot as usize] else {
9038 unreachable!("native frame without native closure");
9039 };
9040 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9041 unsafe { nc.as_mut() }.upvals[i] = v;
9042 // NativeClosure.upvals is traced as part of its Trace; a long-lived
9043 // stateful iterator closure (e.g. string.gmatch) sees many writes —
9044 // barrier_back once-and-done is cheaper than per-child forward.
9045 self.heap
9046 .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9047 }
9048
9049 /// Read the i-th positional argument inside a `NativeFn` body
9050 /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9051 /// matching PUC's "missing arg is nil" contract. Public so embedders
9052 /// can author their own natives.
9053 pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9054 if i < nargs {
9055 self.stack[(func_slot + 1 + i) as usize]
9056 } else {
9057 Value::Nil
9058 }
9059 }
9060
9061 /// Push the return values of a `NativeFn` and return their count
9062 /// (analogous to pushing N values then `return N` from a C function).
9063 /// Public so embedders can author their own natives.
9064 pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9065 let need = func_slot as usize + vals.len();
9066 if self.stack.len() < need {
9067 self.stack.resize(need, Value::Nil);
9068 }
9069 for (i, &v) in vals.iter().enumerate() {
9070 self.stack[func_slot as usize + i] = v;
9071 }
9072 vals.len() as u32
9073 }
9074
9075 /// Fast string concatenation of an adjacent pair, or `None` when a
9076 /// `__concat` metamethod is required.
9077 fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9078 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9079 // Length-check fast paths for both string operands BEFORE the
9080 // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9081 // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9082 // overflow on the first pair that would exceed `INT_MAX` instead
9083 // of allocating multi-GB intermediates first.
9084 let max_str = i32::MAX as usize;
9085 if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9086 let a_len = ls.as_bytes().len();
9087 let b_len = rs.as_bytes().len();
9088 let new_len = a_len.checked_add(b_len);
9089 if new_len.is_none() || new_len.unwrap() > max_str {
9090 return Err(self.rt_err("string length overflow"));
9091 }
9092 }
9093 match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9094 (Some(a), Some(b)) => {
9095 // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9096 // concat past it raises "string length overflow"
9097 // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9098 // exactly this wall).
9099 let new_len = a.len().checked_add(b.len());
9100 if new_len.is_none() || new_len.unwrap() > max_str {
9101 return Err(self.rt_err("string length overflow"));
9102 }
9103 let mut combined = a;
9104 combined.extend_from_slice(&b);
9105 Ok(Some(Value::Str(self.heap.intern(&combined))))
9106 }
9107 _ => Ok(None),
9108 }
9109 }
9110
9111 /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9112 /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9113 /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9114 /// call — its `Meta` continuation re-enters here on the metamethod's return.
9115 fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9116 // Sum the lengths of all all-Str operands BEFORE starting the
9117 // right-associative fold so a 129-operand `a..a..…` chain
9118 // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9119 // not after dozens of multi-GB intermediate intern+hash rounds.
9120 // A non-Str operand falls through to the per-pair check.
9121 let max_str = i32::MAX as usize;
9122 let mut total: usize = 0;
9123 let mut all_str = true;
9124 for slot in base_a..self.top {
9125 match self.stack[slot as usize] {
9126 Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9127 Some(t) if t <= max_str => total = t,
9128 _ => return Err(self.rt_err("string length overflow")),
9129 },
9130 _ => {
9131 all_str = false;
9132 break;
9133 }
9134 }
9135 }
9136 let _ = all_str; // discrimination already captured by early returns above
9137 while self.top.saturating_sub(base_a) >= 2 {
9138 let i = self.top - 1; // rightmost operand
9139 let x = self.stack[(i - 1) as usize];
9140 let y = self.stack[i as usize];
9141 match self.concat_pair(x, y)? {
9142 Some(s) => {
9143 self.stack[(i - 1) as usize] = s;
9144 self.top = i; // consumed y
9145 }
9146 None => {
9147 let mut mm = self.get_mm(x, Mm::Concat);
9148 if mm.is_nil() {
9149 mm = self.get_mm(y, Mm::Concat);
9150 }
9151 if mm.is_nil() {
9152 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9153 let bad = if concat_piece(x, legacy).is_none() {
9154 x
9155 } else {
9156 y
9157 };
9158 return Err(self.type_err("concatenate", bad));
9159 }
9160 // result lands at i-1, dropping y (top→i); resume continues.
9161 let dst = i - 1;
9162 self.begin_meta_call(
9163 mm,
9164 &[x, y],
9165 MetaAction::Concat { dst, base_a },
9166 "concat",
9167 )?;
9168 return Ok(());
9169 }
9170 }
9171 }
9172 self.maybe_collect_garbage(base_a + 1);
9173 Ok(())
9174 }
9175
9176 /// tostring with __tostring / __name support.
9177 pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9178 let mm = self.get_mm(v, Mm::ToString);
9179 if !mm.is_nil() {
9180 return match self.call_mm1(mm, &[v])? {
9181 Value::Str(s) => Ok(s.as_bytes().to_vec()),
9182 _ => Err(self.rt_err("'__tostring' must return a string")),
9183 };
9184 }
9185 if let Value::Table(t) = v
9186 && let Value::Str(name) = self.get_mm(v, Mm::Name)
9187 {
9188 let mut out = name.as_bytes().to_vec();
9189 out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9190 return Ok(out);
9191 }
9192 Ok(self.tostring_basic(v))
9193 }
9194
9195 /// Basic tostring (no metamethods).
9196 pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9197 match v {
9198 Value::Nil => b"nil".to_vec(),
9199 Value::Bool(true) => b"true".to_vec(),
9200 Value::Bool(false) => b"false".to_vec(),
9201 Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9202 // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9203 // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9204 // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9205 // concatenating these renderings.
9206 Value::Float(f) => {
9207 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9208 numeric::num_to_string_for(Num::Float(f), legacy).into_bytes()
9209 }
9210 Value::Str(s) => s.as_bytes().to_vec(),
9211 Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9212 Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9213 Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9214 Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9215 // PUC names file handles `file (0x…)`; a bare userdata is
9216 // `userdata: 0x…`. The io library overrides this via __tostring.
9217 Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9218 // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9219 // (the "light" qualifier only appears in `luaL_typeerror`).
9220 Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9221 }
9222 }
9223}
9224
/// The binary arithmetic and bitwise operators that share the numeric fast
/// path and the metamethod fallback.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    Add,
    Sub,
    Mul,
    Mod,
    Pow,
    Div,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
}

impl ArithOp {
    /// The PUC metamethod event name without the leading underscores
    /// (`__add` → "add" etc.), as reported by `debug.getinfo(level, "n")`
    /// inside a metamethod handler.
    fn mm_name(self) -> &'static str {
        match self {
            Self::Add => "add",
            Self::Sub => "sub",
            Self::Mul => "mul",
            Self::Mod => "mod",
            Self::Pow => "pow",
            Self::Div => "div",
            Self::IDiv => "idiv",
            Self::BAnd => "band",
            Self::BOr => "bor",
            Self::BXor => "bxor",
            Self::Shl => "shl",
            Self::Shr => "shr",
        }
    }
}
9261
9262fn as_num(v: Value) -> Option<Num> {
9263 match v {
9264 Value::Int(i) => Some(Num::Int(i)),
9265 Value::Float(f) => Some(Num::Float(f)),
9266 // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9267 Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9268 _ => None,
9269 }
9270}
9271
9272/// A concatenable operand's byte form (string, or a number coerced to its
9273/// string), or `None` when only a `__concat` metamethod can handle it.
9274/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9275/// suffix on integer-valued floats) — see `num_to_string_for`.
9276fn concat_piece(v: Value, legacy_float: bool) -> Option<Vec<u8>> {
9277 match v {
9278 Value::Str(s) => Some(s.as_bytes().to_vec()),
9279 Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9280 Value::Float(x) => {
9281 Some(numeric::num_to_string_for(Num::Float(x), legacy_float).into_bytes())
9282 }
9283 _ => None,
9284 }
9285}
9286
9287/// Index into the per-basic-type metatable table for a non-table value
9288/// (None for tables, which carry their own metatable).
9289fn type_mt_slot(v: Value) -> Option<usize> {
9290 match v {
9291 Value::Nil => Some(0),
9292 Value::Bool(_) => Some(1),
9293 Value::Int(_) | Value::Float(_) => Some(2),
9294 Value::Str(_) => Some(3),
9295 Value::Closure(_) | Value::Native(_) => Some(4),
9296 // tables and full userdata carry their own metatable; threads and
9297 // light userdata have none (PUC keeps a shared per-type mt slot for
9298 // light, but luna doesn't expose it — no test gates on it yet).
9299 Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9300 }
9301}
9302
9303/// Number, or string coerced to number (5.5 default string-arith coercion).
9304fn coerce_num(v: Value) -> Option<Num> {
9305 match v {
9306 Value::Int(i) => Some(Num::Int(i)),
9307 Value::Float(f) => Some(Num::Float(f)),
9308 Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9309 _ => None,
9310 }
9311}
9312
/// Lua's shift operator: a logical shift over 64 bits. A negative `b`
/// shifts right by `|b|` instead, and any shift distance of 64 or more
/// yields 0.
fn shift_left(a: i64, b: i64) -> i64 {
    let distance = b.unsigned_abs();
    if distance >= 64 {
        return 0;
    }
    // shift the raw bit pattern so right shifts never sign-extend
    let bits = a as u64;
    let shifted = if b < 0 {
        bits >> distance
    } else {
        bits << distance
    };
    shifted as i64
}
9328
/// Exact `i < f` between an integer and a float (PUC LTintfloat shape),
/// without rounding `i` to a float first. NaN compares false.
fn int_lt_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    // NaN, or below every integer: nothing is smaller
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    // above every integer
    if f >= TWO_POW_63 {
        return true;
    }
    let floor = f.floor();
    let bound = floor as i64;
    if floor == f {
        // f is integral: plain strict comparison
        i < bound
    } else {
        // f has a fraction: i < f exactly when i <= floor(f)
        i <= bound
    }
}
9344
/// Exact `i <= f` between an integer and a float, without rounding `i` to
/// a float first. NaN compares false.
fn int_le_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    // NaN, or below every integer
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    // above every integer
    if f >= TWO_POW_63 {
        return true;
    }
    // in range: i <= f exactly when i <= floor(f)
    let bound = f.floor() as i64;
    i <= bound
}
9358
9359/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9360/// (clipped limit, loop-is-empty).
9361fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9362 match limit {
9363 Num::Int(l) => {
9364 let empty = if step > 0 { init > l } else { init < l };
9365 (l, empty)
9366 }
9367 Num::Float(f) => {
9368 if f.is_nan() {
9369 return (0, true);
9370 }
9371 if step > 0 {
9372 if f >= 9_223_372_036_854_775_808.0 {
9373 (i64::MAX, false)
9374 } else {
9375 let l = f.floor();
9376 if l < -9_223_372_036_854_775_808.0 {
9377 (i64::MIN, true)
9378 } else {
9379 let li = l as i64;
9380 (li, init > li)
9381 }
9382 }
9383 } else if f <= -9_223_372_036_854_775_808.0 {
9384 (i64::MIN, false)
9385 } else {
9386 let l = f.ceil();
9387 if l >= 9_223_372_036_854_775_808.0 {
9388 // PUC forlimit: a positive limit beyond the integer range
9389 // is unreachable for a decreasing loop — empty.
9390 (i64::MAX, true)
9391 } else {
9392 let li = l as i64;
9393 (li, init < li)
9394 }
9395 }
9396 }
9397 }
9398}
9399
9400/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9401/// `@file` / `=name` markers in `source`).
9402fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9403 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9404 let b = unsafe { crate::runtime::string::bytes_of(p) };
9405 match b.first() {
9406 Some(b'@') | Some(b'=') => &b[1..],
9407 _ => b,
9408 }
9409}
9410
9411impl Vm {
9412 /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9413 /// that called the current native. Returns (closure, current line,
9414 /// extra vararg count).
9415 /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9416 /// function running at `level`, recovered from the caller's call
9417 /// instruction (PUC funcnamefromcode). None for the main chunk or a
9418 /// tail/anonymous call with no recoverable name.
9419 /// A debug-level position: either a real Lua frame (by index) or a synthetic
9420 /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9421 /// coroutine body), which `debug.getinfo` and traceback report as "C".
9422 /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9423 /// frame at `level`'s current pc, as (name, value). Locals are visited in
9424 /// registration order (start pc, then register) to match luaF_getlocalname.
9425 pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9426 if n == 0 {
9427 return None;
9428 }
9429 let fi = match self.dbg_frame(level)? {
9430 DbgKind::Lua(fi) => fi,
9431 // Tail-call placeholder has no real frame backing it — no locals
9432 // exist to read or write here. PUC `findlocal` returns NULL on
9433 // a CIST_TAIL activation.
9434 DbgKind::Tail(_) => return None,
9435 // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9436 // for slot `n` inside the argument window (db.lua :408-:413, and
9437 // the call/return hook reads of math.sin / select args via
9438 // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9439 // meaningful for a C frame here.
9440 DbgKind::C(fi) => {
9441 if n < 1 {
9442 return None;
9443 }
9444 let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9445 if (n as u32) > nargs {
9446 return None;
9447 }
9448 let slot = (func_slot + n as u32) as usize;
9449 let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9450 return Some((self.temporary_locvar_name().to_string(), val));
9451 }
9452 };
9453 let f = self.frames[fi].lua()?;
9454 // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9455 // is the first extra arg passed to the function (`...[1]`), `-2` the
9456 // second, etc. The 5.5 stack layout parks varargs in
9457 // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9458 if n < 0 {
9459 let i = (-n) as u32;
9460 if i == 0 || i > f.n_varargs {
9461 return None;
9462 }
9463 let val = self
9464 .stack
9465 .get((f.func_slot + i) as usize)
9466 .copied()
9467 .unwrap_or(Value::Nil);
9468 return Some((self.vararg_locvar_name().to_string(), val));
9469 }
9470 let proto = f.closure.proto;
9471 // PUC's parser injects a hidden `(vararg table)` locvar for an
9472 // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9473 // right after the fixed parameters (`numparams + 1`). Main chunks
9474 // and `(...t)` named-vararg funcs do NOT get one — gate on the
9475 // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9476 // their declared registers (no shadow slot allocated), so we expose
9477 // that hidden index purely in this debug view.
9478 let num_params = proto.num_params as i64;
9479 let vararg_slot = if proto.has_vararg_table_pseudo {
9480 Some(num_params + 1)
9481 } else {
9482 None
9483 };
9484 if vararg_slot == Some(n) {
9485 return Some(("(vararg table)".to_string(), Value::Nil));
9486 }
9487 let pc = (f.pc as usize).saturating_sub(1);
9488 let mut active: Vec<&crate::runtime::LocVar> = proto
9489 .locvars
9490 .iter()
9491 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9492 .collect();
9493 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9494 let mut idx: i64 = n - 1;
9495 if let Some(vs) = vararg_slot
9496 && n > vs
9497 {
9498 idx -= 1;
9499 }
9500 let idx = idx as usize;
9501 if let Some(lv) = active.get(idx) {
9502 let val = self
9503 .stack
9504 .get((f.base + lv.reg) as usize)
9505 .copied()
9506 .unwrap_or(Value::Nil);
9507 return Some((lv.name.to_string(), val));
9508 }
9509 // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9510 // still inside the frame's live register window — report a
9511 // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9512 // the next frame's func slot (`ci->next->func.p`) so the
9513 // temporary window stops where the callee's frame begins
9514 // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9515 // an out-of-range slot).
9516 let limit = self
9517 .frames
9518 .get(fi + 1)
9519 .and_then(|cf| cf.lua())
9520 .map(|nf| nf.func_slot)
9521 .unwrap_or_else(|| self.top.max(f.base));
9522 let temp_reg = idx as u32;
9523 if f.base + temp_reg < limit {
9524 let val = self
9525 .stack
9526 .get((f.base + temp_reg) as usize)
9527 .copied()
9528 .unwrap_or(Value::Nil);
9529 return Some((self.lua_temporary_locvar_name().to_string(), val));
9530 }
9531 None
9532 }
9533
9534 /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9535 /// the local / vararg name on success, `None` when the slot does not
9536 /// resolve. Mirrors `local_at`'s indexing exactly.
9537 pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9538 if n == 0 {
9539 return None;
9540 }
9541 let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9542 return None;
9543 };
9544 let f = self.frames[fi].lua()?;
9545 if n < 0 {
9546 let i = (-n) as u32;
9547 if i == 0 || i > f.n_varargs {
9548 return None;
9549 }
9550 let slot = (f.func_slot + i) as usize;
9551 if let Some(s) = self.stack.get_mut(slot) {
9552 *s = v;
9553 }
9554 return Some(self.vararg_locvar_name().to_string());
9555 }
9556 let proto = f.closure.proto;
9557 let num_params = proto.num_params as i64;
9558 let vararg_slot = if proto.has_vararg_table_pseudo {
9559 Some(num_params + 1)
9560 } else {
9561 None
9562 };
9563 if vararg_slot == Some(n) {
9564 // hidden (vararg table) slot has no real storage — accept the
9565 // write as a no-op for PUC parity (db.lua doesn't write to it).
9566 return Some("(vararg table)".to_string());
9567 }
9568 let pc = (f.pc as usize).saturating_sub(1);
9569 let mut active: Vec<&crate::runtime::LocVar> = proto
9570 .locvars
9571 .iter()
9572 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9573 .collect();
9574 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9575 let mut idx: i64 = n - 1;
9576 if let Some(vs) = vararg_slot
9577 && n > vs
9578 {
9579 idx -= 1;
9580 }
9581 let idx = idx as usize;
9582 let (name, reg) = if let Some(lv) = active.get(idx) {
9583 (lv.name.to_string(), lv.reg)
9584 } else {
9585 // PUC `luaG_findlocal` fallback into the temporary window —
9586 // bounded by the next frame's func slot (see local_at).
9587 let limit = self
9588 .frames
9589 .get(fi + 1)
9590 .and_then(|cf| cf.lua())
9591 .map(|nf| nf.func_slot)
9592 .unwrap_or_else(|| self.top.max(f.base));
9593 let temp_reg = idx as u32;
9594 if f.base + temp_reg >= limit {
9595 return None;
9596 }
9597 (self.lua_temporary_locvar_name().to_string(), temp_reg)
9598 };
9599 let slot = (f.base + reg) as usize;
9600 if let Some(s) = self.stack.get_mut(slot) {
9601 *s = v;
9602 }
9603 Some(name)
9604 }
9605
9606 /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9607 /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9608 /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9609 /// vararg indexing past `n_varargs`, or for a register past the live
9610 /// window. Naming follows the same priority as `local_at`: named locals,
9611 /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9612 /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9613 /// window.
9614 pub(crate) fn local_at_coro(
9615 &self,
9616 co: Gc<crate::runtime::Coro>,
9617 level: i64,
9618 n: i64,
9619 ) -> Option<(String, Value)> {
9620 if level < 1 || n == 0 {
9621 return None;
9622 }
9623 let frames = &co.frames;
9624 // Logical level: iterate Lua frames from the top.
9625 let lua_indices: Vec<usize> = (0..frames.len())
9626 .rev()
9627 .filter(|&i| frames[i].lua().is_some())
9628 .collect();
9629 let fi = *lua_indices.get((level - 1) as usize)?;
9630 let f = frames[fi].lua()?;
9631 if n < 0 {
9632 let i = (-n) as u32;
9633 if i == 0 || i > f.n_varargs {
9634 return None;
9635 }
9636 let val = co
9637 .stack
9638 .get((f.func_slot + i) as usize)
9639 .copied()
9640 .unwrap_or(Value::Nil);
9641 return Some((self.vararg_locvar_name().to_string(), val));
9642 }
9643 let proto = f.closure.proto;
9644 let num_params = proto.num_params as i64;
9645 let vararg_slot = if proto.has_vararg_table_pseudo {
9646 Some(num_params + 1)
9647 } else {
9648 None
9649 };
9650 if vararg_slot == Some(n) {
9651 return Some(("(vararg table)".to_string(), Value::Nil));
9652 }
9653 let pc = (f.pc as usize).saturating_sub(1);
9654 let mut active: Vec<&crate::runtime::LocVar> = proto
9655 .locvars
9656 .iter()
9657 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9658 .collect();
9659 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9660 let mut idx: i64 = n - 1;
9661 if let Some(vs) = vararg_slot
9662 && n > vs
9663 {
9664 idx -= 1;
9665 }
9666 let idx = idx as usize;
9667 if let Some(lv) = active.get(idx) {
9668 let val = co
9669 .stack
9670 .get((f.base + lv.reg) as usize)
9671 .copied()
9672 .unwrap_or(Value::Nil);
9673 return Some((lv.name.to_string(), val));
9674 }
9675 let limit = frames
9676 .get(fi + 1)
9677 .and_then(|cf| cf.lua())
9678 .map(|nf| nf.func_slot)
9679 .unwrap_or(co.top.max(f.base));
9680 let temp_reg = idx as u32;
9681 if f.base + temp_reg < limit {
9682 let val = co
9683 .stack
9684 .get((f.base + temp_reg) as usize)
9685 .copied()
9686 .unwrap_or(Value::Nil);
9687 return Some((self.lua_temporary_locvar_name().to_string(), val));
9688 }
9689 None
9690 }
9691
9692 /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
9693 /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
9694 pub(crate) fn local_set_coro(
9695 &mut self,
9696 co: Gc<crate::runtime::Coro>,
9697 level: i64,
9698 n: i64,
9699 v: Value,
9700 ) -> Option<String> {
9701 if level < 1 || n == 0 {
9702 return None;
9703 }
9704 let lua_indices: Vec<usize> = (0..co.frames.len())
9705 .rev()
9706 .filter(|&i| co.frames[i].lua().is_some())
9707 .collect();
9708 let fi = *lua_indices.get((level - 1) as usize)?;
9709 let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
9710 let f = co.frames[fi].lua()?;
9711 (
9712 f.func_slot,
9713 f.n_varargs,
9714 f.base,
9715 f.closure.proto,
9716 co.top.max(f.base),
9717 co.frames
9718 .get(fi + 1)
9719 .and_then(|cf| cf.lua())
9720 .map(|nf| nf.func_slot),
9721 )
9722 };
9723 if n < 0 {
9724 let i = (-n) as u32;
9725 if i == 0 || i > n_varargs {
9726 return None;
9727 }
9728 let slot = (func_slot + i) as usize;
9729 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9730 let stack = unsafe { &mut co.as_mut().stack };
9731 if let Some(s) = stack.get_mut(slot) {
9732 *s = v;
9733 }
9734 // co.stack values are traced — once-per-call barrier so propagate
9735 // sees the new value if co was already BLACK this cycle.
9736 self.heap
9737 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9738 return Some(self.vararg_locvar_name().to_string());
9739 }
9740 let num_params = proto.num_params as i64;
9741 let vararg_slot = if proto.has_vararg_table_pseudo {
9742 Some(num_params + 1)
9743 } else {
9744 None
9745 };
9746 if vararg_slot == Some(n) {
9747 return Some("(vararg table)".to_string());
9748 }
9749 let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
9750 let mut active: Vec<&crate::runtime::LocVar> = proto
9751 .locvars
9752 .iter()
9753 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9754 .collect();
9755 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9756 let mut idx: i64 = n - 1;
9757 if let Some(vs) = vararg_slot
9758 && n > vs
9759 {
9760 idx -= 1;
9761 }
9762 let idx = idx as usize;
9763 let (name, reg) = if let Some(lv) = active.get(idx) {
9764 (lv.name.to_string(), lv.reg)
9765 } else {
9766 let limit = next_func_slot.unwrap_or(top_for_temp);
9767 let temp_reg = idx as u32;
9768 if base + temp_reg >= limit {
9769 return None;
9770 }
9771 (self.lua_temporary_locvar_name().to_string(), temp_reg)
9772 };
9773 let slot = (base + reg) as usize;
9774 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9775 let stack = unsafe { &mut co.as_mut().stack };
9776 if let Some(s) = stack.get_mut(slot) {
9777 *s = v;
9778 }
9779 // co.stack values are traced — once-per-call barrier so propagate
9780 // sees the new value if co was already BLACK this cycle.
9781 self.heap
9782 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9783 Some(name)
9784 }
9785
9786 /// Frame info for a level on a suspended coroutine (PUC
9787 /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
9788 /// Returns the closure + currentline + extraargs + istailcall for the
9789 /// level-th Lua activation in `co.frames`. None if level overshoots.
9790 pub(crate) fn coro_frame_info(
9791 &self,
9792 co: Gc<crate::runtime::Coro>,
9793 level: i64,
9794 ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
9795 if level < 1 {
9796 return None;
9797 }
9798 let lua_indices: Vec<usize> = (0..co.frames.len())
9799 .rev()
9800 .filter(|&i| co.frames[i].lua().is_some())
9801 .collect();
9802 let fi = *lua_indices.get((level - 1) as usize)?;
9803 let f = co.frames[fi].lua()?;
9804 let proto = f.closure.proto;
9805 let pc = (f.pc as usize)
9806 .saturating_sub(1)
9807 .min(proto.lines.len().saturating_sub(1));
9808 let line = proto.lines.get(pc).copied().unwrap_or(0);
9809 Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
9810 }
9811
9812 /// Whether `level` resolves to any live activation (PUC lua_getstack).
9813 pub(crate) fn level_in_range(&self, level: i64) -> bool {
9814 self.dbg_frame(level).is_some()
9815 }
9816
9817 /// PUC's debug-API placeholder for an unnamed vararg slot returned by
9818 /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
9819 /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
9820 /// 5.3 :195 / 5.4 :286 baseline on their respective form.
9821 pub(crate) fn vararg_locvar_name(&self) -> &'static str {
9822 if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
9823 "(*vararg)"
9824 } else {
9825 "(vararg)"
9826 }
9827 }
9828
9829 /// PUC's debug-API placeholder for an unnamed temporary on a C
9830 /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
9831 /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
9832 /// their spelling.
9833 pub(crate) fn temporary_locvar_name(&self) -> &'static str {
9834 if matches!(
9835 self.version,
9836 LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9837 ) {
9838 // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
9839 // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
9840 // to `(C temporary)`.
9841 "(*temporary)"
9842 } else {
9843 "(C temporary)"
9844 }
9845 }
9846
9847 /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
9848 /// (an arithmetic intermediate sitting past the last named local on a
9849 /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
9850 /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
9851 /// spelling.
9852 pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
9853 if matches!(
9854 self.version,
9855 LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9856 ) {
9857 "(*temporary)"
9858 } else {
9859 "(temporary)"
9860 }
9861 }
9862
9863 /// The Lua closure running at `level` on the current thread, or `None`
9864 /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
9865 /// need this to reach the function whose env they read or rewrite.
9866 pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
9867 // `DbgKind::Tail` also falls into the else branch — a tail-call
9868 // placeholder has no closure of its own, so PUC's `lua_getstack` +
9869 // `getfunc` for that level returns no function, and `getfenv(level)`
9870 // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
9871 let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9872 return None;
9873 };
9874 Some(self.frames[fi].lua()?.closure)
9875 }
9876
9877 pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
9878 if level < 1 {
9879 return false;
9880 }
9881 let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
9882 (level as usize) <= count
9883 }
9884
9885 pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
9886 if level < 1 {
9887 return None;
9888 }
9889 // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
9890 // activation counts as a level, and each Lua activation's
9891 // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
9892 // dropped the synthetic shape: `istailcall` becomes a flag on the
9893 // real frame and Cont activations no longer count separately.
9894 // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
9895 // `getinfo(2).func == g1` pins the 5.2+ shape.
9896 let v51 = self.version <= LuaVersion::Lua51;
9897 let mut lvl = level;
9898 for fi in (0..self.frames.len()).rev() {
9899 match &self.frames[fi] {
9900 CallFrame::Lua(f) => {
9901 lvl -= 1;
9902 if lvl == 0 {
9903 return Some(DbgKind::Lua(fi));
9904 }
9905 if v51 {
9906 // 5.1 reports one synthetic CIST_TAIL level per
9907 // collapsed tail call (PUC `lua_getstack` subtracts
9908 // `ci->u.l.tailcalls` from the remaining level).
9909 for _ in 0..f.tailcalls {
9910 lvl -= 1;
9911 if lvl == 0 {
9912 return Some(DbgKind::Tail(fi));
9913 }
9914 }
9915 }
9916 if f.from_c {
9917 lvl -= 1;
9918 if lvl == 0 {
9919 return Some(DbgKind::C(fi));
9920 }
9921 }
9922 }
9923 CallFrame::Cont(_) => {
9924 if !v51 {
9925 continue;
9926 }
9927 lvl -= 1;
9928 if lvl == 0 {
9929 let parent = (0..fi)
9930 .rev()
9931 .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
9932 return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
9933 }
9934 }
9935 }
9936 }
9937 None
9938 }
9939
9940 pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9941 let f = self.frames[fi].lua()?;
9942 // metamethod handler frames carry the event tag (e.g. "close" for
9943 // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
9944 if f.is_hook {
9945 return Some(("hook", "?".to_string()));
9946 }
9947 if let Some(tm) = f.tm {
9948 return Some(("metamethod", tm_debug_name(self.version, tm)));
9949 }
9950 // a frame entered across a C boundary has no naming call instruction
9951 if fi == 0 || f.from_c {
9952 return None;
9953 }
9954 // the caller's call instruction names this frame; a continuation frame
9955 // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
9956 // already short-circuits those.
9957 let caller = self.frames[fi - 1].lua()?;
9958 let caller_proto = caller.closure.proto;
9959 let p: &crate::runtime::Proto = &caller_proto;
9960 let call_pc = (caller.pc as usize).checked_sub(1)?;
9961 let instr = *p.code.get(call_pc)?;
9962 match instr.op() {
9963 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9964 Op::TForCall => Some(("for iterator", "for iterator".to_string())),
9965 _ => None,
9966 }
9967 }
9968
9969 /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
9970 /// (PUC names a C function from the call instruction that invoked it). The
9971 /// native was called by the nearest Lua frame below `fi` (skipping pcall/
9972 /// xpcall continuations); that frame's call instruction names it.
9973 pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9974 // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
9975 // the synthetic C edge between the __gc finalizer (top Lua frame, has
9976 // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
9977 // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
9978 // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
9979 // sets the tag on the handler frame only, so level 2 there still
9980 // names the calling Lua frame's call instruction (5.5 locals.lua
9981 // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
9982 if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
9983 && fr.tm == Some("gc")
9984 {
9985 let name = tm_debug_name(self.version, "gc");
9986 return Some(("metamethod", name));
9987 }
9988 let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
9989 let caller = self.frames[caller_fi].lua()?;
9990 let p = &caller.closure.proto;
9991 let call_pc = (caller.pc as usize).checked_sub(1)?;
9992 let instr = *p.code.get(call_pc)?;
9993 match instr.op() {
9994 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9995 _ => None,
9996 }
9997 }
9998
9999 /// Native value currently sitting on the synthetic C edge identified by
10000 /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
10001 /// above `fi` (each one corresponds to one native pushing the hook) and
10002 /// indexes into `running_natives` from the top, also skipping the caller
10003 /// of `getinfo` itself (the native that is currently asking).
10004 /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
10005 /// expects the just-entered C function.
10006 pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
10007 let idx = self.c_frame_native_idx(fi)?;
10008 Some(Value::Native(self.running_natives[idx]))
10009 }
10010
10011 /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
10012 /// so `local_at` can index the native's argument window like PUC's
10013 /// `(C temporary)` path. Returns `None` when no matching native exists
10014 /// (e.g. the C edge corresponds to a non-native boundary).
10015 pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
10016 let idx = self.c_frame_native_idx(fi)?;
10017 self.running_native_slots.get(idx).copied()
10018 }
10019
10020 fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
10021 let n_above = self.frames[fi..]
10022 .iter()
10023 .filter_map(CallFrame::lua)
10024 .filter(|f| f.from_c)
10025 .count();
10026 if n_above == 0 {
10027 return None;
10028 }
10029 // running_natives.last() is the native currently executing (the one
10030 // that called getinfo). Pop it conceptually, then take the n_above-th
10031 // entry from the top of what remains.
10032 let nr = self.running_natives.len().checked_sub(1)?;
10033 nr.checked_sub(n_above)
10034 }
10035
10036 /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
10037 /// native whose function pointer matches `target`, and return its qualified
10038 /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
10039 /// `None` if no match is found. Used by `arg_error` when the running native
10040 /// was invoked from another native (PUC `ar.name == NULL` at level 0).
10041 pub(crate) fn pushglobalfuncname(
10042 &mut self,
10043 target: crate::runtime::value::NativeFn,
10044 ) -> Option<String> {
10045 let pkg_k = Value::Str(self.heap.intern(b"package"));
10046 let pkg = match self.globals().get(pkg_k) {
10047 Value::Table(t) => t,
10048 _ => return None,
10049 };
10050 let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10051 let loaded = match pkg.get(loaded_k) {
10052 Value::Table(t) => t,
10053 _ => return None,
10054 };
10055 let matches = |v: Value| -> bool {
10056 matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10057 };
10058 let mut k = Value::Nil;
10059 while let Ok(Some((nk, nv))) = loaded.next(k) {
10060 k = nk;
10061 let Value::Str(outer) = nk else { continue };
10062 let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10063 if matches(nv) {
10064 return Some(if outer == "_G" { String::new() } else { outer });
10065 }
10066 if let Value::Table(inner_t) = nv {
10067 let mut k2 = Value::Nil;
10068 while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10069 k2 = nk2;
10070 if matches(nv2)
10071 && let Value::Str(inner) = nk2
10072 {
10073 let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10074 return Some(if outer == "_G" {
10075 inner
10076 } else {
10077 format!("{outer}.{inner}")
10078 });
10079 }
10080 }
10081 }
10082 }
10083 None
10084 }
10085
10086 /// Name and namewhat of the native currently running on behalf of the top
10087 /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10088 /// `luaL_argerror` rewrite a method call's self-argument error.
10089 pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10090 let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10091 let p = &caller.closure.proto;
10092 let call_pc = (caller.pc as usize).checked_sub(1)?;
10093 let instr = *p.code.get(call_pc)?;
10094 match instr.op() {
10095 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10096 _ => None,
10097 }
10098 }
10099
10100 pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10101 let f = self.frames[fi].lua().expect("Lua frame");
10102 let proto = f.closure.proto;
10103 let pc = (f.pc as usize)
10104 .saturating_sub(1)
10105 .min(proto.lines.len().saturating_sub(1));
10106 let line = proto.lines.get(pc).copied().unwrap_or(0);
10107 // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10108 // (independent of any later write to a materialized vararg table's `n`).
10109 // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10110 // any nonzero `tailcalls` count flips it true.
10111 (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10112 }
10113
10114 /// Read an upvalue cell of a closure (debug.getupvalue).
10115 pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10116 match cl.upvals()[idx].state() {
10117 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10118 UpvalState::Closed(v) => v,
10119 }
10120 }
10121
10122 /// Write an upvalue cell of a closure (debug.setupvalue).
10123 pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10124 let uv = cl.upvals()[idx];
10125 match uv.state() {
10126 UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10127 UpvalState::Closed(_) => {
10128 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10129 unsafe { uv.as_mut() }.set_closed(v);
10130 self.heap
10131 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10132 }
10133 }
10134 }
10135
10136 /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10137 /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10138 /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10139 /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10140 /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10141 /// else `"function <src:line_defined>"` for an anonymous Lua function.
10142 /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10143 /// Walks the coroutine's saved frames and prepends a synthetic C-level
10144 /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10145 /// (its `resume_at` marker is set). `level` skips entries from the top
10146 /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10147 /// frame; etc.). db.lua :764-:768 sample several levels.
10148 pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10149 use crate::runtime::CoroStatus;
10150 const LEVELS1: usize = 10;
10151 const LEVELS2: usize = 11;
10152 #[derive(Clone, Copy)]
10153 enum VFrame<'a> {
10154 Lua(&'a crate::runtime::function::Frame),
10155 CPcall,
10156 CXpcall,
10157 CYield,
10158 /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10159 /// call collapsed into the next Lua frame down the chain.
10160 Tail,
10161 }
10162 let v51 = self.version <= LuaVersion::Lua51;
10163 let mut visible: Vec<VFrame<'_>> = Vec::new();
10164 // PUC's level 0 entry on a suspended coroutine is the C call where it
10165 // paused — `coroutine.yield` for a yielded thread.
10166 if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10167 visible.push(VFrame::CYield);
10168 }
10169 for cf in co.frames.iter().rev() {
10170 match cf {
10171 CallFrame::Lua(f) => {
10172 visible.push(VFrame::Lua(f));
10173 if v51 {
10174 for _ in 0..f.tailcalls {
10175 visible.push(VFrame::Tail);
10176 }
10177 }
10178 }
10179 CallFrame::Cont(nc) => match nc.kind {
10180 ContKind::Pcall => visible.push(VFrame::CPcall),
10181 ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10182 _ => {}
10183 },
10184 }
10185 }
10186 if level < 0 {
10187 level = 0;
10188 }
10189 if (level as usize) >= visible.len() {
10190 return Vec::new();
10191 }
10192 let visible = &visible[level as usize..];
10193 let total = visible.len();
10194 let mut out = Vec::new();
10195 // To name a Lua frame, PUC consults the caller's OP_CALL via
10196 // getobjname: find the index `fi` of the current frame in co.frames,
10197 // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10198 let coro_frame_name = |frames: &[CallFrame],
10199 target: &crate::runtime::function::Frame|
10200 -> Option<(&'static str, String)> {
10201 let fi = frames
10202 .iter()
10203 .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10204 if fi == 0 || target.from_c {
10205 return None;
10206 }
10207 let caller = frames[fi - 1].lua()?;
10208 let p = &caller.closure.proto;
10209 let call_pc = (caller.pc as usize).checked_sub(1)?;
10210 let instr = *p.code.get(call_pc)?;
10211 match instr.op() {
10212 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10213 Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10214 _ => None,
10215 }
10216 };
10217 let frames = &co.frames;
10218 let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10219 VFrame::Lua(f) => {
10220 let proto = f.closure.proto;
10221 let src = chunk_display_name(proto.source.as_ptr());
10222 let pc = (f.pc as usize)
10223 .saturating_sub(1)
10224 .min(proto.lines.len().saturating_sub(1));
10225 let line = proto.lines.get(pc).copied().unwrap_or(0);
10226 out.extend_from_slice(b"\n\t");
10227 out.extend_from_slice(src);
10228 out.extend_from_slice(format!(":{line}: in ").as_bytes());
10229 if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10230 out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10231 } else if proto.line_defined == 0 {
10232 out.extend_from_slice(b"main chunk");
10233 } else {
10234 out.extend_from_slice(
10235 format!(
10236 "function <{}:{}>",
10237 String::from_utf8_lossy(src),
10238 proto.line_defined
10239 )
10240 .as_bytes(),
10241 );
10242 }
10243 }
10244 VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10245 VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10246 VFrame::CYield => {
10247 // PUC `pushglobalfuncname` reports `yield` as
10248 // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10249 // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10250 // `'yield'` (5.5 :841).
10251 let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10252 if qualified {
10253 out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10254 } else {
10255 out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10256 }
10257 }
10258 VFrame::Tail => {
10259 // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10260 // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10261 // :403 asserts these appear once per collapsed tail call.
10262 out.extend_from_slice(b"\n\t(...tail calls...)");
10263 }
10264 };
10265 if total <= LEVELS1 + LEVELS2 {
10266 for &v in visible {
10267 emit(&mut out, v);
10268 }
10269 } else {
10270 for &v in &visible[..LEVELS1] {
10271 emit(&mut out, v);
10272 }
10273 let skip = total - LEVELS1 - LEVELS2;
10274 out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10275 for &v in &visible[total - LEVELS2..] {
10276 emit(&mut out, v);
10277 }
10278 }
10279 out
10280 }
10281
10282 pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10283 // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10284 // (11) bottom frames; if there are more, the middle is collapsed into
10285 // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10286 // overflow traceback would balloon to tens of megabytes (errors.lua's
10287 // stack-overflow test ran string.gmatch over the resulting buffer).
10288 const LEVELS1: usize = 10;
10289 const LEVELS2: usize = 11;
10290 // Collect visible frames in top-down order (deepest first). Both Lua
10291 // activations and pcall/xpcall continuations (which stand in for a
10292 // C-level pcall on the stack) are visible; PUC's traceback enumerates
10293 // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10294 #[derive(Clone, Copy)]
10295 enum VFrame {
10296 Lua(usize),
10297 CPcall,
10298 CXpcall,
10299 }
10300 let mut visible: Vec<VFrame> = Vec::new();
10301 for (fi, cf) in self.frames.iter().enumerate().rev() {
10302 match cf {
10303 CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10304 CallFrame::Cont(nc) => match nc.kind {
10305 ContKind::Pcall => visible.push(VFrame::CPcall),
10306 ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10307 _ => {}
10308 },
10309 }
10310 }
10311 // PUC `luaL_traceback` starts enumerating at the given `level` (in
10312 // terms of L1's CallInfo chain). For the running-thread case the C
10313 // frame for debug.traceback itself is level 0 and luna's `visible`
10314 // doesn't include it — so level=1 (PUC default) means "emit from the
10315 // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10316 // the top. level<=0 emits nothing extra here (d_traceback handles the
10317 // "[C]: in function 'traceback'" prefix for level==0 separately).
10318 let skip = (level - 1).max(0) as usize;
10319 if skip >= visible.len() {
10320 return Vec::new();
10321 }
10322 let visible = &visible[skip..];
10323 let total = visible.len();
10324 let mut out = Vec::new();
10325 let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10326 VFrame::Lua(fi) => {
10327 let f = this.frames[fi].lua().expect("Lua frame");
10328 let proto = f.closure.proto;
10329 let src = chunk_display_name(proto.source.as_ptr());
10330 let pc = (f.pc as usize)
10331 .saturating_sub(1)
10332 .min(proto.lines.len().saturating_sub(1));
10333 let line = proto.lines.get(pc).copied().unwrap_or(0);
10334 out.extend_from_slice(b"\n\t");
10335 out.extend_from_slice(src);
10336 out.extend_from_slice(format!(":{line}: in ").as_bytes());
10337 if let Some((namewhat, name)) = this.frame_name(fi) {
10338 out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10339 } else if proto.line_defined == 0 {
10340 out.extend_from_slice(b"main chunk");
10341 } else {
10342 out.extend_from_slice(
10343 format!(
10344 "function <{}:{}>",
10345 String::from_utf8_lossy(src),
10346 proto.line_defined
10347 )
10348 .as_bytes(),
10349 );
10350 }
10351 }
10352 VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10353 VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10354 };
10355 if total <= LEVELS1 + LEVELS2 {
10356 for &v in visible {
10357 emit_frame(&mut out, v, self);
10358 }
10359 } else {
10360 for &v in &visible[..LEVELS1] {
10361 emit_frame(&mut out, v, self);
10362 }
10363 let dropped = total - LEVELS1 - LEVELS2;
10364 out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10365 for &v in &visible[total - LEVELS2..] {
10366 emit_frame(&mut out, v, self);
10367 }
10368 }
10369 out
10370 }
10371}
10372
10373// ────────────────────────────────────────────────────────────────────
10374// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10375//
10376// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10377// trace-meta section after `vm.load`, resolves each entry's
10378// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10379// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10380// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10381// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10382// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10383// mcode without further plumbing — same code path the runtime JIT
10384// uses.
10385//
// Why a separate impl block: keeps the AOT API surface
// (`install_aot_trace` plus its `collect_proto_hashes` helper) easy to
// locate when grepping, without having to wade through the 8500-line
// `impl Vm` block above.
10389// ────────────────────────────────────────────────────────────────────
10390
10391impl Vm {
10392 /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10393 /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10394 /// fires it at the trace's `head_pc`. This is the runtime install
10395 /// API the deploy-side `luna-runtime-helpers` resolver calls once
10396 /// per AOT-emitted trace meta entry, after looking up `proto` by
10397 /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10398 ///
10399 /// # What this does
10400 ///
10401 /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10402 /// The trace's `entry` fn ptr must already point at runnable
10403 /// machine code (the AOT linker resolved the symbol at link time;
10404 /// the deploy resolver passes the address verbatim).
10405 ///
10406 /// # What this does NOT do
10407 ///
10408 /// - **No deduplication.** Calling twice with the same `head_pc`
10409 /// pushes two entries; the dispatcher's `find` will pick the
10410 /// first match. The deploy resolver is responsible for not
10411 /// double-installing.
10412 /// - **No invalidation of the runtime JIT cache.** If the runtime
10413 /// JIT later records + compiles a trace for the same
10414 /// `(proto, head_pc)`, both coexist on `proto.traces` and the
10415 /// dispatcher's `find` picks whichever appears first. AOT
10416 /// traces install before any runtime recording is possible
10417 /// (resolver runs before `vm.load` returns its first closure),
10418 /// so AOT traces win the race for the same site.
10419 /// - **No coverage gating.** AOT traces are trusted by
10420 /// construction — they were validated at compile time. Setting
10421 /// `dispatchable: false` on the input would silently disable
10422 /// dispatch; the caller controls that flag.
10423 ///
10424 /// # Safety / soundness
10425 ///
10426 /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10427 /// machine code). Soundness contract:
10428 ///
10429 /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10430 /// In the AOT-binary deploy shape this is trivially satisfied —
10431 /// the fn lives in the binary's `.text`.
10432 /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10433 /// what the trace's IR actually compiled against; the dispatcher
10434 /// uses them to marshal `reg_state` in and out without further
10435 /// validation. A mismatch corrupts vm.stack.
10436 ///
10437 /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10438 /// invariants hold; this fn is a plain push — no validation that
10439 /// would slow the dispatcher's hot path either.
10440 pub fn install_aot_trace(
10441 &mut self,
10442 proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10443 trace: crate::jit::trace::CompiledTrace,
10444 ) {
10445 let _ = self; // resolver passes &mut Vm for symmetry with future
10446 // pending-install + hash-walk variants; nothing on `self` to
10447 // mutate today because the install target lives on the Proto.
10448 proto.traces.borrow_mut().push(TArc::new(trace));
10449 }
10450
10451 /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10452 /// reachable from `root` and return `(proto, stable_hash)` pairs
10453 /// for every Proto found. Used by the deploy-side resolver to
10454 /// match AOT-emitted `proto_hash` keys against the freshly
10455 /// `undump`'d chunk's protos.
10456 ///
10457 /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10458 /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10459 /// multiple nested closures (rare; the cache field would catch
10460 /// the closure-side dedup, not the Proto side) is reported once.
10461 ///
10462 /// # Why on `&Vm` and not a free fn
10463 ///
10464 /// Keeps the AOT install API discoverable on the Vm surface —
10465 /// `vm.collect_proto_hashes(root)` reads naturally next to
10466 /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10467 /// any Vm field, so `&self` (read-only) is enough.
10468 pub fn collect_proto_hashes(
10469 &self,
10470 root: crate::runtime::Gc<crate::runtime::function::Proto>,
10471 ) -> Vec<(
10472 crate::runtime::Gc<crate::runtime::function::Proto>,
10473 [u8; 16],
10474 )> {
10475 let _ = self;
10476 let mut out = Vec::new();
10477 let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10478 std::collections::HashSet::new();
10479 let mut queue: std::collections::VecDeque<
10480 crate::runtime::Gc<crate::runtime::function::Proto>,
10481 > = std::collections::VecDeque::new();
10482 queue.push_back(root);
10483 while let Some(p) = queue.pop_front() {
10484 let key = p.as_ptr() as *const _;
10485 if !seen.insert(key) {
10486 continue;
10487 }
10488 out.push((p, p.stable_hash()));
10489 for &child in p.protos.iter() {
10490 queue.push_back(child);
10491 }
10492 }
10493 out
10494 }
10495}