// luna_core/vm/exec.rs
//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
//!
//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16 AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17 MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
24/// A Lua virtual machine: one OS thread's worth of Lua state.
25///
26/// # Threading model
27///
28/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
29/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
30/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
31/// design. Embedders that want concurrency spawn one `Vm` per OS
32/// thread (or per single-thread Tokio worker) and exchange data via
33/// channels. See [`docs/threading.md`](../../docs/threading.md) for
34/// canonical embedding patterns including Tokio `current_thread`,
35/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
36///
37/// The constraint is enforced at compile time:
38///
39/// ```compile_fail
40/// fn must_be_send<T: Send>() {}
41/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
42/// ```
43///
44/// A future `feature = "send"` (post-v1.1 sprint) will gate an
45/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
46/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
47pub struct Vm {
48 /// The GC heap owned by this VM. Embedders normally interact via the
49 /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
50 /// the heap directly.
51 pub heap: Heap,
52 stack: Vec<Value>,
53 frames: Vec<CallFrame>,
54 /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
55 /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
56 /// helpers (debug-asserted on use). NOT consumed by readers yet;
57 /// week 1 is pure scaffold. Week 2-N migrations replace readers
58 /// one slice at a time, then remove `frames: Vec<CallFrame>` in
59 /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
60 frames_top: u32,
61 /// open upvalues, sorted ascending by stack slot
62 open_upvals: Vec<(u32, Gc<Upvalue>)>,
63 /// to-be-closed slots, ascending
64 tbc: Vec<u32>,
65 /// logical stack top for multi-result sequences
66 pub(crate) top: u32,
67 globals: Gc<Table>,
68 /// shared metatable for all strings (populated by the string lib, P04)
69 /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
70 /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
71 /// own. Settable via debug.setmetatable.
72 type_mt: [Option<Gc<Table>>; 5],
73 /// pre-interned metamethod event names, indexed by `Mm`
74 mm_names: Vec<Gc<crate::runtime::LuaStr>>,
75 /// native↔Lua nesting depth (PUC C-stack guard analogue)
76 c_depth: u32,
77 /// number of live pcall/xpcall continuation frames on the running thread
78 /// (PUC counts these against nCcalls). Bounds protected-call recursion the
79 /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
80 /// with the coroutine context, since continuations survive a yield.
81 pcall_depth: u32,
82 /// number of non-yieldable C calls in flight on the running thread (PUC's
83 /// `L->nny`). A library callback that runs via synchronous Rust recursion
84 /// (sort comparator, gsub replacement) cannot be continued across a yield,
85 /// so it bumps this for its duration; `coroutine.yield` inside hits the
86 /// C-call boundary and errors. Always 0 at a suspend point (a yield can
87 /// never cross such a call), so it needs no per-thread save/restore.
88 nny: u32,
89 /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
90 /// stack-overflow that surfaces *inside* the handler is reported as PUC's
91 /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
92 /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
93 /// loop)` then matches. PUC tracks this via the soft-cap window
94 /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
95 /// scope explicitly.
96 msgh_depth: u32,
97 /// set by a coroutine closing itself (`coroutine.close()` on the running
98 /// thread): the to-be-closed handlers have already run; the thread must now
99 /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
100 /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
101 /// unwind, so a protecting pcall cannot catch it) the termination.
102 terminating: Option<Option<Value>>,
103 /// xoshiro256** state (math.random)
104 rng: [u64; 4],
105 /// VM creation time (os.clock)
106 started: std::time::Instant,
107 version: LuaVersion,
108 /// error object being threaded through a chain of __close handlers; a GC
109 /// root for the duration (a handler may trigger collection)
110 closing_err: Option<Value>,
111 /// the coroutine whose context is currently live in the fields above;
112 /// `None` while the main thread runs (P05)
113 current: Option<Gc<crate::runtime::Coro>>,
114 /// the main thread's saved execution context while a coroutine runs
115 main_ctx: Option<SavedCtx>,
116 /// set by `coroutine.yield` to suspend the running coroutine: the yielded
117 /// values plus the slot/result-count needed to finish the yielding call on
118 /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
119 yielding: Option<(Vec<Value>, u32, i32)>,
120 /// results expected by the in-flight native call (so `yield` knows how many
121 /// values its call site wants when it suspends)
122 native_nresults: i32,
123 /// identity object for the main thread, returned by `coroutine.running`
124 /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
125 main_coro: Option<Gc<Coro>>,
126 /// `collectgarbage` mode name ("incremental"/"generational"). The collector
127 /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
128 /// switches report the previous one, as PUC does.
129 gc_mode: &'static str,
130 /// the live-register boundary of the running thread for GC rooting (PUC's
131 /// `L->top`): set precisely at each GC safe point so freed temporary
132 /// registers above it are not rooted. Without this the collector roots the
133 /// whole stack window, pinning weak-table values stranded in stale temps
134 /// (e.g. closure.lua's `while x[1]` GC-detection loop).
135 pub(crate) gc_top: u32,
136 /// `collectgarbage("param", name [,value])` pacing parameters. The collector
137 /// is still stop-the-world, so these are stored/returned for API fidelity
138 /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
139 /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
140 gc_pause: i64,
141 gc_stepmul: i64,
142 gc_stepsize: i64,
143 /// true while `__gc` finalizers are being run, so a finalizer that calls
144 /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
145 /// `collectgarbage` yields fail).
146 gc_finalizing: bool,
147 /// C ABI scratch (`capi` module): the host-visible value stack that C
148 /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
149 /// Kept here (instead of in a separate `LuaState` wrapper) so the
150 /// trampoline that bridges to a `LuaCFunction` can safely cast the
151 /// Vm pointer it already holds to the public `*mut LuaState` type
152 /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
153 pub capi_stack: Vec<crate::runtime::Value>,
154 /// Pinned CString backing the pointer last returned by `lua_tostring`;
155 /// valid until the next `lua_tostring` on the same Vm.
156 pub capi_cstr_pin: Option<std::ffi::CString>,
157 /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
158 /// concatenate across continuation calls until a non-`tocont` call
159 /// flushes; the default warnf recognises `@on`/`@off` control messages
160 /// and starts disabled. luna's `emit_warn` mirrors the default warnf
161 /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
162 /// keep the older raise semantics).
163 pub(crate) warn_state: WarnState,
164 pub(crate) warn_buf: Vec<u8>,
165 /// P09 embedding cooperative budget: a per-Vm tick counter that the run
166 /// loop decrements once per dispatch turn. When it hits zero the loop
167 /// raises a catchable "instruction budget exceeded" error so the embedder
168 /// can yield control back to its caller (short-script eval, game
169 /// frame budgets). `None` = unbounded; reset on each call via
170 /// `set_instr_budget`.
171 pub(crate) instr_budget: Option<i64>,
172 // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
173 // `self.jit` below + `crate::vm::jit_state` for field docs.
174 // (Was: jit_enabled here.)
175 // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
176 // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
177 // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
178 // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
179 // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
180 // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
181 // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
182 // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
183 // materialize_emit,closure_emit}_count — all moved to JitState.
184 /// Bytecode-loading gate. Default `true`. Sandbox embedders should
185 /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
186 /// precompiled chunks (which bypass the parser's depth / opcode
187 /// limits). When `false`, the loader rejects any source whose first
188 /// byte is the bytecode signature `\27` ("`\27Lua`").
189 pub(crate) bytecode_loading: bool,
190 /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
191 /// a strictly larger trust surface than luna's own dump format
192 /// (third-party toolchain bugs, malformed chunks, unknown opcode
193 /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
194 /// through the per-dialect PUC translators in `crate::vm::dump::puc`
195 /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
196 /// Embedder toggles via `set_puc_bytecode_loading`.
197 pub(crate) puc_bytecode_loading: bool,
198 /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
199 /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
200 /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
201 /// source exceeds this, the loader returns the PUC-shaped
202 /// `not enough memory` error before the host allocator is asked to
203 /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
204 /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
205 /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
206 /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
207 /// Embedders that genuinely need to load > 256 MiB sources widen the
208 /// cap via [`Vm::set_loader_input_budget`].
209 pub(crate) loader_input_budget: usize,
210 /// In-process log of fully-emitted warnings (each entry = one flushed
211 /// message, sans the "Lua warning: " prefix and trailing newline). Lets
212 /// tests assert what was warned without scraping stderr.
213 pub(crate) warn_log: Vec<Vec<u8>>,
214 /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
215 /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
216 /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
217 /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
218 /// shape stub for db.lua :328; if other registry-keyed APIs land later
219 /// they can share this table.
220 pub(crate) registry: Option<Gc<Table>>,
221 /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
222 /// registry entry); attached to every file userdata the io library makes
223 pub(crate) file_mt: Option<Gc<Table>>,
224 /// io library default input/output streams (PUC registry IO_INPUT/IO_OUTPUT)
225 pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
226 pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
227 /// the running thread's debug hook state (`debug.sethook`); per-thread,
228 /// swapped with the execution context on a coroutine resume/yield
229 pub(crate) hook: HookState,
230 /// true while the hook itself runs, so its own execution fires no events
231 /// (PUC clears the mask for the duration)
232 pub(crate) in_hook: bool,
233 /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
234 /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
235 /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
236 /// accumulate the count instead of capping at 1.
237 pub(crate) pending_tailcalls: u32,
238 /// Name of the C native that just propagated an error (captured before
239 /// the native is popped from `running_natives`). Lets a dying coroutine
240 /// preserve `[C]: in function '<name>'` at the top of its traceback
241 /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
242 /// luna's native frames are off-stack so we stash the name explicitly.
243 pub(crate) errored_native: Option<String>,
244 /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
245 /// (relative to the activation's func slot) and the number transferred.
246 /// Set just before firing a call/return hook, read by `getinfo("r")`.
247 pub(crate) hook_ftransfer: u16,
248 pub(crate) hook_ntransfer: u16,
249 /// metamethod event tag (e.g. "close") to attach to the next Lua frame
250 /// pushed by `push_frame`; `close_slots` sets this before calling a
251 /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
252 /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
253 pending_tm: Option<&'static str>,
254 /// `true` when the next `push_frame` is the user hook function itself,
255 /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
256 /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
257 pending_is_hook: bool,
258 /// traceback snapshot taken at the error point (the first `unwind` entry
259 /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
260 /// the failed frames are popped — can still see the error point's stack
261 /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
262 /// stack intact; we approximate by snapshotting the string and letting
263 /// `d_traceback` consume it. Cleared on Cont catch and at host-level
264 /// `call_value` entry (`public_call_depth == 0`).
265 pub(crate) error_traceback: Option<Vec<u8>>,
266 /// nesting depth of public `call_value` entries (host vs. internal). The
267 /// outermost entry (depth 0) resets per-error state (`error_traceback`);
268 /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
269 public_call_depth: u32,
270 /// stack of native (`Value::Native`) closures currently running on the
271 /// Rust call stack. `begin_call` pushes the closure before invoking
272 /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
273 /// native call (PUC `ar.name == NULL` at level 0 because the level-0
274 /// caller is C, not Lua) and qualify the running function's name via
275 /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
276 pub(crate) running_natives: Vec<Gc<NativeClosure>>,
277 /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
278 /// the native's argument-window head and width, so `debug.getlocal`
279 /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
280 pub(crate) running_native_slots: Vec<(u32, u32)>,
281 // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
282 // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
283 // trace_compiler — all moved to JitState. See `jit` below.
284 /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
285 /// when `chunk_compiler` / `trace_compiler` are
286 /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
287 ///
288 /// `#[doc(hidden)] pub` so the `luna` crate's
289 /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
290 /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
291 /// field). Not part of the embedder-facing API surface.
292 #[doc(hidden)]
293 pub jit: crate::vm::jit_state::JitState,
294
295 /// B12 host roots — append-only `Vec<Value>` traced as an extra
296 /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
297 /// `LuaRoot`) hold indices into this vector so the underlying
298 /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
299 ///
300 /// v1.1 strategy: append-only with explicit `unpin_all` / new Vm.
301 /// Slot recycling lands in Phase 3 alongside B8 LuaUserdata, when
302 /// the trade-offs between `Drop` plumbing and append-only memory
303 /// growth have a richer ergonomics envelope to live in.
304 pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
305 /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
306 /// back if non-empty, else extends `host_roots`. Generation
307 /// overflow at `u32::MAX` retires the slot (NOT pushed here).
308 pub(crate) host_roots_free: Vec<u32>,
309
310 /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
311 /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
312 /// callback). Each entry is one in-flight working buffer; `gc_roots`
313 /// extends with every contained `Value` so a `collectgarbage()`
314 /// inside the comparator cannot free strings/tables snapshotted
315 /// here. Nested sorts push a new buffer on entry, pop on exit
316 /// (sort.lua's `load(..)(); collectgarbage()` compare callback
317 /// regression).
318 pub(crate) sort_scratch: Vec<Vec<Value>>,
319
320 /// v1.3 Phase ML — MacroLua compile-time macro registry.
321 /// Pre-populated with built-in macros (`@quote` / `@unquote` /
322 /// `@if` / `@gensym`) at construction time when `version ==
323 /// LuaVersion::MacroLua`; embedders register custom macros via
324 /// [`Vm::define_macro`]. The expander runs once per `load()` call
325 /// between lexing and parsing (only when `is_macro_lua()`).
326 pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,
327
328 /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
329 /// by `TypeId::of::<T>()` for embedder types implementing
330 /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
331 /// [`Vm::register_userdata`]; metatables are pinned via
332 /// [`Vm::pin_host`] at registration time so the entry's
333 /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
334 pub(crate) userdata_metatables:
335 std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,
336
337 /// B6 — classification of the most recent error raised on this Vm.
338 /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
339 /// at well-known sites (syntax errors, instr-budget trips, native
340 /// callback errors, type errors).
341 pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,
342
343 /// B6 — `(source_name, line)` of the most recent error. Set by the
344 /// dispatcher / lexer / parser; cleared when a new call_value
345 /// enters cleanly.
346 pub(crate) last_error_source: Option<(String, u32)>,
347
348 /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
349 /// the dispatcher hot loop yields cooperatively (sets
350 /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
351 /// to `EvalFuture::poll`) instead of returning a real
352 /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
353 /// for the duration of the future; restored to `false` on
354 /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
355 /// leave it `false` so v1.0 behavior is preserved exactly.
356 pub(crate) async_mode: bool,
357
358 /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
359 /// before driving a slice. The dispatcher itself does not call it
360 /// (the future's poll loop does `wake_by_ref` after observing
361 /// `BudgetExhausted`), but storing the waker keeps the door open
362 /// for Stage 2 async natives to wake the host directly from a
363 /// helper future.
364 pub(crate) async_waker: Option<std::task::Waker>,
365
366 /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
367 /// `instr_budget` at the start of each `EvalFuture::poll` slice.
368 /// Default 10_000 (RFC §D5). Tunable via
369 /// [`Vm::set_async_slice`].
370 pub(crate) async_slice_size: i64,
371
372 /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
373 /// budget exhaustion fires; checked by `exec_with` (so the
374 /// sentinel propagates without `unwind` running, mirroring
375 /// `yielding.is_some()`) and by `call_value_impl` (so the call
376 /// frames survive for the next poll). Cleared by `drive_one`
377 /// after translating it to `DispatchOutcome::BudgetExhausted`.
378 pub(crate) host_yield_pending: bool,
379
380 /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
381 /// when an async-marked [`NativeClosure`] is invoked under
382 /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
383 /// mechanism as `host_yield_pending` — see `exec_with` +
384 /// `call_value_impl`), stashes the in-flight future +
385 /// post-completion context here, and surfaces them to
386 /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
387 /// once the future is moved out into a
388 /// `DispatchOutcome::AsyncNativeAwaiting`.
389 pub(crate) pending_async_native_fut:
390 Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,
391
392 /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
393 /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
394 /// commit the future's eventual `Ok(nret)` back into the calling
395 /// frame's expected result slots. Recorded by the dispatcher;
396 /// consumed by [`Vm::commit_async_native_result`] after the
397 /// future resolves.
398 pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
399}
400
/// v1.1 B10 Stage 2 — call-site context an in-flight async native
/// needs preserved across the cooperative-yield boundary.
///
/// The dispatcher records this when it routes a `NativeClosure` with
/// `is_async == true` through the cooperative path; `EvalFuture::poll`
/// hands it back to [`Vm::commit_async_native_result`] once the
/// awaited future resolves so `finish_results` (and the post-call GC
/// checkpoint) can run as if the native had completed synchronously.
#[derive(Clone, Copy)]
pub(crate) struct AsyncNativeCallCtx {
    /// Stack slot of the called function, i.e. the head of the call
    /// window the results are committed back into.
    pub func_slot: u32,
    /// Recorded for parity with the sync native-call path's
    /// `native_nresults`/`gc_top` bookkeeping; reserved for Stage 3+
    /// hook firing + traceback shaping. Not yet read in Stage 2.
    #[allow(dead_code)]
    pub nargs: u32,
    /// Number of results the call site expects from the native.
    pub nresults: i32,
    /// Recorded for Stage 3+ traceback + GC-root-window auditing.
    /// Stage 2 reads `Vm.gc_top` directly post-resume, so this is
    /// unread today; carried so a Stage 3 audit can confirm the
    /// pre-suspend root window matches the post-resume one.
    #[allow(dead_code)]
    pub gc_top: u32,
}
425
426/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
427/// hookcount). `func` is the Lua hook; the booleans are the PUC mask bits.
428#[derive(Clone, Copy, Default)]
429pub struct HookState {
430 /// the hook function (`None` when no hook is installed)
431 pub func: Option<Value>,
432 /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
433 /// (Rust first); both can be installed simultaneously, but most
434 /// embedders pick one.
435 pub rust_func: Option<RustDebugHook>,
436 /// LUA_MASKCALL — fire on function entry
437 pub call: bool,
438 /// LUA_MASKRET — fire on function return
439 pub ret: bool,
440 /// LUA_MASKLINE — fire on source-line change
441 pub line: bool,
442 /// LUA_MASKCOUNT — fire every `count_base` instructions
443 pub count: bool,
444 /// instruction count between count events (PUC basehookcount)
445 pub count_base: i64,
446 /// instructions left until the next count event (PUC hookcount)
447 pub count_left: i64,
448}
449
450/// Rust-side debug hook callback (B11). Receives the `Vm` plus a
451/// classified event. The callback runs synchronously in the
452/// dispatcher; the hook flag (`in_hook`) is set for its duration so
453/// hook recursion is suppressed.
454pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
/// Classified debug event delivered to a [`RustDebugHook`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RustHookEvent {
    /// Function entry (`hook_call` analogue).
    Call,
    /// Function return (`hook_return` analogue).
    Return,
    /// Tail call entry (PUC 5.2+ separates this from a plain Call).
    TailCall,
    /// Source-line change (the `u32` is the 1-based line number).
    Line(u32),
    /// Instruction count event (fires every `count_base` instructions).
    Count,
}
470
/// Mask flags for [`Vm::set_rust_debug_hook`]. OR these to subscribe
/// to multiple event categories with a single hook installation.
///
/// Subscribe to function-call events.
pub const HOOK_MASK_CALL: u32 = 0x1;
/// Subscribe to function-return events.
pub const HOOK_MASK_RETURN: u32 = 0x2;
/// Subscribe to line-change events.
pub const HOOK_MASK_LINE: u32 = 0x4;
/// Subscribe to instruction-count events.
pub const HOOK_MASK_COUNT: u32 = 0x8;
480
481/// A thread's swapped-out execution context (PUC per-thread stack state).
482struct SavedCtx {
483 stack: Vec<Value>,
484 frames: Vec<CallFrame>,
485 open_upvals: Vec<(u32, Gc<Upvalue>)>,
486 tbc: Vec<u32>,
487 top: u32,
488 pcall_depth: u32,
489 hook: HookState,
490 /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
491 /// the rest of the suspended state so each thread can keep its own
492 /// `setfenv(0, env)` rewire without the swap leaking into another
493 /// thread (5.1 closure.lua :177).
494 globals: Gc<Table>,
495}
496
497/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
498enum Unwound {
499 /// caught by a pcall/xpcall continuation; resume running its caller
500 Caught,
501 /// caught by a continuation that was the entry-level activation; these are
502 /// the call's (wrapped) results
503 CaughtReturn(Vec<Value>),
504 /// no protecting continuation up to `entry_depth`; propagate the error
505 Propagated(LuaError),
506}
507
/// A resolved debug stack level: a real Lua frame (by index into `frames`) or a
/// synthetic C frame for a call_value boundary.
pub(crate) enum DbgKind {
    /// a real Lua activation; the index points into `frames`
    Lua(usize),
    /// a synthetic C level; the index is the `from_c` Lua frame it sits below,
    /// used to name the native via its invoking call instruction.
    C(usize),
    /// PUC `CIST_TAIL` placeholder — a Lua-to-Lua tail call collapsed the
    /// caller's activation, so `debug.getinfo(level)` at this slot returns
    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
    /// `func = nil` and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
    /// both shapes). The index points at the *tail-called* frame whose
    /// `is_tail` flag induced this synthetic level.
    Tail(#[allow(dead_code)] usize),
}
523
524/// Outcome of an index/newindex/comparison fast path: either a directly
525/// computed result, or a metamethod (with the receiver it resolved against) the
526/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
527enum MmOut {
528 /// index → the looked-up value; newindex → done (raw set performed);
529 /// comparison → the boolean result already known
530 Done(Value),
531 /// a metamethod to call; `recv` is the chain element it was found on (the
532 /// extra args — key / value — are supplied by the caller)
533 Mm { func: Value, recv: Value },
534 /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
535 /// carries `__le` — `op_compare` swaps the args and negates the result.
536 /// Lives separate from `Mm` so the synth path can stay yieldable without
537 /// every other Mm caller learning a swap flag they would never set.
538 CompareSynth { func: Value },
539}
540
/// Metamethod events; discriminants index `Vm::mm_names`.
///
/// The variant order must match [`MM_NAMES`] entry for entry: variant
/// `k` is spelled `MM_NAMES[k]`. New events go at the end of both lists.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
pub(crate) enum Mm {
    Index,
    NewIndex,
    Call,
    ToString,
    Metatable,
    Name,
    Eq,
    Lt,
    Le,
    Concat,
    Len,
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Pow,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
    Unm,
    BNot,
    Close,
    Gc,
    Pairs,
}

/// Lua-visible metamethod key for each [`Mm`] event, in discriminant order.
const MM_NAMES: [&str; 28] = [
    "__index",
    "__newindex",
    "__call",
    "__tostring",
    "__metatable",
    "__name",
    "__eq",
    "__lt",
    "__le",
    "__concat",
    "__len",
    "__add",
    "__sub",
    "__mul",
    "__div",
    "__mod",
    "__pow",
    "__idiv",
    "__band",
    "__bor",
    "__bxor",
    "__shl",
    "__shr",
    "__unm",
    "__bnot",
    "__close",
    "__gc",
    "__pairs",
];

// Compile-time guard: adding an `Mm` variant without its name (or the
// reverse) would silently misalign every lookup indexed by `Mm`.
const _: () = assert!(
    MM_NAMES.len() == Mm::Pairs as usize + 1,
    "MM_NAMES must have exactly one entry per Mm variant"
);
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614 if version <= LuaVersion::Lua53 {
615 format!("__{tm}")
616 } else if tm == "gc" {
617 "__gc".to_string()
618 } else {
619 tm.to_string()
620 }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626 use crate::vm::isa::Op;
627 Some(match op {
628 Op::Add => "add",
629 Op::Sub => "sub",
630 Op::Mul => "mul",
631 Op::Div => "div",
632 Op::Mod => "mod",
633 Op::Pow => "pow",
634 Op::IDiv => "idiv",
635 Op::BAnd => "band",
636 Op::BOr => "bor",
637 Op::BXor => "bxor",
638 Op::Shl => "shl",
639 Op::Shr => "shr",
640 Op::Unm => "unm",
641 Op::BNot => "bnot",
642 Op::Concat => "concat",
643 Op::Len => "len",
644 Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645 Op::SetField | Op::SetTable | Op::SetI => "newindex",
646 Op::Eq | Op::EqK => "eq",
647 Op::Lt => "lt",
648 Op::Le => "le",
649 _ => return None,
650 })
651}
652
/// PUC MAXTAGLOOP: bound on `__index`/`__newindex` chains.
const MAX_TAG_LOOP: u32 = 2000;
/// PUC `MAXCCMT`: bound on a `__call` metamethod chain (lvm.c). 200 chains
/// is more than any reasonable program needs and matches PUC 5.4/5.5; the
/// earlier `15` here was tight enough to fire on calls.lua :194 (N=20).
const MAX_CCMT: u32 = 200;
/// PUC LUAI_MAXCCALLS analogue: native↔Lua nesting bound.
const MAX_C_DEPTH: u32 = 200;
/// luna's engine-level VM stack cap (used by call-site overflow checks).
/// Slightly larger than PUC's `LUAI_MAXSTACK` so engine internals have a
/// little headroom above any single library push.
const MAX_LUA_STACK: u32 = 1 << 20;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults via
/// `lua_checkstack` to refuse multi-value pushes (`table.unpack` returning
/// N values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins
/// this at one million — `for j in {lim-10, …}` expects every j ≥ lim-10
/// to fail because the few slots already consumed in the coroutine push
/// the effective cap below lim-10.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// State of the PUC 5.4+ default warning function.
///
/// The base library's `warn` switches between the two states through the
/// `@on` / `@off` control messages. Any other `@<word>` control message is
/// ignored without effect, as in `lauxlib.c::checkcontrol`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WarnState {
    /// Warnings are dropped silently (the state after `warn("@off")`).
    Off,
    /// Warnings are written to stderr (the state after `warn("@on")`).
    On,
}
683
/// Pull a human-readable message out of a `catch_unwind` payload, as far as
/// that is possible.
///
/// A payload that is a `String` (what a formatted `panic!("{x}")` carries)
/// or a `&'static str` (what a plain literal `panic!("msg")` carries) is
/// returned as text. Any other payload type yields `"<non-string panic>"`.
/// The native-call `catch_unwind` uses this to turn a panic into a Lua error.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    // Look through the Box so the downcasts test the boxed value itself.
    let inner: &(dyn std::any::Any + Send) = &**payload;
    inner
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            inner
                .downcast_ref::<&'static str>()
                .map(|text| (*text).to_string())
        })
        .unwrap_or_else(|| "<non-string panic>".to_string())
}
697
698/// Combined error type returned by [`Vm::eval`] and friends — either the
699/// chunk failed to parse / compile, or it raised at runtime.
700#[derive(Debug)]
701pub enum Error {
702 /// Parse or compile failure.
703 Syntax(SyntaxError),
704 /// Runtime error raised during execution.
705 Runtime(LuaError),
706}
707
708impl From<SyntaxError> for Error {
709 fn from(e: SyntaxError) -> Error {
710 Error::Syntax(e)
711 }
712}
713
714impl From<LuaError> for Error {
715 fn from(e: LuaError) -> Error {
716 Error::Runtime(e)
717 }
718}
719
720impl Drop for Vm {
721 fn drop(&mut self) {
722 // state close: run `__gc` for every still-registered finalizable before
723 // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
724 // single pass — objects created by a closing finalizer are not
725 // re-finalized (they go to the heap's free list directly).
726 self.heap.queue_all_finalizers();
727 self.run_finalizers();
728 }
729}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742 frames.push(cf);
743 // Shadow maintenance is debug-only: release builds skip the
744 // increment + assertion entirely. The shadow's purpose in Week 1
745 // is to VERIFY the assumed invariant (frames_top == frames.len())
746 // across all push/pop sites; once Week 2+ migrates readers to
747 // consume the shadow, release will run the increment unconditionally.
748 #[cfg(debug_assertions)]
749 {
750 *frames_top += 1;
751 debug_assert_eq!(
752 *frames_top as usize,
753 frames.len(),
754 "P17-D frames_top out of sync after push",
755 );
756 }
757 #[cfg(not(debug_assertions))]
758 let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763 let r = frames.pop();
764 #[cfg(debug_assertions)]
765 {
766 if r.is_some() {
767 *frames_top = frames_top.saturating_sub(1);
768 }
769 debug_assert_eq!(
770 *frames_top as usize,
771 frames.len(),
772 "P17-D frames_top out of sync after pop",
773 );
774 }
775 #[cfg(not(debug_assertions))]
776 let _ = frames_top;
777 r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — whether the `LUNA_AOT_PROBE`
/// diagnostic switch is on.
///
/// The environment variable is read once, on the first call; the answer is
/// stored in a `OnceLock` and every later call returns the stored value.
/// The switch counts as on iff the variable is set to a non-empty value, so
/// it is off by default and production deploys emit no diagnostic prints.
fn jit_probe_enabled() -> bool {
    use std::sync::OnceLock;

    static PROBE_ON: OnceLock<bool> = OnceLock::new();
    *PROBE_ON
        .get_or_init(|| matches!(std::env::var("LUNA_AOT_PROBE"), Ok(v) if !v.is_empty()))
}
794
795impl Vm {
796 /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797 /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798 /// the Vec replacement to keep the shadow valid.
799 #[inline(always)]
800 fn frames_resync(&mut self) {
801 // Debug-only Week 1 — see `frames_push_sync` comment.
802 #[cfg(debug_assertions)]
803 {
804 self.frames_top = self.frames.len() as u32;
805 }
806 }
807
808 // ====================================================================
809 // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810 //
811 // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812 // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813 // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814 // sites to consume them. Phase 4 removes Vec<CallFrame>.
815 //
816 // Preconditions (debug-asserted):
817 // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818 // - self.stack.len() > base + max_stack (caller has grown stack)
819 // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820 // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821 //
822 // No release-build cost when unused (LTO strips dead methods).
823 // ====================================================================
824
825 /// Write a Lua frame's closure pointer into `stack[base-2]`.
826 /// The caller must ensure `base >= 2` and the slot is within the
827 /// stack's allocated range.
828 #[inline]
829 #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830 fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831 debug_assert!(
832 base >= 2,
833 "frame closure slot needs base >= 2; got {}",
834 base
835 );
836 let idx = (base - 2) as usize;
837 debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838 self.stack[idx] = Value::Closure(cl);
839 }
840
841 /// Read a Lua frame's closure pointer from `stack[base-2]`.
842 /// Returns `None` if the slot doesn't hold a closure (caller is
843 /// expected to treat that as a corrupt frame).
844 ///
845 /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846 /// to avoid the enum-match cost on the hot path. Tag check via
847 /// 1-byte load + branch + `as_closure_unchecked` payload load.
848 #[inline]
849 #[allow(dead_code)]
850 fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851 debug_assert!(base >= 2);
852 let v = self.stack.get((base - 2) as usize)?;
853 if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854 // SAFETY: tag byte just verified == CLOSURE.
855 Some(unsafe { v.as_closure_unchecked() })
856 } else {
857 None
858 }
859 }
860
861 /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862 /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863 /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864 /// cleanly through the value stack without losing bits.
865 #[inline]
866 #[allow(dead_code)]
867 fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868 debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869 let idx = (base - 1) as usize;
870 debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871 self.stack[idx] = Value::Int(marker.to_raw());
872 }
873
874 /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875 /// `None` if the slot isn't a `Value::Int` (caller treats as a
876 /// corrupt frame); the kind tag itself may still be invalid, in
877 /// which case [`FrameMarker::kind`] returns `None` on the result.
878 ///
879 /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880 /// for the tag check + `as_int_unchecked` for the payload load.
881 #[inline]
882 #[allow(dead_code)]
883 fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884 debug_assert!(base >= 1);
885 let v = self.stack.get((base - 1) as usize)?;
886 if v.tag_byte() == crate::runtime::value::tag::INT {
887 // SAFETY: tag byte just verified == INT.
888 Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889 unsafe { v.as_int_unchecked() },
890 ))
891 } else {
892 None
893 }
894 }
895
896 /// Build the raw `Vm` struct without main coroutine / RNG seed / library
897 /// setup. Private helper shared by `Vm::new` and `Vm::new_minimal`; the
898 /// caller is responsible for the rest of the bring-up.
899 fn new_inner(version: LuaVersion) -> Vm {
900 let mut heap = Heap::new();
901 // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
902 // values strongly. gc.lua's "weak tables" section relies on that.
903 heap.no_ephemeron = version <= LuaVersion::Lua51;
904 // PUC 5.3 needs two GC cycles to finalize a table caught in a
905 // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
906 // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
907 heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
908 let globals = heap.new_table();
909 let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();
910
911 Vm {
912 heap,
913 stack: Vec::new(),
914 frames: Vec::new(),
915 frames_top: 0,
916 open_upvals: Vec::new(),
917 tbc: Vec::new(),
918 top: 0,
919 globals,
920 type_mt: [None; 5],
921 mm_names,
922 c_depth: 0,
923 pcall_depth: 0,
924 nny: 0,
925 msgh_depth: 0,
926 terminating: None,
927 rng: [0; 4],
928 started: std::time::Instant::now(),
929 version,
930 closing_err: None,
931 current: None,
932 main_ctx: None,
933 yielding: None,
934 native_nresults: -1,
935 main_coro: None,
936 // PUC 5.4+ boots in GENERATIONAL mode (the first
937 // `collectgarbage("generational")` reports "generational"
938 // as the previous mode — v2.14 dialect fixture 5.4/549;
939 // 5.5 behaves the same, probed against lua5.5). luna's
940 // collector is a single incremental engine either way;
941 // this field is the MODE REPORT the stdlib exposes.
942 gc_mode: if version >= crate::version::LuaVersion::Lua54 {
943 "generational"
944 } else {
945 "incremental"
946 },
947 gc_top: 0,
948 gc_pause: 200,
949 gc_stepmul: 100,
950 gc_stepsize: 13,
951 gc_finalizing: false,
952 capi_stack: Vec::new(),
953 capi_cstr_pin: None,
954 warn_state: WarnState::Off,
955 warn_buf: Vec::new(),
956 warn_log: Vec::new(),
957 instr_budget: None,
958 bytecode_loading: true,
959 puc_bytecode_loading: false,
960 loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
961 registry: None,
962 file_mt: None,
963 io_input: None,
964 io_output: None,
965 hook: HookState::default(),
966 in_hook: false,
967 pending_tailcalls: 0,
968 errored_native: None,
969 hook_ftransfer: 0,
970 hook_ntransfer: 0,
971 pending_tm: None,
972 pending_is_hook: false,
973 error_traceback: None,
974 public_call_depth: 0,
975 running_natives: Vec::new(),
976 running_native_slots: Vec::new(),
977 // v1.1 A2 — JIT-specific state factored into `JitState`
978 // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
979 // `install_jit_backend` / `luaL_newstate` swap in
980 // `CraneliftBackend` for callers that want JIT acceleration.
981 jit: crate::vm::jit_state::JitState::with_null_backend(),
982 // v1.1 B12 — host roots ticket pool for the `Lua` facade.
983 host_roots: Vec::new(),
984 // v1.3 Phase ML — MacroLua registry. Pre-populated with
985 // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
986 // when this Vm is constructed under `LuaVersion::MacroLua`.
987 macro_registry: if version == LuaVersion::MacroLua {
988 crate::frontend::macro_expander::MacroRegistry::with_builtins()
989 } else {
990 crate::frontend::macro_expander::MacroRegistry::new()
991 },
992 host_roots_free: Vec::new(),
993 sort_scratch: Vec::new(),
994 // v1.2 Track B — LuaUserdata trait sugar's per-Vm
995 // metatable cache. Populated lazily by register_userdata.
996 userdata_metatables: std::collections::HashMap::new(),
997 // v1.1 B6 — error classification metadata. Defaults to
998 // Runtime; set at known sites (syntax / budget trip /
999 // native error / type error).
1000 last_error_kind: crate::vm::error::LuaErrorKind::default(),
1001 last_error_source: None,
1002 // v1.1 B10 Stage 1 — async embedder fields. Defaults
1003 // preserve sync behavior bit-for-bit (`async_mode = false`
1004 // means the budget hot loop errors out exactly as v1.0).
1005 async_mode: false,
1006 async_waker: None,
1007 async_slice_size: 10_000,
1008 host_yield_pending: false,
1009 // v1.1 B10 Stage 2 — pending async-native state. Empty by
1010 // default; populated only by the dispatcher when an
1011 // async-marked NativeClosure is invoked under async_mode.
1012 pending_async_native_fut: None,
1013 pending_async_native_ctx: None,
1014 }
1015 }
1016
1017 /// Build a fully-loaded Vm — the default for embedders that want PUC's
1018 /// standard library surface. Equivalent to `Vm::new_minimal(version)`
1019 /// followed by `vm.open_all_libs()`.
1020 pub fn new(version: LuaVersion) -> Vm {
1021 let mut vm = Vm::new_minimal(version);
1022 vm.open_all_libs();
1023 vm
1024 }
1025
1026 /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1027 /// that want a sandbox (Redis-style scripts, in-game scripting with
1028 /// a curated API) call this and then `open_base` / `open_math` / etc.
1029 /// selectively. The Vm is otherwise fully initialized (main coroutine,
1030 /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1031 pub fn new_minimal(version: LuaVersion) -> Vm {
1032 let mut vm = Vm::new_inner(version);
1033 let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1034 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1035 unsafe { mc.as_mut() }.status = CoroStatus::Running;
1036 vm.main_coro = Some(mc);
1037 let (a, b) = vm.rng_auto_seed();
1038 vm.rng_seed(a as u64, b as u64);
1039 vm
1040 }
1041
1042 /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
1043 /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
1044 /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
1045 /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
1046 /// closures already populated `Proto.jit: JitProtoState::Compiled`
1047 /// does NOT evict those cached entries — call right after
1048 /// construction for a clean swap.
1049 ///
1050 /// Naming: `install_jit_backend` (not `install_default_jit`)
1051 /// because the "default" in luna-core is `NullJitBackend`; the
1052 /// "default JIT" lives in the `luna` crate.
1053 pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
1054 where
1055 C: crate::jit::IntChunkCompiler + 'static,
1056 T: crate::jit::TraceCompiler + 'static,
1057 {
1058 self.jit.chunk_compiler = Box::new(chunk);
1059 self.jit.trace_compiler = Box::new(trace);
1060 }
1061
1062 /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
1063 /// storage holder. Default is [`crate::jit::NullJitStorage`];
1064 /// the `luna_jit` crate's `install_default_jit` pairs this with
1065 /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
1066 /// also install a fresh `CraneliftJitStorage`. Storage holds
1067 /// the per-`Vm` JIT cache + handle collections that used to be
1068 /// `thread_local!`s in `luna_jit::jit_backend`.
1069 ///
1070 /// Idempotency: re-installing storage on a Vm that already
1071 /// holds compiled-trace pointers WILL evict their owners (the
1072 /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
1073 /// pages). Call right after construction for a clean swap.
1074 pub fn install_jit_storage<S>(&mut self, storage: S)
1075 where
1076 S: crate::jit::JitStorage + 'static,
1077 {
1078 self.jit.storage = Box::new(storage);
1079 }
1080
1081 /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
1082 /// reports "skipped" so every closure stays on the interpreter
1083 /// path, and the trace recorder's compile attempt always returns
1084 /// `None`. Intended for tests that want to verify the trait
1085 /// boundary works in a JIT-free configuration, and for the future
1086 /// `luna-core` build path that ships without Cranelift.
1087 ///
1088 /// Calling this on a Vm whose closures already populated
1089 /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
1090 /// cached entries — the dispatcher will still call into them. For
1091 /// a truly JIT-free run, call this immediately after construction.
1092 pub fn install_null_jit(&mut self) {
1093 self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
1094 self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
1095 }
1096
1097 /// Open the entire 5.5 standard library on a `new_minimal`-built Vm.
1098 /// `Vm::new` calls this; sandboxed embedders open libraries one at a
1099 /// time instead (`open_base`, `open_math`, `open_table`, …).
1100 pub fn open_all_libs(&mut self) {
1101 self.open_base();
1102 self.open_math();
1103 self.open_table();
1104 self.open_string();
1105 self.open_utf8();
1106 self.open_os_io();
1107 self.open_debug();
1108 self.open_coroutine();
1109 self.open_package();
1110 // PUC 5.2 introduced `bit32`; 5.3 retired it in the manual BUT
1111 // the stock 5.3 build ships -DLUA_COMPAT_5_2, which keeps the
1112 // library loaded. The diff ground truth is the default build
1113 // (v2.14 dialect fixture 5.3/535), so expose it under 5.2 AND
1114 // 5.3; 5.4 dropped the compat default for real.
1115 if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
1116 self.open_bit32();
1117 }
1118 }
1119
1120 /// Install the base library (`print`, `type`, `pairs`, `tostring`,
1121 /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
1122 /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
1123 /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
1124 /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
1125 /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
1126 /// once per Vm.
1127 pub fn open_base(&mut self) {
1128 crate::vm::builtins::open_base(self);
1129 }
1130 /// Install the `math` standard library.
1131 pub fn open_math(&mut self) {
1132 crate::vm::lib_math::open_math(self);
1133 }
1134 /// Install the `table` standard library.
1135 pub fn open_table(&mut self) {
1136 crate::vm::lib_table::open_table(self);
1137 }
1138 /// Install the `string` standard library (and the shared string metatable).
1139 pub fn open_string(&mut self) {
1140 crate::vm::lib_string::open_string(self);
1141 }
1142 /// Install the `utf8` standard library (5.3+).
1143 pub fn open_utf8(&mut self) {
1144 crate::vm::lib_utf8::open_utf8(self);
1145 }
1146 /// `os` and `io` are merged because file userdata shares state with both
1147 /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
1148 /// wraps `os.execute`'s shell).
1149 pub fn open_os_io(&mut self) {
1150 crate::vm::lib_os_io::open_os_io(self);
1151 }
1152 /// Install the `debug` standard library (introspection / hooks). Off by
1153 /// default for sandbox embedders.
1154 pub fn open_debug(&mut self) {
1155 crate::vm::lib_debug::open_debug(self);
1156 }
1157 /// Install the `coroutine` standard library.
1158 pub fn open_coroutine(&mut self) {
1159 crate::vm::lib_coroutine::open_coroutine(self);
1160 }
1161 /// `package` plus the 5.1-only `module` and `package.seeall` aliases.
1162 pub fn open_package(&mut self) {
1163 crate::vm::lib_os_io::open_package(self);
1164 }
1165 /// 5.2-only `bit32` library (5.3+ retired in favour of native bitwise
1166 /// ops on 64-bit integers).
1167 pub fn open_bit32(&mut self) {
1168 crate::vm::lib_bit32::open_bit32(self);
1169 }
1170
1171 /// xoshiro256** next.
1172 pub(crate) fn rng_next(&mut self) -> u64 {
1173 let s = &mut self.rng;
1174 let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1175 let t = s[1] << 17;
1176 s[2] ^= s[0];
1177 s[3] ^= s[1];
1178 s[1] ^= s[2];
1179 s[0] ^= s[3];
1180 s[2] ^= t;
1181 s[3] = s[3].rotate_left(45);
1182 result
1183 }
1184
1185 /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1186 pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1187 // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1188 // state), then 16 discards to spread the seed. Matches PUC's exact
1189 // sequence so the low-level conformance test passes.
1190 self.rng = [a, 0xff, b, 0];
1191 for _ in 0..16 {
1192 self.rng_next();
1193 }
1194 }
1195
1196 /// Wall-clock since VM creation (os.clock approximation).
1197 pub(crate) fn uptime(&self) -> std::time::Duration {
1198 self.started.elapsed()
1199 }
1200
1201 /// Entropy for math.randomseed() with no arguments.
1202 pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1203 let t = std::time::SystemTime::now()
1204 .duration_since(std::time::UNIX_EPOCH)
1205 .map(|d| d.as_nanos() as u64)
1206 .unwrap_or(0);
1207 let addr = &self.rng as *const _ as u64;
1208 (t as i64, addr as i64)
1209 }
1210
1211 /// Allocate a native function object (no upvalues): builtin registration.
1212 pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1213 Value::Native(self.heap.new_native(f, Box::new([])))
1214 }
1215
1216 /// Allocate a native function object with captured upvalues.
1217 pub fn native_with(
1218 &mut self,
1219 f: crate::runtime::value::NativeFn,
1220 upvals: Box<[Value]>,
1221 ) -> Value {
1222 Value::Native(self.heap.new_native(f, upvals))
1223 }
1224
1225 /// Install the shared string metatable (string library, P04).
1226 pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
1227 self.type_mt[3] = mt;
1228 }
1229
1230 /// The current globals table (`_G` / `_ENV` source for new chunks).
1231 pub fn globals(&self) -> Gc<Table> {
1232 self.globals
1233 }
1234
1235 /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1236 /// Library code that pushes a known number of fresh slots — e.g.
1237 /// `table.unpack` returning N values — consults this to refuse when
1238 /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1239 /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1240 /// coroutine's already-built table eats a few slots, so an unpack of
1241 /// ~lim values can't fit.
1242 pub(crate) fn stack_room(&self) -> i64 {
1243 PUC_MAXSTACK - (self.stack.len() as i64)
1244 }
1245
1246 /// Repoint the thread's "global table" used by *future* `Vm::load` calls
1247 /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
1248 /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
1249 /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
1250 pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
1251 self.globals = env;
1252 }
1253
1254 /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
1255 /// 5.5). Determines numeric semantics, available standard libraries, and
1256 /// metamethod behavior.
1257 pub fn version(&self) -> LuaVersion {
1258 self.version
1259 }
1260
1261 /// Set a global by name. `v` may be any `IntoValue`: a primitive
1262 /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1263 /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1264 /// `Gc<NativeClosure>` handle.
1265 ///
1266 /// Returns `Err(LuaError)` only if the globals table overflows
1267 /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1268 /// String interning + key construction cannot fail.
1269 ///
1270 /// ```
1271 /// # use luna_core::vm::Vm;
1272 /// # use luna_core::version::LuaVersion;
1273 /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1274 /// vm.set_global("answer", 42).unwrap();
1275 /// vm.set_global("ratio", 0.5_f64).unwrap();
1276 /// vm.set_global("hello", "world").unwrap();
1277 /// let r = vm.eval("return answer, ratio, hello").unwrap();
1278 /// assert_eq!(r.len(), 3);
1279 /// ```
1280 pub fn set_global<V: crate::vm::IntoValue>(
1281 &mut self,
1282 name: &str,
1283 v: V,
1284 ) -> Result<(), LuaError> {
1285 let v = v.into_value(self);
1286 let k = Value::Str(self.heap.intern(name.as_bytes()));
1287 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1288 unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1289 self.heap
1290 .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1291 Ok(())
1292 }
1293
1294 /// Backward write barrier shorthand for native lib code: demote `t` from
1295 /// BLACK back to gray so the next propagate step re-traces its fields.
1296 /// No-op outside Propagate (parent is never BLACK at mutation time).
1297 pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1298 self.heap
1299 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1300 }
1301
1302 /// Forward write barrier shorthand: a closed upvalue is a single-slot
1303 /// container — `barrier_forward` is cheaper than `barrier_back` here.
1304 /// No-op outside Propagate.
1305 pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1306 self.heap
1307 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1308 }
1309
1310 /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
1311 /// under non-MacroLua dialects (the macro is stored but the load
1312 /// path only consults the registry when
1313 /// `self.version == LuaVersion::MacroLua`).
1314 ///
1315 /// `name` is stored without the leading `@` — source code writes
1316 /// `@double(x)` to invoke a macro registered as `"double"`.
1317 pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
1318 self.macro_registry.register(name, m);
1319 }
1320
1321 /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom).
1322 /// Mostly useful for tests / dogfood resets.
1323 pub fn clear_macros(&mut self) {
1324 self.macro_registry.clear();
1325 }
1326
1327 /// Parse + compile a chunk and close it over the globals table.
1328 pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1329 // Reject oversize input *before* handing the parser/lexer a
1330 // potentially multi-GB slice. The PUC-shaped `not enough memory`
1331 // message keeps `heavy.lua::loadrep` compatibility: that test
1332 // accepts either `string length overflow` or `not enough memory`
1333 // as the failure mode for a feeder loop that outruns the host
1334 // allocator. See `set_loader_input_budget`.
1335 if src.len() > self.loader_input_budget {
1336 return Err(SyntaxError {
1337 line: 0,
1338 msg: b"not enough memory".to_vec(),
1339 });
1340 }
1341 // a precompiled (binary) chunk is undumped; source is parsed + compiled
1342 let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1343 if is_bytecode && !self.bytecode_loading {
1344 return Err(SyntaxError {
1345 line: 0,
1346 msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1347 });
1348 }
1349 let proto = if is_bytecode {
1350 let allow_puc = self.puc_bytecode_loading;
1351 crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1352 |msg| SyntaxError {
1353 line: 0,
1354 msg: msg.into_bytes(),
1355 },
1356 )?
1357 } else if self.version.is_macro_lua() {
1358 // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1359 // token vec, run the macro expander pre-pass against the
1360 // per-Vm registry, then hand the rewritten stream to
1361 // `parse_tokens`. The AST + compiler are dialect-agnostic
1362 // because by this point all `@`/quote tokens are gone.
1363 let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1364 let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1365 loop {
1366 let t = lexer.next_token()?;
1367 let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1368 raw.push(t);
1369 if eof {
1370 break;
1371 }
1372 }
1373 // Drop the trailing Eof — expander operates on the body and
1374 // `parse_tokens` reinserts Eof when it runs out of tokens.
1375 raw.pop();
1376 let expanded = self.macro_registry.expand(raw)?;
1377 let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1378 compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1379 } else {
1380 let ast = parse(src, self.version)?;
1381 compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1382 };
1383 // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1384 // upvalue with the globals table when the closure has *exactly* one
1385 // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1386 // function with two-or-more upvalues keeps every cell at nil; the
1387 // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1388 // :293's `assert(x() == nil)` pins this contract.
1389 let n = proto.upvals.len();
1390 let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1391 if n == 0 {
1392 // synthetic main chunk has no declared upvalues, but the engine
1393 // still expects at least one cell so the host can probe via
1394 // `debug.upvalueid` etc. Match the historical luna shape.
1395 ups.push(
1396 self.heap
1397 .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1398 );
1399 } else if n == 1 {
1400 ups.push(
1401 self.heap
1402 .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1403 );
1404 } else {
1405 for _ in 0..n {
1406 ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1407 }
1408 }
1409 Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1410 }
1411
1412 /// Compile and run `src` as an anonymous chunk; return its results.
1413 /// Source name in the traceback is `"=eval"`. Syntax errors are
1414 /// surfaced as `LuaError` carrying the formatted PUC-style message
1415 /// (interned through the heap so the error value composes with
1416 /// `pcall` / `error_text` like any runtime error).
1417 pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
1418 self.eval_chunk(src, "=eval")
1419 }
1420
1421 /// Render an error value for messages/tests. Non-string errors —
1422 /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1423 /// (`"(error object is a table value)"`); embedders that need
1424 /// structured payloads should inspect `e.0` directly. Errors whose
1425 /// text starts with `"native panic:"` indicate a Rust panic
1426 /// crossed `catch_unwind` — the Vm may be inconsistent and should
1427 /// be dropped (do not reuse).
1428 pub fn error_text(&self, e: &LuaError) -> String {
1429 match e.0 {
1430 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1431 v => format!("(error object is a {} value)", v.type_name()),
1432 }
1433 }
1434
1435 /// Render an error value the way PUC's standalone `msghandler`
1436 /// does (lua.c): strings pass through, numbers stringify, and any
1437 /// other object is given a chance at its `__tostring` metamethod
1438 /// (the result must be a string) before collapsing to the
1439 /// `"(error object is a … value)"` tag. Needs `&mut self` because
1440 /// `__tostring` runs arbitrary Lua — `error_text` remains the
1441 /// non-executing variant (v2.14 CV.2, fixture 5.5/321).
1442 pub fn error_display(&mut self, e: &LuaError) -> String {
1443 match e.0 {
1444 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1445 v @ (Value::Int(_) | Value::Float(_)) => {
1446 String::from_utf8_lossy(&self.tostring_basic(v)).into_owned()
1447 }
1448 v => {
1449 let mm = self.get_mm(v, Mm::ToString);
1450 if !mm.is_nil()
1451 && let Ok(r) = self.call_value(mm, &[v])
1452 && let Some(Value::Str(s)) = r.first()
1453 {
1454 return String::from_utf8_lossy(s.as_bytes()).into_owned();
1455 }
1456 format!("(error object is a {} value)", v.type_name())
1457 }
1458 }
1459 }
1460
1461 /// Call any callable value from the host (or from natives like pcall).
1462 pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1463 // host-level entry (no enclosing exec): drop any error state from a
1464 // prior call that propagated uncaught (`error_traceback` would
1465 // otherwise leak into the next debug.traceback call).
1466 if self.public_call_depth == 0 {
1467 self.error_traceback = None;
1468 }
1469 self.public_call_depth += 1;
1470 // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1471 // chunk whose body fits the S1 int-arith whitelist short-circuits
1472 // the whole interpreter dispatch and runs straight through the
1473 // mmap'd native code. The lookup is one Cell::get + one match —
1474 // the slow path (compile attempt on first reach) is paid once per
1475 // Proto.
1476 if args.is_empty()
1477 && let Value::Closure(cl) = f
1478 && let Some(vs) = self.try_jit_call(cl)
1479 {
1480 self.public_call_depth -= 1;
1481 return Ok(vs);
1482 }
1483 let r = self.call_value_impl(f, args, true);
1484 self.public_call_depth -= 1;
1485 r
1486 }
1487
1488 /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1489 /// `Some(values)` when the cached native fn is callable for a
1490 /// zero-arg call. (Non-zero-arg dispatch is handled by
1491 /// `try_jit_call_op` from inside `begin_call`.)
1492 fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1493 use crate::runtime::function::JitProtoState;
1494 if !self.jit.enabled {
1495 return None;
1496 }
1497 let proto = cl.proto;
1498 if let JitProtoState::Untried = proto.jit.get() {
1499 self.populate_jit_cache(proto);
1500 }
1501 match proto.jit.get() {
1502 JitProtoState::Compiled {
1503 entry,
1504 num_args: 0,
1505 returns_one,
1506 arg_float_mask: _,
1507 arg_table_mask: _,
1508 ret_is_float,
1509 ret_is_table,
1510 } => {
1511 // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1512 let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1513 // P11-S5c / S5d.J — install the active Vm + closure
1514 // for any Rust helper the JIT'd code may call (e.g.
1515 // `luna_jit_new_table`, `luna_jit_upval_get`) via
1516 // cranelift `Linkage::Import`. RAII clear on return.
1517 // Chunks with no upvalue reads don't touch the closure
1518 // slot, paying nothing.
1519 // v1.1 A1 Session A — route through chunk_compiler so
1520 // the NullJitBackend path stays inert. Raw-ptr arg
1521 // avoids the &mut self borrow conflict against the
1522 // shared self.jit.chunk_compiler read.
1523 let vm_ptr: *mut Vm = self;
1524 let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1525 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1526 let r = unsafe { f() };
1527 drop(_jit_vm_guard);
1528 // P11-S5d.E' — a JIT helper may have detected a metatable
1529 // on a table operand and parked a deopt request here.
1530 // Discard the sentinel value and return None so the caller
1531 // re-runs the call through the interpreter, which honours
1532 // __index/__newindex.
1533 if self.jit.pending_err.take().is_some() {
1534 return None;
1535 }
1536 Some(if returns_one {
1537 let v = if ret_is_float {
1538 Value::Float(f64::from_bits(r as u64))
1539 } else if ret_is_table {
1540 Value::Table(crate::runtime::Gc::from_ptr(
1541 r as *mut crate::runtime::Table,
1542 ))
1543 } else {
1544 Value::Int(r)
1545 };
1546 vec![v]
1547 } else {
1548 Vec::new()
1549 })
1550 }
1551 // Non-zero-arg Compiled state: call_value's empty-args
1552 // fast path can't drive it. Op::Call handles those.
1553 JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1554 }
1555 }
1556
1557 /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1558 /// `Compiled { … }` or `Failed`; idempotent on already-populated
1559 /// states (call sites guard with a get before invoking).
1560 ///
1561 /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1562 /// `proto.code`. Compiled artefacts live in the thread-local
1563 /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1564 /// `Vm`s loading the same source skip the cranelift compile step
1565 /// entirely.
1566 fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1567 use crate::runtime::function::JitProtoState;
1568 let version = self.version();
1569 let pre53 = version <= crate::version::LuaVersion::Lua53;
1570 // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1571 // are Float). The JIT's `GetUpval` ValueRead path uses this
1572 // to default-pin upvalue reads to Float without a tag check.
1573 let float_only = version <= crate::version::LuaVersion::Lua52;
1574 // v2.0 Track J sub-step J-B — split-borrow JitState so the
1575 // trait method can take `&mut dyn JitStorage` without
1576 // double-borrowing self.jit.
1577 let jit = &mut self.jit;
1578 let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1579 match jit
1580 .chunk_compiler
1581 .try_compile(storage, proto, pre53, float_only)
1582 {
1583 crate::jit::CompileResult::Compiled {
1584 entry,
1585 num_args,
1586 returns_one,
1587 arg_float_mask,
1588 arg_table_mask,
1589 ret_is_float,
1590 ret_is_table,
1591 } => {
1592 proto.jit.set(JitProtoState::Compiled {
1593 entry,
1594 num_args,
1595 returns_one,
1596 arg_float_mask,
1597 arg_table_mask,
1598 ret_is_float,
1599 ret_is_table,
1600 });
1601 }
1602 crate::jit::CompileResult::Skipped => {
1603 proto.jit.set(JitProtoState::Failed);
1604 }
1605 }
1606 }
1607
1608 /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
1609 /// before `push_frame`. Returns `true` when the call was handled
1610 /// in-place (no new Lua frame). Constraints: every arg slot must
1611 /// be `Value::Int`, the cached arity must match the call site's
1612 /// `nargs`, the host wanted-count `wanted` is honoured by
1613 /// `finish_results`. Also bails when a debug hook is armed —
1614 /// JIT'd code does not fire line / call / return hooks, so any
1615 /// active hook makes the interpreter the source of truth.
1616 fn try_jit_call_op(
1617 &mut self,
1618 cl: Gc<LuaClosure>,
1619 func_slot: u32,
1620 nargs: u32,
1621 wanted: i32,
1622 ) -> bool {
1623 use crate::runtime::function::JitProtoState;
1624 if !self.jit.enabled {
1625 return false;
1626 }
1627 // Any active debug hook means the interpreter has to run the
1628 // call so the hook gets the expected events.
1629 if self.hook.func.is_some() || self.hook.rust_func.is_some() {
1630 return false;
1631 }
1632 let proto = cl.proto;
1633 if let JitProtoState::Untried = proto.jit.get() {
1634 self.populate_jit_cache(proto);
1635 }
1636 let JitProtoState::Compiled {
1637 entry,
1638 num_args,
1639 returns_one,
1640 arg_float_mask,
1641 arg_table_mask,
1642 ret_is_float,
1643 ret_is_table,
1644 } = proto.jit.get()
1645 else {
1646 return false;
1647 };
1648 if num_args as u32 != nargs {
1649 return false;
1650 }
1651 // Pack args into i64 bit-patterns per the per-slot expected
1652 // kind. A Float-typed slot accepts Value::Float verbatim and
1653 // promotes Value::Int(x) via i64 → f64; a Table-typed slot
1654 // accepts only Value::Table and passes the raw Gc ptr; an
1655 // Int-typed slot accepts only Value::Int. Any other shape
1656 // bails to the interpreter so the call's actual dynamics
1657 // (metamethod dispatch / type-coerce) take over.
1658 let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
1659 [0; crate::jit::MAX_JIT_ARITY as usize];
1660 for i in 0..num_args as usize {
1661 let v = self.stack[(func_slot + 1) as usize + i];
1662 let want_float = (arg_float_mask >> i) & 1 == 1;
1663 let want_table = (arg_table_mask >> i) & 1 == 1;
1664 args[i] = match (want_table, want_float, v) {
1665 (true, _, Value::Table(t)) => t.as_ptr() as i64,
1666 (false, false, Value::Int(x)) => x,
1667 (false, true, Value::Float(f)) => f.to_bits() as i64,
1668 (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
1669 _ => return false,
1670 };
1671 }
1672 // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
1673 // matching guard in `try_jit_call`.
1674 // v1.1 A1 Session A — route through chunk_compiler.
1675 let vm_ptr: *mut Vm = self;
1676 let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1677 // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1678 let r = unsafe {
1679 match num_args {
1680 0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
1681 1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
1682 2 => {
1683 (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
1684 }
1685 3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
1686 args[0], args[1], args[2],
1687 ),
1688 4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
1689 args[0], args[1], args[2], args[3],
1690 ),
1691 _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
1692 }
1693 };
1694 drop(_jit_vm_guard);
1695 // P11-S5d.E' — see matching path in `try_jit_call`. A helper
1696 // flagged a metatable on a table operand; bail to the interpreter
1697 // so `push_frame` runs the call from scratch.
1698 if self.jit.pending_err.take().is_some() {
1699 return false;
1700 }
1701 // Write result at func_slot, replacing the closure value, then
1702 // hand to finish_results to pad/truncate per the call site's
1703 // `wanted` count.
1704 if returns_one {
1705 let v = if ret_is_float {
1706 Value::Float(f64::from_bits(r as u64))
1707 } else if ret_is_table {
1708 Value::Table(crate::runtime::Gc::from_ptr(
1709 r as *mut crate::runtime::Table,
1710 ))
1711 } else {
1712 Value::Int(r)
1713 };
1714 self.stack[func_slot as usize] = v;
1715 self.finish_results(func_slot, 1, wanted);
1716 } else {
1717 self.finish_results(func_slot, 0, wanted);
1718 }
1719 true
1720 }
1721
1722 /// `call_value` with control over the `from_c` debug boundary. A `__close`
1723 /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1724 /// invokes it inside that ci), so it is called with `from_c = false`: its
1725 /// debug parent is the closing function, not a synthetic C level.
1726 fn call_value_impl(
1727 &mut self,
1728 f: Value,
1729 args: &[Value],
1730 from_c: bool,
1731 ) -> Result<Vec<Value>, LuaError> {
1732 if self.c_depth >= MAX_C_DEPTH {
1733 return Err(self.rt_err("stack overflow"));
1734 }
1735 self.c_depth += 1;
1736 let func_slot = self.stack.len() as u32;
1737 self.stack.push(f);
1738 self.stack.extend_from_slice(args);
1739 self.top = self.stack.len() as u32;
1740 let r = self.call_at(func_slot, args.len() as u32, from_c);
1741 self.c_depth -= 1;
1742 if r.is_err()
1743 && self.yielding.is_none()
1744 && self.terminating.is_none()
1745 && !self.host_yield_pending
1746 && self.pending_async_native_fut.is_none()
1747 {
1748 // A `coroutine.yield` in flight raises a sentinel error to unwind the
1749 // Rust stack, but the suspended coroutine's frames/registers (which
1750 // sit at/above `func_slot`) must survive for the next resume — so we
1751 // only truncate on a real error. A self-close termination is in the
1752 // same boat: the dying thread's state is discarded wholesale.
1753 // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1754 // the same boat as `yielding`: the next `EvalFuture::poll`
1755 // resumes the same call, so the in-flight frames must
1756 // survive.
1757 self.stack.truncate(func_slot as usize);
1758 self.top = func_slot;
1759 }
1760 r
1761 }
1762
1763 /// Invoke `f` with the running thread marked non-yieldable for the duration
1764 /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1765 /// boundary and errors instead of suspending. Used by library callbacks
1766 /// (sort comparator, gsub replacement) that run via synchronous Rust
1767 /// recursion and so could not be re-entered after a yield.
1768 pub(crate) fn call_noyield(
1769 &mut self,
1770 f: Value,
1771 args: &[Value],
1772 ) -> Result<Vec<Value>, LuaError> {
1773 self.nny += 1;
1774 let r = self.call_value(f, args);
1775 self.nny -= 1;
1776 r
1777 }
1778
1779 // ---- coroutines (P05) ----
1780
1781 pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1782 // The new coroutine inherits the creating thread's current globals
1783 // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1784 // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1785 // it here picks the creator regardless of which coro is running.
1786 self.heap.new_coro(body, self.globals)
1787 }
1788
1789 /// Is `t` the thread whose context is currently live in the VM?
1790 pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1791 match (self.current, t) {
1792 (None, None) => true,
1793 (Some(a), Some(b)) => a.ptr_eq(b),
1794 _ => false,
1795 }
1796 }
1797
1798 /// Read an open-upvalue slot from its owning thread's stack (the live VM
1799 /// stack if that thread is current, else its saved context).
1800 #[doc(hidden)]
1801 pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1802 let s = slot as usize;
1803 if self.is_current_thread(thread) {
1804 self.stack[s]
1805 } else {
1806 match thread {
1807 Some(co) => co.stack[s],
1808 None => self.main_ctx.as_ref().expect("main context").stack[s],
1809 }
1810 }
1811 }
1812
1813 fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1814 let s = slot as usize;
1815 if self.is_current_thread(thread) {
1816 self.stack[s] = v;
1817 } else {
1818 match thread {
1819 Some(co) => {
1820 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1821 unsafe { co.as_mut() }.stack[s] = v;
1822 // co.stack is traced by Coro::trace; demote co back to
1823 // gray so propagate re-traces this slot if it was
1824 // already black.
1825 self.heap
1826 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1827 }
1828 None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1829 }
1830 }
1831 }
1832
1833 /// Whether `co` is the main thread's identity object.
1834 pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1835 self.main_coro.is_some_and(|m| m.ptr_eq(co))
1836 }
1837
1838 /// The status of `co` from the caller's view. The main thread's identity
1839 /// object has no stored status — it is "running" when nothing else runs,
1840 /// else "normal" (it resumed the active coroutine).
1841 pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1842 if self.is_main_coro(co) {
1843 if self.current.is_none() {
1844 CoroStatus::Running
1845 } else {
1846 CoroStatus::Normal
1847 }
1848 } else {
1849 co.status
1850 }
1851 }
1852
1853 /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1854 /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1855 /// context. Handlers see the coroutine's death error (if it died by error)
1856 /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1857 /// with error `e` and no handler overrode it; `Err` means a handler raised.
1858 pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1859 // re-entrant close: a __close handler closed its own coroutine while the
1860 // outer close is mid-flight (its context is live). Report success and let
1861 // the outer close finish — re-entering the swap would corrupt the stack.
1862 if self.current.is_some_and(|c| c.ptr_eq(co)) {
1863 return Ok(None);
1864 }
1865 // A chain of coroutines whose `__close` handlers each close the previous
1866 // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1867 // The calling handler's `call_value` has already pushed `c_depth` to the
1868 // cap, so here it reads as full first — report PUC's "C stack overflow"
1869 // before the next handler call would surface the plainer "stack overflow".
1870 if self.c_depth >= MAX_C_DEPTH {
1871 return Err(self.rt_err("C stack overflow"));
1872 }
1873 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1874 let death_err = unsafe { co.as_mut() }.error_value.take();
1875 // swap the caller's live context out (into a GC-rooted home) and the
1876 // coroutine's in, mirroring resume_coro, so the __close handlers run on
1877 // the coroutine's stack while everything stays rooted.
1878 let resumer = self.current;
1879 let rctx = self.take_ctx();
1880 match resumer {
1881 Some(r) => {
1882 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1883 let m = unsafe { r.as_mut() };
1884 m.stack = rctx.stack;
1885 m.frames = rctx.frames;
1886 m.open_upvals = rctx.open_upvals;
1887 m.tbc = rctx.tbc;
1888 m.top = rctx.top;
1889 m.pcall_depth = rctx.pcall_depth;
1890 }
1891 None => self.main_ctx = Some(rctx),
1892 }
1893 self.load_coro_ctx(co);
1894 self.current = Some(co);
1895 let result = self.close_slots(0, death_err);
1896 // discard the (now-closed) coroutine context and restore the caller
1897 let _ = self.take_ctx();
1898 match resumer {
1899 Some(r) => {
1900 self.load_coro_ctx(r);
1901 self.current = Some(r);
1902 }
1903 None => {
1904 let m = self.main_ctx.take().expect("main context saved");
1905 self.put_ctx(m);
1906 self.current = None;
1907 }
1908 }
1909 {
1910 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1911 let m = unsafe { co.as_mut() };
1912 m.status = CoroStatus::Dead;
1913 m.stack = Vec::new();
1914 m.frames = Vec::new();
1915 m.open_upvals = Vec::new();
1916 m.tbc = Vec::new();
1917 m.top = 0;
1918 m.pcall_depth = 0;
1919 m.resume_at = None;
1920 m.error_value = None;
1921 }
1922 result.map(|()| death_err)
1923 }
1924
1925 /// `coroutine.running`: the running thread plus whether it is the main one.
1926 pub(crate) fn running_thread(&self) -> (Value, bool) {
1927 match self.current {
1928 Some(co) => (Value::Coro(co), false),
1929 None => (Value::Coro(self.main_coro.expect("main coro")), true),
1930 }
1931 }
1932
1933 /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1934 /// thread) can yield. The main thread never can; any other coroutine can
1935 /// unless it is dead.
1936 pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1937 match co {
1938 Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1939 // the running thread can yield only outside any non-yieldable C call
1940 None => self.current.is_some() && self.nny == 0,
1941 }
1942 }
1943
1944 /// Why `coroutine.yield` may not suspend the running thread right now, as a
1945 /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1946 /// from "inside an unyieldable C call" (sort/gsub callback).
1947 pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1948 if self.current.is_none() {
1949 Some("attempt to yield from outside a coroutine")
1950 } else if self.nny > 0 {
1951 Some("attempt to yield across a C-call boundary")
1952 } else {
1953 None
1954 }
1955 }
1956
    /// The coroutine whose context is currently live (`None` on the main thread).
    ///
    /// Returns the `current` field as-is; nothing is looked up or modified.
    pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
        self.current
    }
1961
1962 /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1963 /// its pending `__close` handlers, then signal termination. The handlers run
1964 /// here, in place, with the thread still non-yieldable (a yield in one hits
1965 /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1966 /// way a yield does — `exec_with` propagates it past any protecting pcall
1967 /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1968 /// clean death (or, if a handler raised, the coroutine's error).
1969 pub(crate) fn close_running(&mut self) -> LuaError {
1970 let death = match self.close_slots(0, None) {
1971 Ok(()) => None,
1972 Err(e) => Some(e.0),
1973 };
1974 self.terminating = Some(death);
1975 LuaError(Value::Nil)
1976 }
1977
1978 /// `coroutine.status` as seen by the caller.
1979 pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1980 match self.effective_coro_status(co) {
1981 CoroStatus::Suspended => "suspended",
1982 CoroStatus::Running => "running",
1983 CoroStatus::Normal => "normal",
1984 CoroStatus::Dead => "dead",
1985 }
1986 }
1987
1988 fn take_ctx(&mut self) -> SavedCtx {
1989 let saved = SavedCtx {
1990 stack: std::mem::take(&mut self.stack),
1991 frames: std::mem::take(&mut self.frames),
1992 open_upvals: std::mem::take(&mut self.open_upvals),
1993 tbc: std::mem::take(&mut self.tbc),
1994 top: self.top,
1995 pcall_depth: self.pcall_depth,
1996 hook: self.hook,
1997 globals: self.globals,
1998 };
1999 self.frames_resync(); // P17-D Week 1 — frames now empty.
2000 saved
2001 }
2002
2003 fn put_ctx(&mut self, c: SavedCtx) {
2004 self.stack = c.stack;
2005 self.frames = c.frames;
2006 self.open_upvals = c.open_upvals;
2007 self.tbc = c.tbc;
2008 self.top = c.top;
2009 self.pcall_depth = c.pcall_depth;
2010 self.hook = c.hook;
2011 self.globals = c.globals;
2012 self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
2013 }
2014
2015 /// Move a coroutine's saved context into the live VM fields.
2016 fn load_coro_ctx(&mut self, co: Gc<Coro>) {
2017 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2018 let m = unsafe { co.as_mut() };
2019 self.stack = std::mem::take(&mut m.stack);
2020 self.frames = std::mem::take(&mut m.frames);
2021 self.open_upvals = std::mem::take(&mut m.open_upvals);
2022 self.tbc = std::mem::take(&mut m.tbc);
2023 self.top = m.top;
2024 self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
2025 self.pcall_depth = m.pcall_depth;
2026 self.hook = m.hook;
2027 self.globals = m.globals;
2028 }
2029
2030 /// Save the live VM context back into a coroutine object.
2031 fn store_coro_ctx(&mut self, co: Gc<Coro>) {
2032 let c = self.take_ctx();
2033 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2034 let m = unsafe { co.as_mut() };
2035 m.stack = c.stack;
2036 m.frames = c.frames;
2037 m.open_upvals = c.open_upvals;
2038 m.tbc = c.tbc;
2039 m.top = c.top;
2040 m.pcall_depth = c.pcall_depth;
2041 m.hook = c.hook;
2042 m.globals = c.globals;
2043 // bulk-overwrite of every collectable field traced by Coro::trace:
2044 // demote the coro back to gray so propagate re-traces its new state.
2045 self.heap
2046 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2047 }
2048
2049 /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
2050 /// or errors. Ok(values) carries yielded or returned values; Err carries an
2051 /// error raised inside the coroutine (the coroutine becomes dead).
2052 pub(crate) fn resume_coro(
2053 &mut self,
2054 co: Gc<Coro>,
2055 args: Vec<Value>,
2056 ) -> Result<Vec<Value>, LuaError> {
2057 match co.status {
2058 CoroStatus::Suspended => {}
2059 CoroStatus::Dead => return Err(self.plain_err("cannot resume dead coroutine")),
2060 _ => return Err(self.plain_err("cannot resume non-suspended coroutine")),
2061 }
2062 if self.c_depth >= MAX_C_DEPTH {
2063 return Err(self.plain_err("C stack overflow"));
2064 }
2065 self.c_depth += 1;
2066 let resumer = self.current;
2067 // save the resumer's live context away
2068 let rctx = self.take_ctx();
2069 match resumer {
2070 Some(r) => {
2071 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2072 let m = unsafe { r.as_mut() };
2073 m.stack = rctx.stack;
2074 m.frames = rctx.frames;
2075 m.open_upvals = rctx.open_upvals;
2076 m.tbc = rctx.tbc;
2077 m.top = rctx.top;
2078 m.pcall_depth = rctx.pcall_depth;
2079 m.globals = rctx.globals;
2080 m.status = CoroStatus::Normal;
2081 // bulk overwrite of every traced field on r — mirror
2082 // store_coro_ctx's barrier_back so propagate re-traces r.
2083 self.heap
2084 .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
2085 }
2086 None => self.main_ctx = Some(rctx),
2087 }
2088 // swap the coroutine in
2089 self.load_coro_ctx(co);
2090 {
2091 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2092 let m = unsafe { co.as_mut() };
2093 m.status = CoroStatus::Running;
2094 m.resumer = resumer;
2095 }
2096 // co.resumer is a traced Gc field; barrier_back covers the new
2097 // resumer reference and any future field writes during this call.
2098 self.heap
2099 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2100 self.current = Some(co);
2101
2102 // drive it
2103 let drive = if co.started {
2104 self.coro_continue(&args)
2105 } else {
2106 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2107 unsafe { co.as_mut() }.started = true;
2108 self.coro_first(co.body, &args)
2109 };
2110
2111 // classify: a self-close termination or a pending yield each win over
2112 // the (sentinel) error they raised to unwind the Rust stack.
2113 let (outcome, status) = if let Some(death) = self.terminating.take() {
2114 // the coroutine closed itself: it dies now, cleanly or with the
2115 // error a `__close` handler raised.
2116 match death {
2117 Some(e) => {
2118 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2119 unsafe { co.as_mut() }.error_value = Some(e);
2120 self.heap
2121 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2122 (Err(LuaError(e)), CoroStatus::Dead)
2123 }
2124 None => (Ok(Vec::new()), CoroStatus::Dead),
2125 }
2126 } else {
2127 match self.yielding.take() {
2128 Some((vals, fslot, nres)) => {
2129 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2130 unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2131 (Ok(vals), CoroStatus::Suspended)
2132 }
2133 None => {
2134 // died: a return is clean, an error is remembered so a later
2135 // `coroutine.close` can report it (PUC lua_closethread).
2136 // Capture the error-point traceback (set by `unwind` before
2137 // popping the failing frames) and prepend a synthetic
2138 // top entry for the C native that initiated the error
2139 // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2140 // on the dead coroutine still shows the error site
2141 // (db.lua :848 family).
2142 if drive.is_err() {
2143 let mut tb = self.error_traceback.take().unwrap_or_default();
2144 if let Some(nm) = self.errored_native.take() {
2145 let mut prefixed: Vec<u8> = Vec::new();
2146 prefixed.extend_from_slice(
2147 format!("\n\t[C]: in function '{nm}'").as_bytes(),
2148 );
2149 prefixed.extend(tb);
2150 tb = prefixed;
2151 }
2152 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2153 unsafe { co.as_mut() }.error_traceback = Some(tb);
2154 }
2155 if let Err(e) = drive {
2156 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2157 unsafe { co.as_mut() }.error_value = Some(e.0);
2158 self.heap
2159 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2160 }
2161 (drive, CoroStatus::Dead)
2162 }
2163 }
2164 };
2165
2166 // save the coroutine's context back and restore the resumer
2167 self.store_coro_ctx(co);
2168 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2169 unsafe { co.as_mut() }.status = status;
2170 match resumer {
2171 Some(r) => {
2172 self.load_coro_ctx(r);
2173 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2174 unsafe { r.as_mut() }.status = CoroStatus::Running;
2175 self.current = Some(r);
2176 }
2177 None => {
2178 let m = self.main_ctx.take().expect("main context saved");
2179 self.put_ctx(m);
2180 self.current = None;
2181 }
2182 }
2183 self.c_depth -= 1;
2184 outcome
2185 }
2186
2187 /// First resume: install the body function at slot 0 and run.
2188 fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2189 self.stack.clear();
2190 self.stack.push(body);
2191 self.stack.extend_from_slice(args);
2192 self.top = self.stack.len() as u32;
2193 match self.begin_call(0, Some(args.len() as u32), -1, true) {
2194 Ok(true) => self.exec_with(1),
2195 Ok(false) => Ok(self.take_results(0)),
2196 Err(e) => Err(e),
2197 }
2198 }
2199
2200 /// Resume after a yield: deliver `args` as the results of the call that
2201 /// yielded, then continue the suspended thread.
2202 fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2203 let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2204 let n = args.len() as u32;
2205 // Restore the full register window of the suspended top frame: a yield
2206 // that unwound through a native (call_value) may have left the stack
2207 // shorter than the frame needs. `base + max_stack` is what push_frame
2208 // allocates; `fslot + n` covers the delivered yield results.
2209 let frame_need = self
2210 .frames
2211 .last()
2212 .and_then(CallFrame::lua)
2213 .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2214 .unwrap_or(0);
2215 let need = frame_need.max((fslot + n) as usize);
2216 if self.stack.len() < need {
2217 self.stack.resize(need, Value::Nil);
2218 }
2219 for (i, &v) in args.iter().enumerate() {
2220 self.stack[fslot as usize + i] = v;
2221 }
2222 self.finish_results(fslot, n, nres);
2223 // the suspended `coroutine.yield` (a C call) now returns its resume
2224 // values: fire the matching "return" hook PUC defers until the resume.
2225 self.hook_return(true, 1, n)?;
2226 self.exec_with(1)
2227 }
2228
2229 /// `coroutine.yield`: suspend the running coroutine, recording where to
2230 /// resume. Errors if called outside a coroutine. Returns a sentinel error
2231 /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2232 pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2233 let nres = self.native_nresults;
2234 self.yielding = Some((vals, func_slot, nres));
2235 // value is irrelevant: resume_coro consults `self.yielding`, not this
2236 LuaError(Value::Nil)
2237 }
2238
2239 /// Install or clear the debug hook on the running thread (`debug.sethook`
2240 /// without a thread argument). Arms the calling frame's `oldpc` to the
2241 /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2242 /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2243 /// native return: the very next traceexec compares against the sethook
2244 /// CALL's line. When the install statement and the following statement are
2245 /// on different source lines (db.lua :322), `changedline` fires for that
2246 /// first statement; when they share a line (db.lua :25 wrapper), they do
2247 /// not, so the wrapper line is not re-fired.
2248 pub(crate) fn install_hook(&mut self, hook: HookState) {
2249 self.hook = hook;
2250 if self.hook.line
2251 && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2252 {
2253 f.hook_oldpc = f.pc.saturating_sub(1);
2254 }
2255 }
2256
2257 /// Install a hook on `target` (`None`/current thread → the live VM fields;
2258 /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2259 /// with an optional thread argument.
2260 ///
2261 /// `target == None` means "no explicit thread argument" — PUC binds that
2262 /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2263 /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2264 /// of whether that's the main thread or a currently-resumed coroutine
2265 /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2266 /// `store_coro_ctx`). So a `None` target should always route to
2267 /// `install_hook` on the live fields. The pre-fix predicate gate
2268 /// `is_current_thread(target)` returned `false` when running inside a
2269 /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2270 /// and silently dropped the hook on the floor — the install happened on
2271 /// no thread at all.
2272 pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2273 if target.is_none() || self.is_current_thread(target) {
2274 self.install_hook(state);
2275 } else if let Some(co) = target {
2276 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2277 let m = unsafe { co.as_mut() };
2278 m.hook = state;
2279 if state.line
2280 && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2281 {
2282 f.hook_oldpc = u32::MAX;
2283 }
2284 // co.hook.func is a traced Value (Coro::trace covers it); demote
2285 // co back to gray so propagate sees the new hook function.
2286 self.heap
2287 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2288 }
2289 }
2290
2291 /// The hook state of `target` (`None`/current → the live VM state).
2292 pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2293 match target {
2294 t if self.is_current_thread(t) => self.hook,
2295 Some(co) => co.hook,
2296 None => self.hook,
2297 }
2298 }
2299
2300 /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
2301 /// hooks disabled (PUC clears the mask) and its results/stack growth are
2302 /// discarded so the interrupted frame's register window is untouched.
2303 /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
2304 fn run_hook(
2305 &mut self,
2306 event: &[u8],
2307 line: Option<i64>,
2308 from_native: bool,
2309 ) -> Result<(), LuaError> {
2310 // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
2311 // synchronous fn pointer call). Both Rust and Lua hooks may be
2312 // installed; both observe each event.
2313 if let Some(rh) = self.hook.rust_func {
2314 let evt = match event {
2315 b"call" => Some(RustHookEvent::Call),
2316 b"return" => Some(RustHookEvent::Return),
2317 b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
2318 b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
2319 b"count" => Some(RustHookEvent::Count),
2320 _ => None,
2321 };
2322 if let Some(evt) = evt {
2323 let was_in_hook = self.in_hook;
2324 self.in_hook = true;
2325 rh(self, evt);
2326 self.in_hook = was_in_hook;
2327 }
2328 }
2329 let Some(hook) = self.hook.func else {
2330 return Ok(());
2331 };
2332 let saved_top = self.top;
2333 let saved_len = self.stack.len();
2334 let name = Value::Str(self.heap.intern(event));
2335 let lv = line.map_or(Value::Nil, Value::Int);
2336 self.in_hook = true;
2337 // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
2338 // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
2339 // ci sat below `hookf` (the function being hooked). When that hooked
2340 // function is native, no Lua frame for it exists in luna's `frames`;
2341 // model it as a synthetic C level by pushing the hook with
2342 // `from_c = true` (then `c_frame_name` reads the caller's call
2343 // instruction → e.g. `name = "sethook"`). When the hooked function is
2344 // Lua (its frame is still on the stack), push with `from_c = false`
2345 // so the level descent lands on it directly. The hook's own frame
2346 // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
2347 // (PUC `CIST_HOOKED`).
2348 self.pending_is_hook = true;
2349 let r = self.call_value_impl(hook, &[name, lv], from_native);
2350 self.pending_is_hook = false;
2351 self.in_hook = false;
2352 self.stack.truncate(saved_len);
2353 self.top = saved_top;
2354 r.map(|_| ())
2355 }
2356
2357 /// Fire the "call" hook on entry to a function, if armed and not already in
2358 /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2359 /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2360 /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2361 /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2362 fn hook_call_with(
2363 &mut self,
2364 from_native: bool,
2365 nargs: u32,
2366 is_tail: bool,
2367 ) -> Result<(), LuaError> {
2368 if self.hook.call
2369 && !self.in_hook
2370 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2371 {
2372 self.hook_ftransfer = 1;
2373 self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2374 // PUC 5.1 didn't distinguish tail-call events — every call,
2375 // including tail-calls, fired plain `"call"`. 5.2 introduced
2376 // the separate `"tail call"` event (mask `"c"` covers both).
2377 // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2378 // "return","tail return","return","tail return"}`.
2379 let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2380 b"tail call"
2381 } else {
2382 b"call"
2383 };
2384 self.run_hook(event, None, from_native)?;
2385 }
2386 Ok(())
2387 }
2388
2389 pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2390 self.hook_call_with(from_native, nargs, false)
2391 }
2392
2393 /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2394 /// the first result slot relative to the activation's func slot, ntransfer
2395 /// the number of results.
2396 pub(crate) fn hook_return(
2397 &mut self,
2398 from_native: bool,
2399 ftransfer: u32,
2400 nresults: u32,
2401 ) -> Result<(), LuaError> {
2402 if self.hook.ret
2403 && !self.in_hook
2404 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2405 {
2406 self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2407 self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2408 self.run_hook(b"return", None, from_native)?;
2409 }
2410 Ok(())
2411 }
2412
2413 /// PUC "tail return" event — fires once per tail call that collapsed
2414 /// into the activation now returning, *after* its own "return" event.
2415 /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2416 fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2417 if self.hook.ret
2418 && !self.in_hook
2419 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2420 {
2421 self.run_hook(b"tail return", None, false)?;
2422 }
2423 Ok(())
2424 }
2425
2426 /// Call a metamethod with a single expected result.
2427 fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2428 let mut r = self.call_value(f, args)?;
2429 Ok(if r.is_empty() {
2430 Value::Nil
2431 } else {
2432 r.swap_remove(0)
2433 })
2434 }
2435
2436 /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2437 /// driven through the interpreter loop with a `Meta` continuation, so a
2438 /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2439 /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2440 /// Returns to the caller with the call set up — the opcode arm must do no
2441 /// further work on the running frame and let the loop iterate. `tm` is
2442 /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2443 /// born from this call inherits it via `pending_tm`, so
2444 /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2445 /// (db.lua :878).
2446 fn begin_meta_call(
2447 &mut self,
2448 func: Value,
2449 args: &[Value],
2450 action: MetaAction,
2451 tm: &'static str,
2452 ) -> Result<(), LuaError> {
2453 let saved_top = self.top;
2454 let cont_slot = self.stack.len() as u32;
2455 self.stack.push(func);
2456 self.stack.extend_from_slice(args);
2457 self.top = self.stack.len() as u32;
2458 frames_push_sync(
2459 &mut self.frames,
2460 &mut self.frames_top,
2461 CallFrame::Cont(NativeCont {
2462 kind: ContKind::Meta(MetaCont { action, saved_top }),
2463 func_slot: cont_slot,
2464 nresults: 1,
2465 }),
2466 );
2467 let saved_tm = self.pending_tm.replace(tm);
2468 // begin_call drives a Lua metamethod through the loop (returns true) or
2469 // runs a native one inline (returns false, leaving results at cont_slot
2470 // for the loop head to pick up); either way the Meta cont resolves there.
2471 let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2472 // Native callees never consumed pending_tm (push_frame is only hit on
2473 // a Lua callee); restore so it doesn't leak to a later push_frame.
2474 self.pending_tm = saved_tm;
2475 r?;
2476 Ok(())
2477 }
2478
2479 /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2480 fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2481 // v2.13 WUC read-time probe: a collectable key must be live at
2482 // the moment it is used. O(1) membership test against the
2483 // freed-pointer log — gc-verify diagnostic builds only; exact
2484 // under quarantining allocators (ASAN).
2485 #[cfg(feature = "gc-verify")]
2486 if matches!(key, Value::Str(_)) {
2487 let h = match key {
2488 Value::Str(s) => s.as_ptr() as usize,
2489 _ => unreachable!(),
2490 };
2491 if self.heap.recently_freed.contains(&h) {
2492 let (pc, reg_info) = match self.frames.last() {
2493 Some(CallFrame::Lua(f)) => {
2494 let pc = f.pc as usize;
2495 let inst = f.closure.proto.code.get(pc.wrapping_sub(1));
2496 (
2497 pc,
2498 inst.map(|i| {
2499 format!(
2500 "op[pc-1]={:?} a={} b={} c={} base={}",
2501 i.op(),
2502 i.a(),
2503 i.b(),
2504 i.c(),
2505 f.base
2506 )
2507 })
2508 .unwrap_or_default(),
2509 )
2510 }
2511 _ => (0, String::new()),
2512 };
2513 panic!(
2514 "[gc-verify] op_index READ of dead string key {h:#x} \
2515 (gc_top {}, top {}, pc {pc}, {reg_info})",
2516 self.gc_top, self.top,
2517 );
2518 }
2519 }
2520 match self.index_step(t, key)? {
2521 MmOut::Done(v) => self.stack[dst as usize] = v,
2522 MmOut::Mm { func, recv } => {
2523 self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2524 }
2525 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2526 }
2527 Ok(())
2528 }
2529
2530 /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2531 fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2532 match self.newindex_step(t, key, v)? {
2533 MmOut::Done(_) => {}
2534 MmOut::Mm { func, recv } => {
2535 self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2536 }
2537 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2538 }
2539 Ok(())
2540 }
2541
2542 /// Apply a comparison opcode's outcome: a known boolean drives the
2543 /// conditional skip directly; a metamethod is called yieldably, its
2544 /// truthiness driving the skip on return.
2545 fn op_compare(
2546 &mut self,
2547 step: MmOut,
2548 l: Value,
2549 r: Value,
2550 k: bool,
2551 tm: &'static str,
2552 ) -> Result<(), LuaError> {
2553 match step {
2554 MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2555 MmOut::Mm { func, .. } => {
2556 self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2557 }
2558 MmOut::CompareSynth { func } => {
2559 // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2560 // negation are driven through `MetaAction::Compare` so the
2561 // metamethod call can yield like any other compare.
2562 self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2563 }
2564 }
2565 Ok(())
2566 }
2567
2568 /// Complete a VM instruction whose metamethod just returned `result` (PUC
2569 /// `luaV_finishOp`). The running frame is already back on top.
2570 fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2571 match action {
2572 MetaAction::Store { dst } => self.stack[dst as usize] = result,
2573 MetaAction::Discard => {}
2574 MetaAction::Compare { k, negate } => {
2575 let t = if negate {
2576 !result.truthy()
2577 } else {
2578 result.truthy()
2579 };
2580 self.cond_skip(t, k);
2581 }
2582 MetaAction::Concat { dst, base_a } => {
2583 self.stack[dst as usize] = result;
2584 self.top = dst + 1;
2585 self.concat_run(base_a)?;
2586 }
2587 }
2588 Ok(())
2589 }
2590
2591 // ---- metatables ----
2592
2593 pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2594 match v {
2595 Value::Table(t) => t.metatable(),
2596 Value::Userdata(u) => u.metatable(),
2597 v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2598 }
2599 }
2600
2601 /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2602 /// non-table). No-op for tables (they carry their own).
2603 pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2604 if let Some(i) = type_mt_slot(v) {
2605 self.type_mt[i] = mt;
2606 }
2607 }
2608
2609 /// The metamethod of `v` for `mm`, or nil.
2610 pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2611 match self.metatable_of(v) {
2612 Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2613 None => Value::Nil,
2614 }
2615 }
2616
2617 /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2618 /// only fires when both operands carry a metatable that exposes the same
2619 /// implementation. Returns the metamethod to call, or `Nil` when no
2620 /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2621 /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2622 pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2623 let mt1 = self.metatable_of(l);
2624 let Some(mt1) = mt1 else { return Value::Nil };
2625 let key = Value::Str(self.mm_names[mm as usize]);
2626 let tm1 = mt1.get(key);
2627 if tm1.is_nil() {
2628 return Value::Nil;
2629 }
2630 let mt2 = self.metatable_of(r);
2631 let Some(mt2) = mt2 else { return Value::Nil };
2632 if mt1.as_ptr() == mt2.as_ptr() {
2633 return tm1;
2634 }
2635 let tm2 = mt2.get(key);
2636 if tm2.is_nil() {
2637 return Value::Nil;
2638 }
2639 if tm1.raw_eq(tm2) {
2640 return tm1;
2641 }
2642 Value::Nil
2643 }
2644
2645 /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2646 /// or full userdata whose metatable carries a string `__name` reports that
2647 /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2648 pub(crate) fn obj_typename(&self, v: Value) -> String {
2649 if matches!(v, Value::Table(_) | Value::Userdata(_))
2650 && let Value::Str(s) = self.get_mm(v, Mm::Name)
2651 {
2652 return String::from_utf8_lossy(s.as_bytes()).into_owned();
2653 }
2654 v.type_name().to_string()
2655 }
2656
2657 fn call_at(
2658 &mut self,
2659 func_slot: u32,
2660 nargs: u32,
2661 from_c: bool,
2662 ) -> Result<Vec<Value>, LuaError> {
2663 if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2664 self.exec()
2665 } else {
2666 // native completed inline; results at func_slot..top
2667 Ok(self.take_results(func_slot))
2668 }
2669 }
2670
2671 /// Switch the `collectgarbage` mode, returning the previous mode name.
2672 pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2673 std::mem::replace(&mut self.gc_mode, new)
2674 }
2675
2676 /// Whether the current `collectgarbage` mode is "generational" (where a
2677 /// "step" is a minor collection — a full atomic pass — rather than a paced
2678 /// incremental sweep).
2679 pub(crate) fn gc_mode_is_generational(&self) -> bool {
2680 self.gc_mode == "generational"
2681 }
2682
    /// Current `stepsize` pacing parameter, returned exactly as stored (PUC:
    /// 0 means an unbounded step that completes a whole cycle at once).
    pub(crate) fn gc_stepsize(&self) -> i64 {
        self.gc_stepsize
    }
2688
2689 /// `collectgarbage("param", name [,value])`: read (or set, returning the
2690 /// previous value of) a pacing parameter. Returns `None` for an unknown
2691 /// name so the caller can raise PUC's `invalid parameter` error. The
2692 /// collector is stop-the-world, so these only round-trip for API fidelity.
2693 pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2694 let slot = match name {
2695 b"pause" => &mut self.gc_pause,
2696 b"stepmul" => &mut self.gc_stepmul,
2697 b"stepsize" => &mut self.gc_stepsize,
2698 _ => return None,
2699 };
2700 let prev = *slot;
2701 if let Some(v) = set {
2702 *slot = v;
2703 }
2704 Some(prev)
2705 }
2706
2707 /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2708 /// paced sweep via `Vm::gc_step`.
2709 ///
2710 /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2711 /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2712 /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2713 /// suspected UAF is structurally closed — born objects no longer become
2714 /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2715 ///
2716 /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2717 /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2718 /// O(SWEEP_DIVISOR) safe-points regardless of size.
2719 #[inline(always)]
2720 pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2721 if self.gc_finalizing {
2722 return;
2723 }
2724 if !self.heap.gc_due() {
2725 return;
2726 }
2727 // v2.5 P1B-2E: tighten to bare `live_top`. The v2.2.0
2728 // `live_top.max(self.top)` workaround is now obsoleted by
2729 // v2.3's `finish_results` slot-clear + v2.5 P1B-2A
2730 // (Op::TailCall collapse slot-clear) + v2.5 P1B-2B
2731 // (pcall unwind slot-clear). PUC L->top discipline is now
2732 // mirrored at every frame-pop site.
2733 self.gc_top = live_top;
2734 // PUC stepmul: % of allocation rate. Higher = more GC work per
2735 // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2736 // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2737 const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2738 const MIN_BUDGET: usize = 64_000;
2739 let stepmul = self.gc_stepmul.max(1) as usize;
2740 let divisor = (SWEEP_BASE / stepmul).max(1);
2741 let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2742 if self.gc_step(budget) {
2743 self.heap.rearm_gc_pause(self.gc_pause);
2744 }
2745 }
2746
2747 /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2748 /// roots (open upvalues, which are not first-class Values). Shared by the
2749 /// full collector and the incremental-sweep driver so both snapshot the
2750 /// exact same live set.
2751 fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2752 let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2753 roots.push(Value::Table(self.globals));
2754 for mt in self.type_mt.into_iter().flatten() {
2755 roots.push(Value::Table(mt));
2756 }
2757 for &n in &self.mm_names {
2758 roots.push(Value::Str(n));
2759 }
2760 // Root the running thread's live registers (PUC marks [stack, top)).
2761 // `gc_top` is the instruction-level cursor of the last GC
2762 // safe-point: allocation safe-points set it via
2763 // `maybe_collect_garbage(live_top)`, and `begin_call` raises it
2764 // to the callee's argument top when entering a native — PUC's
2765 // `L->top = func + 1 + nargs` C-call discipline. Without that
2766 // raise, an explicit `collectgarbage()` collected with a STALE
2767 // cursor from some earlier (lower) safe-point and freed its own
2768 // caller's register-held strings — UAF-C
2769 // (STATUS_ACCESS_VIOLATION on Windows / ASAN heap-use-after-free
2770 // on Linux; the v2.13 WUC gc-verify frame audit pinpointed the
2771 // under-rooted slots). Values stranded above the cursor stay
2772 // excluded so weak-table entries are not spuriously pinned
2773 // (gc.lua:544 suspended-coroutine collection).
2774 let live = (self.gc_top as usize).min(self.stack.len());
2775 roots.extend_from_slice(&self.stack[..live]);
2776 for cf in &self.frames {
2777 match cf {
2778 CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2779 CallFrame::Cont(NativeCont {
2780 kind: ContKind::Xpcall { handler },
2781 ..
2782 }) => roots.push(*handler),
2783 CallFrame::Cont(NativeCont {
2784 kind: ContKind::Close(cc),
2785 ..
2786 }) => {
2787 // Root the error threaded through this close chain so a
2788 // `collectgarbage()` inside a sibling `__close` handler
2789 // does not free it before the next handler is invoked
2790 // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2791 if let Some(e) = cc.pending {
2792 roots.push(e);
2793 }
2794 if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2795 roots.push(err);
2796 }
2797 }
2798 CallFrame::Cont(_) => {}
2799 }
2800 }
2801 if let Some(e) = self.closing_err {
2802 roots.push(e);
2803 }
2804 // B12 host roots — Lua-facade handles keep their referenced
2805 // values alive across calls/yields. Trace the whole vector;
2806 // unused slots (post-`unpin_all`) carry Value::Nil which the
2807 // GC ignores.
2808 for slot in &self.host_roots {
2809 // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2810 roots.push(slot.value);
2811 }
2812 // v2.1 — `table.sort` and similar builtins stash their working
2813 // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2814 // comparator callback doesn't free strings/tables snapshotted
2815 // off the live table (sort.lua's `load(..)(); collectgarbage()`
2816 // compare regression).
2817 for buf in &self.sort_scratch {
2818 roots.extend_from_slice(buf);
2819 }
2820 // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2821 // mid-execution. Without rooting them here, a `collectgarbage()`
2822 // invoked inside the running native (sort.lua AA `load(..)();
2823 // collectgarbage()` compare callback regression) sweeps the
2824 // closure that's actively executing, leaving `nc.upvals`
2825 // dangling and the Rust local `nc` pointing at recycled memory
2826 // — the SIGSEGV pops on the very next field access or pop.
2827 for &nc in &self.running_natives {
2828 roots.push(Value::Native(nc));
2829 }
2830 // the running thread's debug hook (suspended threads root theirs via
2831 // Coro::trace / the main_ctx sweep below)
2832 if let Some(h) = self.hook.func {
2833 roots.push(h);
2834 }
2835 // the running coroutine (its saved-context fields live in the VM, but
2836 // the object itself + its resumer chain must stay reachable)
2837 if let Some(co) = self.current {
2838 roots.push(Value::Coro(co));
2839 }
2840 if let Some(mc) = self.main_coro {
2841 roots.push(Value::Coro(mc));
2842 }
2843 // debug.getregistry() and io library state
2844 if let Some(r) = self.registry {
2845 roots.push(Value::Table(r));
2846 }
2847 if let Some(mt) = self.file_mt {
2848 roots.push(Value::Table(mt));
2849 }
2850 if let Some(f) = self.io_input {
2851 roots.push(Value::Userdata(f));
2852 }
2853 if let Some(f) = self.io_output {
2854 roots.push(Value::Userdata(f));
2855 }
2856 // the main thread's saved context while a coroutine runs
2857 if let Some(m) = &self.main_ctx {
2858 roots.extend_from_slice(&m.stack);
2859 if let Some(h) = m.hook.func {
2860 roots.push(h);
2861 }
2862 for cf in &m.frames {
2863 match cf {
2864 CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2865 CallFrame::Cont(NativeCont {
2866 kind: ContKind::Xpcall { handler },
2867 ..
2868 }) => roots.push(*handler),
2869 CallFrame::Cont(_) => {}
2870 }
2871 }
2872 }
2873 let mut extra: Vec<*mut GcHeader> = self
2874 .open_upvals
2875 .iter()
2876 .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2877 .collect();
2878 if let Some(m) = &self.main_ctx {
2879 extra.extend(
2880 m.open_upvals
2881 .iter()
2882 .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2883 );
2884 }
2885 (roots, extra)
2886 }
2887
2888 /// Run a full collection with the VM's roots, then run any `__gc`
2889 /// finalizers the collection scheduled. A no-op (returns 0) when already
2890 /// inside a finalizer — the collector is not reentrant (PUC).
2891 pub fn collect_garbage(&mut self) -> usize {
2892 if self.gc_finalizing {
2893 return 0;
2894 }
2895 let (roots, extra) = self.gc_roots();
2896 let freed = self.heap.collect_ex(&roots, &extra);
2897 #[cfg(feature = "gc-verify")]
2898 self.verify_frame_regs_live("collect_garbage");
2899 self.run_finalizers();
2900 freed
2901 }
2902
2903 /// v2.13 WUC `gc-verify` — after a collect, every register slot the
2904 /// collector just rooted (`[0, max(gc_top, top))` — the same bound
2905 /// `gc_roots` uses) must hold a live value. A dead value inside the
2906 /// rooted range means the root snapshot and the sweep disagreed —
2907 /// the bug class behind UAF-C. (Slots ABOVE the bound may hold
2908 /// stale dead values legitimately; the interpreter's contract is
2909 /// that it writes them before reading.)
2910 #[cfg(feature = "gc-verify")]
2911 pub(crate) fn verify_frame_regs_live(&self, ctx: &str) {
2912 let live = self.heap.debug_live_set();
2913 let header = |v: Value| -> Option<usize> {
2914 match v {
2915 Value::Str(s) => Some(s.as_ptr() as usize),
2916 Value::Table(t) => Some(t.as_ptr() as usize),
2917 Value::Closure(c) => Some(c.as_ptr() as usize),
2918 Value::Native(n) => Some(n.as_ptr() as usize),
2919 Value::Coro(c) => Some(c.as_ptr() as usize),
2920 Value::Userdata(u) => Some(u.as_ptr() as usize),
2921 _ => None,
2922 }
2923 };
2924 let bound = (self.gc_top as usize).min(self.stack.len());
2925 for i in 0..bound {
2926 if let Some(h) = header(self.stack[i]) {
2927 if !live.contains(&h) {
2928 panic!(
2929 "[gc-verify] {ctx}: rooted stack slot {i} (gc_top {}, top {}) \
2930 holds a dead value {h:#x} after collect",
2931 self.gc_top, self.top,
2932 );
2933 }
2934 }
2935 }
2936 // Diagnostic tier: a dead value ABOVE the cursor is only a bug if
2937 // that register is a named local still in scope (the interpreter
2938 // WILL read it). Cross-check against the proto's LocVar table.
2939 for (fi, cf) in self.frames.iter().enumerate() {
2940 if let CallFrame::Lua(f) = cf {
2941 let base = f.base as usize;
2942 let maxs = f.closure.proto.max_stack as usize;
2943 let hi = (base + maxs).min(self.stack.len());
2944 let pc = f.pc;
2945 for i in bound.max(base)..hi {
2946 if let Some(h) = header(self.stack[i]) {
2947 if !live.contains(&h) {
2948 let reg = (i - base) as u32;
2949 if let Some(lv) = f
2950 .closure
2951 .proto
2952 .locvars
2953 .iter()
2954 .find(|lv| lv.reg == reg && lv.start_pc <= pc && pc < lv.end_pc)
2955 {
2956 panic!(
2957 "[gc-verify] {ctx}: frame {fi} IN-SCOPE LOCAL '{}' \
2958 (reg {reg}, abs {i}, pc {pc}, gc_top {}) holds a \
2959 dead value {h:#x} — live_top cursor excluded a \
2960 live named local",
2961 lv.name, self.gc_top,
2962 );
2963 }
2964 }
2965 }
2966 }
2967 }
2968 }
2969 }
2970
2971 /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2972 /// threw; gc.lua's "errors during collection" probe relies on it. This
2973 /// variant runs the same cycle but propagates the captured finalizer
2974 /// error to the explicit caller.
2975 pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2976 if self.gc_finalizing {
2977 return Ok(0);
2978 }
2979 let (roots, extra) = self.gc_roots();
2980 let freed = self.heap.collect_ex(&roots, &extra);
2981 #[cfg(feature = "gc-verify")]
2982 self.verify_frame_regs_live("collect_garbage_propagating");
2983 self.run_finalizers_or_err()?;
2984 Ok(freed)
2985 }
2986
    /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
    /// should report fail rather than collect). Reads the `gc_finalizing`
    /// flag, the same flag the collection entry points test before doing
    /// any work.
    pub(crate) fn gc_is_finalizing(&self) -> bool {
        self.gc_finalizing
    }
2992
2993 /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2994 /// = true indicates more pieces follow (concatenated until the first
2995 /// `to_cont = false` call flushes the whole line). Mirrors
2996 /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2997 /// * If the buffer is fresh, `to_cont` is false, and the message is
2998 /// `@<word>`, treat as a control message — only `@on` / `@off` are
2999 /// recognised; any other `@…` is silently ignored.
3000 /// * Otherwise, while the state is `Off`, drop the piece; while `On`,
3001 /// accumulate, and flush to stderr + `warn_log` on the
3002 /// non-continuation call.
3003 pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
3004 if self.warn_buf.is_empty()
3005 && !to_cont
3006 && let Some(b'@') = msg.first().copied()
3007 {
3008 match &msg[1..] {
3009 b"on" => self.warn_state = WarnState::On,
3010 b"off" => self.warn_state = WarnState::Off,
3011 _ => {} // unknown control — silently ignored (PUC checkcontrol)
3012 }
3013 return;
3014 }
3015 if self.warn_state == WarnState::Off {
3016 // drop continuation pieces too — PUC `warnfoff` is the trampoline
3017 return;
3018 }
3019 self.warn_buf.extend_from_slice(msg);
3020 if !to_cont {
3021 let line = std::mem::take(&mut self.warn_buf);
3022 eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
3023 self.warn_log.push(line);
3024 }
3025 }
3026
    /// Drain the in-process warning log (one entry per emitted message, sans
    /// `"Lua warning: "` prefix and newline). For test harnesses that want to
    /// assert on warn output without scraping stderr.
    ///
    /// Returns the accumulated entries and leaves `warn_log` empty.
    pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
        std::mem::take(&mut self.warn_log)
    }
3033
    /// Arm the cooperative instruction budget (P09 embedding). The run loop
    /// decrements this once per dispatch turn; on zero it raises a catchable
    /// `"instruction budget exceeded"` error and disarms itself so the host
    /// can resume with a fresh budget on the next call. `None` removes the
    /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
    /// short-script semantics.
    ///
    /// This setter only stores `budget`; it does not validate it.
    /// NOTE(review): how a zero or negative `Some(n)` is treated is decided
    /// by the run loop, which is not visible here — confirm before relying
    /// on it.
    pub fn set_instr_budget(&mut self, budget: Option<i64>) {
        self.instr_budget = budget;
    }
3043
3044 /// Remaining instruction budget (None when unbounded).
3045 pub fn instr_budget_remaining(&self) -> Option<i64> {
3046 self.instr_budget
3047 }
3048
3049 /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
3050 /// **must** disable JIT when relying on `instr_budget` — see the
3051 /// `jit_enabled` field doc for the rationale.
3052 pub fn set_jit_enabled(&mut self, enabled: bool) {
3053 self.jit.enabled = enabled;
3054 }
3055
3056 /// Current JIT enable state.
3057 pub fn jit_enabled(&self) -> bool {
3058 self.jit.enabled
3059 }
3060
3061 /// Toggle the trace JIT (P12). Off by default while the sprint
3062 /// develops. When enabled, hot back-edges are counted on
3063 /// `Proto.trace_hot_count`; once the counter passes
3064 /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
3065 /// mode at the back-edge target. Stays a no-op until S2's
3066 /// trace lowerer and S3's dispatcher land.
3067 pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
3068 self.jit.trace_enabled = enabled;
3069 }
3070
3071 /// P16-A — opt-in flag for the self-link cycle catch. See field
3072 /// docs for the correctness blocker. Default `false`.
3073 pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
3074 self.jit.p16_self_link_enabled = enabled;
3075 }
3076
3077 /// Current state of the P16-A self-link cycle catch.
3078 pub fn p16_self_link_enabled(&self) -> bool {
3079 self.jit.p16_self_link_enabled
3080 }
3081
3082 /// Current trace-JIT enable state.
3083 pub fn trace_jit_enabled(&self) -> bool {
3084 self.jit.trace_enabled
3085 }
3086
3087 /// Number of traces that have closed cleanly (looped back to the
3088 /// head PC) since this Vm was constructed. Cumulative; used by
3089 /// tests + tuning. Will become the dominant signal once S2's
3090 /// compile + cache lands.
3091 pub fn trace_closed_count(&self) -> u64 {
3092 self.jit.counters.closed
3093 }
3094
3095 /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
3096 /// hit an un-recordable op — the latter lands at S2).
3097 pub fn trace_aborted_count(&self) -> u64 {
3098 self.jit.counters.aborted
3099 }
3100
3101 /// P13-S13-G v2 — number of compiled traces whose close shape
3102 /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
3103 /// pin `dispatchable=false` because the dispatcher can't
3104 /// resume at a depth>0 PC without the matching CallFrames.
3105 /// S4-step4b's frame-mat helper could synthesise those, but
3106 /// the InlineAbort emit path isn't wired up yet — fresh
3107 /// pickup work for S13-G v2-full.
3108 pub fn trace_inline_abort_count(&self) -> u64 {
3109 self.jit.counters.inline_abort
3110 }
3111
3112 /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
3113 pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
3114 &self.jit.counters.dispatch_off_reasons
3115 }
3116
3117 /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
3118 pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
3119 &self.jit.counters.compile_failed_reasons
3120 }
3121
3122 /// P13-S13-H — see `JitCounters::closed_lens`. Returns
3123 /// `(is_call_triggered, ops_len)` for every trace that closed.
3124 pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
3125 &self.jit.counters.closed_lens
3126 }
3127
3128 /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
3129 /// Per-reason close-cause counts (recorder-side abort/discard +
3130 /// lowerer-side dispatch_off labels) keyed by `&'static str`.
3131 pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
3132 &self.jit.counters.close_cause_counts
3133 }
3134
3135 /// v2.0 Track-R R3b — number of compiled traces whose
3136 /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
3137 /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
3138 /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
3139 /// hot loop with p16-on. R3b keeps `dispatchable = false` even
3140 /// when this count bumps; R3d will lift it.
3141 pub fn trace_downrec_link_compiled_count(&self) -> u64 {
3142 self.jit.counters.downrec_link_compiled
3143 }
3144
3145 /// v2.0 Track-R R3c — see
3146 /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
3147 /// of times the dispatcher's `is_downrec_sentinel` arm fired and
3148 /// classified the return as a caller-pc-guard HIT.
3149 pub fn trace_downrec_dispatched_count(&self) -> u64 {
3150 self.jit.counters.downrec_dispatched
3151 }
3152
3153 /// v2.0 Track-R R3c — see
3154 /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
3155 /// times the dispatcher entered a `downrec_link`-bearing trace and
3156 /// the trace returned via the lowerer's deopt block (caller-pc
3157 /// guard MISS), or the dispatcher itself force-deopted via the
3158 /// stitch-cycle checkpoint.
3159 pub fn trace_downrec_deopt_count(&self) -> u64 {
3160 self.jit.counters.downrec_deopt
3161 }
3162
3163 /// v2.0 Track-R R3d — see
3164 /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3165 /// Number of compiled traces whose lowerer emitted a multi-way
3166 /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3167 /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3168 pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3169 self.jit.counters.multi_way_guard_emitted
3170 }
3171
3172 /// P12-S2.C — number of closed traces the lowerer compiled and
3173 /// parked on `Proto.traces`. Re-records of the same head_pc are
3174 /// deduped (the second close finds the head_pc already cached
3175 /// and skips compile), so this never exceeds `trace_closed_count`.
3176 pub fn trace_compiled_count(&self) -> u64 {
3177 self.jit.counters.compiled
3178 }
3179
3180 /// v2.1 Phase 1I.B — number of times the recorder captured a
3181 /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3182 /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3183 /// by the Phase 1I.B opt-in fire test to verify the env gate
3184 /// wiring round-trips end-to-end (env -> recorder -> snapshot
3185 /// -> counter -> getter -> assertion).
3186 pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3187 self.jit.counters.field_ic_snapshot_captured
3188 }
3189
3190 /// P12-S2.C — number of closed traces the lowerer rejected
3191 /// (any of the bail conditions in
3192 /// `crate::jit::trace::try_compile_trace`).
3193 pub fn trace_compile_failed_count(&self) -> u64 {
3194 self.jit.counters.compile_failed
3195 }
3196
3197 /// P12-S3 — number of times the dispatcher jumped into a
3198 /// compiled trace. Bumps on every entry; `trace_deopt_count`
3199 /// counts the subset where the trace returned with a parked
3200 /// `jit_pending_err`.
3201 pub fn trace_dispatched_count(&self) -> u64 {
3202 self.jit.counters.dispatched
3203 }
3204
3205 /// P12-S3 — number of trace entries that came back with
3206 /// `jit_pending_err` set (typically a metatable shadowed an
3207 /// index inside a helper, forcing the dispatcher to fall back
3208 /// to the interpreter without committing the trace's result).
3209 pub fn trace_deopt_count(&self) -> u64 {
3210 self.jit.counters.deopt
3211 }
3212
3213 /// P15-A v1 — number of times the dispatcher started a side
3214 /// trace recording (an `exit_hit_counts` slot crossed
3215 /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3216 /// was None and trace JIT was enabled). Each unit is exactly one
3217 /// `start_side_trace` call; the actual compile success counts
3218 /// under [`Self::trace_compiled_count`] like any other trace.
3219 /// Probe use: distinguishes the "side-trace pipeline fired"
3220 /// signal from the "primary back-edge / call-trigger fired"
3221 /// signal so v0-v3 architectural progress is visible without
3222 /// reading per-counter histograms.
3223 pub fn trace_side_trace_started_count(&self) -> u64 {
3224 self.jit.counters.side_trace_started
3225 }
3226
3227 /// P15-A v2-A — number of side-trace recordings that closed,
3228 /// compiled successfully, AND patched their parent's
3229 /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3230 /// dispatch through these ptrs yet (v2-B/C job), but the
3231 /// counter + ptr write proves the compile + link pipeline is
3232 /// complete end-to-end.
3233 pub fn trace_side_trace_compiled_count(&self) -> u64 {
3234 self.jit.counters.side_trace_compiled
3235 }
3236
3237 /// P15-A v2-C-A5-C — number of side traces that compiled
3238 /// successfully but were SHEDDED by the close-handler shape-
3239 /// match gate (`exit_tags_match_entry_tags`). High ratios
3240 /// vs. `trace_side_trace_compiled_count` indicate the
3241 /// architecture is shedding lots of would-be side traces;
3242 /// useful as a tuning probe for future relaxation of the
3243 /// gate or for child-IR re-specialisation against parent's
3244 /// exit shape.
3245 pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3246 self.jit.counters.side_trace_shape_mismatch
3247 }
3248
3249 /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3250 /// classified as `crate::jit::trace::EscapeState::Sinkable`
3251 /// across every successfully compiled trace on this Vm. The
3252 /// count is post-demotion: sites pre-emit drops back to Escaped
3253 /// for not meeting v1 sunk-emit criteria are NOT counted.
3254 /// `trace_sunk_alloc_count` matches one-for-one today (every
3255 /// surviving Sinkable site goes through sunk emit).
3256 pub fn trace_sinkable_seen_count(&self) -> u64 {
3257 self.jit.counters.sinkable_seen
3258 }
3259
3260 /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3261 pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3262 self.jit.counters.accum_bufferable_seen
3263 }
3264
3265 /// P15-prep — total dispatch hits across all known traces,
3266 /// broken into hot-exit telemetry (max single-exit count,
3267 /// total dispatches, exit count). Used by probes to identify
3268 /// hot side-exits as side-trace candidates.
3269 ///
3270 /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3271 /// recursively, so inner functions' traces are reported.
3272 pub fn trace_exit_hit_summary(
3273 &self,
3274 cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3275 ) -> Vec<(u32, Vec<u32>)> {
3276 fn walk(
3277 proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3278 out: &mut Vec<(u32, Vec<u32>)>,
3279 ) {
3280 for ct in proto.traces.borrow().iter() {
3281 let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3282 out.push((ct.head_pc, counts));
3283 }
3284 for inner in proto.protos.iter() {
3285 walk(*inner, out);
3286 }
3287 }
3288 let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3289 walk(cl.proto, &mut out);
3290 out
3291 }
3292
3293 /// P15-A v0 — surface every side-exit slot whose hit count is
3294 /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3295 /// `cl.proto` (recursively walking `proto.protos`). Returned
3296 /// entries are side-trace candidates: each carries the parent
3297 /// trace's `(head_proto, head_pc)`, the exit's index in the
3298 /// parent's `exit_hit_counts`, and the side trace's natural
3299 /// entry shape (`cont_pc` + `exit_tags`).
3300 ///
3301 /// Layout of `exit_hit_counts` (mirrored by the iter):
3302 /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3303 /// window-sized exit_tags).
3304 /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3305 /// → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3306 /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3307 /// exit_tags = `ct.exit_tags`).
3308 pub fn hot_exit_iter(
3309 &self,
3310 cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3311 ) -> Vec<crate::jit::trace::HotExitInfo> {
3312 use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3313 fn walk(
3314 proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3315 out: &mut Vec<HotExitInfo>,
3316 ) {
3317 for ct in proto.traces.borrow().iter() {
3318 let inline_n = ct.per_exit_inline.len();
3319 let tags_n = ct.per_exit_tags.len();
3320 debug_assert_eq!(
3321 ct.exit_hit_counts.len(),
3322 inline_n + tags_n + 1,
3323 "exit_hit_counts layout invariant violated"
3324 );
3325 for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3326 let hits = cell.get();
3327 if hits < HOTEXIT_THRESHOLD {
3328 continue;
3329 }
3330 let (cont_pc, exit_tags) = if idx < inline_n {
3331 let ent = &ct.per_exit_inline[idx];
3332 (ent.cont_pc, ent.exit_tags.clone())
3333 } else if idx < inline_n + tags_n {
3334 let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3335 (*pc, tags.clone())
3336 } else {
3337 (ct.head_pc, ct.exit_tags.clone())
3338 };
3339 out.push(HotExitInfo {
3340 head_proto: proto,
3341 head_pc: ct.head_pc,
3342 exit_idx: idx,
3343 hits,
3344 cont_pc,
3345 exit_tags,
3346 });
3347 }
3348 }
3349 for inner in proto.protos.iter() {
3350 walk(*inner, out);
3351 }
3352 }
3353 let mut out: Vec<HotExitInfo> = Vec::new();
3354 walk(cl.proto, &mut out);
3355 out
3356 }
3357
3358 /// P12-S5-B — sum of NewTable sites that actually took the
3359 /// sunk-emit path across every successfully compiled trace on
3360 /// this Vm. Each counted site skips its heap `Gc<Table>`
3361 /// allocation per dispatch; the array part lives as Cranelift
3362 /// `Variable`s for the duration of the trace.
3363 pub fn trace_sunk_alloc_count(&self) -> u64 {
3364 self.jit.counters.sunk_alloc
3365 }
3366
3367 /// P12-S5-C — sum of materialise-helper emit sites across every
3368 /// successfully compiled trace on this Vm. Each unit is a
3369 /// (site × cmp side-exit) pair whose IR reconstructs a heap
3370 /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3371 /// emit is wiring materialise into the right side-exits.
3372 pub fn trace_materialize_emit_count(&self) -> u64 {
3373 self.jit.counters.materialize_emit
3374 }
3375
3376 /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3377 /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3378 /// replaces a `Heap::new_closure_inline` call on the dispatch
3379 /// path; the count is static (one per matching op per compiled
3380 /// trace), summed at compile success.
3381 pub fn trace_closure_emit_count(&self) -> u64 {
3382 self.jit.counters.closure_emit
3383 }
3384
3385 /// v2.0 Stage 7 polish 6 fire experiment — see
3386 /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3387 /// Number of compiled traces whose `per_exit_inline.len() > 0`
3388 /// (depth>0 inlined cmp side-exits emitted).
3389 pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3390 self.jit.counters.per_exit_inline_compiled
3391 }
3392
3393 /// v2.0 Stage 7 polish 6 fire experiment — see
3394 /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3395 /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3396 /// `dispatchable == true` — i.e. the count of compiled traces
3397 /// that would actually exercise the AOT polish 6 chain-reloc +
3398 /// deploy-resolver path.
3399 pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3400 self.jit.counters.per_exit_inline_dispatchable
3401 }
3402
3403 /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3404 /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3405 /// whether a self-recursive function actually walked the depth
3406 /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3407 /// across traces and Vm activations; reset only on `Vm::new`.
3408 pub fn trace_max_depth_seen(&self) -> u8 {
3409 self.jit.max_depth_seen
3410 }
3411
3412 /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3413 /// dispatch time). The frame-materialization helper reads `.base`
3414 /// to compute offsets for each inlined frame's window.
3415 #[doc(hidden)]
3416 pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3417 match self.frames.last() {
3418 Some(CallFrame::Lua(f)) => Some(*f),
3419 _ => None,
3420 }
3421 }
3422
3423 /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3424 /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3425 /// by `luna-tools` (`luna-profile`'s sampler walks this from
3426 /// inside a `Count` hook). Sibling-module scope: not part of the
3427 /// public embedder surface, but `inspect::frames_for_profile` is.
3428 #[doc(hidden)]
3429 pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3430 &self.frames
3431 }
3432
3433 /// P12-S4-step4b — ensure the value stack covers indices
3434 /// `[0..need)`. Extends with Nil if shorter. Called by the
3435 /// frame-materialization helper before pushing an inlined frame
3436 /// whose register window may exceed the current stack length.
3437 #[doc(hidden)]
3438 pub fn jit_ensure_stack(&mut self, need: usize) {
3439 if self.stack.len() < need {
3440 self.stack.resize(need, Value::Nil);
3441 }
3442 }
3443
3444 /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3445 /// `__close` handlers would run (any active tbc slot ≥ from
3446 /// holding a non-nil/false Value); if so, parks a deopt sentinel
3447 /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3448 /// IR branches to the deopt block. Otherwise performs the safe
3449 /// part of close — `close_from(from)` to close open upvals +
3450 /// drop any drained tbc entries ≥ from — and returns 0.
3451 ///
3452 /// Returns are i64-shaped so the cranelift import sig stays
3453 /// trivial (i64 → i64 mapping).
3454 #[doc(hidden)]
3455 pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3456 if self.jit.pending_err.is_some() {
3457 return 1;
3458 }
3459 let Some(f) = self.jit_last_lua_frame() else {
3460 self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3461 return 1;
3462 };
3463 let from = f.base + start_offset;
3464 let has_handler = self.tbc.iter().any(|&s| {
3465 s >= from && {
3466 let v = self.stack[s as usize];
3467 !matches!(v, Value::Nil | Value::Bool(false))
3468 }
3469 });
3470 if has_handler {
3471 self.jit.pending_err =
3472 Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3473 return 1;
3474 }
3475 self.close_from(from);
3476 // Drain any tbc entries ≥ from (they're nil/false stubs the
3477 // interpreter's drive_close would have skipped silently).
3478 while let Some(&s) = self.tbc.last() {
3479 if s < from {
3480 break;
3481 }
3482 self.tbc.pop();
3483 }
3484 0
3485 }
3486
3487 /// P12-S7-B — spill the trace's current value for a register to
3488 /// the underlying `vm.stack[base + slot_offset]`. Required before
3489 /// an `Op::Closure` whose inner proto has an `in_stack: true`
3490 /// upval at `slot_offset` — the helper's `find_or_create_upval`
3491 /// captures a live pointer to `vm.stack[base + slot_offset]`,
3492 /// which must hold the right value at call time (trace IR's
3493 /// Variable hasn't yet been written back).
3494 ///
3495 /// Parameters arrive as i64 from the IR: `slot_offset` is the
3496 /// caller-frame register index (`u32` in practice, depth=0
3497 /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3498 /// `crate::runtime::value::raw` byte for the slot's RegKind;
3499 /// `raw_bits` is the trace Variable's `use_var` payload
3500 /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3501 /// raw `Gc::as_ptr` cast).
3502 #[doc(hidden)]
3503 pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3504 let Some(f) = self.jit_last_lua_frame() else {
3505 self.jit.pending_err =
3506 Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3507 return;
3508 };
3509 let idx = (f.base as usize) + (slot_offset as usize);
3510 if self.stack.len() <= idx {
3511 self.stack.resize(idx + 1, Value::Nil);
3512 }
3513 // SAFETY: caller (trace JIT IR emit) provides matching
3514 // `(tag, raw_bits)` — same shape produced by Value::unpack.
3515 let v = unsafe {
3516 crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3517 };
3518 self.stack[idx] = v;
3519 }
3520
3521 /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3522 /// Mirrors the interp arm (this file ~L5316): copies the
3523 /// generator/state/control triple from `R[A..=A+2]` to
3524 /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3525 /// the iterator function via `begin_call`. v2 only handles
3526 /// `Value::Native` iterators (the canonical `ipairs_iter` /
3527 /// `next` builtins) — a Lua-closure iterator would push a Lua
3528 /// frame mid-trace, breaking `recording_frame_base`, so we
3529 /// deopt by parking a `pending_err` and returning `-1`.
3530 ///
3531 /// `slot_offset` is the caller-frame register index (=
3532 /// `inst.a()` decoded from a u32-wide field). `nvars` is
3533 /// `inst.c() as i32` — the caller's expected return count.
3534 /// P12-S12-C v1 — refresh only the raw payload of
3535 /// `vm.stack[base + slot_offset]`, preserving its existing
3536 /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3537 /// uses this when the slot's `RegKind` is `Unset` (no compile-
3538 /// time tag info; commonly `Str` slots which the trace doesn't
3539 /// model). The interp's previous execution of the same op
3540 /// already populated the slot with the right tag — the trace
3541 /// only needs to swap in its current raw value.
3542 #[doc(hidden)]
3543 pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3544 let Some(f) = self.jit_last_lua_frame() else {
3545 return;
3546 };
3547 let idx = (f.base as usize) + (slot_offset as usize);
3548 if idx >= self.stack.len() {
3549 return;
3550 }
3551 let (tag, _) = self.stack[idx].unpack();
3552 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3553 self.stack[idx] = unsafe {
3554 crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3555 };
3556 }
3557
3558 /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3559 ///
3560 /// Mirrors the interp arm (this file ~L5112): `self.top =
3561 /// base + a + n; concat_run(base + a)`. Result lands at
3562 /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3563 /// deopt (any error from `concat_run` OR detection that the
3564 /// metamethod path was taken — `concat_run` returns `Ok(())`
3565 /// after `begin_meta_call` which has pushed a Lua frame the
3566 /// trace can't safely continue past).
3567 ///
3568 /// The frame-push detection uses `pre/post frames.len()` and
3569 /// unwinds any pushed frames before deopting, so the
3570 /// dispatcher's existing deopt path sees a clean stack.
3571 #[doc(hidden)]
3572 pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3573 if self.jit.pending_err.is_some() {
3574 return -1;
3575 }
3576 let Some(f) = self.jit_last_lua_frame() else {
3577 self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3578 return -1;
3579 };
3580 let abs_a = f.base + slot_offset;
3581 self.top = abs_a + n as u32;
3582 let pre_frames = self.frames.len();
3583 let result = self.concat_run(abs_a);
3584 let post_frames = self.frames.len();
3585 // Frame-push = metamethod path taken (begin_meta_call pushed
3586 // a Lua frame). The trace can't continue past it; unwind +
3587 // deopt so interp redoes Op::Concat in the slow path.
3588 while self.frames.len() > pre_frames {
3589 frames_pop_sync(&mut self.frames, &mut self.frames_top);
3590 }
3591 if let Err(e) = result {
3592 self.jit.pending_err = Some(e);
3593 return -1;
3594 }
3595 if post_frames > pre_frames {
3596 self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3597 return -1;
3598 }
3599 0
3600 }
3601
3602 /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3603 /// accumulator buffer pool, returning a raw pointer. The trace
3604 /// fn's IR holds this pointer in a stack slot through the loop
3605 /// and calls `jit_str_buf_extend` per iter. If the pool is
3606 /// empty, allocate fresh.
3607 ///
3608 /// Safety: the returned pointer is valid until
3609 /// `jit_str_buf_release` is called or the Vm is dropped. The
3610 /// caller MUST not retain it across `enter_jit` boundaries.
3611 #[doc(hidden)]
3612 pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3613 let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3614 // Move into a Box so the pointer is stable until release.
3615 Box::into_raw(Box::new(buf))
3616 }
3617
3618 /// P14-S14-B v2 — return a previously-acquired buffer to the
3619 /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3620 /// buffer is `clear`ed (capacity retained) so the next acquire
3621 /// gets a ready-to-extend Vec.
3622 ///
3623 /// Safety: `buf` must have been returned by a prior
3624 /// `jit_str_buf_acquire` on the same Vm.
3625 #[doc(hidden)]
3626 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3627 pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3628 if buf.is_null() {
3629 return;
3630 }
3631 // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3632 let mut owned = unsafe { Box::from_raw(buf) };
3633 owned.clear();
3634 if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3635 self.jit.str_buf_pool.push(*owned);
3636 }
3637 // Else: drop the buffer.
3638 }
3639
3640 /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3641 /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3642 /// the piece slot) and passes it through; we treat it as a
3643 /// `*mut LuaStr` and append its bytes.
3644 ///
3645 /// Returns 0 on success, -1 if the piece isn't a Str (would
3646 /// trip __concat metamethod path → deopt to interp).
3647 ///
3648 /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3649 /// trace's piece slot raw bits.
3650 #[doc(hidden)]
3651 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3652 pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3653 if buf.is_null() || str_ptr == 0 {
3654 return -1;
3655 }
3656 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3657 let buf = unsafe { &mut *buf };
3658 let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3659 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3660 let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3661 buf.extend_from_slice(bytes);
3662 0
3663 }
3664
3665 /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3666 /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3667 /// the trace to write into the accumulator slot.
3668 ///
3669 /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3670 /// (the v2 hard cap; the trace deopts).
3671 ///
3672 /// Safety: `buf` from prior `acquire`. The buffer is left
3673 /// CLEAR (drained) ready for `release`.
3674 #[doc(hidden)]
3675 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3676 pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3677 if buf.is_null() {
3678 return 0;
3679 }
3680 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3681 let buf = unsafe { &mut *buf };
3682 let bytes = std::mem::take(buf);
3683 // v2 hard cap at 256KB per RFC Q3.
3684 if bytes.len() > 256 * 1024 {
3685 return 0;
3686 }
3687 let gc = self.heap.intern(&bytes);
3688 gc.as_ptr() as i64
3689 }
3690
3691 /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
3692 ///
3693 /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
3694 /// v3: ipairs `inext` fast path at the top — skip begin_call
3695 /// when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
3696 /// R[A+2]=Int.
3697 /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
3698 /// caller-provided buffers + return R[A+4]'s tag byte. Lets
3699 /// emit skip 3 separate `luna_jit_stack_load` calls and 1
3700 /// `luna_jit_stack_tag` call by reading the buffer via
3701 /// cranelift `stack_load` IR instead. Returns -1 on deopt.
3702 #[doc(hidden)]
3703 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
3704 pub fn jit_op_tforcall(
3705 &mut self,
3706 slot_offset: u32,
3707 nvars: i32,
3708 ctrl_out: *mut i64,
3709 key_out: *mut i64,
3710 val_out: *mut i64,
3711 ) -> i64 {
3712 if self.jit.pending_err.is_some() {
3713 return -1;
3714 }
3715 let Some(f) = self.jit_last_lua_frame() else {
3716 self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
3717 return -1;
3718 };
3719 let abs = f.base + slot_offset;
3720 let need = (abs + 7) as usize;
3721 if self.stack.len() < need {
3722 self.stack.resize(need, Value::Nil);
3723 }
3724 // v3 fast path.
3725 let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
3726 && std::ptr::fn_addr_eq(
3727 n.f,
3728 crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
3729 )
3730 && let Value::Table(t) = self.stack[(abs + 1) as usize]
3731 && t.metatable().is_none()
3732 && let Value::Int(i) = self.stack[(abs + 2) as usize]
3733 {
3734 let next_i = i.wrapping_add(1);
3735 let v = t.get_int(next_i);
3736 if v.is_nil() {
3737 self.stack[(abs + 4) as usize] = Value::Nil;
3738 } else {
3739 self.stack[(abs + 4) as usize] = Value::Int(next_i);
3740 if (nvars as usize) >= 2 {
3741 self.stack[(abs + 5) as usize] = v;
3742 }
3743 for j in 2..nvars as usize {
3744 let slot = abs + 4 + j as u32;
3745 if (slot as usize) < self.stack.len() {
3746 self.stack[slot as usize] = Value::Nil;
3747 }
3748 }
3749 }
3750 true
3751 } else {
3752 false
3753 };
3754 if !took_fast_path {
3755 // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
3756 // route through begin_call. Lua-closure iters would push
3757 // a Lua frame mid-trace → deopt.
3758 self.stack[(abs + 4) as usize] = self.stack[abs as usize];
3759 self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
3760 self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
3761 if !matches!(self.stack[abs as usize], Value::Native(_)) {
3762 self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
3763 return -1;
3764 }
3765 if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
3766 self.jit.pending_err = Some(e);
3767 return -1;
3768 }
3769 }
3770 // v4 batched writeback — fill the caller's buffers with the
3771 // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
3772 // reload via cranelift `stack_load` instead of separate
3773 // `luna_jit_stack_load` helper calls.
3774 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3775 let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
3776 let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
3777 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3778 let key_raw = unsafe { key_rv.zero };
3779 let val_raw = if (nvars as usize) >= 2 {
3780 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3781 unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
3782 } else {
3783 0u64
3784 };
3785 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3786 unsafe {
3787 ctrl_out.write(ctrl_raw as i64);
3788 key_out.write(key_raw as i64);
3789 val_out.write(val_raw as i64);
3790 }
3791 key_tag as i64
3792 }
3793
3794 /// P12-S12-B-v2 — load the raw `i64` payload of
3795 /// `vm.stack[base + slot_offset]` for the active trace's head
3796 /// Lua frame. Used to reload trace IR `Variable`s after a
3797 /// helper has written to `vm.stack` directly (e.g. TForCall's
3798 /// iter results land at `R[A+4..A+4+nvars]`).
3799 #[doc(hidden)]
3800 pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3801 let Some(f) = self.jit_last_lua_frame() else {
3802 return 0;
3803 };
3804 let idx = (f.base as usize) + (slot_offset as usize);
3805 if idx >= self.stack.len() {
3806 return 0;
3807 }
3808 let v = self.stack[idx];
3809 let (_, raw) = v.unpack();
3810 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3811 unsafe { raw.zero as i64 }
3812 }
3813
3814 /// P12-S12-B-v2 — read the tag byte of
3815 /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3816 /// to dispatch on the iterator's return-key tag at runtime
3817 /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3818 /// deopt for v2).
3819 #[doc(hidden)]
3820 pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3821 let Some(f) = self.jit_last_lua_frame() else {
3822 return crate::runtime::value::raw::NIL;
3823 };
3824 let idx = (f.base as usize) + (slot_offset as usize);
3825 if idx >= self.stack.len() {
3826 return crate::runtime::value::raw::NIL;
3827 }
3828 self.stack[idx].unpack().0
3829 }
3830
3831 /// P12-S4-step4b — push a Lua frame onto the call stack with
3832 /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3833 /// at trace side-exits to recreate the inlined call activations
3834 /// the lowerer compiled past. The contract (enforced by the
3835 /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3836 /// `nresults` is the caller's expected count (today always 1
3837 /// because the lowerer bails Op::Call C != 2), and the caller
3838 /// has already called `jit_ensure_stack` to cover
3839 /// `[0..base + cl.proto.max_stack)`.
3840 #[doc(hidden)]
3841 pub fn jit_push_inlined_frame(
3842 &mut self,
3843 cl: Gc<LuaClosure>,
3844 base: u32,
3845 pc: u32,
3846 nresults: i32,
3847 ) {
3848 frames_push_sync(
3849 &mut self.frames,
3850 &mut self.frames_top,
3851 CallFrame::Lua(Frame {
3852 closure: cl,
3853 base,
3854 pc,
3855 // Lua call ABI: callee R[0] sits at caller R[A+1], so
3856 // callee.base = caller.base + A + 1; func_slot is
3857 // caller.base + A = callee.base - 1.
3858 func_slot: base - 1,
3859 n_varargs: 0,
3860 nresults,
3861 hook_oldpc: u32::MAX,
3862 from_c: false,
3863 tm: None,
3864 is_hook: false,
3865 tailcalls: 0,
3866 }),
3867 );
3868 }
3869
3870 /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
3871 /// should set to `false` so `load`/`loadstring` reject bytecode input
3872 /// (which bypasses parser limits and could exploit verifier gaps).
3873 pub fn set_bytecode_loading(&mut self, enabled: bool) {
3874 self.bytecode_loading = enabled;
3875 }
3876
3877 /// Current bytecode-loading gate state.
3878 pub fn bytecode_loading(&self) -> bool {
3879 self.bytecode_loading
3880 }
3881
3882 /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
3883 /// bytecode is a strictly larger trust surface than luna's own dump
3884 /// format (third-party toolchain bugs, malformed chunks, unknown
3885 /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
3886 /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
3887 pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
3888 self.puc_bytecode_loading = enabled;
3889 }
3890
3891 /// Current PUC bytecode-loading gate state.
3892 pub fn puc_bytecode_loading(&self) -> bool {
3893 self.puc_bytecode_loading
3894 }
3895
/// Default loader input budget: 256 MiB (268 435 456 bytes).
///
/// Both `Vm::load` and the Lua-level `load(reader, ...)` refuse
/// sources whose byte length crosses this cap. They return the
/// PUC-shaped `not enough memory` error instead of letting the host
/// allocator try (and crash) to hold the next chunk.
pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 << 20;
3903
3904 /// Set the loader input byte budget (see
3905 /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
3906 /// effectively disable. Smaller caps are honored verbatim — a 0
3907 /// cap rejects every non-empty source.
3908 pub fn set_loader_input_budget(&mut self, bytes: usize) {
3909 self.loader_input_budget = bytes;
3910 }
3911
3912 /// Current loader input byte budget.
3913 pub fn loader_input_budget(&self) -> usize {
3914 self.loader_input_budget
3915 }
3916
3917 /// Take the error traceback captured at the latest error point and
3918 /// reset it. Embedders should call this immediately after a failed
3919 /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3920 /// entry clears it. Returns `None` if no error was in flight.
3921 pub fn take_error_traceback(&mut self) -> Option<String> {
3922 self.error_traceback
3923 .take()
3924 .map(|b| String::from_utf8_lossy(&b).into_owned())
3925 }
3926
3927 /// Arm the soft memory cap (P09 embedding). The run loop checks the
3928 /// heap's tracked byte usage between dispatch turns; on overshoot it
3929 /// first runs a full collect, and if `bytes` still exceeds the cap it
3930 /// raises a catchable `"memory cap exceeded"` Lua error and disarms
3931 /// itself (fire-once: re-arm before the next `call_value` if reusing
3932 /// the Vm across requests). `None` removes the cap. The accounting is
3933 /// approximate — internal Vec/Box capacity overhead is not tracked,
3934 /// so embedders should size the cap with ~2× margin over the desired
3935 /// hard limit and additionally bound the Vm's lifetime (drop after
3936 /// each request).
3937 pub fn set_memory_cap(&mut self, cap: Option<usize>) {
3938 self.heap.mem_cap = cap;
3939 }
3940
3941 /// Approximate bytes the heap is currently holding. Object shells plus
3942 /// every table's internal array/hash boxes (tracked via
3943 /// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
3944 /// bytecode and closure upvalue slices still go uncounted — this is a
3945 /// lower bound, not a precise `malloc_stats`-style total.
3946 pub fn memory_used(&self) -> usize {
3947 self.heap.bytes()
3948 }
3949
3950 /// Read upvalue slot `i` of the native function currently on top of the
3951 /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3952 /// when no native is running. Public so the C ABI trampoline can fetch
3953 /// the host C function pointer it stashed there at registration time.
3954 pub fn running_native_upvalue(&self, i: usize) -> Value {
3955 match self.running_natives.last() {
3956 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3957 Some(nc) => unsafe {
3958 let upvals = &(*nc.as_ptr()).upvals;
3959 upvals.get(i).copied().unwrap_or(Value::Nil)
3960 },
3961 None => Value::Nil,
3962 }
3963 }
3964
3965 /// Register a table for finalization if its (just-set) metatable carries a
3966 /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3967 /// `__gc` to the metatable afterwards does not retroactively register).
3968 pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3969 if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3970 self.heap.register_finalizable(t);
3971 }
3972 }
3973
3974 /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3975 /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3976 /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3977 /// metatable that `newproxy` returned, which then needs to flow through.
3978 /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3979 /// would re-check at metatable-set time); luna's only userdata
3980 /// finalizables today come via `newproxy`, which registers itself.
3981 #[allow(dead_code)]
3982 pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3983 if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3984 self.heap.register_finalizable_userdata(u);
3985 }
3986 }
3987
3988 /// Run pending `__gc` finalizers (objects the collector resurrected for
3989 /// finalization). Finalizer errors are swallowed — PUC turns them into a
3990 /// warning; they must never propagate to the mutator. Reentrancy-guarded.
3991 fn run_finalizers(&mut self) {
3992 let _ = self.run_finalizers_or_err();
3993 }
3994
3995 fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
3996 if self.gc_finalizing {
3997 return Ok(());
3998 }
3999 let pending = self.heap.take_tobefnz();
4000 if pending.is_empty() {
4001 return Ok(());
4002 }
4003 self.gc_finalizing = true;
4004 let mut first_err: Option<LuaError> = None;
4005 for obj in pending {
4006 let gc = self.get_mm(obj, Mm::Gc);
4007 // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
4008 // schedule the object for finalization (`__gc = true` is the
4009 // canonical placeholder); only call it at finalize time when it
4010 // is actually a function. gc.lua 5.2 :412 wires up exactly this
4011 // sentinel and then expects no call.
4012 let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
4013 if callable {
4014 // PUC `GCTM` sets `CIST_FIN` on the new ci so
4015 // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
4016 // `name = "__gc"`. luna threads the same outcome through the
4017 // generic `pending_tm` slot: the Lua frame born from this
4018 // call consumes it in `push_frame`. Saved/restored around the
4019 // call in case the handler is a native (which never pops it).
4020 // Bare event name; `frame_name` / `c_frame_name` add the
4021 // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
4022 // the convention used by `__close`, `__index`, …
4023 let saved_tm = self.pending_tm.replace("gc");
4024 // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
4025 // pcall, so `getinfo(2).namewhat` inside the finalizer reads
4026 // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
4027 // luna mirrors by temporarily tagging the current top Lua
4028 // frame's `tm` to "__gc" for the duration of the call.
4029 let caller_tm_idx = self
4030 .frames
4031 .iter()
4032 .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
4033 let saved_caller_tm = caller_tm_idx.and_then(|i| {
4034 if let CallFrame::Lua(fr) = &mut self.frames[i] {
4035 let prev = fr.tm;
4036 fr.tm = Some("gc");
4037 Some(prev)
4038 } else {
4039 None
4040 }
4041 });
4042 if let Err(e) = self.call_value(gc, &[obj]) {
4043 // PUC 5.1 GCTM raised the finalizer's error to the
4044 // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
4045 // baselines on `not pcall(collectgarbage)`). 5.2/5.3
4046 // wrapped it in `error in __gc metamethod (msg)` first
4047 // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
4048 // introduced the warning system and switched to "warn
4049 // then continue" — never re-raise, just route the
4050 // wrapped message through `warn`. gc.lua 5.5 :378 wires
4051 // up `_WARN` capture under the `if T then …` block to
4052 // baseline on the same wrapped string.
4053 if self.version >= LuaVersion::Lua54 {
4054 let inner = self.error_text(&e);
4055 let msg = format!("error in __gc metamethod ({inner})");
4056 self.emit_warn(msg.as_bytes(), false);
4057 } else if first_err.is_none() {
4058 let wrapped = if self.version >= LuaVersion::Lua52 {
4059 let inner = self.error_text(&e);
4060 let msg = format!("error in __gc metamethod ({inner})");
4061 let s = Value::Str(self.heap.intern(msg.as_bytes()));
4062 LuaError(s)
4063 } else {
4064 e
4065 };
4066 first_err = Some(wrapped);
4067 }
4068 }
4069 self.pending_tm = saved_tm;
4070 if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
4071 && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
4072 {
4073 fr.tm = prev; // prev is Option<&'static str>; restore exactly
4074 }
4075 }
4076 }
4077 self.gc_finalizing = false;
4078 match first_err {
4079 Some(e) => Err(e),
4080 None => Ok(()),
4081 }
4082 }
4083
4084 /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
4085 /// Crosses up to three phases per call:
4086 /// 1. Pause → seed Propagate (`gc_start_propagate`)
4087 /// 2. Propagate → drain gray up to `budget`; on exhaustion run atomic
4088 /// (`gc_finish_atomic` → tobefnz populated; finalizers
4089 /// run via `run_finalizers`) and enter Sweep
4090 /// 3. Sweep → `gc_sweep_step` up to (residual) `budget`
4091 /// Returns true when this call completed the cycle's sweep (back to
4092 /// Pause). The budget is spent generously across phases — a large `n`
4093 /// can finish a whole cycle in one call (PUC stop-the-world step).
4094 pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
4095 // Re-entry guard: never recurse — `run_finalizers` calls Lua code
4096 // that may hit a safe point and try to step again. Re-entry was OK
4097 // under STW (collect_garbage had its own guard) but here the
4098 // intermediate phase state would corrupt.
4099 if self.gc_finalizing {
4100 return false;
4101 }
4102 if self.heap.gc_phase_is_pause() {
4103 let (roots, extra) = self.gc_roots();
4104 self.heap.gc_start_propagate(&roots, &extra);
4105 }
4106 if self.heap.gc_phase_is_propagate() {
4107 if !self.heap.gc_step_propagate(budget) {
4108 return false;
4109 }
4110 self.heap.gc_finish_atomic();
4111 // any __gc scheduled by atomic — run before sweep so a finalizer
4112 // re-registering `self` re-enters the next cycle, not this sweep
4113 self.run_finalizers();
4114 }
4115 // either we just transitioned, or we entered already in Sweep, or
4116 // a finalizer started a new cycle (gc_sweep_step is a no-op then)
4117 self.heap.gc_sweep_step(budget)
4118 }
4119
4120 // ---- frames & calls ----
4121
4122 /// Begin calling stack[func_slot] with `nargs` (None: up to self.top).
4123 /// Returns true if a Lua frame was pushed (the dispatch loop continues
4124 /// there), false if a native completed inline.
4125 fn begin_call(
4126 &mut self,
4127 func_slot: u32,
4128 nargs: Option<u32>,
4129 nresults: i32,
4130 from_c: bool,
4131 ) -> Result<bool, LuaError> {
4132 let mut nargs = match nargs {
4133 Some(n) => n,
4134 None => self.top - (func_slot + 1),
4135 };
4136 // Consume `pending_is_tail` at the boundary: a tail-call op sets it
4137 // only for the immediately-following Lua activation. Native dispatch
4138 // (or `__call` resolution) below must not let it leak to the next
4139 // begin_call's frame; restore it just before push_frame for the Lua
4140 // arm so its meaning is preserved across __call chaining.
4141 let tailcalls = std::mem::take(&mut self.pending_tailcalls);
4142 // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
4143 // is inserted before the value so it becomes the first argument, and a
4144 // chain of `__call` tables resolves down to a real function.
4145 let mut chain = 0u32;
4146 loop {
4147 match self.stack[func_slot as usize] {
4148 Value::Closure(cl) => {
4149 // P11-S2c.B JIT fast path: if the Proto's body fits
4150 // the int-arith whitelist, every arg is `Value::Int`,
4151 // and the cached arity matches, skip frame setup and
4152 // run the cached native fn in-place.
4153 if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
4154 self.pending_tailcalls = tailcalls;
4155 return Ok(false);
4156 }
4157 self.pending_tailcalls = tailcalls;
4158 self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
4159 // P12-S4-step0 — trace-on-call trigger. The frame
4160 // we just pushed is the callee whose body the
4161 // recorder will trace. Bump the per-Proto call
4162 // counter; once it crosses `CALL_HOT_THRESHOLD`
4163 // and no other trace is in flight, snapshot the
4164 // callee's register window (R[0..max_stack]) and
4165 // begin recording at `pc=0`. This is what unlocks
4166 // tracing for functions whose body has no negative
4167 // `Op::Jmp` back-edge (`fib`, recursive helpers).
4168 //
4169 // Gated on `trace_jit_enabled`, so the default
4170 // dispatch pays a single not-taken branch.
4171 if self.jit.trace_enabled {
4172 let proto = cl.proto;
4173 let c = proto.call_hot_count.get();
4174 if c < u32::MAX / 2 {
4175 proto.call_hot_count.set(c + 1);
4176 }
4177 // P13-S13-H — relaxed call-trigger:
4178 // `c >= THRESHOLD` (was `c == THRESHOLD`) +
4179 // `!already_cached` short-circuit. Lets a
4180 // discarded short call-trigger close retry
4181 // on the next call (fib(10/15/20/25)
4182 // pathology — first capture is base-case
4183 // [Lt,Jmp,Return1]; coverage-heuristic
4184 // discards; next call gets to record at a
4185 // potentially deeper recursion point).
4186 // Without `already_cached`, the relaxed
4187 // condition would re-record over a cached
4188 // trace every call.
4189 //
4190 // P13-S13-K — additionally short-circuit on
4191 // `proto.trace_gave_up`. The S13-I discard
4192 // cap force-compiles a partial trace and
4193 // flips this flag; subsequent calls into
4194 // this Proto skip the RefCell borrow + Vec
4195 // scan entirely.
4196 if proto.trace_gave_up.get() {
4197 return Ok(true);
4198 }
4199 let call_already_cached =
4200 proto.traces.borrow().iter().any(|t| t.head_pc == 0);
4201 if c >= crate::jit::trace::CALL_HOT_THRESHOLD
4202 && self.jit.active_trace.is_none()
4203 && !call_already_cached
4204 {
4205 // The new frame is on top: index in
4206 // `self.frames` is `len() - 1`.
4207 let frame_idx = self.frames.len() - 1;
4208 // Snapshot R[0..max_stack] at the callee's
4209 // base. `push_frame` resized `self.stack`
4210 // to `base + max_stack`, so this window is
4211 // guaranteed in-bounds.
4212 let f = match &self.frames[frame_idx] {
4213 CallFrame::Lua(f) => f,
4214 _ => unreachable!("push_frame just pushed a Lua frame"),
4215 };
4216 let max_stack = cl.proto.max_stack as usize;
4217 let base_us = f.base as usize;
4218 let mut entry_tags = Vec::with_capacity(max_stack);
4219 for i in 0..max_stack {
4220 let (tag, _) = self.stack[base_us + i].unpack();
4221 entry_tags.push(tag);
4222 }
4223 self.jit.active_trace =
4224 Some(Box::new(crate::jit::trace::TraceRecord::start(
4225 cl.proto, 0, entry_tags, true,
4226 )));
4227 self.jit.recording_frame_base = frame_idx;
4228 }
4229 }
4230 return Ok(true);
4231 }
4232 Value::Native(nc) => {
4233 // v1.1 B10 Stage 2 — async-marked NativeClosure.
4234 // Route through the cooperative-yield mechanism
4235 // when async_mode is on; reject when called from
4236 // a sync `eval`/`call_value` path (would have no
4237 // executor to drive the returned future).
4238 if nc.is_async {
4239 if !self.async_mode {
4240 let s = Value::Str(
4241 self.heap.intern(b"async native called in sync context"),
4242 );
4243 self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
4244 return Err(LuaError(s));
4245 }
4246 // Same root-up bookkeeping as the sync path:
4247 // pin args + result-count expectation so a
4248 // collection across the suspend boundary
4249 // keeps the arg window live.
4250 self.native_nresults = nresults;
4251 self.gc_top = func_slot + nargs + 1;
4252 // v1.3 Phase AS — fire the "call" hook BEFORE
4253 // building the future. Mirrors the sync native
4254 // path's `hook_call(true, nargs)` site
4255 // (`exec.rs` further down) so embedders with a
4256 // Rust debug hook installed see a Call event
4257 // for async natives identical to the sync
4258 // path. The matching "return" hook fires from
4259 // `commit_async_native_result` in
4260 // `async_drive.rs` after the future resolves.
4261 // Placement follows audit §"Open questions"
4262 // Q6: after the `native_nresults` / `gc_top`
4263 // pin, before the future is constructed, so a
4264 // hook body that triggers GC observes the
4265 // correct pinned window. On hook error the
4266 // sentinel never returns and
4267 // `pending_async_native_*` remain `None` —
4268 // the executor sees `DispatchOutcome::Error`
4269 // (audit §A.1 edge cases).
4270 self.hook_call(true, nargs)?;
4271 // Transmute the stored NativeFn back to its
4272 // real AsyncNativeFn shape. Sound because
4273 // `set_async_native` / `create_async_native`
4274 // installed an AsyncNativeFn through the
4275 // identically-sized fn-pointer slot, and the
4276 // `is_async` marker bit is what records that
4277 // fact.
4278 let async_fn: crate::vm::async_drive::AsyncNativeFn =
4279 // SAFETY: same-size fn pointers; provenance
4280 // preserved through `mem::transmute`. The
4281 // `is_async` marker is the only safe-to-call
4282 // gate, set exclusively by
4283 // `Vm::create_async_native`.
4284 unsafe { std::mem::transmute(nc.f) };
4285 let vm_ptr: *mut Vm = self;
4286 let fut = async_fn(vm_ptr, func_slot, nargs);
4287 // Stash the future + post-call context for
4288 // `drive_one` to surface to `EvalFuture::poll`.
4289 self.pending_async_native_fut = Some(fut);
4290 self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
4291 func_slot,
4292 nargs,
4293 nresults,
4294 gc_top: self.gc_top,
4295 });
4296 // Sentinel Err walked up to `drive_one` (same
4297 // shape as `host_yield_pending`'s budget yield).
4298 // Value::Nil — never seen by user code.
4299 return Err(LuaError(Value::Nil));
4300 }
4301 // pcall/xpcall are yieldable: rather than calling the
4302 // protected function through the Rust stack (which cannot be
4303 // suspended), push a continuation frame and drive the call
4304 // through the interpreter loop (PUC lua_pcallk). A yield
4305 // inside it is preserved with the thread's saved frames.
4306 use crate::runtime::value::NativeFn;
4307 if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
4308 return self.begin_pcall(func_slot, nargs, nresults);
4309 }
4310 if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
4311 return self.begin_xpcall(func_slot, nargs, nresults);
4312 }
4313 // pairs(t) with a __pairs metamethod calls it yieldably (PUC
4314 // luaB_pairs); without one, fall through to the plain native.
4315 if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
4316 let arg = self.stack[(func_slot + 1) as usize];
4317 if !self.get_mm(arg, Mm::Pairs).is_nil() {
4318 return self.begin_pairs(func_slot, nresults);
4319 }
4320 }
4321 // a native that collects (e.g. `collectgarbage`) roots up to
4322 // its own arguments — the caller's live registers all sit
4323 // below `func_slot` and stay rooted.
4324 self.native_nresults = nresults;
4325 self.gc_top = func_slot + nargs + 1;
4326 // Push the native onto the running-natives chain BEFORE
4327 // firing the call hook so that `debug.getinfo(level)` and
4328 // `arg_error` from inside the hook see this native as the
4329 // currently-running C function (db.lua :344 reads
4330 // `getinfo(2, "f").func` for the just-entered callee).
4331 // Popped after the matching return hook fires — even on
4332 // error, the pop must happen, so the body is bracketed
4333 // through a scope guard.
4334 self.running_natives.push(nc);
4335 self.running_native_slots.push((func_slot, nargs));
4336 // PUC C-call discipline: entering a C function sets
4337 // L->top to func + 1 + nargs, so a collect triggered
4338 // INSIDE the native (explicit `collectgarbage()`, or
4339 // an allocation crossing the GC threshold) roots the
4340 // whole caller window up to and including the
4341 // arguments. Without this raise the cursor is stale —
4342 // parked at some earlier, possibly much lower
4343 // safe-point — and the collect frees register-held
4344 // values of the native's own caller (UAF-C, v2.13
4345 // Track WUC). Never lower it: a re-entrant chain
4346 // (native → Lua → native) must keep the outermost
4347 // window rooted.
4348 self.gc_top = self.gc_top.max(func_slot + 1 + nargs);
4349 // PUC luaD_precall fires the "call" hook for C functions too.
4350 // A yield inside the native (coroutine.yield) propagates an
4351 // Err and the matching "return" hook fires on resume instead.
4352 if let Err(e) = self.hook_call(true, nargs) {
4353 self.running_natives.pop();
4354 self.running_native_slots.pop();
4355 return Err(e);
4356 }
4357 // P09: trap a Rust panic in the native and surface it as
4358 // a Lua error rather than letting it unwind through the
4359 // VM into the embedder. The VM's internal state may still
4360 // be inconsistent after a panic (half-pushed args,
4361 // dangling GC references), so embedders that catch this
4362 // class of error should drop and re-create the Vm — but
4363 // it's still better than tearing the host process down.
4364 // `AssertUnwindSafe` is sound because the caller is the
4365 // dispatch loop and any half-done state is fenced behind
4366 // the immediate Err return below.
4367 use std::panic::{AssertUnwindSafe, catch_unwind};
4368 let result =
4369 match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
4370 Ok(r) => r,
4371 Err(payload) => {
4372 let msg = panic_payload_str(&payload);
4373 let s = Value::Str(
4374 self.heap.intern(format!("native panic: {msg}").as_bytes()),
4375 );
4376 Err(LuaError(s))
4377 }
4378 };
4379 let nret = match result {
4380 Ok(n) => n,
4381 Err(e) => {
4382 // Stash the offending native's name BEFORE the
4383 // pop so a dying coroutine's traceback snapshot
4384 // can prepend `[C]: in function '<name>'`. Use
4385 // pushglobalfuncname (PUC walks package.loaded
4386 // to qualify); fall back to "?".
4387 self.errored_native =
4388 Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
4389 self.running_natives.pop();
4390 self.running_native_slots.pop();
4391 return Err(e);
4392 }
4393 };
4394 // PUC `luaD_poscall` fires the return hook BEFORE moving
4395 // results into the function's slot — at that point args
4396 // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
4397 // results above them at `[func_slot + 1 + nargs, …)`.
4398 // luna's `nat_return` has already written the results
4399 // into `[func_slot, func_slot + nret)`, so we replay PUC's
4400 // layout by copying the results up past the preserved
4401 // args, firing the hook (with ftransfer = nargs + 1, so
4402 // `getlocal(2, ftransfer..)` reads results), and then
4403 // copying back for `finish_results`. db.lua :541 reads
4404 // `getinfo("r").ftransfer` + `getlocal` to inspect a
4405 // returning native's results this way.
4406 if self.hook.ret
4407 && !self.in_hook
4408 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
4409 {
4410 let res_dst = func_slot + nargs + 1;
4411 let need = (res_dst + nret) as usize;
4412 if self.stack.len() < need {
4413 self.stack.resize(need, Value::Nil);
4414 }
4415 for i in (0..nret).rev() {
4416 self.stack[(res_dst + i) as usize] =
4417 self.stack[(func_slot + i) as usize];
4418 }
4419 // widen the C-frame's argument window for getlocal
4420 if let Some(slot) = self.running_native_slots.last_mut() {
4421 slot.1 = nargs + nret;
4422 }
4423 let hr = self.hook_return(true, nargs + 1, nret);
4424 if let Some(slot) = self.running_native_slots.last_mut() {
4425 slot.1 = nargs;
4426 }
4427 // restore results into the slot finish_results expects
4428 for i in 0..nret {
4429 self.stack[(func_slot + i) as usize] =
4430 self.stack[(res_dst + i) as usize];
4431 }
4432 self.running_natives.pop();
4433 self.running_native_slots.pop();
4434 hr?;
4435 } else {
4436 self.running_natives.pop();
4437 self.running_native_slots.pop();
4438 }
4439 self.finish_results(func_slot, nret, nresults);
4440 // the native may have allocated; collect with the results as
4441 // the live boundary (PUC checks GC after a call returns).
4442 self.maybe_collect_garbage(self.top);
4443 return Ok(false);
4444 }
4445 v => {
4446 let mm = self.get_mm(v, Mm::Call);
4447 if mm.is_nil() {
4448 return Err(self.call_err(v));
4449 }
4450 chain += 1;
4451 // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
4452 // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
4453 // and the 5.5 test exercises the new tight bound directly
4454 // (calls.lua :225 builds a 16-deep chain and expects the
4455 // 16th to error). 5.4 calls.lua :194 instead builds a 20-
4456 // deep chain and expects it to succeed.
4457 let cap = if self.version >= crate::version::LuaVersion::Lua55 {
4458 15
4459 } else {
4460 MAX_CCMT
4461 };
4462 if chain > cap {
4463 return Err(self.rt_err("'__call' chain too long"));
4464 }
4465 // slots above shift by one; at a call site those are dead
4466 // temps of the current frame
4467 self.stack.insert(func_slot as usize, mm);
4468 if self.top > func_slot {
4469 self.top += 1;
4470 }
4471 nargs += 1;
4472 }
4473 }
4474 }
4475 }
4476
4477 fn push_frame(
4478 &mut self,
4479 cl: Gc<LuaClosure>,
4480 func_slot: u32,
4481 nargs: u32,
4482 nresults: i32,
4483 from_c: bool,
4484 ) -> Result<(), LuaError> {
4485 if func_slot + 256 > MAX_LUA_STACK {
4486 // PUC `stackerror`: a stack overflow that surfaces while the
4487 // current activation is inside an xpcall message handler is
4488 // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
4489 // error handling". errors.lua :606 expects the inner pcall(loop)
4490 // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
4491 // message matching "error handling".
4492 let msg = if self.msgh_depth > 0 {
4493 "error in error handling"
4494 } else {
4495 "stack overflow"
4496 };
4497 return Err(self.rt_err(msg));
4498 }
4499 let proto = cl.proto;
4500 let nparams = proto.num_params as u32;
4501 // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
4502 // stack just below the new `base`, so a named vararg can be indexed
4503 // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
4504 // `[e1..em][p1..pn]` so the fixed params land at the new base.
4505 let n_varargs = if proto.is_vararg {
4506 nargs.saturating_sub(nparams)
4507 } else {
4508 0
4509 };
4510 if n_varargs > 0 {
4511 let s = (func_slot + 1) as usize;
4512 self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
4513 }
4514 let base = func_slot + 1 + n_varargs;
4515 let need = (base + proto.max_stack as u32) as usize;
4516 if self.stack.len() < need {
4517 self.stack.resize(need, Value::Nil);
4518 }
4519 // wipe the register window beyond the kept parameters (stale values —
4520 // required for GC-safety and codegen). The varargs below `base` survive.
4521 let kept = nargs.saturating_sub(n_varargs).min(nparams);
4522 // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
4523 // need` since `base + nparams <= base + max_stack = need` and `kept <=
4524 // nparams`. `slice::fill` lowers to a single memset on Copy types.
4525 unsafe {
4526 self.stack
4527 .get_unchecked_mut((base + kept) as usize..need)
4528 .fill(Value::Nil);
4529 }
4530 frames_push_sync(
4531 &mut self.frames,
4532 &mut self.frames_top,
4533 CallFrame::Lua(Frame {
4534 closure: cl,
4535 base,
4536 pc: 0,
4537 func_slot,
4538 nresults,
4539 hook_oldpc: u32::MAX,
4540 from_c,
4541 n_varargs,
4542 // single-shot consume: `close_slots` sets pending_tm before each
4543 // handler call; the next Lua frame born is that handler's.
4544 tm: self.pending_tm.take(),
4545 // `run_hook` sets `pending_is_hook` before dispatching the user
4546 // hook so its frame reports `namewhat = "hook"` via getinfo.
4547 is_hook: std::mem::take(&mut self.pending_is_hook),
4548 tailcalls: std::mem::take(&mut self.pending_tailcalls),
4549 }),
4550 );
4551 // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
4552 // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
4553 // the slot at `base + nparams`; the extras sit just below `base` from
4554 // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
4555 // hook; vararg.lua's contradictory expectations were already going to
4556 // fail either way (some asserts want `arg == nil`).
4557 if proto.has_compat_vararg_arg {
4558 let arg_slot = (base + nparams) as usize;
4559 let t = self.heap.new_table();
4560 {
4561 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4562 let tm = unsafe { t.as_mut() };
4563 for i in 0..n_varargs {
4564 let v = self.stack[(base - n_varargs + i) as usize];
4565 // bounded by `n_varargs` (≤ MAXUPVAL territory), well
4566 // below `MAX_ASIZE`
4567 let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
4568 }
4569 let nk = Value::Str(self.heap.intern(b"n"));
4570 tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
4571 .expect("'n' key");
4572 }
4573 // once-per-table barrier mirrors SETLIST: t is born BLACK during
4574 // Propagate and the bulk `set_int`/`set` calls above don't barrier
4575 self.heap
4576 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
4577 self.stack[arg_slot] = Value::Table(t);
4578 }
4579 // PUC luaD_precall fires the "call" hook with the new frame current, so
4580 // a hook calling debug.getinfo(2) sees the entered function. For a Lua
4581 // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
4582 // fixed params count — extras already live below `base`).
4583 // A frame born via OP_TailCall fires "tail call" instead (PUC
4584 // luaD_pretailcall) and skips the matching "return" hook on exit.
4585 let is_tail = self
4586 .frames
4587 .last()
4588 .and_then(|f| f.lua())
4589 .is_some_and(|f| f.tailcalls > 0);
4590 self.hook_call_with(false, nparams, is_tail)?;
4591 Ok(())
4592 }
4593
4594 /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4595 /// the protected call `f` through the interpreter loop. The protected
4596 /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4597 /// at `func_slot+1` lets its results land one slot above the continuation —
4598 /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4599 /// Always returns `Ok(true)`: a continuation is now on the stack to be
4600 /// resolved by the loop (even when `f` is a native that already ran inline).
4601 fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4602 if nargs == 0 {
4603 return Err(crate::vm::builtins::raise_str(
4604 self,
4605 "bad argument #1 to 'pcall' (value expected)",
4606 ));
4607 }
4608 if self.pcall_depth >= MAX_C_DEPTH {
4609 return Err(self.rt_err("C stack overflow"));
4610 }
4611 self.pcall_depth += 1;
4612 frames_push_sync(
4613 &mut self.frames,
4614 &mut self.frames_top,
4615 CallFrame::Cont(NativeCont {
4616 kind: ContKind::Pcall,
4617 func_slot,
4618 nresults,
4619 }),
4620 );
4621 // call f (slot func_slot+1) with the remaining args, asking for all
4622 // results; a yield or error inside propagates with the continuation kept
4623 // on the stack (caught by `unwind` / preserved across a yield).
4624 self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4625 Ok(true)
4626 }
4627
4628 /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4629 /// message handler is stashed in the continuation and the arguments are
4630 /// shifted down over the handler's slot so `f`'s args are contiguous.
4631 fn begin_xpcall(
4632 &mut self,
4633 func_slot: u32,
4634 nargs: u32,
4635 nresults: i32,
4636 ) -> Result<bool, LuaError> {
4637 if nargs < 2 {
4638 return Err(crate::vm::builtins::raise_str(
4639 self,
4640 "bad argument #2 to 'xpcall' (value expected)",
4641 ));
4642 }
4643 if self.pcall_depth >= MAX_C_DEPTH {
4644 return Err(self.rt_err("C stack overflow"));
4645 }
4646 self.pcall_depth += 1;
4647 // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4648 // close its gap so f's args become [f@+1, a1@+2, ...].
4649 let handler = self.stack[(func_slot + 2) as usize];
4650 // 5.1: `xpcall (f, err)` takes exactly two parameters — extra
4651 // arguments are NOT forwarded to `f` (5.2 added forwarding;
4652 // 5.1 calls f with zero args). v2.14 dialect fixture 5.1/519.
4653 let nfargs = if self.version <= crate::version::LuaVersion::Lua51 {
4654 0
4655 } else {
4656 nargs - 2
4657 };
4658 for i in 0..nfargs {
4659 self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4660 }
4661 self.top = func_slot + 2 + nfargs;
4662 frames_push_sync(
4663 &mut self.frames,
4664 &mut self.frames_top,
4665 CallFrame::Cont(NativeCont {
4666 kind: ContKind::Xpcall { handler },
4667 func_slot,
4668 nresults,
4669 }),
4670 );
4671 self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4672 Ok(true)
4673 }
4674
4675 /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4676 /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4677 /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4678 /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4679 /// results land exactly where `pairs`'s results belong.
4680 fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4681 let arg = self.stack[(func_slot + 1) as usize];
4682 let mm = self.get_mm(arg, Mm::Pairs);
4683 // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4684 self.stack[func_slot as usize] = mm;
4685 self.top = func_slot + 2;
4686 frames_push_sync(
4687 &mut self.frames,
4688 &mut self.frames_top,
4689 CallFrame::Cont(NativeCont {
4690 kind: ContKind::Pairs,
4691 func_slot,
4692 nresults,
4693 }),
4694 );
4695 self.begin_call(func_slot, Some(1), 4, true)?;
4696 Ok(true)
4697 }
4698
4699 /// The running (top) Lua frame. The interpreter only reads this while a Lua
4700 /// frame is on top — a continuation frame is never the running frame (it is
4701 /// consumed the instant the call it protects unwinds onto it).
4702 #[inline]
4703 fn top_frame(&self) -> &Frame {
4704 self.frames
4705 .last()
4706 .and_then(CallFrame::lua)
4707 .expect("running Lua frame")
4708 }
4709
4710 #[inline]
4711 fn top_frame_mut(&mut self) -> &mut Frame {
4712 self.frames
4713 .last_mut()
4714 .and_then(CallFrame::lua_mut)
4715 .expect("running Lua frame")
4716 }
4717
4718 /// Pad/announce results sitting at func_slot.
4719 pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4720 // v2.3 P1B-A: capture the call's high-water-mark before
4721 // setting the new top so we can Nil-clear slots that the
4722 // call temporarily wrote but no longer holds — matching
4723 // PUC's `L->top` discipline (slots past L->top are "free"
4724 // and the next push overwrites them). Without this clear,
4725 // a stale `Value::Closure` (e.g. the called function
4726 // itself, when wanted = 0) sits at `func_slot` and a
4727 // later GC with wider `gc_top` traces it after the
4728 // closure has been freed by a previous narrow safe-point
4729 // GC → heap-buffer-overflow in `Marker::header` (UAF-A
4730 // sort.lua AA case).
4731 let prev_top = self.top as usize;
4732 if wanted < 0 {
4733 self.top = func_slot + nret;
4734 } else {
4735 let wanted = wanted as u32;
4736 let need = (func_slot + wanted) as usize;
4737 if self.stack.len() < need {
4738 self.stack.resize(need, Value::Nil);
4739 }
4740 for i in nret..wanted {
4741 self.stack[(func_slot + i) as usize] = Value::Nil;
4742 }
4743 self.top = func_slot + wanted;
4744 }
4745 let new_top = self.top as usize;
4746 let clear_end = prev_top.min(self.stack.len());
4747 if new_top < clear_end {
4748 for slot in &mut self.stack[new_top..clear_end] {
4749 *slot = Value::Nil;
4750 }
4751 }
4752 }
4753
4754 /// v1.1 B10 Stage 1 — current Lua call-frame depth (read-only).
4755 /// Used by `EvalFuture` on the bootstrap poll to compute the
4756 /// `entry_depth` it will pass to subsequent resume slices.
4757 pub(crate) fn frame_count(&self) -> usize {
4758 self.frames.len()
4759 }
4760
4761 fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4762 let nret = self.top - func_slot;
4763 let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4764 self.stack.truncate(func_slot as usize);
4765 self.top = func_slot;
4766 out
4767 }
4768
4769 // ---- open upvalues ----
4770
4771 #[doc(hidden)]
4772 pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4773 match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4774 Ok(i) => self.open_upvals[i].1,
4775 Err(i) => {
4776 let uv = self.heap.new_upvalue(UpvalState::Open {
4777 slot,
4778 thread: self.current,
4779 });
4780 self.open_upvals.insert(i, (slot, uv));
4781 uv
4782 }
4783 }
4784 }
4785
4786 pub(crate) fn close_from(&mut self, slot: u32) {
4787 while let Some(&(s, uv)) = self.open_upvals.last() {
4788 if s < slot {
4789 break;
4790 }
4791 let v = self.stack[s as usize];
4792 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4793 unsafe { uv.as_mut() }.set_closed(v);
4794 self.heap
4795 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4796 self.open_upvals.pop();
4797 }
4798 }
4799
4800 /// Register a to-be-closed slot (TBC op / generic-for closing value).
4801 fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4802 let v = self.stack[slot as usize];
4803 if matches!(v, Value::Nil | Value::Bool(false)) {
4804 return Ok(()); // nil and false are silently ignored
4805 }
4806 if self.get_mm(v, Mm::Close).is_nil() {
4807 // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4808 // (a <type> value)"; the local's name comes from the running
4809 // frame's locvars at this pc.
4810 let tn = v.type_name();
4811 let f = self.top_frame();
4812 let reg = slot - f.base;
4813 let pc = (f.pc as usize).saturating_sub(1);
4814 let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4815 Some(n) => format!("variable '{n}'"),
4816 None => "to-be-closed slot".to_string(),
4817 };
4818 return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4819 }
4820 debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4821 self.tbc.push(slot);
4822 Ok(())
4823 }
4824
4825 /// Close upvalues and run `__close` handlers for slots ≥ `from`
4826 /// (handlers in reverse registration order; PUC luaF_close).
4827 fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
4828 self.close_from(from);
4829 // PUC: handlers run in reverse declaration order; an error raised by a
4830 // handler becomes the error object passed to the remaining ones, and
4831 // the rest are still closed. The last raised error propagates.
4832 let mut pending = err;
4833 let mut result = Ok(());
4834 let saved_err = self.closing_err;
4835 // On a normal close the handler runs within the closing function's
4836 // activation (debug parent = that function); during error unwinding the
4837 // function's frame is already gone, so the handler sits at the C
4838 // boundary instead (PUC: luaF_close runs after the ci is restored).
4839 let error_close = err.is_some();
4840 while let Some(&s) = self.tbc.last() {
4841 if s < from {
4842 break;
4843 }
4844 self.tbc.pop();
4845 let v = self.stack[s as usize];
4846 if matches!(v, Value::Nil | Value::Bool(false)) {
4847 continue;
4848 }
4849 let mm = self.get_mm(v, Mm::Close);
4850 if mm.is_nil() {
4851 // PUC `prepclosingmethod`: the __close metamethod was present
4852 // at OP_TBC (else we would have errored there) but has since
4853 // been removed/replaced. Treat as a non-callable target.
4854 let tn = self.obj_typename(v);
4855 let e = self.rt_err(&format!(
4856 "attempt to call a {tn} value (metamethod 'close')"
4857 ));
4858 pending = Some(e.0);
4859 result = Err(e);
4860 continue;
4861 }
4862 // root the pending error: a handler may trigger a collection
4863 self.closing_err = pending;
4864 // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
4865 // getinfo report the handler as "in metamethod 'close'". Saved/
4866 // restored around the call to cover the path where `mm` is a
4867 // native (`push_frame` never consumes it) or it raises before
4868 // reaching push_frame.
4869 let saved_tm = self.pending_tm.replace("close");
4870 // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
4871 // is nil on a normal close (5.4 locals.lua :875's
4872 // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
4873 // through the yield). PUC 5.5 dropped the trailing nil: a clean
4874 // close passes only `obj`, the error case still passes both
4875 // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
4876 // normal-close arms, n=2 for the error arm).
4877 let call = match pending {
4878 Some(e) => self.call_value_impl(mm, &[v, e], error_close),
4879 None => {
4880 if self.version >= LuaVersion::Lua55 {
4881 self.call_value_impl(mm, &[v], error_close)
4882 } else {
4883 self.call_value_impl(mm, &[v, Value::Nil], error_close)
4884 }
4885 }
4886 };
4887 self.pending_tm = saved_tm;
4888 if let Err(e) = call {
4889 pending = Some(e.0);
4890 result = Err(e);
4891 }
4892 }
4893 self.closing_err = saved_err;
4894 result
4895 }
4896
4897 /// Yieldable variant of `close_slots`: drive the chain of `__close`
4898 /// handlers for slots ≥ `from` through the interpreter loop with a
4899 /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
4900 /// suspends cleanly (the close iteration's state rides on the thread's
4901 /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
4902 /// applied to `luaF_close`. `after` runs when every slot is closed; if
4903 /// `after` is `Return` and we've returned past `entry_depth`,
4904 /// `Ok(Some(vals))` carries the result up to the host caller.
4905 fn begin_close(
4906 &mut self,
4907 from: u32,
4908 err: Option<Value>,
4909 after: AfterClose,
4910 entry_depth: usize,
4911 ) -> Result<Option<Vec<Value>>, LuaError> {
4912 self.close_from(from);
4913 self.drive_close(from, err, after, entry_depth)
4914 }
4915
4916 /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
4917 /// non-callable-mm error for an `__close` that was reset to a bad value
4918 /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
4919 /// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
4920 /// the interpreter then drives the handler and re-enters this driver via
4921 /// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
4922 /// the threaded error (if any) propagates or `after` fires.
4923 fn drive_close(
4924 &mut self,
4925 from: u32,
4926 mut pending: Option<Value>,
4927 after: AfterClose,
4928 entry_depth: usize,
4929 ) -> Result<Option<Vec<Value>>, LuaError> {
4930 loop {
4931 let drained = match self.tbc.last() {
4932 None => true,
4933 Some(&s) => s < from,
4934 };
4935 if drained {
4936 return self.finish_close_after(after, pending, entry_depth);
4937 }
4938 let s = self.tbc.pop().expect("tbc non-empty");
4939 let v = self.stack[s as usize];
4940 if matches!(v, Value::Nil | Value::Bool(false)) {
4941 continue;
4942 }
4943 let mm = self.get_mm(v, Mm::Close);
4944 if mm.is_nil() {
4945 let tn = self.obj_typename(v);
4946 let e = self.rt_err(&format!(
4947 "attempt to call a {tn} value (metamethod 'close')"
4948 ));
4949 pending = Some(e.0);
4950 continue;
4951 }
4952 // A real handler: stage [mm, v, (err?)] above the current top,
4953 // record the close iteration state in a Cont::Close, and let the
4954 // interpreter dispatch the handler. On return the run() head
4955 // re-enters this driver via the Cont::Close consumer.
4956 let func_slot = self.top;
4957 let error_close = pending.is_some();
4958 let need = (func_slot + 3) as usize;
4959 if self.stack.len() < need {
4960 self.stack.resize(need, Value::Nil);
4961 }
4962 self.stack[func_slot as usize] = mm;
4963 self.stack[func_slot as usize + 1] = v;
4964 // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
4965 // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
4966 let nargs = match pending {
4967 Some(e) => {
4968 self.stack[func_slot as usize + 2] = e;
4969 2u32
4970 }
4971 None => {
4972 if self.version >= LuaVersion::Lua55 {
4973 1u32
4974 } else {
4975 self.stack[func_slot as usize + 2] = Value::Nil;
4976 2u32
4977 }
4978 }
4979 };
4980 self.top = func_slot + 1 + nargs;
4981 // Root the pending error during the call (a handler may collect).
4982 let saved_err = self.closing_err;
4983 self.closing_err = pending;
4984 // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
4985 // for traceback / getinfo.
4986 let saved_tm = self.pending_tm.replace("close");
4987 frames_push_sync(
4988 &mut self.frames,
4989 &mut self.frames_top,
4990 CallFrame::Cont(NativeCont {
4991 kind: ContKind::Close(CloseCont {
4992 from,
4993 pending,
4994 after,
4995 }),
4996 func_slot,
4997 nresults: 0,
4998 }),
4999 );
5000 // PUC luaF_close runs a normal close *within* the closing
5001 // function's activation (debug parent = that function); during an
5002 // error unwind the function's frame is already gone and the
5003 // handler sits at the C boundary instead.
5004 let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
5005 self.pending_tm = saved_tm;
5006 self.closing_err = saved_err;
5007 r?;
5008 return Ok(None);
5009 }
5010 }
5011
5012 /// Fire `after` once every `__close` handler has run. `Block` propagates
5013 /// any remaining error or simply continues; `Return` performs OP_Return's
5014 /// tail (hook + frame pop + result delivery) and may surface results to
5015 /// the host when the function whose return triggered the close was the
5016 /// entry activation, but only on a clean drain — a pending error skips
5017 /// the return tail and propagates instead. `ResumeUnwind` pops the
5018 /// deferred Lua frame and re-raises, letting a handler's own error win
5019 /// over the original propagating one (PUC luaF_close).
5020 fn finish_close_after(
5021 &mut self,
5022 after: AfterClose,
5023 pending: Option<Value>,
5024 entry_depth: usize,
5025 ) -> Result<Option<Vec<Value>>, LuaError> {
5026 match after {
5027 AfterClose::Block => match pending {
5028 Some(e) => Err(LuaError(e)),
5029 None => Ok(None),
5030 },
5031 AfterClose::Return {
5032 abs_a,
5033 nret,
5034 from_native,
5035 } => match pending {
5036 Some(e) => Err(LuaError(e)),
5037 None => self.complete_return(abs_a, nret, from_native, entry_depth),
5038 },
5039 AfterClose::ResumeUnwind { func_slot, err } => {
5040 // The aborting Lua frame was popped before `begin_close`;
5041 // restore the catcher's stack window down to `func_slot` and
5042 // re-raise — preferring a handler-raised error over the
5043 // original (PUC luaF_close).
5044 self.stack.truncate(func_slot as usize);
5045 self.top = func_slot;
5046 self.tbc.retain(|&s| s < func_slot);
5047 Err(LuaError(pending.unwrap_or(err)))
5048 }
5049 }
5050 }
5051
5052 /// OP_Return's post-close tail: fire the "return" hook (frame still
5053 /// current), pop the Lua frame, slide results into `func_slot`, then
5054 /// either hand them to the host (`Ok(Some(vals))` when we've returned
5055 /// past `entry_depth`), leave them contiguous for an exposed
5056 /// pcall/xpcall continuation, or finish into the caller's expected
5057 /// result slot. Mirrors the synchronous OP_Return tail so both paths
5058 /// share semantics — the `from_native` flag selects the right "return"
5059 /// hook context for `hook_return`.
5060 fn complete_return(
5061 &mut self,
5062 abs_a: u32,
5063 nret: u32,
5064 from_native: bool,
5065 entry_depth: usize,
5066 ) -> Result<Option<Vec<Value>>, LuaError> {
5067 // ftransfer is the local index (1-based) of the first result, as
5068 // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
5069 // exposes locals starting at `frame.base` (= func_slot + 1 +
5070 // n_varargs for a vararg call), so the conversion is the absolute
5071 // result slot minus base, plus one to make it 1-based. db.lua 5.4
5072 // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
5073 // shape end-to-end.
5074 let ftransfer = self
5075 .frames
5076 .last()
5077 .and_then(CallFrame::lua)
5078 .map(|fr| {
5079 let raw = abs_a.saturating_sub(fr.base) + 1;
5080 // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
5081 // local injected at index `numparams + 1`, so getlocal
5082 // numbering shifts results past it (5.5 db.lua :539
5083 // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
5084 if fr.closure.proto.has_vararg_table_pseudo {
5085 raw + 1
5086 } else {
5087 raw
5088 }
5089 })
5090 .unwrap_or(1);
5091 // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
5092 // per tail call that collapsed into this activation, *after* its
5093 // own "return". `tailcalls` tracks that count exactly (PUC
5094 // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
5095 // "return" hook fires once even when the activation absorbed
5096 // multiple tail calls — only `istailcall` on getinfo surfaces the
5097 // collapse. 5.1 db.lua :366 pins the event ordering.
5098 let tailcalls = if self.version <= LuaVersion::Lua51 {
5099 self.frames
5100 .last()
5101 .and_then(|f| f.lua())
5102 .map(|f| f.tailcalls)
5103 .unwrap_or(0)
5104 } else {
5105 0
5106 };
5107 self.hook_return(from_native, ftransfer, nret)?;
5108 for _ in 0..tailcalls {
5109 self.hook_tail_return()?;
5110 }
5111 let CallFrame::Lua(fr) =
5112 frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
5113 else {
5114 unreachable!("returning from a non-Lua frame")
5115 };
5116 for i in 0..nret {
5117 self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
5118 }
5119 if self.frames.len() < entry_depth {
5120 self.top = fr.func_slot + nret;
5121 return Ok(Some(self.take_results(fr.func_slot)));
5122 } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
5123 self.top = fr.func_slot + nret;
5124 } else {
5125 self.finish_results(fr.func_slot, nret, fr.nresults);
5126 }
5127 Ok(None)
5128 }
5129
5130 #[doc(hidden)]
5131 pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
5132 match cl.upvals()[idx as usize].state() {
5133 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
5134 UpvalState::Closed(v) => v,
5135 }
5136 }
5137
5138 fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
5139 let uv = cl.upvals()[idx as usize];
5140 match uv.state() {
5141 UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
5142 UpvalState::Closed(_) => {
5143 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5144 unsafe { uv.as_mut() }.set_closed(v);
5145 // forward barrier: a closed upvalue is single-slot, so the
5146 // forward variant is cheaper than barrier_back (PUC uses
5147 // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
5148 // tables / threads).
5149 self.heap
5150 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
5151 }
5152 }
5153 }
5154
5155 // ---- register / error helpers ----
5156
5157 #[inline(always)]
5158 fn r(&self, base: u32, i: u32) -> Value {
5159 // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
5160 // at frame entry (`push_frame` sizes the stack up to base + max_stack),
5161 // and every bytecode-generated reference falls within `[0, max_stack)`.
5162 // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
5163 // reason. The bounds check would re-validate this invariant on every
5164 // op — the dispatch hot path can't afford it.
5165 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5166 unsafe { *self.stack.get_unchecked((base + i) as usize) }
5167 }
5168
5169 #[inline(always)]
5170 fn set_r(&mut self, base: u32, i: u32, v: Value) {
5171 // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
5172 // frame-entry contract.
5173 unsafe {
5174 *self.stack.get_unchecked_mut((base + i) as usize) = v;
5175 }
5176 }
5177
5178 #[doc(hidden)]
5179 pub fn rt_err(&mut self, msg: &str) -> LuaError {
5180 let text = match self.position_prefix() {
5181 Some(p) => format!("{p}{msg}"),
5182 None => msg.to_string(),
5183 };
5184 LuaError(Value::Str(self.heap.intern(text.as_bytes())))
5185 }
5186
5187 /// Error without the `chunk:line:` position prefix. PUC's
5188 /// `resume_error` (ldo.c) pushes its message as a bare literal,
5189 /// so `cannot resume dead coroutine` etc. must not be prefixed.
5190 pub(crate) fn plain_err(&mut self, msg: &str) -> LuaError {
5191 LuaError(Value::Str(self.heap.intern(msg.as_bytes())))
5192 }
5193
5194 pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
5195 let extra = self.subject_varinfo(v);
5196 let tn = self.obj_typename(v);
5197 self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
5198 }
5199
5200 /// Name the offending operand of the current instruction (PUC varinfo) for
5201 /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
5202 /// to the instruction's subject register(s); a native-raised error whose
5203 /// current instruction doesn't hold `bad` simply yields "".
5204 fn subject_varinfo(&self, bad: Value) -> String {
5205 use crate::vm::isa::Op;
5206 let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5207 return String::new();
5208 };
5209 let proto = f.closure.proto;
5210 let p: &crate::runtime::Proto = &proto;
5211 let pc = f.pc as usize;
5212 if pc == 0 || pc > p.code.len() {
5213 return String::new();
5214 }
5215 let instr = p.code[pc - 1];
5216 let mut cands: Vec<u32> = Vec::new();
5217 match instr.op() {
5218 // indexed reads / length / method: the table/object is in B
5219 Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5220 cands.push(instr.b());
5221 }
5222 // indexed writes / calls: the table/function is in A
5223 Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5224 cands.push(instr.a());
5225 }
5226 // arithmetic/bitwise: a register operand (B, and C unless constant)
5227 Op::Add
5228 | Op::Sub
5229 | Op::Mul
5230 | Op::Div
5231 | Op::Mod
5232 | Op::Pow
5233 | Op::IDiv
5234 | Op::BAnd
5235 | Op::BOr
5236 | Op::BXor
5237 | Op::Shl
5238 | Op::Shr => {
5239 cands.push(instr.b());
5240 if !instr.k() {
5241 cands.push(instr.c());
5242 }
5243 }
5244 Op::Unm | Op::BNot => cands.push(instr.b()),
5245 Op::Concat => {
5246 let a = instr.a();
5247 for r in a..a + instr.b() {
5248 cands.push(r);
5249 }
5250 }
5251 _ => {}
5252 }
5253 for reg in cands {
5254 if self.r(f.base, reg).raw_eq(bad) {
5255 return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5256 Some((kind, name)) => format!(" ({kind} '{name}')"),
5257 None => String::new(),
5258 };
5259 }
5260 }
5261 String::new()
5262 }
5263
5264 /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5265 /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5266 /// 'add')" when the call is a metamethod dispatched by the current opcode.
5267 fn call_err(&mut self, v: Value) -> LuaError {
5268 let extra = self.call_target_varinfo(v);
5269 let tn = self.obj_typename(v);
5270 self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5271 }
5272
5273 /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5274 /// frame before the call, so the opcode that triggered it lives in the
5275 /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5276 /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5277 fn call_target_varinfo(&self, bad: Value) -> String {
5278 use crate::vm::isa::Op;
5279 let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5280 return String::new();
5281 };
5282 let proto = f.closure.proto;
5283 let p: &crate::runtime::Proto = &proto;
5284 let pc = f.pc as usize;
5285 if pc == 0 || pc > p.code.len() {
5286 return String::new();
5287 }
5288 let instr = p.code[pc - 1];
5289 match instr.op() {
5290 Op::Call | Op::TailCall => {
5291 let reg = instr.a();
5292 if self.r(f.base, reg).raw_eq(bad) {
5293 match crate::vm::objname::getobjname(p, pc - 1, reg) {
5294 Some((kind, name)) => format!(" ({kind} '{name}')"),
5295 None => String::new(),
5296 }
5297 } else {
5298 String::new()
5299 }
5300 }
5301 op => match mm_event_name(op) {
5302 Some(ev) => format!(" (metamethod '{ev}')"),
5303 None => String::new(),
5304 },
5305 }
5306 }
5307
5308 /// "number has no integer representation", enriched (PUC luaG_tointerror)
5309 /// with a "(field 'x')"-style suffix naming the offending operand of the
5310 /// current arithmetic instruction when it can be recovered from bytecode.
5311 fn no_int_rep_err(&mut self) -> LuaError {
5312 let extra = self.bad_operand_varinfo();
5313 self.rt_err(&format!("number{extra} has no integer representation"))
5314 }
5315
5316 /// Inspect the current frame's faulting instruction: find the register
5317 /// operand holding a float with no integer representation and name it.
5318 fn bad_operand_varinfo(&self) -> String {
5319 let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5320 return String::new();
5321 };
5322 let proto = f.closure.proto;
5323 let p: &crate::runtime::Proto = &proto;
5324 let pc = f.pc as usize;
5325 if pc == 0 || pc > p.code.len() {
5326 return String::new();
5327 }
5328 let instr = p.code[pc - 1];
5329 let mut regs = vec![instr.b()];
5330 if !instr.k() {
5331 regs.push(instr.c());
5332 }
5333 for reg in regs {
5334 let v = self.r(f.base, reg);
5335 if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5336 return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5337 Some((kind, name)) => format!(" ({kind} '{name}')"),
5338 None => String::new(),
5339 };
5340 }
5341 }
5342 String::new()
5343 }
5344
5345 /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5346 /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5347 /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5348 /// native call), PUC pushes no prefix — match that by looking only at the
5349 /// topmost frame directly and bailing if it is anything but a Lua frame.
5350 pub(crate) fn position_prefix(&self) -> Option<String> {
5351 let f = self.frames.last().and_then(CallFrame::lua)?;
5352 let proto = f.closure.proto;
5353 if proto.source.as_bytes().is_empty() {
5354 return Some(self.stripped_prefix());
5355 }
5356 if proto.lines.is_empty() {
5357 return None;
5358 }
5359 let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5360 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5361 let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5362 let display = crate::vm::lib_debug::chunk_id(raw);
5363 let src = String::from_utf8_lossy(&display).into_owned();
5364 Some(format!("{src}:{line}: "))
5365 }
5366
5367 /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5368 /// for the source and renders the line as "?" (so the prefix reads
5369 /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5370 /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5371 /// matches `^%?:%-1:`).
5372 fn stripped_prefix(&self) -> String {
5373 if self.version >= crate::version::LuaVersion::Lua55 {
5374 "?:?: ".to_string()
5375 } else {
5376 "?:-1: ".to_string()
5377 }
5378 }
5379
5380 /// Position prefix of the Lua frame `level` steps up from the running C
5381 /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5382 /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5383 /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5384 /// caller's frame is reported even across pcall/xpcall continuations.
5385 /// `luaL_where(level)` for `error()`: unlike `dbg_frame` (whose 5.2+
5386 /// level numbering skips Cont activations to match db.lua's getinfo
5387 /// shape), PUC counts EVERY CallInfo — a C caller occupies a level of
5388 /// its own. `pcall(pcall, error, "msg")` must therefore resolve
5389 /// level 1 to the inner pcall (a C activation, no line info → no
5390 /// prefix), not tunnel through to the Lua frame below (v2.13
5391 /// CORPUS-IV fixture 239).
5392 pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5393 if level < 1 {
5394 return None;
5395 }
5396 let v51 = self.version <= LuaVersion::Lua51;
5397 let mut lvl = level;
5398 let mut found: Option<usize> = None;
5399 'walk: for fi in (0..self.frames.len()).rev() {
5400 match &self.frames[fi] {
5401 CallFrame::Lua(f) => {
5402 lvl -= 1;
5403 if lvl == 0 {
5404 found = Some(fi);
5405 break 'walk;
5406 }
5407 if v51 {
5408 for _ in 0..f.tailcalls {
5409 lvl -= 1;
5410 if lvl == 0 {
5411 return None; // synthetic tail level: no line info
5412 }
5413 }
5414 }
5415 if f.from_c {
5416 lvl -= 1;
5417 if lvl == 0 {
5418 return None; // C activation: no line info
5419 }
5420 }
5421 }
5422 CallFrame::Cont(_) => {
5423 // A continuation-driven native (pcall/xpcall/close)
5424 // is a C activation — it takes a level and has no
5425 // line info.
5426 lvl -= 1;
5427 if lvl == 0 {
5428 return None;
5429 }
5430 }
5431 }
5432 }
5433 let fi = found?;
5434 let f = self.frames[fi].lua()?;
5435 let proto = f.closure.proto;
5436 // PUC luaG_addinfo: a stripped chunk has no source — see
5437 // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5438 if proto.source.as_bytes().is_empty() {
5439 return Some(self.stripped_prefix());
5440 }
5441 // a stripped chunk carries no per-instruction line info
5442 if proto.lines.is_empty() {
5443 return None;
5444 }
5445 let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5446 // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5447 // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5448 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5449 let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5450 let display = crate::vm::lib_debug::chunk_id(raw);
5451 let src = String::from_utf8_lossy(&display).into_owned();
5452 Some(format!("{src}:{line}: "))
5453 }
5454
5455 // ---- the interpreter ----
5456
5457 fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5458 let entry_depth = self.frames.len();
5459 self.exec_with(entry_depth)
5460 }
5461
5462 /// Run from the current top frame down to (but not past) `entry_depth`
5463 /// frames. Coroutine driving passes `entry_depth = 1` so the whole thread
5464 /// runs to completion or a yield.
5465 /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
5466 /// `entry_depth` (captured pre-yield by `drive_one`). Called by
5467 /// `EvalFuture::poll` on every poll after the first to walk the
5468 /// existing call frames until the next `BudgetExhausted` or
5469 /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
5470 /// embedder reaches it through `Vm::eval_async`.
5471 pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5472 self.exec_with(entry_depth)
5473 }
5474
5475 fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5476 loop {
5477 let r = self.run(entry_depth);
5478 if r.is_err()
5479 && (self.yielding.is_some()
5480 || self.terminating.is_some()
5481 || self.host_yield_pending
5482 || self.pending_async_native_fut.is_some())
5483 {
5484 // a `coroutine.yield` is in flight: keep the frames intact (they
5485 // are the suspended coroutine's saved state) and propagate to
5486 // resume. A self-close termination propagates the same way, so a
5487 // protecting pcall on the way out cannot catch (unwind) it.
5488 // v1.1 B10 — `host_yield_pending` is the async-mode
5489 // analogue: the sentinel must reach `drive_one` without
5490 // a protecting `pcall` swallowing it.
5491 return r;
5492 }
5493 match r {
5494 Ok(vals) => return Ok(vals),
5495 // unwind toward `entry_depth`. A protecting pcall/xpcall
5496 // continuation caught along the way turns the error into
5497 // `false, msg` and the loop resumes running its caller; an
5498 // uncaught error propagates out.
5499 Err(e) => match self.unwind(e.0, entry_depth) {
5500 Unwound::Caught => continue,
5501 Unwound::CaughtReturn(vals) => return Ok(vals),
5502 Unwound::Propagated(err) => return Err(err),
5503 },
5504 }
5505 }
5506 }
5507
5508 /// Unwind the call stack from the error point toward `entry_depth`, running
5509 /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
5510 /// continuation frame at/above `entry_depth` (the error is *caught*: its
5511 /// slot receives `false, msg`); if none is reached, the error propagates.
     ///
     /// `err` is the error value in flight; it is replaced whenever a `__close`
     /// handler driven during the unwind raises a new error. `entry_depth` is
     /// the frame count below which this unwind must not pop.
     ///
     /// Returns:
     /// - `Unwound::Caught` — a pcall/xpcall continuation caught the error and
     ///   its caller can resume, or a close driven here returned `Ok(None)`
     ///   (per the comments below: a handler yielded and `Cont::Close` was
     ///   pushed). Either way `exec_with` re-enters the run loop.
     /// - `Unwound::CaughtReturn(vals)` — the catching continuation was itself
     ///   the entry activation (popping it left fewer than `entry_depth`
     ///   frames), so `false, msg` is handed back as host values.
     /// - `Unwound::Propagated(err)` — no catcher at/above `entry_depth`.
5512 fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
5513 // The protected call runs in-place among the caller frames' registers,
5514 // so truncating the failed frames here cuts into caller windows below
5515 // the catcher. Snapshot the live length: at the error point the stack
5516 // already spans every surviving frame's window, so restoring it after a
5517 // catch reinstates them all (the reclaimed slots above are dead temps).
5518 // PUC handles overflow recovery via a separate EXTRA_STACK reserve;
5519 // we instead clamp the restore to the catcher's caller window when the
5520 // error point was at the stack limit (cause: the next `call_value_impl`
5521 // picks `func_slot = stack.len()` which would otherwise re-overflow).
     // NOTE: the catch path below now always resizes to the nearest Lua
     // frame's window (`restore`); `saved_len` is only the fallback used
     // when no Lua frame remains on the frame stack.
5522 let saved_len = self.stack.len();
5523 // Snapshot the traceback at the error point — before any frame is
5524 // popped — so an `xpcall` msgh (which runs after the failed frames are
5525 // gone) can still describe the error site. The handler frame about to
5526 // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
5527 // visible here; once popped, `debug.traceback` would miss it.
5528 // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
5529 // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
5530 // recovery — locals.lua:659) re-overflows. Capture-once propagates
5531 // through nested unwinds (inner→outer) without re-running msgh.
5532 if self.error_traceback.is_none() {
5533 self.error_traceback = Some(self.traceback_bytes(1));
5534 }
     // Pop one frame per iteration. Falling out of the loop (stack now
     // shallower than `entry_depth`) means nothing caught the error.
5535 while self.frames.len() >= entry_depth {
5536 match *self.frames.last().expect("frame") {
5537 // a yieldable-metamethod continuation does not catch: discard the
5538 // abandoned instruction and keep unwinding (PUC drops the partial
5539 // op on error).
5540 CallFrame::Cont(NativeCont {
5541 kind: ContKind::Meta(mc),
5542 func_slot,
5543 ..
5544 }) => {
5545 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5546 self.stack.truncate(func_slot as usize);
     // never leave `top` above the slot the stack was just cut to
5547 self.top = mc.saved_top.min(func_slot);
     // drop to-be-closed entries at or above the discarded window
5548 self.tbc.retain(|&s| s < func_slot);
5549 }
5550 // a __pairs continuation does not catch either: an error inside
5551 // the metamethod propagates past `pairs`.
5552 CallFrame::Cont(NativeCont {
5553 kind: ContKind::Pairs,
5554 func_slot,
5555 ..
5556 }) => {
5557 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5558 self.stack.truncate(func_slot as usize);
5559 self.top = func_slot;
5560 self.tbc.retain(|&s| s < func_slot);
5561 }
5562 // a __close continuation does not catch: drop the half-run
5563 // handler's window, then continue the close yieldably with
5564 // the new error threaded as `pending`. Preserve `cc.after`
5565 // verbatim — `Return`/`Block` originating from an aborting
5566 // OP_Return/OP_Close will be short-circuited by
5567 // `finish_close_after` (pending propagates as Err); a
5568 // `ResumeUnwind` originated by our own Lua-frame handler
5569 // must keep its deferred frame-pop semantics so that frame
5570 // is not orphaned. If a fresh handler yields, `drive_close`
5571 // pushes another `Cont::Close` and we return `Caught` so
5572 // `exec_with` re-enters the run loop.
5573 CallFrame::Cont(NativeCont {
5574 kind: ContKind::Close(cc),
5575 func_slot,
5576 ..
5577 }) => {
5578 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5579 self.stack.truncate(func_slot as usize);
5580 self.top = func_slot;
5581 self.tbc.retain(|&s| s < func_slot);
5582 match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
5583 Ok(Some(_)) => {
5584 unreachable!(
5585 "Block / Return / ResumeUnwind never return host values mid-unwind"
5586 )
5587 }
5588 Ok(None) => return Unwound::Caught,
     // the close chain raised: that error replaces the one in
     // flight and the unwind continues with the next frame
5589 Err(e) => {
5590 err = e.0;
5591 continue;
5592 }
5593 }
5594 }
     // Meta, Pairs and Close were matched above, so this arm only sees
     // Pcall / Xpcall continuations — the ones that catch.
5595 CallFrame::Cont(nc) => {
5596 frames_pop_sync(&mut self.frames, &mut self.frames_top);
     // same decrement the success path in `run` performs when a
     // protected call completes
5597 self.pcall_depth -= 1;
5598 let result = match nc.kind {
     // plain pcall: the error value itself is the second result
5599 ContKind::Pcall => err,
5600 ContKind::Xpcall { handler } => {
5601 // PUC keeps `L->errfunc` set across the handler's
5602 // call: `luaG_errormsg` re-fires the handler when
5603 // it raises (so `xpcall(error, err, 170)` lets the
5604 // chain bottom out at err(0) → "END"). luna mirrors
5605 // that by looping until the handler returns or
5606 // luna's `iters` cap forces termination.
5607 //
5608 // The cap models PUC's nCcalls soft window
5609 // (MAXCCALLS/10*11): once tripped, `stackerror`
5610 // raises "C stack overflow" via `luaG_runerror`
5611 // which itself re-enters `luaG_errormsg`, so the
5612 // handler runs once more with that string and
5613 // naturally returns it (errors.lua :637 at N=300).
5614 // We count iterations per Cont::Xpcall rather than
5615 // a global counter — nested xpcalls each get their
5616 // own budget, matching the way PUC's stack frames
5617 // accumulate per dispatch path.
5618 const MSGH_CAP: u32 = MAX_C_DEPTH;
5619 let mut cur_err = err;
5620 let mut iters: u32 = 0;
5621 let mut capped = false;
5622 loop {
     // swap in the synthetic overflow message exactly once
5623 if iters >= MSGH_CAP && !capped {
5624 cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
5625 capped = true;
5626 }
5627 iters += 1;
5628 self.msgh_depth += 1;
5629 let r = self.call_value(handler, &[cur_err]);
5630 self.msgh_depth -= 1;
5631 match r {
     // only the handler's first return value is used;
     // no return value at all becomes nil
5632 Ok(hr) => {
5633 break hr.first().copied().unwrap_or(Value::Nil);
5634 }
5635 Err(_) if capped => {
5636 // the handler still errored on the
5637 // synthesized "C stack overflow"; fall
5638 // back to PUC's LUA_ERRERR string.
5639 break Value::Str(
5640 self.heap.intern(b"error in error handling"),
5641 );
5642 }
     // handler raised: feed its error back into the handler
5643 Err(e) => {
5644 cur_err = e.0;
5645 }
5646 }
5647 }
5648 }
5649 ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
5650 unreachable!("Meta/Pairs/Close cont handled above")
5651 }
5652 };
5653 // PUC 5.5 `luaG_errormsg` substitutes "<no error object>"
5654 // for nil AFTER the message handler ran (ldebug.c:849) —
5655 // so it applies to the pcall-caught object and to an
5656 // xpcall HANDLER'S return value, while the handler itself
5657 // (and a top-level propagation into the host, whose
5658 // `error_display` plays msghandler) still sees the raw
5659 // nil. 5.4- keep nil everywhere (errors.lua :49 asserts
5660 // `doit("error()") == nil`). v2.14 fixture 5.5/334.
5661 let result = if matches!(result, Value::Nil)
5662 && self.version >= crate::version::LuaVersion::Lua55
5663 {
5664 Value::Str(self.heap.intern(b"<no error object>"))
5665 } else {
5666 result
5667 };
5668 // the error has been caught (pcall/xpcall): the captured
5669 // traceback was for that error and is no longer in flight.
5670 self.error_traceback = None;
     // deliver `false, result` at the catcher's slot, growing the
     // stack first if earlier truncation left it too short
5671 let fs = nc.func_slot as usize;
5672 if self.stack.len() < fs + 2 {
5673 self.stack.resize(fs + 2, Value::Nil);
5674 }
5675 self.stack[fs] = Value::Bool(false);
5676 self.stack[fs + 1] = result;
5677 self.top = nc.func_slot + 2;
5678 self.tbc.retain(|&s| s < nc.func_slot);
     // the catcher was the entry activation: hand the pair to the host
5679 if self.frames.len() < entry_depth {
5680 return Unwound::CaughtReturn(self.take_results(nc.func_slot));
5681 }
5682 self.finish_results(nc.func_slot, 2, nc.nresults);
5683 // reinstate the caller windows the unwind truncated into,
5684 // clamped to the catcher's caller window + a `MIN_STACK`
5685 // reserve. The clamp is a no-op for normal pcall catches
5686 // (saved_len lies within the caller's max_stack window),
5687 // and prevents the stack from staying near `MAX_LUA_STACK`
5688 // after an overflow-recovery catch — which would make the
5689 // next `call_value_impl` (e.g. a `__close` in the catcher's
5690 // errorh, locals.lua:659) pick `func_slot = stack.len()`
5691 // above the limit and re-overflow.
5692 // Restore the caller's full register window: opcodes
5693 // index it directly. The cap covers caller's base +
5694 // `max_stack` + a small reserve. We always resize to
5695 // exactly this window — previously this clamped
5696 // `saved_len` from above to prevent staying near
5697 // `MAX_LUA_STACK` after an overflow-recovery catch, and
5698 // a yieldable-unwind re-entry adds the dual case where
5699 // `saved_len` is *below* the window (a prior
5700 // `ResumeUnwind` truncated). Using the window directly
5701 // covers both.
     // The window is taken from the nearest Lua frame at or below
     // the new top of the frame stack; the reserve is the literal
     // 256 below. With no Lua frame left, `saved_len` is used.
5702 let restore = self
5703 .frames
5704 .iter()
5705 .rev()
5706 .find_map(CallFrame::lua)
5707 .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
5708 .unwrap_or(saved_len);
5709 if self.stack.len() < restore {
5710 self.stack.resize(restore, Value::Nil);
5711 } else if self.stack.len() > restore {
5712 self.stack.truncate(restore);
5713 }
5714 // v2.5 P1B-2B: clear slots vacated by the popped
5715 // frames the unwind walked over. finish_results
5716 // above clears `[nc.func_slot + nresults ..
5717 // nc.func_slot + 2)`, which only covers the
5718 // pcall's own result region — the unwind-popped
5719 // frames' locals in `[nc.func_slot + 2 .. restore)`
5720 // are still in place with whatever Gc-bearing
5721 // Values they last held. Without this clear, a
5722 // later GC marks the stale pointers (UAF-A family
5723 // analog of the v2.3 Op::Return finish_results
5724 // path). PUC's `luaD_pcall` similarly truncates
5725 // L->top to the catcher's level — luna's
5726 // truncate above resizes the Vec but doesn't
5727 // touch slots [func_slot+2..restore) that were
5728 // already present.
     // both bounds are clamped to the stack length so the slice
     // below cannot go out of range
5729 let clear_lo = (nc.func_slot as usize + 2).min(self.stack.len());
5730 let clear_hi = restore.min(self.stack.len());
5731 if clear_lo < clear_hi {
5732 for slot in &mut self.stack[clear_lo..clear_hi] {
5733 *slot = Value::Nil;
5734 }
5735 }
5736 return Unwound::Caught;
5737 }
5738 CallFrame::Lua(f) => {
5739 // Yieldable error-unwind close, PUC luaG_errormsg shape:
5740 // (1) pop the Lua frame immediately so each `__close`
5741 // handler runs at the C boundary above — `debug.getinfo`
5742 // sees the next outer Lua frame's call site (typically
5743 // `pcall`), not this aborting function (locals.lua:480).
5744 // (2) drive the close yieldably with
5745 // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
5746 // it truncates to `func_slot` and re-raises (letting a
5747 // handler-raised error win over `err`). If a handler
5748 // yields, `drive_close` pushes `Cont::Close` and we
5749 // return `Caught` so `exec_with` re-enters the run loop;
5750 // a synchronous drain returns Err exactly as the old
5751 // path did.
5752 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5753 let after = AfterClose::ResumeUnwind {
5754 func_slot: f.func_slot,
5755 err,
5756 };
5757 match self.begin_close(f.base, Some(err), after, entry_depth) {
5758 Ok(Some(_)) => {
5759 unreachable!("ResumeUnwind never returns host values")
5760 }
5761 Ok(None) => return Unwound::Caught,
     // re-raised (original or handler-raised) error: keep unwinding
5762 Err(e) => {
5763 err = e.0;
5764 continue;
5765 }
5766 }
5767 }
5768 }
5769 }
     // ran out of frames at/above `entry_depth` without meeting a catcher
5770 Unwound::Propagated(LuaError(err))
5771 }
5772
5773 fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5774 loop {
5775 // Fast-path slow-check gate: most embedders run with both
5776 // `instr_budget` and `mem_cap` as None, so a single combined
5777 // is_some test lets the hot loop skip both branches with one
5778 // load + branch instead of two.
5779 if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5780 if let Some(b) = self.instr_budget.as_mut() {
5781 *b -= 1;
5782 if *b <= 0 {
5783 self.instr_budget = None;
5784 // v1.1 B10 Stage 1 — async-mode cooperative
5785 // yield. Set a sentinel flag so `exec_with`
5786 // propagates the Err without `unwind` running
5787 // (mirroring the `yielding.is_some()` path),
5788 // and `call_value_impl` preserves the call
5789 // frames for the next `poll`. Translation back
5790 // to `DispatchOutcome::BudgetExhausted` happens
5791 // in `drive_one`. The Err value itself is
5792 // `Value::Nil` — a pure sentinel, never seen by
5793 // user code.
5794 if self.async_mode {
5795 self.host_yield_pending = true;
5796 return Err(LuaError(Value::Nil));
5797 }
5798 // B6: classify the trip so embedders can
5799 // distinguish budget exhaustion from a
5800 // generic Runtime error and retry / give up
5801 // accordingly.
5802 self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5803 let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5804 return Err(LuaError(s));
5805 }
5806 }
5807 if let Some(cap) = self.heap.mem_cap
5808 && self.heap.bytes() > cap
5809 {
5810 // First try a full collect — embedders set tight caps
5811 // and the overshoot may be reclaimable (closures kept
5812 // by short-lived frames, intermediate strings). Only
5813 // disarm + raise if the cap is still breached after
5814 // collection. PUC's `LUA_GCEMERGENCY` path matches.
5815 //
5816 // v2.6 A.2: tighten mem-cap-fire over-root from
5817 // entire `self.stack.len()` (whole heap) to the
5818 // deepest Lua frame's `base + max_stack` window
5819 // (covers register operands the current opcode
5820 // might reference). The cap fires during table
5821 // mutation in a tight `a[i] = i` loop where `a`
5822 // lives at a frame-register slot past `self.top`
5823 // (OP_NEWINDEX doesn't advance top); the deepest
5824 // frame's max_stack window provably covers it
5825 // since `a` is a register of the executing proto.
5826 //
5827 // Still over-roots caller frames' dead regs
5828 // (slots between caller.base and the callee
5829 // func_slot are live; slots past callee
5830 // func_slot in caller's frame are dead until
5831 // caller resumes). For fire-once cap path this
5832 // residual over-root is acceptable; full
5833 // per-frame walk was canceled per
5834 // `.dev/rfcs/v2.6-plan-state.md` amendments log
5835 // (charter §2.1's strong/weak pass split is
5836 // semantically impossible — weak pass depends on
5837 // strong-pass marks).
5838 let cap_root_top = self
5839 .frames
5840 .iter()
5841 .rev()
5842 .find_map(CallFrame::lua)
5843 .map(|f| f.base + f.closure.proto.max_stack as u32)
5844 .unwrap_or(self.top);
5845 self.gc_top = cap_root_top.max(self.top);
5846 self.collect_garbage();
5847 if self.heap.bytes() > cap {
5848 self.heap.mem_cap = None;
5849 let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5850 return Err(LuaError(s));
5851 }
5852 }
5853 }
5854 // Single combined frame fetch: continuation arm OR Lua arm. Saves
5855 // a second `self.frames.last()` slice access vs the prior split
5856 // form (LLVM doesn't always CSE these across the cont branch).
5857 // A continuation frame on top means the call it protected just
5858 // delivered its results — wrap as `true, results…` and hand to
5859 // the pcall/xpcall caller. The error path is handled by `unwind`;
5860 // this branch is only reached on success/resume completion.
5861 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5862 let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5863 if let &CallFrame::Cont(nc) = frame_peek {
5864 // a yieldable metamethod returned: complete the interrupted
5865 // instruction (PUC luaV_finishOp) and resume the running frame.
5866 if let ContKind::Meta(mc) = nc.kind {
5867 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5868 let result = if self.top > nc.func_slot {
5869 self.stack[nc.func_slot as usize]
5870 } else {
5871 Value::Nil
5872 };
5873 self.stack.truncate(nc.func_slot as usize);
5874 self.top = mc.saved_top;
5875 self.finish_meta(mc.action, result)?;
5876 continue;
5877 }
5878 // a __close handler returned successfully: discard its
5879 // results, restore `top` to the slot the handler was called
5880 // at (the surrounding frame's register window above this slot
5881 // must stay alloc'd — never truncate the underlying stack),
5882 // then continue the close chain (next slot, or fire
5883 // AfterClose). When the close ends an entry activation,
5884 // drive_close hands the results up to exec_with directly.
5885 if let ContKind::Close(cc) = nc.kind {
5886 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5887 self.top = nc.func_slot;
5888 if let Some(vals) =
5889 self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5890 {
5891 return Ok(vals);
5892 }
5893 continue;
5894 }
5895 // __pairs returned: normalize its results to exactly four
5896 // (iterator, state, control, closing) at pairs's slot, where
5897 // the metamethod was called, and hand them to pairs's caller.
5898 if let ContKind::Pairs = nc.kind {
5899 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5900 let total = 4u32;
5901 let need = (nc.func_slot + total) as usize;
5902 if self.stack.len() < need {
5903 self.stack.resize(need, Value::Nil);
5904 }
5905 for s in self.top..(nc.func_slot + total) {
5906 self.stack[s as usize] = Value::Nil;
5907 }
5908 self.top = nc.func_slot + total;
5909 if self.frames.len() < entry_depth {
5910 return Ok(self.take_results(nc.func_slot));
5911 }
5912 self.finish_results(nc.func_slot, total, nc.nresults);
5913 continue;
5914 }
5915 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5916 self.pcall_depth -= 1;
5917 // f's results sit at nc.func_slot+1.. (f was called one slot
5918 // above the continuation), so writing `true` at the slot makes
5919 // `true, results…` already contiguous.
5920 let nret = self.top - (nc.func_slot + 1);
5921 self.stack[nc.func_slot as usize] = Value::Bool(true);
5922 let total = 1 + nret;
5923 self.top = nc.func_slot + total;
5924 if self.frames.len() < entry_depth {
5925 return Ok(self.take_results(nc.func_slot));
5926 }
5927 self.finish_results(nc.func_slot, total, nc.nresults);
5928 continue;
5929 }
5930 // GC runs only at the allocation safe points below (PUC's
5931 // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5932 // no longer collects, so a stale full-window `gc_top` cannot leak in.
5933 //
5934 // Hot-path frame fetch: the Cont arm above continues the loop,
5935 // so reaching here means `frame_peek` is the Lua frame. Reuse it
5936 // rather than re-fetching `self.frames.last()`.
5937 let f = match frame_peek {
5938 CallFrame::Lua(f) => f,
5939 _ => unreachable!("Cont frame survived the dispatch loop head"),
5940 };
5941 let cl = f.closure;
5942 let base = f.base;
5943 let func_slot = f.func_slot;
5944 let n_varargs = f.n_varargs;
5945 let pc = f.pc;
5946 let oldpc = f.hook_oldpc;
5947
5948 // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5949 // — every branch / call op only sets `pc` to a valid index, and
5950 // function entry initialises pc=0 with a non-empty body. PUC's
5951 // `vmfetch` uses the equivalent unchecked load.
5952 let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5953
5954 // P12-S1.C/D — trace recording append + close detection.
5955 // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5956 // so default dispatch keeps a single not-taken branch.
5957 //
5958 // - At the head PC with a non-empty record, the trace has
5959 // looped back to its start: mark `closed = true` and
5960 // take the record (S2 will compile + cache).
5961 // - Otherwise, capture the op. If the record overflows
5962 // MAX_TRACE_LEN, abort by dropping it.
5963 if self.jit.trace_enabled
5964 && let Some(_rec) = self.jit.active_trace.as_mut()
5965 {
5966 // P12-S4 — depth tracking. The trace head's frame is
5967 // at index `recording_frame_base`; every Op::Call that
5968 // pushes a new frame bumps the live depth, every
5969 // Op::Return that pops one decrements it.
5970 //
5971 // **Three clean-close conditions** (P12-S4-step4a):
5972 // - `at_head`: cur_depth == 0 AND about-to-execute the
5973 // trace's head_pc on its head_proto (loop closed back
5974 // to start). Same for loop-triggered and call-triggered
5975 // traces — step4a unified the gating so call-triggered
5976 // no longer closes on the first re-entry (that left
5977 // fib's body at 7 depth=0 ops; step4a lets it inline
5978 // up to MAX_INLINE_DEPTH levels before any close).
5979 // - `returned_past_head`: trace head's frame is gone
5980 // (callee returned past it, or the call-trigger
5981 // started a recording inside a callee that has now
5982 // returned). Whatever ops were recorded form the
5983 // trace body; the lowerer treats the partial trace
5984 // the same as InlineAbort (dispatchable=false until
5985 // step4b's frame materialization lands).
5986 // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5987 // Recording any deeper would just bloat the IR; close
5988 // with the body we have. Lowerer's existing length
5989 // gate + InlineAbort path handles short bodies.
5990 let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5991 let cur_depth = if returned_past_head {
5992 0
5993 } else {
5994 self.frames.len() - 1 - self.jit.recording_frame_base
5995 };
5996 let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5997 let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5998 let at_head_loop = cur_depth == 0
5999 && !rec.ops.is_empty()
6000 && !returned_past_head
6001 && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
6002 && pc == rec.head_pc;
6003 // P16-A — self-link cycle catch (mirrors LuaJIT's
6004 // `check_call_unroll` at `lj_record.c:1869`). Trips when:
6005 // 1. We're about to execute the head_pc on head_proto
6006 // at depth > 0 (we're re-entering the trace head
6007 // from inside an inlined recursion level — UpRec).
6008 // 2. The count of ancestor frames in the recording
6009 // window that share `head_proto` exceeds
6010 // [`RECUNROLL_THRESHOLD`] (default 2).
6011 // For fib(N): head_pc=0, head_proto=fib. After 2 inline
6012 // recursion levels are captured, the recorder enters
6013 // the 3rd nested fib frame, sees cur_depth=3 > 2, and
6014 // trips this catch — closing with `SelfRecKind::UpRec`.
6015 // The lowerer's `TraceEnd::SelfLink` tail emits the
6016 // bump-base + branch-to-self loop body.
6017 //
6018 // TailRec vs UpRec: LJ distinguishes via
6019 // `framedepth + retdepth == 0`. luna doesn't track
6020 // retdepth separately; cur_depth == 0 with a non-empty
6021 // call chain in tail position is rare (would require
6022 // explicit Lua TCO). We use cur_depth > 0 as the UpRec
6023 // condition (fib's case); cur_depth == 0 with positive
6024 // ancestor count would route to TailRec, but luna's
6025 // recorder doesn't currently produce that shape because
6026 // tail-call elision pops the caller frame and we'd
6027 // hit `at_head_loop` instead.
6028 let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
6029 if self.jit.p16_self_link_enabled
6030 && !returned_past_head
6031 && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
6032 && pc == rec.head_pc
6033 && cur_depth > 0
6034 {
6035 // Count ancestor frames sharing head_proto.
6036 // self.frames[recording_frame_base..] currently
6037 // includes the just-pushed frame at the top
6038 // (the one about to execute head_pc). Ancestors
6039 // = the slice excluding the top frame.
6040 let head_proto_ptr = rec.head_proto.as_ptr();
6041 let last_idx = self.frames.len() - 1;
6042 let mut count = 0usize;
6043 for i in self.jit.recording_frame_base..last_idx {
6044 if let CallFrame::Lua(f) = &self.frames[i]
6045 && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
6046 {
6047 count += 1;
6048 }
6049 }
6050 if count > crate::jit::trace::RECUNROLL_THRESHOLD {
6051 // cur_depth > 0 → UpRec (fib pattern).
6052 // cur_depth == 0 wouldn't reach this arm.
6053 Some(crate::jit::trace::SelfRecKind::UpRec)
6054 } else {
6055 None
6056 }
6057 } else {
6058 None
6059 }
6060 };
6061 if let Some(kind) = self_link_trip {
6062 // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
6063 // self-recursive patterns at frame depth >= 2.
6064 //
6065 // Pre sub-0: a SelfLink trip at the head_pc re-entry
6066 // unconditionally stamped `self_link_kind`. The
6067 // R3a `downrec_close` marker can only fire from the
6068 // depth>0 Op::Return path (`rec.retfs` chain),
6069 // which never reaches the recorder for fib(28)-like
6070 // shapes that hit the SelfLink cycle catch BEFORE
6071 // any base-case Return — leaving `downrec_close`
6072 // None and routing the trace through R1's safe
6073 // `dispatchable=false` `"self-link-retf-r1"` path
6074 // (audit measured `trace_dispatched = 0`).
6075 //
6076 // Sub-0 lift: when the SelfLink trip fires AND
6077 // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
6078 // gate already requires this — kept explicit as a
6079 // safety floor), route the close through `downrec_
6080 // close` INSTEAD of `self_link_kind`. The recorder
6081 // synthesises the close marker from the most
6082 // recent Op::Call at depth `cur_depth - 1`:
6083 // - `return_pc` = `call.pc + 1` (caller's resume
6084 // PC after the recursive call returns; mirror
6085 // of R3a's `caller_pc` derivation at the
6086 // depth>0 Op::Return capture path below).
6087 // - `target_proto` = `call.proto` (caller's
6088 // proto; equals `rec.head_proto` for self-
6089 // recursion).
6090 // - `depth_delta` = `1` (today's recorder always
6091 // unrolls one level; R3a uses the same
6092 // constant).
6093 //
6094 // The lowerer's `end_idx` picker (`trace.rs:3729`)
6095 // routes through `TraceEnd::DownRec` ahead of the
6096 // `self_link_kind` arm; the R3b/R3d lowerer arm
6097 // emits the stitch-sentinel + caller-pc-guard
6098 // scaffold. Single-candidate guard chain (sub-0's
6099 // recorder produces 1 caller_pc candidate because
6100 // `rec.retfs` is empty) keeps `dispatchable=false`
6101 // + `"downrec-stitch-pending"` label (per R3d's
6102 // `multi_way_candidate_count >= 2` gate at
6103 // `trace.rs:7385`). Net behaviour: trace compiles
6104 // under DownRec routing; interp runs the
6105 // recursion naturally → result 317811.
6106 //
6107 // The `cur_depth >= 2` gate is automatically
6108 // satisfied by the count > RECUNROLL_THRESHOLD=2
6109 // trip condition (3 ancestor frames sharing
6110 // head_proto implies cur_depth >= 3), kept
6111 // explicit so a future RECUNROLL_THRESHOLD tweak
6112 // doesn't silently flip shallow-recursion
6113 // shapes (cur_depth == 1) onto the DownRec arm.
6114 //
6115 // R3.3+ sub-1/2/3/4 will replace the depth-baked
6116 // op_offsets[] addressing with runtime base_var
6117 // threading so the trace's recorded body is
6118 // depth-relative and the DownRec dispatch
6119 // becomes wall-clock-positive. Sub-0 is the
6120 // routing scaffold; it does not aim for gain.
6121 let _ = kind;
6122 let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
6123 let caller_depth_u8 = (cur_depth - 1) as u8;
6124 if let Some(call_op) = rec.ops.iter().rev().find(|r| {
6125 r.inline_depth == caller_depth_u8
6126 && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6127 }) {
6128 rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6129 return_pc: call_op.pc + 1,
6130 target_proto: call_op.proto,
6131 depth_delta: 1,
6132 });
6133 true
6134 } else {
6135 false
6136 }
6137 };
6138 if relaxed_to_downrec {
6139 // R2 close-cause taxonomy: tag the lift so
6140 // probes can tally the fire rate. Mirrors
6141 // R3a's `"downrec-restart"` bump for the
6142 // depth>0 Op::Return path (different trip
6143 // origin, same downstream routing). The
6144 // existing `"self-link-retf-r1"` label still
6145 // fires for trips that DON'T relax (no
6146 // candidate Op::Call ancestor in rec.ops, or
6147 // cur_depth < 2) via the lowerer's
6148 // dispatch_off_reason mirror at the close
6149 // handler — kept as a regression safety net.
6150 self.jit
6151 .counters
6152 .bump_close_cause("selflink-yields-to-downrec");
6153 } else {
6154 rec.self_link_kind = Some(kind);
6155 }
6156 }
6157 let should_close =
6158 at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
6159 if should_close {
6160 // P13-S13-H — long-trace bias: a call-triggered
6161 // recording that closed with a very short body
6162 // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
6163 // binary_trees `make(0)`: 4 ops) is pathological.
6164 // Compiling + caching it pins `Proto.traces` to a
6165 // trace that the length gate will refuse to
6166 // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
6167 // = 40`), AND blocks the back-edge / longer-call
6168 // path from re-recording the same head_pc (the
6169 // dedup `already_cached` check below short-
6170 // circuits). The fix: discard the short call-
6171 // triggered recording WITHOUT caching. (An earlier
6172 // design also biased the proto's `call_hot_count`
6173 // back to `THRESHOLD - HOT_RETRY_WINDOW` to retry
6174 // at a deeper recursion point; the counter is no
6175 // longer reset — see the `call_hot_count` note below.)
6176 //
6177 // Back-edge triggered traces are exempt — a
6178 // tight numeric-for loop's body is legitimately
6179 // 3 ops (`Add`, ForLoop) and DOES dispatch
6180 // usefully when re-entered many times.
6181 // P13-S13-H — coverage heuristic to detect
6182 // pathologically partial call-triggered traces:
6183 // for self-recursive / branchy protos like
6184 // `fib` (~17 bytecode ops) or
6185 // `binary_trees.make` (~26 ops), the recorder
6186 // can fire at a BASE-case entry (`fib(0)` or
6187 // `make(0)`) producing a 3–4 op trace that
6188 // covers a tiny fraction of the proto's code.
6189 // That trace is doomed by the length gate
6190 // post-compile AND blocks any longer follow-up
6191 // (the dedup `already_cached` check below). The
6192 // fix: discard call-triggered closes where
6193 // `rec.ops.len() * 2 < head_proto.code.len()`
6194 // (less than half the proto's bytecode), so the
6195 // back-edge / longer call path can take over.
6196 //
6197 // Why coverage > raw length: protos with
6198 // intrinsically short bodies (closure
6199 // factories: `Closure + Return1` = 2 ops,
6200 // simple wrappers: `LoadI + Return1` = 2 ops)
6201 // record 100% coverage even at length 2 — those
6202 // ARE legitimately short and the closure /
6203 // sunk-emit lowering paths (S7-A / S9-C) make
6204 // them worth compiling. The heuristic admits
6205 // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
6206 // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
6207 // ~26) get discarded.
6208 //
6209 // Back-edge triggered traces are unaffected —
6210 // a tight numeric-for body legitimately covers
6211 // 3 of ~3 proto ops it can dispatch from
6212 // (`Add + ForLoop`) and the recorder fires on
6213 // the back-edge, not call entry.
6214 //
6215 // `call_hot_count` is intentionally NOT reset
6216 // (an earlier draft tried `THRESHOLD - 32` but
6217 // caused active_trace contention with the
6218 // outer back-edge trigger — see
6219 // setlist_b_zero_with_call_c_zero_sunk_emits).
6220 // We give up on dispatching the pathological
6221 // shape on the same proto; the back-edge or a
6222 // longer call path on a deeper recursion point
6223 // can still record + cache a real trace.
6224 let proto_code_len = rec.head_proto.code.len();
6225 let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
6226 // P13-S13-I — per-Proto discard cap. The S13-H
6227 // relaxed trigger condition (`c >= THRESHOLD &&
6228 // !already_cached`) means a Proto whose every
6229 // recording is partial-coverage will re-fire the
6230 // trigger every call indefinitely (1500+ in
6231 // `binary_trees`-pattern test). The cap stops
6232 // discarding after `MAX_DISCARDS_PER_PROTO` —
6233 // the next close falls through to compile (even
6234 // if partial), caches the trace, and the
6235 // `already_cached` short-circuit kills the
6236 // storm. Dispatch may still be refused
6237 // post-compile (length gate), but the recorder
6238 // stops churning.
6239 const MAX_DISCARDS_PER_PROTO: u32 = 5;
6240 let prior_discards = rec.head_proto.trace_discard_count.get();
6241 let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
6242 // P13-S13-K — flip the `gave_up` flag the
6243 // moment cap is reached (BEFORE the close-
6244 // dispatching branch below). The trigger gates
6245 // short-circuit on this flag, skipping the
6246 // RefCell + linear `already_cached` scan on
6247 // every subsequent call to this Proto. Useful
6248 // for `binary_trees_pattern`-class loads where
6249 // a single Proto sees ~20k calls post-cap.
6250 if cap_reached
6251 && rec.is_call_triggered
6252 && is_partial_coverage
6253 && !rec.head_proto.trace_gave_up.get()
6254 {
6255 rec.head_proto.trace_gave_up.set(true);
6256 }
6257 if rec.is_call_triggered && is_partial_coverage && !cap_reached {
6258 // Tally as closed (for visibility) but DROP
6259 // without compile/cache. Use the existing
6260 // closed-lens accumulator so probes can
6261 // observe the discarded shape.
6262 // P13-S13-I — bump discard count BEFORE
6263 // dropping the recording so the next
6264 // close sees the updated counter.
6265 rec.head_proto.trace_discard_count.set(prior_discards + 1);
6266 self.jit.counters.closed += 1;
6267 self.jit
6268 .counters
6269 .closed_lens
6270 .push((rec.is_call_triggered, rec.ops.len()));
6271 // v2.0 Track-R R2 — partial-coverage discard
6272 // close path. Pre-R2 this site bumped `closed`
6273 // + `closed_lens` (visibility) but no per-
6274 // reason label, so probes couldn't separate a
6275 // real successful close from a discard tally.
6276 // Tag explicitly to make the recorder-side
6277 // close-cause taxonomy single-source.
6278 self.jit
6279 .counters
6280 .bump_close_cause("partial-coverage-discard");
6281 self.jit.active_trace = None;
6282 // Continue with interp loop — don't
6283 // fall through to compile path.
6284 // The op at `pc` hasn't dispatched yet;
6285 // the outer loop iteration handles it.
6286 } else {
6287 rec.closed = true;
6288 // P12-S2.C — detach the closed record, then try
6289 // to compile it. Dedup by `head_pc`: a Proto
6290 // already carrying a CompiledTrace for this PC
6291 // skips recompile (the hot counter caps
6292 // re-recording at `u32::MAX / 2` anyway, but
6293 // explicit dedup keeps `Proto.traces` short
6294 // for the S3 dispatcher's linear scan).
6295 //
6296 // No `Vm::run` change for failure: we just bump
6297 // the failed counter and drop the record. S3
6298 // will read `Proto.traces` to decide whether to
6299 // dispatch — until then, this is bookkeeping.
6300 let head_pc_val = rec.head_pc;
6301 let closed_record = self
6302 .jit
6303 .active_trace
6304 .take()
6305 .expect("active_trace was Some this branch");
6306 self.jit.counters.closed += 1;
6307 self.jit
6308 .counters
6309 .closed_lens
6310 .push((closed_record.is_call_triggered, closed_record.ops.len()));
6311 // P12-S5-B fix: cache the trace on the
6312 // recorder's *head proto*, not the current
6313 // closure's proto. For non-recursive
6314 // call-triggered traces, close fires after
6315 // `Return1` pops the callee frame — `cl` at
6316 // that point is the CALLER's closure, while
6317 // `closed_record.head_proto` is the CALLEE's
6318 // proto (the one we actually want the trace
6319 // to be discoverable from on the next call).
6320 // Self-recursive fib closed via depth-cap
6321 // mid-recursion so `cl.proto == head_proto`
6322 // happened to coincide — this fix makes that
6323 // accidental coincidence intentional.
6324 let head_proto = closed_record.head_proto;
6325 let already_cached = head_proto
6326 .traces
6327 .borrow()
6328 .iter()
6329 .any(|t| t.head_pc == head_pc_val);
6330 if !already_cached {
6331 // Internal-loop = true: the trace runs in
6332 // a native loop until a cmp side-exits, so
6333 // the dispatcher's per-entry marshal cost
6334 // amortizes across the whole run of
6335 // iterations the loop's recorded direction
6336 // stays valid. The lowerer auto-downgrades
6337 // to one-shot for cmp-less or Call-truncating
6338 // traces.
6339 // P15-A v2-C-A6-5 — side traces MUST NOT
6340 // internal-loop. The parent's recorded prefix
6341 // (ops at PCs < side trace's head_pc) defines
6342 // values for registers the child's body reads
6343 // without re-writing each iter — e.g. for
6344 // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6345 // + R[11]` sets R[12], and the child trace
6346 // (head_pc=24) re-runs `pc=20 Move R[1] =
6347 // R[12]` each iter via its outer ForLoop
6348 // internal-loop, ALWAYS reading the stale
6349 // entry-time R[12]. The parent's Add never
6350 // re-runs during child's loop, so R[1] gets
6351 // pinned to one stale value. Force one-shot
6352 // for side traces: each parent-exit round-
6353 // trips through dispatcher → parent's Add
6354 // runs → side trace runs ONE iter → return.
6355 let opts = crate::jit::trace::CompileOptions {
6356 internal_loop: closed_record.side_trace_parent.is_none(),
6357 pre53: self.version() <= LuaVersion::Lua53,
6358 aot: false,
6359 };
6360 // v1.1 A1 Session A — route through trace_compiler.
6361 // v2.0 Track J sub-step J-B — split-borrow JitState
6362 // so the trait method can take `&mut dyn JitStorage`.
6363 let result = {
6364 let jit = &mut self.jit;
6365 let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6366 jit.trace_compiler
6367 .try_compile_trace(storage, &closed_record, opts)
6368 };
6369 match result {
6370 Some(mut ct) => {
6371 // P12-S5-A/B/C — tally Sinkable sites
6372 // + actually-sunk-emit sites + materialise
6373 // emit sites before moving `ct` into
6374 // Proto.traces.
6375 self.jit.counters.sinkable_seen +=
6376 ct.sinkable_sites_seen as u64;
6377 self.jit.counters.accum_bufferable_seen +=
6378 ct.accum_bufferable_seen as u64;
6379 self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6380 self.jit.counters.materialize_emit +=
6381 ct.materialize_emit_count as u64;
6382 self.jit.counters.closure_emit += ct.closure_seen as u64;
6383 if ct.is_inline_abort_close {
6384 self.jit.counters.inline_abort += 1;
6385 }
6386 // v2.0 Stage 7 polish 6 fire
6387 // experiment — split tally so a
6388 // probe can answer the AOT
6389 // `accepted_with_per_exit_inline`
6390 // gate's question at the JIT
6391 // surface too: how many compiled
6392 // traces emitted depth>0 cmp
6393 // side-exits, and how many of
6394 // those survived all the
6395 // `dispatchable = false` pins
6396 // (`InlineAbort-gate`,
6397 // `self-link-retf-r1`,
6398 // `downrec-stitch-pending`, etc.).
6399 if !ct.per_exit_inline.is_empty() {
6400 self.jit.counters.per_exit_inline_compiled += 1;
6401 if ct.dispatchable {
6402 self.jit.counters.per_exit_inline_dispatchable += 1;
6403 }
6404 }
6405 if let Some(reason) = ct.dispatch_off_reason {
6406 self.jit.counters.dispatch_off_reasons.push(reason);
6407 // v2.0 Track-R R2 — mirror
6408 // the ordered Vec push into
6409 // the per-reason HashMap so
6410 // probes can answer "how many
6411 // of each dispatch_off label
6412 // fired" in O(1) without
6413 // walking the Vec. Same
6414 // bucket as the recorder-side
6415 // abort/discard tags above.
6416 self.jit.counters.bump_close_cause(reason);
6417 }
6418 // v2.0 Track-R R3b — count
6419 // compiled traces that carry a
6420 // down-recursion stitch link.
6421 // Bumped here (not at the lowerer
6422 // emit site) because the Vm's
6423 // JitCounters live on the Vm,
6424 // and the lowerer doesn't have a
6425 // Vm handle. R3b's regression
6426 // pin reads this via
6427 // `Vm::trace_downrec_link_compiled_count`.
6428 if ct.downrec_link.is_some() {
6429 self.jit.counters.downrec_link_compiled += 1;
6430 }
6431 // v2.0 Track-R R3d — multi-way
6432 // guard emit counter. Bumped when
6433 // the lowerer's R3d arm collected
6434 // >= 2 distinct caller_pc candidates
6435 // and lifted `dispatchable=true`.
6436 // R3c's single-CMP shape stores
6437 // `1` here without bumping; non-
6438 // DownRec closes store `0`.
6439 if ct.downrec_multi_way_count >= 2 {
6440 self.jit.counters.multi_way_guard_emitted += 1;
6441 }
6442 // P15-A v2-A — side-trace finalisation.
6443 // Pin `dispatchable=false` so the
6444 // primary lookup `traces.find(|t|
6445 // t.head_pc == pc && t.dispatchable)`
6446 // never matches this entry — the
6447 // side trace is meant to be entered
6448 // ONLY through the parent's exit
6449 // indirection (v2-B/C IR), not the
6450 // back-edge / call-trigger paths.
6451 // Then write the entry fn ptr into
6452 // the parent's `exit_side_trace_ptrs`
6453 // slot so v2-B/C IR can read it.
6454 if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6455 closed_record.side_trace_parent
6456 {
6457 ct.dispatchable = false;
6458 let entry_ptr = ct.entry as *const () as *const u8;
6459 let _side_trace_head_pc = closed_record.head_pc;
6460 let parent_traces = parent_proto.traces.borrow();
6461 if let Some(parent_ct) = parent_traces
6462 .iter()
6463 .find(|t| t.head_pc == parent_head_pc)
6464 {
6465 // P15-A v2-C-A5-C — shape-match
6466 // gate. Find the parent's per-exit
6467 // tag snapshot at the wired exit
6468 // (inline / tag / global) and
6469 // check the child's entry_tags
6470 // match. If not, leave the cell
6471 // null + skip cache populate so
6472 // the future v2-C-A2 IR's
6473 // `call_indirect` stays inert at
6474 // this exit (the child's
6475 // shape-specialised IR would
6476 // mis-interpret raw bits the
6477 // parent writes to reg_state).
6478 let inline_n = parent_ct.per_exit_inline.len();
6479 let tags_n = parent_ct.per_exit_tags.len();
6480 let parent_exit_tags_slice: &[
6481 crate::jit::trace::ExitTag
6482 ] = if parent_exit_idx < inline_n {
6483 &parent_ct.per_exit_inline
6484 [parent_exit_idx]
6485 .exit_tags
6486 } else if parent_exit_idx
6487 < inline_n + tags_n
6488 {
6489 &parent_ct.per_exit_tags
6490 [parent_exit_idx - inline_n]
6491 .1
6492 } else {
6493 &parent_ct.exit_tags
6494 };
6495 let shape_ok =
6496 crate::jit::trace::exit_tags_match_entry_tags(
6497 &ct.entry_tags,
6498 parent_exit_tags_slice,
6499 &parent_ct.entry_tags,
6500 );
6501 if !shape_ok {
6502 self.jit.counters.side_trace_shape_mismatch += 1;
6503 }
6504 // P15-A v2-C-A4 — write the child's
6505 // entry fn ptr to BOTH the legacy
6506 // v2-A `exit_side_trace_ptrs[idx]`
6507 // cell (kept so v2-A's
6508 // walk_any_side_ptr_non_null tests
6509 // stay green) AND the per-kind cell
6510 // whose heap address the parent's
6511 // IR baked (v2-C-A2). The IR-baked
6512 // cell is what the call_indirect
6513 // gate actually reads. Only write
6514 // when A5-C shape gate passes.
6515 if shape_ok {
6516 if let Some(cell) = parent_ct
6517 .exit_side_trace_ptrs
6518 .get(parent_exit_idx)
6519 {
6520 cell.set(entry_ptr);
6521 }
6522 // Compute (kind, local) for the
6523 // IR-baked cell. Layout follows
6524 // exit_hit_counts: inline first,
6525 // then per_exit_tags, then the
6526 // global tail slot.
6527 let (sent_kind, sent_local) = if parent_exit_idx
6528 < inline_n
6529 {
6530 parent_ct.per_exit_inline[parent_exit_idx]
6531 .side_trace_ptr
6532 .set(entry_ptr);
6533 (
6534 crate::jit::trace::SIDE_SENT_KIND_INLINE,
6535 parent_exit_idx as u32,
6536 )
6537 } else if parent_exit_idx < inline_n + tags_n {
6538 let local = parent_exit_idx - inline_n;
6539 if let Some(b) =
6540 parent_ct.tags_side_trace_ptrs.get(local)
6541 {
6542 b.set(entry_ptr);
6543 }
6544 (
6545 crate::jit::trace::SIDE_SENT_KIND_TAG,
6546 local as u32,
6547 )
6548 } else {
6549 parent_ct.global_side_trace_ptr.set(entry_ptr);
6550 (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6551 };
6552 self.jit.counters.side_trace_compiled += 1;
6553 // P15-A v2-D-A8 — flip the
6554 // parent's fast-path hint so
6555 // the dispatcher knows to do
6556 // the tentative decode + cell
6557 // check on subsequent
6558 // dispatches. Set once and
6559 // stays true (we never unwire
6560 // a side trace today).
6561 parent_ct.has_any_side_wired.set(true);
6562
6563 // P15-A v2-C-A1/A4 — populate
6564 // the O(1) lookup cache the
6565 // dispatcher consults on
6566 // sentinel-bit-set returns.
6567 // Key is the encoded sentinel
6568 // (same encoding the IR ORs
6569 // into bits 56..=62 of the
6570 // child's i64 return).
6571 let sentinel =
6572 crate::jit::trace::encode_side_sentinel(
6573 sent_kind, sent_local,
6574 );
6575 let predicted_idx = if std::ptr::eq(
6576 parent_proto.as_ptr(),
6577 head_proto.as_ptr(),
6578 ) {
6579 parent_traces.len() as u32
6580 } else {
6581 head_proto.traces.borrow().len() as u32
6582 };
6583 parent_ct
6584 .side_trace_cache
6585 .borrow_mut()
6586 .insert(sentinel, predicted_idx);
6587 }
6588 }
6589 drop(parent_traces);
6590 }
6591 head_proto.traces.borrow_mut().push(TArc::new(ct));
6592 self.jit.counters.compiled += 1;
6593 }
6594 None => {
6595 self.jit.counters.compile_failed += 1;
6596 self.jit
6597 .counters
6598 .compile_failed_reasons
6599 .push(self.jit.trace_compiler.last_compile_checkpoint());
6600 }
6601 }
6602 }
6603 } // P13-S13-H — close the long-trace-bias else branch
6604 } else {
6605 // P12-S4-step1 + step4a — depth-aware push at the
6606 // current `cur_depth`. The `depth_cap_hit` /
6607 // `returned_past_head` early-exit is handled by
6608 // the `should_close` branch above; reaching here
6609 // means `cur_depth <= MAX_INLINE_DEPTH` and the
6610 // trace head's frame is still live.
6611 let depth_u8 = cur_depth as u8;
6612 if depth_u8 > self.jit.max_depth_seen {
6613 self.jit.max_depth_seen = depth_u8;
6614 }
6615 // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6616 // return / variable return count). Recorder pushed
6617 // it with var_count=None before the call dispatched;
6618 // now that the call has returned and we're about to
6619 // push the next op, top reflects the actual return
6620 // count. Snapshot top - (caller.base + call.a).
6621 if let Some(last) = rec.ops.last_mut()
6622 && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6623 && last.inst.c() == 0
6624 && last.var_count.is_none()
6625 && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6626 {
6627 let from = f.base + last.inst.a();
6628 if self.top >= from {
6629 last.var_count = Some(self.top - from);
6630 }
6631 }
6632 // P12-S9-A/C — for SetList B=0, snapshot the source
6633 // count = top - A - 1 (mirrors Lua's `n = top - ra
6634 // - 1` from lvm.c OP_SETLIST). Sources are
6635 // R[A+1..top), exclusive top. For Call C=0's
6636 // var_count (the return count = top - A inclusive),
6637 // see the prior-op fix-up above; here we
6638 // initialise the current Call op to None and let
6639 // the fix-up on the next op's push populate it.
6640 let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6641 && inst.b() == 0
6642 && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6643 {
6644 let from = f.base + inst.a();
6645 if self.top > from {
6646 Some(self.top - from - 1)
6647 } else {
6648 None
6649 }
6650 } else {
6651 None
6652 };
6653 let op = crate::jit::trace::RecordedOp {
6654 proto: cl.proto,
6655 pc,
6656 inst,
6657 inline_depth: depth_u8,
6658 var_count,
6659 };
6660 // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6661 // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6662 // Captured as a side-channel `RetfRecord` parallel to
6663 // `ops` when `p16_self_link_enabled` is on. R3's
6664 // down-rec stitch consumes these to guard side-trace
6665 // inlined-frame topology against the recorded shape.
6666 // Gated on the same flag as the cycle catch so the
6667 // ship-default path (p16 off) sees zero behavior
6668 // change. `caller_pc` is the recorded enclosing Call's
6669 // pc + 1 — interp's resume point after the inlined
6670 // frame pops.
6671 if self.jit.p16_self_link_enabled
6672 && depth_u8 > 0
6673 && matches!(
6674 inst.op(),
6675 crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6676 )
6677 {
6678 let results: u8 = match inst.op() {
6679 crate::vm::isa::Op::Return0 => 0,
6680 crate::vm::isa::Op::Return1 => 1,
6681 _ => 0,
6682 };
6683 // Most recent Op::Call recorded at the caller's
6684 // depth (`depth_u8 - 1`) is the frame this Return
6685 // is unwinding from. Reverse scan stops at the
6686 // first match.
6687 let caller_depth = depth_u8 - 1;
6688 let caller_call = rec.ops.iter().rev().find(|r| {
6689 r.inline_depth == caller_depth
6690 && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6691 });
6692 let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6693 // v2.0 Track-R R3a — capture the caller's proto
6694 // for the RetfRecord. LuaJIT `IR_RETF.op1`
6695 // equivalent. For fib(28) the caller's proto
6696 // equals the trace head; for future mutual
6697 // recursion the recorded Op::Call's proto is the
6698 // right target. Fallback to head_proto when no
6699 // enclosing Call op was captured (mirrors
6700 // `caller_pc`'s fallback to the Return's own pc).
6701 let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6702 rec.retfs.push(crate::jit::trace::RetfRecord {
6703 from_depth: depth_u8,
6704 to_depth: caller_depth,
6705 results,
6706 caller_pc,
6707 proto: caller_proto,
6708 });
6709 // v2.0 Track-R R3a — DownRec close trigger:
6710 // count RetfRecords on this recording whose
6711 // `proto` matches `caller_proto` (LuaJIT
6712 // `check_downrec_unroll` chain filter
6713 // `op1 == ptref`). Threshold mirrors
6714 // RECUNROLL_THRESHOLD; first trip stamps the
6715 // `downrec_close` marker, subsequent retfs
6716 // keep the marker without overwrite. The
6717 // lowerer's end_idx picker routes through
6718 // TraceEnd::DownRec when the marker is set;
6719 // R3a's tail emit still falls through to R1's
6720 // safe deopt path so fib(28) result stays
6721 // 317_811. R3b lifts.
6722 if rec.downrec_close.is_none() {
6723 let caller_proto_ptr = caller_proto.as_ptr();
6724 let prior_match_count = rec
6725 .retfs
6726 .iter()
6727 .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6728 .count();
6729 // Strictly-greater-than threshold matches
6730 // LuaJIT `count + J->tailcalled > recunroll`.
6731 // The newly-pushed retf is already counted.
6732 if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6733 rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6734 return_pc: caller_pc,
6735 target_proto: caller_proto,
6736 depth_delta: 1,
6737 });
6738 // R2 close-cause taxonomy: tag the
6739 // restart with `"downrec-restart"`. R3b
6740 // adds `"downrec-stitch-failed"` when
6741 // the lifted back-edge falls back to
6742 // deopt.
6743 self.jit.counters.bump_close_cause("downrec-restart");
6744 }
6745 }
6746 }
6747 // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6748 // FIRST eligible Op::GetField site under env-gate
6749 // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6750 // - R[B] is Value::Table with metatable.is_none()
6751 // - K[C] is Value::Str
6752 // - The string key actually occupies a hash slot
6753 // (so the IC's slot_idx is a real index, not
6754 // a probe sentinel).
6755 // Once captured, subsequent GetFields skip this
6756 // logic (rec.field_ic_snapshot.is_some() short-
6757 // circuits). Env-OFF short-circuits on the cached
6758 // atomic check inside field_ic_enabled().
6759 if rec.field_ic_snapshot.is_none()
6760 && matches!(inst.op(), crate::vm::isa::Op::GetField)
6761 && crate::jit::trace_types::field_ic_enabled()
6762 {
6763 let b = inst.b();
6764 let c_idx = inst.c() as usize;
6765 let r_b = self.stack[(base + b) as usize];
6766 if let Value::Table(g) = r_b
6767 && g.metatable().is_none()
6768 && c_idx < cl.proto.consts.len()
6769 && let Value::Str(s) = cl.proto.consts[c_idx]
6770 {
6771 let key = Value::Str(s);
6772 let tbl_ref = &*g;
6773 if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6774 && let Some(val) = tbl_ref.node_val_at(slot_idx)
6775 {
6776 let op_idx = rec.ops.len() as u32;
6777 rec.field_ic_snapshot =
6778 Some(crate::jit::trace_types::FieldIcSnapshot {
6779 op_idx,
6780 nodes_len: tbl_ref.nodes_capacity() as u64,
6781 slot_idx: slot_idx as u64,
6782 key_ptr_bits: s.as_ptr() as u64,
6783 cached_val_tag: val.tag_byte(),
6784 });
6785 self.jit.counters.field_ic_snapshot_captured += 1;
6786 }
6787 }
6788 }
6789 if !rec.push(op) {
6790 // v2.0 Track-R R2 — recorder overflow
6791 // (MAX_TRACE_LEN). Pre-R2 this site bumped
6792 // `aborted` with no reason label, leaving the
6793 // overflow indistinguishable from any other
6794 // abort cause that might be added later.
6795 // Tag it explicitly under the close-cause
6796 // bucket so probes can tally overflow vs
6797 // other abort causes in O(1).
6798 self.jit.active_trace = None;
6799 self.jit.counters.aborted += 1;
6800 self.jit.counters.bump_close_cause("trace-overflow");
6801 }
6802 }
6803 }
6804
6805 // P12-S3 — trace JIT dispatcher.
6806 //
6807 // When the dispatch loop is about to execute the op at
6808 // `pc` and there's a `numeric_only` CompiledTrace cached
6809 // for that `head_pc`, marshal the live regs into an
6810 // i64 buffer, jump into the trace, and resume the
6811 // interpreter at the returned continuation PC.
6812 //
6813 // Skipped (zero overhead) when `jit.trace_enabled` is
6814 // false; the lookup is a borrow + scan over
6815 // `cl.proto.traces`, which is a `Vec` whose size is at
6816 // most one entry per back-edge per Proto in practice.
6817 //
6818 // Marshalling contract — only Int slots survive the
6819 // round-trip cleanly (the reg_state ABI is `*mut i64`
6820 // with no tag info). Any non-Int slot in the affected
6821 // window forces a skip; interp takes over for one op
6822 // and the back-edge brings us back to try again next
6823 // pass (slots that were Nil/Float at one moment can
6824 // settle to Int by the time the next back-edge fires).
6825 //
6826 // A trace that comes back with `vm.jit.pending_err`
6827 // parked is treated as a deopt: clear the err, leave
6828 // the stack as the trace wrote it, and let the
6829 // interpreter run from the same `pc`. The trace itself
6830 // is left cached — a future entry might find no
6831 // metatable in the way and succeed.
6832 // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6833 // of 6 per-field Rc clones. proto.traces is now
6834 // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6835 // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6836 // operations per dispatch × 434k = ~2.2M Rc atomic ops
6837 // (~1-2% gain measured separately).
6838 // v2.0 Track-R R3c — one-shot consume of the
6839 // `suppress_downrec_admit_once` flag. Set by the R3c
6840 // downrec post-invoke arm below when it force-deopts the
6841 // trace (caller-pc guard miss OR cycle-budget exhausted)
6842 // so the NEXT interpreter loop iteration skips the
6843 // downrec admit, lets interp run the op at `head_pc`,
6844 // advances `pc` past `head_pc`, and breaks the otherwise-
6845 // infinite admit loop. Reading + clearing here means a
6846 // single dispatch tick consumes the suppression — the
6847 // following tick re-admits naturally (with the budget
6848 // also reset by the deopt site).
6849 let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6850 self.jit.suppress_downrec_admit_once = false;
6851 if self.jit.trace_enabled
6852 && let Some(ct) = {
6853 let traces = cl.proto.traces.borrow();
6854 traces
6855 .iter()
6856 .find(|t| {
6857 if t.head_pc != pc {
6858 return false;
6859 }
6860 let is_downrec = t.downrec_link.is_some();
6861 // v2.0 Track-R R3c — the one-shot suppress
6862 // flag blocks any admit (primary or fallback)
6863 // for `downrec_link`-bearing traces so the
6864 // next interp iter can run the natural op
6865 // at `head_pc` and advance past it. R3d's
6866 // `dispatchable=true` lift means the suppress
6867 // must also cover the primary `t.dispatchable`
6868 // arm — otherwise the lifted lookup would
6869 // immediately re-admit after a force-deopt
6870 // and the infinite loop returns.
6871 if is_downrec && downrec_admit_blocked {
6872 return false;
6873 }
6874 // Primary arm: `dispatchable=true` traces
6875 // (R3d-lifted DownRec or normal traces).
6876 // Fallback arm: R3c-shape `dispatchable=false`
6877 // DownRec traces (single-CMP guard kept
6878 // pinned because the 90% miss-rate would
6879 // make blind admit perf-negative).
6880 t.dispatchable || is_downrec
6881 })
6882 .cloned()
6883 }
6884 {
6885 // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6886 // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6887 // across the entire dispatch block so the fields outlive
6888 // all consumers. Saves 5 Rc::clone per dispatch.
6889 let entry_fn = ct.entry;
6890 let head_pc_val = ct.head_pc;
6891 let window_size = ct.window_size;
6892 let exit_tags = &ct.exit_tags;
6893 let per_exit_tags = &ct.per_exit_tags;
6894 let per_exit_inline = &ct.per_exit_inline;
6895 let compile_entry_tags = &ct.entry_tags;
6896 let global_tag_res_kind = ct.global_tag_res_kind;
6897 let exit_hit_counts = &ct.exit_hit_counts;
6898 let max_stack = cl.proto.max_stack as usize;
6899 let window_size_us = window_size as usize;
6900 let base_us = base as usize;
6901 // P12-S4-step3a — `reg_state` sized to the trace's
6902 // `window_size`, which today equals max_stack but
6903 // S4-step3b will expand for inlined frames.
6904 // Marshal-in still only writes [0..max_stack); slots
6905 // [max_stack..window_size) are zero-initialised and
6906 // filled by the trace's own GetUpval / arith.
6907 // P13-S13-D — reuse the Vm's amortised buffers
6908 // instead of allocating fresh Vecs each dispatch.
6909 // mem::take leaves an empty placeholder we restore
6910 // at the end of the dispatch block (success +
6911 // deopt paths both fall through to the restore).
6912 let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6913 entry_tags.clear();
6914 entry_tags.reserve(max_stack);
6915 // v2.0 Track-R R3c — this trace was admitted via the
6916 // `downrec_link.is_some()` arm rather than the normal
6917 // `dispatchable=true` arm. The pre-invoke path
6918 // populates a reserved saved-PC slot just past the
6919 // normal register window so R3b's lowerer guard load
6920 // (`reg_state[window_size]`) compares the runtime
6921 // saved caller PC against the recorded `dr_return_pc`.
6922 //
6923 // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6924 // gate. After R3d lifts `dispatchable = true` for
6925 // multi-way guards, the trace's body still emits the
6926 // R3b/R3d sentinel shape on return — the saved-PC slot
6927 // and post-invoke classifier must keep firing.
6928 // `downrec_link.is_some()` is the unique structural
6929 // signal that the trace closes via DownRec.
6930 let is_downrec_entry = ct.downrec_link.is_some();
6931 let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6932 reg_state.clear();
6933 // v2.0 Track-R R3c — when admitting a downrec trace,
6934 // size the buffer to `window_size + 1` so the lowerer
6935 // can `load(I64, ..., reg_state, window_size * 8)`
6936 // for the saved caller PC guard input. The extra slot
6937 // is the LAST element so cranelift's existing
6938 // `0..window_size` accesses are unaffected.
6939 let reg_state_len = if is_downrec_entry {
6940 window_size_us + 1
6941 } else {
6942 window_size_us
6943 };
6944 reg_state.resize(reg_state_len, 0i64);
6945 let mut dispatch_ok = true;
6946 for i in 0..max_stack {
6947 let v = self.stack[base_us + i];
6948 let (tag, raw) = v.unpack();
6949 entry_tags.push(tag);
6950 // P12-S12-C v3 — entry tag guard. The trace's IR
6951 // is specialised to the compile-time entry tags
6952 // (via current_kinds propagation from
6953 // from_entry_tag). A runtime tag mismatch means
6954 // body ops would mis-interpret raw bits (e.g.
6955 // treat a Str pointer as Int payload → garbage).
6956 // Skip dispatch on mismatch so interp handles
6957 // this entry shape; the trace stays cached for
6958 // future entries that match.
6959 if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6960 dispatch_ok = false;
6961 break;
6962 }
6963 match tag {
6964 // Int / Float / Table / Closure / Nil all
6965 // marshal to raw payload cleanly; the trace's IR
6966 // treats the 8-byte slot as an i64 (with
6967 // f64 ops bitcasting around the boundary).
6968 crate::runtime::value::raw::INT
6969 | crate::runtime::value::raw::FLOAT
6970 | crate::runtime::value::raw::TABLE
6971 | crate::runtime::value::raw::CLOSURE
6972 // P12-S12-B-v2 — Native iter slots (e.g.
6973 // R[A] = ipairs_iter) are present in
6974 // generic-for traces; the raw bits are a
6975 // valid `*mut NativeClosure` and round-trip
6976 // cleanly.
6977 | crate::runtime::value::raw::NATIVE
6978 // P12-S12-C v1 — Str slots show up in
6979 // string-concat traces; raw bits = `*mut
6980 // LuaStr` (interned, GC-managed). Round-
6981 // trips cleanly as a heap pointer.
6982 | crate::runtime::value::raw::STR
6983 | crate::runtime::value::raw::NIL => {
6984 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
6985 reg_state[i] = unsafe { raw.zero as i64 };
6986 }
6987 _ => {
6988 dispatch_ok = false;
6989 break;
6990 }
6991 }
6992 }
6993
6994 if dispatch_ok {
6995 debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6996 self.jit.pending_err = None;
6997 // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6998 // count. A cmp@d>0 side-exit calls the materialize
6999 // helper which pushes inlined frames onto
7000 // `vm.frames`; on deopt those frames must be popped
7001 // before falling through to the interpreter, else
7002 // the stack grows unboundedly per deopted dispatch.
7003 let pre_frames = self.frames.len();
7004 // v2.0 Track-R R3c — saved-PC slot population. The
7005 // recorded `dr_return_pc` on the closing trace is
7006 // the caller's resume PC captured at a depth>0
7007 // Return push (recorder push site, see R3a verdict
7008 // §3). The natural runtime analogue for self-
7009 // stitch is the dispatching frame's PARENT frame's
7010 // PC: the trace's head_pc sits inside a Lua frame,
7011 // and the parent (caller) frame's `pc` is what
7012 // luna would observe as `[base-8]` in the LJ
7013 // `asm_retf` shape (`lj_asm_arm64.h:565`). When
7014 // the parent isn't a Lua frame (top-level dispatch
7015 // — first invocation through `call_value`), no
7016 // saved PC exists; we write 0, which always
7017 // mismatches the recorded `dr_return_pc != 0`
7018 // invariant pinned by R3b
7019 // (`crates/luna-jit/src/jit_backend/trace.rs:7206
7020 // debug_assert!(dr_return_pc != 0, ...)`).
7021 if is_downrec_entry {
7022 let saved_pc: i64 = if pre_frames >= 2 {
7023 match &self.frames[pre_frames - 2] {
7024 CallFrame::Lua(parent) => parent.pc as i64,
7025 CallFrame::Cont(_) => 0,
7026 }
7027 } else {
7028 0
7029 };
7030 reg_state[window_size_us] = saved_pc;
7031 }
7032 // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
7033 // diagnostic hook. The probe fires once per trace dispatch
7034 // (regardless of JIT vs AOT origin — both go through this
7035 // arm), letting the AOT smoke test verify mcode actually
7036 // executed. Guarded behind `OnceLock` so the env read is
7037 // a one-time cost per process; not gated on a particular
7038 // counter so the smoke test gets a deterministic single-
7039 // line `aot_trace_fired pc=N` per first dispatch.
7040 if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
7041 eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
7042 }
7043 let continuation_pc = {
7044 // v1.1 A1 Session A — chunk_compiler.enter
7045 // (CraneliftBackend delegates to enter_jit;
7046 // NullJitBackend returns an inert guard).
7047 let vm_ptr: *mut Vm = self;
7048 let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
7049 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7050 unsafe { entry_fn(reg_state.as_mut_ptr()) }
7051 };
7052 self.jit.counters.dispatched += 1;
7053
7054 if self.jit.pending_err.is_some() {
7055 self.jit.pending_err = None;
7056 self.jit.counters.deopt += 1;
7057 // P12-S4-step4b-C-2 — unwind any helper-pushed
7058 // inlined frames before the interpreter resumes.
7059 // Don't restore reg_state — the trace's partial
7060 // writes are discarded; interp re-executes from
7061 // the original `pc`.
7062 while self.frames.len() > pre_frames {
7063 frames_pop_sync(&mut self.frames, &mut self.frames_top);
7064 }
7065 if is_downrec_entry {
7066 // v2.0 Track-R R3c — pending_err observed
7067 // mid-trace inside a downrec admit. Treat
7068 // it as a guard miss: bump `downrec_deopt`
7069 // and suppress the next downrec admit so
7070 // interp can advance past `head_pc` and
7071 // the same trace doesn't immediately re-
7072 // fire on the next loop iteration.
7073 self.jit.counters.downrec_deopt += 1;
7074 self.jit.suppress_downrec_admit_once = true;
7075 }
7076 } else if is_downrec_entry && {
7077 // v2.0 Track-R R3d — only enter the R3c/R3d
7078 // downrec classifier for returns whose shape
7079 // matches the lowerer's `downrec_idx_opt` tail
7080 // emit: either the stitch_blk DOWNREC sentinel
7081 // (HIT) or the deopt_blk GLOBAL-sentinel-with-
7082 // body==head_pc (MISS via guard fail). Any
7083 // other return from a downrec trace (intermediate
7084 // body cmp side-exit, GetField inference fail,
7085 // etc.) carries a different sentinel/body shape
7086 // and means the body exited BEFORE reaching the
7087 // downrec close — classify those through the
7088 // normal decode path (else branch below) so
7089 // reg_state restores + pc advances correctly.
7090 // The pre-R3d behavior (R3c) classified them all
7091 // as MISS and skipped the normal restore, which
7092 // inflated `downrec_deopt` with non-downrec
7093 // events and lost the trace's mid-flight writes.
7094 let raw_ret = continuation_pc as u64;
7095 let from_side_trace = (raw_ret >> 63) & 1 == 1;
7096 let sentinel_code = if from_side_trace {
7097 ((raw_ret >> 56) & 0x7F) as u32
7098 } else {
7099 0
7100 };
7101 let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
7102 let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
7103 crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
7104 0,
7105 );
7106 from_side_trace
7107 && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
7108 || (sentinel_code == global_deopt_code
7109 && raw_body == head_pc_val as u64))
7110 } {
7111 // R3d downrec event classifier.
7112 let raw_ret = continuation_pc as u64;
7113 let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
7114 if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
7115 // Guard HIT — saved_pc matched one of the
7116 // baked candidates and the trace's
7117 // `stitch_blk` arm returned the DOWNREC
7118 // sentinel. Cycle-safety checkpoint:
7119 // decrement budget; on underflow,
7120 // reclassify as deopt + reset budget.
7121 // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
7122 // ~all natural HITs in a hot loop fire
7123 // before reset pressure.
7124 if self.jit.stitch_depth_remaining > 0 {
7125 self.jit.stitch_depth_remaining -= 1;
7126 self.jit.counters.downrec_dispatched += 1;
7127 } else {
7128 self.jit.counters.downrec_deopt += 1;
7129 self.jit.stitch_depth_remaining =
7130 crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
7131 }
7132 } else {
7133 // Guard MISS via the lowerer's deopt_blk
7134 // arm (GLOBAL sentinel + body == head_pc).
7135 // The deopt_blk emit performs the
7136 // store-back via `emit_store_back_and_return_pc`,
7137 // so the live stack already reflects the
7138 // body's writes; no extra restore needed
7139 // from the dispatcher side.
7140 self.jit.counters.downrec_deopt += 1;
7141 }
7142 self.jit.suppress_downrec_admit_once = true;
7143 // Pop helper-pushed inlined frames (defensive —
7144 // R3d's emit shape doesn't push frames in the
7145 // tail, but a body side-exit before reaching
7146 // the tail may have via the materialize helper).
7147 while self.frames.len() > pre_frames {
7148 frames_pop_sync(&mut self.frames, &mut self.frames_top);
7149 }
7150 self.jit.reg_state_buf = reg_state;
7151 self.jit.entry_tags_buf = entry_tags;
7152 continue;
7153 } else {
7154 // Restore each slot using the trace's
7155 // exit-tag analysis (see ExitTag docs).
7156 // P12-S4-step4b-C-2 — decode the IR's
7157 // side-exit shape. Upper 32 bits = (site_idx
7158 // + 1) for inline cmp side-exits, 0 for
7159 // legacy clean-tail / non-inline exits.
7160 // P15-A v2-C-A0 — decode lives in
7161 // `crate::jit::trace::decode_exit_shape` so
7162 // v2-C-A3 can reuse it with the SIDE TRACE's
7163 // shape inputs when the sentinel bit
7164 // (v2-C-A2) is set on `raw_ret`.
7165 let raw_ret = continuation_pc as u64;
7166 // P15-A v2-C-A3 — side-trace return decode.
7167 // Bit 63 of `raw_ret` is the side-trace
7168 // marker the parent's IR OR'd in when it
7169 // tail-called into a wired child trace.
7170 // Bits 56..=62 carry the sentinel code (the
7171 // cache key into the parent's
7172 // `side_trace_cache`); bits 0..=55 are the
7173 // child's own return value (encoded site or
7174 // plain cont_pc) which we MUST decode using
7175 // the CHILD's per_exit_inline / per_exit_tags
7176 // / exit_tags / exit_hit_counts — not the
7177 // parent's. The dispatcher snapshot read
7178 // above holds the parent's shapes; when bit
7179 // 63 is set we re-fetch the child's via the
7180 // sentinel-keyed cache.
7181 let from_side_trace = (raw_ret >> 63) & 1 == 1;
7182 let (
7183 decode_inline,
7184 decode_tags,
7185 decode_exit_tags,
7186 decode_hit_counts,
7187 decode_body,
7188 ) = if from_side_trace {
7189 let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
7190 let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
7191 let traces = cl.proto.traces.borrow();
7192 let child_idx = traces
7193 .iter()
7194 .find(|t| t.head_pc == head_pc_val)
7195 .and_then(|pct| {
7196 pct.side_trace_cache.borrow().get(&sentinel_code).copied()
7197 });
7198 if let Some(idx) = child_idx
7199 && let Some(child) = traces.get(idx as usize)
7200 {
7201 if crate::jit::trace::v2c_probe_enabled() {
7202 eprintln!(
7203 "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
7204 sentinel_code,
7205 body,
7206 idx,
7207 child.n_ops,
7208 child.head_pc,
7209 child.window_size,
7210 pc,
7211 window_size,
7212 child.dispatchable,
7213 child.is_inline_abort_close,
7214 );
7215 }
7216 (
7217 child.per_exit_inline.clone(),
7218 child.per_exit_tags.clone(),
7219 child.exit_tags.clone(),
7220 child.exit_hit_counts.clone(),
7221 body,
7222 )
7223 } else {
7224 if crate::jit::trace::v2c_probe_enabled() {
7225 eprintln!(
7226 "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
7227 sentinel_code, body,
7228 );
7229 }
7230 // Cache miss — fall back to parent
7231 // shapes with the body bits. Best-
7232 // effort; the trace_side_trace_
7233 // shape_mismatch_count records this
7234 // path indirectly (close-handler
7235 // skips wiring on mismatch so we
7236 // shouldn't reach here when shape
7237 // gate held).
7238 (
7239 per_exit_inline.clone(),
7240 per_exit_tags.clone(),
7241 exit_tags.clone(),
7242 exit_hit_counts.clone(),
7243 body,
7244 )
7245 }
7246 } else {
7247 // P15-A v2-D — dispatcher-level side-trace
7248 // invocation. Replaces v2-C's universal IR
7249 // gate (`load + icmp + brif` at every
7250 // emit_store_back callsite, which A6/A7
7251 // measured as a net perf regression).
7252 // A8 fast-path (historical — reverted, see
7253 // A8-revert below): skipped the tentative
7254 // decode + child lookup entirely when
7255 // `has_any_side_wired == false` (the common
7256 // case until the first side trace compiles for
7257 // this parent). For fib_10_x10k and other tight
7258 // short-trace workloads where most parent traces
7259 // never get a wired child, it was meant to collapse
7260 // the v2-D overhead to one cold-path `Cell::get()`.
7261 // A8-revert: A8 had `parent_has_side` short-
7262 // circuit + snapshot hoist; mini N=3 showed
7263 // A8 lost the btrees_d8 1.02× win (dropped
7264 // to 0.95×) WITHOUT helping fib_10 (same
7265 // 0.86×). Drop A8 — accept the always-run
7266 // v2-D path; the tentative decode + cell
7267 // load is cheaper than the cost A8 added.
7268 {
7269 let tentative = crate::jit::trace::decode_exit_shape(
7270 raw_ret,
7271 per_exit_inline,
7272 per_exit_tags,
7273 exit_tags,
7274 );
7275 let tentative_exit_idx = tentative.exit_hit_idx;
7276 let child_invoke = {
7277 let traces = cl.proto.traces.borrow();
7278 traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
7279 |pct| {
7280 let cell =
7281 pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
7282 let fn_ptr = cell.get();
7283 if fn_ptr.is_null() {
7284 return None;
7285 }
7286 traces
7287 .iter()
7288 .find(|t| {
7289 t.entry as *const () as *const u8 == fn_ptr
7290 })
7291 .map(|child| {
7292 (
7293 child.entry,
7294 child.per_exit_inline.clone(),
7295 child.per_exit_tags.clone(),
7296 child.exit_tags.clone(),
7297 child.exit_hit_counts.clone(),
7298 )
7299 })
7300 },
7301 )
7302 };
7303 if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
7304 let child_raw_ret = {
7305 // v1.1 A1 Session A — chunk_compiler.enter
7306 // (side-trace entry).
7307 let vm_ptr: *mut Vm = self;
7308 let _guard =
7309 self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
7310 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7311 unsafe { cent(reg_state.as_mut_ptr()) }
7312 };
7313 (cpi, cpt, cet, chc, child_raw_ret as u64)
7314 } else {
7315 (
7316 per_exit_inline.clone(),
7317 per_exit_tags.clone(),
7318 exit_tags.clone(),
7319 exit_hit_counts.clone(),
7320 raw_ret,
7321 )
7322 }
7323 }
7324 };
7325 let decoded = crate::jit::trace::decode_exit_shape(
7326 decode_body,
7327 &decode_inline,
7328 &decode_tags,
7329 &decode_exit_tags,
7330 );
7331 let site_id = decoded.site_id;
7332 let cont_pc = decoded.cont_pc;
7333 let exit_hit_idx = decoded.exit_hit_idx;
7334 let exit_tags_for_pc = decoded.exit_tags_for_pc;
7335 // P15-A v2-C-A3 — for side-trace returns
7336 // force using_global_exit_tags=false so the
7337 // restore loop always takes the per-tag slow
7338 // path (the child's global_tag_res_kind
7339 // classification isn't plumbed through yet
7340 // — TODO for a future polish step).
7341 let using_global_exit_tags = if from_side_trace {
7342 false
7343 } else {
7344 decoded.using_global_exit_tags
7345 };
7346 // P15-prep — increment the counter (saturate
7347 // at u32::MAX to avoid wrap on long runs).
7348 // P15-A v1 — track whether this increment is
7349 // the one that crossed `HOTEXIT_THRESHOLD`
7350 // (transition: previous v < threshold, new v
7351 // == threshold). The side-trace start is
7352 // deferred to just before `continue;` so
7353 // vm.stack and frame.pc are fully restored
7354 // (the snapshot reads post-restore values).
7355 let mut side_trace_should_start = false;
7356 // P15-A v2-C-A3 — for side-trace returns the
7357 // counter to bump is the CHILD's (decoded
7358 // shape lookup) — `exit_hit_idx` is into the
7359 // decoded layout, so use the matching
7360 // `decode_hit_counts`. For parent decode
7361 // they're aliased (clone of the parent's
7362 // own Rc).
7363 if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7364 let v = c.get();
7365 if v < u32::MAX {
7366 c.set(v + 1);
7367 }
7368 if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7369 && self.jit.active_trace.is_none()
7370 && self.jit.trace_enabled
7371 {
7372 side_trace_should_start = true;
7373 }
7374 }
7375 // P12-S4-step4b-C-2 — at an inline cmp@d>0
7376 // side-exit, the helper has pushed N frames on
7377 // top of the trace head's frame and
7378 // `exit_tags_for_pc.len()` covers the full
7379 // window (caller + each inlined frame's
7380 // window). Slots beyond `max_stack` belong to
7381 // an inlined frame: their `Untouched` entries
7382 // default to Nil (no entry-tag fallback —
7383 // marshal-in only captured caller slots) and
7384 // we write to interp stack at `base + i` which
7385 // mirrors `op_offsets`-derived layout.
7386 let slot_count = exit_tags_for_pc.len();
7387 // P12-S4-step4b-C-2 — the helper only extends
7388 // vm.stack up to the deepest pushed frame's
7389 // window, but the exit_tags snapshot covers
7390 // the trace's full `window_size` (which
7391 // includes depth-N+1 scratch slots that the
7392 // trace's IR may have written without a
7393 // matching pushed frame). Extend with Nil so
7394 // the write at the tail doesn't panic; these
7395 // slots get overwritten by the writeback loop
7396 // and won't leak meaningful data past the
7397 // pushed frames' R[0..max_stack) windows.
7398 if self.stack.len() < base_us + slot_count {
7399 self.stack
7400 .resize(base_us + slot_count, crate::runtime::Value::Nil);
7401 }
7402 // P13-S13-E — fast-path restore loop. When
7403 // we landed on the global `exit_tags`,
7404 // dispatch on the compile-time
7405 // classification: skip the loop entirely
7406 // for `AllUntouched`, do a tag-free
7407 // `Value::Int(...)` write per slot for
7408 // `AllInt`, otherwise fall through to the
7409 // general match-arm loop. site_id > 0
7410 // (inline frame mat) and per_exit_tags
7411 // hits always take the general path —
7412 // their per-side-exit shapes aren't
7413 // pre-classified yet.
7414 let fast_path_taken = if using_global_exit_tags {
7415 match global_tag_res_kind {
7416 crate::jit::trace::TagResKind::AllUntouched => {
7417 // No-op: vm.stack already
7418 // matches the trace's post-
7419 // entry state for these
7420 // slots (entry values not
7421 // overridden, or already
7422 // spilled by helpers).
7423 true
7424 }
7425 crate::jit::trace::TagResKind::AllInt => {
7426 for i in 0..slot_count {
7427 self.stack[base_us + i] =
7428 crate::runtime::Value::Int(reg_state[i]);
7429 }
7430 true
7431 }
7432 crate::jit::trace::TagResKind::Mixed => false,
7433 }
7434 } else {
7435 false
7436 };
7437 if !fast_path_taken {
7438 for i in 0..slot_count {
7439 let tag = match exit_tags_for_pc[i] {
7440 crate::jit::trace::ExitTag::Untouched => {
7441 if i < max_stack {
7442 entry_tags[i]
7443 } else {
7444 crate::runtime::value::raw::NIL
7445 }
7446 }
7447 crate::jit::trace::ExitTag::Int => {
7448 crate::runtime::value::raw::INT
7449 }
7450 crate::jit::trace::ExitTag::Float => {
7451 crate::runtime::value::raw::FLOAT
7452 }
7453 crate::jit::trace::ExitTag::Table => {
7454 crate::runtime::value::raw::TABLE
7455 }
7456 crate::jit::trace::ExitTag::Closure => {
7457 crate::runtime::value::raw::CLOSURE
7458 }
7459 // P12-S6-A1 — trace actively wrote Nil
7460 // to this slot (e.g. via Op::LoadNil).
7461 // Restore as Nil regardless of the entry
7462 // tag, since the i64 payload is 0 and
7463 // packing as the entry tag (e.g. INT)
7464 // would mis-type the slot.
7465 crate::jit::trace::ExitTag::Nil => {
7466 crate::runtime::value::raw::NIL
7467 }
7468 // P12-S12-C v2 — trace wrote a Str ptr
7469 // to this slot (LoadK Str / Move from
7470 // Str / Concat result). Restore as
7471 // Value::Str with raw bits round-
7472 // tripped.
7473 crate::jit::trace::ExitTag::Str => {
7474 crate::runtime::value::raw::STR
7475 }
7476 };
7477 // SAFETY: tag is from a verified slot
7478 // (entry validated above) or pinned by the
7479 // exit-tag analysis to INT / FLOAT / TABLE /
7480 // CLOSURE / NIL / STR. The raw payload sits
7481 // in reg_state[i]. Stack was extended by the
7482 // materialize helper for inline frames.
7483 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7484 self.stack[base_us + i] = unsafe {
7485 Value::pack(
7486 tag,
7487 crate::runtime::value::RawVal {
7488 zero: reg_state[i] as u64,
7489 },
7490 )
7491 };
7492 }
7493 }
7494 // P12-S4-step4b-C-2 — for non-inline exits the
7495 // helper was never called (no metas chain for
7496 // this cont_pc), so `frames.last()` is the
7497 // trace head's frame and we set its pc to
7498 // cont_pc as before. For inline exits the
7499 // helper baked the side-exit PC into the
7500 // innermost frame's `pc` at push time
7501 // (chain.last().pc was overridden at emit),
7502 // so this assignment to `frames.last_mut().pc
7503 // = cont_pc` is a redundant-but-correct
7504 // confirmation.
7505 let _ = &per_exit_inline; // hold the Rc alive across dispatch
7506 // P12-S4-step4b-C-2 — for inline side-exits the
7507 // helper has pushed N frames on top. The trace
7508 // head frame is at `pre_frames - 1`; set its
7509 // pc to `head_resume_pc` so when the chain
7510 // eventually pops back to it, interp resumes
7511 // PAST the trace's depth-0 Op::Call instead of
7512 // restarting from `head_pc` and re-triggering
7513 // dispatch (infinite loop). The innermost
7514 // (helper-pushed) frame already has its pc
7515 // baked in at compile time, but we still
7516 // assign `cont_pc` below for parity with the
7517 // non-inline path (no-op).
7518 if site_id > 0 {
7519 let idx = (site_id - 1) as usize;
7520 let head_resume_pc = decode_inline[idx].head_resume_pc;
7521 if pre_frames > 0 {
7522 if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7523 f.pc = head_resume_pc;
7524 }
7525 }
7526 }
7527 let frames_len_now = self.frames.len();
7528 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7529 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7530 CallFrame::Lua(fmut) => {
7531 if crate::jit::trace::v2c_probe_enabled() {
7532 eprintln!(
7533 "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
7534 from_side_trace,
7535 raw_ret,
7536 fmut.pc,
7537 cont_pc,
7538 site_id,
7539 frames_len_now,
7540 pre_frames,
7541 max_stack,
7542 );
7543 }
7544 fmut.pc = cont_pc;
7545 }
7546 _ => unreachable!("Cont frame at trace dispatch"),
7547 }
7548 // P15-A v1 — deferred side-trace start. The
7549 // increment block above flagged this exit's
7550 // hit count crossing HOTEXIT_THRESHOLD; now
7551 // that vm.stack is restored and frame.pc is
7552 // settled, snapshot entry_tags from the
7553 // resume frame's window and create the
7554 // recorder. The recorder's first push fires
7555 // on the next interp iteration at cont_pc.
7556 //
7557 // `head_proto` for the side trace = cl.proto
7558 // (trace JIT only inlines self-recursive
7559 // calls today, so cont_pc always lands in
7560 // the same proto as the parent). Frame base
7561 // is the resume frame (top of `self.frames`
7562 // — inline-pushed frames moved this).
7563 if side_trace_should_start {
7564 let (resume_base, resume_proto) = match self.frames.last() {
7565 Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7566 _ => (base_us, cl.proto),
7567 };
7568 let resume_max_stack = resume_proto.max_stack as usize;
7569 let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7570 // Extend stack if cont_pc's frame window
7571 // overhangs the current stack len (rare,
7572 // but inline-pushed frame stack writes
7573 // only covered the trace's writeback).
7574 if self.stack.len() < resume_base + resume_max_stack {
7575 self.stack.resize(
7576 resume_base + resume_max_stack,
7577 crate::runtime::Value::Nil,
7578 );
7579 }
7580 for i in 0..resume_max_stack {
7581 let (tag, _) = self.stack[resume_base + i].unpack();
7582 side_entry_tags.push(tag);
7583 }
7584 self.jit.active_trace =
7585 Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7586 resume_proto,
7587 cont_pc,
7588 side_entry_tags,
7589 cl.proto,
7590 head_pc_val,
7591 exit_hit_idx,
7592 )));
7593 self.jit.recording_frame_base = self.frames.len() - 1;
7594 self.jit.counters.side_trace_started += 1;
7595 }
7596 // P13-S13-D — put the dispatch buffers back
7597 // before the `continue;` so the next
7598 // dispatch picks up the same allocation.
7599 self.jit.reg_state_buf = reg_state;
7600 self.jit.entry_tags_buf = entry_tags;
7601 continue;
7602 }
7603 }
7604 // P13-S13-D — !dispatch_ok / deopt path / non-cont
7605 // exit also restore the buffers before falling
7606 // through to the interp.
7607 self.jit.reg_state_buf = reg_state;
7608 self.jit.entry_tags_buf = entry_tags;
7609 }
7610
7611 // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
7612 // hook code that consults `currentpc = savedpc - 1` lands on the
7613 // instruction now executing. luna mirrors that by advancing
7614 // `f.pc` to `pc + 1` before the hook block — local_at /
7615 // getinfo / line attribution all read f.pc, and the existing
7616 // `pc - 1` convention in those helpers then yields the current
7617 // instruction's pc (db.lua :696: local `A` visible at the
7618 // chunk's return line once OP_CLOSURE has advanced pc).
7619 //
7620 // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
7621 // (cont frames drained above) so the and_then/Option layers are
7622 // dead weight.
7623 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7624 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7625 CallFrame::Lua(fmut) => fmut.pc = pc + 1,
7626 _ => unreachable!("Cont frame at pc bump"),
7627 }
7628
7629 // count + line hooks (PUC traceexec): before executing the
7630 // instruction. Skipped while the hook itself runs.
7631 // (Parens here are load-bearing — without them `&&` binds tighter
7632 // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7633 // letting a Lua line hook recurse into itself → stack overflow
7634 // on db.lua line-hook assertions. Matches the `hook_call_with` /
7635 // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7636 if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7637 let lines = &cl.proto.lines;
7638 let cur_line = if lines.is_empty() {
7639 None
7640 } else {
7641 Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7642 };
7643 // count hook: fire every `count_base` instructions
7644 if self.hook.count {
7645 self.hook.count_left -= 1;
7646 if self.hook.count_left <= 0 {
7647 self.hook.count_left = self.hook.count_base;
7648 // hooked function is the running Lua frame: its frame
7649 // is on the stack, so no synthetic C level is needed.
7650 self.run_hook(b"count", cur_line, false)?;
7651 }
7652 }
7653 // line hook: fire on a fresh frame, a backward jump (loop), or a
7654 // change of source line.
7655 if self.hook.line {
7656 if lines.is_empty() {
7657 // PUC: a stripped chunk has no line info, so
7658 // `getfuncline` returns -1. The line hook still fires
7659 // on the first instruction of the new frame (where
7660 // `npci <= oldpc` holds at oldpc=0), with the line
7661 // pushed as `nil` instead of an integer (db.lua :1030
7662 // "hook called without debug info for 1st instruction").
7663 if oldpc == u32::MAX {
7664 self.run_hook(b"line", None, false)?;
7665 self.top_frame_mut().hook_oldpc = pc;
7666 }
7667 } else {
7668 let newline = lines[(pc as usize).min(lines.len() - 1)];
7669 // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7670 // on a backward jump (`pc < oldpc` — strict; an equal pc
7671 // would re-fire the install-site after `oldpc = pc`),
7672 // or when the source line changes.
7673 let fire = oldpc == u32::MAX
7674 || pc < oldpc
7675 || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7676 if fire {
7677 self.run_hook(b"line", Some(newline as i64), false)?;
7678 }
7679 self.top_frame_mut().hook_oldpc = pc;
7680 }
7681 }
7682 }
7683
7684 match inst.op() {
7685 Op::Move => {
7686 let v = self.r(base, inst.b());
7687 self.set_r(base, inst.a(), v);
7688 }
7689 Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7690 Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7691 Op::LoadK => {
7692 let v = cl.proto.consts[inst.bx() as usize];
7693 self.set_r(base, inst.a(), v);
7694 }
7695 Op::LoadKx => {
7696 let extra = cl.proto.code[self.pc_of_top() as usize];
7697 self.bump_pc();
7698 let v = cl.proto.consts[extra.ax() as usize];
7699 self.set_r(base, inst.a(), v);
7700 }
7701 Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7702 Op::LFalseSkip => {
7703 self.set_r(base, inst.a(), Value::Bool(false));
7704 self.bump_pc();
7705 }
7706 Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7707 Op::LoadNil => {
7708 let a = inst.a();
7709 for i in 0..=inst.b() {
7710 self.set_r(base, a + i, Value::Nil);
7711 }
7712 }
7713 Op::GetUpval => {
7714 let v = self.upval_get(cl, inst.b());
7715 self.set_r(base, inst.a(), v);
7716 }
7717 Op::SetUpval => {
7718 let v = self.r(base, inst.a());
7719 self.upval_set(cl, inst.b(), v);
7720 }
7721 Op::GetTabUp => {
7722 let t = self.upval_get(cl, inst.b());
7723 let key = cl.proto.consts[inst.c() as usize];
7724 self.op_index(t, key, base + inst.a())?;
7725 }
7726 Op::GetTable => {
7727 let t = self.r(base, inst.b());
7728 let key = self.r(base, inst.c());
7729 self.op_index(t, key, base + inst.a())?;
7730 }
7731 Op::GetI => {
7732 let t = self.r(base, inst.b());
7733 self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7734 }
7735 Op::GetField => {
7736 let t = self.r(base, inst.b());
7737 let key = cl.proto.consts[inst.c() as usize];
7738 // v1.2 D4 A1 — fast path: known-Str const key + no
7739 // metatable on the table → skip `op_index` /
7740 // `index_step`'s MAX_TAG_LOOP setup and the outer
7741 // `Value` match. Falls through to the slow path
7742 // unchanged when either invariant breaks (so
7743 // `__index` metamethods, non-Table receivers, and
7744 // non-Str keys behave exactly as before).
7745 if let Value::Table(tb) = t
7746 && tb.metatable().is_none()
7747 && let Value::Str(s) = key
7748 {
7749 let v = tb.get_str(s);
7750 self.stack[(base + inst.a()) as usize] = v;
7751 } else {
7752 self.op_index(t, key, base + inst.a())?;
7753 }
7754 }
7755 Op::SetTabUp => {
7756 let t = self.upval_get(cl, inst.a());
7757 let key = cl.proto.consts[inst.b() as usize];
7758 let v = self.r(base, inst.c());
7759 self.op_newindex(t, key, v)?;
7760 }
7761 Op::SetTable => {
7762 let t = self.r(base, inst.a());
7763 let key = self.r(base, inst.b());
7764 let v = self.r(base, inst.c());
7765 self.op_newindex(t, key, v)?;
7766 }
7767 Op::SetI => {
7768 let t = self.r(base, inst.a());
7769 let v = self.r(base, inst.c());
7770 self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7771 }
7772 Op::SetField => {
7773 let t = self.r(base, inst.a());
7774 let key = cl.proto.consts[inst.b() as usize];
7775 let v = self.r(base, inst.c());
7776 self.op_newindex(t, key, v)?;
7777 }
7778 Op::NewTable => {
7779 let t = self.heap.new_table();
7780 self.set_r(base, inst.a(), Value::Table(t));
7781 self.maybe_collect_garbage(base + inst.a() + 1);
7782 }
7783 Op::SetList => {
7784 let a = inst.a();
7785 let abs_a = base + a;
7786 let n = if inst.b() == 0 {
7787 self.top - (abs_a + 1)
7788 } else {
7789 inst.b()
7790 };
7791 let offset = if inst.k() {
7792 let extra = cl.proto.code[self.pc_of_top() as usize];
7793 self.bump_pc();
7794 extra.ax() as i64
7795 } else {
7796 inst.c() as i64
7797 };
7798 let Value::Table(t) = self.r(base, a) else {
7799 unreachable!("SETLIST on non-table");
7800 };
7801 for i in 1..=n {
7802 let v = self.r(base, a + i);
7803 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7804 if let Err(TableError::Overflow) =
7805 unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7806 {
7807 return Err(self.rt_err("table overflow"));
7808 }
7809 }
7810 // one barrier_back covers every store this op did — PUC's
7811 // `luaC_barrierback_` once-per-table optimisation
7812 self.heap
7813 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7814 // the element temps above the table are now consumed
7815 self.maybe_collect_garbage(base + a + 1);
7816 }
7817 Op::SelfOp => {
7818 let o = self.r(base, inst.b());
7819 self.set_r(base, inst.a() + 1, o);
7820 // PUC OP_SELF's C is a constant index when the k-flag is
7821 // set; otherwise it points to a register that holds the
7822 // (constant-loaded) key. luna's compiler falls back to the
7823 // register form when the constant index exceeds OP_SELF's
7824 // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7825 // a table with 250+ string keys, where "findfield" lands
7826 // past const #255). The exec must honour the same split.
7827 let key = if inst.k() {
7828 cl.proto.consts[inst.c() as usize]
7829 } else {
7830 self.r(base, inst.c())
7831 };
7832 self.op_index(o, key, base + inst.a())?;
7833 }
7834 Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7835 Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7836 Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7837 Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7838 Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7839 Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7840 Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7841 Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7842 Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7843 Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7844 Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7845 Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7846 Op::Unm => {
7847 let v = self.r(base, inst.b());
7848 match coerce_num(v) {
7849 Some(Num::Int(i)) => {
7850 self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7851 }
7852 Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7853 None => {
7854 let mm = self.get_mm(v, Mm::Unm);
7855 if mm.is_nil() {
7856 return Err(self.type_err("perform arithmetic on", v));
7857 }
7858 let dst = base + inst.a();
7859 self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7860 }
7861 }
7862 }
7863 Op::BNot => {
7864 let v = self.r(base, inst.b());
7865 match coerce_num(v) {
7866 Some(n) => {
7867 let i = self.int_from_num(n)?;
7868 self.set_r(base, inst.a(), Value::Int(!i));
7869 }
7870 None => {
7871 let mm = self.get_mm(v, Mm::BNot);
7872 if mm.is_nil() {
7873 return Err(self.type_err("perform bitwise operation on", v));
7874 }
7875 let dst = base + inst.a();
7876 self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7877 }
7878 }
7879 }
7880 Op::Not => {
7881 let v = self.r(base, inst.b());
7882 self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7883 }
7884 Op::Len => {
7885 let v = self.r(base, inst.b());
7886 match self.len_step(v)? {
7887 MmOut::Done(r) => self.set_r(base, inst.a(), r),
7888 MmOut::Mm { func, recv } => {
7889 let dst = base + inst.a();
7890 self.begin_meta_call(
7891 func,
7892 &[recv, recv],
7893 MetaAction::Store { dst },
7894 "len",
7895 )?;
7896 }
7897 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7898 }
7899 }
7900 Op::Concat => {
7901 // right-associative fold over operands at base+a .. base+a+n,
7902 // in place on the stack so a yielding __concat can suspend.
7903 let a = inst.a();
7904 let n = inst.b();
7905 self.top = base + a + n;
7906 self.concat_run(base + a)?;
7907 }
7908 Op::Close => {
7909 // Yieldable: drive __close handlers through the
7910 // interpreter loop so a coroutine.yield() inside a
7911 // handler suspends cleanly (locals.lua block-end yield).
7912 // `drive_close` parks the handler call at `self.top`, so
7913 // raise `top` past this frame's full register window
7914 // first — a goto out of a nested for-loop can fire
7915 // OP_Close while `self.top` still sits at the inner
7916 // body's working top, which would let `push_frame`'s
7917 // wipe clobber the outer tbc slot before it could be
7918 // closed (locals.lua:1219 nested-for goto regression).
7919 self.top = self.top.max(base + cl.proto.max_stack as u32);
7920 let _ =
7921 self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7922 }
7923 Op::Tbc => {
7924 self.register_tbc(base + inst.a())?;
7925 }
7926 Op::Jmp => {
7927 let off = inst.sj();
7928 // P12-S1.B — trace JIT back-edge counter. A negative
7929 // jump offset is a loop back-edge (the only canonical
7930 // backward jumps the compiler emits — `while`, `for`,
7931 // `repeat`). Tick the per-Proto counter and, once it
7932 // exceeds the threshold, log a stub promotion that
7933 // S1.C will turn into actual trace recording. The
7934 // whole block is gated on `trace_jit_enabled` so
7935 // existing benches see one branch-not-taken and no
7936 // counter writes.
7937 if self.jit.trace_enabled && off < 0 {
7938 let proto = cl.proto;
7939 let c = proto.trace_hot_count.get();
7940 if c < u32::MAX / 2 {
7941 proto.trace_hot_count.set(c + 1);
7942 }
7943 // P13-S13-H — relaxed back-edge trigger:
7944 // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7945 // a missed crossing (active_trace busy with
7946 // a call-trigger, or the recorder slot
7947 // happened to be in use) doesn't permanently
7948 // lock this back-edge target out. The
7949 // `already_cached` short-circuit prevents
7950 // duplicate recordings: once a trace is
7951 // cached for this target, subsequent
7952 // crossings skip the start. This pairs with
7953 // S13-H's discard-on-partial-coverage close
7954 // handling — when a short call-trigger is
7955 // discarded, the back-edge can still find an
7956 // open slot at the next iteration.
7957 let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7958 // P13-S13-K — gave-up short-circuit. Skip
7959 // the RefCell borrow + scan when the
7960 // S13-I cap force-compiled a partial
7961 // trace on this Proto.
7962 let back_edge_already_cached = if proto.trace_gave_up.get() {
7963 true
7964 } else {
7965 proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7966 };
7967 if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7968 && self.jit.active_trace.is_none()
7969 && !back_edge_already_cached
7970 {
7971 // Back-edge target = pc after `add_pc(off)`,
7972 // i.e. current `pc + 1 + off` (the dispatch
7973 // loop has already advanced f.pc to pc+1).
7974 let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7975 // Snapshot per-slot Value tag at trace
7976 // entry so the lowerer's kind tracker
7977 // knows which arith path to lower
7978 // (iadd vs fadd, etc.).
7979 let max_stack = cl.proto.max_stack as usize;
7980 let base_us = base as usize;
7981 let mut entry_tags = Vec::with_capacity(max_stack);
7982 for i in 0..max_stack {
7983 let (tag, _) = self.stack[base_us + i].unpack();
7984 entry_tags.push(tag);
7985 }
7986 self.jit.active_trace =
7987 Some(Box::new(crate::jit::trace::TraceRecord::start(
7988 cl.proto, target, entry_tags, false,
7989 )));
7990 // P12-S4 — record the frame the trace
7991 // started in. `self.frames.len() - 1`
7992 // since we're inside the currently-running
7993 // Lua frame's dispatch.
7994 self.jit.recording_frame_base = self.frames.len() - 1;
7995 }
7996 }
7997 self.add_pc(off);
7998 }
7999 Op::Eq => {
8000 let l = self.r(base, inst.a());
8001 let r = self.r(base, inst.b());
8002 if let (Value::Int(a), Value::Int(b)) = (l, r) {
8003 if (a == b) != inst.k() {
8004 self.bump_pc();
8005 }
8006 } else {
8007 let step = self.eq_step(l, r);
8008 self.op_compare(step, l, r, inst.k(), "eq")?;
8009 }
8010 }
8011 Op::EqK => {
8012 let l = self.r(base, inst.a());
8013 let r = cl.proto.consts[inst.b() as usize];
8014 if let (Value::Int(a), Value::Int(b)) = (l, r) {
8015 if (a == b) != inst.k() {
8016 self.bump_pc();
8017 }
8018 } else {
8019 let step = self.eq_step(l, r);
8020 self.op_compare(step, l, r, inst.k(), "eq")?;
8021 }
8022 }
8023 Op::Lt => {
8024 let l = self.r(base, inst.a());
8025 let r = self.r(base, inst.b());
8026 // hot path: Int < Int — drops the MmOut + op_compare match
8027 if let (Value::Int(a), Value::Int(b)) = (l, r) {
8028 if (a < b) != inst.k() {
8029 self.bump_pc();
8030 }
8031 } else {
8032 let step = self.less_step(l, r, false)?;
8033 self.op_compare(step, l, r, inst.k(), "lt")?;
8034 }
8035 }
8036 Op::Le => {
8037 let l = self.r(base, inst.a());
8038 let r = self.r(base, inst.b());
8039 if let (Value::Int(a), Value::Int(b)) = (l, r) {
8040 if (a <= b) != inst.k() {
8041 self.bump_pc();
8042 }
8043 } else {
8044 let step = self.less_step(l, r, true)?;
8045 self.op_compare(step, l, r, inst.k(), "le")?;
8046 }
8047 }
8048 Op::Test => {
8049 let cond = self.r(base, inst.a()).truthy();
8050 self.cond_skip(cond, inst.k());
8051 }
8052 Op::TestSet => {
8053 let v = self.r(base, inst.b());
8054 if v.truthy() == inst.k() {
8055 self.set_r(base, inst.a(), v);
8056 } else {
8057 self.bump_pc();
8058 }
8059 }
8060 Op::Call => {
8061 let abs = base + inst.a();
8062 let nargs = if inst.b() == 0 {
8063 None
8064 } else {
8065 Some(inst.b() - 1)
8066 };
8067 let wanted = inst.c() as i32 - 1;
8068 self.begin_call(abs, nargs, wanted, false)?;
8069 }
8070 Op::TailCall => {
8071 let fr = *self.top_frame();
8072 let abs = base + inst.a();
8073 let mut nargs = if inst.b() == 0 {
8074 self.top - (abs + 1)
8075 } else {
8076 inst.b() - 1
8077 };
8078 // A tail call pops this frame before begin_call, so a
8079 // non-callable target would lose its name/position. Report
8080 // it now (PUC reads funcname from the still-current ci),
8081 // while the frame is intact, for "(field 'x')"-style info.
8082 let mut func = self.stack[abs as usize];
8083 if !matches!(func, Value::Closure(_) | Value::Native(_))
8084 && self.get_mm(func, Mm::Call).is_nil()
8085 {
8086 return Err(self.call_err(func));
8087 }
8088 // PUC `luaD_pretailcall` resolves a chain of `__call`
8089 // metamethods *in place* before deciding whether to
8090 // collapse this frame. Without that, each __call hop
8091 // would push a fresh Lua frame and a 10000-deep
8092 // tail-recursion through a 100-deep __call chain
8093 // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
8094 // shift args right, install the handler at `abs`, retry.
8095 // Chain depth limit matches the call-site `begin_call`
8096 // version cap (5.5 calls.lua :223 — 15 max, then "too
8097 // long"; 16th wrap fails the call). An infinite
8098 // self-referential `__call` would otherwise spin.
8099 let chain_cap = if self.version >= LuaVersion::Lua55 {
8100 15
8101 } else {
8102 MAX_CCMT
8103 };
8104 let mut chain = 0u32;
8105 while !matches!(func, Value::Closure(_) | Value::Native(_)) {
8106 let mm = self.get_mm(func, Mm::Call);
8107 if mm.is_nil() {
8108 return Err(self.call_err(func));
8109 }
8110 chain += 1;
8111 if chain > chain_cap {
8112 return Err(self.rt_err("'__call' chain too long"));
8113 }
8114 let end = (abs + 1 + nargs) as usize;
8115 if self.stack.len() < end + 1 {
8116 self.stack.resize(end + 1, Value::Nil);
8117 }
8118 for i in (0..=nargs).rev() {
8119 self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
8120 }
8121 self.stack[abs as usize] = mm;
8122 nargs += 1;
8123 self.top = abs + 1 + nargs;
8124 func = mm;
8125 }
8126 // PUC's tail-call collapse is Lua→Lua only. A tail call to
8127 // a C function runs the C function under the *current* Lua
8128 // activation (no frame fold — a C frame has nothing to
8129 // collapse into); after the C function returns, the
8130 // calling Lua function returns those results normally.
8131 // Mirror that: keep our Lua frame on the stack, call the
8132 // target through `begin_call(abs, …)` as a regular call,
8133 // and let the fallback `Op::Return` that the compiler
8134 // emits right after `Op::TailCall` forward the results.
8135 // 5.1 closure.lua :177's `return getfenv()` from inside
8136 // foo needs level 1 to resolve to foo, not to the
8137 // thread's globals fallback that happens when no Lua
8138 // frame is on the stack.
8139 let lua_target = matches!(func, Value::Closure(_));
8140 if lua_target {
8141 self.close_slots(fr.base, None)?;
8142 for i in 0..=nargs {
8143 self.stack[(fr.func_slot + i) as usize] =
8144 self.stack[(abs + i) as usize];
8145 }
8146 // v2.5 P1B-2A: clear the slot range that's now
8147 // stranded by the tail-call collapse. The args
8148 // were copied to `[fr.func_slot..fr.func_slot+
8149 // nargs+1)`; the source slots `[abs..abs+
8150 // nargs+1)` still hold the same `Value::Closure
8151 // / Value::Str / ...` entries, but they're past
8152 // the new call's window. Without this clear, a
8153 // later GC with wider gc_top would mark stale
8154 // pointers there (same UAF-A family the v2.3
8155 // finish_results slot-clear closed for the
8156 // Op::Return path).
8157 let new_top_lower_bound = fr.func_slot + nargs + 1;
8158 let prev_top = (self.top as usize).min(self.stack.len());
8159 if (new_top_lower_bound as usize) < prev_top {
8160 for slot in &mut self.stack[new_top_lower_bound as usize..prev_top] {
8161 *slot = Value::Nil;
8162 }
8163 }
8164 // PUC `CIST_TAIL`: the new Lua activation inherits
8165 // the popped frame's tailcalls count plus one for
8166 // this collapse. 5.1 db.lua :372 hammers 30000
8167 // recursive tail calls and expects to see the
8168 // synthetic tail level for every one of them.
8169 self.pending_tailcalls = fr.tailcalls.saturating_add(1);
8170 frames_pop_sync(&mut self.frames, &mut self.frames_top);
8171 if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
8172 && self.frames.len() < entry_depth
8173 {
8174 // a native completed what was this function's result
8175 return Ok(self.take_results(fr.func_slot));
8176 }
8177 } else {
8178 // Native (or __call-bearing) target: regular call. The
8179 // results land at `abs..self.top` and the next op (the
8180 // fallback `Op::Return`) forwards them. `wanted = -1`
8181 // because the caller will multret them through Return.
8182 self.begin_call(abs, Some(nargs), -1, false)?;
8183 }
8184 }
8185 Op::Return | Op::Return0 | Op::Return1 => {
8186 let (abs_a, nret) = match inst.op() {
8187 Op::Return0 => (base, 0),
8188 Op::Return1 => (base + inst.a(), 1),
8189 _ => {
8190 let abs_a = base + inst.a();
8191 let nret = if inst.b() == 0 {
8192 self.top - abs_a
8193 } else {
8194 inst.b() - 1
8195 };
8196 (abs_a, nret)
8197 }
8198 };
8199 // close before moving results: __close handlers run above
8200 // the stack top, so the result region [abs_a..abs_a+nret)
8201 // stays intact across any yields the close performs.
8202 // Fixed-count returns may leave `self.top` below the last
8203 // result slot (the compiler does not always re-bump it);
8204 // raise it past the result region so `drive_close` parks
8205 // the handler call *above* — landing at `self.top` would
8206 // otherwise clobber a result with the handler closure.
8207 self.top = self.top.max(abs_a + nret);
8208 if let Some(vals) = self.begin_close(
8209 base,
8210 None,
8211 AfterClose::Return {
8212 abs_a,
8213 nret,
8214 from_native: false,
8215 },
8216 entry_depth,
8217 )? {
8218 return Ok(vals);
8219 }
8220 }
8221 Op::ForPrep => self.for_prep(inst, base)?,
8222 Op::ForLoop => {
8223 // P12 — trace JIT back-edge counter on the
8224 // numeric-for back-edge. ForLoop is always at
8225 // a back-edge position (when it continues);
8226 // for the trace recorder we treat it as the
8227 // close-detection equivalent of `Op::Jmp` with
8228 // negative offset. Counter only ticks when the
8229 // back-edge will actually fire (count > 0 in
8230 // the 5.4+ Int form, comparable predicates in
8231 // pre-5.3 / Float). The cheap check up front
8232 // matches the for_loop helper's branch.
8233 if self.jit.trace_enabled {
8234 let a = inst.a();
8235 let pre53 = self.version() <= LuaVersion::Lua53;
8236 let take_back_edge =
8237 match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
8238 (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
8239 count > 0
8240 }
8241 (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
8242 let next = cur.wrapping_add(st);
8243 if st > 0 { next <= lim } else { next >= lim }
8244 }
8245 (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
8246 let next = cur + st;
8247 if st > 0.0 { next <= lim } else { next >= lim }
8248 }
8249 _ => false,
8250 };
8251 if take_back_edge {
8252 let proto = cl.proto;
8253 let c = proto.trace_hot_count.get();
8254 if c < u32::MAX / 2 {
8255 proto.trace_hot_count.set(c + 1);
8256 }
8257 if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8258 && self.jit.active_trace.is_none()
8259 {
8260 // ForLoop's back-edge target = pc
8261 // after `add_pc(-bx)` runs from the
8262 // already-bumped f.pc (= pc + 1).
8263 // So target = (pc + 1) - bx.
8264 let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8265 let max_stack = cl.proto.max_stack as usize;
8266 let base_us = base as usize;
8267 let mut entry_tags = Vec::with_capacity(max_stack);
8268 for i in 0..max_stack {
8269 let (tag, _) = self.stack[base_us + i].unpack();
8270 entry_tags.push(tag);
8271 }
8272 self.jit.active_trace =
8273 Some(Box::new(crate::jit::trace::TraceRecord::start(
8274 cl.proto, target, entry_tags, false,
8275 )));
8276 // P12-S4 — record the frame the trace
8277 // started in. The currently-running
8278 // Lua frame is at len() - 1.
8279 self.jit.recording_frame_base = self.frames.len() - 1;
8280 }
8281 }
8282 }
8283 self.for_loop(inst, base);
8284 }
8285 Op::TForPrep => {
8286 // the 4th control slot is the iterator's closing value
8287 self.register_tbc(base + inst.a() + 3)?;
8288 self.add_pc(inst.bx() as i32);
8289 }
8290 Op::TForCall => {
8291 let abs = base + inst.a();
8292 let need = (abs + 7) as usize;
8293 if self.stack.len() < need {
8294 self.stack.resize(need, Value::Nil);
8295 }
8296 self.stack[(abs + 4) as usize] = self.stack[abs as usize];
8297 self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
8298 self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
8299 let nvars = inst.c() as i32;
8300 self.begin_call(abs + 4, Some(2), nvars, false)?;
8301 }
8302 Op::TForLoop => {
8303 let a = inst.a();
8304 let ctrl = self.r(base, a + 4);
8305 if !ctrl.is_nil() {
8306 // P12-S12-B v1 — trace JIT back-edge counter on
8307 // generic-for back-edge. TForLoop sits at the
8308 // tail of `for k,v in expr do ... end`; recorder
8309 // treats it as the close-detection equivalent of
8310 // a negative Op::Jmp. Gate on `take_back_edge`
8311 // (= `ctrl != nil`) so empty-iter loops don't
8312 // pollute hot_count. v1 only adds the trigger;
8313 // whitelist + helper + emit live in v2.
8314 if self.jit.trace_enabled {
8315 let proto = cl.proto;
8316 let c = proto.trace_hot_count.get();
8317 if c < u32::MAX / 2 {
8318 proto.trace_hot_count.set(c + 1);
8319 }
8320 if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8321 && self.jit.active_trace.is_none()
8322 {
8323 // TForLoop back-edge target = pc after
8324 // `add_pc(-bx)` runs from the already-
8325 // bumped f.pc (= pc + 1). So target =
8326 // (pc + 1) - bx, normally landing on
8327 // body_top (the op right after TForPrep).
8328 let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8329 let max_stack = cl.proto.max_stack as usize;
8330 let base_us = base as usize;
8331 let mut entry_tags = Vec::with_capacity(max_stack);
8332 for i in 0..max_stack {
8333 let (tag, _) = self.stack[base_us + i].unpack();
8334 entry_tags.push(tag);
8335 }
8336 // P12-S12-B-v5 — snapshot the iter
8337 // fn's address if Native, so the
8338 // lowerer can specialise ipairs into
8339 // inline Table aget IR.
8340 let iter_ptr =
8341 if let Value::Native(n) = self.stack[base_us + a as usize] {
8342 Some(n.f as usize)
8343 } else {
8344 None
8345 };
8346 // P12-S12-C v3 — snapshot R[A+5]'s
8347 // tag (= current iter's val from
8348 // the just-fired TForCall). The v5
8349 // inline aget fast_blk emits a
8350 // runtime guard against this tag;
8351 // mixed-tag arrays deopt rather
8352 // than producing garbage pointers
8353 // through the v2 spill path.
8354 let val_slot = base_us + (a as usize) + 5;
8355 let val_tag = if val_slot < self.stack.len() {
8356 Some(self.stack[val_slot].unpack().0)
8357 } else {
8358 None
8359 };
8360 let mut rec = crate::jit::trace::TraceRecord::start(
8361 cl.proto, target, entry_tags, false,
8362 );
8363 rec.tfor_iter_ptr = iter_ptr;
8364 rec.tfor_val_tag = val_tag;
8365 self.jit.active_trace = Some(Box::new(rec));
8366 self.jit.recording_frame_base = self.frames.len() - 1;
8367 }
8368 }
8369 self.set_r(base, a + 2, ctrl);
8370 self.add_pc(-(inst.bx() as i32));
8371 }
8372 }
8373 Op::Closure => {
8374 let proto = cl.proto.protos[inst.bx() as usize];
8375 let n_ups = proto.upvals.len();
8376 // P11-S5d.M — build upvals on the stack for small
8377 // closures, skipping the per-call Vec/Box alloc
8378 // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8379 // = 2 covers most Lua source (1 captured local, or
8380 // _ENV + a single capture). Beyond that, fall back
8381 // to a heap Vec.
8382 use crate::runtime::function::INLINE_UPVALS_N;
8383 let mut stack_buf: [std::mem::MaybeUninit<
8384 Gc<crate::runtime::function::Upvalue>,
8385 >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8386 let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8387 let use_inline = n_ups <= INLINE_UPVALS_N;
8388 if !use_inline {
8389 heap_buf.reserve_exact(n_ups);
8390 }
8391 for (i, d) in proto.upvals.iter().enumerate() {
8392 let uv = if d.in_stack {
8393 self.find_or_create_upval(base + d.index as u32)
8394 } else {
8395 cl.upvals()[d.index as usize]
8396 };
8397 if use_inline {
8398 stack_buf[i] = std::mem::MaybeUninit::new(uv);
8399 } else {
8400 heap_buf.push(uv);
8401 }
8402 }
8403 // Tiny shim around the two paths so the 5.1 _ENV
8404 // clone + cache check below see one uniform
8405 // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8406 // into the local frame (still valid through the
8407 // rest of this Op::Closure handler).
8408 let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8409 // SAFETY: the first n_ups slots of stack_buf
8410 // were initialised above; we hand out a slice
8411 // covering exactly them.
8412 unsafe {
8413 std::slice::from_raw_parts_mut(
8414 stack_buf.as_mut_ptr()
8415 as *mut Gc<crate::runtime::function::Upvalue>,
8416 n_ups,
8417 )
8418 }
8419 } else {
8420 &mut heap_buf[..]
8421 };
8422 // PUC 5.1 had per-function environments: every Lua
8423 // function carried its own `env` slot, snapshotted from
8424 // the creating function's env at closure time, so a
8425 // `setfenv` on one closure never bled into a sibling.
8426 // luna models that by giving the 5.1 closure a *fresh*
8427 // closed upvalue for whichever cell holds `_ENV`, seeded
8428 // from the parent's current env value. Only that cell is
8429 // cloned — every other upvalue keeps its open/shared
8430 // identity (so e.g. `local function range(...) ...
8431 // range(...) ... end` still sees its self-reference). 5.2+
8432 // keeps the shared-upval model (and the proto cache that
8433 // depends on it).
8434 let v51 = self.version() <= LuaVersion::Lua51;
8435 if v51 && proto.env_upval_idx != u8::MAX {
8436 let i = proto.env_upval_idx as usize;
8437 let cur = match ups[i].state() {
8438 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8439 UpvalState::Closed(v) => v,
8440 };
8441 ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8442 }
8443 let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8444 // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8445 // and reuses it when every fresh-upvalue binding still
8446 // points to the same Upvalue object as the cached one.
8447 // That keeps `function() return outer end` repeated in a
8448 // loop comparing equal across iterations (the captured
8449 // outer is a shared open upvalue), while `function()
8450 // return loop_var end` gets a fresh closure each round
8451 // because the loop var is re-created per iteration. PUC
8452 // 5.1 predated the cache, and the per-closure `_ENV`
8453 // clone above would defeat it anyway, so skip it.
8454 let nc = if v51 {
8455 self.heap.new_closure_inline(proto, ups_slice)
8456 } else {
8457 let cached = proto.cache.get().filter(|c| {
8458 c.upvals().len() == ups_slice.len()
8459 && c.upvals()
8460 .iter()
8461 .zip(ups_slice.iter())
8462 .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8463 });
8464 match cached {
8465 Some(c) => c,
8466 None => {
8467 let n = self.heap.new_closure_inline(proto, ups_slice);
8468 proto.cache.set(Some(n));
8469 n
8470 }
8471 }
8472 };
8473 self.set_r(base, inst.a(), Value::Closure(nc));
8474 self.maybe_collect_garbage(base + inst.a() + 1);
8475 }
8476 Op::Vararg => {
8477 let abs_a = base + inst.a();
8478 let wanted = inst.c() as i32 - 1;
8479 // A materialized named vararg lives in func_slot (its writes
8480 // must be visible to `...`); otherwise spread the extra args
8481 // straight off the stack at func_slot+1 .. +n_varargs.
8482 let vt = match self.stack[func_slot as usize] {
8483 Value::Table(t) => Some(t),
8484 _ => None,
8485 };
8486 let n = match vt {
8487 Some(t) => {
8488 let n_key = Value::Str(self.heap.intern(b"n"));
8489 // PUC getnumargs: a named vararg `t.n` set out of the
8490 // integer range [0, INT_MAX/2] is rejected here
8491 match t.get(n_key) {
8492 Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8493 _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8494 }
8495 }
8496 None => n_varargs,
8497 };
8498 let count = if wanted < 0 { n } else { wanted as u32 };
8499 let need = (abs_a + count) as usize;
8500 if self.stack.len() < need {
8501 self.stack.resize(need, Value::Nil);
8502 }
8503 for i in 0..count {
8504 let v = if i >= n {
8505 Value::Nil
8506 } else if let Some(t) = vt {
8507 t.get_int(i as i64 + 1)
8508 } else {
8509 self.stack[(func_slot + 1 + i) as usize]
8510 };
8511 self.stack[(abs_a + i) as usize] = v;
8512 }
8513 if wanted < 0 {
8514 self.top = abs_a + count;
8515 }
8516 }
8517 Op::GetVarg => {
8518 // materialize the vararg table (PUC table.pack shape) from the
8519 // stack varargs — used when the named vararg is written /
8520 // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8521 // sees later writes) and in the local register R[A].
8522 let n = n_varargs;
8523 let t = self.heap.new_table();
8524 {
8525 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8526 let tm = unsafe { t.as_mut() };
8527 for i in 0..n {
8528 let _ = tm.set_int(
8529 &mut self.heap,
8530 i as i64 + 1,
8531 self.stack[(func_slot + 1 + i) as usize],
8532 );
8533 }
8534 }
8535 let n_key = Value::Str(self.heap.intern(b"n"));
8536 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8537 unsafe { t.as_mut() }
8538 .set(&mut self.heap, n_key, Value::Int(n as i64))
8539 .expect("'n' is a valid key");
8540 // once-per-table barrier (mirror SETLIST): t is born BLACK
8541 // during Propagate; the bulk inserts above don't barrier.
8542 self.heap
8543 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8544 self.stack[func_slot as usize] = Value::Table(t);
8545 self.set_r(base, inst.a(), Value::Table(t));
8546 }
8547 Op::VargIdx => {
8548 // R[A] := vararg[R[C]] without allocating: integer key in
8549 // [1,n] → that vararg, "n" → the count, else nil.
8550 let key = self.r(base, inst.c());
8551 let n = n_varargs;
8552 let v = match key {
8553 Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8554 self.stack[(func_slot + k as u32) as usize]
8555 }
8556 Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8557 self.stack[(func_slot + f as u32) as usize]
8558 }
8559 Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8560 _ => Value::Nil,
8561 };
8562 self.set_r(base, inst.a(), v);
8563 }
8564 Op::ErrNNil => {
8565 let v = self.r(base, inst.a());
8566 if !matches!(v, Value::Nil) {
8567 let bx = inst.bx();
8568 let name = if bx == 0 {
8569 "?".to_string()
8570 } else {
8571 match cl.proto.consts[(bx - 1) as usize] {
8572 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8573 _ => "?".to_string(),
8574 }
8575 };
8576 return Err(self.rt_err(&format!("global '{name}' already defined")));
8577 }
8578 }
8579 Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8580 }
8581 }
8582 }
8583
8584 #[inline(always)]
8585 fn pc_of_top(&self) -> u32 {
8586 self.top_frame().pc
8587 }
8588
8589 #[inline(always)]
8590 fn bump_pc(&mut self) {
8591 // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8592 // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8593 // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8594 // up over `fib_28`'s ~500k jumps.
8595 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8596 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8597 CallFrame::Lua(f) => f.pc += 1,
8598 _ => unreachable!("Cont frame at bump_pc"),
8599 }
8600 }
8601
8602 #[inline(always)]
8603 fn add_pc(&mut self, d: i32) {
8604 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8605 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8606 CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8607 _ => unreachable!("Cont frame at add_pc"),
8608 }
8609 }
8610
8611 /// PUC conditional-skip convention: the JMP that follows is executed when
8612 /// `cond == k`; otherwise it is skipped.
8613 #[inline(always)]
8614 fn cond_skip(&mut self, cond: bool, k: bool) {
8615 if cond != k {
8616 self.bump_pc();
8617 }
8618 }
8619
// ---- length and indexing (with __len / __index / __newindex chains) ----
8621
8622 /// The `#` length operation: string byte length, `__len` if present, else
8623 /// the raw table border. Returns the raw length value (may be non-integer
8624 /// when `__len` is exotic).
8625 pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8626 match self.len_step(v)? {
8627 MmOut::Done(n) => Ok(n),
8628 // PUC calls unary metamethods with the operand twice
8629 MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8630 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8631 }
8632 }
8633
8634 /// Length fast path: a string's byte count or a table's raw border when no
8635 /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8636 /// called with the operand twice. Errors for a non-table with no `__len`.
8637 fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8638 match v {
8639 Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8640 Value::Table(t) => {
8641 let mm = self.get_mm(v, Mm::Len);
8642 if mm.is_nil() {
8643 Ok(MmOut::Done(Value::Int(t.len())))
8644 } else {
8645 Ok(MmOut::Mm { func: mm, recv: v })
8646 }
8647 }
8648 _ => {
8649 let mm = self.get_mm(v, Mm::Len);
8650 if mm.is_nil() {
8651 Err(self.type_err("get length of", v))
8652 } else {
8653 Ok(MmOut::Mm { func: mm, recv: v })
8654 }
8655 }
8656 }
8657 }
8658
8659 /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8660 /// value with no integer representation.
8661 pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8662 match self.len_value(v)? {
8663 Value::Int(i) => Ok(i),
8664 Value::Float(f) => crate::runtime::value::f2i_exact(f)
8665 .ok_or_else(|| self.rt_err("object length is not an integer")),
8666 _ => Err(self.rt_err("object length is not an integer")),
8667 }
8668 }
8669
8670 pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8671 match self.index_step(t, key)? {
8672 MmOut::Done(v) => Ok(v),
8673 MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8674 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8675 }
8676 }
8677
8678 /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8679 /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8680 /// are followed inline (no yield possible); only a function link can yield.
8681 fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8682 let mut cur = t;
8683 for _ in 0..MAX_TAG_LOOP {
8684 let mm = match cur {
8685 Value::Table(tb) => {
8686 let v = tb.get(key);
8687 if !v.is_nil() {
8688 return Ok(MmOut::Done(v));
8689 }
8690 let mm = self.get_mm(cur, Mm::Index);
8691 if mm.is_nil() {
8692 return Ok(MmOut::Done(Value::Nil));
8693 }
8694 mm
8695 }
8696 v => {
8697 let mm = self.get_mm(v, Mm::Index);
8698 if mm.is_nil() {
8699 return Err(self.type_err("index", v));
8700 }
8701 mm
8702 }
8703 };
8704 match mm {
8705 Value::Closure(_) | Value::Native(_) => {
8706 return Ok(MmOut::Mm {
8707 func: mm,
8708 recv: cur,
8709 });
8710 }
8711 next => cur = next,
8712 }
8713 }
8714 Err(self.rt_err("'__index' chain too long; possible loop"))
8715 }
8716
8717 pub(crate) fn newindex_value(
8718 &mut self,
8719 t: Value,
8720 key: Value,
8721 v: Value,
8722 ) -> Result<(), LuaError> {
8723 match self.newindex_step(t, key, v)? {
8724 MmOut::Done(_) => Ok(()),
8725 MmOut::Mm { func, recv } => {
8726 self.call_value(func, &[recv, key, v])?;
8727 Ok(())
8728 }
8729 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8730 }
8731 }
8732
    /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
    /// performed inline (returning `Done`); only a function metamethod (`Mm`)
    /// needs an actual call — which the caller may run yieldably.
    ///
    /// Errors with an "index" type error when a non-table link has no
    /// `__newindex`, with the `raw_set` errors when the raw store is rejected,
    /// and with a "chain too long" error after `MAX_TAG_LOOP` links.
    fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
        // v2.13 WUC read-time probe (gc-verify): a dead query key at a
        // WRITE site, attributed to the instruction that produced it.
        // Only string and table keys are probed; the address is handed to
        // `gc_verify_probe::is_freed`.
        #[cfg(feature = "gc-verify")]
        if let Some(p) = (match key {
            Value::Str(s) => Some(s.as_ptr() as usize),
            Value::Table(t2) => Some(t2.as_ptr() as usize),
            _ => None,
        }) {
            if crate::runtime::gc_verify_probe::is_freed(p) {
                // Build a small disassembly window (6 instructions before the
                // current pc through 1 after) for the panic message.
                let detail = match self.frames.last() {
                    Some(CallFrame::Lua(f)) => {
                        let pc = f.pc as usize;
                        let mut w = String::new();
                        for q in pc.saturating_sub(6)..(pc + 2) {
                            if let Some(inst) = f.closure.proto.code.get(q) {
                                w.push_str(&format!(
                                    "\n [{q}] {:?} a={} b={} c={} k={}",
                                    inst.op(),
                                    inst.a(),
                                    inst.b(),
                                    inst.c(),
                                    inst.k()
                                ));
                            }
                        }
                        format!("pc={pc} base={} gc_top={} window:{w}", f.base, self.gc_top)
                    }
                    _ => "non-Lua frame".into(),
                };
                panic!("[gc-verify] newindex_step QUERY key {p:#x} freed. {detail}");
            }
        }
        // `cur` is the current receiver; it is replaced by each non-function
        // `__newindex` link until a raw store or a function metamethod ends
        // the walk.
        let mut cur = t;
        for _ in 0..MAX_TAG_LOOP {
            let mm = match cur {
                Value::Table(tb) => {
                    // PI-A3 single-walk collapse — Table::try_set_existing
                    // fuses the prior `tb.get(key).is_nil()` gate and
                    // `raw_set` walk into one chain traversal when the
                    // key is already present with a non-nil value. The
                    // __newindex chain semantics are preserved by the
                    // identity (slot_nil ⇔ fire_newindex); see
                    // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
                    //
                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
                    // heap is single-threaded and the pointer is live as
                    // long as it is reachable from active roots (see
                    // heap.rs:5-7). Mirrors the raw_set wrapper below.
                    if unsafe { tb.as_mut() }.try_set_existing(key, v) {
                        // The table was mutated in place, so it gets the same
                        // back barrier `raw_set` applies after a store.
                        self.heap
                            .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
                        return Ok(MmOut::Done(Value::Nil));
                    }
                    // Key absent (or nil-valued): `__newindex` gets a say
                    // before the raw store.
                    let mm = self.get_mm(cur, Mm::NewIndex);
                    if mm.is_nil() {
                        self.raw_set(tb, key, v)?;
                        return Ok(MmOut::Done(Value::Nil));
                    }
                    mm
                }
                bad => {
                    // Non-table receiver: assignable only through `__newindex`.
                    let mm = self.get_mm(bad, Mm::NewIndex);
                    if mm.is_nil() {
                        return Err(self.type_err("index", bad));
                    }
                    mm
                }
            };
            match mm {
                // A function link ends the walk; the caller performs the call.
                Value::Closure(_) | Value::Native(_) => {
                    return Ok(MmOut::Mm {
                        func: mm,
                        recv: cur,
                    });
                }
                // Any other link becomes the receiver for the next round.
                next => cur = next,
            }
        }
        Err(self.rt_err("'__newindex' chain too long; possible loop"))
    }
8817
8818 fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8819 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8820 match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8821 Ok(()) => {
8822 self.heap
8823 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8824 Ok(())
8825 }
8826 Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8827 Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8828 Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8829 Err(TableError::InvalidNext) => unreachable!(),
8830 }
8831 }
8832
8833 /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8834 /// the boolean result; `Mm` (when raw equality fails and both are tables
8835 /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8836 fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8837 if l.raw_eq(r) {
8838 return MmOut::Done(Value::Bool(true));
8839 }
8840 if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8841 (l, r)
8842 {
8843 // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8844 // (and earlier) required the two operands' metatables to expose a
8845 // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8846 // metatable falls straight back to raw inequality. events.lua 5.1
8847 // :262 bakes this in.
8848 let mm = if self.version() <= LuaVersion::Lua51 {
8849 self.get_comp_mm(l, r, Mm::Eq)
8850 } else {
8851 let mut m = self.get_mm(l, Mm::Eq);
8852 if m.is_nil() {
8853 m = self.get_mm(r, Mm::Eq);
8854 }
8855 m
8856 };
8857 if !mm.is_nil() {
8858 return MmOut::Mm { func: mm, recv: l };
8859 }
8860 }
8861 MmOut::Done(Value::Bool(false))
8862 }
8863
8864 // ---- arithmetic ----
8865
8866 #[inline(always)]
8867 fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8868 let l = self.r(base, inst.b());
8869 let r = self.r(base, inst.c());
8870 // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8871 // binary_trees all hammer these. Skipping coerce_num + the big
8872 // arith_fast match shaves several conditional moves per op.
8873 if let (Value::Int(a), Value::Int(b)) = (l, r) {
8874 let fast = match op {
8875 ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8876 ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8877 ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8878 _ => None,
8879 };
8880 if let Some(v) = fast {
8881 self.set_r(base, inst.a(), v);
8882 return Ok(());
8883 }
8884 }
8885 // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8886 // and any numeric workload with non-integer accumulators benefits.
8887 if let (Value::Float(a), Value::Float(b)) = (l, r) {
8888 let fast = match op {
8889 ArithOp::Add => Some(Value::Float(a + b)),
8890 ArithOp::Sub => Some(Value::Float(a - b)),
8891 ArithOp::Mul => Some(Value::Float(a * b)),
8892 ArithOp::Div => Some(Value::Float(a / b)),
8893 _ => None,
8894 };
8895 if let Some(v) = fast {
8896 self.set_r(base, inst.a(), v);
8897 return Ok(());
8898 }
8899 }
8900 match self.arith_fast(op, l, r)? {
8901 Some(v) => self.set_r(base, inst.a(), v),
8902 None => {
8903 let mm = self.arith_mm_func(op, l, r)?;
8904 let dst = base + inst.a();
8905 self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8906 }
8907 }
8908 Ok(())
8909 }
8910
8911 /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8912 /// directly, `Ok(None)` when a metamethod is required (the caller decides
8913 /// whether to call it synchronously or yieldably).
8914 fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8915 use ArithOp::*;
8916 match op {
8917 BAnd | BOr | BXor | Shl | Shr => {
8918 // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8919 match (coerce_num(l), coerce_num(r)) {
8920 (Some(a), Some(b)) => {
8921 let to_int = |n: Num| match n {
8922 Num::Int(i) => Some(i),
8923 Num::Float(f) => crate::runtime::value::f2i_exact(f),
8924 };
8925 let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8926 // PUC luaG_tointerror: name the offending operand
8927 return Err(self.no_int_rep_err());
8928 };
8929 let v = match op {
8930 BAnd => a & b,
8931 BOr => a | b,
8932 BXor => a ^ b,
8933 Shl => shift_left(a, b),
8934 Shr => shift_left(a, b.wrapping_neg()),
8935 _ => unreachable!(),
8936 };
8937 return Ok(Some(Value::Int(v)));
8938 }
8939 _ => return Ok(None),
8940 }
8941 }
8942 _ => {}
8943 }
8944 let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8945 (Some(a), Some(b)) => (a, b),
8946 _ => return Ok(None),
8947 };
8948 let v = match (op, ln, rn) {
8949 (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8950 (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8951 (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8952 (IDiv, Num::Int(a), Num::Int(b)) => {
8953 if b == 0 {
8954 return Err(self.rt_err("attempt to divide by zero"));
8955 }
8956 let mut q = a.wrapping_div(b);
8957 if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8958 q -= 1;
8959 }
8960 Value::Int(q)
8961 }
8962 (Mod, Num::Int(a), Num::Int(b)) => {
8963 if b == 0 {
8964 return Err(self.rt_err("attempt to perform 'n%0'"));
8965 }
8966 let mut m = a.wrapping_rem(b);
8967 if m != 0 && (m ^ b) < 0 {
8968 m += b;
8969 }
8970 Value::Int(m)
8971 }
8972 (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8973 (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8974 (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8975 (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8976 (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8977 (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8978 (Mod, a, b) => {
8979 let (x, y) = (a.as_f64(), b.as_f64());
8980 // PUC luai_nummod: correct fmod's sign without the `m*y`
8981 // product, which underflows to 0 for tiny denormals
8982 let mut m = x % y;
8983 if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8984 m += y;
8985 }
8986 Value::Float(m)
8987 }
8988 _ => unreachable!(),
8989 };
8990 Ok(Some(v))
8991 }
8992
8993 pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8994 match v {
8995 Value::Int(i) => Ok(i),
8996 Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8997 Some(i) => Ok(i),
8998 None => Err(self.rt_err("number has no integer representation")),
8999 },
9000 v => Err(self.type_err(what, v)),
9001 }
9002 }
9003
9004 fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
9005 match n {
9006 Num::Int(i) => Ok(i),
9007 Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
9008 Some(i) => Ok(i),
9009 None => Err(self.rt_err("number has no integer representation")),
9010 },
9011 }
9012 }
9013
9014 /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
9015 /// PUC type error when neither operand provides one.
9016 fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
9017 use ArithOp::*;
9018 let event = match op {
9019 Add => Mm::Add,
9020 Sub => Mm::Sub,
9021 Mul => Mm::Mul,
9022 Div => Mm::Div,
9023 Mod => Mm::Mod,
9024 Pow => Mm::Pow,
9025 IDiv => Mm::IDiv,
9026 BAnd => Mm::BAnd,
9027 BOr => Mm::BOr,
9028 BXor => Mm::BXor,
9029 Shl => Mm::Shl,
9030 Shr => Mm::Shr,
9031 };
9032 let mut mm = self.get_mm(l, event);
9033 if mm.is_nil() {
9034 mm = self.get_mm(r, event);
9035 }
9036 if mm.is_nil() {
9037 let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
9038 "perform bitwise operation on"
9039 } else {
9040 // 5.4+ report string-involved arithmetic faults through
9041 // lstrlib's string-metatable arithmetic handlers, which
9042 // emit the per-op wording `attempt to add a 'string'
9043 // with a 'number'` (operands in syntactic order, quoted
9044 // type names, no varinfo). Non-string faults (nil+1,
9045 // {}+{}) keep the classic VM wording on every dialect —
9046 // v2.14 HC.4, probed against stock 5.1.5-5.5.0.
9047 if self.version >= crate::version::LuaVersion::Lua54
9048 && (matches!(l, Value::Str(_)) || matches!(r, Value::Str(_)))
9049 {
9050 let verb = match op {
9051 Add => "add",
9052 Sub => "sub",
9053 Mul => "mul",
9054 Div => "div",
9055 Mod => "mod",
9056 Pow => "pow",
9057 IDiv => "idiv",
9058 BAnd | BOr | BXor | Shl | Shr => unreachable!(),
9059 };
9060 let t1 = self.obj_typename(l);
9061 let t2 = self.obj_typename(r);
9062 return Err(self.rt_err(&format!("attempt to {verb} a '{t1}' with a '{t2}'")));
9063 }
9064 "perform arithmetic on"
9065 };
9066 let bad = if coerce_num(l).is_none() { l } else { r };
9067 return Err(self.type_err(what, bad));
9068 }
9069 Ok(mm)
9070 }
9071
9072 // ---- comparison ----
9073
9074 pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
9075 match self.less_step(l, r, or_eq)? {
9076 MmOut::Done(v) => Ok(v.truthy()),
9077 MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
9078 MmOut::CompareSynth { func } => {
9079 // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
9080 // by library code (sort comparator etc.) — no yield expected
9081 // here (a yield would have hit `call_noyield`'s C boundary).
9082 Ok(!self.call_mm1(func, &[r, l])?.truthy())
9083 }
9084 }
9085 }
9086
9087 /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
9088 /// carries the boolean result; `Mm` (for non-number/string operands) carries
9089 /// the metamethod — called with `(l, r)`; raises the PUC compare error when
9090 /// neither operand provides one.
9091 fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
9092 let b = match (l, r) {
9093 (Value::Int(a), Value::Int(b)) => {
9094 if or_eq {
9095 a <= b
9096 } else {
9097 a < b
9098 }
9099 }
9100 (Value::Float(a), Value::Float(b)) => {
9101 if or_eq {
9102 a <= b
9103 } else {
9104 a < b
9105 }
9106 }
9107 (Value::Int(a), Value::Float(b)) => {
9108 if or_eq {
9109 int_le_float(a, b)
9110 } else {
9111 int_lt_float(a, b)
9112 }
9113 }
9114 (Value::Float(a), Value::Int(b)) => {
9115 if a.is_nan() {
9116 false
9117 } else if or_eq {
9118 !int_lt_float(b, a)
9119 } else {
9120 !int_le_float(b, a)
9121 }
9122 }
9123 (Value::Str(a), Value::Str(b)) => {
9124 let (a, b) = (a.as_bytes(), b.as_bytes());
9125 if or_eq { a <= b } else { a < b }
9126 }
9127 (l, r) => {
9128 let event = if or_eq { Mm::Le } else { Mm::Lt };
9129 // PUC 5.1's `get_compTM` rule applies to ordered comparisons
9130 // too: both operands' metatables must expose the same
9131 // implementation for `__lt` / `__le` to fire. events.lua 5.1
9132 // :262 expects `c < d` (where `d` has no metatable) to error
9133 // with the default "attempt to compare two table values"
9134 // rather than running c's `__lt` blindly.
9135 let mm = if self.version() <= LuaVersion::Lua51 {
9136 self.get_comp_mm(l, r, event)
9137 } else {
9138 let mut m = self.get_mm(l, event);
9139 if m.is_nil() {
9140 m = self.get_mm(r, event);
9141 }
9142 m
9143 };
9144 // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
9145 // operand carries `__le`. 5.4 dropped the synthesis (now
9146 // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
9147 // on the synthesis — its metatable defines only `__lt`.
9148 // The fallback calls `__lt(r, l)` synchronously (the suite's
9149 // `__lt` doesn't yield) and negates the result; the yieldable
9150 // `__lt` path stays reserved for the explicit `<` operator.
9151 if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
9152 let lt = Mm::Lt;
9153 let mut mm_lt = self.get_mm(l, lt);
9154 if mm_lt.is_nil() {
9155 mm_lt = self.get_mm(r, lt);
9156 }
9157 if !mm_lt.is_nil() {
9158 return Ok(MmOut::CompareSynth { func: mm_lt });
9159 }
9160 }
9161 if mm.is_nil() {
9162 // PUC luaG_ordererror: "two X values" when the operand
9163 // types match, "X with Y" otherwise (objtypename-aware).
9164 let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
9165 return Err(self.rt_err(&if t1 == t2 {
9166 format!("attempt to compare two {t1} values")
9167 } else {
9168 format!("attempt to compare {t1} with {t2}")
9169 }));
9170 }
9171 return Ok(MmOut::Mm { func: mm, recv: l });
9172 }
9173 };
9174 Ok(MmOut::Done(Value::Bool(b)))
9175 }
9176
9177 // ---- numeric for ----
9178
9179 fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
9180 let a = inst.a();
9181 let init = self.r(base, a);
9182 let limit = self.r(base, a + 1);
9183 let step = self.r(base, a + 2);
9184 let (Some(init_n), Some(limit_n), Some(step_n)) =
9185 (as_num(init), as_num(limit), as_num(step))
9186 else {
9187 // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
9188 // PUC checks limit, then step, then initial value.
9189 let (what, bad) = if as_num(limit).is_none() {
9190 ("limit", limit)
9191 } else if as_num(step).is_none() {
9192 ("step", step)
9193 } else {
9194 ("initial value", init)
9195 };
9196 let tn = self.obj_typename(bad);
9197 return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
9198 };
9199 // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
9200 // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
9201 // first test, so each successful iteration emits a backward `OP_FORLOOP`
9202 // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
9203 // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
9204 // distance in luna's encoding is `loop_pc - prep_pc`; firing
9205 // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
9206 let pre53 = self.version() <= LuaVersion::Lua53;
9207 match (init_n, step_n) {
9208 (Num::Int(i0), Num::Int(st)) => {
9209 if st == 0 {
9210 return Err(self.rt_err("'for' step is zero"));
9211 }
9212 if pre53 {
9213 // PUC 5.3 `forlimit`: int limit passes through; float limit
9214 // gets clamped to MIN/MAX with a `stopnow` flag set only
9215 // when the clamp is unreachable (positive float with a
9216 // negative step → limit=MAX, stopnow; negative float with
9217 // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
9218 // `init = 0` so OP_FORLOOP's first test against the
9219 // unreachable clamp fails cleanly. An ordinary in-range
9220 // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
9221 // lets OP_FORLOOP's natural test reject the first step.
9222 let (lim, stopnow) = match limit_n {
9223 Num::Int(l) => (l, false),
9224 Num::Float(f) => {
9225 if f.is_nan() {
9226 (0, true)
9227 } else if f >= i64::MAX as f64 + 1.0 {
9228 // beyond +MAX: unreachable for a decreasing loop
9229 (i64::MAX, st < 0)
9230 } else if f <= i64::MIN as f64 {
9231 // beyond -MIN: unreachable for an increasing loop
9232 (i64::MIN, st >= 0)
9233 } else if st > 0 {
9234 (f.floor() as i64, false)
9235 } else {
9236 (f.ceil() as i64, false)
9237 }
9238 }
9239 };
9240 let initv = if stopnow { 0 } else { i0 };
9241 let pre = initv.wrapping_sub(st);
9242 self.set_r(base, a, Value::Int(pre));
9243 self.set_r(base, a + 1, Value::Int(lim));
9244 self.set_r(base, a + 2, Value::Int(st));
9245 self.add_pc(inst.bx() as i32 - 1);
9246 return Ok(());
9247 }
9248 let (lim, empty) = int_for_limit(limit_n, i0, st);
9249 if empty {
9250 self.add_pc(inst.bx() as i32);
9251 return Ok(());
9252 }
9253 let count = if st > 0 {
9254 (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
9255 } else {
9256 (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
9257 };
9258 self.set_r(base, a, Value::Int(i0));
9259 self.set_r(base, a + 1, Value::Int(count as i64));
9260 self.set_r(base, a + 2, Value::Int(st));
9261 self.set_r(base, a + 3, Value::Int(i0));
9262 }
9263 _ => {
9264 let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
9265 if st == 0.0 {
9266 return Err(self.rt_err("'for' step is zero"));
9267 }
9268 if pre53 {
9269 let pre = x0 - st;
9270 self.set_r(base, a, Value::Float(pre));
9271 self.set_r(base, a + 1, Value::Float(lim));
9272 self.set_r(base, a + 2, Value::Float(st));
9273 self.add_pc(inst.bx() as i32 - 1);
9274 return Ok(());
9275 }
9276 let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
9277 if !runs {
9278 self.add_pc(inst.bx() as i32);
9279 return Ok(());
9280 }
9281 self.set_r(base, a, Value::Float(x0));
9282 self.set_r(base, a + 1, Value::Float(lim));
9283 self.set_r(base, a + 2, Value::Float(st));
9284 self.set_r(base, a + 3, Value::Float(x0));
9285 }
9286 }
9287 Ok(())
9288 }
9289
9290 #[inline(always)]
9291 fn for_loop(&mut self, inst: Inst, base: u32) {
9292 let a = inst.a();
9293 // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
9294 // directly (R[a+1] holds the limit, *not* a remaining-count) so the
9295 // first iteration's test fires through the same backward-jump path as
9296 // every later iteration. 5.4+ switched to the count-based form luna
9297 // already uses for `Int`; the float branch was already PUC-3.x-style.
9298 let pre53 = self.version() <= LuaVersion::Lua53;
9299 match self.r(base, a) {
9300 Value::Int(cur) if pre53 => {
9301 let Value::Int(lim) = self.r(base, a + 1) else {
9302 unreachable!()
9303 };
9304 let Value::Int(st) = self.r(base, a + 2) else {
9305 unreachable!()
9306 };
9307 let next = cur.wrapping_add(st);
9308 let cont = if st > 0 { next <= lim } else { next >= lim };
9309 if cont {
9310 self.set_r(base, a, Value::Int(next));
9311 self.set_r(base, a + 3, Value::Int(next));
9312 self.add_pc(-(inst.bx() as i32));
9313 }
9314 }
9315 Value::Int(cur) => {
9316 let Value::Int(count) = self.r(base, a + 1) else {
9317 unreachable!()
9318 };
9319 if count > 0 {
9320 let Value::Int(st) = self.r(base, a + 2) else {
9321 unreachable!()
9322 };
9323 let next = cur.wrapping_add(st);
9324 self.set_r(base, a, Value::Int(next));
9325 self.set_r(base, a + 1, Value::Int(count - 1));
9326 self.set_r(base, a + 3, Value::Int(next));
9327 self.add_pc(-(inst.bx() as i32));
9328 }
9329 }
9330 Value::Float(cur) => {
9331 let Value::Float(lim) = self.r(base, a + 1) else {
9332 unreachable!()
9333 };
9334 let Value::Float(st) = self.r(base, a + 2) else {
9335 unreachable!()
9336 };
9337 let next = cur + st;
9338 let cont = if st > 0.0 { next <= lim } else { next >= lim };
9339 if cont {
9340 self.set_r(base, a, Value::Float(next));
9341 self.set_r(base, a + 3, Value::Float(next));
9342 self.add_pc(-(inst.bx() as i32));
9343 }
9344 }
9345 _ => unreachable!("corrupt for-loop state"),
9346 }
9347 }
9348
9349 // ---- native helpers (used by builtins) ----
9350
9351 /// A native function's own captured upvalue (self lives at func_slot).
9352 ///
9353 /// Public so `native_typed` trampolines and embedders authoring
9354 /// stateful natives via `native_with(...)` can read their upvals.
9355 pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
9356 let Value::Native(nc) = self.stack[func_slot as usize] else {
9357 unreachable!("native frame without native closure");
9358 };
9359 nc.upvals[i]
9360 }
9361
9362 /// Number of upvalues captured by the native at `func_slot` (variadic
9363 /// captures such as the `io.lines` format list).
9364 pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
9365 let Value::Native(nc) = self.stack[func_slot as usize] else {
9366 unreachable!("native frame without native closure");
9367 };
9368 nc.upvals.len()
9369 }
9370
9371 /// Write a native function's own upvalue (stateful iterators).
9372 pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
9373 let Value::Native(nc) = self.stack[func_slot as usize] else {
9374 unreachable!("native frame without native closure");
9375 };
9376 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9377 unsafe { nc.as_mut() }.upvals[i] = v;
9378 // NativeClosure.upvals is traced as part of its Trace; a long-lived
9379 // stateful iterator closure (e.g. string.gmatch) sees many writes —
9380 // barrier_back once-and-done is cheaper than per-child forward.
9381 self.heap
9382 .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9383 }
9384
9385 /// Read the i-th positional argument inside a `NativeFn` body
9386 /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9387 /// matching PUC's "missing arg is nil" contract. Public so embedders
9388 /// can author their own natives.
9389 pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9390 if i < nargs {
9391 self.stack[(func_slot + 1 + i) as usize]
9392 } else {
9393 Value::Nil
9394 }
9395 }
9396
9397 /// Push the return values of a `NativeFn` and return their count
9398 /// (analogous to pushing N values then `return N` from a C function).
9399 /// Public so embedders can author their own natives.
9400 pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9401 let need = func_slot as usize + vals.len();
9402 if self.stack.len() < need {
9403 self.stack.resize(need, Value::Nil);
9404 }
9405 for (i, &v) in vals.iter().enumerate() {
9406 self.stack[func_slot as usize + i] = v;
9407 }
9408 vals.len() as u32
9409 }
9410
9411 /// Fast string concatenation of an adjacent pair, or `None` when a
9412 /// `__concat` metamethod is required.
9413 fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9414 let legacy = self.float_fmt();
9415 // Length-check fast paths for both string operands BEFORE the
9416 // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9417 // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9418 // overflow on the first pair that would exceed `INT_MAX` instead
9419 // of allocating multi-GB intermediates first.
9420 let max_str = i32::MAX as usize;
9421 if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9422 let a_len = ls.as_bytes().len();
9423 let b_len = rs.as_bytes().len();
9424 let new_len = a_len.checked_add(b_len);
9425 if new_len.is_none() || new_len.unwrap() > max_str {
9426 return Err(self.rt_err("string length overflow"));
9427 }
9428 }
9429 match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9430 (Some(a), Some(b)) => {
9431 // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9432 // concat past it raises "string length overflow"
9433 // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9434 // exactly this wall).
9435 let new_len = a.len().checked_add(b.len());
9436 if new_len.is_none() || new_len.unwrap() > max_str {
9437 return Err(self.rt_err("string length overflow"));
9438 }
9439 let mut combined = a;
9440 combined.extend_from_slice(&b);
9441 Ok(Some(Value::Str(self.heap.intern(&combined))))
9442 }
9443 _ => Ok(None),
9444 }
9445 }
9446
9447 /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9448 /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9449 /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9450 /// call — its `Meta` continuation re-enters here on the metamethod's return.
9451 fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9452 // Sum the lengths of all all-Str operands BEFORE starting the
9453 // right-associative fold so a 129-operand `a..a..…` chain
9454 // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9455 // not after dozens of multi-GB intermediate intern+hash rounds.
9456 // A non-Str operand falls through to the per-pair check.
9457 let max_str = i32::MAX as usize;
9458 let mut total: usize = 0;
9459 let mut all_str = true;
9460 for slot in base_a..self.top {
9461 match self.stack[slot as usize] {
9462 Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9463 Some(t) if t <= max_str => total = t,
9464 _ => return Err(self.rt_err("string length overflow")),
9465 },
9466 _ => {
9467 all_str = false;
9468 break;
9469 }
9470 }
9471 }
9472 let _ = all_str; // discrimination already captured by early returns above
9473 while self.top.saturating_sub(base_a) >= 2 {
9474 let i = self.top - 1; // rightmost operand
9475 let x = self.stack[(i - 1) as usize];
9476 let y = self.stack[i as usize];
9477 match self.concat_pair(x, y)? {
9478 Some(s) => {
9479 self.stack[(i - 1) as usize] = s;
9480 self.top = i; // consumed y
9481 }
9482 None => {
9483 let mut mm = self.get_mm(x, Mm::Concat);
9484 if mm.is_nil() {
9485 mm = self.get_mm(y, Mm::Concat);
9486 }
9487 if mm.is_nil() {
9488 let legacy = self.float_fmt();
9489 let bad = if concat_piece(x, legacy).is_none() {
9490 x
9491 } else {
9492 y
9493 };
9494 return Err(self.type_err("concatenate", bad));
9495 }
9496 // result lands at i-1, dropping y (top→i); resume continues.
9497 let dst = i - 1;
9498 self.begin_meta_call(
9499 mm,
9500 &[x, y],
9501 MetaAction::Concat { dst, base_a },
9502 "concat",
9503 )?;
9504 return Ok(());
9505 }
9506 }
9507 }
9508 self.maybe_collect_garbage(base_a + 1);
9509 Ok(())
9510 }
9511
9512 /// tostring with __tostring / __name support.
9513 pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9514 let mm = self.get_mm(v, Mm::ToString);
9515 if !mm.is_nil() {
9516 return match self.call_mm1(mm, &[v])? {
9517 Value::Str(s) => Ok(s.as_bytes().to_vec()),
9518 _ => Err(self.rt_err("'__tostring' must return a string")),
9519 };
9520 }
9521 if let Value::Table(t) = v
9522 && let Value::Str(name) = self.get_mm(v, Mm::Name)
9523 {
9524 let mut out = name.as_bytes().to_vec();
9525 out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9526 return Ok(out);
9527 }
9528 Ok(self.tostring_basic(v))
9529 }
9530
9531 /// The dialect's float-rendering flavor (v2.14 HD): ≤5.2 %.14g
9532 /// bare, 5.3/5.4 %.14g + ".0", 5.5 two-stage %.15g/%.17g + ".0".
9533 pub(crate) fn float_fmt(&self) -> numeric::FloatFmt {
9534 use crate::version::LuaVersion::*;
9535 match self.version {
9536 Lua51 | Lua52 => numeric::FloatFmt::Legacy14,
9537 Lua53 | Lua54 => numeric::FloatFmt::G14,
9538 _ => numeric::FloatFmt::TwoStage55,
9539 }
9540 }
9541
9542 /// Basic tostring (no metamethods).
9543 pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9544 match v {
9545 Value::Nil => b"nil".to_vec(),
9546 Value::Bool(true) => b"true".to_vec(),
9547 Value::Bool(false) => b"false".to_vec(),
9548 Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9549 // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9550 // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9551 // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9552 // concatenating these renderings.
9553 Value::Float(f) => {
9554 numeric::num_to_string_for(Num::Float(f), self.float_fmt()).into_bytes()
9555 }
9556 Value::Str(s) => s.as_bytes().to_vec(),
9557 Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9558 Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9559 Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9560 Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9561 // PUC names file handles `file (0x…)`; a bare userdata is
9562 // `userdata: 0x…`. The io library overrides this via __tostring.
9563 Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9564 // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9565 // (the "light" qualifier only appears in `luaL_typeerror`).
9566 Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9567 }
9568 }
9569}
9570
/// Binary arithmetic and bitwise operators.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    Add,
    Sub,
    Mul,
    Mod,
    Pow,
    Div,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
}

impl ArithOp {
    /// The PUC metamethod event name without the `__` prefix (`__add` →
    /// `"add"`), as reported by `debug.getinfo(level, "n")` from inside a
    /// metamethod handler.
    fn mm_name(self) -> &'static str {
        match self {
            Self::Add => "add",
            Self::Sub => "sub",
            Self::Mul => "mul",
            Self::Mod => "mod",
            Self::Pow => "pow",
            Self::Div => "div",
            Self::IDiv => "idiv",
            Self::BAnd => "band",
            Self::BOr => "bor",
            Self::BXor => "bxor",
            Self::Shl => "shl",
            Self::Shr => "shr",
        }
    }
}
9607
9608fn as_num(v: Value) -> Option<Num> {
9609 match v {
9610 Value::Int(i) => Some(Num::Int(i)),
9611 Value::Float(f) => Some(Num::Float(f)),
9612 // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9613 Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9614 _ => None,
9615 }
9616}
9617
9618/// A concatenable operand's byte form (string, or a number coerced to its
9619/// string), or `None` when only a `__concat` metamethod can handle it.
9620/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9621/// suffix on integer-valued floats) — see `num_to_string_for`.
9622fn concat_piece(v: Value, float_fmt: numeric::FloatFmt) -> Option<Vec<u8>> {
9623 match v {
9624 Value::Str(s) => Some(s.as_bytes().to_vec()),
9625 Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9626 Value::Float(x) => Some(numeric::num_to_string_for(Num::Float(x), float_fmt).into_bytes()),
9627 _ => None,
9628 }
9629}
9630
9631/// Index into the per-basic-type metatable table for a non-table value
9632/// (None for tables, which carry their own metatable).
9633fn type_mt_slot(v: Value) -> Option<usize> {
9634 match v {
9635 Value::Nil => Some(0),
9636 Value::Bool(_) => Some(1),
9637 Value::Int(_) | Value::Float(_) => Some(2),
9638 Value::Str(_) => Some(3),
9639 Value::Closure(_) | Value::Native(_) => Some(4),
9640 // tables and full userdata carry their own metatable; threads and
9641 // light userdata have none (PUC keeps a shared per-type mt slot for
9642 // light, but luna doesn't expose it — no test gates on it yet).
9643 Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9644 }
9645}
9646
9647/// Number, or string coerced to number (5.5 default string-arith coercion).
9648fn coerce_num(v: Value) -> Option<Num> {
9649 match v {
9650 Value::Int(i) => Some(Num::Int(i)),
9651 Value::Float(f) => Some(Num::Float(f)),
9652 Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9653 _ => None,
9654 }
9655}
9656
/// Lua left shift of `a` by `b` bits.
///
/// Shifts are logical on 64 bits. A negative `b` shifts right instead, and
/// any shift whose magnitude is 64 or more yields 0.
fn shift_left(a: i64, b: i64) -> i64 {
    // `unsigned_abs` is total, so `i64::MIN` needs no special case.
    let magnitude = b.unsigned_abs();
    if magnitude >= 64 {
        return 0;
    }
    let bits = a as u64;
    let amount = magnitude as u32;
    let shifted = if b < 0 {
        bits >> amount
    } else {
        bits << amount
    };
    shifted as i64
}
9672
/// Exact `i < f` for an integer against a float (PUC LTintfloat shape).
///
/// NaN compares false. Floats outside the `i64` range are decided by sign;
/// in-range floats are floored and compared as integers.
fn int_lt_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    if f >= TWO_POW_63 {
        return true;
    }
    let floored = f.floor();
    let bound = floored as i64;
    // An integral `f` needs a strict comparison; a fractional one is
    // already strictly above its floor.
    if floored == f { i < bound } else { i <= bound }
}
9688
/// Exact `i <= f` for an integer against a float.
///
/// NaN compares false. Floats outside the `i64` range are decided by sign;
/// in-range floats are floored and compared as integers.
fn int_le_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    if f.is_nan() || f < -TWO_POW_63 {
        return false;
    }
    if f >= TWO_POW_63 {
        return true;
    }
    let bound = f.floor() as i64;
    i <= bound
}
9702
9703/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9704/// (clipped limit, loop-is-empty).
9705fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9706 match limit {
9707 Num::Int(l) => {
9708 let empty = if step > 0 { init > l } else { init < l };
9709 (l, empty)
9710 }
9711 Num::Float(f) => {
9712 if f.is_nan() {
9713 return (0, true);
9714 }
9715 if step > 0 {
9716 if f >= 9_223_372_036_854_775_808.0 {
9717 (i64::MAX, false)
9718 } else {
9719 let l = f.floor();
9720 if l < -9_223_372_036_854_775_808.0 {
9721 (i64::MIN, true)
9722 } else {
9723 let li = l as i64;
9724 (li, init > li)
9725 }
9726 }
9727 } else if f <= -9_223_372_036_854_775_808.0 {
9728 (i64::MIN, false)
9729 } else {
9730 let l = f.ceil();
9731 if l >= 9_223_372_036_854_775_808.0 {
9732 // PUC forlimit: a positive limit beyond the integer range
9733 // is unreachable for a decreasing loop — empty.
9734 (i64::MAX, true)
9735 } else {
9736 let li = l as i64;
9737 (li, init < li)
9738 }
9739 }
9740 }
9741 }
9742}
9743
9744/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9745/// `@file` / `=name` markers in `source`).
9746fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9747 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9748 let b = unsafe { crate::runtime::string::bytes_of(p) };
9749 match b.first() {
9750 Some(b'@') | Some(b'=') => &b[1..],
9751 _ => b,
9752 }
9753}
9754
9755impl Vm {
9756 /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9757 /// that called the current native. Returns (closure, current line,
9758 /// extra vararg count).
9759 /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9760 /// function running at `level`, recovered from the caller's call
9761 /// instruction (PUC funcnamefromcode). None for the main chunk or a
9762 /// tail/anonymous call with no recoverable name.
9763 /// A debug-level position: either a real Lua frame (by index) or a synthetic
9764 /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9765 /// coroutine body), which `debug.getinfo` and traceback report as "C".
9766 /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9767 /// frame at `level`'s current pc, as (name, value). Locals are visited in
9768 /// registration order (start pc, then register) to match luaF_getlocalname.
9769 pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9770 if n == 0 {
9771 return None;
9772 }
9773 let fi = match self.dbg_frame(level)? {
9774 DbgKind::Lua(fi) => fi,
9775 // Tail-call placeholder has no real frame backing it — no locals
9776 // exist to read or write here. PUC `findlocal` returns NULL on
9777 // a CIST_TAIL activation.
9778 DbgKind::Tail(_) => return None,
9779 // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9780 // for slot `n` inside the argument window (db.lua :408-:413, and
9781 // the call/return hook reads of math.sin / select args via
9782 // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9783 // meaningful for a C frame here.
9784 DbgKind::C(fi) => {
9785 if n < 1 {
9786 return None;
9787 }
9788 let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9789 if (n as u32) > nargs {
9790 return None;
9791 }
9792 let slot = (func_slot + n as u32) as usize;
9793 let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9794 return Some((self.temporary_locvar_name().to_string(), val));
9795 }
9796 };
9797 let f = self.frames[fi].lua()?;
9798 // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9799 // is the first extra arg passed to the function (`...[1]`), `-2` the
9800 // second, etc. The 5.5 stack layout parks varargs in
9801 // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9802 if n < 0 {
9803 let i = (-n) as u32;
9804 if i == 0 || i > f.n_varargs {
9805 return None;
9806 }
9807 let val = self
9808 .stack
9809 .get((f.func_slot + i) as usize)
9810 .copied()
9811 .unwrap_or(Value::Nil);
9812 return Some((self.vararg_locvar_name().to_string(), val));
9813 }
9814 let proto = f.closure.proto;
9815 // PUC's parser injects a hidden `(vararg table)` locvar for an
9816 // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9817 // right after the fixed parameters (`numparams + 1`). Main chunks
9818 // and `(...t)` named-vararg funcs do NOT get one — gate on the
9819 // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9820 // their declared registers (no shadow slot allocated), so we expose
9821 // that hidden index purely in this debug view.
9822 let num_params = proto.num_params as i64;
9823 let vararg_slot = if proto.has_vararg_table_pseudo {
9824 Some(num_params + 1)
9825 } else {
9826 None
9827 };
9828 if vararg_slot == Some(n) {
9829 return Some(("(vararg table)".to_string(), Value::Nil));
9830 }
9831 let pc = (f.pc as usize).saturating_sub(1);
9832 let mut active: Vec<&crate::runtime::LocVar> = proto
9833 .locvars
9834 .iter()
9835 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9836 .collect();
9837 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9838 let mut idx: i64 = n - 1;
9839 if let Some(vs) = vararg_slot
9840 && n > vs
9841 {
9842 idx -= 1;
9843 }
9844 let idx = idx as usize;
9845 if let Some(lv) = active.get(idx) {
9846 let val = self
9847 .stack
9848 .get((f.base + lv.reg) as usize)
9849 .copied()
9850 .unwrap_or(Value::Nil);
9851 return Some((lv.name.to_string(), val));
9852 }
9853 // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9854 // still inside the frame's live register window — report a
9855 // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9856 // the next frame's func slot (`ci->next->func.p`) so the
9857 // temporary window stops where the callee's frame begins
9858 // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9859 // an out-of-range slot).
9860 let limit = self
9861 .frames
9862 .get(fi + 1)
9863 .and_then(|cf| cf.lua())
9864 .map(|nf| nf.func_slot)
9865 .unwrap_or_else(|| self.top.max(f.base));
9866 let temp_reg = idx as u32;
9867 if f.base + temp_reg < limit {
9868 let val = self
9869 .stack
9870 .get((f.base + temp_reg) as usize)
9871 .copied()
9872 .unwrap_or(Value::Nil);
9873 return Some((self.lua_temporary_locvar_name().to_string(), val));
9874 }
9875 None
9876 }
9877
9878 /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9879 /// the local / vararg name on success, `None` when the slot does not
9880 /// resolve. Mirrors `local_at`'s indexing exactly.
9881 pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9882 if n == 0 {
9883 return None;
9884 }
9885 let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9886 return None;
9887 };
9888 let f = self.frames[fi].lua()?;
9889 if n < 0 {
9890 let i = (-n) as u32;
9891 if i == 0 || i > f.n_varargs {
9892 return None;
9893 }
9894 let slot = (f.func_slot + i) as usize;
9895 if let Some(s) = self.stack.get_mut(slot) {
9896 *s = v;
9897 }
9898 return Some(self.vararg_locvar_name().to_string());
9899 }
9900 let proto = f.closure.proto;
9901 let num_params = proto.num_params as i64;
9902 let vararg_slot = if proto.has_vararg_table_pseudo {
9903 Some(num_params + 1)
9904 } else {
9905 None
9906 };
9907 if vararg_slot == Some(n) {
9908 // hidden (vararg table) slot has no real storage — accept the
9909 // write as a no-op for PUC parity (db.lua doesn't write to it).
9910 return Some("(vararg table)".to_string());
9911 }
9912 let pc = (f.pc as usize).saturating_sub(1);
9913 let mut active: Vec<&crate::runtime::LocVar> = proto
9914 .locvars
9915 .iter()
9916 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9917 .collect();
9918 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9919 let mut idx: i64 = n - 1;
9920 if let Some(vs) = vararg_slot
9921 && n > vs
9922 {
9923 idx -= 1;
9924 }
9925 let idx = idx as usize;
9926 let (name, reg) = if let Some(lv) = active.get(idx) {
9927 (lv.name.to_string(), lv.reg)
9928 } else {
9929 // PUC `luaG_findlocal` fallback into the temporary window —
9930 // bounded by the next frame's func slot (see local_at).
9931 let limit = self
9932 .frames
9933 .get(fi + 1)
9934 .and_then(|cf| cf.lua())
9935 .map(|nf| nf.func_slot)
9936 .unwrap_or_else(|| self.top.max(f.base));
9937 let temp_reg = idx as u32;
9938 if f.base + temp_reg >= limit {
9939 return None;
9940 }
9941 (self.lua_temporary_locvar_name().to_string(), temp_reg)
9942 };
9943 let slot = (f.base + reg) as usize;
9944 if let Some(s) = self.stack.get_mut(slot) {
9945 *s = v;
9946 }
9947 Some(name)
9948 }
9949
9950 /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9951 /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9952 /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9953 /// vararg indexing past `n_varargs`, or for a register past the live
9954 /// window. Naming follows the same priority as `local_at`: named locals,
9955 /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9956 /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9957 /// window.
9958 pub(crate) fn local_at_coro(
9959 &self,
9960 co: Gc<crate::runtime::Coro>,
9961 level: i64,
9962 n: i64,
9963 ) -> Option<(String, Value)> {
9964 if level < 1 || n == 0 {
9965 return None;
9966 }
9967 let frames = &co.frames;
9968 // Logical level: iterate Lua frames from the top.
9969 let lua_indices: Vec<usize> = (0..frames.len())
9970 .rev()
9971 .filter(|&i| frames[i].lua().is_some())
9972 .collect();
9973 let fi = *lua_indices.get((level - 1) as usize)?;
9974 let f = frames[fi].lua()?;
9975 if n < 0 {
9976 let i = (-n) as u32;
9977 if i == 0 || i > f.n_varargs {
9978 return None;
9979 }
9980 let val = co
9981 .stack
9982 .get((f.func_slot + i) as usize)
9983 .copied()
9984 .unwrap_or(Value::Nil);
9985 return Some((self.vararg_locvar_name().to_string(), val));
9986 }
9987 let proto = f.closure.proto;
9988 let num_params = proto.num_params as i64;
9989 let vararg_slot = if proto.has_vararg_table_pseudo {
9990 Some(num_params + 1)
9991 } else {
9992 None
9993 };
9994 if vararg_slot == Some(n) {
9995 return Some(("(vararg table)".to_string(), Value::Nil));
9996 }
9997 let pc = (f.pc as usize).saturating_sub(1);
9998 let mut active: Vec<&crate::runtime::LocVar> = proto
9999 .locvars
10000 .iter()
10001 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
10002 .collect();
10003 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
10004 let mut idx: i64 = n - 1;
10005 if let Some(vs) = vararg_slot
10006 && n > vs
10007 {
10008 idx -= 1;
10009 }
10010 let idx = idx as usize;
10011 if let Some(lv) = active.get(idx) {
10012 let val = co
10013 .stack
10014 .get((f.base + lv.reg) as usize)
10015 .copied()
10016 .unwrap_or(Value::Nil);
10017 return Some((lv.name.to_string(), val));
10018 }
10019 let limit = frames
10020 .get(fi + 1)
10021 .and_then(|cf| cf.lua())
10022 .map(|nf| nf.func_slot)
10023 .unwrap_or(co.top.max(f.base));
10024 let temp_reg = idx as u32;
10025 if f.base + temp_reg < limit {
10026 let val = co
10027 .stack
10028 .get((f.base + temp_reg) as usize)
10029 .copied()
10030 .unwrap_or(Value::Nil);
10031 return Some((self.lua_temporary_locvar_name().to_string(), val));
10032 }
10033 None
10034 }
10035
10036 /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
10037 /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
10038 pub(crate) fn local_set_coro(
10039 &mut self,
10040 co: Gc<crate::runtime::Coro>,
10041 level: i64,
10042 n: i64,
10043 v: Value,
10044 ) -> Option<String> {
10045 if level < 1 || n == 0 {
10046 return None;
10047 }
10048 let lua_indices: Vec<usize> = (0..co.frames.len())
10049 .rev()
10050 .filter(|&i| co.frames[i].lua().is_some())
10051 .collect();
10052 let fi = *lua_indices.get((level - 1) as usize)?;
10053 let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
10054 let f = co.frames[fi].lua()?;
10055 (
10056 f.func_slot,
10057 f.n_varargs,
10058 f.base,
10059 f.closure.proto,
10060 co.top.max(f.base),
10061 co.frames
10062 .get(fi + 1)
10063 .and_then(|cf| cf.lua())
10064 .map(|nf| nf.func_slot),
10065 )
10066 };
10067 if n < 0 {
10068 let i = (-n) as u32;
10069 if i == 0 || i > n_varargs {
10070 return None;
10071 }
10072 let slot = (func_slot + i) as usize;
10073 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10074 let stack = unsafe { &mut co.as_mut().stack };
10075 if let Some(s) = stack.get_mut(slot) {
10076 *s = v;
10077 }
10078 // co.stack values are traced — once-per-call barrier so propagate
10079 // sees the new value if co was already BLACK this cycle.
10080 self.heap
10081 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
10082 return Some(self.vararg_locvar_name().to_string());
10083 }
10084 let num_params = proto.num_params as i64;
10085 let vararg_slot = if proto.has_vararg_table_pseudo {
10086 Some(num_params + 1)
10087 } else {
10088 None
10089 };
10090 if vararg_slot == Some(n) {
10091 return Some("(vararg table)".to_string());
10092 }
10093 let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
10094 let mut active: Vec<&crate::runtime::LocVar> = proto
10095 .locvars
10096 .iter()
10097 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
10098 .collect();
10099 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
10100 let mut idx: i64 = n - 1;
10101 if let Some(vs) = vararg_slot
10102 && n > vs
10103 {
10104 idx -= 1;
10105 }
10106 let idx = idx as usize;
10107 let (name, reg) = if let Some(lv) = active.get(idx) {
10108 (lv.name.to_string(), lv.reg)
10109 } else {
10110 let limit = next_func_slot.unwrap_or(top_for_temp);
10111 let temp_reg = idx as u32;
10112 if base + temp_reg >= limit {
10113 return None;
10114 }
10115 (self.lua_temporary_locvar_name().to_string(), temp_reg)
10116 };
10117 let slot = (base + reg) as usize;
10118 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10119 let stack = unsafe { &mut co.as_mut().stack };
10120 if let Some(s) = stack.get_mut(slot) {
10121 *s = v;
10122 }
10123 // co.stack values are traced — once-per-call barrier so propagate
10124 // sees the new value if co was already BLACK this cycle.
10125 self.heap
10126 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
10127 Some(name)
10128 }
10129
10130 /// Frame info for a level on a suspended coroutine (PUC
10131 /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
10132 /// Returns the closure + currentline + extraargs + istailcall for the
10133 /// level-th Lua activation in `co.frames`. None if level overshoots.
10134 pub(crate) fn coro_frame_info(
10135 &self,
10136 co: Gc<crate::runtime::Coro>,
10137 level: i64,
10138 ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
10139 if level < 1 {
10140 return None;
10141 }
10142 let lua_indices: Vec<usize> = (0..co.frames.len())
10143 .rev()
10144 .filter(|&i| co.frames[i].lua().is_some())
10145 .collect();
10146 let fi = *lua_indices.get((level - 1) as usize)?;
10147 let f = co.frames[fi].lua()?;
10148 let proto = f.closure.proto;
10149 let pc = (f.pc as usize)
10150 .saturating_sub(1)
10151 .min(proto.lines.len().saturating_sub(1));
10152 let line = proto.lines.get(pc).copied().unwrap_or(0);
10153 Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
10154 }
10155
10156 /// Whether `level` resolves to any live activation (PUC lua_getstack).
10157 pub(crate) fn level_in_range(&self, level: i64) -> bool {
10158 self.dbg_frame(level).is_some()
10159 }
10160
10161 /// PUC's debug-API placeholder for an unnamed vararg slot returned by
10162 /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
10163 /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
10164 /// 5.3 :195 / 5.4 :286 baseline on their respective form.
10165 pub(crate) fn vararg_locvar_name(&self) -> &'static str {
10166 if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
10167 "(*vararg)"
10168 } else {
10169 "(vararg)"
10170 }
10171 }
10172
10173 /// PUC's debug-API placeholder for an unnamed temporary on a C
10174 /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
10175 /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
10176 /// their spelling.
10177 pub(crate) fn temporary_locvar_name(&self) -> &'static str {
10178 if matches!(
10179 self.version,
10180 LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
10181 ) {
10182 // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
10183 // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
10184 // to `(C temporary)`.
10185 "(*temporary)"
10186 } else {
10187 "(C temporary)"
10188 }
10189 }
10190
10191 /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
10192 /// (an arithmetic intermediate sitting past the last named local on a
10193 /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
10194 /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
10195 /// spelling.
10196 pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
10197 if matches!(
10198 self.version,
10199 LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
10200 ) {
10201 "(*temporary)"
10202 } else {
10203 "(temporary)"
10204 }
10205 }
10206
10207 /// The Lua closure running at `level` on the current thread, or `None`
10208 /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
10209 /// need this to reach the function whose env they read or rewrite.
10210 pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
10211 // `DbgKind::Tail` also falls into the else branch — a tail-call
10212 // placeholder has no closure of its own, so PUC's `lua_getstack` +
10213 // `getfunc` for that level returns no function, and `getfenv(level)`
10214 // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
10215 let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
10216 return None;
10217 };
10218 Some(self.frames[fi].lua()?.closure)
10219 }
10220
10221 pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
10222 if level < 1 {
10223 return false;
10224 }
10225 let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
10226 (level as usize) <= count
10227 }
10228
10229 pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
10230 if level < 1 {
10231 return None;
10232 }
10233 // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
10234 // activation counts as a level, and each Lua activation's
10235 // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
10236 // dropped the synthetic shape: `istailcall` becomes a flag on the
10237 // real frame and Cont activations no longer count separately.
10238 // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
10239 // `getinfo(2).func == g1` pins the 5.2+ shape.
10240 let v51 = self.version <= LuaVersion::Lua51;
10241 let mut lvl = level;
10242 for fi in (0..self.frames.len()).rev() {
10243 match &self.frames[fi] {
10244 CallFrame::Lua(f) => {
10245 lvl -= 1;
10246 if lvl == 0 {
10247 return Some(DbgKind::Lua(fi));
10248 }
10249 if v51 {
10250 // 5.1 reports one synthetic CIST_TAIL level per
10251 // collapsed tail call (PUC `lua_getstack` subtracts
10252 // `ci->u.l.tailcalls` from the remaining level).
10253 for _ in 0..f.tailcalls {
10254 lvl -= 1;
10255 if lvl == 0 {
10256 return Some(DbgKind::Tail(fi));
10257 }
10258 }
10259 }
10260 if f.from_c {
10261 lvl -= 1;
10262 if lvl == 0 {
10263 return Some(DbgKind::C(fi));
10264 }
10265 }
10266 }
10267 CallFrame::Cont(_) => {
10268 if !v51 {
10269 continue;
10270 }
10271 lvl -= 1;
10272 if lvl == 0 {
10273 let parent = (0..fi)
10274 .rev()
10275 .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
10276 return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
10277 }
10278 }
10279 }
10280 }
10281 None
10282 }
10283
10284 pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
10285 let f = self.frames[fi].lua()?;
10286 // metamethod handler frames carry the event tag (e.g. "close" for
10287 // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
10288 if f.is_hook {
10289 return Some(("hook", "?".to_string()));
10290 }
10291 if let Some(tm) = f.tm {
10292 return Some(("metamethod", tm_debug_name(self.version, tm)));
10293 }
10294 // a frame entered across a C boundary has no naming call instruction
10295 if fi == 0 || f.from_c {
10296 return None;
10297 }
10298 // the caller's call instruction names this frame; a continuation frame
10299 // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
10300 // already short-circuits those.
10301 let caller = self.frames[fi - 1].lua()?;
10302 let caller_proto = caller.closure.proto;
10303 let p: &crate::runtime::Proto = &caller_proto;
10304 let call_pc = (caller.pc as usize).checked_sub(1)?;
10305 let instr = *p.code.get(call_pc)?;
10306 match instr.op() {
10307 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10308 Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10309 _ => None,
10310 }
10311 }
10312
10313 /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
10314 /// (PUC names a C function from the call instruction that invoked it). The
10315 /// native was called by the nearest Lua frame below `fi` (skipping pcall/
10316 /// xpcall continuations); that frame's call instruction names it.
10317 pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
10318 // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
10319 // the synthetic C edge between the __gc finalizer (top Lua frame, has
10320 // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
10321 // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
10322 // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
10323 // sets the tag on the handler frame only, so level 2 there still
10324 // names the calling Lua frame's call instruction (5.5 locals.lua
10325 // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
10326 if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
10327 && fr.tm == Some("gc")
10328 {
10329 let name = tm_debug_name(self.version, "gc");
10330 return Some(("metamethod", name));
10331 }
10332 let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
10333 let caller = self.frames[caller_fi].lua()?;
10334 let p = &caller.closure.proto;
10335 let call_pc = (caller.pc as usize).checked_sub(1)?;
10336 let instr = *p.code.get(call_pc)?;
10337 match instr.op() {
10338 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10339 _ => None,
10340 }
10341 }
10342
10343 /// Native value currently sitting on the synthetic C edge identified by
10344 /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
10345 /// above `fi` (each one corresponds to one native pushing the hook) and
10346 /// indexes into `running_natives` from the top, also skipping the caller
10347 /// of `getinfo` itself (the native that is currently asking).
10348 /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
10349 /// expects the just-entered C function.
10350 pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
10351 let idx = self.c_frame_native_idx(fi)?;
10352 Some(Value::Native(self.running_natives[idx]))
10353 }
10354
10355 /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
10356 /// so `local_at` can index the native's argument window like PUC's
10357 /// `(C temporary)` path. Returns `None` when no matching native exists
10358 /// (e.g. the C edge corresponds to a non-native boundary).
10359 pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
10360 let idx = self.c_frame_native_idx(fi)?;
10361 self.running_native_slots.get(idx).copied()
10362 }
10363
10364 fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
10365 let n_above = self.frames[fi..]
10366 .iter()
10367 .filter_map(CallFrame::lua)
10368 .filter(|f| f.from_c)
10369 .count();
10370 if n_above == 0 {
10371 return None;
10372 }
10373 // running_natives.last() is the native currently executing (the one
10374 // that called getinfo). Pop it conceptually, then take the n_above-th
10375 // entry from the top of what remains.
10376 let nr = self.running_natives.len().checked_sub(1)?;
10377 nr.checked_sub(n_above)
10378 }
10379
10380 /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
10381 /// native whose function pointer matches `target`, and return its qualified
10382 /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
10383 /// `None` if no match is found. Used by `arg_error` when the running native
10384 /// was invoked from another native (PUC `ar.name == NULL` at level 0).
10385 /// True when the innermost call frame is a pcall/xpcall
10386 /// continuation — i.e. the currently-running native was invoked
10387 /// DIRECTLY by pcall/xpcall rather than by Lua code. PUC's
10388 /// luaL_argerror sees ar.name == NULL there (the caller is C)
10389 /// and qualifies the name via pushglobalfuncname — so
10390 /// `pcall(coroutine.resume, 42)` blames 'coroutine.resume'
10391 /// (v2.14 fixture 5.5/365).
10392 pub(crate) fn caller_is_protected_cont(&self) -> bool {
10393 matches!(
10394 self.frames.last(),
10395 Some(CallFrame::Cont(nc))
10396 if matches!(nc.kind, ContKind::Pcall | ContKind::Xpcall { .. })
10397 )
10398 }
10399
10400 pub(crate) fn pushglobalfuncname(
10401 &mut self,
10402 target: crate::runtime::value::NativeFn,
10403 ) -> Option<String> {
10404 let pkg_k = Value::Str(self.heap.intern(b"package"));
10405 let pkg = match self.globals().get(pkg_k) {
10406 Value::Table(t) => t,
10407 _ => return None,
10408 };
10409 let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10410 let loaded = match pkg.get(loaded_k) {
10411 Value::Table(t) => t,
10412 _ => return None,
10413 };
10414 let matches = |v: Value| -> bool {
10415 matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10416 };
10417 let mut k = Value::Nil;
10418 while let Ok(Some((nk, nv))) = loaded.next(k) {
10419 k = nk;
10420 let Value::Str(outer) = nk else { continue };
10421 let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10422 if matches(nv) {
10423 return Some(if outer == "_G" { String::new() } else { outer });
10424 }
10425 if let Value::Table(inner_t) = nv {
10426 let mut k2 = Value::Nil;
10427 while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10428 k2 = nk2;
10429 if matches(nv2)
10430 && let Value::Str(inner) = nk2
10431 {
10432 let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10433 return Some(if outer == "_G" {
10434 inner
10435 } else {
10436 format!("{outer}.{inner}")
10437 });
10438 }
10439 }
10440 }
10441 }
10442 None
10443 }
10444
10445 /// Name and namewhat of the native currently running on behalf of the top
10446 /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10447 /// `luaL_argerror` rewrite a method call's self-argument error.
10448 pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10449 let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10450 let p = &caller.closure.proto;
10451 let call_pc = (caller.pc as usize).checked_sub(1)?;
10452 let instr = *p.code.get(call_pc)?;
10453 match instr.op() {
10454 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10455 _ => None,
10456 }
10457 }
10458
10459 pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10460 let f = self.frames[fi].lua().expect("Lua frame");
10461 let proto = f.closure.proto;
10462 let pc = (f.pc as usize)
10463 .saturating_sub(1)
10464 .min(proto.lines.len().saturating_sub(1));
10465 let line = proto.lines.get(pc).copied().unwrap_or(0);
10466 // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10467 // (independent of any later write to a materialized vararg table's `n`).
10468 // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10469 // any nonzero `tailcalls` count flips it true.
10470 (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10471 }
10472
10473 /// Read an upvalue cell of a closure (debug.getupvalue).
10474 pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10475 match cl.upvals()[idx].state() {
10476 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10477 UpvalState::Closed(v) => v,
10478 }
10479 }
10480
10481 /// Write an upvalue cell of a closure (debug.setupvalue).
10482 pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10483 let uv = cl.upvals()[idx];
10484 match uv.state() {
10485 UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10486 UpvalState::Closed(_) => {
10487 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10488 unsafe { uv.as_mut() }.set_closed(v);
10489 self.heap
10490 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10491 }
10492 }
10493 }
10494
10495 /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10496 /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10497 /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10498 /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10499 /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10500 /// else `"function <src:line_defined>"` for an anonymous Lua function.
10501 /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10502 /// Walks the coroutine's saved frames and prepends a synthetic C-level
10503 /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10504 /// (its `resume_at` marker is set). `level` skips entries from the top
10505 /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10506 /// frame; etc.). db.lua :764-:768 sample several levels.
10507 pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10508 use crate::runtime::CoroStatus;
10509 const LEVELS1: usize = 10;
10510 const LEVELS2: usize = 11;
10511 #[derive(Clone, Copy)]
10512 enum VFrame<'a> {
10513 Lua(&'a crate::runtime::function::Frame),
10514 CPcall,
10515 CXpcall,
10516 CYield,
10517 /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10518 /// call collapsed into the next Lua frame down the chain.
10519 Tail,
10520 }
10521 let v51 = self.version <= LuaVersion::Lua51;
10522 let mut visible: Vec<VFrame<'_>> = Vec::new();
10523 // PUC's level 0 entry on a suspended coroutine is the C call where it
10524 // paused — `coroutine.yield` for a yielded thread.
10525 if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10526 visible.push(VFrame::CYield);
10527 }
10528 for cf in co.frames.iter().rev() {
10529 match cf {
10530 CallFrame::Lua(f) => {
10531 visible.push(VFrame::Lua(f));
10532 if v51 {
10533 for _ in 0..f.tailcalls {
10534 visible.push(VFrame::Tail);
10535 }
10536 }
10537 }
10538 CallFrame::Cont(nc) => match nc.kind {
10539 ContKind::Pcall => visible.push(VFrame::CPcall),
10540 ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10541 _ => {}
10542 },
10543 }
10544 }
10545 if level < 0 {
10546 level = 0;
10547 }
10548 if (level as usize) >= visible.len() {
10549 return Vec::new();
10550 }
10551 let visible = &visible[level as usize..];
10552 let total = visible.len();
10553 let mut out = Vec::new();
10554 // To name a Lua frame, PUC consults the caller's OP_CALL via
10555 // getobjname: find the index `fi` of the current frame in co.frames,
10556 // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10557 let coro_frame_name = |frames: &[CallFrame],
10558 target: &crate::runtime::function::Frame|
10559 -> Option<(&'static str, String)> {
10560 let fi = frames
10561 .iter()
10562 .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10563 if fi == 0 || target.from_c {
10564 return None;
10565 }
10566 let caller = frames[fi - 1].lua()?;
10567 let p = &caller.closure.proto;
10568 let call_pc = (caller.pc as usize).checked_sub(1)?;
10569 let instr = *p.code.get(call_pc)?;
10570 match instr.op() {
10571 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10572 Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10573 _ => None,
10574 }
10575 };
10576 let frames = &co.frames;
10577 let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10578 VFrame::Lua(f) => {
10579 let proto = f.closure.proto;
10580 let src = chunk_display_name(proto.source.as_ptr());
10581 let pc = (f.pc as usize)
10582 .saturating_sub(1)
10583 .min(proto.lines.len().saturating_sub(1));
10584 let line = proto.lines.get(pc).copied().unwrap_or(0);
10585 out.extend_from_slice(b"\n\t");
10586 out.extend_from_slice(src);
10587 out.extend_from_slice(format!(":{line}: in ").as_bytes());
10588 if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10589 out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10590 } else if proto.line_defined == 0 {
10591 out.extend_from_slice(b"main chunk");
10592 } else {
10593 out.extend_from_slice(
10594 format!(
10595 "function <{}:{}>",
10596 String::from_utf8_lossy(src),
10597 proto.line_defined
10598 )
10599 .as_bytes(),
10600 );
10601 }
10602 }
10603 VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10604 VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10605 VFrame::CYield => {
10606 // PUC `pushglobalfuncname` reports `yield` as
10607 // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10608 // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10609 // `'yield'` (5.5 :841).
10610 let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10611 if qualified {
10612 out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10613 } else {
10614 out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10615 }
10616 }
10617 VFrame::Tail => {
10618 // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10619 // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10620 // :403 asserts these appear once per collapsed tail call.
10621 out.extend_from_slice(b"\n\t(...tail calls...)");
10622 }
10623 };
10624 if total <= LEVELS1 + LEVELS2 {
10625 for &v in visible {
10626 emit(&mut out, v);
10627 }
10628 } else {
10629 for &v in &visible[..LEVELS1] {
10630 emit(&mut out, v);
10631 }
10632 let skip = total - LEVELS1 - LEVELS2;
10633 out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10634 for &v in &visible[total - LEVELS2..] {
10635 emit(&mut out, v);
10636 }
10637 }
10638 out
10639 }
10640
10641 pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10642 // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10643 // (11) bottom frames; if there are more, the middle is collapsed into
10644 // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10645 // overflow traceback would balloon to tens of megabytes (errors.lua's
10646 // stack-overflow test ran string.gmatch over the resulting buffer).
10647 const LEVELS1: usize = 10;
10648 const LEVELS2: usize = 11;
10649 // Collect visible frames in top-down order (deepest first). Both Lua
10650 // activations and pcall/xpcall continuations (which stand in for a
10651 // C-level pcall on the stack) are visible; PUC's traceback enumerates
10652 // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10653 #[derive(Clone, Copy)]
10654 enum VFrame {
10655 Lua(usize),
10656 CPcall,
10657 CXpcall,
10658 }
10659 let mut visible: Vec<VFrame> = Vec::new();
10660 for (fi, cf) in self.frames.iter().enumerate().rev() {
10661 match cf {
10662 CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10663 CallFrame::Cont(nc) => match nc.kind {
10664 ContKind::Pcall => visible.push(VFrame::CPcall),
10665 ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10666 _ => {}
10667 },
10668 }
10669 }
10670 // PUC `luaL_traceback` starts enumerating at the given `level` (in
10671 // terms of L1's CallInfo chain). For the running-thread case the C
10672 // frame for debug.traceback itself is level 0 and luna's `visible`
10673 // doesn't include it — so level=1 (PUC default) means "emit from the
10674 // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10675 // the top. level<=0 emits nothing extra here (d_traceback handles the
10676 // "[C]: in function 'traceback'" prefix for level==0 separately).
10677 let skip = (level - 1).max(0) as usize;
10678 if skip >= visible.len() {
10679 return Vec::new();
10680 }
10681 let visible = &visible[skip..];
10682 let total = visible.len();
10683 let mut out = Vec::new();
10684 let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10685 VFrame::Lua(fi) => {
10686 let f = this.frames[fi].lua().expect("Lua frame");
10687 let proto = f.closure.proto;
10688 let src = chunk_display_name(proto.source.as_ptr());
10689 let pc = (f.pc as usize)
10690 .saturating_sub(1)
10691 .min(proto.lines.len().saturating_sub(1));
10692 let line = proto.lines.get(pc).copied().unwrap_or(0);
10693 out.extend_from_slice(b"\n\t");
10694 out.extend_from_slice(src);
10695 out.extend_from_slice(format!(":{line}: in ").as_bytes());
10696 if let Some((namewhat, name)) = this.frame_name(fi) {
10697 out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10698 } else if proto.line_defined == 0 {
10699 out.extend_from_slice(b"main chunk");
10700 } else {
10701 out.extend_from_slice(
10702 format!(
10703 "function <{}:{}>",
10704 String::from_utf8_lossy(src),
10705 proto.line_defined
10706 )
10707 .as_bytes(),
10708 );
10709 }
10710 }
10711 VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10712 VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10713 };
10714 if total <= LEVELS1 + LEVELS2 {
10715 for &v in visible {
10716 emit_frame(&mut out, v, self);
10717 }
10718 } else {
10719 for &v in &visible[..LEVELS1] {
10720 emit_frame(&mut out, v, self);
10721 }
10722 let dropped = total - LEVELS1 - LEVELS2;
10723 out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10724 for &v in &visible[total - LEVELS2..] {
10725 emit_frame(&mut out, v, self);
10726 }
10727 }
10728 out
10729 }
10730}
10731
10732// ────────────────────────────────────────────────────────────────────
10733// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10734//
10735// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10736// trace-meta section after `vm.load`, resolves each entry's
10737// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10738// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10739// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10740// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10741// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10742// mcode without further plumbing — same code path the runtime JIT
10743// uses.
10744//
10745// Why a separate impl block: keeps the AOT API surface (one fn) easy
10746// to locate when grep'ing for `install_aot_trace`, without dragging
10747// the 8500-line `impl Vm` block above.
10748// ────────────────────────────────────────────────────────────────────
10749
10750impl Vm {
10751 /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10752 /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10753 /// fires it at the trace's `head_pc`. This is the runtime install
10754 /// API the deploy-side `luna-runtime-helpers` resolver calls once
10755 /// per AOT-emitted trace meta entry, after looking up `proto` by
10756 /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10757 ///
10758 /// # What this does
10759 ///
10760 /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10761 /// The trace's `entry` fn ptr must already point at runnable
10762 /// machine code (the AOT linker resolved the symbol at link time;
10763 /// the deploy resolver passes the address verbatim).
10764 ///
10765 /// # What this does NOT do
10766 ///
10767 /// - **No deduplication.** Calling twice with the same `head_pc`
10768 /// pushes two entries; the dispatcher's `find` will pick the
10769 /// first match. The deploy resolver is responsible for not
10770 /// double-installing.
10771 /// - **No invalidation of the runtime JIT cache.** If the runtime
10772 /// JIT later records + compiles a trace for the same
10773 /// `(proto, head_pc)`, both coexist on `proto.traces` and the
10774 /// dispatcher's `find` picks whichever appears first. AOT
10775 /// traces install before any runtime recording is possible
10776 /// (resolver runs before `vm.load` returns its first closure),
10777 /// so AOT traces win the race for the same site.
10778 /// - **No coverage gating.** AOT traces are trusted by
10779 /// construction — they were validated at compile time. Setting
10780 /// `dispatchable: false` on the input would silently disable
10781 /// dispatch; the caller controls that flag.
10782 ///
10783 /// # Safety / soundness
10784 ///
10785 /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10786 /// machine code). Soundness contract:
10787 ///
10788 /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10789 /// In the AOT-binary deploy shape this is trivially satisfied —
10790 /// the fn lives in the binary's `.text`.
10791 /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10792 /// what the trace's IR actually compiled against; the dispatcher
10793 /// uses them to marshal `reg_state` in and out without further
10794 /// validation. A mismatch corrupts vm.stack.
10795 ///
10796 /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10797 /// invariants hold; this fn is a plain push — no validation that
10798 /// would slow the dispatcher's hot path either.
10799 pub fn install_aot_trace(
10800 &mut self,
10801 proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10802 trace: crate::jit::trace::CompiledTrace,
10803 ) {
10804 let _ = self; // resolver passes &mut Vm for symmetry with future
10805 // pending-install + hash-walk variants; nothing on `self` to
10806 // mutate today because the install target lives on the Proto.
10807 proto.traces.borrow_mut().push(TArc::new(trace));
10808 }
10809
10810 /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10811 /// reachable from `root` and return `(proto, stable_hash)` pairs
10812 /// for every Proto found. Used by the deploy-side resolver to
10813 /// match AOT-emitted `proto_hash` keys against the freshly
10814 /// `undump`'d chunk's protos.
10815 ///
10816 /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10817 /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10818 /// multiple nested closures (rare; the cache field would catch
10819 /// the closure-side dedup, not the Proto side) is reported once.
10820 ///
10821 /// # Why on `&Vm` and not a free fn
10822 ///
10823 /// Keeps the AOT install API discoverable on the Vm surface —
10824 /// `vm.collect_proto_hashes(root)` reads naturally next to
10825 /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10826 /// any Vm field, so `&self` (read-only) is enough.
10827 pub fn collect_proto_hashes(
10828 &self,
10829 root: crate::runtime::Gc<crate::runtime::function::Proto>,
10830 ) -> Vec<(
10831 crate::runtime::Gc<crate::runtime::function::Proto>,
10832 [u8; 16],
10833 )> {
10834 let _ = self;
10835 let mut out = Vec::new();
10836 let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10837 std::collections::HashSet::new();
10838 let mut queue: std::collections::VecDeque<
10839 crate::runtime::Gc<crate::runtime::function::Proto>,
10840 > = std::collections::VecDeque::new();
10841 queue.push_back(root);
10842 while let Some(p) = queue.pop_front() {
10843 let key = p.as_ptr() as *const _;
10844 if !seen.insert(key) {
10845 continue;
10846 }
10847 out.push((p, p.stable_hash()));
10848 for &child in p.protos.iter() {
10849 queue.push_back(child);
10850 }
10851 }
10852 out
10853 }
10854}