luna_core/vm/exec.rs
1//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
2//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
3//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
4//!
5//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
6//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
7//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
8//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16 AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17 MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
24/// A Lua virtual machine: one OS thread's worth of Lua state.
25///
26/// # Threading model
27///
28/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
29/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
30/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
31/// design. Embedders that want concurrency spawn one `Vm` per OS
32/// thread (or per single-thread Tokio worker) and exchange data via
33/// channels. See [`docs/threading.md`](../../docs/threading.md) for
34/// canonical embedding patterns including Tokio `current_thread`,
35/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
36///
37/// The constraint is enforced at compile time:
38///
39/// ```compile_fail
40/// fn must_be_send<T: Send>() {}
41/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
42/// ```
43///
44/// A future `feature = "send"` (post-v1.1 sprint) will gate an
45/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
46/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
47pub struct Vm {
48 /// The GC heap owned by this VM. Embedders normally interact via the
49 /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
50 /// the heap directly.
51 pub heap: Heap,
52 stack: Vec<Value>,
53 frames: Vec<CallFrame>,
54 /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
55 /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
56 /// helpers (debug-asserted on use). NOT consumed by readers yet;
57 /// week 1 is pure scaffold. Week 2-N migrations replace readers
58 /// one slice at a time, then remove `frames: Vec<CallFrame>` in
59 /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
60 frames_top: u32,
61 /// open upvalues, sorted ascending by stack slot
62 open_upvals: Vec<(u32, Gc<Upvalue>)>,
63 /// to-be-closed slots, ascending
64 tbc: Vec<u32>,
65 /// logical stack top for multi-result sequences
66 pub(crate) top: u32,
67 globals: Gc<Table>,
68 /// shared metatable for all strings (populated by the string lib, P04)
69 /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
70 /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
71 /// own. Settable via debug.setmetatable.
72 type_mt: [Option<Gc<Table>>; 5],
73 /// pre-interned metamethod event names, indexed by `Mm`
74 mm_names: Vec<Gc<crate::runtime::LuaStr>>,
75 /// native↔Lua nesting depth (PUC C-stack guard analogue)
76 c_depth: u32,
77 /// number of live pcall/xpcall continuation frames on the running thread
78 /// (PUC counts these against nCcalls). Bounds protected-call recursion the
79 /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
80 /// with the coroutine context, since continuations survive a yield.
81 pcall_depth: u32,
82 /// number of non-yieldable C calls in flight on the running thread (PUC's
83 /// `L->nny`). A library callback that runs via synchronous Rust recursion
84 /// (sort comparator, gsub replacement) cannot be continued across a yield,
85 /// so it bumps this for its duration; `coroutine.yield` inside hits the
86 /// C-call boundary and errors. Always 0 at a suspend point (a yield can
87 /// never cross such a call), so it needs no per-thread save/restore.
88 nny: u32,
89 /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
90 /// stack-overflow that surfaces *inside* the handler is reported as PUC's
91 /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
92 /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
93 /// loop)` then matches. PUC tracks this via the soft-cap window
94 /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
95 /// scope explicitly.
96 msgh_depth: u32,
97 /// set by a coroutine closing itself (`coroutine.close()` on the running
98 /// thread): the to-be-closed handlers have already run; the thread must now
99 /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
100 /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
101 /// unwind, so a protecting pcall cannot catch it) the termination.
102 terminating: Option<Option<Value>>,
103 /// xoshiro256** state (math.random)
104 rng: [u64; 4],
105 /// VM creation time (os.clock)
106 started: std::time::Instant,
107 version: LuaVersion,
108 /// error object being threaded through a chain of __close handlers; a GC
109 /// root for the duration (a handler may trigger collection)
110 closing_err: Option<Value>,
111 /// the coroutine whose context is currently live in the fields above;
112 /// `None` while the main thread runs (P05)
113 current: Option<Gc<crate::runtime::Coro>>,
114 /// the main thread's saved execution context while a coroutine runs
115 main_ctx: Option<SavedCtx>,
116 /// set by `coroutine.yield` to suspend the running coroutine: the yielded
117 /// values plus the slot/result-count needed to finish the yielding call on
118 /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
119 yielding: Option<(Vec<Value>, u32, i32)>,
120 /// results expected by the in-flight native call (so `yield` knows how many
121 /// values its call site wants when it suspends)
122 native_nresults: i32,
123 /// identity object for the main thread, returned by `coroutine.running`
124 /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
125 main_coro: Option<Gc<Coro>>,
126 /// `collectgarbage` mode name ("incremental"/"generational"). The collector
127 /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
128 /// switches report the previous one, as PUC does.
129 gc_mode: &'static str,
130 /// the live-register boundary of the running thread for GC rooting (PUC's
131 /// `L->top`): set precisely at each GC safe point so freed temporary
132 /// registers above it are not rooted. Without this the collector roots the
133 /// whole stack window, pinning weak-table values stranded in stale temps
134 /// (e.g. closure.lua's `while x[1]` GC-detection loop).
135 pub(crate) gc_top: u32,
136 /// `collectgarbage("param", name [,value])` pacing parameters. The collector
137 /// is still stop-the-world, so these are stored/returned for API fidelity
138 /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
139 /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
140 gc_pause: i64,
141 gc_stepmul: i64,
142 gc_stepsize: i64,
143 /// true while `__gc` finalizers are being run, so a finalizer that calls
144 /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
145 /// `collectgarbage` yields fail).
146 gc_finalizing: bool,
147 /// C ABI scratch (`capi` module): the host-visible value stack that C
148 /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
149 /// Kept here (instead of in a separate `LuaState` wrapper) so the
150 /// trampoline that bridges to a `LuaCFunction` can safely cast the
151 /// Vm pointer it already holds to the public `*mut LuaState` type
152 /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
153 pub capi_stack: Vec<crate::runtime::Value>,
154 /// Pinned CString backing the pointer last returned by `lua_tostring`;
155 /// valid until the next `lua_tostring` on the same Vm.
156 pub capi_cstr_pin: Option<std::ffi::CString>,
157 /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
158 /// concatenate across continuation calls until a non-`tocont` call
159 /// flushes; the default warnf recognises `@on`/`@off` control messages
160 /// and starts disabled. luna's `emit_warn` mirrors the default warnf
161 /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
162 /// keep the older raise semantics).
163 pub(crate) warn_state: WarnState,
164 pub(crate) warn_buf: Vec<u8>,
165 /// P09 embedding cooperative budget: a per-Vm tick counter that the run
166 /// loop decrements once per dispatch turn. When it hits zero the loop
167 /// raises a catchable "instruction budget exceeded" error so the embedder
168 /// can yield control back to its caller (short-script eval, game
169 /// frame budgets). `None` = unbounded; reset on each call via
170 /// `set_instr_budget`.
171 pub(crate) instr_budget: Option<i64>,
172 // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
173 // `self.jit` below + `crate::vm::jit_state` for field docs.
174 // (Was: jit_enabled here.)
175 // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
176 // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
177 // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
178 // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
179 // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
180 // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
181 // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
182 // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
183 // materialize_emit,closure_emit}_count — all moved to JitState.
184 /// Bytecode-loading gate. Default `true`. Sandbox embedders should
185 /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
186 /// precompiled chunks (which bypass the parser's depth / opcode
187 /// limits). When `false`, the loader rejects any source whose first
188 /// byte is the bytecode signature `\27` ("`\27Lua`").
189 pub(crate) bytecode_loading: bool,
190 /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
191 /// a strictly larger trust surface than luna's own dump format
192 /// (third-party toolchain bugs, malformed chunks, unknown opcode
193 /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
194 /// through the per-dialect PUC translators in `crate::vm::dump::puc`
195 /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
196 /// Embedder toggles via `set_puc_bytecode_loading`.
197 pub(crate) puc_bytecode_loading: bool,
198 /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
199 /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
200 /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
201 /// source exceeds this, the loader returns the PUC-shaped
202 /// `not enough memory` error before the host allocator is asked to
203 /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
204 /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
205 /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
206 /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
207 /// Embedders that genuinely need to load > 256 MiB sources widen the
208 /// cap via [`Vm::set_loader_input_budget`].
209 pub(crate) loader_input_budget: usize,
210 /// In-process log of fully-emitted warnings (each entry = one flushed
211 /// message, sans the "Lua warning: " prefix and trailing newline). Lets
212 /// tests assert what was warned without scraping stderr.
213 pub(crate) warn_log: Vec<Vec<u8>>,
214 /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
215 /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
216 /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
217 /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
218 /// shape stub for db.lua :328; if other registry-keyed APIs land later
219 /// they can share this table.
220 pub(crate) registry: Option<Gc<Table>>,
221 /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
222 /// registry entry); attached to every file userdata the io library makes
223 pub(crate) file_mt: Option<Gc<Table>>,
224 /// io library default input/output streams (PUC registry IO_INPUT/IO_OUTPUT)
225 pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
226 pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
227 /// the running thread's debug hook state (`debug.sethook`); per-thread,
228 /// swapped with the execution context on a coroutine resume/yield
229 pub(crate) hook: HookState,
230 /// true while the hook itself runs, so its own execution fires no events
231 /// (PUC clears the mask for the duration)
232 pub(crate) in_hook: bool,
233 /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
234 /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
235 /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
236 /// accumulate the count instead of capping at 1.
237 pub(crate) pending_tailcalls: u32,
238 /// Name of the C native that just propagated an error (captured before
239 /// the native is popped from `running_natives`). Lets a dying coroutine
240 /// preserve `[C]: in function '<name>'` at the top of its traceback
241 /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
242 /// luna's native frames are off-stack so we stash the name explicitly.
243 pub(crate) errored_native: Option<String>,
244 /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
245 /// (relative to the activation's func slot) and the number transferred.
246 /// Set just before firing a call/return hook, read by `getinfo("r")`.
247 pub(crate) hook_ftransfer: u16,
248 pub(crate) hook_ntransfer: u16,
249 /// metamethod event tag (e.g. "close") to attach to the next Lua frame
250 /// pushed by `push_frame`; `close_slots` sets this before calling a
251 /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
252 /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
253 pending_tm: Option<&'static str>,
254 /// `true` when the next `push_frame` is the user hook function itself,
255 /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
256 /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
257 pending_is_hook: bool,
258 /// traceback snapshot taken at the error point (the first `unwind` entry
259 /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
260 /// the failed frames are popped — can still see the error point's stack
261 /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
262 /// stack intact; we approximate by snapshotting the string and letting
263 /// `d_traceback` consume it. Cleared on Cont catch and at host-level
264 /// `call_value` entry (`public_call_depth == 0`).
265 pub(crate) error_traceback: Option<Vec<u8>>,
266 /// nesting depth of public `call_value` entries (host vs. internal). The
267 /// outermost entry (depth 0) resets per-error state (`error_traceback`);
268 /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
269 public_call_depth: u32,
270 /// stack of native (`Value::Native`) closures currently running on the
271 /// Rust call stack. `begin_call` pushes the closure before invoking
272 /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
273 /// native call (PUC `ar.name == NULL` at level 0 because the level-0
274 /// caller is C, not Lua) and qualify the running function's name via
275 /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
276 pub(crate) running_natives: Vec<Gc<NativeClosure>>,
277 /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
278 /// the native's argument-window head and width, so `debug.getlocal`
279 /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
280 pub(crate) running_native_slots: Vec<(u32, u32)>,
281 // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
282 // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
283 // trace_compiler — all moved to JitState. See `jit` below.
284 /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
285 /// when `chunk_compiler` / `trace_compiler` are
286 /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
287 ///
288 /// `#[doc(hidden)] pub` so the `luna` crate's
289 /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
290 /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
291 /// field). Not part of the embedder-facing API surface.
292 #[doc(hidden)]
293 pub jit: crate::vm::jit_state::JitState,
294
295 /// B12 host roots — append-only `Vec<Value>` traced as an extra
296 /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
297 /// `LuaRoot`) hold indices into this vector so the underlying
298 /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
299 ///
300 /// v1.1 strategy: append-only with explicit `unpin_all` / new Vm.
301 /// Slot recycling lands in Phase 3 alongside B8 LuaUserdata, when
302 /// the trade-offs between `Drop` plumbing and append-only memory
303 /// growth have a richer ergonomics envelope to live in.
304 pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
305 /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
306 /// back if non-empty, else extends `host_roots`. Generation
307 /// overflow at `u32::MAX` retires the slot (NOT pushed here).
308 pub(crate) host_roots_free: Vec<u32>,
309
310 /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
311 /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
312 /// callback). Each entry is one in-flight working buffer; `gc_roots`
313 /// extends with every contained `Value` so a `collectgarbage()`
314 /// inside the comparator cannot free strings/tables snapshotted
315 /// here. Nested sorts push a new buffer on entry, pop on exit
316 /// (sort.lua's `load(..)(); collectgarbage()` compare callback
317 /// regression).
318 pub(crate) sort_scratch: Vec<Vec<Value>>,
319
320 /// v1.3 Phase ML — MacroLua compile-time macro registry.
321 /// Pre-populated with built-in macros (`@quote` / `@unquote` /
322 /// `@if` / `@gensym`) at construction time when `version ==
323 /// LuaVersion::MacroLua`; embedders register custom macros via
324 /// [`Vm::define_macro`]. The expander runs once per `load()` call
325 /// between lexing and parsing (only when `is_macro_lua()`).
326 pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,
327
328 /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
329 /// by `TypeId::of::<T>()` for embedder types implementing
330 /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
331 /// [`Vm::register_userdata`]; metatables are pinned via
332 /// [`Vm::pin_host`] at registration time so the entry's
333 /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
334 pub(crate) userdata_metatables:
335 std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,
336
337 /// B6 — classification of the most recent error raised on this Vm.
338 /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
339 /// at well-known sites (syntax errors, instr-budget trips, native
340 /// callback errors, type errors).
341 pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,
342
343 /// B6 — `(source_name, line)` of the most recent error. Set by the
344 /// dispatcher / lexer / parser; cleared when a new call_value
345 /// enters cleanly.
346 pub(crate) last_error_source: Option<(String, u32)>,
347
348 /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
349 /// the dispatcher hot loop yields cooperatively (sets
350 /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
351 /// to `EvalFuture::poll`) instead of returning a real
352 /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
353 /// for the duration of the future; restored to `false` on
354 /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
355 /// leave it `false` so v1.0 behavior is preserved exactly.
356 pub(crate) async_mode: bool,
357
358 /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
359 /// before driving a slice. The dispatcher itself does not call it
360 /// (the future's poll loop does `wake_by_ref` after observing
361 /// `BudgetExhausted`), but storing the waker keeps the door open
362 /// for Stage 2 async natives to wake the host directly from a
363 /// helper future.
364 pub(crate) async_waker: Option<std::task::Waker>,
365
366 /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
367 /// `instr_budget` at the start of each `EvalFuture::poll` slice.
368 /// Default 10_000 (RFC §D5). Tunable via
369 /// [`Vm::set_async_slice`].
370 pub(crate) async_slice_size: i64,
371
372 /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
373 /// budget exhaustion fires; checked by `exec_with` (so the
374 /// sentinel propagates without `unwind` running, mirroring
375 /// `yielding.is_some()`) and by `call_value_impl` (so the call
376 /// frames survive for the next poll). Cleared by `drive_one`
377 /// after translating it to `DispatchOutcome::BudgetExhausted`.
378 pub(crate) host_yield_pending: bool,
379
380 /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
381 /// when an async-marked [`NativeClosure`] is invoked under
382 /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
383 /// mechanism as `host_yield_pending` — see `exec_with` +
384 /// `call_value_impl`), stashes the in-flight future +
385 /// post-completion context here, and surfaces them to
386 /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
387 /// once the future is moved out into a
388 /// `DispatchOutcome::AsyncNativeAwaiting`.
389 pub(crate) pending_async_native_fut:
390 Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,
391
392 /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
393 /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
394 /// commit the future's eventual `Ok(nret)` back into the calling
395 /// frame's expected result slots. Recorded by the dispatcher;
396 /// consumed by [`Vm::commit_async_native_result`] after the
397 /// future resolves.
398 pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
399}
400
/// v1.1 B10 Stage 2 — the call-site state that has to survive the
/// cooperative-yield boundary while an async native is in flight.
///
/// The dispatcher records one of these when it sends a `NativeClosure`
/// with `is_async == true` down the cooperative path. `EvalFuture::poll`
/// passes it back to [`Vm::commit_async_native_result`] once the awaited
/// future has resolved, so that `finish_results` (and the post-call GC
/// checkpoint) can run as though the native had completed synchronously.
#[derive(Clone, Copy)]
pub(crate) struct AsyncNativeCallCtx {
    /// `func_slot` element of the recorded call-site quad (presumably the
    /// stack slot of the called native — confirm against the dispatcher).
    pub func_slot: u32,
    /// Kept for parity with the `native_nresults`/`gc_top` bookkeeping of
    /// the sync native-call path; reserved for Stage 3+ hook firing and
    /// traceback shaping. Stage 2 does not read it yet.
    #[allow(dead_code)]
    pub nargs: u32,
    /// `nresults` element of the recorded call-site quad.
    pub nresults: i32,
    /// Kept for Stage 3+ traceback and GC-root-window auditing. Stage 2
    /// reads `Vm.gc_top` directly after the resume, so nothing reads this
    /// today; it is carried so that a Stage 3 audit can confirm the
    /// pre-suspend root window matches the post-resume one.
    #[allow(dead_code)]
    pub gc_top: u32,
}
425
426/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
427/// hookcount). `func` is the Lua hook; the booleans are the PUC mask bits.
428#[derive(Clone, Copy, Default)]
429pub struct HookState {
430 /// the hook function (`None` when no hook is installed)
431 pub func: Option<Value>,
432 /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
433 /// (Rust first); both can be installed simultaneously, but most
434 /// embedders pick one.
435 pub rust_func: Option<RustDebugHook>,
436 /// LUA_MASKCALL — fire on function entry
437 pub call: bool,
438 /// LUA_MASKRET — fire on function return
439 pub ret: bool,
440 /// LUA_MASKLINE — fire on source-line change
441 pub line: bool,
442 /// LUA_MASKCOUNT — fire every `count_base` instructions
443 pub count: bool,
444 /// instruction count between count events (PUC basehookcount)
445 pub count_base: i64,
446 /// instructions left until the next count event (PUC hookcount)
447 pub count_left: i64,
448}
449
450/// Rust-side debug hook callback (B11). Receives the `Vm` plus a
451/// classified event. The callback runs synchronously in the
452/// dispatcher; the hook flag (`in_hook`) is set for its duration so
453/// hook recursion is suppressed.
454pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
/// The kinds of debug event a [`RustDebugHook`] can be called with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RustHookEvent {
    /// A function was entered (`hook_call` analogue).
    Call,
    /// A function returned (`hook_return` analogue).
    Return,
    /// A function was entered through a tail call (PUC 5.2+ reports this
    /// separately from a plain `Call`).
    TailCall,
    /// The current source line changed; the payload is the 1-based line
    /// number.
    Line(u32),
    /// Instruction-count event, fired every `count_base` instructions.
    Count,
}
470
/// Subscribe to function-call events.
///
/// This and the other `HOOK_MASK_*` constants are the mask flags accepted
/// by [`Vm::set_rust_debug_hook`]; OR them together to subscribe to several
/// event categories with a single hook installation.
pub const HOOK_MASK_CALL: u32 = 1 << 0;
/// Subscribe to function-return events.
pub const HOOK_MASK_RETURN: u32 = 1 << 1;
/// Subscribe to line-change events.
pub const HOOK_MASK_LINE: u32 = 1 << 2;
/// Subscribe to instruction-count events.
pub const HOOK_MASK_COUNT: u32 = 1 << 3;
480
481/// A thread's swapped-out execution context (PUC per-thread stack state).
482struct SavedCtx {
483 stack: Vec<Value>,
484 frames: Vec<CallFrame>,
485 open_upvals: Vec<(u32, Gc<Upvalue>)>,
486 tbc: Vec<u32>,
487 top: u32,
488 pcall_depth: u32,
489 hook: HookState,
490 /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
491 /// the rest of the suspended state so each thread can keep its own
492 /// `setfenv(0, env)` rewire without the swap leaking into another
493 /// thread (5.1 closure.lua :177).
494 globals: Gc<Table>,
495}
496
497/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
498enum Unwound {
499 /// caught by a pcall/xpcall continuation; resume running its caller
500 Caught,
501 /// caught by a continuation that was the entry-level activation; these are
502 /// the call's (wrapped) results
503 CaughtReturn(Vec<Value>),
504 /// no protecting continuation up to `entry_depth`; propagate the error
505 Propagated(LuaError),
506}
507
/// A debug stack level after resolution: either a real Lua frame (an index
/// into `frames`) or a synthetic level standing in for a frame that is not
/// on the Lua stack.
pub(crate) enum DbgKind {
    /// A real Lua frame, identified by its index into `frames`.
    Lua(usize),
    /// A synthetic C level for a `call_value` boundary. The index is the
    /// `from_c` Lua frame the level sits below; it is used to name the
    /// native through its invoking call instruction.
    C(usize),
    /// PUC `CIST_TAIL` placeholder. A Lua-to-Lua tail call collapsed the
    /// caller's activation, so at this slot `debug.getinfo(level)` reports
    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
    /// `func = nil`, and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
    /// both shapes). The index points at the *tail-called* frame whose
    /// `is_tail` flag induced this synthetic level.
    Tail(#[allow(dead_code)] usize),
}
523
524/// Outcome of an index/newindex/comparison fast path: either a directly
525/// computed result, or a metamethod (with the receiver it resolved against) the
526/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
527enum MmOut {
528 /// index → the looked-up value; newindex → done (raw set performed);
529 /// comparison → the boolean result already known
530 Done(Value),
531 /// a metamethod to call; `recv` is the chain element it was found on (the
532 /// extra args — key / value — are supplied by the caller)
533 Mm { func: Value, recv: Value },
534 /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
535 /// carries `__le` — `op_compare` swaps the args and negates the result.
536 /// Lives separate from `Mm` so the synth path can stay yieldable without
537 /// every other Mm caller learning a swap flag they would never set.
538 CompareSynth { func: Value },
539}
540
/// Metamethod events. Each discriminant is the event's index into
/// `Vm::mm_names`; the values are spelled out so the correspondence with
/// the name table is visible at a glance.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
pub(crate) enum Mm {
    Index = 0,
    NewIndex = 1,
    Call = 2,
    ToString = 3,
    Metatable = 4,
    Name = 5,
    Eq = 6,
    Lt = 7,
    Le = 8,
    Concat = 9,
    Len = 10,
    Add = 11,
    Sub = 12,
    Mul = 13,
    Div = 14,
    Mod = 15,
    Pow = 16,
    IDiv = 17,
    BAnd = 18,
    BOr = 19,
    BXor = 20,
    Shl = 21,
    Shr = 22,
    Unm = 23,
    BNot = 24,
    Close = 25,
    Gc = 26,
    Pairs = 27,
}
574
/// Metamethod event names, listed in the same order as the `Mm` variants
/// (one entry per variant, 28 in total).
// Rows are grouped by event family; keep rustfmt from re-flowing them.
#[rustfmt::skip]
const MM_NAMES: [&str; 28] = [
    // access and protocol events
    "__index", "__newindex", "__call", "__tostring", "__metatable", "__name",
    // comparison
    "__eq", "__lt", "__le",
    // concatenation and length
    "__concat", "__len",
    // arithmetic
    "__add", "__sub", "__mul", "__div", "__mod", "__pow", "__idiv",
    // bitwise
    "__band", "__bor", "__bxor", "__shl", "__shr",
    // unary
    "__unm", "__bnot",
    // lifecycle and iteration
    "__close", "__gc", "__pairs",
];
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614 if version <= LuaVersion::Lua53 {
615 format!("__{tm}")
616 } else if tm == "gc" {
617 "__gc".to_string()
618 } else {
619 tm.to_string()
620 }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626 use crate::vm::isa::Op;
627 Some(match op {
628 Op::Add => "add",
629 Op::Sub => "sub",
630 Op::Mul => "mul",
631 Op::Div => "div",
632 Op::Mod => "mod",
633 Op::Pow => "pow",
634 Op::IDiv => "idiv",
635 Op::BAnd => "band",
636 Op::BOr => "bor",
637 Op::BXor => "bxor",
638 Op::Shl => "shl",
639 Op::Shr => "shr",
640 Op::Unm => "unm",
641 Op::BNot => "bnot",
642 Op::Concat => "concat",
643 Op::Len => "len",
644 Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645 Op::SetField | Op::SetTable | Op::SetI => "newindex",
646 Op::Eq | Op::EqK => "eq",
647 Op::Lt => "lt",
648 Op::Le => "le",
649 _ => return None,
650 })
651}
652
/// Upper bound on the length of an `__index`/`__newindex` chain (PUC
/// MAXTAGLOOP).
const MAX_TAG_LOOP: u32 = 2_000;
/// Upper bound on a chain of `__call` metamethods (PUC `MAXCCMT`, lvm.c).
/// 200 links is more than any reasonable program needs and matches
/// PUC 5.4/5.5; the earlier value of `15` was tight enough to fire on
/// calls.lua :194 (N=20).
const MAX_CCMT: u32 = 200;
/// Upper bound on native↔Lua nesting (the PUC LUAI_MAXCCALLS analogue).
const MAX_C_DEPTH: u32 = 200;
/// luna's engine-level cap on the VM stack, consulted by the call-site
/// overflow checks. It sits slightly above PUC's `LUAI_MAXSTACK` so engine
/// internals keep a little headroom over any single library push.
const MAX_LUA_STACK: u32 = 1024 * 1024;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults through
/// `lua_checkstack` in order to refuse multi-value pushes (`table.unpack`
/// returning N values, `string.pack` results, etc.). 5.3 coroutine.lua :530
/// pins it at one million — `for j in {lim-10, …}` expects every
/// j ≥ lim-10 to fail, because the few slots already consumed inside the
/// coroutine push the effective cap below lim-10.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// PUC 5.4+ default warnf state. The base library's `warn` function flips
/// between `Off` and `On` via the `@on` / `@off` control messages; any other
/// `@<word>` control is silently ignored, mirroring `lauxlib.c::checkcontrol`.
///
/// A freshly built Vm starts in `Off` (`Vm::new_inner` initializes
/// `warn_state` to `WarnState::Off`).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum WarnState {
    /// `warn` calls are silently dropped (the initial state, and the state
    /// after `warn("@off")`).
    Off,
    /// `warn` calls are delivered to stderr (after `warn("@on")`).
    On,
}
683
/// Best-effort extraction of a textual message from a `catch_unwind` payload.
///
/// A `panic!` with a formatted message carries a `String` payload, while a
/// `panic!` with a plain string literal carries a `&'static str`; both are
/// returned as an owned `String`. Any other payload type (e.g. one raised
/// through `std::panic::panic_any`) degrades to `"<non-string panic>"`.
///
/// Used by the native-call `catch_unwind` to fold the panic into a Lua error.
///
/// The parameter deliberately stays `&Box<dyn Any + Send>`: callers hand over
/// a reference to the box returned by `catch_unwind`, and the downcasts below
/// auto-deref through the box to the payload inside it.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|text| (*text).to_string())
        })
        .unwrap_or_else(|| "<non-string panic>".to_string())
}
697
/// Combined error type returned by [`Vm::eval`] and friends — either the
/// chunk failed to parse / compile, or it raised at runtime.
///
/// Both variants have a matching `From` impl in this file, so `?` converts
/// a `SyntaxError` or a `LuaError` into `Error` automatically.
#[derive(Debug)]
pub enum Error {
    /// Parse or compile failure.
    Syntax(SyntaxError),
    /// Runtime error raised during execution.
    Runtime(LuaError),
}
707
708impl From<SyntaxError> for Error {
709 fn from(e: SyntaxError) -> Error {
710 Error::Syntax(e)
711 }
712}
713
714impl From<LuaError> for Error {
715 fn from(e: LuaError) -> Error {
716 Error::Runtime(e)
717 }
718}
719
/// State close: finalizers run when the `Vm` goes out of scope.
impl Drop for Vm {
    /// Queue every still-registered finalizable object on the heap, then run
    /// the queued `__gc` finalizers. The order matters: queueing must happen
    /// before `run_finalizers` so there is something to run.
    fn drop(&mut self) {
        // state close: run `__gc` for every still-registered finalizable before
        // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
        // single pass — objects created by a closing finalizer are not
        // re-finalized (they go to the heap's free list directly).
        self.heap.queue_all_finalizers();
        self.run_finalizers();
    }
}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742 frames.push(cf);
743 // Shadow maintenance is debug-only: release builds skip the
744 // increment + assertion entirely. The shadow's purpose in Week 1
745 // is to VERIFY the assumed invariant (frames_top == frames.len())
746 // across all push/pop sites; once Week 2+ migrates readers to
747 // consume the shadow, release will run the increment unconditionally.
748 #[cfg(debug_assertions)]
749 {
750 *frames_top += 1;
751 debug_assert_eq!(
752 *frames_top as usize,
753 frames.len(),
754 "P17-D frames_top out of sync after push",
755 );
756 }
757 #[cfg(not(debug_assertions))]
758 let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763 let r = frames.pop();
764 #[cfg(debug_assertions)]
765 {
766 if r.is_some() {
767 *frames_top = frames_top.saturating_sub(1);
768 }
769 debug_assert_eq!(
770 *frames_top as usize,
771 frames.len(),
772 "P17-D frames_top out of sync after pop",
773 );
774 }
775 #[cfg(not(debug_assertions))]
776 let _ = frames_top;
777 r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — one-time env-var read for
/// `LUNA_AOT_PROBE`.
///
/// Returns `true` iff the variable is set to any non-empty value. The
/// lookup goes through `std::env::var_os`, so a value that is not valid
/// Unicode still counts as "set" (the previous `std::env::var` lookup
/// reported such values as an error and therefore as "off", contradicting
/// the documented contract).
///
/// The environment is consulted only on the first call; the answer is
/// cached in a `OnceLock`, so later changes to the variable within the same
/// process are not observed. Off by default — production deploys don't
/// bleed diagnostic prints.
fn jit_probe_enabled() -> bool {
    static PROBE_ON: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        std::env::var_os("LUNA_AOT_PROBE").is_some_and(|value| !value.is_empty())
    })
}
794
795impl Vm {
796 /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797 /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798 /// the Vec replacement to keep the shadow valid.
799 #[inline(always)]
800 fn frames_resync(&mut self) {
801 // Debug-only Week 1 — see `frames_push_sync` comment.
802 #[cfg(debug_assertions)]
803 {
804 self.frames_top = self.frames.len() as u32;
805 }
806 }
807
808 // ====================================================================
809 // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810 //
811 // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812 // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813 // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814 // sites to consume them. Phase 4 removes Vec<CallFrame>.
815 //
816 // Preconditions (debug-asserted):
817 // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818 // - self.stack.len() > base + max_stack (caller has grown stack)
819 // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820 // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821 //
822 // No release-build cost when unused (LTO strips dead methods).
823 // ====================================================================
824
825 /// Write a Lua frame's closure pointer into `stack[base-2]`.
826 /// The caller must ensure `base >= 2` and the slot is within the
827 /// stack's allocated range.
828 #[inline]
829 #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830 fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831 debug_assert!(
832 base >= 2,
833 "frame closure slot needs base >= 2; got {}",
834 base
835 );
836 let idx = (base - 2) as usize;
837 debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838 self.stack[idx] = Value::Closure(cl);
839 }
840
841 /// Read a Lua frame's closure pointer from `stack[base-2]`.
842 /// Returns `None` if the slot doesn't hold a closure (caller is
843 /// expected to treat that as a corrupt frame).
844 ///
845 /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846 /// to avoid the enum-match cost on the hot path. Tag check via
847 /// 1-byte load + branch + `as_closure_unchecked` payload load.
848 #[inline]
849 #[allow(dead_code)]
850 fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851 debug_assert!(base >= 2);
852 let v = self.stack.get((base - 2) as usize)?;
853 if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854 // SAFETY: tag byte just verified == CLOSURE.
855 Some(unsafe { v.as_closure_unchecked() })
856 } else {
857 None
858 }
859 }
860
861 /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862 /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863 /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864 /// cleanly through the value stack without losing bits.
865 #[inline]
866 #[allow(dead_code)]
867 fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868 debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869 let idx = (base - 1) as usize;
870 debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871 self.stack[idx] = Value::Int(marker.to_raw());
872 }
873
874 /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875 /// `None` if the slot isn't a `Value::Int` (caller treats as a
876 /// corrupt frame); the kind tag itself may still be invalid, in
877 /// which case [`FrameMarker::kind`] returns `None` on the result.
878 ///
879 /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880 /// for the tag check + `as_int_unchecked` for the payload load.
881 #[inline]
882 #[allow(dead_code)]
883 fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884 debug_assert!(base >= 1);
885 let v = self.stack.get((base - 1) as usize)?;
886 if v.tag_byte() == crate::runtime::value::tag::INT {
887 // SAFETY: tag byte just verified == INT.
888 Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889 unsafe { v.as_int_unchecked() },
890 ))
891 } else {
892 None
893 }
894 }
895
896 /// Build the raw `Vm` struct without main coroutine / RNG seed / library
897 /// setup. Private helper shared by `Vm::new` and `Vm::new_minimal`; the
898 /// caller is responsible for the rest of the bring-up.
899 fn new_inner(version: LuaVersion) -> Vm {
900 let mut heap = Heap::new();
901 // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
902 // values strongly. gc.lua's "weak tables" section relies on that.
903 heap.no_ephemeron = version <= LuaVersion::Lua51;
904 // PUC 5.3 needs two GC cycles to finalize a table caught in a
905 // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
906 // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
907 heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
908 let globals = heap.new_table();
909 let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();
910
911 Vm {
912 heap,
913 stack: Vec::new(),
914 frames: Vec::new(),
915 frames_top: 0,
916 open_upvals: Vec::new(),
917 tbc: Vec::new(),
918 top: 0,
919 globals,
920 type_mt: [None; 5],
921 mm_names,
922 c_depth: 0,
923 pcall_depth: 0,
924 nny: 0,
925 msgh_depth: 0,
926 terminating: None,
927 rng: [0; 4],
928 started: std::time::Instant::now(),
929 version,
930 closing_err: None,
931 current: None,
932 main_ctx: None,
933 yielding: None,
934 native_nresults: -1,
935 main_coro: None,
936 gc_mode: "incremental",
937 gc_top: 0,
938 gc_pause: 200,
939 gc_stepmul: 100,
940 gc_stepsize: 13,
941 gc_finalizing: false,
942 capi_stack: Vec::new(),
943 capi_cstr_pin: None,
944 warn_state: WarnState::Off,
945 warn_buf: Vec::new(),
946 warn_log: Vec::new(),
947 instr_budget: None,
948 bytecode_loading: true,
949 puc_bytecode_loading: false,
950 loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
951 registry: None,
952 file_mt: None,
953 io_input: None,
954 io_output: None,
955 hook: HookState::default(),
956 in_hook: false,
957 pending_tailcalls: 0,
958 errored_native: None,
959 hook_ftransfer: 0,
960 hook_ntransfer: 0,
961 pending_tm: None,
962 pending_is_hook: false,
963 error_traceback: None,
964 public_call_depth: 0,
965 running_natives: Vec::new(),
966 running_native_slots: Vec::new(),
967 // v1.1 A2 — JIT-specific state factored into `JitState`
968 // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
969 // `install_jit_backend` / `luaL_newstate` swap in
970 // `CraneliftBackend` for callers that want JIT acceleration.
971 jit: crate::vm::jit_state::JitState::with_null_backend(),
972 // v1.1 B12 — host roots ticket pool for the `Lua` facade.
973 host_roots: Vec::new(),
974 // v1.3 Phase ML — MacroLua registry. Pre-populated with
975 // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
976 // when this Vm is constructed under `LuaVersion::MacroLua`.
977 macro_registry: if version == LuaVersion::MacroLua {
978 crate::frontend::macro_expander::MacroRegistry::with_builtins()
979 } else {
980 crate::frontend::macro_expander::MacroRegistry::new()
981 },
982 host_roots_free: Vec::new(),
983 sort_scratch: Vec::new(),
984 // v1.2 Track B — LuaUserdata trait sugar's per-Vm
985 // metatable cache. Populated lazily by register_userdata.
986 userdata_metatables: std::collections::HashMap::new(),
987 // v1.1 B6 — error classification metadata. Defaults to
988 // Runtime; set at known sites (syntax / budget trip /
989 // native error / type error).
990 last_error_kind: crate::vm::error::LuaErrorKind::default(),
991 last_error_source: None,
992 // v1.1 B10 Stage 1 — async embedder fields. Defaults
993 // preserve sync behavior bit-for-bit (`async_mode = false`
994 // means the budget hot loop errors out exactly as v1.0).
995 async_mode: false,
996 async_waker: None,
997 async_slice_size: 10_000,
998 host_yield_pending: false,
999 // v1.1 B10 Stage 2 — pending async-native state. Empty by
1000 // default; populated only by the dispatcher when an
1001 // async-marked NativeClosure is invoked under async_mode.
1002 pending_async_native_fut: None,
1003 pending_async_native_ctx: None,
1004 }
1005 }
1006
1007 /// Build a fully-loaded Vm — the default for embedders that want PUC's
1008 /// standard library surface. Equivalent to `Vm::new_minimal(version)`
1009 /// followed by `vm.open_all_libs()`.
1010 pub fn new(version: LuaVersion) -> Vm {
1011 let mut vm = Vm::new_minimal(version);
1012 vm.open_all_libs();
1013 vm
1014 }
1015
1016 /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1017 /// that want a sandbox (Redis-style scripts, in-game scripting with
1018 /// a curated API) call this and then `open_base` / `open_math` / etc.
1019 /// selectively. The Vm is otherwise fully initialized (main coroutine,
1020 /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1021 pub fn new_minimal(version: LuaVersion) -> Vm {
1022 let mut vm = Vm::new_inner(version);
1023 let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1024 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1025 unsafe { mc.as_mut() }.status = CoroStatus::Running;
1026 vm.main_coro = Some(mc);
1027 let (a, b) = vm.rng_auto_seed();
1028 vm.rng_seed(a as u64, b as u64);
1029 vm
1030 }
1031
    /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
    /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
    /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
    /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
    /// closures already populated `Proto.jit: JitProtoState::Compiled`
    /// does NOT evict those cached entries — call right after
    /// construction for a clean swap.
    ///
    /// Naming: `install_jit_backend` (not `install_default_jit`)
    /// because the "default" in luna-core is `NullJitBackend`; the
    /// "default JIT" lives in the `luna` crate.
    ///
    /// `chunk` replaces `self.jit.chunk_compiler` and `trace` replaces
    /// `self.jit.trace_compiler`; both are boxed and the previous boxes
    /// are dropped by the assignment.
    pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
    where
        C: crate::jit::IntChunkCompiler + 'static,
        T: crate::jit::TraceCompiler + 'static,
    {
        self.jit.chunk_compiler = Box::new(chunk);
        self.jit.trace_compiler = Box::new(trace);
    }
1051
    /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
    /// storage holder. Default is [`crate::jit::NullJitStorage`];
    /// the `luna_jit` crate's `install_default_jit` pairs this with
    /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
    /// also install a fresh `CraneliftJitStorage`. Storage holds
    /// the per-`Vm` JIT cache + handle collections that used to be
    /// `thread_local!`s in `luna_jit::jit_backend`.
    ///
    /// Not idempotent: re-installing storage on a Vm that already
    /// holds compiled-trace pointers WILL evict their owners (the
    /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
    /// pages). Call right after construction for a clean swap.
    ///
    /// `storage` is boxed and replaces `self.jit.storage`; the previous
    /// box is dropped by the assignment.
    pub fn install_jit_storage<S>(&mut self, storage: S)
    where
        S: crate::jit::JitStorage + 'static,
    {
        self.jit.storage = Box::new(storage);
    }
1070
    /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
    /// reports "skipped" so every closure stays on the interpreter
    /// path, and the trace recorder's compile attempt always returns
    /// `None`. Intended for tests that want to verify the trait
    /// boundary works in a JIT-free configuration, and for the future
    /// `luna-core` build path that ships without Cranelift.
    ///
    /// Calling this on a Vm whose closures already populated
    /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
    /// cached entries — the dispatcher will still call into them. For
    /// a truly JIT-free run, call this immediately after construction.
    ///
    /// Only the chunk and trace compilers are replaced (each with its own
    /// `NullJitBackend`); `self.jit.storage` is left as it was.
    pub fn install_null_jit(&mut self) {
        self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
        self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
    }
1086
1087 /// Open the entire 5.5 standard library on a `new_minimal`-built Vm.
1088 /// `Vm::new` calls this; sandboxed embedders open libraries one at a
1089 /// time instead (`open_base`, `open_math`, `open_table`, …).
1090 pub fn open_all_libs(&mut self) {
1091 self.open_base();
1092 self.open_math();
1093 self.open_table();
1094 self.open_string();
1095 self.open_utf8();
1096 self.open_os_io();
1097 self.open_debug();
1098 self.open_coroutine();
1099 self.open_package();
1100 // PUC 5.2 introduced `bit32` and 5.3 retired it (the native bitwise
1101 // operators replace it on 64-bit integers). Only expose it under 5.2
1102 // so bitwise.lua's first line (`bit32.band(...)`) resolves without
1103 // leaking the global into newer dialects.
1104 if self.version == LuaVersion::Lua52 {
1105 self.open_bit32();
1106 }
1107 }
1108
    /// Install the base library (`print`, `type`, `pairs`, `tostring`,
    /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
    /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
    /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
    /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
    /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
    /// once per Vm.
    ///
    /// Thin wrapper: the work is done by `crate::vm::builtins::open_base`.
    pub fn open_base(&mut self) {
        crate::vm::builtins::open_base(self);
    }
    /// Install the `math` standard library.
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_math::open_math`.
    pub fn open_math(&mut self) {
        crate::vm::lib_math::open_math(self);
    }
    /// Install the `table` standard library.
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_table::open_table`.
    pub fn open_table(&mut self) {
        crate::vm::lib_table::open_table(self);
    }
    /// Install the `string` standard library (and the shared string metatable).
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_string::open_string`.
    pub fn open_string(&mut self) {
        crate::vm::lib_string::open_string(self);
    }
    /// Install the `utf8` standard library (5.3+).
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_utf8::open_utf8`.
    /// No version check happens at this level.
    pub fn open_utf8(&mut self) {
        crate::vm::lib_utf8::open_utf8(self);
    }
    /// Install the `os` and `io` standard libraries together.
    ///
    /// `os` and `io` are merged because file userdata shares state with both
    /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
    /// wraps `os.execute`'s shell).
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_os_io::open_os_io`.
    pub fn open_os_io(&mut self) {
        crate::vm::lib_os_io::open_os_io(self);
    }
    /// Install the `debug` standard library (introspection / hooks). Off by
    /// default for sandbox embedders, i.e. a `new_minimal` Vm does not have
    /// it until this is called.
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_debug::open_debug`.
    pub fn open_debug(&mut self) {
        crate::vm::lib_debug::open_debug(self);
    }
    /// Install the `coroutine` standard library.
    ///
    /// Thin wrapper: the work is done by
    /// `crate::vm::lib_coroutine::open_coroutine`.
    pub fn open_coroutine(&mut self) {
        crate::vm::lib_coroutine::open_coroutine(self);
    }
    /// Install `package` plus the 5.1-only `module` and `package.seeall`
    /// aliases.
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_os_io::open_package`
    /// (the implementation lives in the `lib_os_io` module, not a module of
    /// its own).
    pub fn open_package(&mut self) {
        crate::vm::lib_os_io::open_package(self);
    }
    /// Install the 5.2-only `bit32` library (5.3+ retired it in favour of
    /// native bitwise ops on 64-bit integers).
    ///
    /// This method itself does not check the Vm's version; the 5.2 gate
    /// lives in `open_all_libs`. Thin wrapper over
    /// `crate::vm::lib_bit32::open_bit32`.
    pub fn open_bit32(&mut self) {
        crate::vm::lib_bit32::open_bit32(self);
    }
1159
1160 /// xoshiro256** next.
1161 pub(crate) fn rng_next(&mut self) -> u64 {
1162 let s = &mut self.rng;
1163 let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1164 let t = s[1] << 17;
1165 s[2] ^= s[0];
1166 s[3] ^= s[1];
1167 s[1] ^= s[2];
1168 s[0] ^= s[3];
1169 s[2] ^= t;
1170 s[3] = s[3].rotate_left(45);
1171 result
1172 }
1173
1174 /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1175 pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1176 // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1177 // state), then 16 discards to spread the seed. Matches PUC's exact
1178 // sequence so the low-level conformance test passes.
1179 self.rng = [a, 0xff, b, 0];
1180 for _ in 0..16 {
1181 self.rng_next();
1182 }
1183 }
1184
    /// Wall-clock since VM creation (os.clock approximation).
    ///
    /// Measured as the time elapsed on `self.started`, the `Instant`
    /// captured when the struct was built in `new_inner`.
    pub(crate) fn uptime(&self) -> std::time::Duration {
        self.started.elapsed()
    }
1189
1190 /// Entropy for math.randomseed() with no arguments.
1191 pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1192 let t = std::time::SystemTime::now()
1193 .duration_since(std::time::UNIX_EPOCH)
1194 .map(|d| d.as_nanos() as u64)
1195 .unwrap_or(0);
1196 let addr = &self.rng as *const _ as u64;
1197 (t as i64, addr as i64)
1198 }
1199
1200 /// Allocate a native function object (no upvalues): builtin registration.
1201 pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1202 Value::Native(self.heap.new_native(f, Box::new([])))
1203 }
1204
1205 /// Allocate a native function object with captured upvalues.
1206 pub fn native_with(
1207 &mut self,
1208 f: crate::runtime::value::NativeFn,
1209 upvals: Box<[Value]>,
1210 ) -> Value {
1211 Value::Native(self.heap.new_native(f, upvals))
1212 }
1213
    /// Install the shared string metatable (string library, P04).
    ///
    /// Passing `None` clears it. The metatable is stored in `type_mt[3]`.
    // NOTE(review): index 3 is presumably the string slot of the per-type
    // metatable array — confirm against the `type_mt` indexing scheme.
    pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
        self.type_mt[3] = mt;
    }
1218
    /// The current globals table (`_G` / `_ENV` source for new chunks).
    ///
    /// Returns the handle by value; `set_globals` can repoint it later, so
    /// a handle obtained earlier may no longer be the current one.
    pub fn globals(&self) -> Gc<Table> {
        self.globals
    }
1223
1224 /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1225 /// Library code that pushes a known number of fresh slots — e.g.
1226 /// `table.unpack` returning N values — consults this to refuse when
1227 /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1228 /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1229 /// coroutine's already-built table eats a few slots, so an unpack of
1230 /// ~lim values can't fit.
1231 pub(crate) fn stack_room(&self) -> i64 {
1232 PUC_MAXSTACK - (self.stack.len() as i64)
1233 }
1234
    /// Repoint the thread's "global table" used by *future* `Vm::load` calls
    /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
    /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
    /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
    ///
    /// Only the `self.globals` handle is replaced; neither the old nor the
    /// new table is modified.
    pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
        self.globals = env;
    }
1242
    /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
    /// 5.5). Determines numeric semantics, available standard libraries, and
    /// metamethod behavior.
    ///
    /// This is the value passed to the constructor and stored in
    /// `self.version`.
    pub fn version(&self) -> LuaVersion {
        self.version
    }
1249
1250 /// Set a global by name. `v` may be any `IntoValue`: a primitive
1251 /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1252 /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1253 /// `Gc<NativeClosure>` handle.
1254 ///
1255 /// Returns `Err(LuaError)` only if the globals table overflows
1256 /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1257 /// String interning + key construction cannot fail.
1258 ///
1259 /// ```
1260 /// # use luna_core::vm::Vm;
1261 /// # use luna_core::version::LuaVersion;
1262 /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1263 /// vm.set_global("answer", 42).unwrap();
1264 /// vm.set_global("ratio", 0.5_f64).unwrap();
1265 /// vm.set_global("hello", "world").unwrap();
1266 /// let r = vm.eval("return answer, ratio, hello").unwrap();
1267 /// assert_eq!(r.len(), 3);
1268 /// ```
1269 pub fn set_global<V: crate::vm::IntoValue>(
1270 &mut self,
1271 name: &str,
1272 v: V,
1273 ) -> Result<(), LuaError> {
1274 let v = v.into_value(self);
1275 let k = Value::Str(self.heap.intern(name.as_bytes()));
1276 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1277 unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1278 self.heap
1279 .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1280 Ok(())
1281 }
1282
1283 /// Backward write barrier shorthand for native lib code: demote `t` from
1284 /// BLACK back to gray so the next propagate step re-traces its fields.
1285 /// No-op outside Propagate (parent is never BLACK at mutation time).
1286 pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1287 self.heap
1288 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1289 }
1290
1291 /// Forward write barrier shorthand: a closed upvalue is a single-slot
1292 /// container — `barrier_forward` is cheaper than `barrier_back` here.
1293 /// No-op outside Propagate.
1294 pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1295 self.heap
1296 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1297 }
1298
    /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
    /// under non-MacroLua dialects (the macro is stored but the load
    /// path only consults the registry when
    /// `self.version == LuaVersion::MacroLua`).
    ///
    /// `name` is stored without the leading `@` — source code writes
    /// `@double(x)` to invoke a macro registered as `"double"`.
    ///
    /// Thin wrapper over `self.macro_registry.register(name, m)`.
    pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
        self.macro_registry.register(name, m);
    }
1309
    /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom).
    /// Mostly useful for tests / dogfood resets.
    ///
    /// Thin wrapper over `self.macro_registry.clear()`; the built-ins are
    /// not re-registered here.
    pub fn clear_macros(&mut self) {
        self.macro_registry.clear();
    }
1315
1316 /// Parse + compile a chunk and close it over the globals table.
1317 pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1318 // Reject oversize input *before* handing the parser/lexer a
1319 // potentially multi-GB slice. The PUC-shaped `not enough memory`
1320 // message keeps `heavy.lua::loadrep` compatibility: that test
1321 // accepts either `string length overflow` or `not enough memory`
1322 // as the failure mode for a feeder loop that outruns the host
1323 // allocator. See `set_loader_input_budget`.
1324 if src.len() > self.loader_input_budget {
1325 return Err(SyntaxError {
1326 line: 0,
1327 msg: b"not enough memory".to_vec(),
1328 });
1329 }
1330 // a precompiled (binary) chunk is undumped; source is parsed + compiled
1331 let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1332 if is_bytecode && !self.bytecode_loading {
1333 return Err(SyntaxError {
1334 line: 0,
1335 msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1336 });
1337 }
1338 let proto = if is_bytecode {
1339 let allow_puc = self.puc_bytecode_loading;
1340 crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1341 |msg| SyntaxError {
1342 line: 0,
1343 msg: msg.into_bytes(),
1344 },
1345 )?
1346 } else if self.version.is_macro_lua() {
1347 // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1348 // token vec, run the macro expander pre-pass against the
1349 // per-Vm registry, then hand the rewritten stream to
1350 // `parse_tokens`. The AST + compiler are dialect-agnostic
1351 // because by this point all `@`/quote tokens are gone.
1352 let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1353 let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1354 loop {
1355 let t = lexer.next_token()?;
1356 let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1357 raw.push(t);
1358 if eof {
1359 break;
1360 }
1361 }
1362 // Drop the trailing Eof — expander operates on the body and
1363 // `parse_tokens` reinserts Eof when it runs out of tokens.
1364 raw.pop();
1365 let expanded = self.macro_registry.expand(raw)?;
1366 let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1367 compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1368 } else {
1369 let ast = parse(src, self.version)?;
1370 compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1371 };
1372 // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1373 // upvalue with the globals table when the closure has *exactly* one
1374 // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1375 // function with two-or-more upvalues keeps every cell at nil; the
1376 // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1377 // :293's `assert(x() == nil)` pins this contract.
1378 let n = proto.upvals.len();
1379 let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1380 if n == 0 {
1381 // synthetic main chunk has no declared upvalues, but the engine
1382 // still expects at least one cell so the host can probe via
1383 // `debug.upvalueid` etc. Match the historical luna shape.
1384 ups.push(
1385 self.heap
1386 .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1387 );
1388 } else if n == 1 {
1389 ups.push(
1390 self.heap
1391 .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1392 );
1393 } else {
1394 for _ in 0..n {
1395 ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1396 }
1397 }
1398 Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1399 }
1400
    /// Compile and run `src` as an anonymous chunk; return its results.
    /// Source name in the traceback is `"=eval"`. Syntax errors are
    /// surfaced as `LuaError` carrying the formatted PUC-style message
    /// (interned through the heap so the error value composes with
    /// `pcall` / `error_text` like any runtime error).
    ///
    /// Thin wrapper: delegates to `eval_chunk` with the fixed chunk name
    /// `"=eval"`.
    pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
        self.eval_chunk(src, "=eval")
    }
1409
1410 /// Render an error value for messages/tests. Non-string errors —
1411 /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1412 /// (`"(error object is a table value)"`); embedders that need
1413 /// structured payloads should inspect `e.0` directly. Errors whose
1414 /// text starts with `"native panic:"` indicate a Rust panic
1415 /// crossed `catch_unwind` — the Vm may be inconsistent and should
1416 /// be dropped (do not reuse).
1417 pub fn error_text(&self, e: &LuaError) -> String {
1418 match e.0 {
1419 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1420 v => format!("(error object is a {} value)", v.type_name()),
1421 }
1422 }
1423
1424 /// Call any callable value from the host (or from natives like pcall).
1425 pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1426 // host-level entry (no enclosing exec): drop any error state from a
1427 // prior call that propagated uncaught (`error_traceback` would
1428 // otherwise leak into the next debug.traceback call).
1429 if self.public_call_depth == 0 {
1430 self.error_traceback = None;
1431 }
1432 self.public_call_depth += 1;
1433 // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1434 // chunk whose body fits the S1 int-arith whitelist short-circuits
1435 // the whole interpreter dispatch and runs straight through the
1436 // mmap'd native code. The lookup is one Cell::get + one match —
1437 // the slow path (compile attempt on first reach) is paid once per
1438 // Proto.
1439 if args.is_empty()
1440 && let Value::Closure(cl) = f
1441 && let Some(vs) = self.try_jit_call(cl)
1442 {
1443 self.public_call_depth -= 1;
1444 return Ok(vs);
1445 }
1446 let r = self.call_value_impl(f, args, true);
1447 self.public_call_depth -= 1;
1448 r
1449 }
1450
1451 /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1452 /// `Some(values)` when the cached native fn is callable for a
1453 /// zero-arg call. (Non-zero-arg dispatch is handled by
1454 /// `try_jit_call_op` from inside `begin_call`.)
1455 fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1456 use crate::runtime::function::JitProtoState;
1457 if !self.jit.enabled {
1458 return None;
1459 }
1460 let proto = cl.proto;
1461 if let JitProtoState::Untried = proto.jit.get() {
1462 self.populate_jit_cache(proto);
1463 }
1464 match proto.jit.get() {
1465 JitProtoState::Compiled {
1466 entry,
1467 num_args: 0,
1468 returns_one,
1469 arg_float_mask: _,
1470 arg_table_mask: _,
1471 ret_is_float,
1472 ret_is_table,
1473 } => {
1474 // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1475 let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1476 // P11-S5c / S5d.J — install the active Vm + closure
1477 // for any Rust helper the JIT'd code may call (e.g.
1478 // `luna_jit_new_table`, `luna_jit_upval_get`) via
1479 // cranelift `Linkage::Import`. RAII clear on return.
1480 // Chunks with no upvalue reads don't touch the closure
1481 // slot, paying nothing.
1482 // v1.1 A1 Session A — route through chunk_compiler so
1483 // the NullJitBackend path stays inert. Raw-ptr arg
1484 // avoids the &mut self borrow conflict against the
1485 // shared self.jit.chunk_compiler read.
1486 let vm_ptr: *mut Vm = self;
1487 let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1488 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1489 let r = unsafe { f() };
1490 drop(_jit_vm_guard);
1491 // P11-S5d.E' — a JIT helper may have detected a metatable
1492 // on a table operand and parked a deopt request here.
1493 // Discard the sentinel value and return None so the caller
1494 // re-runs the call through the interpreter, which honours
1495 // __index/__newindex.
1496 if self.jit.pending_err.take().is_some() {
1497 return None;
1498 }
1499 Some(if returns_one {
1500 let v = if ret_is_float {
1501 Value::Float(f64::from_bits(r as u64))
1502 } else if ret_is_table {
1503 Value::Table(crate::runtime::Gc::from_ptr(
1504 r as *mut crate::runtime::Table,
1505 ))
1506 } else {
1507 Value::Int(r)
1508 };
1509 vec![v]
1510 } else {
1511 Vec::new()
1512 })
1513 }
1514 // Non-zero-arg Compiled state: call_value's empty-args
1515 // fast path can't drive it. Op::Call handles those.
1516 JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1517 }
1518 }
1519
1520 /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1521 /// `Compiled { … }` or `Failed`; idempotent on already-populated
1522 /// states (call sites guard with a get before invoking).
1523 ///
1524 /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1525 /// `proto.code`. Compiled artefacts live in the thread-local
1526 /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1527 /// `Vm`s loading the same source skip the cranelift compile step
1528 /// entirely.
1529 fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1530 use crate::runtime::function::JitProtoState;
1531 let version = self.version();
1532 let pre53 = version <= crate::version::LuaVersion::Lua53;
1533 // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1534 // are Float). The JIT's `GetUpval` ValueRead path uses this
1535 // to default-pin upvalue reads to Float without a tag check.
1536 let float_only = version <= crate::version::LuaVersion::Lua52;
1537 // v2.0 Track J sub-step J-B — split-borrow JitState so the
1538 // trait method can take `&mut dyn JitStorage` without
1539 // double-borrowing self.jit.
1540 let jit = &mut self.jit;
1541 let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1542 match jit
1543 .chunk_compiler
1544 .try_compile(storage, proto, pre53, float_only)
1545 {
1546 crate::jit::CompileResult::Compiled {
1547 entry,
1548 num_args,
1549 returns_one,
1550 arg_float_mask,
1551 arg_table_mask,
1552 ret_is_float,
1553 ret_is_table,
1554 } => {
1555 proto.jit.set(JitProtoState::Compiled {
1556 entry,
1557 num_args,
1558 returns_one,
1559 arg_float_mask,
1560 arg_table_mask,
1561 ret_is_float,
1562 ret_is_table,
1563 });
1564 }
1565 crate::jit::CompileResult::Skipped => {
1566 proto.jit.set(JitProtoState::Failed);
1567 }
1568 }
1569 }
1570
1571 /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
1572 /// before `push_frame`. Returns `true` when the call was handled
1573 /// in-place (no new Lua frame). Constraints: every arg slot must
1574 /// be `Value::Int`, the cached arity must match the call site's
1575 /// `nargs`, the host wanted-count `wanted` is honoured by
1576 /// `finish_results`. Also bails when a debug hook is armed —
1577 /// JIT'd code does not fire line / call / return hooks, so any
1578 /// active hook makes the interpreter the source of truth.
1579 fn try_jit_call_op(
1580 &mut self,
1581 cl: Gc<LuaClosure>,
1582 func_slot: u32,
1583 nargs: u32,
1584 wanted: i32,
1585 ) -> bool {
1586 use crate::runtime::function::JitProtoState;
1587 if !self.jit.enabled {
1588 return false;
1589 }
1590 // Any active debug hook means the interpreter has to run the
1591 // call so the hook gets the expected events.
1592 if self.hook.func.is_some() || self.hook.rust_func.is_some() {
1593 return false;
1594 }
1595 let proto = cl.proto;
1596 if let JitProtoState::Untried = proto.jit.get() {
1597 self.populate_jit_cache(proto);
1598 }
1599 let JitProtoState::Compiled {
1600 entry,
1601 num_args,
1602 returns_one,
1603 arg_float_mask,
1604 arg_table_mask,
1605 ret_is_float,
1606 ret_is_table,
1607 } = proto.jit.get()
1608 else {
1609 return false;
1610 };
1611 if num_args as u32 != nargs {
1612 return false;
1613 }
1614 // Pack args into i64 bit-patterns per the per-slot expected
1615 // kind. A Float-typed slot accepts Value::Float verbatim and
1616 // promotes Value::Int(x) via i64 → f64; a Table-typed slot
1617 // accepts only Value::Table and passes the raw Gc ptr; an
1618 // Int-typed slot accepts only Value::Int. Any other shape
1619 // bails to the interpreter so the call's actual dynamics
1620 // (metamethod dispatch / type-coerce) take over.
1621 let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
1622 [0; crate::jit::MAX_JIT_ARITY as usize];
1623 for i in 0..num_args as usize {
1624 let v = self.stack[(func_slot + 1) as usize + i];
1625 let want_float = (arg_float_mask >> i) & 1 == 1;
1626 let want_table = (arg_table_mask >> i) & 1 == 1;
1627 args[i] = match (want_table, want_float, v) {
1628 (true, _, Value::Table(t)) => t.as_ptr() as i64,
1629 (false, false, Value::Int(x)) => x,
1630 (false, true, Value::Float(f)) => f.to_bits() as i64,
1631 (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
1632 _ => return false,
1633 };
1634 }
1635 // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
1636 // matching guard in `try_jit_call`.
1637 // v1.1 A1 Session A — route through chunk_compiler.
1638 let vm_ptr: *mut Vm = self;
1639 let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1640 // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1641 let r = unsafe {
1642 match num_args {
1643 0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
1644 1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
1645 2 => {
1646 (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
1647 }
1648 3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
1649 args[0], args[1], args[2],
1650 ),
1651 4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
1652 args[0], args[1], args[2], args[3],
1653 ),
1654 _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
1655 }
1656 };
1657 drop(_jit_vm_guard);
1658 // P11-S5d.E' — see matching path in `try_jit_call`. A helper
1659 // flagged a metatable on a table operand; bail to the interpreter
1660 // so `push_frame` runs the call from scratch.
1661 if self.jit.pending_err.take().is_some() {
1662 return false;
1663 }
1664 // Write result at func_slot, replacing the closure value, then
1665 // hand to finish_results to pad/truncate per the call site's
1666 // `wanted` count.
1667 if returns_one {
1668 let v = if ret_is_float {
1669 Value::Float(f64::from_bits(r as u64))
1670 } else if ret_is_table {
1671 Value::Table(crate::runtime::Gc::from_ptr(
1672 r as *mut crate::runtime::Table,
1673 ))
1674 } else {
1675 Value::Int(r)
1676 };
1677 self.stack[func_slot as usize] = v;
1678 self.finish_results(func_slot, 1, wanted);
1679 } else {
1680 self.finish_results(func_slot, 0, wanted);
1681 }
1682 true
1683 }
1684
1685 /// `call_value` with control over the `from_c` debug boundary. A `__close`
1686 /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1687 /// invokes it inside that ci), so it is called with `from_c = false`: its
1688 /// debug parent is the closing function, not a synthetic C level.
1689 fn call_value_impl(
1690 &mut self,
1691 f: Value,
1692 args: &[Value],
1693 from_c: bool,
1694 ) -> Result<Vec<Value>, LuaError> {
1695 if self.c_depth >= MAX_C_DEPTH {
1696 return Err(self.rt_err("stack overflow"));
1697 }
1698 self.c_depth += 1;
1699 let func_slot = self.stack.len() as u32;
1700 self.stack.push(f);
1701 self.stack.extend_from_slice(args);
1702 self.top = self.stack.len() as u32;
1703 let r = self.call_at(func_slot, args.len() as u32, from_c);
1704 self.c_depth -= 1;
1705 if r.is_err()
1706 && self.yielding.is_none()
1707 && self.terminating.is_none()
1708 && !self.host_yield_pending
1709 && self.pending_async_native_fut.is_none()
1710 {
1711 // A `coroutine.yield` in flight raises a sentinel error to unwind the
1712 // Rust stack, but the suspended coroutine's frames/registers (which
1713 // sit at/above `func_slot`) must survive for the next resume — so we
1714 // only truncate on a real error. A self-close termination is in the
1715 // same boat: the dying thread's state is discarded wholesale.
1716 // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1717 // the same boat as `yielding`: the next `EvalFuture::poll`
1718 // resumes the same call, so the in-flight frames must
1719 // survive.
1720 self.stack.truncate(func_slot as usize);
1721 self.top = func_slot;
1722 }
1723 r
1724 }
1725
1726 /// Invoke `f` with the running thread marked non-yieldable for the duration
1727 /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1728 /// boundary and errors instead of suspending. Used by library callbacks
1729 /// (sort comparator, gsub replacement) that run via synchronous Rust
1730 /// recursion and so could not be re-entered after a yield.
1731 pub(crate) fn call_noyield(
1732 &mut self,
1733 f: Value,
1734 args: &[Value],
1735 ) -> Result<Vec<Value>, LuaError> {
1736 self.nny += 1;
1737 let r = self.call_value(f, args);
1738 self.nny -= 1;
1739 r
1740 }
1741
1742 // ---- coroutines (P05) ----
1743
1744 pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1745 // The new coroutine inherits the creating thread's current globals
1746 // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1747 // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1748 // it here picks the creator regardless of which coro is running.
1749 self.heap.new_coro(body, self.globals)
1750 }
1751
1752 /// Is `t` the thread whose context is currently live in the VM?
1753 pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1754 match (self.current, t) {
1755 (None, None) => true,
1756 (Some(a), Some(b)) => a.ptr_eq(b),
1757 _ => false,
1758 }
1759 }
1760
1761 /// Read an open-upvalue slot from its owning thread's stack (the live VM
1762 /// stack if that thread is current, else its saved context).
1763 #[doc(hidden)]
1764 pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1765 let s = slot as usize;
1766 if self.is_current_thread(thread) {
1767 self.stack[s]
1768 } else {
1769 match thread {
1770 Some(co) => co.stack[s],
1771 None => self.main_ctx.as_ref().expect("main context").stack[s],
1772 }
1773 }
1774 }
1775
1776 fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1777 let s = slot as usize;
1778 if self.is_current_thread(thread) {
1779 self.stack[s] = v;
1780 } else {
1781 match thread {
1782 Some(co) => {
1783 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1784 unsafe { co.as_mut() }.stack[s] = v;
1785 // co.stack is traced by Coro::trace; demote co back to
1786 // gray so propagate re-traces this slot if it was
1787 // already black.
1788 self.heap
1789 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1790 }
1791 None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1792 }
1793 }
1794 }
1795
1796 /// Whether `co` is the main thread's identity object.
1797 pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1798 self.main_coro.is_some_and(|m| m.ptr_eq(co))
1799 }
1800
1801 /// The status of `co` from the caller's view. The main thread's identity
1802 /// object has no stored status — it is "running" when nothing else runs,
1803 /// else "normal" (it resumed the active coroutine).
1804 pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1805 if self.is_main_coro(co) {
1806 if self.current.is_none() {
1807 CoroStatus::Running
1808 } else {
1809 CoroStatus::Normal
1810 }
1811 } else {
1812 co.status
1813 }
1814 }
1815
1816 /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1817 /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1818 /// context. Handlers see the coroutine's death error (if it died by error)
1819 /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1820 /// with error `e` and no handler overrode it; `Err` means a handler raised.
1821 pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1822 // re-entrant close: a __close handler closed its own coroutine while the
1823 // outer close is mid-flight (its context is live). Report success and let
1824 // the outer close finish — re-entering the swap would corrupt the stack.
1825 if self.current.is_some_and(|c| c.ptr_eq(co)) {
1826 return Ok(None);
1827 }
1828 // A chain of coroutines whose `__close` handlers each close the previous
1829 // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1830 // The calling handler's `call_value` has already pushed `c_depth` to the
1831 // cap, so here it reads as full first — report PUC's "C stack overflow"
1832 // before the next handler call would surface the plainer "stack overflow".
1833 if self.c_depth >= MAX_C_DEPTH {
1834 return Err(self.rt_err("C stack overflow"));
1835 }
1836 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1837 let death_err = unsafe { co.as_mut() }.error_value.take();
1838 // swap the caller's live context out (into a GC-rooted home) and the
1839 // coroutine's in, mirroring resume_coro, so the __close handlers run on
1840 // the coroutine's stack while everything stays rooted.
1841 let resumer = self.current;
1842 let rctx = self.take_ctx();
1843 match resumer {
1844 Some(r) => {
1845 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1846 let m = unsafe { r.as_mut() };
1847 m.stack = rctx.stack;
1848 m.frames = rctx.frames;
1849 m.open_upvals = rctx.open_upvals;
1850 m.tbc = rctx.tbc;
1851 m.top = rctx.top;
1852 m.pcall_depth = rctx.pcall_depth;
1853 }
1854 None => self.main_ctx = Some(rctx),
1855 }
1856 self.load_coro_ctx(co);
1857 self.current = Some(co);
1858 let result = self.close_slots(0, death_err);
1859 // discard the (now-closed) coroutine context and restore the caller
1860 let _ = self.take_ctx();
1861 match resumer {
1862 Some(r) => {
1863 self.load_coro_ctx(r);
1864 self.current = Some(r);
1865 }
1866 None => {
1867 let m = self.main_ctx.take().expect("main context saved");
1868 self.put_ctx(m);
1869 self.current = None;
1870 }
1871 }
1872 {
1873 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1874 let m = unsafe { co.as_mut() };
1875 m.status = CoroStatus::Dead;
1876 m.stack = Vec::new();
1877 m.frames = Vec::new();
1878 m.open_upvals = Vec::new();
1879 m.tbc = Vec::new();
1880 m.top = 0;
1881 m.pcall_depth = 0;
1882 m.resume_at = None;
1883 m.error_value = None;
1884 }
1885 result.map(|()| death_err)
1886 }
1887
1888 /// `coroutine.running`: the running thread plus whether it is the main one.
1889 pub(crate) fn running_thread(&self) -> (Value, bool) {
1890 match self.current {
1891 Some(co) => (Value::Coro(co), false),
1892 None => (Value::Coro(self.main_coro.expect("main coro")), true),
1893 }
1894 }
1895
1896 /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1897 /// thread) can yield. The main thread never can; any other coroutine can
1898 /// unless it is dead.
1899 pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1900 match co {
1901 Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1902 // the running thread can yield only outside any non-yieldable C call
1903 None => self.current.is_some() && self.nny == 0,
1904 }
1905 }
1906
1907 /// Why `coroutine.yield` may not suspend the running thread right now, as a
1908 /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1909 /// from "inside an unyieldable C call" (sort/gsub callback).
1910 pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1911 if self.current.is_none() {
1912 Some("attempt to yield from outside a coroutine")
1913 } else if self.nny > 0 {
1914 Some("attempt to yield across a C-call boundary")
1915 } else {
1916 None
1917 }
1918 }
1919
1920 /// The coroutine whose context is currently live (`None` on the main thread).
1921 pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
1922 self.current
1923 }
1924
1925 /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1926 /// its pending `__close` handlers, then signal termination. The handlers run
1927 /// here, in place, with the thread still non-yieldable (a yield in one hits
1928 /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1929 /// way a yield does — `exec_with` propagates it past any protecting pcall
1930 /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1931 /// clean death (or, if a handler raised, the coroutine's error).
1932 pub(crate) fn close_running(&mut self) -> LuaError {
1933 let death = match self.close_slots(0, None) {
1934 Ok(()) => None,
1935 Err(e) => Some(e.0),
1936 };
1937 self.terminating = Some(death);
1938 LuaError(Value::Nil)
1939 }
1940
1941 /// `coroutine.status` as seen by the caller.
1942 pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1943 match self.effective_coro_status(co) {
1944 CoroStatus::Suspended => "suspended",
1945 CoroStatus::Running => "running",
1946 CoroStatus::Normal => "normal",
1947 CoroStatus::Dead => "dead",
1948 }
1949 }
1950
1951 fn take_ctx(&mut self) -> SavedCtx {
1952 let saved = SavedCtx {
1953 stack: std::mem::take(&mut self.stack),
1954 frames: std::mem::take(&mut self.frames),
1955 open_upvals: std::mem::take(&mut self.open_upvals),
1956 tbc: std::mem::take(&mut self.tbc),
1957 top: self.top,
1958 pcall_depth: self.pcall_depth,
1959 hook: self.hook,
1960 globals: self.globals,
1961 };
1962 self.frames_resync(); // P17-D Week 1 — frames now empty.
1963 saved
1964 }
1965
1966 fn put_ctx(&mut self, c: SavedCtx) {
1967 self.stack = c.stack;
1968 self.frames = c.frames;
1969 self.open_upvals = c.open_upvals;
1970 self.tbc = c.tbc;
1971 self.top = c.top;
1972 self.pcall_depth = c.pcall_depth;
1973 self.hook = c.hook;
1974 self.globals = c.globals;
1975 self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
1976 }
1977
1978 /// Move a coroutine's saved context into the live VM fields.
1979 fn load_coro_ctx(&mut self, co: Gc<Coro>) {
1980 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1981 let m = unsafe { co.as_mut() };
1982 self.stack = std::mem::take(&mut m.stack);
1983 self.frames = std::mem::take(&mut m.frames);
1984 self.open_upvals = std::mem::take(&mut m.open_upvals);
1985 self.tbc = std::mem::take(&mut m.tbc);
1986 self.top = m.top;
1987 self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
1988 self.pcall_depth = m.pcall_depth;
1989 self.hook = m.hook;
1990 self.globals = m.globals;
1991 }
1992
1993 /// Save the live VM context back into a coroutine object.
1994 fn store_coro_ctx(&mut self, co: Gc<Coro>) {
1995 let c = self.take_ctx();
1996 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1997 let m = unsafe { co.as_mut() };
1998 m.stack = c.stack;
1999 m.frames = c.frames;
2000 m.open_upvals = c.open_upvals;
2001 m.tbc = c.tbc;
2002 m.top = c.top;
2003 m.pcall_depth = c.pcall_depth;
2004 m.hook = c.hook;
2005 m.globals = c.globals;
2006 // bulk-overwrite of every collectable field traced by Coro::trace:
2007 // demote the coro back to gray so propagate re-traces its new state.
2008 self.heap
2009 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2010 }
2011
/// `coroutine.resume` core: drive `co` with `args` until it yields, returns
/// or errors.
///
/// * `Ok(values)` carries the yielded or returned values (empty when the
///   coroutine closed itself cleanly).
/// * `Err` carries an error raised inside the coroutine, which becomes
///   dead. `Err` is also returned — without touching the coroutine — when
///   `co` is not suspended or when the C-depth cap has been reached.
///
/// Sequence: park the resumer's live context, load `co`'s, run it, classify
/// the outcome (self-close, yield, or death), store `co`'s context back and
/// restore the resumer. `c_depth` is raised for the duration.
pub(crate) fn resume_coro(
    &mut self,
    co: Gc<Coro>,
    args: Vec<Value>,
) -> Result<Vec<Value>, LuaError> {
    // Only a suspended coroutine may be resumed.
    match co.status {
        CoroStatus::Suspended => {}
        CoroStatus::Dead => return Err(self.rt_err("cannot resume dead coroutine")),
        _ => return Err(self.rt_err("cannot resume non-suspended coroutine")),
    }
    if self.c_depth >= MAX_C_DEPTH {
        return Err(self.rt_err("C stack overflow"));
    }
    self.c_depth += 1;
    let resumer = self.current;
    // save the resumer's live context away
    let rctx = self.take_ctx();
    match resumer {
        Some(r) => {
            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
            let m = unsafe { r.as_mut() };
            m.stack = rctx.stack;
            m.frames = rctx.frames;
            m.open_upvals = rctx.open_upvals;
            m.tbc = rctx.tbc;
            m.top = rctx.top;
            m.pcall_depth = rctx.pcall_depth;
            m.globals = rctx.globals;
            // NOTE(review): `rctx.hook` is not written back to `r` here, so
            // the later `load_coro_ctx(r)` restores whatever `r.hook` held
            // before — confirm this is intended.
            m.status = CoroStatus::Normal;
            // bulk overwrite of every traced field on r — mirror
            // store_coro_ctx's barrier_back so propagate re-traces r.
            self.heap
                .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
        }
        // The main thread has no Coro object to hold its context.
        None => self.main_ctx = Some(rctx),
    }
    // swap the coroutine in
    self.load_coro_ctx(co);
    {
        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
        let m = unsafe { co.as_mut() };
        m.status = CoroStatus::Running;
        m.resumer = resumer;
    }
    // co.resumer is a traced Gc field; barrier_back covers the new
    // resumer reference and any future field writes during this call.
    self.heap
        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
    self.current = Some(co);

    // drive it: the first resume installs and calls the body, later resumes
    // continue from the recorded yield point.
    let drive = if co.started {
        self.coro_continue(&args)
    } else {
        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
        unsafe { co.as_mut() }.started = true;
        self.coro_first(co.body, &args)
    };

    // classify: a self-close termination or a pending yield each win over
    // the (sentinel) error they raised to unwind the Rust stack.
    let (outcome, status) = if let Some(death) = self.terminating.take() {
        // the coroutine closed itself: it dies now, cleanly or with the
        // error a `__close` handler raised.
        match death {
            Some(e) => {
                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
                unsafe { co.as_mut() }.error_value = Some(e);
                self.heap
                    .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
                (Err(LuaError(e)), CoroStatus::Dead)
            }
            None => (Ok(Vec::new()), CoroStatus::Dead),
        }
    } else {
        match self.yielding.take() {
            Some((vals, fslot, nres)) => {
                // Yield: remember where the resume values must be delivered.
                // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
                unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
                (Ok(vals), CoroStatus::Suspended)
            }
            None => {
                // died: a return is clean, an error is remembered so a later
                // `coroutine.close` can report it (PUC lua_closethread).
                // Capture the error-point traceback (set by `unwind` before
                // popping the failing frames) and prepend a synthetic
                // top entry for the C native that initiated the error
                // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
                // on the dead coroutine still shows the error site
                // (db.lua :848 family).
                if drive.is_err() {
                    let mut tb = self.error_traceback.take().unwrap_or_default();
                    if let Some(nm) = self.errored_native.take() {
                        let mut prefixed: Vec<u8> = Vec::new();
                        prefixed.extend_from_slice(
                            format!("\n\t[C]: in function '{nm}'").as_bytes(),
                        );
                        prefixed.extend(tb);
                        tb = prefixed;
                    }
                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
                    unsafe { co.as_mut() }.error_traceback = Some(tb);
                }
                if let Err(e) = drive {
                    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
                    unsafe { co.as_mut() }.error_value = Some(e.0);
                    self.heap
                        .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
                }
                (drive, CoroStatus::Dead)
            }
        }
    };

    // save the coroutine's context back and restore the resumer
    self.store_coro_ctx(co);
    // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
    unsafe { co.as_mut() }.status = status;
    match resumer {
        Some(r) => {
            self.load_coro_ctx(r);
            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
            unsafe { r.as_mut() }.status = CoroStatus::Running;
            self.current = Some(r);
        }
        None => {
            let m = self.main_ctx.take().expect("main context saved");
            self.put_ctx(m);
            self.current = None;
        }
    }
    self.c_depth -= 1;
    outcome
}
2149
2150 /// First resume: install the body function at slot 0 and run.
2151 fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2152 self.stack.clear();
2153 self.stack.push(body);
2154 self.stack.extend_from_slice(args);
2155 self.top = self.stack.len() as u32;
2156 match self.begin_call(0, Some(args.len() as u32), -1, true) {
2157 Ok(true) => self.exec_with(1),
2158 Ok(false) => Ok(self.take_results(0)),
2159 Err(e) => Err(e),
2160 }
2161 }
2162
2163 /// Resume after a yield: deliver `args` as the results of the call that
2164 /// yielded, then continue the suspended thread.
2165 fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2166 let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2167 let n = args.len() as u32;
2168 // Restore the full register window of the suspended top frame: a yield
2169 // that unwound through a native (call_value) may have left the stack
2170 // shorter than the frame needs. `base + max_stack` is what push_frame
2171 // allocates; `fslot + n` covers the delivered yield results.
2172 let frame_need = self
2173 .frames
2174 .last()
2175 .and_then(CallFrame::lua)
2176 .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2177 .unwrap_or(0);
2178 let need = frame_need.max((fslot + n) as usize);
2179 if self.stack.len() < need {
2180 self.stack.resize(need, Value::Nil);
2181 }
2182 for (i, &v) in args.iter().enumerate() {
2183 self.stack[fslot as usize + i] = v;
2184 }
2185 self.finish_results(fslot, n, nres);
2186 // the suspended `coroutine.yield` (a C call) now returns its resume
2187 // values: fire the matching "return" hook PUC defers until the resume.
2188 self.hook_return(true, 1, n)?;
2189 self.exec_with(1)
2190 }
2191
2192 /// `coroutine.yield`: suspend the running coroutine, recording where to
2193 /// resume. Errors if called outside a coroutine. Returns a sentinel error
2194 /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2195 pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2196 let nres = self.native_nresults;
2197 self.yielding = Some((vals, func_slot, nres));
2198 // value is irrelevant: resume_coro consults `self.yielding`, not this
2199 LuaError(Value::Nil)
2200 }
2201
2202 /// Install or clear the debug hook on the running thread (`debug.sethook`
2203 /// without a thread argument). Arms the calling frame's `oldpc` to the
2204 /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2205 /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2206 /// native return: the very next traceexec compares against the sethook
2207 /// CALL's line. When the install statement and the following statement are
2208 /// on different source lines (db.lua :322), `changedline` fires for that
2209 /// first statement; when they share a line (db.lua :25 wrapper), they do
2210 /// not, so the wrapper line is not re-fired.
2211 pub(crate) fn install_hook(&mut self, hook: HookState) {
2212 self.hook = hook;
2213 if self.hook.line
2214 && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2215 {
2216 f.hook_oldpc = f.pc.saturating_sub(1);
2217 }
2218 }
2219
2220 /// Install a hook on `target` (`None`/current thread → the live VM fields;
2221 /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2222 /// with an optional thread argument.
2223 ///
2224 /// `target == None` means "no explicit thread argument" — PUC binds that
2225 /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2226 /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2227 /// of whether that's the main thread or a currently-resumed coroutine
2228 /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2229 /// `store_coro_ctx`). So a `None` target should always route to
2230 /// `install_hook` on the live fields. The pre-fix predicate gate
2231 /// `is_current_thread(target)` returned `false` when running inside a
2232 /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2233 /// and silently dropped the hook on the floor — the install happened on
2234 /// no thread at all.
2235 pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2236 if target.is_none() || self.is_current_thread(target) {
2237 self.install_hook(state);
2238 } else if let Some(co) = target {
2239 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2240 let m = unsafe { co.as_mut() };
2241 m.hook = state;
2242 if state.line
2243 && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2244 {
2245 f.hook_oldpc = u32::MAX;
2246 }
2247 // co.hook.func is a traced Value (Coro::trace covers it); demote
2248 // co back to gray so propagate sees the new hook function.
2249 self.heap
2250 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2251 }
2252 }
2253
2254 /// The hook state of `target` (`None`/current → the live VM state).
2255 pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2256 match target {
2257 t if self.is_current_thread(t) => self.hook,
2258 Some(co) => co.hook,
2259 None => self.hook,
2260 }
2261 }
2262
2263 /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
2264 /// hooks disabled (PUC clears the mask) and its results/stack growth are
2265 /// discarded so the interrupted frame's register window is untouched.
2266 /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
2267 fn run_hook(
2268 &mut self,
2269 event: &[u8],
2270 line: Option<i64>,
2271 from_native: bool,
2272 ) -> Result<(), LuaError> {
2273 // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
2274 // synchronous fn pointer call). Both Rust and Lua hooks may be
2275 // installed; both observe each event.
2276 if let Some(rh) = self.hook.rust_func {
2277 let evt = match event {
2278 b"call" => Some(RustHookEvent::Call),
2279 b"return" => Some(RustHookEvent::Return),
2280 b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
2281 b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
2282 b"count" => Some(RustHookEvent::Count),
2283 _ => None,
2284 };
2285 if let Some(evt) = evt {
2286 let was_in_hook = self.in_hook;
2287 self.in_hook = true;
2288 rh(self, evt);
2289 self.in_hook = was_in_hook;
2290 }
2291 }
2292 let Some(hook) = self.hook.func else {
2293 return Ok(());
2294 };
2295 let saved_top = self.top;
2296 let saved_len = self.stack.len();
2297 let name = Value::Str(self.heap.intern(event));
2298 let lv = line.map_or(Value::Nil, Value::Int);
2299 self.in_hook = true;
2300 // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
2301 // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
2302 // ci sat below `hookf` (the function being hooked). When that hooked
2303 // function is native, no Lua frame for it exists in luna's `frames`;
2304 // model it as a synthetic C level by pushing the hook with
2305 // `from_c = true` (then `c_frame_name` reads the caller's call
2306 // instruction → e.g. `name = "sethook"`). When the hooked function is
2307 // Lua (its frame is still on the stack), push with `from_c = false`
2308 // so the level descent lands on it directly. The hook's own frame
2309 // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
2310 // (PUC `CIST_HOOKED`).
2311 self.pending_is_hook = true;
2312 let r = self.call_value_impl(hook, &[name, lv], from_native);
2313 self.pending_is_hook = false;
2314 self.in_hook = false;
2315 self.stack.truncate(saved_len);
2316 self.top = saved_top;
2317 r.map(|_| ())
2318 }
2319
2320 /// Fire the "call" hook on entry to a function, if armed and not already in
2321 /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2322 /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2323 /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2324 /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2325 fn hook_call_with(
2326 &mut self,
2327 from_native: bool,
2328 nargs: u32,
2329 is_tail: bool,
2330 ) -> Result<(), LuaError> {
2331 if self.hook.call
2332 && !self.in_hook
2333 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2334 {
2335 self.hook_ftransfer = 1;
2336 self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2337 // PUC 5.1 didn't distinguish tail-call events — every call,
2338 // including tail-calls, fired plain `"call"`. 5.2 introduced
2339 // the separate `"tail call"` event (mask `"c"` covers both).
2340 // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2341 // "return","tail return","return","tail return"}`.
2342 let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2343 b"tail call"
2344 } else {
2345 b"call"
2346 };
2347 self.run_hook(event, None, from_native)?;
2348 }
2349 Ok(())
2350 }
2351
2352 pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2353 self.hook_call_with(from_native, nargs, false)
2354 }
2355
2356 /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2357 /// the first result slot relative to the activation's func slot, ntransfer
2358 /// the number of results.
2359 pub(crate) fn hook_return(
2360 &mut self,
2361 from_native: bool,
2362 ftransfer: u32,
2363 nresults: u32,
2364 ) -> Result<(), LuaError> {
2365 if self.hook.ret
2366 && !self.in_hook
2367 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2368 {
2369 self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2370 self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2371 self.run_hook(b"return", None, from_native)?;
2372 }
2373 Ok(())
2374 }
2375
2376 /// PUC "tail return" event — fires once per tail call that collapsed
2377 /// into the activation now returning, *after* its own "return" event.
2378 /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2379 fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2380 if self.hook.ret
2381 && !self.in_hook
2382 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2383 {
2384 self.run_hook(b"tail return", None, false)?;
2385 }
2386 Ok(())
2387 }
2388
2389 /// Call a metamethod with a single expected result.
2390 fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2391 let mut r = self.call_value(f, args)?;
2392 Ok(if r.is_empty() {
2393 Value::Nil
2394 } else {
2395 r.swap_remove(0)
2396 })
2397 }
2398
2399 /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2400 /// driven through the interpreter loop with a `Meta` continuation, so a
2401 /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2402 /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2403 /// Returns to the caller with the call set up — the opcode arm must do no
2404 /// further work on the running frame and let the loop iterate. `tm` is
2405 /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2406 /// born from this call inherits it via `pending_tm`, so
2407 /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2408 /// (db.lua :878).
2409 fn begin_meta_call(
2410 &mut self,
2411 func: Value,
2412 args: &[Value],
2413 action: MetaAction,
2414 tm: &'static str,
2415 ) -> Result<(), LuaError> {
2416 let saved_top = self.top;
2417 let cont_slot = self.stack.len() as u32;
2418 self.stack.push(func);
2419 self.stack.extend_from_slice(args);
2420 self.top = self.stack.len() as u32;
2421 frames_push_sync(
2422 &mut self.frames,
2423 &mut self.frames_top,
2424 CallFrame::Cont(NativeCont {
2425 kind: ContKind::Meta(MetaCont { action, saved_top }),
2426 func_slot: cont_slot,
2427 nresults: 1,
2428 }),
2429 );
2430 let saved_tm = self.pending_tm.replace(tm);
2431 // begin_call drives a Lua metamethod through the loop (returns true) or
2432 // runs a native one inline (returns false, leaving results at cont_slot
2433 // for the loop head to pick up); either way the Meta cont resolves there.
2434 let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2435 // Native callees never consumed pending_tm (push_frame is only hit on
2436 // a Lua callee); restore so it doesn't leak to a later push_frame.
2437 self.pending_tm = saved_tm;
2438 r?;
2439 Ok(())
2440 }
2441
2442 /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2443 fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2444 match self.index_step(t, key)? {
2445 MmOut::Done(v) => self.stack[dst as usize] = v,
2446 MmOut::Mm { func, recv } => {
2447 self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2448 }
2449 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2450 }
2451 Ok(())
2452 }
2453
2454 /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2455 fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2456 match self.newindex_step(t, key, v)? {
2457 MmOut::Done(_) => {}
2458 MmOut::Mm { func, recv } => {
2459 self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2460 }
2461 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2462 }
2463 Ok(())
2464 }
2465
2466 /// Apply a comparison opcode's outcome: a known boolean drives the
2467 /// conditional skip directly; a metamethod is called yieldably, its
2468 /// truthiness driving the skip on return.
2469 fn op_compare(
2470 &mut self,
2471 step: MmOut,
2472 l: Value,
2473 r: Value,
2474 k: bool,
2475 tm: &'static str,
2476 ) -> Result<(), LuaError> {
2477 match step {
2478 MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2479 MmOut::Mm { func, .. } => {
2480 self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2481 }
2482 MmOut::CompareSynth { func } => {
2483 // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2484 // negation are driven through `MetaAction::Compare` so the
2485 // metamethod call can yield like any other compare.
2486 self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2487 }
2488 }
2489 Ok(())
2490 }
2491
2492 /// Complete a VM instruction whose metamethod just returned `result` (PUC
2493 /// `luaV_finishOp`). The running frame is already back on top.
2494 fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2495 match action {
2496 MetaAction::Store { dst } => self.stack[dst as usize] = result,
2497 MetaAction::Discard => {}
2498 MetaAction::Compare { k, negate } => {
2499 let t = if negate {
2500 !result.truthy()
2501 } else {
2502 result.truthy()
2503 };
2504 self.cond_skip(t, k);
2505 }
2506 MetaAction::Concat { dst, base_a } => {
2507 self.stack[dst as usize] = result;
2508 self.top = dst + 1;
2509 self.concat_run(base_a)?;
2510 }
2511 }
2512 Ok(())
2513 }
2514
2515 // ---- metatables ----
2516
2517 pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2518 match v {
2519 Value::Table(t) => t.metatable(),
2520 Value::Userdata(u) => u.metatable(),
2521 v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2522 }
2523 }
2524
2525 /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2526 /// non-table). No-op for tables (they carry their own).
2527 pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2528 if let Some(i) = type_mt_slot(v) {
2529 self.type_mt[i] = mt;
2530 }
2531 }
2532
2533 /// The metamethod of `v` for `mm`, or nil.
2534 pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2535 match self.metatable_of(v) {
2536 Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2537 None => Value::Nil,
2538 }
2539 }
2540
2541 /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2542 /// only fires when both operands carry a metatable that exposes the same
2543 /// implementation. Returns the metamethod to call, or `Nil` when no
2544 /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2545 /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2546 pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2547 let mt1 = self.metatable_of(l);
2548 let Some(mt1) = mt1 else { return Value::Nil };
2549 let key = Value::Str(self.mm_names[mm as usize]);
2550 let tm1 = mt1.get(key);
2551 if tm1.is_nil() {
2552 return Value::Nil;
2553 }
2554 let mt2 = self.metatable_of(r);
2555 let Some(mt2) = mt2 else { return Value::Nil };
2556 if mt1.as_ptr() == mt2.as_ptr() {
2557 return tm1;
2558 }
2559 let tm2 = mt2.get(key);
2560 if tm2.is_nil() {
2561 return Value::Nil;
2562 }
2563 if tm1.raw_eq(tm2) {
2564 return tm1;
2565 }
2566 Value::Nil
2567 }
2568
2569 /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2570 /// or full userdata whose metatable carries a string `__name` reports that
2571 /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2572 pub(crate) fn obj_typename(&self, v: Value) -> String {
2573 if matches!(v, Value::Table(_) | Value::Userdata(_))
2574 && let Value::Str(s) = self.get_mm(v, Mm::Name)
2575 {
2576 return String::from_utf8_lossy(s.as_bytes()).into_owned();
2577 }
2578 v.type_name().to_string()
2579 }
2580
2581 fn call_at(
2582 &mut self,
2583 func_slot: u32,
2584 nargs: u32,
2585 from_c: bool,
2586 ) -> Result<Vec<Value>, LuaError> {
2587 if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2588 self.exec()
2589 } else {
2590 // native completed inline; results at func_slot..top
2591 Ok(self.take_results(func_slot))
2592 }
2593 }
2594
2595 /// Switch the `collectgarbage` mode, returning the previous mode name.
2596 pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2597 std::mem::replace(&mut self.gc_mode, new)
2598 }
2599
2600 /// Whether the current `collectgarbage` mode is "generational" (where a
2601 /// "step" is a minor collection — a full atomic pass — rather than a paced
2602 /// incremental sweep).
2603 pub(crate) fn gc_mode_is_generational(&self) -> bool {
2604 self.gc_mode == "generational"
2605 }
2606
2607 /// Current `stepsize` pacing parameter (PUC: 0 means an unbounded step that
2608 /// completes a whole cycle at once).
2609 pub(crate) fn gc_stepsize(&self) -> i64 {
2610 self.gc_stepsize
2611 }
2612
2613 /// `collectgarbage("param", name [,value])`: read (or set, returning the
2614 /// previous value of) a pacing parameter. Returns `None` for an unknown
2615 /// name so the caller can raise PUC's `invalid parameter` error. The
2616 /// collector is stop-the-world, so these only round-trip for API fidelity.
2617 pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2618 let slot = match name {
2619 b"pause" => &mut self.gc_pause,
2620 b"stepmul" => &mut self.gc_stepmul,
2621 b"stepsize" => &mut self.gc_stepsize,
2622 _ => return None,
2623 };
2624 let prev = *slot;
2625 if let Some(v) = set {
2626 *slot = v;
2627 }
2628 Some(prev)
2629 }
2630
2631 /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2632 /// paced sweep via `Vm::gc_step`.
2633 ///
2634 /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2635 /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2636 /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2637 /// suspected UAF is structurally closed — born objects no longer become
2638 /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2639 ///
2640 /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2641 /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2642 /// O(SWEEP_DIVISOR) safe-points regardless of size.
2643 #[inline(always)]
2644 pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2645 if self.gc_finalizing {
2646 return;
2647 }
2648 if !self.heap.gc_due() {
2649 return;
2650 }
2651 // v2.5 P1B-2E: tighten to bare `live_top`. The v2.2.0
2652 // `live_top.max(self.top)` workaround is now obsoleted by
2653 // v2.3's `finish_results` slot-clear + v2.5 P1B-2A
2654 // (Op::TailCall collapse slot-clear) + v2.5 P1B-2B
2655 // (pcall unwind slot-clear). PUC L->top discipline is now
2656 // mirrored at every frame-pop site.
2657 self.gc_top = live_top;
2658 // PUC stepmul: % of allocation rate. Higher = more GC work per
2659 // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2660 // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2661 const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2662 const MIN_BUDGET: usize = 64_000;
2663 let stepmul = self.gc_stepmul.max(1) as usize;
2664 let divisor = (SWEEP_BASE / stepmul).max(1);
2665 let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2666 if self.gc_step(budget) {
2667 self.heap.rearm_gc_pause(self.gc_pause);
2668 }
2669 }
2670
2671 /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2672 /// roots (open upvalues, which are not first-class Values). Shared by the
2673 /// full collector and the incremental-sweep driver so both snapshot the
2674 /// exact same live set.
2675 fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2676 let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2677 roots.push(Value::Table(self.globals));
2678 for mt in self.type_mt.into_iter().flatten() {
2679 roots.push(Value::Table(mt));
2680 }
2681 for &n in &self.mm_names {
2682 roots.push(Value::Str(n));
2683 }
2684 // root only the running thread's live registers (PUC marks [stack, top)):
2685 // freed temporaries above `gc_top` are excluded so weak values stranded
2686 // there are not pinned. Suspended threads (main_ctx, other coroutines)
2687 // stay whole-rooted below — safe over-rooting, and they are not the
2688 // thread whose weak-table loop is under test.
2689 let live = (self.gc_top as usize).min(self.stack.len());
2690 roots.extend_from_slice(&self.stack[..live]);
2691 for cf in &self.frames {
2692 match cf {
2693 CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2694 CallFrame::Cont(NativeCont {
2695 kind: ContKind::Xpcall { handler },
2696 ..
2697 }) => roots.push(*handler),
2698 CallFrame::Cont(NativeCont {
2699 kind: ContKind::Close(cc),
2700 ..
2701 }) => {
2702 // Root the error threaded through this close chain so a
2703 // `collectgarbage()` inside a sibling `__close` handler
2704 // does not free it before the next handler is invoked
2705 // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2706 if let Some(e) = cc.pending {
2707 roots.push(e);
2708 }
2709 if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2710 roots.push(err);
2711 }
2712 }
2713 CallFrame::Cont(_) => {}
2714 }
2715 }
2716 if let Some(e) = self.closing_err {
2717 roots.push(e);
2718 }
2719 // B12 host roots — Lua-facade handles keep their referenced
2720 // values alive across calls/yields. Trace the whole vector;
2721 // unused slots (post-`unpin_all`) carry Value::Nil which the
2722 // GC ignores.
2723 for slot in &self.host_roots {
2724 // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2725 roots.push(slot.value);
2726 }
2727 // v2.1 — `table.sort` and similar builtins stash their working
2728 // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2729 // comparator callback doesn't free strings/tables snapshotted
2730 // off the live table (sort.lua's `load(..)(); collectgarbage()`
2731 // compare regression).
2732 for buf in &self.sort_scratch {
2733 roots.extend_from_slice(buf);
2734 }
2735 // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2736 // mid-execution. Without rooting them here, a `collectgarbage()`
2737 // invoked inside the running native (sort.lua AA `load(..)();
2738 // collectgarbage()` compare callback regression) sweeps the
2739 // closure that's actively executing, leaving `nc.upvals`
2740 // dangling and the Rust local `nc` pointing at recycled memory
2741 // — the SIGSEGV pops on the very next field access or pop.
2742 for &nc in &self.running_natives {
2743 roots.push(Value::Native(nc));
2744 }
2745 // the running thread's debug hook (suspended threads root theirs via
2746 // Coro::trace / the main_ctx sweep below)
2747 if let Some(h) = self.hook.func {
2748 roots.push(h);
2749 }
2750 // the running coroutine (its saved-context fields live in the VM, but
2751 // the object itself + its resumer chain must stay reachable)
2752 if let Some(co) = self.current {
2753 roots.push(Value::Coro(co));
2754 }
2755 if let Some(mc) = self.main_coro {
2756 roots.push(Value::Coro(mc));
2757 }
2758 // debug.getregistry() and io library state
2759 if let Some(r) = self.registry {
2760 roots.push(Value::Table(r));
2761 }
2762 if let Some(mt) = self.file_mt {
2763 roots.push(Value::Table(mt));
2764 }
2765 if let Some(f) = self.io_input {
2766 roots.push(Value::Userdata(f));
2767 }
2768 if let Some(f) = self.io_output {
2769 roots.push(Value::Userdata(f));
2770 }
2771 // the main thread's saved context while a coroutine runs
2772 if let Some(m) = &self.main_ctx {
2773 roots.extend_from_slice(&m.stack);
2774 if let Some(h) = m.hook.func {
2775 roots.push(h);
2776 }
2777 for cf in &m.frames {
2778 match cf {
2779 CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2780 CallFrame::Cont(NativeCont {
2781 kind: ContKind::Xpcall { handler },
2782 ..
2783 }) => roots.push(*handler),
2784 CallFrame::Cont(_) => {}
2785 }
2786 }
2787 }
2788 let mut extra: Vec<*mut GcHeader> = self
2789 .open_upvals
2790 .iter()
2791 .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2792 .collect();
2793 if let Some(m) = &self.main_ctx {
2794 extra.extend(
2795 m.open_upvals
2796 .iter()
2797 .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2798 );
2799 }
2800 (roots, extra)
2801 }
2802
2803 /// Run a full collection with the VM's roots, then run any `__gc`
2804 /// finalizers the collection scheduled. A no-op (returns 0) when already
2805 /// inside a finalizer — the collector is not reentrant (PUC).
2806 pub fn collect_garbage(&mut self) -> usize {
2807 if self.gc_finalizing {
2808 return 0;
2809 }
2810 let (roots, extra) = self.gc_roots();
2811 let freed = self.heap.collect_ex(&roots, &extra);
2812 self.run_finalizers();
2813 freed
2814 }
2815
2816 /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2817 /// threw; gc.lua's "errors during collection" probe relies on it. This
2818 /// variant runs the same cycle but propagates the captured finalizer
2819 /// error to the explicit caller.
2820 pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2821 if self.gc_finalizing {
2822 return Ok(0);
2823 }
2824 let (roots, extra) = self.gc_roots();
2825 let freed = self.heap.collect_ex(&roots, &extra);
2826 self.run_finalizers_or_err()?;
2827 Ok(freed)
2828 }
2829
2830 /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
2831 /// should report fail rather than collect).
2832 pub(crate) fn gc_is_finalizing(&self) -> bool {
2833 self.gc_finalizing
2834 }
2835
2836 /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2837 /// = true indicates more pieces follow (concatenated until the first
2838 /// `to_cont = false` call flushes the whole line). Mirrors
2839 /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2840 /// * If the buffer is fresh, `to_cont` is false, and the message is
2841 /// `@<word>`, treat as a control message — only `@on` / `@off` are
2842 /// recognised; any other `@…` is silently ignored.
2843 /// * Otherwise, while the state is `Off`, drop the piece; while `On`,
2844 /// accumulate, and flush to stderr + `warn_log` on the
2845 /// non-continuation call.
2846 pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
2847 if self.warn_buf.is_empty()
2848 && !to_cont
2849 && let Some(b'@') = msg.first().copied()
2850 {
2851 match &msg[1..] {
2852 b"on" => self.warn_state = WarnState::On,
2853 b"off" => self.warn_state = WarnState::Off,
2854 _ => {} // unknown control — silently ignored (PUC checkcontrol)
2855 }
2856 return;
2857 }
2858 if self.warn_state == WarnState::Off {
2859 // drop continuation pieces too — PUC `warnfoff` is the trampoline
2860 return;
2861 }
2862 self.warn_buf.extend_from_slice(msg);
2863 if !to_cont {
2864 let line = std::mem::take(&mut self.warn_buf);
2865 eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
2866 self.warn_log.push(line);
2867 }
2868 }
2869
2870 /// Drain the in-process warning log (one entry per emitted message, sans
2871 /// `"Lua warning: "` prefix and newline). For test harnesses that want to
2872 /// assert on warn output without scraping stderr.
2873 pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
2874 std::mem::take(&mut self.warn_log)
2875 }
2876
2877 /// Arm the cooperative instruction budget (P09 embedding). The run loop
2878 /// decrements this once per dispatch turn; on zero it raises a catchable
2879 /// `"instruction budget exceeded"` error and disarms itself so the host
2880 /// can resume with a fresh budget on the next call. `None` removes the
2881 /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
2882 /// short-script semantics.
2883 pub fn set_instr_budget(&mut self, budget: Option<i64>) {
2884 self.instr_budget = budget;
2885 }
2886
2887 /// Remaining instruction budget (None when unbounded).
2888 pub fn instr_budget_remaining(&self) -> Option<i64> {
2889 self.instr_budget
2890 }
2891
2892 /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
2893 /// **must** disable JIT when relying on `instr_budget` — see the
2894 /// `jit_enabled` field doc for the rationale.
2895 pub fn set_jit_enabled(&mut self, enabled: bool) {
2896 self.jit.enabled = enabled;
2897 }
2898
2899 /// Current JIT enable state.
2900 pub fn jit_enabled(&self) -> bool {
2901 self.jit.enabled
2902 }
2903
2904 /// Toggle the trace JIT (P12). Off by default while the sprint
2905 /// develops. When enabled, hot back-edges are counted on
2906 /// `Proto.trace_hot_count`; once the counter passes
2907 /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
2908 /// mode at the back-edge target. Stays a no-op until S2's
2909 /// trace lowerer and S3's dispatcher land.
2910 pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
2911 self.jit.trace_enabled = enabled;
2912 }
2913
2914 /// P16-A — opt-in flag for the self-link cycle catch. See field
2915 /// docs for the correctness blocker. Default `false`.
2916 pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
2917 self.jit.p16_self_link_enabled = enabled;
2918 }
2919
2920 /// Current state of the P16-A self-link cycle catch.
2921 pub fn p16_self_link_enabled(&self) -> bool {
2922 self.jit.p16_self_link_enabled
2923 }
2924
2925 /// Current trace-JIT enable state.
2926 pub fn trace_jit_enabled(&self) -> bool {
2927 self.jit.trace_enabled
2928 }
2929
2930 /// Number of traces that have closed cleanly (looped back to the
2931 /// head PC) since this Vm was constructed. Cumulative; used by
2932 /// tests + tuning. Will become the dominant signal once S2's
2933 /// compile + cache lands.
2934 pub fn trace_closed_count(&self) -> u64 {
2935 self.jit.counters.closed
2936 }
2937
2938 /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
2939 /// hit an un-recordable op — the latter lands at S2).
2940 pub fn trace_aborted_count(&self) -> u64 {
2941 self.jit.counters.aborted
2942 }
2943
2944 /// P13-S13-G v2 — number of compiled traces whose close shape
2945 /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
2946 /// pin `dispatchable=false` because the dispatcher can't
2947 /// resume at a depth>0 PC without the matching CallFrames.
2948 /// S4-step4b's frame-mat helper could synthesise those, but
2949 /// the InlineAbort emit path isn't wired up yet — fresh
2950 /// pickup work for S13-G v2-full.
2951 pub fn trace_inline_abort_count(&self) -> u64 {
2952 self.jit.counters.inline_abort
2953 }
2954
2955 /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
2956 pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
2957 &self.jit.counters.dispatch_off_reasons
2958 }
2959
2960 /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
2961 pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
2962 &self.jit.counters.compile_failed_reasons
2963 }
2964
2965 /// P13-S13-H — see `JitCounters::closed_lens`. Returns
2966 /// `(is_call_triggered, ops_len)` for every trace that closed.
2967 pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
2968 &self.jit.counters.closed_lens
2969 }
2970
2971 /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
2972 /// Per-reason close-cause counts (recorder-side abort/discard +
2973 /// lowerer-side dispatch_off labels) keyed by `&'static str`.
2974 pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
2975 &self.jit.counters.close_cause_counts
2976 }
2977
2978 /// v2.0 Track-R R3b — number of compiled traces whose
2979 /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
2980 /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
2981 /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
2982 /// hot loop with p16-on. R3b keeps `dispatchable = false` even
2983 /// when this count bumps; R3d will lift it.
2984 pub fn trace_downrec_link_compiled_count(&self) -> u64 {
2985 self.jit.counters.downrec_link_compiled
2986 }
2987
2988 /// v2.0 Track-R R3c — see
2989 /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
2990 /// of times the dispatcher's `is_downrec_sentinel` arm fired and
2991 /// classified the return as a caller-pc-guard HIT.
2992 pub fn trace_downrec_dispatched_count(&self) -> u64 {
2993 self.jit.counters.downrec_dispatched
2994 }
2995
2996 /// v2.0 Track-R R3c — see
2997 /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
2998 /// times the dispatcher entered a `downrec_link`-bearing trace and
2999 /// the trace returned via the lowerer's deopt block (caller-pc
3000 /// guard MISS), or the dispatcher itself force-deopted via the
3001 /// stitch-cycle checkpoint.
3002 pub fn trace_downrec_deopt_count(&self) -> u64 {
3003 self.jit.counters.downrec_deopt
3004 }
3005
3006 /// v2.0 Track-R R3d — see
3007 /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3008 /// Number of compiled traces whose lowerer emitted a multi-way
3009 /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3010 /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3011 pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3012 self.jit.counters.multi_way_guard_emitted
3013 }
3014
3015 /// P12-S2.C — number of closed traces the lowerer compiled and
3016 /// parked on `Proto.traces`. Re-records of the same head_pc are
3017 /// deduped (the second close finds the head_pc already cached
3018 /// and skips compile), so this never exceeds `trace_closed_count`.
3019 pub fn trace_compiled_count(&self) -> u64 {
3020 self.jit.counters.compiled
3021 }
3022
3023 /// v2.1 Phase 1I.B — number of times the recorder captured a
3024 /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3025 /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3026 /// by the Phase 1I.B opt-in fire test to verify the env gate
3027 /// wiring round-trips end-to-end (env -> recorder -> snapshot
3028 /// -> counter -> getter -> assertion).
3029 pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3030 self.jit.counters.field_ic_snapshot_captured
3031 }
3032
3033 /// P12-S2.C — number of closed traces the lowerer rejected
3034 /// (any of the bail conditions in
3035 /// `crate::jit::trace::try_compile_trace`).
3036 pub fn trace_compile_failed_count(&self) -> u64 {
3037 self.jit.counters.compile_failed
3038 }
3039
3040 /// P12-S3 — number of times the dispatcher jumped into a
3041 /// compiled trace. Bumps on every entry; `trace_deopt_count`
3042 /// counts the subset where the trace returned with a parked
3043 /// `jit_pending_err`.
3044 pub fn trace_dispatched_count(&self) -> u64 {
3045 self.jit.counters.dispatched
3046 }
3047
3048 /// P12-S3 — number of trace entries that came back with
3049 /// `jit_pending_err` set (typically a metatable shadowed an
3050 /// index inside a helper, forcing the dispatcher to fall back
3051 /// to the interpreter without committing the trace's result).
3052 pub fn trace_deopt_count(&self) -> u64 {
3053 self.jit.counters.deopt
3054 }
3055
3056 /// P15-A v1 — number of times the dispatcher started a side
3057 /// trace recording (an `exit_hit_counts` slot crossed
3058 /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3059 /// was None and trace JIT was enabled). Each unit is exactly one
3060 /// `start_side_trace` call; the actual compile success counts
3061 /// under [`Self::trace_compiled_count`] like any other trace.
3062 /// Probe use: distinguishes the "side-trace pipeline fired"
3063 /// signal from the "primary back-edge / call-trigger fired"
3064 /// signal so v0-v3 architectural progress is visible without
3065 /// reading per-counter histograms.
3066 pub fn trace_side_trace_started_count(&self) -> u64 {
3067 self.jit.counters.side_trace_started
3068 }
3069
3070 /// P15-A v2-A — number of side-trace recordings that closed,
3071 /// compiled successfully, AND patched their parent's
3072 /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3073 /// dispatch through these ptrs yet (v2-B/C job), but the
3074 /// counter + ptr write proves the compile + link pipeline is
3075 /// complete end-to-end.
3076 pub fn trace_side_trace_compiled_count(&self) -> u64 {
3077 self.jit.counters.side_trace_compiled
3078 }
3079
3080 /// P15-A v2-C-A5-C — number of side traces that compiled
3081 /// successfully but were SHEDDED by the close-handler shape-
3082 /// match gate (`exit_tags_match_entry_tags`). High ratios
3083 /// vs. `trace_side_trace_compiled_count` indicate the
3084 /// architecture is shedding lots of would-be side traces;
3085 /// useful as a tuning probe for future relaxation of the
3086 /// gate or for child-IR re-specialisation against parent's
3087 /// exit shape.
3088 pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3089 self.jit.counters.side_trace_shape_mismatch
3090 }
3091
3092 /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3093 /// classified as `crate::jit::trace::EscapeState::Sinkable`
3094 /// across every successfully compiled trace on this Vm. The
3095 /// count is post-demotion: sites pre-emit drops back to Escaped
3096 /// for not meeting v1 sunk-emit criteria are NOT counted.
3097 /// `trace_sunk_alloc_count` matches one-for-one today (every
3098 /// surviving Sinkable site goes through sunk emit).
3099 pub fn trace_sinkable_seen_count(&self) -> u64 {
3100 self.jit.counters.sinkable_seen
3101 }
3102
3103 /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3104 pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3105 self.jit.counters.accum_bufferable_seen
3106 }
3107
3108 /// P15-prep — total dispatch hits across all known traces,
3109 /// broken into hot-exit telemetry (max single-exit count,
3110 /// total dispatches, exit count). Used by probes to identify
3111 /// hot side-exits as side-trace candidates.
3112 ///
3113 /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3114 /// recursively, so inner functions' traces are reported.
3115 pub fn trace_exit_hit_summary(
3116 &self,
3117 cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3118 ) -> Vec<(u32, Vec<u32>)> {
3119 fn walk(
3120 proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3121 out: &mut Vec<(u32, Vec<u32>)>,
3122 ) {
3123 for ct in proto.traces.borrow().iter() {
3124 let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3125 out.push((ct.head_pc, counts));
3126 }
3127 for inner in proto.protos.iter() {
3128 walk(*inner, out);
3129 }
3130 }
3131 let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3132 walk(cl.proto, &mut out);
3133 out
3134 }
3135
3136 /// P15-A v0 — surface every side-exit slot whose hit count is
3137 /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3138 /// `cl.proto` (recursively walking `proto.protos`). Returned
3139 /// entries are side-trace candidates: each carries the parent
3140 /// trace's `(head_proto, head_pc)`, the exit's index in the
3141 /// parent's `exit_hit_counts`, and the side trace's natural
3142 /// entry shape (`cont_pc` + `exit_tags`).
3143 ///
3144 /// Layout of `exit_hit_counts` (mirrored by the iter):
3145 /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3146 /// window-sized exit_tags).
3147 /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3148 /// → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3149 /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3150 /// exit_tags = `ct.exit_tags`).
3151 pub fn hot_exit_iter(
3152 &self,
3153 cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3154 ) -> Vec<crate::jit::trace::HotExitInfo> {
3155 use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3156 fn walk(
3157 proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3158 out: &mut Vec<HotExitInfo>,
3159 ) {
3160 for ct in proto.traces.borrow().iter() {
3161 let inline_n = ct.per_exit_inline.len();
3162 let tags_n = ct.per_exit_tags.len();
3163 debug_assert_eq!(
3164 ct.exit_hit_counts.len(),
3165 inline_n + tags_n + 1,
3166 "exit_hit_counts layout invariant violated"
3167 );
3168 for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3169 let hits = cell.get();
3170 if hits < HOTEXIT_THRESHOLD {
3171 continue;
3172 }
3173 let (cont_pc, exit_tags) = if idx < inline_n {
3174 let ent = &ct.per_exit_inline[idx];
3175 (ent.cont_pc, ent.exit_tags.clone())
3176 } else if idx < inline_n + tags_n {
3177 let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3178 (*pc, tags.clone())
3179 } else {
3180 (ct.head_pc, ct.exit_tags.clone())
3181 };
3182 out.push(HotExitInfo {
3183 head_proto: proto,
3184 head_pc: ct.head_pc,
3185 exit_idx: idx,
3186 hits,
3187 cont_pc,
3188 exit_tags,
3189 });
3190 }
3191 }
3192 for inner in proto.protos.iter() {
3193 walk(*inner, out);
3194 }
3195 }
3196 let mut out: Vec<HotExitInfo> = Vec::new();
3197 walk(cl.proto, &mut out);
3198 out
3199 }
3200
3201 /// P12-S5-B — sum of NewTable sites that actually took the
3202 /// sunk-emit path across every successfully compiled trace on
3203 /// this Vm. Each counted site skips its heap `Gc<Table>`
3204 /// allocation per dispatch; the array part lives as Cranelift
3205 /// `Variable`s for the duration of the trace.
3206 pub fn trace_sunk_alloc_count(&self) -> u64 {
3207 self.jit.counters.sunk_alloc
3208 }
3209
3210 /// P12-S5-C — sum of materialise-helper emit sites across every
3211 /// successfully compiled trace on this Vm. Each unit is a
3212 /// (site × cmp side-exit) pair whose IR reconstructs a heap
3213 /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3214 /// emit is wiring materialise into the right side-exits.
3215 pub fn trace_materialize_emit_count(&self) -> u64 {
3216 self.jit.counters.materialize_emit
3217 }
3218
3219 /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3220 /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3221 /// replaces a `Heap::new_closure_inline` call on the dispatch
3222 /// path; the count is static (one per matching op per compiled
3223 /// trace), summed at compile success.
3224 pub fn trace_closure_emit_count(&self) -> u64 {
3225 self.jit.counters.closure_emit
3226 }
3227
3228 /// v2.0 Stage 7 polish 6 fire experiment — see
3229 /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3230 /// Number of compiled traces whose `per_exit_inline.len() > 0`
3231 /// (depth>0 inlined cmp side-exits emitted).
3232 pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3233 self.jit.counters.per_exit_inline_compiled
3234 }
3235
3236 /// v2.0 Stage 7 polish 6 fire experiment — see
3237 /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3238 /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3239 /// `dispatchable == true` — i.e. the count of compiled traces
3240 /// that would actually exercise the AOT polish 6 chain-reloc +
3241 /// deploy-resolver path.
3242 pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3243 self.jit.counters.per_exit_inline_dispatchable
3244 }
3245
3246 /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3247 /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3248 /// whether a self-recursive function actually walked the depth
3249 /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3250 /// across traces and Vm activations; reset only on `Vm::new`.
3251 pub fn trace_max_depth_seen(&self) -> u8 {
3252 self.jit.max_depth_seen
3253 }
3254
3255 /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3256 /// dispatch time). The frame-materialization helper reads `.base`
3257 /// to compute offsets for each inlined frame's window.
3258 #[doc(hidden)]
3259 pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3260 match self.frames.last() {
3261 Some(CallFrame::Lua(f)) => Some(*f),
3262 _ => None,
3263 }
3264 }
3265
3266 /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3267 /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3268 /// by `luna-tools` (`luna-profile`'s sampler walks this from
3269 /// inside a `Count` hook). Sibling-module scope: not part of the
3270 /// public embedder surface, but `inspect::frames_for_profile` is.
3271 #[doc(hidden)]
3272 pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3273 &self.frames
3274 }
3275
3276 /// P12-S4-step4b — ensure the value stack covers indices
3277 /// `[0..need)`. Extends with Nil if shorter. Called by the
3278 /// frame-materialization helper before pushing an inlined frame
3279 /// whose register window may exceed the current stack length.
3280 #[doc(hidden)]
3281 pub fn jit_ensure_stack(&mut self, need: usize) {
3282 if self.stack.len() < need {
3283 self.stack.resize(need, Value::Nil);
3284 }
3285 }
3286
3287 /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3288 /// `__close` handlers would run (any active tbc slot ≥ from
3289 /// holding a non-nil/false Value); if so, parks a deopt sentinel
3290 /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3291 /// IR branches to the deopt block. Otherwise performs the safe
3292 /// part of close — `close_from(from)` to close open upvals +
3293 /// drop any drained tbc entries ≥ from — and returns 0.
3294 ///
3295 /// Returns are i64-shaped so the cranelift import sig stays
3296 /// trivial (i64 → i64 mapping).
3297 #[doc(hidden)]
3298 pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3299 if self.jit.pending_err.is_some() {
3300 return 1;
3301 }
3302 let Some(f) = self.jit_last_lua_frame() else {
3303 self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3304 return 1;
3305 };
3306 let from = f.base + start_offset;
3307 let has_handler = self.tbc.iter().any(|&s| {
3308 s >= from && {
3309 let v = self.stack[s as usize];
3310 !matches!(v, Value::Nil | Value::Bool(false))
3311 }
3312 });
3313 if has_handler {
3314 self.jit.pending_err =
3315 Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3316 return 1;
3317 }
3318 self.close_from(from);
3319 // Drain any tbc entries ≥ from (they're nil/false stubs the
3320 // interpreter's drive_close would have skipped silently).
3321 while let Some(&s) = self.tbc.last() {
3322 if s < from {
3323 break;
3324 }
3325 self.tbc.pop();
3326 }
3327 0
3328 }
3329
3330 /// P12-S7-B — spill the trace's current value for a register to
3331 /// the underlying `vm.stack[base + slot_offset]`. Required before
3332 /// an `Op::Closure` whose inner proto has an `in_stack: true`
3333 /// upval at `slot_offset` — the helper's `find_or_create_upval`
3334 /// captures a live pointer to `vm.stack[base + slot_offset]`,
3335 /// which must hold the right value at call time (trace IR's
3336 /// Variable hasn't yet been written back).
3337 ///
3338 /// Parameters arrive as i64 from the IR: `slot_offset` is the
3339 /// caller-frame register index (`u32` in practice, depth=0
3340 /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3341 /// `crate::runtime::value::raw` byte for the slot's RegKind;
3342 /// `raw_bits` is the trace Variable's `use_var` payload
3343 /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3344 /// raw `Gc::as_ptr` cast).
3345 #[doc(hidden)]
3346 pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3347 let Some(f) = self.jit_last_lua_frame() else {
3348 self.jit.pending_err =
3349 Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3350 return;
3351 };
3352 let idx = (f.base as usize) + (slot_offset as usize);
3353 if self.stack.len() <= idx {
3354 self.stack.resize(idx + 1, Value::Nil);
3355 }
3356 // SAFETY: caller (trace JIT IR emit) provides matching
3357 // `(tag, raw_bits)` — same shape produced by Value::unpack.
3358 let v = unsafe {
3359 crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3360 };
3361 self.stack[idx] = v;
3362 }
3363
3364 /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3365 /// Mirrors the interp arm (this file ~L5316): copies the
3366 /// generator/state/control triple from `R[A..=A+2]` to
3367 /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3368 /// the iterator function via `begin_call`. v2 only handles
3369 /// `Value::Native` iterators (the canonical `ipairs_iter` /
3370 /// `next` builtins) — a Lua-closure iterator would push a Lua
3371 /// frame mid-trace, breaking `recording_frame_base`, so we
3372 /// deopt by parking a `pending_err` and returning `-1`.
3373 ///
3374 /// `slot_offset` is the caller-frame register index (=
3375 /// `inst.a()` decoded from a u32-wide field). `nvars` is
3376 /// `inst.c() as i32` — the caller's expected return count.
3377 /// P12-S12-C v1 — refresh only the raw payload of
3378 /// `vm.stack[base + slot_offset]`, preserving its existing
3379 /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3380 /// uses this when the slot's `RegKind` is `Unset` (no compile-
3381 /// time tag info; commonly `Str` slots which the trace doesn't
3382 /// model). The interp's previous execution of the same op
3383 /// already populated the slot with the right tag — the trace
3384 /// only needs to swap in its current raw value.
3385 #[doc(hidden)]
3386 pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3387 let Some(f) = self.jit_last_lua_frame() else {
3388 return;
3389 };
3390 let idx = (f.base as usize) + (slot_offset as usize);
3391 if idx >= self.stack.len() {
3392 return;
3393 }
3394 let (tag, _) = self.stack[idx].unpack();
3395 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3396 self.stack[idx] = unsafe {
3397 crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3398 };
3399 }
3400
3401 /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3402 ///
3403 /// Mirrors the interp arm (this file ~L5112): `self.top =
3404 /// base + a + n; concat_run(base + a)`. Result lands at
3405 /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3406 /// deopt (any error from `concat_run` OR detection that the
3407 /// metamethod path was taken — `concat_run` returns `Ok(())`
3408 /// after `begin_meta_call` which has pushed a Lua frame the
3409 /// trace can't safely continue past).
3410 ///
3411 /// The frame-push detection uses `pre/post frames.len()` and
3412 /// unwinds any pushed frames before deopting, so the
3413 /// dispatcher's existing deopt path sees a clean stack.
3414 #[doc(hidden)]
3415 pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3416 if self.jit.pending_err.is_some() {
3417 return -1;
3418 }
3419 let Some(f) = self.jit_last_lua_frame() else {
3420 self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3421 return -1;
3422 };
3423 let abs_a = f.base + slot_offset;
3424 self.top = abs_a + n as u32;
3425 let pre_frames = self.frames.len();
3426 let result = self.concat_run(abs_a);
3427 let post_frames = self.frames.len();
3428 // Frame-push = metamethod path taken (begin_meta_call pushed
3429 // a Lua frame). The trace can't continue past it; unwind +
3430 // deopt so interp redoes Op::Concat in the slow path.
3431 while self.frames.len() > pre_frames {
3432 frames_pop_sync(&mut self.frames, &mut self.frames_top);
3433 }
3434 if let Err(e) = result {
3435 self.jit.pending_err = Some(e);
3436 return -1;
3437 }
3438 if post_frames > pre_frames {
3439 self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3440 return -1;
3441 }
3442 0
3443 }
3444
3445 /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3446 /// accumulator buffer pool, returning a raw pointer. The trace
3447 /// fn's IR holds this pointer in a stack slot through the loop
3448 /// and calls `jit_str_buf_extend` per iter. If the pool is
3449 /// empty, allocate fresh.
3450 ///
3451 /// Safety: the returned pointer is valid until
3452 /// `jit_str_buf_release` is called or the Vm is dropped. The
3453 /// caller MUST not retain it across `enter_jit` boundaries.
3454 #[doc(hidden)]
3455 pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3456 let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3457 // Move into a Box so the pointer is stable until release.
3458 Box::into_raw(Box::new(buf))
3459 }
3460
3461 /// P14-S14-B v2 — return a previously-acquired buffer to the
3462 /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3463 /// buffer is `clear`ed (capacity retained) so the next acquire
3464 /// gets a ready-to-extend Vec.
3465 ///
3466 /// Safety: `buf` must have been returned by a prior
3467 /// `jit_str_buf_acquire` on the same Vm.
3468 #[doc(hidden)]
3469 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3470 pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3471 if buf.is_null() {
3472 return;
3473 }
3474 // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3475 let mut owned = unsafe { Box::from_raw(buf) };
3476 owned.clear();
3477 if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3478 self.jit.str_buf_pool.push(*owned);
3479 }
3480 // Else: drop the buffer.
3481 }
3482
3483 /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3484 /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3485 /// the piece slot) and passes it through; we treat it as a
3486 /// `*mut LuaStr` and append its bytes.
3487 ///
3488 /// Returns 0 on success, -1 if the piece isn't a Str (would
3489 /// trip __concat metamethod path → deopt to interp).
3490 ///
3491 /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3492 /// trace's piece slot raw bits.
3493 #[doc(hidden)]
3494 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3495 pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3496 if buf.is_null() || str_ptr == 0 {
3497 return -1;
3498 }
3499 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3500 let buf = unsafe { &mut *buf };
3501 let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3502 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3503 let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3504 buf.extend_from_slice(bytes);
3505 0
3506 }
3507
3508 /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3509 /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3510 /// the trace to write into the accumulator slot.
3511 ///
3512 /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3513 /// (the v2 hard cap; the trace deopts).
3514 ///
3515 /// Safety: `buf` from prior `acquire`. The buffer is left
3516 /// CLEAR (drained) ready for `release`.
3517 #[doc(hidden)]
3518 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3519 pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3520 if buf.is_null() {
3521 return 0;
3522 }
3523 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3524 let buf = unsafe { &mut *buf };
3525 let bytes = std::mem::take(buf);
3526 // v2 hard cap at 256KB per RFC Q3.
3527 if bytes.len() > 256 * 1024 {
3528 return 0;
3529 }
3530 let gc = self.heap.intern(&bytes);
3531 gc.as_ptr() as i64
3532 }
3533
3534 /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
3535 ///
3536 /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
3537 /// v3: ipairs `inext` fast path at the top — skip begin_call
3538 /// when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
3539 /// R[A+2]=Int.
3540 /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
3541 /// caller-provided buffers + return R[A+4]'s tag byte. Lets
3542 /// emit skip 3 separate `luna_jit_stack_load` calls and 1
3543 /// `luna_jit_stack_tag` call by reading the buffer via
3544 /// cranelift `stack_load` IR instead. Returns -1 on deopt.
3545 #[doc(hidden)]
3546 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
3547 pub fn jit_op_tforcall(
3548 &mut self,
3549 slot_offset: u32,
3550 nvars: i32,
3551 ctrl_out: *mut i64,
3552 key_out: *mut i64,
3553 val_out: *mut i64,
3554 ) -> i64 {
3555 if self.jit.pending_err.is_some() {
3556 return -1;
3557 }
3558 let Some(f) = self.jit_last_lua_frame() else {
3559 self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
3560 return -1;
3561 };
3562 let abs = f.base + slot_offset;
3563 let need = (abs + 7) as usize;
3564 if self.stack.len() < need {
3565 self.stack.resize(need, Value::Nil);
3566 }
3567 // v3 fast path.
3568 let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
3569 && std::ptr::fn_addr_eq(
3570 n.f,
3571 crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
3572 )
3573 && let Value::Table(t) = self.stack[(abs + 1) as usize]
3574 && t.metatable().is_none()
3575 && let Value::Int(i) = self.stack[(abs + 2) as usize]
3576 {
3577 let next_i = i.wrapping_add(1);
3578 let v = t.get_int(next_i);
3579 if v.is_nil() {
3580 self.stack[(abs + 4) as usize] = Value::Nil;
3581 } else {
3582 self.stack[(abs + 4) as usize] = Value::Int(next_i);
3583 if (nvars as usize) >= 2 {
3584 self.stack[(abs + 5) as usize] = v;
3585 }
3586 for j in 2..nvars as usize {
3587 let slot = abs + 4 + j as u32;
3588 if (slot as usize) < self.stack.len() {
3589 self.stack[slot as usize] = Value::Nil;
3590 }
3591 }
3592 }
3593 true
3594 } else {
3595 false
3596 };
3597 if !took_fast_path {
3598 // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
3599 // route through begin_call. Lua-closure iters would push
3600 // a Lua frame mid-trace → deopt.
3601 self.stack[(abs + 4) as usize] = self.stack[abs as usize];
3602 self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
3603 self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
3604 if !matches!(self.stack[abs as usize], Value::Native(_)) {
3605 self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
3606 return -1;
3607 }
3608 if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
3609 self.jit.pending_err = Some(e);
3610 return -1;
3611 }
3612 }
3613 // v4 batched writeback — fill the caller's buffers with the
3614 // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
3615 // reload via cranelift `stack_load` instead of separate
3616 // `luna_jit_stack_load` helper calls.
3617 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3618 let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
3619 let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
3620 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3621 let key_raw = unsafe { key_rv.zero };
3622 let val_raw = if (nvars as usize) >= 2 {
3623 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3624 unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
3625 } else {
3626 0u64
3627 };
3628 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3629 unsafe {
3630 ctrl_out.write(ctrl_raw as i64);
3631 key_out.write(key_raw as i64);
3632 val_out.write(val_raw as i64);
3633 }
3634 key_tag as i64
3635 }
3636
3637 /// P12-S12-B-v2 — load the raw `i64` payload of
3638 /// `vm.stack[base + slot_offset]` for the active trace's head
3639 /// Lua frame. Used to reload trace IR `Variable`s after a
3640 /// helper has written to `vm.stack` directly (e.g. TForCall's
3641 /// iter results land at `R[A+4..A+4+nvars]`).
3642 #[doc(hidden)]
3643 pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3644 let Some(f) = self.jit_last_lua_frame() else {
3645 return 0;
3646 };
3647 let idx = (f.base as usize) + (slot_offset as usize);
3648 if idx >= self.stack.len() {
3649 return 0;
3650 }
3651 let v = self.stack[idx];
3652 let (_, raw) = v.unpack();
3653 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3654 unsafe { raw.zero as i64 }
3655 }
3656
3657 /// P12-S12-B-v2 — read the tag byte of
3658 /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3659 /// to dispatch on the iterator's return-key tag at runtime
3660 /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3661 /// deopt for v2).
3662 #[doc(hidden)]
3663 pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3664 let Some(f) = self.jit_last_lua_frame() else {
3665 return crate::runtime::value::raw::NIL;
3666 };
3667 let idx = (f.base as usize) + (slot_offset as usize);
3668 if idx >= self.stack.len() {
3669 return crate::runtime::value::raw::NIL;
3670 }
3671 self.stack[idx].unpack().0
3672 }
3673
3674 /// P12-S4-step4b — push a Lua frame onto the call stack with
3675 /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3676 /// at trace side-exits to recreate the inlined call activations
3677 /// the lowerer compiled past. The contract (enforced by the
3678 /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3679 /// `nresults` is the caller's expected count (today always 1
3680 /// because the lowerer bails Op::Call C != 2), and the caller
3681 /// has already called `jit_ensure_stack` to cover
3682 /// `[0..base + cl.proto.max_stack)`.
3683 #[doc(hidden)]
3684 pub fn jit_push_inlined_frame(
3685 &mut self,
3686 cl: Gc<LuaClosure>,
3687 base: u32,
3688 pc: u32,
3689 nresults: i32,
3690 ) {
3691 frames_push_sync(
3692 &mut self.frames,
3693 &mut self.frames_top,
3694 CallFrame::Lua(Frame {
3695 closure: cl,
3696 base,
3697 pc,
3698 // Lua call ABI: callee R[0] sits at caller R[A+1], so
3699 // callee.base = caller.base + A + 1; func_slot is
3700 // caller.base + A = callee.base - 1.
3701 func_slot: base - 1,
3702 n_varargs: 0,
3703 nresults,
3704 hook_oldpc: u32::MAX,
3705 from_c: false,
3706 tm: None,
3707 is_hook: false,
3708 tailcalls: 0,
3709 }),
3710 );
3711 }
3712
3713 /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
3714 /// should set to `false` so `load`/`loadstring` reject bytecode input
3715 /// (which bypasses parser limits and could exploit verifier gaps).
3716 pub fn set_bytecode_loading(&mut self, enabled: bool) {
3717 self.bytecode_loading = enabled;
3718 }
3719
3720 /// Current bytecode-loading gate state.
3721 pub fn bytecode_loading(&self) -> bool {
3722 self.bytecode_loading
3723 }
3724
3725 /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
3726 /// bytecode is a strictly larger trust surface than luna's own dump
3727 /// format (third-party toolchain bugs, malformed chunks, unknown
3728 /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
3729 /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
3730 pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
3731 self.puc_bytecode_loading = enabled;
3732 }
3733
3734 /// Current PUC bytecode-loading gate state.
3735 pub fn puc_bytecode_loading(&self) -> bool {
3736 self.puc_bytecode_loading
3737 }
3738
/// Default loader input budget: 256 MiB (`256 << 20` bytes).
///
/// Both `Vm::load` and the Lua-level `load(reader, ...)` refuse any
/// source whose byte length crosses this cap. They return the
/// PUC-shaped `not enough memory` error instead of letting the host
/// allocator try (and crash) to hold the next chunk.
pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 << 20;
3746
3747 /// Set the loader input byte budget (see
3748 /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
3749 /// effectively disable. Smaller caps are honored verbatim — a 0
3750 /// cap rejects every non-empty source.
3751 pub fn set_loader_input_budget(&mut self, bytes: usize) {
3752 self.loader_input_budget = bytes;
3753 }
3754
3755 /// Current loader input byte budget.
3756 pub fn loader_input_budget(&self) -> usize {
3757 self.loader_input_budget
3758 }
3759
3760 /// Take the error traceback captured at the latest error point and
3761 /// reset it. Embedders should call this immediately after a failed
3762 /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3763 /// entry clears it. Returns `None` if no error was in flight.
3764 pub fn take_error_traceback(&mut self) -> Option<String> {
3765 self.error_traceback
3766 .take()
3767 .map(|b| String::from_utf8_lossy(&b).into_owned())
3768 }
3769
/// Arm the soft memory cap (P09 embedding). The run loop checks the
/// heap's tracked byte usage between dispatch turns; on overshoot it
/// first runs a full collect, and if `bytes` still exceeds the cap it
/// raises a catchable `"memory cap exceeded"` Lua error and disarms
/// itself (fire-once: re-arm before the next `call_value` if reusing
/// the Vm across requests). `None` removes the cap. The accounting is
/// approximate — internal Vec/Box capacity overhead is not tracked,
/// so embedders should size the cap with ~2× margin over the desired
/// hard limit and additionally bound the Vm's lifetime (drop after
/// each request).
///
/// This method only stores `cap` in `heap.mem_cap`; the enforcement
/// described above happens elsewhere, where that field is read.
pub fn set_memory_cap(&mut self, cap: Option<usize>) {
    self.heap.mem_cap = cap;
}
3783
/// Approximate bytes the heap is currently holding. Object shells plus
/// every table's internal array/hash boxes (tracked via
/// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
/// bytecode and closure upvalue slices still go uncounted — this is a
/// lower bound, not a precise `malloc_stats`-style total.
///
/// Side-effect-free: simply forwards to `Heap::bytes`.
pub fn memory_used(&self) -> usize {
    self.heap.bytes()
}
3792
3793 /// Read upvalue slot `i` of the native function currently on top of the
3794 /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3795 /// when no native is running. Public so the C ABI trampoline can fetch
3796 /// the host C function pointer it stashed there at registration time.
3797 pub fn running_native_upvalue(&self, i: usize) -> Value {
3798 match self.running_natives.last() {
3799 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3800 Some(nc) => unsafe {
3801 let upvals = &(*nc.as_ptr()).upvals;
3802 upvals.get(i).copied().unwrap_or(Value::Nil)
3803 },
3804 None => Value::Nil,
3805 }
3806 }
3807
3808 /// Register a table for finalization if its (just-set) metatable carries a
3809 /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3810 /// `__gc` to the metatable afterwards does not retroactively register).
3811 pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3812 if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3813 self.heap.register_finalizable(t);
3814 }
3815 }
3816
3817 /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3818 /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3819 /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3820 /// metatable that `newproxy` returned, which then needs to flow through.
3821 /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3822 /// would re-check at metatable-set time); luna's only userdata
3823 /// finalizables today come via `newproxy`, which registers itself.
3824 #[allow(dead_code)]
3825 pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3826 if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3827 self.heap.register_finalizable_userdata(u);
3828 }
3829 }
3830
/// Run pending `__gc` finalizers (objects the collector resurrected for
/// finalization). Finalizer errors are swallowed — PUC turns them into a
/// warning; they must never propagate to the mutator. Reentrancy-guarded.
///
/// Thin wrapper over [`Self::run_finalizers_or_err`] that deliberately
/// discards its `Result`; the reentrancy guard lives in that function.
fn run_finalizers(&mut self) {
    // Intentional discard: callers of this variant have no way to surface
    // a finalizer error.
    let _ = self.run_finalizers_or_err();
}
3837
/// Run every finalizer the collector queued and report the first error.
///
/// Returns `Ok(())` without doing anything when a finalizer pass is already
/// in progress (`gc_finalizing` is set) or when the heap's to-be-finalized
/// list is empty. Otherwise each queued object's `__gc` metamethod is looked
/// up and, if it is a Lua closure or a native, called with the object as its
/// only argument. All queued objects are processed even after a failure.
///
/// # Errors
///
/// From Lua 5.4 on, a failing finalizer is routed through `emit_warn` and is
/// never returned. On older versions the first failure is remembered —
/// wrapped as `error in __gc metamethod (…)` for 5.2/5.3, passed through
/// unchanged before that — and returned once the whole queue has been run.
fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
    // Reentrancy guard: a finalizer that triggers another pass is a no-op.
    if self.gc_finalizing {
        return Ok(());
    }
    // Taking the list empties the heap's queue for this pass.
    let pending = self.heap.take_tobefnz();
    if pending.is_empty() {
        return Ok(());
    }
    self.gc_finalizing = true;
    let mut first_err: Option<LuaError> = None;
    for obj in pending {
        let gc = self.get_mm(obj, Mm::Gc);
        // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
        // schedule the object for finalization (`__gc = true` is the
        // canonical placeholder); only call it at finalize time when it
        // is actually a function. gc.lua 5.2 :412 wires up exactly this
        // sentinel and then expects no call.
        let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
        if callable {
            // PUC `GCTM` sets `CIST_FIN` on the new ci so
            // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
            // `name = "__gc"`. luna threads the same outcome through the
            // generic `pending_tm` slot: the Lua frame born from this
            // call consumes it in `push_frame`. Saved/restored around the
            // call in case the handler is a native (which never pops it).
            // Bare event name; `frame_name` / `c_frame_name` add the
            // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
            // the convention used by `__close`, `__index`, …
            let saved_tm = self.pending_tm.replace("gc");
            // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
            // pcall, so `getinfo(2).namewhat` inside the finalizer reads
            // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
            // luna mirrors by temporarily tagging the topmost Lua frame's
            // `tm` with the bare event name `"gc"` for the duration of the
            // call.
            let caller_tm_idx = self
                .frames
                .iter()
                .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
            // `Some(prev)` carries the frame's previous `tm` (itself an
            // Option); `None` means there was no Lua frame to tag.
            let saved_caller_tm = caller_tm_idx.and_then(|i| {
                if let CallFrame::Lua(fr) = &mut self.frames[i] {
                    let prev = fr.tm;
                    fr.tm = Some("gc");
                    Some(prev)
                } else {
                    None
                }
            });
            if let Err(e) = self.call_value(gc, &[obj]) {
                // PUC 5.1 GCTM raised the finalizer's error to the
                // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
                // baselines on `not pcall(collectgarbage)`). 5.2/5.3
                // wrapped it in `error in __gc metamethod (msg)` first
                // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
                // introduced the warning system and switched to "warn
                // then continue" — never re-raise, just route the
                // wrapped message through `warn`. gc.lua 5.5 :378 wires
                // up `_WARN` capture under the `if T then …` block to
                // baseline on the same wrapped string.
                if self.version >= LuaVersion::Lua54 {
                    let inner = self.error_text(&e);
                    let msg = format!("error in __gc metamethod ({inner})");
                    self.emit_warn(msg.as_bytes(), false);
                } else if first_err.is_none() {
                    // Only the first failure is kept; later ones are dropped.
                    let wrapped = if self.version >= LuaVersion::Lua52 {
                        let inner = self.error_text(&e);
                        let msg = format!("error in __gc metamethod ({inner})");
                        let s = Value::Str(self.heap.intern(msg.as_bytes()));
                        LuaError(s)
                    } else {
                        e
                    };
                    first_err = Some(wrapped);
                }
            }
            self.pending_tm = saved_tm;
            // `get_mut` rather than indexing: the restore is skipped if the
            // frame at that index is gone or is no longer a Lua frame.
            if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
                && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
            {
                fr.tm = prev; // prev is Option<&'static str>; restore exactly
            }
        }
    }
    self.gc_finalizing = false;
    match first_err {
        Some(e) => Err(e),
        None => Ok(()),
    }
}
3926
3927 /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
3928 /// Crosses up to three phases per call:
3929 /// 1. Pause → seed Propagate (`gc_start_propagate`)
3930 /// 2. Propagate → drain gray up to `budget`; on exhaustion run atomic
3931 /// (`gc_finish_atomic` → tobefnz populated; finalizers
3932 /// run via `run_finalizers`) and enter Sweep
3933 /// 3. Sweep → `gc_sweep_step` up to (residual) `budget`
3934 /// Returns true when this call completed the cycle's sweep (back to
3935 /// Pause). The budget is spent generously across phases — a large `n`
3936 /// can finish a whole cycle in one call (PUC stop-the-world step).
3937 pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
3938 // Re-entry guard: never recurse — `run_finalizers` calls Lua code
3939 // that may hit a safe point and try to step again. Re-entry was OK
3940 // under STW (collect_garbage had its own guard) but here the
3941 // intermediate phase state would corrupt.
3942 if self.gc_finalizing {
3943 return false;
3944 }
3945 if self.heap.gc_phase_is_pause() {
3946 let (roots, extra) = self.gc_roots();
3947 self.heap.gc_start_propagate(&roots, &extra);
3948 }
3949 if self.heap.gc_phase_is_propagate() {
3950 if !self.heap.gc_step_propagate(budget) {
3951 return false;
3952 }
3953 self.heap.gc_finish_atomic();
3954 // any __gc scheduled by atomic — run before sweep so a finalizer
3955 // re-registering `self` re-enters the next cycle, not this sweep
3956 self.run_finalizers();
3957 }
3958 // either we just transitioned, or we entered already in Sweep, or
3959 // a finalizer started a new cycle (gc_sweep_step is a no-op then)
3960 self.heap.gc_sweep_step(budget)
3961 }
3962
3963 // ---- frames & calls ----
3964
/// Begin calling stack[func_slot] with `nargs` (None: up to self.top).
/// Returns true if a Lua frame was pushed (the dispatch loop continues
/// there), false if a native completed inline.
///
/// Parameters:
/// - `func_slot`: stack index of the callee; its arguments follow directly
///   above it.
/// - `nargs`: argument count, or `None` to derive it from `self.top`.
/// - `nresults`: number of results the caller wants; negative means "all"
///   (it is handed to `finish_results` / stored in the new frame).
/// - `from_c`: forwarded to `push_frame` and stored in the new Lua frame.
///
/// Dispatch by callee kind:
/// - Lua closure: tries the JIT call fast path (`Ok(false)` when it ran
///   in place), otherwise pushes a frame (`Ok(true)`) and may start a
///   call-triggered trace recording.
/// - Native: async natives stash a future and return a sentinel
///   `Err(LuaError(Value::Nil))`; `pcall`/`xpcall`/`pairs`-with-`__pairs`
///   are redirected to their continuation-based helpers; everything else
///   runs inline under `catch_unwind`, with call/return hooks.
/// - Anything else: resolved through `__call`, looping until a real
///   function is found or the chain cap is exceeded.
///
/// Errors: propagates hook errors, native errors (including a trapped
/// panic, surfaced as `native panic: …`), `push_frame` failures, the
/// not-callable error from `call_err`, and `'__call' chain too long`.
fn begin_call(
    &mut self,
    func_slot: u32,
    nargs: Option<u32>,
    nresults: i32,
    from_c: bool,
) -> Result<bool, LuaError> {
    let mut nargs = match nargs {
        Some(n) => n,
        None => self.top - (func_slot + 1),
    };
    // Consume `pending_tailcalls` at the boundary: a tail-call op sets it
    // only for the immediately-following Lua activation. Native dispatch
    // (or `__call` resolution) below must not let it leak to the next
    // begin_call's frame; restore it just before push_frame for the Lua
    // arm so its meaning is preserved across __call chaining.
    let tailcalls = std::mem::take(&mut self.pending_tailcalls);
    // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
    // is inserted before the value so it becomes the first argument, and a
    // chain of `__call` tables resolves down to a real function.
    let mut chain = 0u32;
    loop {
        match self.stack[func_slot as usize] {
            Value::Closure(cl) => {
                // P11-S2c.B JIT fast path: if the Proto's body fits
                // the int-arith whitelist, every arg is `Value::Int`,
                // and the cached arity matches, skip frame setup and
                // run the cached native fn in-place.
                if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
                    // NOTE(review): no frame is pushed on this path, so the
                    // restored `pending_tailcalls` stays set until a later
                    // `begin_call` takes it — confirm this is intended.
                    self.pending_tailcalls = tailcalls;
                    return Ok(false);
                }
                self.pending_tailcalls = tailcalls;
                self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
                // P12-S4-step0 — trace-on-call trigger. The frame
                // we just pushed is the callee whose body the
                // recorder will trace. Bump the per-Proto call
                // counter; once it crosses `CALL_HOT_THRESHOLD`
                // and no other trace is in flight, snapshot the
                // callee's register window (R[0..max_stack]) and
                // begin recording at `pc=0`. This is what unlocks
                // tracing for functions whose body has no negative
                // `Op::Jmp` back-edge (`fib`, recursive helpers).
                //
                // Gated on `self.jit.trace_enabled`, so the default
                // dispatch pays a single not-taken branch.
                if self.jit.trace_enabled {
                    let proto = cl.proto;
                    // `c` is the counter value BEFORE this call's bump; the
                    // `u32::MAX / 2` ceiling keeps the increment from
                    // overflowing.
                    let c = proto.call_hot_count.get();
                    if c < u32::MAX / 2 {
                        proto.call_hot_count.set(c + 1);
                    }
                    // P13-S13-H — relaxed call-trigger:
                    // `c >= THRESHOLD` (was `c == THRESHOLD`) +
                    // `!already_cached` short-circuit. Lets a
                    // discarded short call-trigger close retry
                    // on the next call (fib(10/15/20/25)
                    // pathology — first capture is base-case
                    // [Lt,Jmp,Return1]; coverage-heuristic
                    // discards; next call gets to record at a
                    // potentially deeper recursion point).
                    // Without `already_cached`, the relaxed
                    // condition would re-record over a cached
                    // trace every call.
                    //
                    // P13-S13-K — additionally short-circuit on
                    // `proto.trace_gave_up`. The S13-I discard
                    // cap force-compiles a partial trace and
                    // flips this flag; subsequent calls into
                    // this Proto skip the RefCell borrow + Vec
                    // scan entirely.
                    if proto.trace_gave_up.get() {
                        return Ok(true);
                    }
                    // A trace whose head is pc 0 is a call-triggered trace
                    // for this Proto.
                    let call_already_cached =
                        proto.traces.borrow().iter().any(|t| t.head_pc == 0);
                    if c >= crate::jit::trace::CALL_HOT_THRESHOLD
                        && self.jit.active_trace.is_none()
                        && !call_already_cached
                    {
                        // The new frame is on top: index in
                        // `self.frames` is `len() - 1`.
                        let frame_idx = self.frames.len() - 1;
                        // Snapshot R[0..max_stack] at the callee's
                        // base. `push_frame` resized `self.stack`
                        // to `base + max_stack`, so this window is
                        // guaranteed in-bounds.
                        let f = match &self.frames[frame_idx] {
                            CallFrame::Lua(f) => f,
                            _ => unreachable!("push_frame just pushed a Lua frame"),
                        };
                        let max_stack = cl.proto.max_stack as usize;
                        let base_us = f.base as usize;
                        // Only the type tag of each register is recorded;
                        // the payload half of `unpack()` is discarded.
                        let mut entry_tags = Vec::with_capacity(max_stack);
                        for i in 0..max_stack {
                            let (tag, _) = self.stack[base_us + i].unpack();
                            entry_tags.push(tag);
                        }
                        self.jit.active_trace =
                            Some(Box::new(crate::jit::trace::TraceRecord::start(
                                cl.proto, 0, entry_tags, true,
                            )));
                        self.jit.recording_frame_base = frame_idx;
                    }
                }
                return Ok(true);
            }
            Value::Native(nc) => {
                // v1.1 B10 Stage 2 — async-marked NativeClosure.
                // Route through the cooperative-yield mechanism
                // when async_mode is on; reject when called from
                // a sync `eval`/`call_value` path (would have no
                // executor to drive the returned future).
                if nc.is_async {
                    if !self.async_mode {
                        let s = Value::Str(
                            self.heap.intern(b"async native called in sync context"),
                        );
                        self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
                        return Err(LuaError(s));
                    }
                    // Same root-up bookkeeping as the sync path:
                    // pin args + result-count expectation so a
                    // collection across the suspend boundary
                    // keeps the arg window live.
                    self.native_nresults = nresults;
                    self.gc_top = func_slot + nargs + 1;
                    // v1.3 Phase AS — fire the "call" hook BEFORE
                    // building the future. Mirrors the sync native
                    // path's `hook_call(true, nargs)` site
                    // (`exec.rs` further down) so embedders with a
                    // Rust debug hook installed see a Call event
                    // for async natives identical to the sync
                    // path. The matching "return" hook fires from
                    // `commit_async_native_result` in
                    // `async_drive.rs` after the future resolves.
                    // Placement follows audit §"Open questions"
                    // Q6: after the `native_nresults` / `gc_top`
                    // pin, before the future is constructed, so a
                    // hook body that triggers GC observes the
                    // correct pinned window. On hook error the
                    // sentinel never returns and
                    // `pending_async_native_*` remain `None` —
                    // the executor sees `DispatchOutcome::Error`
                    // (audit §A.1 edge cases).
                    self.hook_call(true, nargs)?;
                    // Transmute the stored NativeFn back to its
                    // real AsyncNativeFn shape. Sound because
                    // `set_async_native` / `create_async_native`
                    // installed an AsyncNativeFn through the
                    // identically-sized fn-pointer slot, and the
                    // `is_async` marker bit is what records that
                    // fact.
                    let async_fn: crate::vm::async_drive::AsyncNativeFn =
                        // SAFETY: same-size fn pointers; provenance
                        // preserved through `mem::transmute`. The
                        // `is_async` marker is the only safe-to-call
                        // gate, set exclusively by
                        // `Vm::create_async_native`.
                        unsafe { std::mem::transmute(nc.f) };
                    let vm_ptr: *mut Vm = self;
                    let fut = async_fn(vm_ptr, func_slot, nargs);
                    // Stash the future + post-call context for
                    // `drive_one` to surface to `EvalFuture::poll`.
                    self.pending_async_native_fut = Some(fut);
                    self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
                        func_slot,
                        nargs,
                        nresults,
                        gc_top: self.gc_top,
                    });
                    // Sentinel Err walked up to `drive_one` (same
                    // shape as `host_yield_pending`'s budget yield).
                    // Value::Nil — never seen by user code.
                    return Err(LuaError(Value::Nil));
                }
                // pcall/xpcall are yieldable: rather than calling the
                // protected function through the Rust stack (which cannot be
                // suspended), push a continuation frame and drive the call
                // through the interpreter loop (PUC lua_pcallk). A yield
                // inside it is preserved with the thread's saved frames.
                // These builtins are recognised by function-pointer identity.
                use crate::runtime::value::NativeFn;
                if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
                    return self.begin_pcall(func_slot, nargs, nresults);
                }
                if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
                    return self.begin_xpcall(func_slot, nargs, nresults);
                }
                // pairs(t) with a __pairs metamethod calls it yieldably (PUC
                // luaB_pairs); without one, fall through to the plain native.
                if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
                    let arg = self.stack[(func_slot + 1) as usize];
                    if !self.get_mm(arg, Mm::Pairs).is_nil() {
                        return self.begin_pairs(func_slot, nresults);
                    }
                }
                // a native that collects (e.g. `collectgarbage`) roots up to
                // its own arguments — the caller's live registers all sit
                // below `func_slot` and stay rooted.
                self.native_nresults = nresults;
                self.gc_top = func_slot + nargs + 1;
                // Push the native onto the running-natives chain BEFORE
                // firing the call hook so that `debug.getinfo(level)` and
                // `arg_error` from inside the hook see this native as the
                // currently-running C function (db.lua :344 reads
                // `getinfo(2, "f").func` for the just-entered callee).
                // Popped after the matching return hook fires — even on
                // error, the pop must happen, so every exit path below
                // pops both vectors explicitly.
                self.running_natives.push(nc);
                self.running_native_slots.push((func_slot, nargs));
                // PUC luaD_precall fires the "call" hook for C functions too.
                // A yield inside the native (coroutine.yield) propagates an
                // Err and the matching "return" hook fires on resume instead.
                if let Err(e) = self.hook_call(true, nargs) {
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                    return Err(e);
                }
                // P09: trap a Rust panic in the native and surface it as
                // a Lua error rather than letting it unwind through the
                // VM into the embedder. The VM's internal state may still
                // be inconsistent after a panic (half-pushed args,
                // dangling GC references), so embedders that catch this
                // class of error should drop and re-create the Vm — but
                // it's still better than tearing the host process down.
                // `AssertUnwindSafe` is sound because the caller is the
                // dispatch loop and any half-done state is fenced behind
                // the immediate Err return below.
                use std::panic::{AssertUnwindSafe, catch_unwind};
                let result =
                    match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
                        Ok(r) => r,
                        Err(payload) => {
                            let msg = panic_payload_str(&payload);
                            let s = Value::Str(
                                self.heap.intern(format!("native panic: {msg}").as_bytes()),
                            );
                            Err(LuaError(s))
                        }
                    };
                let nret = match result {
                    Ok(n) => n,
                    Err(e) => {
                        // Stash the offending native's name BEFORE the
                        // pop so a dying coroutine's traceback snapshot
                        // can prepend `[C]: in function '<name>'`. Use
                        // pushglobalfuncname (PUC walks package.loaded
                        // to qualify); fall back to "?".
                        self.errored_native =
                            Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        return Err(e);
                    }
                };
                // PUC `luaD_poscall` fires the return hook BEFORE moving
                // results into the function's slot — at that point args
                // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
                // results above them at `[func_slot + 1 + nargs, …)`.
                // luna's `nat_return` has already written the results
                // into `[func_slot, func_slot + nret)`, so we replay PUC's
                // layout by copying the results up past the preserved
                // args, firing the hook (with ftransfer = nargs + 1, so
                // `getlocal(2, ftransfer..)` reads results), and then
                // copying back for `finish_results`. db.lua :541 reads
                // `getinfo("r").ftransfer` + `getlocal` to inspect a
                // returning native's results this way.
                if self.hook.ret
                    && !self.in_hook
                    && (self.hook.func.is_some() || self.hook.rust_func.is_some())
                {
                    let res_dst = func_slot + nargs + 1;
                    let need = (res_dst + nret) as usize;
                    if self.stack.len() < need {
                        self.stack.resize(need, Value::Nil);
                    }
                    // Destination lies above the source and the ranges may
                    // overlap, so copy from the highest index down.
                    for i in (0..nret).rev() {
                        self.stack[(res_dst + i) as usize] =
                            self.stack[(func_slot + i) as usize];
                    }
                    // widen the C-frame's argument window for getlocal
                    if let Some(slot) = self.running_native_slots.last_mut() {
                        slot.1 = nargs + nret;
                    }
                    // Hold the hook's outcome until the bookkeeping below is
                    // undone; it is propagated with `hr?` afterwards.
                    let hr = self.hook_return(true, nargs + 1, nret);
                    if let Some(slot) = self.running_native_slots.last_mut() {
                        slot.1 = nargs;
                    }
                    // restore results into the slot finish_results expects
                    // (destination below source: ascending order is safe)
                    for i in 0..nret {
                        self.stack[(func_slot + i) as usize] =
                            self.stack[(res_dst + i) as usize];
                    }
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                    hr?;
                } else {
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                }
                self.finish_results(func_slot, nret, nresults);
                // the native may have allocated; collect with the results as
                // the live boundary (PUC checks GC after a call returns).
                self.maybe_collect_garbage(self.top);
                return Ok(false);
            }
            v => {
                // Not a function: look for a `__call` handler.
                let mm = self.get_mm(v, Mm::Call);
                if mm.is_nil() {
                    return Err(self.call_err(v));
                }
                chain += 1;
                // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
                // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
                // and the 5.5 test exercises the new tight bound directly
                // (calls.lua :225 builds a 16-deep chain and expects the
                // 16th to error). 5.4 calls.lua :194 instead builds a 20-
                // deep chain and expects it to succeed.
                let cap = if self.version >= crate::version::LuaVersion::Lua55 {
                    15
                } else {
                    MAX_CCMT
                };
                if chain > cap {
                    return Err(self.rt_err("'__call' chain too long"));
                }
                // slots above shift by one; at a call site those are dead
                // temps of the current frame
                self.stack.insert(func_slot as usize, mm);
                if self.top > func_slot {
                    self.top += 1;
                }
                // the original callee is now the handler's first argument
                nargs += 1;
            }
        }
    }
}
4306
/// Push a Lua activation record for closure `cl`, whose function value sits
/// at `func_slot` with `nargs` arguments directly above it.
///
/// Steps, in order:
/// 1. Reject the call with a stack-overflow error when `func_slot + 256`
///    would exceed `MAX_LUA_STACK`.
/// 2. For a vararg proto, rotate the extra arguments below the new `base`.
/// 3. Grow the stack to `base + max_stack` and nil-fill every register
///    above the kept parameters.
/// 4. Push the `Frame`, consuming `pending_tm`, `pending_is_hook` and
///    `pending_tailcalls`.
/// 5. When the proto asks for it, build the 5.1-compat `arg` table.
/// 6. Fire the "call" (or "tail call") hook.
///
/// `nresults` and `from_c` are stored in the frame unchanged.
///
/// Errors: the overflow error from step 1, or whatever the call hook
/// returns in step 6 (the frame is already pushed at that point).
fn push_frame(
    &mut self,
    cl: Gc<LuaClosure>,
    func_slot: u32,
    nargs: u32,
    nresults: i32,
    from_c: bool,
) -> Result<(), LuaError> {
    if func_slot + 256 > MAX_LUA_STACK {
        // PUC `stackerror`: a stack overflow that surfaces while the
        // current activation is inside an xpcall message handler is
        // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
        // error handling". errors.lua :606 expects the inner pcall(loop)
        // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
        // message matching "error handling".
        let msg = if self.msgh_depth > 0 {
            "error in error handling"
        } else {
            "stack overflow"
        };
        return Err(self.rt_err(msg));
    }
    let proto = cl.proto;
    let nparams = proto.num_params as u32;
    // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
    // stack just below the new `base`, so a named vararg can be indexed
    // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
    // `[e1..em][p1..pn]` so the fixed params land at the new base.
    let n_varargs = if proto.is_vararg {
        nargs.saturating_sub(nparams)
    } else {
        0
    };
    if n_varargs > 0 {
        // `n_varargs > 0` implies `nargs > nparams`, so the rotation amount
        // is within the slice length.
        let s = (func_slot + 1) as usize;
        self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
    }
    let base = func_slot + 1 + n_varargs;
    let need = (base + proto.max_stack as u32) as usize;
    if self.stack.len() < need {
        self.stack.resize(need, Value::Nil);
    }
    // wipe the register window beyond the kept parameters (stale values —
    // required for GC-safety and codegen). The varargs below `base` survive.
    let kept = nargs.saturating_sub(n_varargs).min(nparams);
    // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
    // need` since `base + nparams <= base + max_stack = need` and `kept <=
    // nparams`. `slice::fill` lowers to a single memset on Copy types.
    // NOTE(review): this relies on `num_params <= max_stack` for every
    // proto — presumably a compiler invariant; confirm.
    unsafe {
        self.stack
            .get_unchecked_mut((base + kept) as usize..need)
            .fill(Value::Nil);
    }
    frames_push_sync(
        &mut self.frames,
        &mut self.frames_top,
        CallFrame::Lua(Frame {
            closure: cl,
            base,
            pc: 0,
            func_slot,
            nresults,
            hook_oldpc: u32::MAX,
            from_c,
            n_varargs,
            // single-shot consume: `close_slots` sets pending_tm before each
            // handler call; the next Lua frame born is that handler's.
            tm: self.pending_tm.take(),
            // `run_hook` sets `pending_is_hook` before dispatching the user
            // hook so its frame reports `namewhat = "hook"` via getinfo.
            is_hook: std::mem::take(&mut self.pending_is_hook),
            // set by a tail-call op for the activation it creates; also
            // consumed here so it cannot leak into a later frame
            tailcalls: std::mem::take(&mut self.pending_tailcalls),
        }),
    );
    // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
    // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
    // the slot at `base + nparams`; the extras sit just below `base` from
    // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
    // hook; vararg.lua's contradictory expectations were already going to
    // fail either way (some asserts want `arg == nil`).
    if proto.has_compat_vararg_arg {
        let arg_slot = (base + nparams) as usize;
        let t = self.heap.new_table();
        {
            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
            let tm = unsafe { t.as_mut() };
            for i in 0..n_varargs {
                let v = self.stack[(base - n_varargs + i) as usize];
                // bounded by `n_varargs` (≤ MAXUPVAL territory), well
                // below `MAX_ASIZE`
                // (the `set_int` result is deliberately ignored here)
                let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
            }
            let nk = Value::Str(self.heap.intern(b"n"));
            tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
                .expect("'n' key");
        }
        // once-per-table barrier mirrors SETLIST: t is born BLACK during
        // Propagate and the bulk `set_int`/`set` calls above don't barrier
        self.heap
            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
        self.stack[arg_slot] = Value::Table(t);
    }
    // PUC luaD_precall fires the "call" hook with the new frame current, so
    // a hook calling debug.getinfo(2) sees the entered function. For a Lua
    // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
    // fixed params count — extras already live below `base`).
    // A frame born via OP_TailCall fires "tail call" instead (PUC
    // luaD_pretailcall) and skips the matching "return" hook on exit.
    // The flag is read back from the frame just pushed.
    let is_tail = self
        .frames
        .last()
        .and_then(|f| f.lua())
        .is_some_and(|f| f.tailcalls > 0);
    self.hook_call_with(false, nparams, is_tail)?;
    Ok(())
}
4423
4424 /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4425 /// the protected call `f` through the interpreter loop. The protected
4426 /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4427 /// at `func_slot+1` lets its results land one slot above the continuation —
4428 /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4429 /// Always returns `Ok(true)`: a continuation is now on the stack to be
4430 /// resolved by the loop (even when `f` is a native that already ran inline).
4431 fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4432 if nargs == 0 {
4433 return Err(crate::vm::builtins::raise_str(
4434 self,
4435 "bad argument #1 to 'pcall' (value expected)",
4436 ));
4437 }
4438 if self.pcall_depth >= MAX_C_DEPTH {
4439 return Err(self.rt_err("C stack overflow"));
4440 }
4441 self.pcall_depth += 1;
4442 frames_push_sync(
4443 &mut self.frames,
4444 &mut self.frames_top,
4445 CallFrame::Cont(NativeCont {
4446 kind: ContKind::Pcall,
4447 func_slot,
4448 nresults,
4449 }),
4450 );
4451 // call f (slot func_slot+1) with the remaining args, asking for all
4452 // results; a yield or error inside propagates with the continuation kept
4453 // on the stack (caught by `unwind` / preserved across a yield).
4454 self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4455 Ok(true)
4456 }
4457
4458 /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4459 /// message handler is stashed in the continuation and the arguments are
4460 /// shifted down over the handler's slot so `f`'s args are contiguous.
4461 fn begin_xpcall(
4462 &mut self,
4463 func_slot: u32,
4464 nargs: u32,
4465 nresults: i32,
4466 ) -> Result<bool, LuaError> {
4467 if nargs < 2 {
4468 return Err(crate::vm::builtins::raise_str(
4469 self,
4470 "bad argument #2 to 'xpcall' (value expected)",
4471 ));
4472 }
4473 if self.pcall_depth >= MAX_C_DEPTH {
4474 return Err(self.rt_err("C stack overflow"));
4475 }
4476 self.pcall_depth += 1;
4477 // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4478 // close its gap so f's args become [f@+1, a1@+2, ...].
4479 let handler = self.stack[(func_slot + 2) as usize];
4480 let nfargs = nargs - 2;
4481 for i in 0..nfargs {
4482 self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4483 }
4484 self.top = func_slot + 2 + nfargs;
4485 frames_push_sync(
4486 &mut self.frames,
4487 &mut self.frames_top,
4488 CallFrame::Cont(NativeCont {
4489 kind: ContKind::Xpcall { handler },
4490 func_slot,
4491 nresults,
4492 }),
4493 );
4494 self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4495 Ok(true)
4496 }
4497
4498 /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4499 /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4500 /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4501 /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4502 /// results land exactly where `pairs`'s results belong.
4503 fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4504 let arg = self.stack[(func_slot + 1) as usize];
4505 let mm = self.get_mm(arg, Mm::Pairs);
4506 // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4507 self.stack[func_slot as usize] = mm;
4508 self.top = func_slot + 2;
4509 frames_push_sync(
4510 &mut self.frames,
4511 &mut self.frames_top,
4512 CallFrame::Cont(NativeCont {
4513 kind: ContKind::Pairs,
4514 func_slot,
4515 nresults,
4516 }),
4517 );
4518 self.begin_call(func_slot, Some(1), 4, true)?;
4519 Ok(true)
4520 }
4521
4522 /// The running (top) Lua frame. The interpreter only reads this while a Lua
4523 /// frame is on top — a continuation frame is never the running frame (it is
4524 /// consumed the instant the call it protects unwinds onto it).
4525 #[inline]
4526 fn top_frame(&self) -> &Frame {
4527 self.frames
4528 .last()
4529 .and_then(CallFrame::lua)
4530 .expect("running Lua frame")
4531 }
4532
4533 #[inline]
4534 fn top_frame_mut(&mut self) -> &mut Frame {
4535 self.frames
4536 .last_mut()
4537 .and_then(CallFrame::lua_mut)
4538 .expect("running Lua frame")
4539 }
4540
4541 /// Pad/announce results sitting at func_slot.
4542 pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4543 // v2.3 P1B-A: capture the call's high-water-mark before
4544 // setting the new top so we can Nil-clear slots that the
4545 // call temporarily wrote but no longer holds — matching
4546 // PUC's `L->top` discipline (slots past L->top are "free"
4547 // and the next push overwrites them). Without this clear,
4548 // a stale `Value::Closure` (e.g. the called function
4549 // itself, when wanted = 0) sits at `func_slot` and a
4550 // later GC with wider `gc_top` traces it after the
4551 // closure has been freed by a previous narrow safe-point
4552 // GC → heap-buffer-overflow in `Marker::header` (UAF-A
4553 // sort.lua AA case).
4554 let prev_top = self.top as usize;
4555 if wanted < 0 {
4556 self.top = func_slot + nret;
4557 } else {
4558 let wanted = wanted as u32;
4559 let need = (func_slot + wanted) as usize;
4560 if self.stack.len() < need {
4561 self.stack.resize(need, Value::Nil);
4562 }
4563 for i in nret..wanted {
4564 self.stack[(func_slot + i) as usize] = Value::Nil;
4565 }
4566 self.top = func_slot + wanted;
4567 }
4568 let new_top = self.top as usize;
4569 let clear_end = prev_top.min(self.stack.len());
4570 if new_top < clear_end {
4571 for slot in &mut self.stack[new_top..clear_end] {
4572 *slot = Value::Nil;
4573 }
4574 }
4575 }
4576
4577 /// v1.1 B10 Stage 1 — current Lua call-frame depth (read-only).
4578 /// Used by `EvalFuture` on the bootstrap poll to compute the
4579 /// `entry_depth` it will pass to subsequent resume slices.
4580 pub(crate) fn frame_count(&self) -> usize {
4581 self.frames.len()
4582 }
4583
4584 fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4585 let nret = self.top - func_slot;
4586 let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4587 self.stack.truncate(func_slot as usize);
4588 self.top = func_slot;
4589 out
4590 }
4591
4592 // ---- open upvalues ----
4593
4594 #[doc(hidden)]
4595 pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4596 match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4597 Ok(i) => self.open_upvals[i].1,
4598 Err(i) => {
4599 let uv = self.heap.new_upvalue(UpvalState::Open {
4600 slot,
4601 thread: self.current,
4602 });
4603 self.open_upvals.insert(i, (slot, uv));
4604 uv
4605 }
4606 }
4607 }
4608
4609 pub(crate) fn close_from(&mut self, slot: u32) {
4610 while let Some(&(s, uv)) = self.open_upvals.last() {
4611 if s < slot {
4612 break;
4613 }
4614 let v = self.stack[s as usize];
4615 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4616 unsafe { uv.as_mut() }.set_closed(v);
4617 self.heap
4618 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4619 self.open_upvals.pop();
4620 }
4621 }
4622
4623 /// Register a to-be-closed slot (TBC op / generic-for closing value).
4624 fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4625 let v = self.stack[slot as usize];
4626 if matches!(v, Value::Nil | Value::Bool(false)) {
4627 return Ok(()); // nil and false are silently ignored
4628 }
4629 if self.get_mm(v, Mm::Close).is_nil() {
4630 // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4631 // (a <type> value)"; the local's name comes from the running
4632 // frame's locvars at this pc.
4633 let tn = v.type_name();
4634 let f = self.top_frame();
4635 let reg = slot - f.base;
4636 let pc = (f.pc as usize).saturating_sub(1);
4637 let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4638 Some(n) => format!("variable '{n}'"),
4639 None => "to-be-closed slot".to_string(),
4640 };
4641 return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4642 }
4643 debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4644 self.tbc.push(slot);
4645 Ok(())
4646 }
4647
4648 /// Close upvalues and run `__close` handlers for slots ≥ `from`
4649 /// (handlers in reverse registration order; PUC luaF_close).
4650 fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
4651 self.close_from(from);
4652 // PUC: handlers run in reverse declaration order; an error raised by a
4653 // handler becomes the error object passed to the remaining ones, and
4654 // the rest are still closed. The last raised error propagates.
4655 let mut pending = err;
4656 let mut result = Ok(());
4657 let saved_err = self.closing_err;
4658 // On a normal close the handler runs within the closing function's
4659 // activation (debug parent = that function); during error unwinding the
4660 // function's frame is already gone, so the handler sits at the C
4661 // boundary instead (PUC: luaF_close runs after the ci is restored).
4662 let error_close = err.is_some();
4663 while let Some(&s) = self.tbc.last() {
4664 if s < from {
4665 break;
4666 }
4667 self.tbc.pop();
4668 let v = self.stack[s as usize];
4669 if matches!(v, Value::Nil | Value::Bool(false)) {
4670 continue;
4671 }
4672 let mm = self.get_mm(v, Mm::Close);
4673 if mm.is_nil() {
4674 // PUC `prepclosingmethod`: the __close metamethod was present
4675 // at OP_TBC (else we would have errored there) but has since
4676 // been removed/replaced. Treat as a non-callable target.
4677 let tn = self.obj_typename(v);
4678 let e = self.rt_err(&format!(
4679 "attempt to call a {tn} value (metamethod 'close')"
4680 ));
4681 pending = Some(e.0);
4682 result = Err(e);
4683 continue;
4684 }
4685 // root the pending error: a handler may trigger a collection
4686 self.closing_err = pending;
4687 // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
4688 // getinfo report the handler as "in metamethod 'close'". Saved/
4689 // restored around the call to cover the path where `mm` is a
4690 // native (`push_frame` never consumes it) or it raises before
4691 // reaching push_frame.
4692 let saved_tm = self.pending_tm.replace("close");
4693 // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
4694 // is nil on a normal close (5.4 locals.lua :875's
4695 // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
4696 // through the yield). PUC 5.5 dropped the trailing nil: a clean
4697 // close passes only `obj`, the error case still passes both
4698 // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
4699 // normal-close arms, n=2 for the error arm).
4700 let call = match pending {
4701 Some(e) => self.call_value_impl(mm, &[v, e], error_close),
4702 None => {
4703 if self.version >= LuaVersion::Lua55 {
4704 self.call_value_impl(mm, &[v], error_close)
4705 } else {
4706 self.call_value_impl(mm, &[v, Value::Nil], error_close)
4707 }
4708 }
4709 };
4710 self.pending_tm = saved_tm;
4711 if let Err(e) = call {
4712 pending = Some(e.0);
4713 result = Err(e);
4714 }
4715 }
4716 self.closing_err = saved_err;
4717 result
4718 }
4719
4720 /// Yieldable variant of `close_slots`: drive the chain of `__close`
4721 /// handlers for slots ≥ `from` through the interpreter loop with a
4722 /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
4723 /// suspends cleanly (the close iteration's state rides on the thread's
4724 /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
4725 /// applied to `luaF_close`. `after` runs when every slot is closed; if
4726 /// `after` is `Return` and we've returned past `entry_depth`,
4727 /// `Ok(Some(vals))` carries the result up to the host caller.
4728 fn begin_close(
4729 &mut self,
4730 from: u32,
4731 err: Option<Value>,
4732 after: AfterClose,
4733 entry_depth: usize,
4734 ) -> Result<Option<Vec<Value>>, LuaError> {
4735 self.close_from(from);
4736 self.drive_close(from, err, after, entry_depth)
4737 }
4738
4739 /// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
4740 /// non-callable-mm error for an `__close` that was reset to a bad value
4741 /// between OP_TBC and now (PUC `prepclosingmethod`). The first real
4742 /// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
4743 /// the interpreter then drives the handler and re-enters this driver via
4744 /// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
4745 /// the threaded error (if any) propagates or `after` fires.
4746 fn drive_close(
4747 &mut self,
4748 from: u32,
4749 mut pending: Option<Value>,
4750 after: AfterClose,
4751 entry_depth: usize,
4752 ) -> Result<Option<Vec<Value>>, LuaError> {
4753 loop {
4754 let drained = match self.tbc.last() {
4755 None => true,
4756 Some(&s) => s < from,
4757 };
4758 if drained {
4759 return self.finish_close_after(after, pending, entry_depth);
4760 }
4761 let s = self.tbc.pop().expect("tbc non-empty");
4762 let v = self.stack[s as usize];
4763 if matches!(v, Value::Nil | Value::Bool(false)) {
4764 continue;
4765 }
4766 let mm = self.get_mm(v, Mm::Close);
4767 if mm.is_nil() {
4768 let tn = self.obj_typename(v);
4769 let e = self.rt_err(&format!(
4770 "attempt to call a {tn} value (metamethod 'close')"
4771 ));
4772 pending = Some(e.0);
4773 continue;
4774 }
4775 // A real handler: stage [mm, v, (err?)] above the current top,
4776 // record the close iteration state in a Cont::Close, and let the
4777 // interpreter dispatch the handler. On return the run() head
4778 // re-enters this driver via the Cont::Close consumer.
4779 let func_slot = self.top;
4780 let error_close = pending.is_some();
4781 let need = (func_slot + 3) as usize;
4782 if self.stack.len() < need {
4783 self.stack.resize(need, Value::Nil);
4784 }
4785 self.stack[func_slot as usize] = mm;
4786 self.stack[func_slot as usize + 1] = v;
4787 // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
4788 // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
4789 let nargs = match pending {
4790 Some(e) => {
4791 self.stack[func_slot as usize + 2] = e;
4792 2u32
4793 }
4794 None => {
4795 if self.version >= LuaVersion::Lua55 {
4796 1u32
4797 } else {
4798 self.stack[func_slot as usize + 2] = Value::Nil;
4799 2u32
4800 }
4801 }
4802 };
4803 self.top = func_slot + 1 + nargs;
4804 // Root the pending error during the call (a handler may collect).
4805 let saved_err = self.closing_err;
4806 self.closing_err = pending;
4807 // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
4808 // for traceback / getinfo.
4809 let saved_tm = self.pending_tm.replace("close");
4810 frames_push_sync(
4811 &mut self.frames,
4812 &mut self.frames_top,
4813 CallFrame::Cont(NativeCont {
4814 kind: ContKind::Close(CloseCont {
4815 from,
4816 pending,
4817 after,
4818 }),
4819 func_slot,
4820 nresults: 0,
4821 }),
4822 );
4823 // PUC luaF_close runs a normal close *within* the closing
4824 // function's activation (debug parent = that function); during an
4825 // error unwind the function's frame is already gone and the
4826 // handler sits at the C boundary instead.
4827 let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
4828 self.pending_tm = saved_tm;
4829 self.closing_err = saved_err;
4830 r?;
4831 return Ok(None);
4832 }
4833 }
4834
4835 /// Fire `after` once every `__close` handler has run. `Block` propagates
4836 /// any remaining error or simply continues; `Return` performs OP_Return's
4837 /// tail (hook + frame pop + result delivery) and may surface results to
4838 /// the host when the function whose return triggered the close was the
4839 /// entry activation, but only on a clean drain — a pending error skips
4840 /// the return tail and propagates instead. `ResumeUnwind` pops the
4841 /// deferred Lua frame and re-raises, letting a handler's own error win
4842 /// over the original propagating one (PUC luaF_close).
4843 fn finish_close_after(
4844 &mut self,
4845 after: AfterClose,
4846 pending: Option<Value>,
4847 entry_depth: usize,
4848 ) -> Result<Option<Vec<Value>>, LuaError> {
4849 match after {
4850 AfterClose::Block => match pending {
4851 Some(e) => Err(LuaError(e)),
4852 None => Ok(None),
4853 },
4854 AfterClose::Return {
4855 abs_a,
4856 nret,
4857 from_native,
4858 } => match pending {
4859 Some(e) => Err(LuaError(e)),
4860 None => self.complete_return(abs_a, nret, from_native, entry_depth),
4861 },
4862 AfterClose::ResumeUnwind { func_slot, err } => {
4863 // The aborting Lua frame was popped before `begin_close`;
4864 // restore the catcher's stack window down to `func_slot` and
4865 // re-raise — preferring a handler-raised error over the
4866 // original (PUC luaF_close).
4867 self.stack.truncate(func_slot as usize);
4868 self.top = func_slot;
4869 self.tbc.retain(|&s| s < func_slot);
4870 Err(LuaError(pending.unwrap_or(err)))
4871 }
4872 }
4873 }
4874
4875 /// OP_Return's post-close tail: fire the "return" hook (frame still
4876 /// current), pop the Lua frame, slide results into `func_slot`, then
4877 /// either hand them to the host (`Ok(Some(vals))` when we've returned
4878 /// past `entry_depth`), leave them contiguous for an exposed
4879 /// pcall/xpcall continuation, or finish into the caller's expected
4880 /// result slot. Mirrors the synchronous OP_Return tail so both paths
4881 /// share semantics — the `from_native` flag selects the right "return"
4882 /// hook context for `hook_return`.
4883 fn complete_return(
4884 &mut self,
4885 abs_a: u32,
4886 nret: u32,
4887 from_native: bool,
4888 entry_depth: usize,
4889 ) -> Result<Option<Vec<Value>>, LuaError> {
4890 // ftransfer is the local index (1-based) of the first result, as
4891 // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
4892 // exposes locals starting at `frame.base` (= func_slot + 1 +
4893 // n_varargs for a vararg call), so the conversion is the absolute
4894 // result slot minus base, plus one to make it 1-based. db.lua 5.4
4895 // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
4896 // shape end-to-end.
4897 let ftransfer = self
4898 .frames
4899 .last()
4900 .and_then(CallFrame::lua)
4901 .map(|fr| {
4902 let raw = abs_a.saturating_sub(fr.base) + 1;
4903 // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
4904 // local injected at index `numparams + 1`, so getlocal
4905 // numbering shifts results past it (5.5 db.lua :539
4906 // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
4907 if fr.closure.proto.has_vararg_table_pseudo {
4908 raw + 1
4909 } else {
4910 raw
4911 }
4912 })
4913 .unwrap_or(1);
4914 // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
4915 // per tail call that collapsed into this activation, *after* its
4916 // own "return". `tailcalls` tracks that count exactly (PUC
4917 // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
4918 // "return" hook fires once even when the activation absorbed
4919 // multiple tail calls — only `istailcall` on getinfo surfaces the
4920 // collapse. 5.1 db.lua :366 pins the event ordering.
4921 let tailcalls = if self.version <= LuaVersion::Lua51 {
4922 self.frames
4923 .last()
4924 .and_then(|f| f.lua())
4925 .map(|f| f.tailcalls)
4926 .unwrap_or(0)
4927 } else {
4928 0
4929 };
4930 self.hook_return(from_native, ftransfer, nret)?;
4931 for _ in 0..tailcalls {
4932 self.hook_tail_return()?;
4933 }
4934 let CallFrame::Lua(fr) =
4935 frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
4936 else {
4937 unreachable!("returning from a non-Lua frame")
4938 };
4939 for i in 0..nret {
4940 self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
4941 }
4942 if self.frames.len() < entry_depth {
4943 self.top = fr.func_slot + nret;
4944 return Ok(Some(self.take_results(fr.func_slot)));
4945 } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
4946 self.top = fr.func_slot + nret;
4947 } else {
4948 self.finish_results(fr.func_slot, nret, fr.nresults);
4949 }
4950 Ok(None)
4951 }
4952
4953 #[doc(hidden)]
4954 pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
4955 match cl.upvals()[idx as usize].state() {
4956 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
4957 UpvalState::Closed(v) => v,
4958 }
4959 }
4960
4961 fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
4962 let uv = cl.upvals()[idx as usize];
4963 match uv.state() {
4964 UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
4965 UpvalState::Closed(_) => {
4966 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4967 unsafe { uv.as_mut() }.set_closed(v);
4968 // forward barrier: a closed upvalue is single-slot, so the
4969 // forward variant is cheaper than barrier_back (PUC uses
4970 // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
4971 // tables / threads).
4972 self.heap
4973 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4974 }
4975 }
4976 }
4977
4978 // ---- register / error helpers ----
4979
4980 #[inline(always)]
4981 fn r(&self, base: u32, i: u32) -> Value {
4982 // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
4983 // at frame entry (`push_frame` sizes the stack up to base + max_stack),
4984 // and every bytecode-generated reference falls within `[0, max_stack)`.
4985 // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
4986 // reason. The bounds check would re-validate this invariant on every
4987 // op — the dispatch hot path can't afford it.
4988 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4989 unsafe { *self.stack.get_unchecked((base + i) as usize) }
4990 }
4991
4992 #[inline(always)]
4993 fn set_r(&mut self, base: u32, i: u32, v: Value) {
4994 // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
4995 // frame-entry contract.
4996 unsafe {
4997 *self.stack.get_unchecked_mut((base + i) as usize) = v;
4998 }
4999 }
5000
5001 #[doc(hidden)]
5002 pub fn rt_err(&mut self, msg: &str) -> LuaError {
5003 let text = match self.position_prefix() {
5004 Some(p) => format!("{p}{msg}"),
5005 None => msg.to_string(),
5006 };
5007 LuaError(Value::Str(self.heap.intern(text.as_bytes())))
5008 }
5009
5010 pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
5011 let extra = self.subject_varinfo(v);
5012 let tn = self.obj_typename(v);
5013 self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
5014 }
5015
5016 /// Name the offending operand of the current instruction (PUC varinfo) for
5017 /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
5018 /// to the instruction's subject register(s); a native-raised error whose
5019 /// current instruction doesn't hold `bad` simply yields "".
5020 fn subject_varinfo(&self, bad: Value) -> String {
5021 use crate::vm::isa::Op;
5022 let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5023 return String::new();
5024 };
5025 let proto = f.closure.proto;
5026 let p: &crate::runtime::Proto = &proto;
5027 let pc = f.pc as usize;
5028 if pc == 0 || pc > p.code.len() {
5029 return String::new();
5030 }
5031 let instr = p.code[pc - 1];
5032 let mut cands: Vec<u32> = Vec::new();
5033 match instr.op() {
5034 // indexed reads / length / method: the table/object is in B
5035 Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5036 cands.push(instr.b());
5037 }
5038 // indexed writes / calls: the table/function is in A
5039 Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5040 cands.push(instr.a());
5041 }
5042 // arithmetic/bitwise: a register operand (B, and C unless constant)
5043 Op::Add
5044 | Op::Sub
5045 | Op::Mul
5046 | Op::Div
5047 | Op::Mod
5048 | Op::Pow
5049 | Op::IDiv
5050 | Op::BAnd
5051 | Op::BOr
5052 | Op::BXor
5053 | Op::Shl
5054 | Op::Shr => {
5055 cands.push(instr.b());
5056 if !instr.k() {
5057 cands.push(instr.c());
5058 }
5059 }
5060 Op::Unm | Op::BNot => cands.push(instr.b()),
5061 Op::Concat => {
5062 let a = instr.a();
5063 for r in a..a + instr.b() {
5064 cands.push(r);
5065 }
5066 }
5067 _ => {}
5068 }
5069 for reg in cands {
5070 if self.r(f.base, reg).raw_eq(bad) {
5071 return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5072 Some((kind, name)) => format!(" ({kind} '{name}')"),
5073 None => String::new(),
5074 };
5075 }
5076 }
5077 String::new()
5078 }
5079
5080 /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5081 /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5082 /// 'add')" when the call is a metamethod dispatched by the current opcode.
5083 fn call_err(&mut self, v: Value) -> LuaError {
5084 let extra = self.call_target_varinfo(v);
5085 let tn = self.obj_typename(v);
5086 self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5087 }
5088
5089 /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5090 /// frame before the call, so the opcode that triggered it lives in the
5091 /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5092 /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5093 fn call_target_varinfo(&self, bad: Value) -> String {
5094 use crate::vm::isa::Op;
5095 let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5096 return String::new();
5097 };
5098 let proto = f.closure.proto;
5099 let p: &crate::runtime::Proto = &proto;
5100 let pc = f.pc as usize;
5101 if pc == 0 || pc > p.code.len() {
5102 return String::new();
5103 }
5104 let instr = p.code[pc - 1];
5105 match instr.op() {
5106 Op::Call | Op::TailCall => {
5107 let reg = instr.a();
5108 if self.r(f.base, reg).raw_eq(bad) {
5109 match crate::vm::objname::getobjname(p, pc - 1, reg) {
5110 Some((kind, name)) => format!(" ({kind} '{name}')"),
5111 None => String::new(),
5112 }
5113 } else {
5114 String::new()
5115 }
5116 }
5117 op => match mm_event_name(op) {
5118 Some(ev) => format!(" (metamethod '{ev}')"),
5119 None => String::new(),
5120 },
5121 }
5122 }
5123
5124 /// "number has no integer representation", enriched (PUC luaG_tointerror)
5125 /// with a "(field 'x')"-style suffix naming the offending operand of the
5126 /// current arithmetic instruction when it can be recovered from bytecode.
5127 fn no_int_rep_err(&mut self) -> LuaError {
5128 let extra = self.bad_operand_varinfo();
5129 self.rt_err(&format!("number{extra} has no integer representation"))
5130 }
5131
5132 /// Inspect the current frame's faulting instruction: find the register
5133 /// operand holding a float with no integer representation and name it.
5134 fn bad_operand_varinfo(&self) -> String {
5135 let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5136 return String::new();
5137 };
5138 let proto = f.closure.proto;
5139 let p: &crate::runtime::Proto = &proto;
5140 let pc = f.pc as usize;
5141 if pc == 0 || pc > p.code.len() {
5142 return String::new();
5143 }
5144 let instr = p.code[pc - 1];
5145 let mut regs = vec![instr.b()];
5146 if !instr.k() {
5147 regs.push(instr.c());
5148 }
5149 for reg in regs {
5150 let v = self.r(f.base, reg);
5151 if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5152 return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5153 Some((kind, name)) => format!(" ({kind} '{name}')"),
5154 None => String::new(),
5155 };
5156 }
5157 }
5158 String::new()
5159 }
5160
5161 /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5162 /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5163 /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5164 /// native call), PUC pushes no prefix — match that by looking only at the
5165 /// topmost frame directly and bailing if it is anything but a Lua frame.
5166 pub(crate) fn position_prefix(&self) -> Option<String> {
5167 let f = self.frames.last().and_then(CallFrame::lua)?;
5168 let proto = f.closure.proto;
5169 if proto.source.as_bytes().is_empty() {
5170 return Some(self.stripped_prefix());
5171 }
5172 if proto.lines.is_empty() {
5173 return None;
5174 }
5175 let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5176 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5177 let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5178 let display = crate::vm::lib_debug::chunk_id(raw);
5179 let src = String::from_utf8_lossy(&display).into_owned();
5180 Some(format!("{src}:{line}: "))
5181 }
5182
5183 /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5184 /// for the source and renders the line as "?" (so the prefix reads
5185 /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5186 /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5187 /// matches `^%?:%-1:`).
5188 fn stripped_prefix(&self) -> String {
5189 if self.version >= crate::version::LuaVersion::Lua55 {
5190 "?:?: ".to_string()
5191 } else {
5192 "?:-1: ".to_string()
5193 }
5194 }
5195
5196 /// Position prefix of the Lua frame `level` steps up from the running C
5197 /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5198 /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5199 /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5200 /// caller's frame is reported even across pcall/xpcall continuations.
5201 pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5202 let fi = match self.dbg_frame(level)? {
5203 DbgKind::Lua(fi) => fi,
5204 DbgKind::C(_) | DbgKind::Tail(_) => return None,
5205 };
5206 let f = self.frames[fi].lua()?;
5207 let proto = f.closure.proto;
5208 // PUC luaG_addinfo: a stripped chunk has no source — see
5209 // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5210 if proto.source.as_bytes().is_empty() {
5211 return Some(self.stripped_prefix());
5212 }
5213 // a stripped chunk carries no per-instruction line info
5214 if proto.lines.is_empty() {
5215 return None;
5216 }
5217 let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5218 // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5219 // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5220 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5221 let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5222 let display = crate::vm::lib_debug::chunk_id(raw);
5223 let src = String::from_utf8_lossy(&display).into_owned();
5224 Some(format!("{src}:{line}: "))
5225 }
5226
5227 // ---- the interpreter ----
5228
5229 fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5230 let entry_depth = self.frames.len();
5231 self.exec_with(entry_depth)
5232 }
5233
5234 /// Run from the current top frame down to (but not past) `entry_depth`
5235 /// frames. Coroutine driving passes `entry_depth = 1` so the whole thread
5236 /// runs to completion or a yield.
5237 /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
5238 /// `entry_depth` (captured pre-yield by `drive_one`). Called by
5239 /// `EvalFuture::poll` on every poll after the first to walk the
5240 /// existing call frames until the next `BudgetExhausted` or
5241 /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
5242 /// embedder reaches it through `Vm::eval_async`.
5243 pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5244 self.exec_with(entry_depth)
5245 }
5246
5247 fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5248 loop {
5249 let r = self.run(entry_depth);
5250 if r.is_err()
5251 && (self.yielding.is_some()
5252 || self.terminating.is_some()
5253 || self.host_yield_pending
5254 || self.pending_async_native_fut.is_some())
5255 {
5256 // a `coroutine.yield` is in flight: keep the frames intact (they
5257 // are the suspended coroutine's saved state) and propagate to
5258 // resume. A self-close termination propagates the same way, so a
5259 // protecting pcall on the way out cannot catch (unwind) it.
5260 // v1.1 B10 — `host_yield_pending` is the async-mode
5261 // analogue: the sentinel must reach `drive_one` without
5262 // a protecting `pcall` swallowing it.
5263 return r;
5264 }
5265 match r {
5266 Ok(vals) => return Ok(vals),
5267 // unwind toward `entry_depth`. A protecting pcall/xpcall
5268 // continuation caught along the way turns the error into
5269 // `false, msg` and the loop resumes running its caller; an
5270 // uncaught error propagates out.
5271 Err(e) => match self.unwind(e.0, entry_depth) {
5272 Unwound::Caught => continue,
5273 Unwound::CaughtReturn(vals) => return Ok(vals),
5274 Unwound::Propagated(err) => return Err(err),
5275 },
5276 }
5277 }
5278 }
5279
5280 /// Unwind the call stack from the error point toward `entry_depth`, running
5281 /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
5282 /// continuation frame at/above `entry_depth` (the error is *caught*: its
5283 /// slot receives `false, msg`); if none is reached, the error propagates.
5284 fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
5285 // PUC 5.5 `luaG_errormsg` substitutes "<no error object>" when the
5286 // error object is nil — so `pcall(function() error(nil) end)` returns
5287 // that string instead of nil, and `assert(nil, nil)` (whose path
5288 // throws nil via `lua_settop(L, 1)`) also surfaces a string. Earlier
5289 // dialects (5.4 and below) keep the nil — 5.4 errors.lua :49 asserts
5290 // `doit("error()") == nil` and luna would fail that if it always
5291 // substituted. luna's native `error()` still does its own conversion
5292 // for direct callers.
5293 if matches!(err, Value::Nil) && self.version >= crate::version::LuaVersion::Lua55 {
5294 err = Value::Str(self.heap.intern(b"<no error object>"));
5295 }
5296 // The protected call runs in-place among the caller frames' registers,
5297 // so truncating the failed frames here cuts into caller windows below
5298 // the catcher. Snapshot the live length: at the error point the stack
5299 // already spans every surviving frame's window, so restoring it after a
5300 // catch reinstates them all (the reclaimed slots above are dead temps).
5301 // PUC handles overflow recovery via a separate EXTRA_STACK reserve;
5302 // we instead clamp the restore to the catcher's caller window when the
5303 // error point was at the stack limit (cause: the next `call_value_impl`
5304 // picks `func_slot = stack.len()` which would otherwise re-overflow).
5305 let saved_len = self.stack.len();
5306 // Snapshot the traceback at the error point — before any frame is
5307 // popped — so an `xpcall` msgh (which runs after the failed frames are
5308 // gone) can still describe the error site. The handler frame about to
5309 // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
5310 // visible here; once popped, `debug.traceback` would miss it.
5311 // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
5312 // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
5313 // recovery — locals.lua:659) re-overflows. Capture-once propagates
5314 // through nested unwinds (inner→outer) without re-running msgh.
5315 if self.error_traceback.is_none() {
5316 self.error_traceback = Some(self.traceback_bytes(1));
5317 }
5318 while self.frames.len() >= entry_depth {
5319 match *self.frames.last().expect("frame") {
5320 // a yieldable-metamethod continuation does not catch: discard the
5321 // abandoned instruction and keep unwinding (PUC drops the partial
5322 // op on error).
5323 CallFrame::Cont(NativeCont {
5324 kind: ContKind::Meta(mc),
5325 func_slot,
5326 ..
5327 }) => {
5328 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5329 self.stack.truncate(func_slot as usize);
5330 self.top = mc.saved_top.min(func_slot);
5331 self.tbc.retain(|&s| s < func_slot);
5332 }
5333 // a __pairs continuation does not catch either: an error inside
5334 // the metamethod propagates past `pairs`.
5335 CallFrame::Cont(NativeCont {
5336 kind: ContKind::Pairs,
5337 func_slot,
5338 ..
5339 }) => {
5340 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5341 self.stack.truncate(func_slot as usize);
5342 self.top = func_slot;
5343 self.tbc.retain(|&s| s < func_slot);
5344 }
5345 // a __close continuation does not catch: drop the half-run
5346 // handler's window, then continue the close yieldably with
5347 // the new error threaded as `pending`. Preserve `cc.after`
5348 // verbatim — `Return`/`Block` originating from an aborting
5349 // OP_Return/OP_Close will be short-circuited by
5350 // `finish_close_after` (pending propagates as Err); a
5351 // `ResumeUnwind` originated by our own Lua-frame handler
5352 // must keep its deferred frame-pop semantics so that frame
5353 // is not orphaned. If a fresh handler yields, `drive_close`
5354 // pushes another `Cont::Close` and we return `Caught` so
5355 // `exec_with` re-enters the run loop.
5356 CallFrame::Cont(NativeCont {
5357 kind: ContKind::Close(cc),
5358 func_slot,
5359 ..
5360 }) => {
5361 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5362 self.stack.truncate(func_slot as usize);
5363 self.top = func_slot;
5364 self.tbc.retain(|&s| s < func_slot);
5365 match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
5366 Ok(Some(_)) => {
5367 unreachable!(
5368 "Block / Return / ResumeUnwind never return host values mid-unwind"
5369 )
5370 }
5371 Ok(None) => return Unwound::Caught,
5372 Err(e) => {
5373 err = e.0;
5374 continue;
5375 }
5376 }
5377 }
5378 CallFrame::Cont(nc) => {
5379 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5380 self.pcall_depth -= 1;
5381 let result = match nc.kind {
5382 ContKind::Pcall => err,
5383 ContKind::Xpcall { handler } => {
5384 // PUC keeps `L->errfunc` set across the handler's
5385 // call: `luaG_errormsg` re-fires the handler when
5386 // it raises (so `xpcall(error, err, 170)` lets the
5387 // chain bottom out at err(0) → "END"). luna mirrors
5388 // that by looping until the handler returns or
5389 // luna's `iters` cap forces termination.
5390 //
5391 // The cap models PUC's nCcalls soft window
5392 // (MAXCCALLS/10*11): once tripped, `stackerror`
5393 // raises "C stack overflow" via `luaG_runerror`
5394 // which itself re-enters `luaG_errormsg`, so the
5395 // handler runs once more with that string and
5396 // naturally returns it (errors.lua :637 at N=300).
5397 // We count iterations per Cont::Xpcall rather than
5398 // a global counter — nested xpcalls each get their
5399 // own budget, matching the way PUC's stack frames
5400 // accumulate per dispatch path.
5401 const MSGH_CAP: u32 = MAX_C_DEPTH;
5402 let mut cur_err = err;
5403 let mut iters: u32 = 0;
5404 let mut capped = false;
5405 loop {
5406 if iters >= MSGH_CAP && !capped {
5407 cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
5408 capped = true;
5409 }
5410 iters += 1;
5411 self.msgh_depth += 1;
5412 let r = self.call_value(handler, &[cur_err]);
5413 self.msgh_depth -= 1;
5414 match r {
5415 Ok(hr) => {
5416 break hr.first().copied().unwrap_or(Value::Nil);
5417 }
5418 Err(_) if capped => {
5419 // the handler still errored on the
5420 // synthesized "C stack overflow"; fall
5421 // back to PUC's LUA_ERRERR string.
5422 break Value::Str(
5423 self.heap.intern(b"error in error handling"),
5424 );
5425 }
5426 Err(e) => {
5427 cur_err = e.0;
5428 }
5429 }
5430 }
5431 }
5432 ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
5433 unreachable!("Meta/Pairs/Close cont handled above")
5434 }
5435 };
5436 // the error has been caught (pcall/xpcall): the captured
5437 // traceback was for that error and is no longer in flight.
5438 self.error_traceback = None;
5439 let fs = nc.func_slot as usize;
5440 if self.stack.len() < fs + 2 {
5441 self.stack.resize(fs + 2, Value::Nil);
5442 }
5443 self.stack[fs] = Value::Bool(false);
5444 self.stack[fs + 1] = result;
5445 self.top = nc.func_slot + 2;
5446 self.tbc.retain(|&s| s < nc.func_slot);
5447 if self.frames.len() < entry_depth {
5448 return Unwound::CaughtReturn(self.take_results(nc.func_slot));
5449 }
5450 self.finish_results(nc.func_slot, 2, nc.nresults);
5451 // reinstate the caller windows the unwind truncated into,
5452 // clamped to the catcher's caller window + a `MIN_STACK`
5453 // reserve. The clamp is a no-op for normal pcall catches
5454 // (saved_len lies within the caller's max_stack window),
5455 // and prevents the stack from staying near `MAX_LUA_STACK`
5456 // after an overflow-recovery catch — which would make the
5457 // next `call_value_impl` (e.g. a `__close` in the catcher's
5458 // errorh, locals.lua:659) pick `func_slot = stack.len()`
5459 // above the limit and re-overflow.
5460 // Restore the caller's full register window: opcodes
5461 // index it directly. The cap covers caller's base +
5462 // `max_stack` + a small reserve. We always resize to
5463 // exactly this window — previously this clamped
5464 // `saved_len` from above to prevent staying near
5465 // `MAX_LUA_STACK` after an overflow-recovery catch, and
5466 // a yieldable-unwind re-entry adds the dual case where
5467 // `saved_len` is *below* the window (a prior
5468 // `ResumeUnwind` truncated). Using the window directly
5469 // covers both.
5470 let restore = self
5471 .frames
5472 .iter()
5473 .rev()
5474 .find_map(CallFrame::lua)
5475 .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
5476 .unwrap_or(saved_len);
5477 if self.stack.len() < restore {
5478 self.stack.resize(restore, Value::Nil);
5479 } else if self.stack.len() > restore {
5480 self.stack.truncate(restore);
5481 }
5482 // v2.5 P1B-2B: clear slots vacated by the popped
5483 // frames the unwind walked over. finish_results
5484 // above clears `[nc.func_slot + nresults ..
5485 // nc.func_slot + 2)`, which only covers the
5486 // pcall's own result region — the unwind-popped
5487 // frames' locals in `[nc.func_slot + 2 .. restore)`
5488 // are still in place with whatever Gc-bearing
5489 // Values they last held. Without this clear, a
5490 // later GC marks the stale pointers (UAF-A family
5491 // analog of the v2.3 Op::Return finish_results
5492 // path). PUC's `luaD_pcall` similarly truncates
5493 // L->top to the catcher's level — luna's
5494 // truncate above resizes the Vec but doesn't
5495 // touch slots [func_slot+2..restore) that were
5496 // already present.
5497 let clear_lo = (nc.func_slot as usize + 2).min(self.stack.len());
5498 let clear_hi = restore.min(self.stack.len());
5499 if clear_lo < clear_hi {
5500 for slot in &mut self.stack[clear_lo..clear_hi] {
5501 *slot = Value::Nil;
5502 }
5503 }
5504 return Unwound::Caught;
5505 }
5506 CallFrame::Lua(f) => {
5507 // Yieldable error-unwind close, PUC luaG_errormsg shape:
5508 // (1) pop the Lua frame immediately so each `__close`
5509 // handler runs at the C boundary above — `debug.getinfo`
5510 // sees the next outer Lua frame's call site (typically
5511 // `pcall`), not this aborting function (locals.lua:480).
5512 // (2) drive the close yieldably with
5513 // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
5514 // it truncates to `func_slot` and re-raises (letting a
5515 // handler-raised error win over `err`). If a handler
5516 // yields, `drive_close` pushes `Cont::Close` and we
5517 // return `Caught` so `exec_with` re-enters the run loop;
5518 // a synchronous drain returns Err exactly as the old
5519 // path did.
5520 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5521 let after = AfterClose::ResumeUnwind {
5522 func_slot: f.func_slot,
5523 err,
5524 };
5525 match self.begin_close(f.base, Some(err), after, entry_depth) {
5526 Ok(Some(_)) => {
5527 unreachable!("ResumeUnwind never returns host values")
5528 }
5529 Ok(None) => return Unwound::Caught,
5530 Err(e) => {
5531 err = e.0;
5532 continue;
5533 }
5534 }
5535 }
5536 }
5537 }
5538 Unwound::Propagated(LuaError(err))
5539 }
5540
5541 fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5542 loop {
5543 // Fast-path slow-check gate: most embedders run with both
5544 // `instr_budget` and `mem_cap` as None, so a single combined
5545 // is_some test lets the hot loop skip both branches with one
5546 // load + branch instead of two.
5547 if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5548 if let Some(b) = self.instr_budget.as_mut() {
5549 *b -= 1;
5550 if *b <= 0 {
5551 self.instr_budget = None;
5552 // v1.1 B10 Stage 1 — async-mode cooperative
5553 // yield. Set a sentinel flag so `exec_with`
5554 // propagates the Err without `unwind` running
5555 // (mirroring the `yielding.is_some()` path),
5556 // and `call_value_impl` preserves the call
5557 // frames for the next `poll`. Translation back
5558 // to `DispatchOutcome::BudgetExhausted` happens
5559 // in `drive_one`. The Err value itself is
5560 // `Value::Nil` — a pure sentinel, never seen by
5561 // user code.
5562 if self.async_mode {
5563 self.host_yield_pending = true;
5564 return Err(LuaError(Value::Nil));
5565 }
5566 // B6: classify the trip so embedders can
5567 // distinguish budget exhaustion from a
5568 // generic Runtime error and retry / give up
5569 // accordingly.
5570 self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5571 let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5572 return Err(LuaError(s));
5573 }
5574 }
5575 if let Some(cap) = self.heap.mem_cap
5576 && self.heap.bytes() > cap
5577 {
5578 // First try a full collect — embedders set tight caps
5579 // and the overshoot may be reclaimable (closures kept
5580 // by short-lived frames, intermediate strings). Only
5581 // disarm + raise if the cap is still breached after
5582 // collection. PUC's `LUA_GCEMERGENCY` path matches.
5583 //
5584 // v2.5 P1B-2E partial: maybe_collect_garbage
5585 // tightening to bare `live_top` works (slot-clear
5586 // covers all frame-pop sites), but the mem-cap-
5587 // fire path remains over-rooted via
5588 // `self.stack.len()`. Reason: the cap fires
5589 // during table mutation in a tight `a[i] = i`
5590 // loop, where `a` lives at a frame-register slot
5591 // past `self.top` (OP_NEWINDEX doesn't advance
5592 // top) and there's no frame-pop event for the
5593 // slot-clear to trigger on. Per-frame walk could
5594 // catch it but broke db.lua in v2.2.1 attempts.
5595 // The over-root here is rare (fire-once disarms)
5596 // + correctness-critical. Full tightening lives
5597 // in v2.6+ if a per-frame walk with weak-table
5598 // semantics fix lands.
5599 self.gc_top = self.stack.len() as u32;
5600 self.collect_garbage();
5601 if self.heap.bytes() > cap {
5602 self.heap.mem_cap = None;
5603 let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5604 return Err(LuaError(s));
5605 }
5606 }
5607 }
5608 // Single combined frame fetch: continuation arm OR Lua arm. Saves
5609 // a second `self.frames.last()` slice access vs the prior split
5610 // form (LLVM doesn't always CSE these across the cont branch).
5611 // A continuation frame on top means the call it protected just
5612 // delivered its results — wrap as `true, results…` and hand to
5613 // the pcall/xpcall caller. The error path is handled by `unwind`;
5614 // this branch is only reached on success/resume completion.
5615 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5616 let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5617 if let &CallFrame::Cont(nc) = frame_peek {
5618 // a yieldable metamethod returned: complete the interrupted
5619 // instruction (PUC luaV_finishOp) and resume the running frame.
5620 if let ContKind::Meta(mc) = nc.kind {
5621 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5622 let result = if self.top > nc.func_slot {
5623 self.stack[nc.func_slot as usize]
5624 } else {
5625 Value::Nil
5626 };
5627 self.stack.truncate(nc.func_slot as usize);
5628 self.top = mc.saved_top;
5629 self.finish_meta(mc.action, result)?;
5630 continue;
5631 }
5632 // a __close handler returned successfully: discard its
5633 // results, restore `top` to the slot the handler was called
5634 // at (the surrounding frame's register window above this slot
5635 // must stay alloc'd — never truncate the underlying stack),
5636 // then continue the close chain (next slot, or fire
5637 // AfterClose). When the close ends an entry activation,
5638 // drive_close hands the results up to exec_with directly.
5639 if let ContKind::Close(cc) = nc.kind {
5640 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5641 self.top = nc.func_slot;
5642 if let Some(vals) =
5643 self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5644 {
5645 return Ok(vals);
5646 }
5647 continue;
5648 }
5649 // __pairs returned: normalize its results to exactly four
5650 // (iterator, state, control, closing) at pairs's slot, where
5651 // the metamethod was called, and hand them to pairs's caller.
5652 if let ContKind::Pairs = nc.kind {
5653 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5654 let total = 4u32;
5655 let need = (nc.func_slot + total) as usize;
5656 if self.stack.len() < need {
5657 self.stack.resize(need, Value::Nil);
5658 }
5659 for s in self.top..(nc.func_slot + total) {
5660 self.stack[s as usize] = Value::Nil;
5661 }
5662 self.top = nc.func_slot + total;
5663 if self.frames.len() < entry_depth {
5664 return Ok(self.take_results(nc.func_slot));
5665 }
5666 self.finish_results(nc.func_slot, total, nc.nresults);
5667 continue;
5668 }
5669 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5670 self.pcall_depth -= 1;
5671 // f's results sit at nc.func_slot+1.. (f was called one slot
5672 // above the continuation), so writing `true` at the slot makes
5673 // `true, results…` already contiguous.
5674 let nret = self.top - (nc.func_slot + 1);
5675 self.stack[nc.func_slot as usize] = Value::Bool(true);
5676 let total = 1 + nret;
5677 self.top = nc.func_slot + total;
5678 if self.frames.len() < entry_depth {
5679 return Ok(self.take_results(nc.func_slot));
5680 }
5681 self.finish_results(nc.func_slot, total, nc.nresults);
5682 continue;
5683 }
5684 // GC runs only at the allocation safe points below (PUC's
5685 // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5686 // no longer collects, so a stale full-window `gc_top` cannot leak in.
5687 //
5688 // Hot-path frame fetch: the Cont arm above continues the loop,
5689 // so reaching here means `frame_peek` is the Lua frame. Reuse it
5690 // rather than re-fetching `self.frames.last()`.
5691 let f = match frame_peek {
5692 CallFrame::Lua(f) => f,
5693 _ => unreachable!("Cont frame survived the dispatch loop head"),
5694 };
5695 let cl = f.closure;
5696 let base = f.base;
5697 let func_slot = f.func_slot;
5698 let n_varargs = f.n_varargs;
5699 let pc = f.pc;
5700 let oldpc = f.hook_oldpc;
5701
5702 // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5703 // — every branch / call op only sets `pc` to a valid index, and
5704 // function entry initialises pc=0 with a non-empty body. PUC's
5705 // `vmfetch` uses the equivalent unchecked load.
5706 let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5707
5708 // P12-S1.C/D — trace recording append + close detection.
5709 // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5710 // so default dispatch keeps a single not-taken branch.
5711 //
5712 // - At the head PC with a non-empty record, the trace has
5713 // looped back to its start: mark `closed = true` and
5714 // take the record (S2 will compile + cache).
5715 // - Otherwise, capture the op. If the record overflows
5716 // MAX_TRACE_LEN, abort by dropping it.
5717 if self.jit.trace_enabled
5718 && let Some(_rec) = self.jit.active_trace.as_mut()
5719 {
5720 // P12-S4 — depth tracking. The trace head's frame is
5721 // at index `recording_frame_base`; every Op::Call that
5722 // pushes a new frame bumps the live depth, every
5723 // Op::Return that pops one decrements it.
5724 //
5725 // **Three clean-close conditions** (P12-S4-step4a):
5726 // - `at_head`: cur_depth == 0 AND about-to-execute the
5727 // trace's head_pc on its head_proto (loop closed back
5728 // to start). Same for loop-triggered and call-triggered
5729 // traces — step4a unified the gating so call-triggered
5730 // no longer closes on the first re-entry (that left
5731 // fib's body at 7 depth=0 ops; step4a lets it inline
5732 // up to MAX_INLINE_DEPTH levels before any close).
5733 // - `returned_past_head`: trace head's frame is gone
5734 // (callee returned past it, or the call-trigger
5735 // started a recording inside a callee that has now
5736 // returned). Whatever ops were recorded form the
5737 // trace body; the lowerer treats the partial trace
5738 // the same as InlineAbort (dispatchable=false until
5739 // step4b's frame materialization lands).
5740 // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5741 // Recording any deeper would just bloat the IR; close
5742 // with the body we have. Lowerer's existing length
5743 // gate + InlineAbort path handles short bodies.
5744 let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5745 let cur_depth = if returned_past_head {
5746 0
5747 } else {
5748 self.frames.len() - 1 - self.jit.recording_frame_base
5749 };
5750 let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5751 let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5752 let at_head_loop = cur_depth == 0
5753 && !rec.ops.is_empty()
5754 && !returned_past_head
5755 && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5756 && pc == rec.head_pc;
5757 // P16-A — self-link cycle catch (mirrors LuaJIT's
5758 // `check_call_unroll` at `lj_record.c:1869`). Trips when:
5759 // 1. We're about to execute the head_pc on head_proto
5760 // at depth > 0 (we're re-entering the trace head
5761 // from inside an inlined recursion level — UpRec).
5762 // 2. The count of ancestor frames in the recording
5763 // window that share `head_proto` exceeds
5764 // [`RECUNROLL_THRESHOLD`] (default 2).
5765 // For fib(N): head_pc=0, head_proto=fib. After 2 inline
5766 // recursion levels are captured, the recorder enters
5767 // the 3rd nested fib frame, sees cur_depth=3 > 2, and
5768 // trips this catch — closing with `SelfRecKind::UpRec`.
5769 // The lowerer's `TraceEnd::SelfLink` tail emits the
5770 // bump-base + branch-to-self loop body.
5771 //
5772 // TailRec vs UpRec: LJ distinguishes via
5773 // `framedepth + retdepth == 0`. luna doesn't track
5774 // retdepth separately; cur_depth == 0 with a non-empty
5775 // call chain in tail position is rare (would require
5776 // explicit Lua TCO). We use cur_depth > 0 as the UpRec
5777 // condition (fib's case); cur_depth == 0 with positive
5778 // ancestor count would route to TailRec, but luna's
5779 // recorder doesn't currently produce that shape because
5780 // tail-call elision pops the caller frame and we'd
5781 // hit `at_head_loop` instead.
5782 let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
5783 if self.jit.p16_self_link_enabled
5784 && !returned_past_head
5785 && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5786 && pc == rec.head_pc
5787 && cur_depth > 0
5788 {
5789 // Count ancestor frames sharing head_proto.
5790 // self.frames[recording_frame_base..] currently
5791 // includes the just-pushed frame at the top
5792 // (the one about to execute head_pc). Ancestors
5793 // = the slice excluding the top frame.
5794 let head_proto_ptr = rec.head_proto.as_ptr();
5795 let last_idx = self.frames.len() - 1;
5796 let mut count = 0usize;
5797 for i in self.jit.recording_frame_base..last_idx {
5798 if let CallFrame::Lua(f) = &self.frames[i]
5799 && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
5800 {
5801 count += 1;
5802 }
5803 }
5804 if count > crate::jit::trace::RECUNROLL_THRESHOLD {
5805 // cur_depth > 0 → UpRec (fib pattern).
5806 // cur_depth == 0 wouldn't reach this arm.
5807 Some(crate::jit::trace::SelfRecKind::UpRec)
5808 } else {
5809 None
5810 }
5811 } else {
5812 None
5813 }
5814 };
5815 if let Some(kind) = self_link_trip {
5816 // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
5817 // self-recursive patterns at frame depth >= 2.
5818 //
5819 // Pre sub-0: a SelfLink trip at the head_pc re-entry
5820 // unconditionally stamped `self_link_kind`. The
5821 // R3a `downrec_close` marker can only fire from the
5822 // depth>0 Op::Return path (`rec.retfs` chain),
5823 // which never reaches the recorder for fib(28)-like
5824 // shapes that hit the SelfLink cycle catch BEFORE
5825 // any base-case Return — leaving `downrec_close`
5826 // None and routing the trace through R1's safe
5827 // `dispatchable=false` `"self-link-retf-r1"` path
5828 // (audit measured `trace_dispatched = 0`).
5829 //
5830 // Sub-0 lift: when the SelfLink trip fires AND
5831 // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
5832 // gate already requires this — kept explicit as a
5833 // safety floor), route the close through `downrec_
5834 // close` INSTEAD of `self_link_kind`. The recorder
5835 // synthesises the close marker from the most
5836 // recent Op::Call at depth `cur_depth - 1`:
5837 // - `return_pc` = `call.pc + 1` (caller's resume
5838 // PC after the recursive call returns; mirror
5839 // of R3a's `caller_pc` derivation at the
5840 // depth>0 Op::Return capture path below).
5841 // - `target_proto` = `call.proto` (caller's
5842 // proto; equals `rec.head_proto` for self-
5843 // recursion).
5844 // - `depth_delta` = `1` (today's recorder always
5845 // unrolls one level; R3a uses the same
5846 // constant).
5847 //
5848 // The lowerer's `end_idx` picker (`trace.rs:3729`)
5849 // routes through `TraceEnd::DownRec` ahead of the
5850 // `self_link_kind` arm; the R3b/R3d lowerer arm
5851 // emits the stitch-sentinel + caller-pc-guard
5852 // scaffold. Single-candidate guard chain (sub-0's
5853 // recorder produces 1 caller_pc candidate because
5854 // `rec.retfs` is empty) keeps `dispatchable=false`
5855 // + `"downrec-stitch-pending"` label (per R3d's
5856 // `multi_way_candidate_count >= 2` gate at
5857 // `trace.rs:7385`). Net behaviour: trace compiles
5858 // under DownRec routing; interp runs the
5859 // recursion naturally → result 317811.
5860 //
5861 // The `cur_depth >= 2` gate is automatically
5862 // satisfied by the count > RECUNROLL_THRESHOLD=2
5863 // trip condition (3 ancestor frames sharing
5864 // head_proto implies cur_depth >= 3), kept
5865 // explicit so a future RECUNROLL_THRESHOLD tweak
5866 // doesn't silently flip shallow-recursion
5867 // shapes (cur_depth == 1) onto the DownRec arm.
5868 //
5869 // R3.3+ sub-1/2/3/4 will replace the depth-baked
5870 // op_offsets[] addressing with runtime base_var
5871 // threading so the trace's recorded body is
5872 // depth-relative and the DownRec dispatch
5873 // becomes wall-clock-positive. Sub-0 is the
5874 // routing scaffold; it does not aim for gain.
5875 let _ = kind;
5876 let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
5877 let caller_depth_u8 = (cur_depth - 1) as u8;
5878 if let Some(call_op) = rec.ops.iter().rev().find(|r| {
5879 r.inline_depth == caller_depth_u8
5880 && matches!(r.inst.op(), crate::vm::isa::Op::Call)
5881 }) {
5882 rec.downrec_close = Some(crate::jit::trace::DownRecClose {
5883 return_pc: call_op.pc + 1,
5884 target_proto: call_op.proto,
5885 depth_delta: 1,
5886 });
5887 true
5888 } else {
5889 false
5890 }
5891 };
5892 if relaxed_to_downrec {
5893 // R2 close-cause taxonomy: tag the lift so
5894 // probes can tally the fire rate. Mirrors
5895 // R3a's `"downrec-restart"` bump for the
5896 // depth>0 Op::Return path (different trip
5897 // origin, same downstream routing). The
5898 // existing `"self-link-retf-r1"` label still
5899 // fires for trips that DON'T relax (no
5900 // candidate Op::Call ancestor in rec.ops, or
5901 // cur_depth < 2) via the lowerer's
5902 // dispatch_off_reason mirror at the close
5903 // handler — kept as a regression safety net.
5904 self.jit
5905 .counters
5906 .bump_close_cause("selflink-yields-to-downrec");
5907 } else {
5908 rec.self_link_kind = Some(kind);
5909 }
5910 }
5911 let should_close =
5912 at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
5913 if should_close {
5914 // P13-S13-H — long-trace bias: a call-triggered
5915 // recording that closed with a very short body
5916 // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
5917 // binary_trees `make(0)`: 4 ops) is pathological.
5918 // Compiling + caching it pins `Proto.traces` to a
5919 // trace that the length gate will refuse to
5920 // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
5921 // = 40`), AND blocks the back-edge / longer-call
5922 // path from re-recording the same head_pc (the
5923 // dedup `already_cached` check below short-
5924 // circuits). The fix: discard the short call-
5925 // triggered recording WITHOUT caching, and bias
5926 // the proto's `call_hot_count` back to
5927 // `THRESHOLD - HOT_RETRY_WINDOW` so the next
5928 // sequence of calls retries the trigger at a
5929 // different (hopefully deeper) recursion point.
5930 //
5931 // Back-edge triggered traces are exempt — a
5932 // tight numeric-for loop's body is legitimately
5933 // 3 ops (`Add`, ForLoop) and DOES dispatch
5934 // usefully when re-entered many times.
5935 // P13-S13-H — coverage heuristic to detect
5936 // pathologically partial call-triggered traces:
5937 // for self-recursive / branchy protos like
5938 // `fib` (~17 bytecode ops) or
5939 // `binary_trees.make` (~26 ops), the recorder
5940 // can fire at a BASE-case entry (`fib(0)` or
5941 // `make(0)`) producing a 3–4 op trace that
5942 // covers a tiny fraction of the proto's code.
5943 // That trace is doomed by the length gate
5944 // post-compile AND blocks any longer follow-up
5945 // (the dedup `already_cached` check below). The
5946 // fix: discard call-triggered closes where
5947 // `rec.ops.len() * 2 < head_proto.code.len()`
5948 // (less than half the proto's bytecode), so the
5949 // back-edge / longer call path can take over.
5950 //
5951 // Why coverage > raw length: protos with
5952 // intrinsically short bodies (closure
5953 // factories: `Closure + Return1` = 2 ops,
5954 // simple wrappers: `LoadI + Return1` = 2 ops)
5955 // record 100% coverage even at length 2 — those
5956 // ARE legitimately short and the closure /
5957 // sunk-emit lowering paths (S7-A / S9-C) make
5958 // them worth compiling. The heuristic admits
5959 // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
5960 // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
5961 // ~26) get discarded.
5962 //
5963 // Back-edge triggered traces are unaffected —
5964 // a tight numeric-for body legitimately covers
5965 // 3 of ~3 proto ops it can dispatch from
5966 // (`Add + ForLoop`) and the recorder fires on
5967 // the back-edge, not call entry.
5968 //
5969 // `call_hot_count` is intentionally NOT reset
5970 // (an earlier draft tried `THRESHOLD - 32` but
5971 // caused active_trace contention with the
5972 // outer back-edge trigger — see
5973 // setlist_b_zero_with_call_c_zero_sunk_emits).
5974 // We give up on dispatching the pathological
5975 // shape on the same proto; the back-edge or a
5976 // longer call path on a deeper recursion point
5977 // can still record + cache a real trace.
5978 let proto_code_len = rec.head_proto.code.len();
5979 let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
5980 // P13-S13-I — per-Proto discard cap. The S13-H
5981 // relaxed trigger condition (`c >= THRESHOLD &&
5982 // !already_cached`) means a Proto whose every
5983 // recording is partial-coverage will re-fire the
5984 // trigger every call indefinitely (1500+ in
5985 // `binary_trees`-pattern test). The cap stops
5986 // discarding after `MAX_DISCARDS_PER_PROTO` —
5987 // the next close falls through to compile (even
5988 // if partial), caches the trace, and the
5989 // `already_cached` short-circuit kills the
5990 // storm. Dispatch may still be refused
5991 // post-compile (length gate), but the recorder
5992 // stops churning.
5993 const MAX_DISCARDS_PER_PROTO: u32 = 5;
5994 let prior_discards = rec.head_proto.trace_discard_count.get();
5995 let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
5996 // P13-S13-K — flip the `gave_up` flag the
5997 // moment cap is reached (BEFORE the close-
5998 // dispatching branch below). The trigger gates
5999 // short-circuit on this flag, skipping the
6000 // RefCell + linear `already_cached` scan on
6001 // every subsequent call to this Proto. Useful
6002 // for `binary_trees_pattern`-class loads where
6003 // a single Proto sees ~20k calls post-cap.
6004 if cap_reached
6005 && rec.is_call_triggered
6006 && is_partial_coverage
6007 && !rec.head_proto.trace_gave_up.get()
6008 {
6009 rec.head_proto.trace_gave_up.set(true);
6010 }
6011 if rec.is_call_triggered && is_partial_coverage && !cap_reached {
6012 // Tally as closed (for visibility) but DROP
6013 // without compile/cache. Use the existing
6014 // closed-lens accumulator so probes can
6015 // observe the discarded shape.
6016 // P13-S13-I — bump discard count BEFORE
6017 // dropping the recording so the next
6018 // close sees the updated counter.
6019 rec.head_proto.trace_discard_count.set(prior_discards + 1);
6020 self.jit.counters.closed += 1;
6021 self.jit
6022 .counters
6023 .closed_lens
6024 .push((rec.is_call_triggered, rec.ops.len()));
6025 // v2.0 Track-R R2 — partial-coverage discard
6026 // close path. Pre-R2 this site bumped `closed`
6027 // + `closed_lens` (visibility) but no per-
6028 // reason label, so probes couldn't separate a
6029 // real successful close from a discard tally.
6030 // Tag explicitly to make the recorder-side
6031 // close-cause taxonomy single-source.
6032 self.jit
6033 .counters
6034 .bump_close_cause("partial-coverage-discard");
6035 self.jit.active_trace = None;
6036 // Continue with interp loop — don't
6037 // fall through to compile path.
6038 // The op at `pc` hasn't dispatched yet;
6039 // the outer loop iteration handles it.
6040 } else {
6041 rec.closed = true;
6042 // P12-S2.C — detach the closed record, then try
6043 // to compile it. Dedup by `head_pc`: a Proto
6044 // already carrying a CompiledTrace for this PC
6045 // skips recompile (the hot counter caps
6046 // re-recording at `u32::MAX / 2` anyway, but
6047 // explicit dedup keeps `Proto.traces` short
6048 // for the S3 dispatcher's linear scan).
6049 //
6050 // No `Vm::run` change for failure: we just bump
6051 // the failed counter and drop the record. S3
6052 // will read `Proto.traces` to decide whether to
6053 // dispatch — until then, this is bookkeeping.
6054 let head_pc_val = rec.head_pc;
6055 let closed_record = self
6056 .jit
6057 .active_trace
6058 .take()
6059 .expect("active_trace was Some this branch");
6060 self.jit.counters.closed += 1;
6061 self.jit
6062 .counters
6063 .closed_lens
6064 .push((closed_record.is_call_triggered, closed_record.ops.len()));
6065 // P12-S5-B fix: cache the trace on the
6066 // recorder's *head proto*, not the current
6067 // closure's proto. For non-recursive
6068 // call-triggered traces, close fires after
6069 // `Return1` pops the callee frame — `cl` at
6070 // that point is the CALLER's closure, while
6071 // `closed_record.head_proto` is the CALLEE's
6072 // proto (the one we actually want the trace
6073 // to be discoverable from on the next call).
6074 // Self-recursive fib closed via depth-cap
6075 // mid-recursion so `cl.proto == head_proto`
6076 // happened to coincide — this fix makes that
6077 // accidental coincidence intentional.
6078 let head_proto = closed_record.head_proto;
6079 let already_cached = head_proto
6080 .traces
6081 .borrow()
6082 .iter()
6083 .any(|t| t.head_pc == head_pc_val);
6084 if !already_cached {
6085 // Internal-loop = true: the trace runs in
6086 // a native loop until a cmp side-exits, so
6087 // the dispatcher's per-entry marshal cost
6088 // amortizes across the whole run of
6089 // iterations the loop's recorded direction
6090 // stays valid. The lowerer auto-downgrades
6091 // to one-shot for cmp-less or Call-truncating
6092 // traces.
6093 // P15-A v2-C-A6-5 — side traces MUST NOT
6094 // internal-loop. The parent's recorded prefix
6095 // (ops at PCs < side trace's head_pc) defines
6096 // values for registers the child's body reads
6097 // without re-writing each iter — e.g. for
6098 // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6099 // + R[11]` sets R[12], and the child trace
6100 // (head_pc=24) re-runs `pc=20 Move R[1] =
6101 // R[12]` each iter via its outer ForLoop
6102 // internal-loop, ALWAYS reading the stale
6103 // entry-time R[12]. The parent's Add never
6104 // re-runs during child's loop, so R[1] gets
6105 // pinned to one stale value. Force one-shot
6106 // for side traces: each parent-exit round-
6107 // trips through dispatcher → parent's Add
6108 // runs → side trace runs ONE iter → return.
6109 let opts = crate::jit::trace::CompileOptions {
6110 internal_loop: closed_record.side_trace_parent.is_none(),
6111 pre53: self.version() <= LuaVersion::Lua53,
6112 aot: false,
6113 };
6114 // v1.1 A1 Session A — route through trace_compiler.
6115 // v2.0 Track J sub-step J-B — split-borrow JitState
6116 // so the trait method can take `&mut dyn JitStorage`.
6117 let result = {
6118 let jit = &mut self.jit;
6119 let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6120 jit.trace_compiler
6121 .try_compile_trace(storage, &closed_record, opts)
6122 };
6123 match result {
6124 Some(mut ct) => {
6125 // P12-S5-A/B/C — tally Sinkable sites
6126 // + actually-sunk-emit sites + materialise
6127 // emit sites before moving `ct` into
6128 // Proto.traces.
6129 self.jit.counters.sinkable_seen +=
6130 ct.sinkable_sites_seen as u64;
6131 self.jit.counters.accum_bufferable_seen +=
6132 ct.accum_bufferable_seen as u64;
6133 self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6134 self.jit.counters.materialize_emit +=
6135 ct.materialize_emit_count as u64;
6136 self.jit.counters.closure_emit += ct.closure_seen as u64;
6137 if ct.is_inline_abort_close {
6138 self.jit.counters.inline_abort += 1;
6139 }
6140 // v2.0 Stage 7 polish 6 fire
6141 // experiment — split tally so a
6142 // probe can answer the AOT
6143 // `accepted_with_per_exit_inline`
6144 // gate's question at the JIT
6145 // surface too: how many compiled
6146 // traces emitted depth>0 cmp
6147 // side-exits, and how many of
6148 // those survived all the
6149 // `dispatchable = false` pins
6150 // (`InlineAbort-gate`,
6151 // `self-link-retf-r1`,
6152 // `downrec-stitch-pending`, etc.).
6153 if !ct.per_exit_inline.is_empty() {
6154 self.jit.counters.per_exit_inline_compiled += 1;
6155 if ct.dispatchable {
6156 self.jit.counters.per_exit_inline_dispatchable += 1;
6157 }
6158 }
6159 if let Some(reason) = ct.dispatch_off_reason {
6160 self.jit.counters.dispatch_off_reasons.push(reason);
6161 // v2.0 Track-R R2 — mirror
6162 // the ordered Vec push into
6163 // the per-reason HashMap so
6164 // probes can answer "how many
6165 // of each dispatch_off label
6166 // fired" in O(1) without
6167 // walking the Vec. Same
6168 // bucket as the recorder-side
6169 // abort/discard tags above.
6170 self.jit.counters.bump_close_cause(reason);
6171 }
6172 // v2.0 Track-R R3b — count
6173 // compiled traces that carry a
6174 // down-recursion stitch link.
6175 // Bumped here (not at the lowerer
6176 // emit site) because the Vm's
6177 // JitCounters live on the Vm,
6178 // and the lowerer doesn't have a
6179 // Vm handle. R3b's regression
6180 // pin reads this via
6181 // `Vm::trace_downrec_link_compiled_count`.
6182 if ct.downrec_link.is_some() {
6183 self.jit.counters.downrec_link_compiled += 1;
6184 }
6185 // v2.0 Track-R R3d — multi-way
6186 // guard emit counter. Bumped when
6187 // the lowerer's R3d arm collected
6188 // >= 2 distinct caller_pc candidates
6189 // and lifted `dispatchable=true`.
6190 // R3c's single-CMP shape stores
6191 // `1` here without bumping; non-
6192 // DownRec closes store `0`.
6193 if ct.downrec_multi_way_count >= 2 {
6194 self.jit.counters.multi_way_guard_emitted += 1;
6195 }
6196 // P15-A v2-A — side-trace finalisation.
6197 // Pin `dispatchable=false` so the
6198 // primary lookup `traces.find(|t|
6199 // t.head_pc == pc && t.dispatchable)`
6200 // never matches this entry — the
6201 // side trace is meant to be entered
6202 // ONLY through the parent's exit
6203 // indirection (v2-B/C IR), not the
6204 // back-edge / call-trigger paths.
6205 // Then write the entry fn ptr into
6206 // the parent's `exit_side_trace_ptrs`
6207 // slot so v2-B/C IR can read it.
6208 if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6209 closed_record.side_trace_parent
6210 {
6211 ct.dispatchable = false;
6212 let entry_ptr = ct.entry as *const () as *const u8;
6213 let _side_trace_head_pc = closed_record.head_pc;
6214 let parent_traces = parent_proto.traces.borrow();
6215 if let Some(parent_ct) = parent_traces
6216 .iter()
6217 .find(|t| t.head_pc == parent_head_pc)
6218 {
6219 // P15-A v2-C-A5-C — shape-match
6220 // gate. Find the parent's per-exit
6221 // tag snapshot at the wired exit
6222 // (inline / tag / global) and
6223 // check the child's entry_tags
6224 // match. If not, leave the cell
6225 // null + skip cache populate so
6226 // the future v2-C-A2 IR's
6227 // `call_indirect` stays inert at
6228 // this exit (the child's
6229 // shape-specialised IR would
6230 // mis-interpret raw bits the
6231 // parent writes to reg_state).
6232 let inline_n = parent_ct.per_exit_inline.len();
6233 let tags_n = parent_ct.per_exit_tags.len();
6234 let parent_exit_tags_slice: &[
6235 crate::jit::trace::ExitTag
6236 ] = if parent_exit_idx < inline_n {
6237 &parent_ct.per_exit_inline
6238 [parent_exit_idx]
6239 .exit_tags
6240 } else if parent_exit_idx
6241 < inline_n + tags_n
6242 {
6243 &parent_ct.per_exit_tags
6244 [parent_exit_idx - inline_n]
6245 .1
6246 } else {
6247 &parent_ct.exit_tags
6248 };
6249 let shape_ok =
6250 crate::jit::trace::exit_tags_match_entry_tags(
6251 &ct.entry_tags,
6252 parent_exit_tags_slice,
6253 &parent_ct.entry_tags,
6254 );
6255 if !shape_ok {
6256 self.jit.counters.side_trace_shape_mismatch += 1;
6257 }
6258 // P15-A v2-C-A4 — write the child's
6259 // entry fn ptr to BOTH the legacy
6260 // v2-A `exit_side_trace_ptrs[idx]`
6261 // cell (kept so v2-A's
6262 // walk_any_side_ptr_non_null tests
6263 // stay green) AND the per-kind cell
6264 // whose heap address the parent's
6265 // IR baked (v2-C-A2). The IR-baked
6266 // cell is what the call_indirect
6267 // gate actually reads. Only write
6268 // when A5-C shape gate passes.
6269 if shape_ok {
6270 if let Some(cell) = parent_ct
6271 .exit_side_trace_ptrs
6272 .get(parent_exit_idx)
6273 {
6274 cell.set(entry_ptr);
6275 }
6276 // Compute (kind, local) for the
6277 // IR-baked cell. Layout follows
6278 // exit_hit_counts: inline first,
6279 // then per_exit_tags, then the
6280 // global tail slot.
6281 let (sent_kind, sent_local) = if parent_exit_idx
6282 < inline_n
6283 {
6284 parent_ct.per_exit_inline[parent_exit_idx]
6285 .side_trace_ptr
6286 .set(entry_ptr);
6287 (
6288 crate::jit::trace::SIDE_SENT_KIND_INLINE,
6289 parent_exit_idx as u32,
6290 )
6291 } else if parent_exit_idx < inline_n + tags_n {
6292 let local = parent_exit_idx - inline_n;
6293 if let Some(b) =
6294 parent_ct.tags_side_trace_ptrs.get(local)
6295 {
6296 b.set(entry_ptr);
6297 }
6298 (
6299 crate::jit::trace::SIDE_SENT_KIND_TAG,
6300 local as u32,
6301 )
6302 } else {
6303 parent_ct.global_side_trace_ptr.set(entry_ptr);
6304 (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6305 };
6306 self.jit.counters.side_trace_compiled += 1;
6307 // P15-A v2-D-A8 — flip the
6308 // parent's fast-path hint so
6309 // the dispatcher knows to do
6310 // the tentative decode + cell
6311 // check on subsequent
6312 // dispatches. Set once and
6313 // stays true (we never unwire
6314 // a side trace today).
6315 parent_ct.has_any_side_wired.set(true);
6316
6317 // P15-A v2-C-A1/A4 — populate
6318 // the O(1) lookup cache the
6319 // dispatcher consults on
6320 // sentinel-bit-set returns.
6321 // Key is the encoded sentinel
6322 // (same encoding the IR ORs
6323 // into bits 56..=62 of the
6324 // child's i64 return).
6325 let sentinel =
6326 crate::jit::trace::encode_side_sentinel(
6327 sent_kind, sent_local,
6328 );
6329 let predicted_idx = if std::ptr::eq(
6330 parent_proto.as_ptr(),
6331 head_proto.as_ptr(),
6332 ) {
6333 parent_traces.len() as u32
6334 } else {
6335 head_proto.traces.borrow().len() as u32
6336 };
6337 parent_ct
6338 .side_trace_cache
6339 .borrow_mut()
6340 .insert(sentinel, predicted_idx);
6341 }
6342 }
6343 drop(parent_traces);
6344 }
6345 head_proto.traces.borrow_mut().push(TArc::new(ct));
6346 self.jit.counters.compiled += 1;
6347 }
6348 None => {
6349 self.jit.counters.compile_failed += 1;
6350 self.jit
6351 .counters
6352 .compile_failed_reasons
6353 .push(self.jit.trace_compiler.last_compile_checkpoint());
6354 }
6355 }
6356 }
6357 } // P13-S13-H — close the long-trace-bias else branch
6358 } else {
6359 // P12-S4-step1 + step4a — depth-aware push at the
6360 // current `cur_depth`. The `depth_cap_hit` /
6361 // `returned_past_head` early-exit is handled by
6362 // the `should_close` branch above; reaching here
6363 // means `cur_depth <= MAX_INLINE_DEPTH` and the
6364 // trace head's frame is still live.
6365 let depth_u8 = cur_depth as u8;
6366 if depth_u8 > self.jit.max_depth_seen {
6367 self.jit.max_depth_seen = depth_u8;
6368 }
6369 // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6370 // return / variable return count). Recorder pushed
6371 // it with var_count=None before the call dispatched;
6372 // now that the call has returned and we're about to
6373 // push the next op, top reflects the actual return
6374 // count. Snapshot top - (caller.base + call.a).
6375 if let Some(last) = rec.ops.last_mut()
6376 && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6377 && last.inst.c() == 0
6378 && last.var_count.is_none()
6379 && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6380 {
6381 let from = f.base + last.inst.a();
6382 if self.top >= from {
6383 last.var_count = Some(self.top - from);
6384 }
6385 }
6386 // P12-S9-A/C — for SetList B=0, snapshot the source
6387 // count = top - A - 1 (mirrors Lua's `n = top - ra
6388 // - 1` from lvm.c OP_SETLIST). Sources are
6389 // R[A+1..top), exclusive top. For Call C=0's
6390 // var_count (the return count = top - A inclusive),
6391 // see the prior-op fix-up above; here we
6392 // initialise the current Call op to None and let
6393 // the fix-up on the next op's push populate it.
6394 let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6395 && inst.b() == 0
6396 && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6397 {
6398 let from = f.base + inst.a();
6399 if self.top > from {
6400 Some(self.top - from - 1)
6401 } else {
6402 None
6403 }
6404 } else {
6405 None
6406 };
6407 let op = crate::jit::trace::RecordedOp {
6408 proto: cl.proto,
6409 pc,
6410 inst,
6411 inline_depth: depth_u8,
6412 var_count,
6413 };
6414 // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6415 // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6416 // Captured as a side-channel `RetfRecord` parallel to
6417 // `ops` when `p16_self_link_enabled` is on. R3's
6418 // down-rec stitch consumes these to guard side-trace
6419 // inlined-frame topology against the recorded shape.
6420 // Gated on the same flag as the cycle catch so the
6421 // ship-default path (p16 off) sees zero behavior
6422 // change. `caller_pc` is the recorded enclosing Call's
6423 // pc + 1 — interp's resume point after the inlined
6424 // frame pops.
6425 if self.jit.p16_self_link_enabled
6426 && depth_u8 > 0
6427 && matches!(
6428 inst.op(),
6429 crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6430 )
6431 {
6432 let results: u8 = match inst.op() {
6433 crate::vm::isa::Op::Return0 => 0,
6434 crate::vm::isa::Op::Return1 => 1,
6435 _ => 0,
6436 };
6437 // Most recent Op::Call recorded at the caller's
6438 // depth (`depth_u8 - 1`) is the frame this Return
6439 // is unwinding from. Reverse scan stops at the
6440 // first match.
6441 let caller_depth = depth_u8 - 1;
6442 let caller_call = rec.ops.iter().rev().find(|r| {
6443 r.inline_depth == caller_depth
6444 && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6445 });
6446 let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6447 // v2.0 Track-R R3a — capture the caller's proto
6448 // for the RetfRecord. LuaJIT `IR_RETF.op1`
6449 // equivalent. For fib(28) the caller's proto
6450 // equals the trace head; for future mutual
6451 // recursion the recorded Op::Call's proto is the
6452 // right target. Fallback to head_proto when no
6453 // enclosing Call op was captured (mirrors
6454 // `caller_pc`'s fallback to the Return's own pc).
6455 let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6456 rec.retfs.push(crate::jit::trace::RetfRecord {
6457 from_depth: depth_u8,
6458 to_depth: caller_depth,
6459 results,
6460 caller_pc,
6461 proto: caller_proto,
6462 });
6463 // v2.0 Track-R R3a — DownRec close trigger:
6464 // count RetfRecords on this recording whose
6465 // `proto` matches `caller_proto` (LuaJIT
6466 // `check_downrec_unroll` chain filter
6467 // `op1 == ptref`). Threshold mirrors
6468 // RECUNROLL_THRESHOLD; first trip stamps the
6469 // `downrec_close` marker, subsequent retfs
6470 // keep the marker without overwrite. The
6471 // lowerer's end_idx picker routes through
6472 // TraceEnd::DownRec when the marker is set;
6473 // R3a's tail emit still falls through to R1's
6474 // safe deopt path so fib(28) result stays
6475 // 317_811. R3b lifts.
6476 if rec.downrec_close.is_none() {
6477 let caller_proto_ptr = caller_proto.as_ptr();
6478 let prior_match_count = rec
6479 .retfs
6480 .iter()
6481 .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6482 .count();
6483 // Strictly-greater-than threshold matches
6484 // LuaJIT `count + J->tailcalled > recunroll`.
6485 // The newly-pushed retf is already counted.
6486 if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6487 rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6488 return_pc: caller_pc,
6489 target_proto: caller_proto,
6490 depth_delta: 1,
6491 });
6492 // R2 close-cause taxonomy: tag the
6493 // restart with `"downrec-restart"`. R3b
6494 // adds `"downrec-stitch-failed"` when
6495 // the lifted back-edge falls back to
6496 // deopt.
6497 self.jit.counters.bump_close_cause("downrec-restart");
6498 }
6499 }
6500 }
6501 // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6502 // FIRST eligible Op::GetField site under env-gate
6503 // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6504 // - R[B] is Value::Table with metatable.is_none()
6505 // - K[C] is Value::Str
6506 // - The string key actually occupies a hash slot
6507 // (so the IC's slot_idx is a real index, not
6508 // a probe sentinel).
6509 // Once captured, subsequent GetFields skip this
6510 // logic (rec.field_ic_snapshot.is_some() short-
6511 // circuits). Env-OFF short-circuits on the cached
6512 // atomic check inside field_ic_enabled().
6513 if rec.field_ic_snapshot.is_none()
6514 && matches!(inst.op(), crate::vm::isa::Op::GetField)
6515 && crate::jit::trace_types::field_ic_enabled()
6516 {
6517 let b = inst.b();
6518 let c_idx = inst.c() as usize;
6519 let r_b = self.stack[(base + b) as usize];
6520 if let Value::Table(g) = r_b
6521 && g.metatable().is_none()
6522 && c_idx < cl.proto.consts.len()
6523 && let Value::Str(s) = cl.proto.consts[c_idx]
6524 {
6525 let key = Value::Str(s);
6526 let tbl_ref = &*g;
6527 if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6528 && let Some(val) = tbl_ref.node_val_at(slot_idx)
6529 {
6530 let op_idx = rec.ops.len() as u32;
6531 rec.field_ic_snapshot =
6532 Some(crate::jit::trace_types::FieldIcSnapshot {
6533 op_idx,
6534 nodes_len: tbl_ref.nodes_capacity() as u64,
6535 slot_idx: slot_idx as u64,
6536 key_ptr_bits: s.as_ptr() as u64,
6537 cached_val_tag: val.tag_byte(),
6538 });
6539 self.jit.counters.field_ic_snapshot_captured += 1;
6540 }
6541 }
6542 }
6543 if !rec.push(op) {
6544 // v2.0 Track-R R2 — recorder overflow
6545 // (MAX_TRACE_LEN). Pre-R2 this site bumped
6546 // `aborted` with no reason label, leaving the
6547 // overflow indistinguishable from any other
6548 // abort cause that might be added later.
6549 // Tag it explicitly under the close-cause
6550 // bucket so probes can tally overflow vs
6551 // other abort causes in O(1).
6552 self.jit.active_trace = None;
6553 self.jit.counters.aborted += 1;
6554 self.jit.counters.bump_close_cause("trace-overflow");
6555 }
6556 }
6557 }
6558
6559 // P12-S3 — trace JIT dispatcher.
6560 //
6561 // When the dispatch loop is about to execute the op at
6562 // `pc` and there's a `numeric_only` CompiledTrace cached
6563 // for that `head_pc`, marshal the live regs into an
6564 // i64 buffer, jump into the trace, and resume the
6565 // interpreter at the returned continuation PC.
6566 //
6567 // Skipped (zero overhead) when `trace_jit_enabled` is
6568 // false; the lookup is a borrow + scan over
6569 // `cl.proto.traces`, which is a `Vec` whose size is at
6570 // most one entry per back-edge per Proto in practice.
6571 //
6572 // Marshalling contract — the reg_state ABI is `*mut i64`
6573 // with no tag info, so a slot is marshalled only when its
6574 // tag is Int / Float / Table / Closure / Native / Str / Nil
6575 // and agrees with the trace's recorded entry tag. Any other
6576 // slot in the affected window forces a skip; interp takes
6577 // over for one op and the back-edge brings us back to try
6578 // again next pass, once the slot shapes match.
6579 //
6580 // A trace that comes back with `vm.jit.pending_err`
6581 // parked is treated as a deopt: clear the err, skip the
6582 // reg_state restore (the trace's partial register writes
6583 // are dropped), and let the interpreter run from the same
6584 // `pc`. The trace itself is left cached — a future entry
6585 // might find no metatable in the way and succeed.
6586 // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6587 // of 6 per-field Rc clones. proto.traces is now
6588 // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6589 // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6590 // operations per dispatch × 434k = ~2.2M Rc atomic ops
6591 // (~1-2% gain measured separately).
6592 // v2.0 Track-R R3c — one-shot consume of the
6593 // `suppress_downrec_admit_once` flag. Set by the R3c
6594 // downrec post-invoke arm below when it force-deopts the
6595 // trace (caller-pc guard miss OR cycle-budget exhausted)
6596 // so the NEXT interpreter loop iteration skips the
6597 // downrec admit, lets interp run the op at `head_pc`,
6598 // advances `pc` past `head_pc`, and breaks the otherwise-
6599 // infinite admit loop. Reading + clearing here means a
6600 // single dispatch tick consumes the suppression — the
6601 // following tick re-admits naturally (with the budget
6602 // also reset by the deopt site).
6603 let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6604 self.jit.suppress_downrec_admit_once = false;
6605 if self.jit.trace_enabled
6606 && let Some(ct) = {
6607 let traces = cl.proto.traces.borrow();
6608 traces
6609 .iter()
6610 .find(|t| {
6611 if t.head_pc != pc {
6612 return false;
6613 }
6614 let is_downrec = t.downrec_link.is_some();
6615 // v2.0 Track-R R3c — the one-shot suppress
6616 // flag blocks any admit (primary or fallback)
6617 // for `downrec_link`-bearing traces so the
6618 // next interp iter can run the natural op
6619 // at `head_pc` and advance past it. R3d's
6620 // `dispatchable=true` lift means the suppress
6621 // must also cover the primary `t.dispatchable`
6622 // arm — otherwise the lifted lookup would
6623 // immediately re-admit after a force-deopt
6624 // and the infinite loop returns.
6625 if is_downrec && downrec_admit_blocked {
6626 return false;
6627 }
6628 // Primary arm: `dispatchable=true` traces
6629 // (R3d-lifted DownRec or normal traces).
6630 // Fallback arm: R3c-shape `dispatchable=false`
6631 // DownRec traces (single-CMP guard kept
6632 // pinned because the 90% miss-rate would
6633 // make blind admit perf-negative).
6634 t.dispatchable || is_downrec
6635 })
6636 .cloned()
6637 }
6638 {
6639 // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6640 // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6641 // across the entire dispatch block so the fields outlive
6642 // all consumers. Saves 5 Rc::clone per dispatch.
6643 let entry_fn = ct.entry;
6644 let head_pc_val = ct.head_pc;
6645 let window_size = ct.window_size;
6646 let exit_tags = &ct.exit_tags;
6647 let per_exit_tags = &ct.per_exit_tags;
6648 let per_exit_inline = &ct.per_exit_inline;
6649 let compile_entry_tags = &ct.entry_tags;
6650 let global_tag_res_kind = ct.global_tag_res_kind;
6651 let exit_hit_counts = &ct.exit_hit_counts;
6652 let max_stack = cl.proto.max_stack as usize;
6653 let window_size_us = window_size as usize;
6654 let base_us = base as usize;
6655 // P12-S4-step3a — `reg_state` sized to the trace's
6656 // `window_size`, which today equals max_stack but
6657 // S4-step3b will expand for inlined frames.
6658 // Marshal-in still only writes [0..max_stack); slots
6659 // [max_stack..window_size) are zero-initialised and
6660 // filled by the trace's own GetUpval / arith.
6661 // P13-S13-D — reuse the Vm's amortised buffers
6662 // instead of allocating fresh Vecs each dispatch.
6663 // mem::take leaves an empty placeholder we restore
6664 // at the end of the dispatch block (success +
6665 // deopt paths both fall through to the restore).
6666 let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6667 entry_tags.clear();
6668 entry_tags.reserve(max_stack);
6669 // v2.0 Track-R R3c — this trace was admitted via the
6670 // `downrec_link.is_some()` arm rather than the normal
6671 // `dispatchable=true` arm. The pre-invoke path
6672 // populates a reserved saved-PC slot just past the
6673 // normal register window so R3b's lowerer guard load
6674 // (`reg_state[window_size]`) compares the runtime
6675 // saved caller PC against the recorded `dr_return_pc`.
6676 //
6677 // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6678 // gate. After R3d lifts `dispatchable = true` for
6679 // multi-way guards, the trace's body still emits the
6680 // R3b/R3d sentinel shape on return — the saved-PC slot
6681 // and post-invoke classifier must keep firing.
6682 // `downrec_link.is_some()` is the unique structural
6683 // signal that the trace closes via DownRec.
6684 let is_downrec_entry = ct.downrec_link.is_some();
6685 let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6686 reg_state.clear();
6687 // v2.0 Track-R R3c — when admitting a downrec trace,
6688 // size the buffer to `window_size + 1` so the lowerer
6689 // can `load(I64, ..., reg_state, window_size * 8)`
6690 // for the saved caller PC guard input. The extra slot
6691 // is the LAST element so cranelift's existing
6692 // `0..window_size` accesses are unaffected.
6693 let reg_state_len = if is_downrec_entry {
6694 window_size_us + 1
6695 } else {
6696 window_size_us
6697 };
6698 reg_state.resize(reg_state_len, 0i64);
6699 let mut dispatch_ok = true;
6700 for i in 0..max_stack {
6701 let v = self.stack[base_us + i];
6702 let (tag, raw) = v.unpack();
6703 entry_tags.push(tag);
6704 // P12-S12-C v3 — entry tag guard. The trace's IR
6705 // is specialised to the compile-time entry tags
6706 // (via current_kinds propagation from
6707 // from_entry_tag). A runtime tag mismatch means
6708 // body ops would mis-interpret raw bits (e.g.
6709 // treat a Str pointer as Int payload → garbage).
6710 // Skip dispatch on mismatch so interp handles
6711 // this entry shape; the trace stays cached for
6712 // future entries that match.
6713 if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6714 dispatch_ok = false;
6715 break;
6716 }
6717 match tag {
6718 // Int / Float / Table / Closure / Nil all
6719 // marshal to raw payload cleanly; the trace's
6720 // IR treats the 8-byte slot as an i64 (with
6721 // f64 ops bitcasting around the boundary).
6722 crate::runtime::value::raw::INT
6723 | crate::runtime::value::raw::FLOAT
6724 | crate::runtime::value::raw::TABLE
6725 | crate::runtime::value::raw::CLOSURE
6726 // P12-S12-B-v2 — Native iter slots (e.g.
6727 // R[A] = ipairs_iter) are present in
6728 // generic-for traces; the raw bits are a
6729 // valid `*mut NativeClosure` and round-trip
6730 // cleanly.
6731 | crate::runtime::value::raw::NATIVE
6732 // P12-S12-C v1 — Str slots show up in
6733 // string-concat traces; raw bits = `*mut
6734 // LuaStr` (interned, GC-managed). Round-
6735 // trips cleanly as a heap pointer.
6736 | crate::runtime::value::raw::STR
6737 | crate::runtime::value::raw::NIL => {
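6738 // SAFETY: `raw` is the payload returned by `v.unpack()`; reading its `zero` field only reinterprets that payload word as an integer for the `*mut i64` reg_state ABI and dereferences nothing. NOTE(review): assumes every tag accepted by this arm fully initialises the payload word — confirm against `Value::unpack`.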
6739 reg_state[i] = unsafe { raw.zero as i64 };
6740 }
6741 _ => {
6742 dispatch_ok = false;
6743 break;
6744 }
6745 }
6746 }
6747
6748 if dispatch_ok {
6749 debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6750 self.jit.pending_err = None;
6751 // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6752 // count. A cmp@d>0 side-exit calls the materialize
6753 // helper which pushes inlined frames onto
6754 // `vm.frames`; on deopt those frames must be popped
6755 // before falling through to the interpreter, else
6756 // the stack grows unboundedly per deopted dispatch.
6757 let pre_frames = self.frames.len();
6758 // v2.0 Track-R R3c — saved-PC slot population. The
6759 // recorded `dr_return_pc` on the closing trace is
6760 // the caller's resume PC captured at a depth>0
6761 // Return push (recorder push site, see R3a verdict
6762 // §3). The natural runtime analogue for self-
6763 // stitch is the dispatching frame's PARENT frame's
6764 // PC: the trace's head_pc sits inside a Lua frame,
6765 // and the parent (caller) frame's `pc` is what
6766 // luna would observe as `[base-8]` in the LJ
6767 // `asm_retf` shape (`lj_asm_arm64.h:565`). When
6768 // the parent isn't a Lua frame (top-level dispatch
6769 // — first invocation through `call_value`), no
6770 // saved PC exists; we write 0, which always
6771 // mismatches the recorded `dr_return_pc != 0`
6772 // invariant pinned by R3b
6773 // (`crates/luna-jit/src/jit_backend/trace.rs:7206
6774 // debug_assert!(dr_return_pc != 0, ...)`).
6775 if is_downrec_entry {
6776 let saved_pc: i64 = if pre_frames >= 2 {
6777 match &self.frames[pre_frames - 2] {
6778 CallFrame::Lua(parent) => parent.pc as i64,
6779 CallFrame::Cont(_) => 0,
6780 }
6781 } else {
6782 0
6783 };
6784 reg_state[window_size_us] = saved_pc;
6785 }
6786 // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
6787 // diagnostic hook. The probe fires on the first trace
6788 // dispatch only (regardless of JIT vs AOT origin — both go
6789 // through this arm), letting the AOT smoke test verify mcode
6790 // actually executed. Guarded behind `OnceLock` so the env
6791 // read is a one-time cost per process; gated on
6792 // `counters.dispatched == 0` so the smoke test gets a single
6793 // deterministic `aot_trace_fired pc=N` line.
6794 if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
6795 eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
6796 }
6797 let continuation_pc = {
6798 // v1.1 A1 Session A — chunk_compiler.enter
6799 // (CraneliftBackend delegates to enter_jit;
6800 // NullJitBackend returns an inert guard).
6801 let vm_ptr: *mut Vm = self;
6802 let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
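6803 // SAFETY: `reg_state` was resized to `reg_state_len` i64 slots above (`window_size`, plus the saved-PC slot for downrec entries) and is not dropped or reallocated before the call returns; `ct` is held for the whole dispatch block. NOTE(review): assumes `entry_fn` only touches that slot range and that `ct` keeps its machine code alive — confirm against the lowerer.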
6804 unsafe { entry_fn(reg_state.as_mut_ptr()) }
6805 };
6806 self.jit.counters.dispatched += 1;
6807
6808 if self.jit.pending_err.is_some() {
6809 self.jit.pending_err = None;
6810 self.jit.counters.deopt += 1;
6811 // P12-S4-step4b-C-2 — unwind any helper-pushed
6812 // inlined frames before the interpreter resumes.
6813 // Don't restore reg_state — the trace's partial
6814 // writes are discarded; interp re-executes from
6815 // the original `pc`.
6816 while self.frames.len() > pre_frames {
6817 frames_pop_sync(&mut self.frames, &mut self.frames_top);
6818 }
6819 if is_downrec_entry {
6820 // v2.0 Track-R R3c — pending_err observed
6821 // mid-trace inside a downrec admit. Treat
6822 // it as a guard miss: bump `downrec_deopt`
6823 // and suppress the next downrec admit so
6824 // interp can advance past `head_pc` and
6825 // the same trace doesn't immediately re-
6826 // fire on the next loop iteration.
6827 self.jit.counters.downrec_deopt += 1;
6828 self.jit.suppress_downrec_admit_once = true;
6829 }
6830 } else if is_downrec_entry && {
6831 // v2.0 Track-R R3d — only enter the R3c/R3d
6832 // downrec classifier for returns whose shape
6833 // matches the lowerer's `downrec_idx_opt` tail
6834 // emit: either the stitch_blk DOWNREC sentinel
6835 // (HIT) or the deopt_blk GLOBAL-sentinel-with-
6836 // body==head_pc (MISS via guard fail). Any
6837 // other return from a downrec trace (intermediate
6838 // body cmp side-exit, GetField inference fail,
6839 // etc.) carries a different sentinel/body shape
6840 // and means the body exited BEFORE reaching the
6841 // downrec close — classify those through the
6842 // normal decode path (else branch below) so
6843 // reg_state restores + pc advances correctly.
6844 // The pre-R3d behavior (R3c) classified them all
6845 // as MISS and skipped the normal restore, which
6846 // inflated `downrec_deopt` with non-downrec
6847 // events and lost the trace's mid-flight writes.
6848 let raw_ret = continuation_pc as u64;
6849 let from_side_trace = (raw_ret >> 63) & 1 == 1;
6850 let sentinel_code = if from_side_trace {
6851 ((raw_ret >> 56) & 0x7F) as u32
6852 } else {
6853 0
6854 };
6855 let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6856 let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
6857 crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
6858 0,
6859 );
6860 from_side_trace
6861 && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
6862 || (sentinel_code == global_deopt_code
6863 && raw_body == head_pc_val as u64))
6864 } {
6865 // R3d downrec event classifier.
6866 let raw_ret = continuation_pc as u64;
6867 let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6868 if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
6869 // Guard HIT — saved_pc matched one of the
6870 // baked candidates and the trace's
6871 // `stitch_blk` arm returned the DOWNREC
6872 // sentinel. Cycle-safety checkpoint:
6873 // decrement budget; on underflow,
6874 // reclassify as deopt + reset budget.
6875 // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
6876 // ~all natural HITs in a hot loop fire
6877 // before reset pressure.
6878 if self.jit.stitch_depth_remaining > 0 {
6879 self.jit.stitch_depth_remaining -= 1;
6880 self.jit.counters.downrec_dispatched += 1;
6881 } else {
6882 self.jit.counters.downrec_deopt += 1;
6883 self.jit.stitch_depth_remaining =
6884 crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
6885 }
6886 } else {
6887 // Guard MISS via the lowerer's deopt_blk
6888 // arm (GLOBAL sentinel + body == head_pc).
6889 // The deopt_blk emit performs the
6890 // store-back via `emit_store_back_and_return_pc`,
6891 // so the live stack already reflects the
6892 // body's writes; no extra restore needed
6893 // from the dispatcher side.
6894 self.jit.counters.downrec_deopt += 1;
6895 }
6896 self.jit.suppress_downrec_admit_once = true;
6897 // Pop helper-pushed inlined frames (defensive —
6898 // R3d's emit shape doesn't push frames in the
6899 // tail, but a body side-exit before reaching
6900 // the tail may have via the materialize helper).
6901 while self.frames.len() > pre_frames {
6902 frames_pop_sync(&mut self.frames, &mut self.frames_top);
6903 }
6904 self.jit.reg_state_buf = reg_state;
6905 self.jit.entry_tags_buf = entry_tags;
6906 continue;
6907 } else {
6908 // Restore each slot using the trace's
6909 // exit-tag analysis (see ExitTag docs).
6910 // P12-S4-step4b-C-2 — decode the IR's
6911 // side-exit shape. Upper 32 bits = (site_idx
6912 // + 1) for inline cmp side-exits, 0 for
6913 // legacy clean-tail / non-inline exits.
6914 // P15-A v2-C-A0 — decode lives in
6915 // `crate::jit::trace::decode_exit_shape` so
6916 // v2-C-A3 can reuse it with the SIDE TRACE's
6917 // shape inputs when the sentinel bit
6918 // (v2-C-A2) is set on `raw_ret`.
6919 let raw_ret = continuation_pc as u64;
6920 // P15-A v2-C-A3 — side-trace return decode.
6921 // Bit 63 of `raw_ret` is the side-trace
6922 // marker the parent's IR OR'd in when it
6923 // tail-called into a wired child trace.
6924 // Bits 56..=62 carry the sentinel code (the
6925 // cache key into the parent's
6926 // `side_trace_cache`); bits 0..=55 are the
6927 // child's own return value (encoded site or
6928 // plain cont_pc) which we MUST decode using
6929 // the CHILD's per_exit_inline / per_exit_tags
6930 // / exit_tags / exit_hit_counts — not the
6931 // parent's. The dispatcher snapshot read
6932 // above holds the parent's shapes; when bit
6933 // 63 is set we re-fetch the child's via the
6934 // sentinel-keyed cache.
6935 let from_side_trace = (raw_ret >> 63) & 1 == 1;
6936 let (
6937 decode_inline,
6938 decode_tags,
6939 decode_exit_tags,
6940 decode_hit_counts,
6941 decode_body,
6942 ) = if from_side_trace {
6943 let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6944 let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6945 let traces = cl.proto.traces.borrow();
6946 let child_idx = traces
6947 .iter()
6948 .find(|t| t.head_pc == head_pc_val)
6949 .and_then(|pct| {
6950 pct.side_trace_cache.borrow().get(&sentinel_code).copied()
6951 });
6952 if let Some(idx) = child_idx
6953 && let Some(child) = traces.get(idx as usize)
6954 {
6955 if crate::jit::trace::v2c_probe_enabled() {
6956 eprintln!(
6957 "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
6958 sentinel_code,
6959 body,
6960 idx,
6961 child.n_ops,
6962 child.head_pc,
6963 child.window_size,
6964 pc,
6965 window_size,
6966 child.dispatchable,
6967 child.is_inline_abort_close,
6968 );
6969 }
6970 (
6971 child.per_exit_inline.clone(),
6972 child.per_exit_tags.clone(),
6973 child.exit_tags.clone(),
6974 child.exit_hit_counts.clone(),
6975 body,
6976 )
6977 } else {
6978 if crate::jit::trace::v2c_probe_enabled() {
6979 eprintln!(
6980 "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
6981 sentinel_code, body,
6982 );
6983 }
6984 // Cache miss — fall back to parent
6985 // shapes with the body bits. Best-
6986 // effort; the trace_side_trace_
6987 // shape_mismatch_count records this
6988 // path indirectly (close-handler
6989 // skips wiring on mismatch so we
6990 // shouldn't reach here when shape
6991 // gate held).
6992 (
6993 per_exit_inline.clone(),
6994 per_exit_tags.clone(),
6995 exit_tags.clone(),
6996 exit_hit_counts.clone(),
6997 body,
6998 )
6999 }
7000 } else {
7001 // P15-A v2-D — dispatcher-level side-trace
7002 // invocation. Replaces v2-C's universal IR
7003 // gate (`load + icmp + brif` at every
7004 // emit_store_back callsite, which A6/A7
7005 // measured as a net perf regression).
7006 // A8 fast-path: skip the tentative decode +
7007 // child lookup entirely when `has_any_side
7008 // _wired == false` (the common case until
7009 // the first side trace compiles for this
7010 // parent). For fib_10_x10k and other tight
7011 // short-trace workloads where most parent
7012 // traces never get a wired child, this
7013 // collapses the v2-D overhead to a single
7014 // `Cell::get()` on the cold path.
7015 // A8-revert: A8 had `parent_has_side` short-
7016 // circuit + snapshot hoist; mini N=3 showed
7017 // A8 lost the btrees_d8 1.02× win (dropped
7018 // to 0.95×) WITHOUT helping fib_10 (same
7019 // 0.86×). Drop A8 — accept the always-run
7020 // v2-D path; the tentative decode + cell
7021 // load is cheaper than the cost A8 added.
7022 {
7023 let tentative = crate::jit::trace::decode_exit_shape(
7024 raw_ret,
7025 per_exit_inline,
7026 per_exit_tags,
7027 exit_tags,
7028 );
7029 let tentative_exit_idx = tentative.exit_hit_idx;
7030 let child_invoke = {
7031 let traces = cl.proto.traces.borrow();
7032 traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
7033 |pct| {
7034 let cell =
7035 pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
7036 let fn_ptr = cell.get();
7037 if fn_ptr.is_null() {
7038 return None;
7039 }
7040 traces
7041 .iter()
7042 .find(|t| {
7043 t.entry as *const () as *const u8 == fn_ptr
7044 })
7045 .map(|child| {
7046 (
7047 child.entry,
7048 child.per_exit_inline.clone(),
7049 child.per_exit_tags.clone(),
7050 child.exit_tags.clone(),
7051 child.exit_hit_counts.clone(),
7052 )
7053 })
7054 },
7055 )
7056 };
7057 if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
7058 let child_raw_ret = {
7059 // v1.1 A1 Session A — chunk_compiler.enter
7060 // (side-trace entry).
7061 let vm_ptr: *mut Vm = self;
7062 let _guard =
7063 self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
7064 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7065 unsafe { cent(reg_state.as_mut_ptr()) }
7066 };
7067 (cpi, cpt, cet, chc, child_raw_ret as u64)
7068 } else {
7069 (
7070 per_exit_inline.clone(),
7071 per_exit_tags.clone(),
7072 exit_tags.clone(),
7073 exit_hit_counts.clone(),
7074 raw_ret,
7075 )
7076 }
7077 }
7078 };
7079 let decoded = crate::jit::trace::decode_exit_shape(
7080 decode_body,
7081 &decode_inline,
7082 &decode_tags,
7083 &decode_exit_tags,
7084 );
7085 let site_id = decoded.site_id;
7086 let cont_pc = decoded.cont_pc;
7087 let exit_hit_idx = decoded.exit_hit_idx;
7088 let exit_tags_for_pc = decoded.exit_tags_for_pc;
// P15-A v2-C-A3 — for side-trace returns
// force using_global_exit_tags=false so the
// restore loop always takes the per-tag slow
// path (the child's global_tag_res_kind
// classification isn't plumbed through yet
// — TODO for a future polish step).
//
// NOTE(review): `from_side_trace` only reflects bit 63 of the
// parent's `raw_ret`. The v2-D dispatcher-level branch above
// can also swap in a CHILD's shapes (when `child_invoke` is
// `Some`) while `from_side_trace` stays false, so `decoded`
// may report `using_global_exit_tags == true` for the child's
// global exit tags. The fast path below then dispatches on
// `global_tag_res_kind`, which presumably still describes the
// parent trace — confirm whether that case also needs to be
// forced onto the slow path.
let using_global_exit_tags = if from_side_trace {
    false
} else {
    decoded.using_global_exit_tags
};
7100 // P15-prep — increment the counter (saturate
7101 // at u32::MAX to avoid wrap on long runs).
7102 // P15-A v1 — track whether this increment is
7103 // the one that crossed `HOTEXIT_THRESHOLD`
7104 // (transition: previous v < threshold, new v
7105 // == threshold). The side-trace start is
7106 // deferred to just before `continue;` so
7107 // vm.stack and frame.pc are fully restored
7108 // (the snapshot reads post-restore values).
7109 let mut side_trace_should_start = false;
7110 // P15-A v2-C-A3 — for side-trace returns the
7111 // counter to bump is the CHILD's (decoded
7112 // shape lookup) — `exit_hit_idx` is into the
7113 // decoded layout, so use the matching
7114 // `decode_hit_counts`. For parent decode
7115 // they're aliased (clone of the parent's
7116 // own Rc).
7117 if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7118 let v = c.get();
7119 if v < u32::MAX {
7120 c.set(v + 1);
7121 }
7122 if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7123 && self.jit.active_trace.is_none()
7124 && self.jit.trace_enabled
7125 {
7126 side_trace_should_start = true;
7127 }
7128 }
7129 // P12-S4-step4b-C-2 — at an inline cmp@d>0
7130 // side-exit, the helper has pushed N frames on
7131 // top of the trace head's frame and
7132 // `exit_tags_for_pc.len()` covers the full
7133 // window (caller + each inlined frame's
7134 // window). Slots beyond `max_stack` belong to
7135 // an inlined frame: their `Untouched` entries
7136 // default to Nil (no entry-tag fallback —
7137 // marshal-in only captured caller slots) and
7138 // we write to interp stack at `base + i` which
7139 // mirrors `op_offsets`-derived layout.
7140 let slot_count = exit_tags_for_pc.len();
7141 // P12-S4-step4b-C-2 — the helper only extends
7142 // vm.stack up to the deepest pushed frame's
7143 // window, but the exit_tags snapshot covers
7144 // the trace's full `window_size` (which
7145 // includes depth-N+1 scratch slots that the
7146 // trace's IR may have written without a
7147 // matching pushed frame). Extend with Nil so
7148 // the write at the tail doesn't panic; these
7149 // slots get overwritten by the writeback loop
7150 // and won't leak meaningful data past the
7151 // pushed frames' R[0..max_stack) windows.
7152 if self.stack.len() < base_us + slot_count {
7153 self.stack
7154 .resize(base_us + slot_count, crate::runtime::Value::Nil);
7155 }
7156 // P13-S13-E — fast-path restore loop. When
7157 // we landed on the global `exit_tags`,
7158 // dispatch on the compile-time
7159 // classification: skip the loop entirely
7160 // for `AllUntouched`, do a tag-free
7161 // `Value::Int(...)` write per slot for
7162 // `AllInt`, otherwise fall through to the
7163 // general match-arm loop. site_id > 0
7164 // (inline frame mat) and per_exit_tags
7165 // hits always take the general path —
7166 // their per-side-exit shapes aren't
7167 // pre-classified yet.
7168 let fast_path_taken = if using_global_exit_tags {
7169 match global_tag_res_kind {
7170 crate::jit::trace::TagResKind::AllUntouched => {
7171 // No-op: vm.stack already
7172 // matches the trace's post-
7173 // entry state for these
7174 // slots (entry values not
7175 // overridden, or already
7176 // spilled by helpers).
7177 true
7178 }
7179 crate::jit::trace::TagResKind::AllInt => {
7180 for i in 0..slot_count {
7181 self.stack[base_us + i] =
7182 crate::runtime::Value::Int(reg_state[i]);
7183 }
7184 true
7185 }
7186 crate::jit::trace::TagResKind::Mixed => false,
7187 }
7188 } else {
7189 false
7190 };
// General restore path: taken whenever the pre-classified fast
// path above did not handle this exit. Each slot in the decoded
// exit-tag window is rebuilt from a (tag, raw payload) pair and
// written back to the interpreter stack at `base_us + i`.
if !fast_path_taken {
    for i in 0..slot_count {
        // Pick the value tag for slot `i` from the exit-tag entry.
        let tag = match exit_tags_for_pc[i] {
            // The trace did not write this slot. Inside the
            // caller's register window reuse the tag captured at
            // trace entry; past `max_stack` there is no entry tag
            // to fall back on, so the slot is restored as Nil.
            crate::jit::trace::ExitTag::Untouched => {
                if i < max_stack {
                    entry_tags[i]
                } else {
                    crate::runtime::value::raw::NIL
                }
            }
            crate::jit::trace::ExitTag::Int => {
                crate::runtime::value::raw::INT
            }
            crate::jit::trace::ExitTag::Float => {
                crate::runtime::value::raw::FLOAT
            }
            crate::jit::trace::ExitTag::Table => {
                crate::runtime::value::raw::TABLE
            }
            crate::jit::trace::ExitTag::Closure => {
                crate::runtime::value::raw::CLOSURE
            }
            // P12-S6-A1 — trace actively wrote Nil
            // to this slot (e.g. via Op::LoadNil).
            // Restore as Nil regardless of the entry
            // tag, since the i64 payload is 0 and
            // packing as the entry tag (e.g. INT)
            // would mis-type the slot.
            crate::jit::trace::ExitTag::Nil => {
                crate::runtime::value::raw::NIL
            }
            // P12-S12-C v2 — trace wrote a Str ptr
            // to this slot (LoadK Str / Move from
            // Str / Concat result). Restore as
            // Value::Str with raw bits round-
            // tripped.
            crate::jit::trace::ExitTag::Str => {
                crate::runtime::value::raw::STR
            }
        };
        // SAFETY: `tag` is either the entry tag recorded for this
        // slot or one of the `raw::*` constants selected by the
        // exit-tag match above (INT / FLOAT / TABLE / CLOSURE /
        // NIL / STR), and the payload is the raw 64-bit word the
        // trace left in `reg_state[i]`.
        // NOTE(review): presumably `Value::pack` requires the tag
        // to describe the payload, and pointer-typed payloads to
        // still reference live GC objects — confirm against its
        // contract. The stack itself was grown to
        // `base_us + slot_count` before this loop, so the indexed
        // write is in bounds.
        self.stack[base_us + i] = unsafe {
            Value::pack(
                tag,
                crate::runtime::value::RawVal {
                    zero: reg_state[i] as u64,
                },
            )
        };
    }
}
7248 // P12-S4-step4b-C-2 — for non-inline exits the
7249 // helper was never called (no metas chain for
7250 // this cont_pc), so `frames.last()` is the
7251 // trace head's frame and we set its pc to
7252 // cont_pc as before. For inline exits the
7253 // helper baked the side-exit PC into the
7254 // innermost frame's `pc` at push time
7255 // (chain.last().pc was overridden at emit),
7256 // so this assignment to `frames.last_mut().pc
7257 // = cont_pc` is a redundant-but-correct
7258 // confirmation.
7259 let _ = &per_exit_inline; // hold the Rc alive across dispatch
7260 // P12-S4-step4b-C-2 — for inline side-exits the
7261 // helper has pushed N frames on top. The trace
7262 // head frame is at `pre_frames - 1`; set its
7263 // pc to `head_resume_pc` so when the chain
7264 // eventually pops back to it, interp resumes
7265 // PAST the trace's depth-0 Op::Call instead of
7266 // restarting from `head_pc` and re-triggering
7267 // dispatch (infinite loop). The innermost
7268 // (helper-pushed) frame already has its pc
7269 // baked in at compile time, but we still
7270 // assign `cont_pc` below for parity with the
7271 // non-inline path (no-op).
7272 if site_id > 0 {
7273 let idx = (site_id - 1) as usize;
7274 let head_resume_pc = decode_inline[idx].head_resume_pc;
7275 if pre_frames > 0 {
7276 if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7277 f.pc = head_resume_pc;
7278 }
7279 }
7280 }
7281 let frames_len_now = self.frames.len();
// Write the decoded continuation pc into the innermost frame so
// the interpreter resumes there on the next loop iteration.
//
// SAFETY: `unwrap_unchecked` is sound only while `self.frames`
// is non-empty.
// NOTE(review): presumably guaranteed because trace dispatch
// runs inside the currently executing Lua frame (the side-trace
// start below computes `self.frames.len() - 1` under the same
// assumption) — confirm no path can reach here with an empty
// frame stack.
match unsafe { self.frames.last_mut().unwrap_unchecked() } {
    CallFrame::Lua(fmut) => {
        // Optional diagnostic dump of the pc transition.
        if crate::jit::trace::v2c_probe_enabled() {
            eprintln!(
                "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
                from_side_trace,
                raw_ret,
                fmut.pc,
                cont_pc,
                site_id,
                frames_len_now,
                pre_frames,
                max_stack,
            );
        }
        fmut.pc = cont_pc;
    }
    // Any non-Lua frame on top at this point is treated as a bug.
    _ => unreachable!("Cont frame at trace dispatch"),
}
7302 // P15-A v1 — deferred side-trace start. The
7303 // increment block above flagged this exit's
7304 // hit count crossing HOTEXIT_THRESHOLD; now
7305 // that vm.stack is restored and frame.pc is
7306 // settled, snapshot entry_tags from the
7307 // resume frame's window and create the
7308 // recorder. The recorder's first push fires
7309 // on the next interp iteration at cont_pc.
7310 //
7311 // `head_proto` for the side trace = cl.proto
7312 // (trace JIT only inlines self-recursive
7313 // calls today, so cont_pc always lands in
7314 // the same proto as the parent). Frame base
7315 // is the resume frame (top of `self.frames`
7316 // — inline-pushed frames moved this).
7317 if side_trace_should_start {
7318 let (resume_base, resume_proto) = match self.frames.last() {
7319 Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7320 _ => (base_us, cl.proto),
7321 };
7322 let resume_max_stack = resume_proto.max_stack as usize;
7323 let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7324 // Extend stack if cont_pc's frame window
7325 // overhangs the current stack len (rare,
7326 // but inline-pushed frame stack writes
7327 // only covered the trace's writeback).
7328 if self.stack.len() < resume_base + resume_max_stack {
7329 self.stack.resize(
7330 resume_base + resume_max_stack,
7331 crate::runtime::Value::Nil,
7332 );
7333 }
7334 for i in 0..resume_max_stack {
7335 let (tag, _) = self.stack[resume_base + i].unpack();
7336 side_entry_tags.push(tag);
7337 }
7338 self.jit.active_trace =
7339 Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7340 resume_proto,
7341 cont_pc,
7342 side_entry_tags,
7343 cl.proto,
7344 head_pc_val,
7345 exit_hit_idx,
7346 )));
7347 self.jit.recording_frame_base = self.frames.len() - 1;
7348 self.jit.counters.side_trace_started += 1;
7349 }
7350 // P13-S13-D — put the dispatch buffers back
7351 // before the `continue;` so the next
7352 // dispatch picks up the same allocation.
7353 self.jit.reg_state_buf = reg_state;
7354 self.jit.entry_tags_buf = entry_tags;
7355 continue;
7356 }
7357 }
7358 // P13-S13-D — !dispatch_ok / deopt path / non-cont
7359 // exit also restore the buffers before falling
7360 // through to the interp.
7361 self.jit.reg_state_buf = reg_state;
7362 self.jit.entry_tags_buf = entry_tags;
7363 }
7364
// PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
// hook code that consults `currentpc = savedpc - 1` lands on the
// instruction now executing. luna mirrors that by advancing
// `f.pc` to `pc + 1` before the hook block — local_at /
// getinfo / line attribution all read f.pc, and the existing
// `pc - 1` convention in those helpers then yields the current
// instruction's pc (db.lua :696: local `A` visible at the
// chunk's return line once OP_CLOSURE has advanced pc).
//
// Inline `top_frame_mut` for the hot path: top is guaranteed Lua
// (cont frames drained above) so the and_then/Option layers are
// dead weight.
// SAFETY: `unwrap_unchecked` is sound only while `self.frames`
// is non-empty.
// NOTE(review): presumably guaranteed because this point is
// reached only while a Lua frame is being executed (see the
// "top is guaranteed Lua" note above) — confirm.
match unsafe { self.frames.last_mut().unwrap_unchecked() } {
    CallFrame::Lua(fmut) => fmut.pc = pc + 1,
    _ => unreachable!("Cont frame at pc bump"),
}
7382
7383 // count + line hooks (PUC traceexec): before executing the
7384 // instruction. Skipped while the hook itself runs.
7385 // (Parens here are load-bearing — without them `&&` binds tighter
7386 // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7387 // letting a Lua line hook recurse into itself → stack overflow
7388 // on db.lua line-hook assertions. Matches the `hook_call_with` /
7389 // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7390 if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7391 let lines = &cl.proto.lines;
7392 let cur_line = if lines.is_empty() {
7393 None
7394 } else {
7395 Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7396 };
7397 // count hook: fire every `count_base` instructions
7398 if self.hook.count {
7399 self.hook.count_left -= 1;
7400 if self.hook.count_left <= 0 {
7401 self.hook.count_left = self.hook.count_base;
7402 // hooked function is the running Lua frame: its frame
7403 // is on the stack, so no synthetic C level is needed.
7404 self.run_hook(b"count", cur_line, false)?;
7405 }
7406 }
7407 // line hook: fire on a fresh frame, a backward jump (loop), or a
7408 // change of source line.
7409 if self.hook.line {
7410 if lines.is_empty() {
7411 // PUC: a stripped chunk has no line info, so
7412 // `getfuncline` returns -1. The line hook still fires
7413 // on the first instruction of the new frame (where
7414 // `npci <= oldpc` holds at oldpc=0), with the line
7415 // pushed as `nil` instead of an integer (db.lua :1030
7416 // "hook called without debug info for 1st instruction").
7417 if oldpc == u32::MAX {
7418 self.run_hook(b"line", None, false)?;
7419 self.top_frame_mut().hook_oldpc = pc;
7420 }
7421 } else {
7422 let newline = lines[(pc as usize).min(lines.len() - 1)];
7423 // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7424 // on a backward jump (`pc < oldpc` — strict; an equal pc
7425 // would re-fire the install-site after `oldpc = pc`),
7426 // or when the source line changes.
7427 let fire = oldpc == u32::MAX
7428 || pc < oldpc
7429 || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7430 if fire {
7431 self.run_hook(b"line", Some(newline as i64), false)?;
7432 }
7433 self.top_frame_mut().hook_oldpc = pc;
7434 }
7435 }
7436 }
7437
7438 match inst.op() {
7439 Op::Move => {
7440 let v = self.r(base, inst.b());
7441 self.set_r(base, inst.a(), v);
7442 }
7443 Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7444 Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7445 Op::LoadK => {
7446 let v = cl.proto.consts[inst.bx() as usize];
7447 self.set_r(base, inst.a(), v);
7448 }
7449 Op::LoadKx => {
7450 let extra = cl.proto.code[self.pc_of_top() as usize];
7451 self.bump_pc();
7452 let v = cl.proto.consts[extra.ax() as usize];
7453 self.set_r(base, inst.a(), v);
7454 }
7455 Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7456 Op::LFalseSkip => {
7457 self.set_r(base, inst.a(), Value::Bool(false));
7458 self.bump_pc();
7459 }
7460 Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7461 Op::LoadNil => {
7462 let a = inst.a();
7463 for i in 0..=inst.b() {
7464 self.set_r(base, a + i, Value::Nil);
7465 }
7466 }
7467 Op::GetUpval => {
7468 let v = self.upval_get(cl, inst.b());
7469 self.set_r(base, inst.a(), v);
7470 }
7471 Op::SetUpval => {
7472 let v = self.r(base, inst.a());
7473 self.upval_set(cl, inst.b(), v);
7474 }
7475 Op::GetTabUp => {
7476 let t = self.upval_get(cl, inst.b());
7477 let key = cl.proto.consts[inst.c() as usize];
7478 self.op_index(t, key, base + inst.a())?;
7479 }
7480 Op::GetTable => {
7481 let t = self.r(base, inst.b());
7482 let key = self.r(base, inst.c());
7483 self.op_index(t, key, base + inst.a())?;
7484 }
7485 Op::GetI => {
7486 let t = self.r(base, inst.b());
7487 self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7488 }
7489 Op::GetField => {
7490 let t = self.r(base, inst.b());
7491 let key = cl.proto.consts[inst.c() as usize];
7492 // v1.2 D4 A1 — fast path: known-Str const key + no
7493 // metatable on the table → skip `op_index` /
7494 // `index_step`'s MAX_TAG_LOOP setup and the outer
7495 // `Value` match. Falls through to the slow path
7496 // unchanged when either invariant breaks (so
7497 // `__index` metamethods, non-Table receivers, and
7498 // non-Str keys behave exactly as before).
7499 if let Value::Table(tb) = t
7500 && tb.metatable().is_none()
7501 && let Value::Str(s) = key
7502 {
7503 let v = tb.get_str(s);
7504 self.stack[(base + inst.a()) as usize] = v;
7505 } else {
7506 self.op_index(t, key, base + inst.a())?;
7507 }
7508 }
7509 Op::SetTabUp => {
7510 let t = self.upval_get(cl, inst.a());
7511 let key = cl.proto.consts[inst.b() as usize];
7512 let v = self.r(base, inst.c());
7513 self.op_newindex(t, key, v)?;
7514 }
7515 Op::SetTable => {
7516 let t = self.r(base, inst.a());
7517 let key = self.r(base, inst.b());
7518 let v = self.r(base, inst.c());
7519 self.op_newindex(t, key, v)?;
7520 }
7521 Op::SetI => {
7522 let t = self.r(base, inst.a());
7523 let v = self.r(base, inst.c());
7524 self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7525 }
7526 Op::SetField => {
7527 let t = self.r(base, inst.a());
7528 let key = cl.proto.consts[inst.b() as usize];
7529 let v = self.r(base, inst.c());
7530 self.op_newindex(t, key, v)?;
7531 }
7532 Op::NewTable => {
7533 let t = self.heap.new_table();
7534 self.set_r(base, inst.a(), Value::Table(t));
7535 self.maybe_collect_garbage(base + inst.a() + 1);
7536 }
7537 Op::SetList => {
7538 let a = inst.a();
7539 let abs_a = base + a;
7540 let n = if inst.b() == 0 {
7541 self.top - (abs_a + 1)
7542 } else {
7543 inst.b()
7544 };
7545 let offset = if inst.k() {
7546 let extra = cl.proto.code[self.pc_of_top() as usize];
7547 self.bump_pc();
7548 extra.ax() as i64
7549 } else {
7550 inst.c() as i64
7551 };
7552 let Value::Table(t) = self.r(base, a) else {
7553 unreachable!("SETLIST on non-table");
7554 };
7555 for i in 1..=n {
7556 let v = self.r(base, a + i);
7557 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7558 if let Err(TableError::Overflow) =
7559 unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7560 {
7561 return Err(self.rt_err("table overflow"));
7562 }
7563 }
7564 // one barrier_back covers every store this op did — PUC's
7565 // `luaC_barrierback_` once-per-table optimisation
7566 self.heap
7567 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7568 // the element temps above the table are now consumed
7569 self.maybe_collect_garbage(base + a + 1);
7570 }
7571 Op::SelfOp => {
7572 let o = self.r(base, inst.b());
7573 self.set_r(base, inst.a() + 1, o);
7574 // PUC OP_SELF's C is a constant index when the k-flag is
7575 // set; otherwise it points to a register that holds the
7576 // (constant-loaded) key. luna's compiler falls back to the
7577 // register form when the constant index exceeds OP_SELF's
7578 // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7579 // a table with 250+ string keys, where "findfield" lands
7580 // past const #255). The exec must honour the same split.
7581 let key = if inst.k() {
7582 cl.proto.consts[inst.c() as usize]
7583 } else {
7584 self.r(base, inst.c())
7585 };
7586 self.op_index(o, key, base + inst.a())?;
7587 }
7588 Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7589 Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7590 Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7591 Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7592 Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7593 Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7594 Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7595 Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7596 Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7597 Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7598 Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7599 Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7600 Op::Unm => {
7601 let v = self.r(base, inst.b());
7602 match coerce_num(v) {
7603 Some(Num::Int(i)) => {
7604 self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7605 }
7606 Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7607 None => {
7608 let mm = self.get_mm(v, Mm::Unm);
7609 if mm.is_nil() {
7610 return Err(self.type_err("perform arithmetic on", v));
7611 }
7612 let dst = base + inst.a();
7613 self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7614 }
7615 }
7616 }
7617 Op::BNot => {
7618 let v = self.r(base, inst.b());
7619 match coerce_num(v) {
7620 Some(n) => {
7621 let i = self.int_from_num(n)?;
7622 self.set_r(base, inst.a(), Value::Int(!i));
7623 }
7624 None => {
7625 let mm = self.get_mm(v, Mm::BNot);
7626 if mm.is_nil() {
7627 return Err(self.type_err("perform bitwise operation on", v));
7628 }
7629 let dst = base + inst.a();
7630 self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7631 }
7632 }
7633 }
7634 Op::Not => {
7635 let v = self.r(base, inst.b());
7636 self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7637 }
7638 Op::Len => {
7639 let v = self.r(base, inst.b());
7640 match self.len_step(v)? {
7641 MmOut::Done(r) => self.set_r(base, inst.a(), r),
7642 MmOut::Mm { func, recv } => {
7643 let dst = base + inst.a();
7644 self.begin_meta_call(
7645 func,
7646 &[recv, recv],
7647 MetaAction::Store { dst },
7648 "len",
7649 )?;
7650 }
7651 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7652 }
7653 }
7654 Op::Concat => {
7655 // right-associative fold over operands at base+a .. base+a+n,
7656 // in place on the stack so a yielding __concat can suspend.
7657 let a = inst.a();
7658 let n = inst.b();
7659 self.top = base + a + n;
7660 self.concat_run(base + a)?;
7661 }
7662 Op::Close => {
7663 // Yieldable: drive __close handlers through the
7664 // interpreter loop so a coroutine.yield() inside a
7665 // handler suspends cleanly (locals.lua block-end yield).
7666 // `drive_close` parks the handler call at `self.top`, so
7667 // raise `top` past this frame's full register window
7668 // first — a goto out of a nested for-loop can fire
7669 // OP_Close while `self.top` still sits at the inner
7670 // body's working top, which would let `push_frame`'s
7671 // wipe clobber the outer tbc slot before it could be
7672 // closed (locals.lua:1219 nested-for goto regression).
7673 self.top = self.top.max(base + cl.proto.max_stack as u32);
7674 let _ =
7675 self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7676 }
7677 Op::Tbc => {
7678 self.register_tbc(base + inst.a())?;
7679 }
7680 Op::Jmp => {
7681 let off = inst.sj();
7682 // P12-S1.B — trace JIT back-edge counter. A negative
7683 // jump offset is a loop back-edge (the only canonical
7684 // backward jumps the compiler emits — `while`, `for`,
7685 // `repeat`). Tick the per-Proto counter and, once it
7686 // exceeds the threshold, log a stub promotion that
7687 // S1.C will turn into actual trace recording. The
7688 // whole block is gated on `trace_jit_enabled` so
7689 // existing benches see one branch-not-taken and no
7690 // counter writes.
7691 if self.jit.trace_enabled && off < 0 {
7692 let proto = cl.proto;
7693 let c = proto.trace_hot_count.get();
7694 if c < u32::MAX / 2 {
7695 proto.trace_hot_count.set(c + 1);
7696 }
7697 // P13-S13-H — relaxed back-edge trigger:
7698 // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7699 // a missed crossing (active_trace busy with
7700 // a call-trigger, or the recorder slot
7701 // happened to be in use) doesn't permanently
7702 // lock this back-edge target out. The
7703 // `already_cached` short-circuit prevents
7704 // duplicate recordings: once a trace is
7705 // cached for this target, subsequent
7706 // crossings skip the start. This pairs with
7707 // S13-H's discard-on-partial-coverage close
7708 // handling — when a short call-trigger is
7709 // discarded, the back-edge can still find an
7710 // open slot at the next iteration.
7711 let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7712 // P13-S13-K — gave-up short-circuit. Skip
7713 // the RefCell borrow + scan when the
7714 // S13-I cap force-compiled a partial
7715 // trace on this Proto.
7716 let back_edge_already_cached = if proto.trace_gave_up.get() {
7717 true
7718 } else {
7719 proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7720 };
7721 if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7722 && self.jit.active_trace.is_none()
7723 && !back_edge_already_cached
7724 {
7725 // Back-edge target = pc after `add_pc(off)`,
7726 // i.e. current `pc + 1 + off` (the dispatch
7727 // loop has already advanced f.pc to pc+1).
7728 let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7729 // Snapshot per-slot Value tag at trace
7730 // entry so the lowerer's kind tracker
7731 // knows which arith path to lower
7732 // (iadd vs fadd, etc.).
7733 let max_stack = cl.proto.max_stack as usize;
7734 let base_us = base as usize;
7735 let mut entry_tags = Vec::with_capacity(max_stack);
7736 for i in 0..max_stack {
7737 let (tag, _) = self.stack[base_us + i].unpack();
7738 entry_tags.push(tag);
7739 }
7740 self.jit.active_trace =
7741 Some(Box::new(crate::jit::trace::TraceRecord::start(
7742 cl.proto, target, entry_tags, false,
7743 )));
7744 // P12-S4 — record the frame the trace
7745 // started in. `self.frames.len() - 1`
7746 // since we're inside the currently-running
7747 // Lua frame's dispatch.
7748 self.jit.recording_frame_base = self.frames.len() - 1;
7749 }
7750 }
7751 self.add_pc(off);
7752 }
7753 Op::Eq => {
7754 let l = self.r(base, inst.a());
7755 let r = self.r(base, inst.b());
7756 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7757 if (a == b) != inst.k() {
7758 self.bump_pc();
7759 }
7760 } else {
7761 let step = self.eq_step(l, r);
7762 self.op_compare(step, l, r, inst.k(), "eq")?;
7763 }
7764 }
7765 Op::EqK => {
7766 let l = self.r(base, inst.a());
7767 let r = cl.proto.consts[inst.b() as usize];
7768 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7769 if (a == b) != inst.k() {
7770 self.bump_pc();
7771 }
7772 } else {
7773 let step = self.eq_step(l, r);
7774 self.op_compare(step, l, r, inst.k(), "eq")?;
7775 }
7776 }
7777 Op::Lt => {
7778 let l = self.r(base, inst.a());
7779 let r = self.r(base, inst.b());
7780 // hot path: Int < Int — drops the MmOut + op_compare match
7781 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7782 if (a < b) != inst.k() {
7783 self.bump_pc();
7784 }
7785 } else {
7786 let step = self.less_step(l, r, false)?;
7787 self.op_compare(step, l, r, inst.k(), "lt")?;
7788 }
7789 }
7790 Op::Le => {
7791 let l = self.r(base, inst.a());
7792 let r = self.r(base, inst.b());
7793 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7794 if (a <= b) != inst.k() {
7795 self.bump_pc();
7796 }
7797 } else {
7798 let step = self.less_step(l, r, true)?;
7799 self.op_compare(step, l, r, inst.k(), "le")?;
7800 }
7801 }
7802 Op::Test => {
7803 let cond = self.r(base, inst.a()).truthy();
7804 self.cond_skip(cond, inst.k());
7805 }
7806 Op::TestSet => {
7807 let v = self.r(base, inst.b());
7808 if v.truthy() == inst.k() {
7809 self.set_r(base, inst.a(), v);
7810 } else {
7811 self.bump_pc();
7812 }
7813 }
7814 Op::Call => {
7815 let abs = base + inst.a();
7816 let nargs = if inst.b() == 0 {
7817 None
7818 } else {
7819 Some(inst.b() - 1)
7820 };
7821 let wanted = inst.c() as i32 - 1;
7822 self.begin_call(abs, nargs, wanted, false)?;
7823 }
7824 Op::TailCall => {
7825 let fr = *self.top_frame();
7826 let abs = base + inst.a();
7827 let mut nargs = if inst.b() == 0 {
7828 self.top - (abs + 1)
7829 } else {
7830 inst.b() - 1
7831 };
7832 // A tail call pops this frame before begin_call, so a
7833 // non-callable target would lose its name/position. Report
7834 // it now (PUC reads funcname from the still-current ci),
7835 // while the frame is intact, for "(field 'x')"-style info.
7836 let mut func = self.stack[abs as usize];
7837 if !matches!(func, Value::Closure(_) | Value::Native(_))
7838 && self.get_mm(func, Mm::Call).is_nil()
7839 {
7840 return Err(self.call_err(func));
7841 }
7842 // PUC `luaD_pretailcall` resolves a chain of `__call`
7843 // metamethods *in place* before deciding whether to
7844 // collapse this frame. Without that, each __call hop
7845 // would push a fresh Lua frame and a 10000-deep
7846 // tail-recursion through a 100-deep __call chain
7847 // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
7848 // shift args right, install the handler at `abs`, retry.
7849 // Chain depth limit matches the call-site `begin_call`
7850 // version cap (5.5 calls.lua :223 — 15 max, then "too
7851 // long"; 16th wrap fails the call). An infinite
7852 // self-referential `__call` would otherwise spin.
7853 let chain_cap = if self.version >= LuaVersion::Lua55 {
7854 15
7855 } else {
7856 MAX_CCMT
7857 };
7858 let mut chain = 0u32;
7859 while !matches!(func, Value::Closure(_) | Value::Native(_)) {
7860 let mm = self.get_mm(func, Mm::Call);
7861 if mm.is_nil() {
7862 return Err(self.call_err(func));
7863 }
7864 chain += 1;
7865 if chain > chain_cap {
7866 return Err(self.rt_err("'__call' chain too long"));
7867 }
7868 let end = (abs + 1 + nargs) as usize;
7869 if self.stack.len() < end + 1 {
7870 self.stack.resize(end + 1, Value::Nil);
7871 }
7872 for i in (0..=nargs).rev() {
7873 self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
7874 }
7875 self.stack[abs as usize] = mm;
7876 nargs += 1;
7877 self.top = abs + 1 + nargs;
7878 func = mm;
7879 }
7880 // PUC's tail-call collapse is Lua→Lua only. A tail call to
7881 // a C function runs the C function under the *current* Lua
7882 // activation (no frame fold — a C frame has nothing to
7883 // collapse into); after the C function returns, the
7884 // calling Lua function returns those results normally.
7885 // Mirror that: keep our Lua frame on the stack, call the
7886 // target through `begin_call(abs, …)` as a regular call,
7887 // and let the fallback `Op::Return` that the compiler
7888 // emits right after `Op::TailCall` forward the results.
7889 // 5.1 closure.lua :177's `return getfenv()` from inside
7890 // foo needs level 1 to resolve to foo, not to the
7891 // thread's globals fallback that happens when no Lua
7892 // frame is on the stack.
7893 let lua_target = matches!(func, Value::Closure(_));
7894 if lua_target {
7895 self.close_slots(fr.base, None)?;
7896 for i in 0..=nargs {
7897 self.stack[(fr.func_slot + i) as usize] =
7898 self.stack[(abs + i) as usize];
7899 }
7900 // v2.5 P1B-2A: clear the slot range that's now
7901 // stranded by the tail-call collapse. The args
7902 // were copied to `[fr.func_slot..fr.func_slot+
7903 // nargs+1)`; the source slots `[abs..abs+
7904 // nargs+1)` still hold the same `Value::Closure
7905 // / Value::Str / ...` entries, but they're past
7906 // the new call's window. Without this clear, a
7907 // later GC with wider gc_top would mark stale
7908 // pointers there (same UAF-A family the v2.3
7909 // finish_results slot-clear closed for the
7910 // Op::Return path).
7911 let new_top_lower_bound = fr.func_slot + nargs + 1;
7912 let prev_top = (self.top as usize).min(self.stack.len());
7913 if (new_top_lower_bound as usize) < prev_top {
7914 for slot in &mut self.stack[new_top_lower_bound as usize..prev_top] {
7915 *slot = Value::Nil;
7916 }
7917 }
7918 // PUC `CIST_TAIL`: the new Lua activation inherits
7919 // the popped frame's tailcalls count plus one for
7920 // this collapse. 5.1 db.lua :372 hammers 30000
7921 // recursive tail calls and expects to see the
7922 // synthetic tail level for every one of them.
7923 self.pending_tailcalls = fr.tailcalls.saturating_add(1);
7924 frames_pop_sync(&mut self.frames, &mut self.frames_top);
7925 if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
7926 && self.frames.len() < entry_depth
7927 {
7928 // a native completed what was this function's result
7929 return Ok(self.take_results(fr.func_slot));
7930 }
7931 } else {
7932 // Native (or __call-bearing) target: regular call. The
7933 // results land at `abs..self.top` and the next op (the
7934 // fallback `Op::Return`) forwards them. `wanted = -1`
7935 // because the caller will multret them through Return.
7936 self.begin_call(abs, Some(nargs), -1, false)?;
7937 }
7938 }
7939 Op::Return | Op::Return0 | Op::Return1 => {
7940 let (abs_a, nret) = match inst.op() {
7941 Op::Return0 => (base, 0),
7942 Op::Return1 => (base + inst.a(), 1),
7943 _ => {
7944 let abs_a = base + inst.a();
7945 let nret = if inst.b() == 0 {
7946 self.top - abs_a
7947 } else {
7948 inst.b() - 1
7949 };
7950 (abs_a, nret)
7951 }
7952 };
7953 // close before moving results: __close handlers run above
7954 // the stack top, so the result region [abs_a..abs_a+nret)
7955 // stays intact across any yields the close performs.
7956 // Fixed-count returns may leave `self.top` below the last
7957 // result slot (the compiler does not always re-bump it);
7958 // raise it past the result region so `drive_close` parks
7959 // the handler call *above* — landing at `self.top` would
7960 // otherwise clobber a result with the handler closure.
7961 self.top = self.top.max(abs_a + nret);
7962 if let Some(vals) = self.begin_close(
7963 base,
7964 None,
7965 AfterClose::Return {
7966 abs_a,
7967 nret,
7968 from_native: false,
7969 },
7970 entry_depth,
7971 )? {
7972 return Ok(vals);
7973 }
7974 }
7975 Op::ForPrep => self.for_prep(inst, base)?,
7976 Op::ForLoop => {
7977 // P12 — trace JIT back-edge counter on the
7978 // numeric-for back-edge. ForLoop is always at
7979 // a back-edge position (when it continues);
7980 // for the trace recorder we treat it as the
7981 // close-detection equivalent of `Op::Jmp` with
7982 // negative offset. Counter only ticks when the
7983 // back-edge will actually fire (count > 0 in
7984 // the 5.4+ Int form, comparable predicates in
7985 // pre-5.3 / Float). The cheap check up front
7986 // matches the for_loop helper's branch.
7987 if self.jit.trace_enabled {
7988 let a = inst.a();
7989 let pre53 = self.version() <= LuaVersion::Lua53;
7990 let take_back_edge =
7991 match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
7992 (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
7993 count > 0
7994 }
7995 (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
7996 let next = cur.wrapping_add(st);
7997 if st > 0 { next <= lim } else { next >= lim }
7998 }
7999 (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
8000 let next = cur + st;
8001 if st > 0.0 { next <= lim } else { next >= lim }
8002 }
8003 _ => false,
8004 };
8005 if take_back_edge {
8006 let proto = cl.proto;
8007 let c = proto.trace_hot_count.get();
8008 if c < u32::MAX / 2 {
8009 proto.trace_hot_count.set(c + 1);
8010 }
8011 if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8012 && self.jit.active_trace.is_none()
8013 {
8014 // ForLoop's back-edge target = pc
8015 // after `add_pc(-bx)` runs from the
8016 // already-bumped f.pc (= pc + 1).
8017 // So target = (pc + 1) - bx.
8018 let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8019 let max_stack = cl.proto.max_stack as usize;
8020 let base_us = base as usize;
8021 let mut entry_tags = Vec::with_capacity(max_stack);
8022 for i in 0..max_stack {
8023 let (tag, _) = self.stack[base_us + i].unpack();
8024 entry_tags.push(tag);
8025 }
8026 self.jit.active_trace =
8027 Some(Box::new(crate::jit::trace::TraceRecord::start(
8028 cl.proto, target, entry_tags, false,
8029 )));
8030 // P12-S4 — record the frame the trace
8031 // started in. The currently-running
8032 // Lua frame is at len() - 1.
8033 self.jit.recording_frame_base = self.frames.len() - 1;
8034 }
8035 }
8036 }
8037 self.for_loop(inst, base);
8038 }
8039 Op::TForPrep => {
8040 // the 4th control slot is the iterator's closing value
8041 self.register_tbc(base + inst.a() + 3)?;
8042 self.add_pc(inst.bx() as i32);
8043 }
8044 Op::TForCall => {
8045 let abs = base + inst.a();
8046 let need = (abs + 7) as usize;
8047 if self.stack.len() < need {
8048 self.stack.resize(need, Value::Nil);
8049 }
8050 self.stack[(abs + 4) as usize] = self.stack[abs as usize];
8051 self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
8052 self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
8053 let nvars = inst.c() as i32;
8054 self.begin_call(abs + 4, Some(2), nvars, false)?;
8055 }
8056 Op::TForLoop => {
8057 let a = inst.a();
8058 let ctrl = self.r(base, a + 4);
8059 if !ctrl.is_nil() {
8060 // P12-S12-B v1 — trace JIT back-edge counter on
8061 // generic-for back-edge. TForLoop sits at the
8062 // tail of `for k,v in expr do ... end`; recorder
8063 // treats it as the close-detection equivalent of
8064 // a negative Op::Jmp. Gate on `take_back_edge`
8065 // (= `ctrl != nil`) so empty-iter loops don't
8066 // pollute hot_count. v1 only adds the trigger;
8067 // whitelist + helper + emit live in v2.
8068 if self.jit.trace_enabled {
8069 let proto = cl.proto;
8070 let c = proto.trace_hot_count.get();
8071 if c < u32::MAX / 2 {
8072 proto.trace_hot_count.set(c + 1);
8073 }
8074 if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8075 && self.jit.active_trace.is_none()
8076 {
8077 // TForLoop back-edge target = pc after
8078 // `add_pc(-bx)` runs from the already-
8079 // bumped f.pc (= pc + 1). So target =
8080 // (pc + 1) - bx, normally landing on
8081 // body_top (the op right after TForPrep).
8082 let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8083 let max_stack = cl.proto.max_stack as usize;
8084 let base_us = base as usize;
8085 let mut entry_tags = Vec::with_capacity(max_stack);
8086 for i in 0..max_stack {
8087 let (tag, _) = self.stack[base_us + i].unpack();
8088 entry_tags.push(tag);
8089 }
8090 // P12-S12-B-v5 — snapshot the iter
8091 // fn's address if Native, so the
8092 // lowerer can specialise ipairs into
8093 // inline Table aget IR.
8094 let iter_ptr =
8095 if let Value::Native(n) = self.stack[base_us + a as usize] {
8096 Some(n.f as usize)
8097 } else {
8098 None
8099 };
8100 // P12-S12-C v3 — snapshot R[A+5]'s
8101 // tag (= current iter's val from
8102 // the just-fired TForCall). The v5
8103 // inline aget fast_blk emits a
8104 // runtime guard against this tag;
8105 // mixed-tag arrays deopt rather
8106 // than producing garbage pointers
8107 // through the v2 spill path.
8108 let val_slot = base_us + (a as usize) + 5;
8109 let val_tag = if val_slot < self.stack.len() {
8110 Some(self.stack[val_slot].unpack().0)
8111 } else {
8112 None
8113 };
8114 let mut rec = crate::jit::trace::TraceRecord::start(
8115 cl.proto, target, entry_tags, false,
8116 );
8117 rec.tfor_iter_ptr = iter_ptr;
8118 rec.tfor_val_tag = val_tag;
8119 self.jit.active_trace = Some(Box::new(rec));
8120 self.jit.recording_frame_base = self.frames.len() - 1;
8121 }
8122 }
8123 self.set_r(base, a + 2, ctrl);
8124 self.add_pc(-(inst.bx() as i32));
8125 }
8126 }
8127 Op::Closure => {
8128 let proto = cl.proto.protos[inst.bx() as usize];
8129 let n_ups = proto.upvals.len();
8130 // P11-S5d.M — build upvals on the stack for small
8131 // closures, skipping the per-call Vec/Box alloc
8132 // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8133 // = 2 covers most Lua source (1 captured local, or
8134 // _ENV + a single capture). Beyond that, fall back
8135 // to a heap Vec.
8136 use crate::runtime::function::INLINE_UPVALS_N;
8137 let mut stack_buf: [std::mem::MaybeUninit<
8138 Gc<crate::runtime::function::Upvalue>,
8139 >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8140 let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8141 let use_inline = n_ups <= INLINE_UPVALS_N;
8142 if !use_inline {
8143 heap_buf.reserve_exact(n_ups);
8144 }
8145 for (i, d) in proto.upvals.iter().enumerate() {
8146 let uv = if d.in_stack {
8147 self.find_or_create_upval(base + d.index as u32)
8148 } else {
8149 cl.upvals()[d.index as usize]
8150 };
8151 if use_inline {
8152 stack_buf[i] = std::mem::MaybeUninit::new(uv);
8153 } else {
8154 heap_buf.push(uv);
8155 }
8156 }
8157 // Tiny shim around the two paths so the 5.1 _ENV
8158 // clone + cache check below see one uniform
8159 // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8160 // into the local frame (still valid through the
8161 // rest of this Op::Closure handler).
8162 let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8163 // SAFETY: the first n_ups slots of stack_buf
8164 // were initialised above; we hand out a slice
8165 // covering exactly them.
8166 unsafe {
8167 std::slice::from_raw_parts_mut(
8168 stack_buf.as_mut_ptr()
8169 as *mut Gc<crate::runtime::function::Upvalue>,
8170 n_ups,
8171 )
8172 }
8173 } else {
8174 &mut heap_buf[..]
8175 };
8176 // PUC 5.1 had per-function environments: every Lua
8177 // function carried its own `env` slot, snapshotted from
8178 // the creating function's env at closure time, so a
8179 // `setfenv` on one closure never bled into a sibling.
8180 // luna models that by giving the 5.1 closure a *fresh*
8181 // closed upvalue for whichever cell holds `_ENV`, seeded
8182 // from the parent's current env value. Only that cell is
8183 // cloned — every other upvalue keeps its open/shared
8184 // identity (so e.g. `local function range(...) ...
8185 // range(...) ... end` still sees its self-reference). 5.2+
8186 // keeps the shared-upval model (and the proto cache that
8187 // depends on it).
8188 let v51 = self.version() <= LuaVersion::Lua51;
8189 if v51 && proto.env_upval_idx != u8::MAX {
8190 let i = proto.env_upval_idx as usize;
8191 let cur = match ups[i].state() {
8192 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8193 UpvalState::Closed(v) => v,
8194 };
8195 ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8196 }
8197 let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8198 // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8199 // and reuses it when every fresh-upvalue binding still
8200 // points to the same Upvalue object as the cached one.
8201 // That keeps `function() return outer end` repeated in a
8202 // loop comparing equal across iterations (the captured
8203 // outer is a shared open upvalue), while `function()
8204 // return loop_var end` gets a fresh closure each round
8205 // because the loop var is re-created per iteration. PUC
8206 // 5.1 predated the cache, and the per-closure `_ENV`
8207 // clone above would defeat it anyway, so skip it.
8208 let nc = if v51 {
8209 self.heap.new_closure_inline(proto, ups_slice)
8210 } else {
8211 let cached = proto.cache.get().filter(|c| {
8212 c.upvals().len() == ups_slice.len()
8213 && c.upvals()
8214 .iter()
8215 .zip(ups_slice.iter())
8216 .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8217 });
8218 match cached {
8219 Some(c) => c,
8220 None => {
8221 let n = self.heap.new_closure_inline(proto, ups_slice);
8222 proto.cache.set(Some(n));
8223 n
8224 }
8225 }
8226 };
8227 self.set_r(base, inst.a(), Value::Closure(nc));
8228 self.maybe_collect_garbage(base + inst.a() + 1);
8229 }
8230 Op::Vararg => {
8231 let abs_a = base + inst.a();
8232 let wanted = inst.c() as i32 - 1;
8233 // A materialized named vararg lives in func_slot (its writes
8234 // must be visible to `...`); otherwise spread the extra args
8235 // straight off the stack at func_slot+1 .. +n_varargs.
8236 let vt = match self.stack[func_slot as usize] {
8237 Value::Table(t) => Some(t),
8238 _ => None,
8239 };
8240 let n = match vt {
8241 Some(t) => {
8242 let n_key = Value::Str(self.heap.intern(b"n"));
8243 // PUC getnumargs: a named vararg `t.n` set out of the
8244 // integer range [0, INT_MAX/2] is rejected here
8245 match t.get(n_key) {
8246 Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8247 _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8248 }
8249 }
8250 None => n_varargs,
8251 };
8252 let count = if wanted < 0 { n } else { wanted as u32 };
8253 let need = (abs_a + count) as usize;
8254 if self.stack.len() < need {
8255 self.stack.resize(need, Value::Nil);
8256 }
8257 for i in 0..count {
8258 let v = if i >= n {
8259 Value::Nil
8260 } else if let Some(t) = vt {
8261 t.get_int(i as i64 + 1)
8262 } else {
8263 self.stack[(func_slot + 1 + i) as usize]
8264 };
8265 self.stack[(abs_a + i) as usize] = v;
8266 }
8267 if wanted < 0 {
8268 self.top = abs_a + count;
8269 }
8270 }
8271 Op::GetVarg => {
8272 // materialize the vararg table (PUC table.pack shape) from the
8273 // stack varargs — used when the named vararg is written /
8274 // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8275 // sees later writes) and in the local register R[A].
8276 let n = n_varargs;
8277 let t = self.heap.new_table();
8278 {
8279 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8280 let tm = unsafe { t.as_mut() };
8281 for i in 0..n {
8282 let _ = tm.set_int(
8283 &mut self.heap,
8284 i as i64 + 1,
8285 self.stack[(func_slot + 1 + i) as usize],
8286 );
8287 }
8288 }
8289 let n_key = Value::Str(self.heap.intern(b"n"));
8290 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8291 unsafe { t.as_mut() }
8292 .set(&mut self.heap, n_key, Value::Int(n as i64))
8293 .expect("'n' is a valid key");
8294 // once-per-table barrier (mirror SETLIST): t is born BLACK
8295 // during Propagate; the bulk inserts above don't barrier.
8296 self.heap
8297 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8298 self.stack[func_slot as usize] = Value::Table(t);
8299 self.set_r(base, inst.a(), Value::Table(t));
8300 }
8301 Op::VargIdx => {
8302 // R[A] := vararg[R[C]] without allocating: integer key in
8303 // [1,n] → that vararg, "n" → the count, else nil.
8304 let key = self.r(base, inst.c());
8305 let n = n_varargs;
8306 let v = match key {
8307 Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8308 self.stack[(func_slot + k as u32) as usize]
8309 }
8310 Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8311 self.stack[(func_slot + f as u32) as usize]
8312 }
8313 Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8314 _ => Value::Nil,
8315 };
8316 self.set_r(base, inst.a(), v);
8317 }
8318 Op::ErrNNil => {
8319 let v = self.r(base, inst.a());
8320 if !matches!(v, Value::Nil) {
8321 let bx = inst.bx();
8322 let name = if bx == 0 {
8323 "?".to_string()
8324 } else {
8325 match cl.proto.consts[(bx - 1) as usize] {
8326 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8327 _ => "?".to_string(),
8328 }
8329 };
8330 return Err(self.rt_err(&format!("global '{name}' already defined")));
8331 }
8332 }
8333 Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8334 }
8335 }
8336 }
8337
8338 #[inline(always)]
8339 fn pc_of_top(&self) -> u32 {
8340 self.top_frame().pc
8341 }
8342
8343 #[inline(always)]
8344 fn bump_pc(&mut self) {
8345 // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8346 // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8347 // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8348 // up over `fib_28`'s ~500k jumps.
8349 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8350 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8351 CallFrame::Lua(f) => f.pc += 1,
8352 _ => unreachable!("Cont frame at bump_pc"),
8353 }
8354 }
8355
8356 #[inline(always)]
8357 fn add_pc(&mut self, d: i32) {
8358 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8359 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8360 CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8361 _ => unreachable!("Cont frame at add_pc"),
8362 }
8363 }
8364
8365 /// PUC conditional-skip convention: the JMP that follows is executed when
8366 /// `cond == k`; otherwise it is skipped.
8367 #[inline(always)]
8368 fn cond_skip(&mut self, cond: bool, k: bool) {
8369 if cond != k {
8370 self.bump_pc();
8371 }
8372 }
8373
8374 // ---- indexing (with __index/__newindex chains) ----
8375
8376 /// The `#` length operation: string byte length, `__len` if present, else
8377 /// the raw table border. Returns the raw length value (may be non-integer
8378 /// when `__len` is exotic).
8379 pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8380 match self.len_step(v)? {
8381 MmOut::Done(n) => Ok(n),
8382 // PUC calls unary metamethods with the operand twice
8383 MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8384 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8385 }
8386 }
8387
8388 /// Length fast path: a string's byte count or a table's raw border when no
8389 /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8390 /// called with the operand twice. Errors for a non-table with no `__len`.
8391 fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8392 match v {
8393 Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8394 Value::Table(t) => {
8395 let mm = self.get_mm(v, Mm::Len);
8396 if mm.is_nil() {
8397 Ok(MmOut::Done(Value::Int(t.len())))
8398 } else {
8399 Ok(MmOut::Mm { func: mm, recv: v })
8400 }
8401 }
8402 _ => {
8403 let mm = self.get_mm(v, Mm::Len);
8404 if mm.is_nil() {
8405 Err(self.type_err("get length of", v))
8406 } else {
8407 Ok(MmOut::Mm { func: mm, recv: v })
8408 }
8409 }
8410 }
8411 }
8412
8413 /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8414 /// value with no integer representation.
8415 pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8416 match self.len_value(v)? {
8417 Value::Int(i) => Ok(i),
8418 Value::Float(f) => crate::runtime::value::f2i_exact(f)
8419 .ok_or_else(|| self.rt_err("object length is not an integer")),
8420 _ => Err(self.rt_err("object length is not an integer")),
8421 }
8422 }
8423
8424 pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8425 match self.index_step(t, key)? {
8426 MmOut::Done(v) => Ok(v),
8427 MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8428 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8429 }
8430 }
8431
8432 /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8433 /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8434 /// are followed inline (no yield possible); only a function link can yield.
8435 fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8436 let mut cur = t;
8437 for _ in 0..MAX_TAG_LOOP {
8438 let mm = match cur {
8439 Value::Table(tb) => {
8440 let v = tb.get(key);
8441 if !v.is_nil() {
8442 return Ok(MmOut::Done(v));
8443 }
8444 let mm = self.get_mm(cur, Mm::Index);
8445 if mm.is_nil() {
8446 return Ok(MmOut::Done(Value::Nil));
8447 }
8448 mm
8449 }
8450 v => {
8451 let mm = self.get_mm(v, Mm::Index);
8452 if mm.is_nil() {
8453 return Err(self.type_err("index", v));
8454 }
8455 mm
8456 }
8457 };
8458 match mm {
8459 Value::Closure(_) | Value::Native(_) => {
8460 return Ok(MmOut::Mm {
8461 func: mm,
8462 recv: cur,
8463 });
8464 }
8465 next => cur = next,
8466 }
8467 }
8468 Err(self.rt_err("'__index' chain too long; possible loop"))
8469 }
8470
8471 pub(crate) fn newindex_value(
8472 &mut self,
8473 t: Value,
8474 key: Value,
8475 v: Value,
8476 ) -> Result<(), LuaError> {
8477 match self.newindex_step(t, key, v)? {
8478 MmOut::Done(_) => Ok(()),
8479 MmOut::Mm { func, recv } => {
8480 self.call_value(func, &[recv, key, v])?;
8481 Ok(())
8482 }
8483 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8484 }
8485 }
8486
8487 /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
8488 /// performed inline (returning `Done`); only a function metamethod (`Mm`)
8489 /// needs an actual call — which the caller may run yieldably.
8490 fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
8491 let mut cur = t;
8492 for _ in 0..MAX_TAG_LOOP {
8493 let mm = match cur {
8494 Value::Table(tb) => {
8495 // PI-A3 single-walk collapse — Table::try_set_existing
8496 // fuses the prior `tb.get(key).is_nil()` gate and
8497 // `raw_set` walk into one chain traversal when the
8498 // key is already present with a non-nil value. The
8499 // __newindex chain semantics are preserved by the
8500 // identity (slot_nil ⇔ fire_newindex); see
8501 // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
8502 //
8503 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
8504 // heap is single-threaded and the pointer is live as
8505 // long as it is reachable from active roots (see
8506 // heap.rs:5-7). Mirrors the raw_set wrapper below.
8507 if unsafe { tb.as_mut() }.try_set_existing(key, v) {
8508 self.heap
8509 .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
8510 return Ok(MmOut::Done(Value::Nil));
8511 }
8512 let mm = self.get_mm(cur, Mm::NewIndex);
8513 if mm.is_nil() {
8514 self.raw_set(tb, key, v)?;
8515 return Ok(MmOut::Done(Value::Nil));
8516 }
8517 mm
8518 }
8519 bad => {
8520 let mm = self.get_mm(bad, Mm::NewIndex);
8521 if mm.is_nil() {
8522 return Err(self.type_err("index", bad));
8523 }
8524 mm
8525 }
8526 };
8527 match mm {
8528 Value::Closure(_) | Value::Native(_) => {
8529 return Ok(MmOut::Mm {
8530 func: mm,
8531 recv: cur,
8532 });
8533 }
8534 next => cur = next,
8535 }
8536 }
8537 Err(self.rt_err("'__newindex' chain too long; possible loop"))
8538 }
8539
8540 fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8541 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8542 match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8543 Ok(()) => {
8544 self.heap
8545 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8546 Ok(())
8547 }
8548 Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8549 Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8550 Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8551 Err(TableError::InvalidNext) => unreachable!(),
8552 }
8553 }
8554
8555 /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8556 /// the boolean result; `Mm` (when raw equality fails and both are tables
8557 /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8558 fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8559 if l.raw_eq(r) {
8560 return MmOut::Done(Value::Bool(true));
8561 }
8562 if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8563 (l, r)
8564 {
8565 // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8566 // (and earlier) required the two operands' metatables to expose a
8567 // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8568 // metatable falls straight back to raw inequality. events.lua 5.1
8569 // :262 bakes this in.
8570 let mm = if self.version() <= LuaVersion::Lua51 {
8571 self.get_comp_mm(l, r, Mm::Eq)
8572 } else {
8573 let mut m = self.get_mm(l, Mm::Eq);
8574 if m.is_nil() {
8575 m = self.get_mm(r, Mm::Eq);
8576 }
8577 m
8578 };
8579 if !mm.is_nil() {
8580 return MmOut::Mm { func: mm, recv: l };
8581 }
8582 }
8583 MmOut::Done(Value::Bool(false))
8584 }
8585
8586 // ---- arithmetic ----
8587
8588 #[inline(always)]
8589 fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8590 let l = self.r(base, inst.b());
8591 let r = self.r(base, inst.c());
8592 // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8593 // binary_trees all hammer these. Skipping coerce_num + the big
8594 // arith_fast match shaves several conditional moves per op.
8595 if let (Value::Int(a), Value::Int(b)) = (l, r) {
8596 let fast = match op {
8597 ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8598 ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8599 ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8600 _ => None,
8601 };
8602 if let Some(v) = fast {
8603 self.set_r(base, inst.a(), v);
8604 return Ok(());
8605 }
8606 }
8607 // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8608 // and any numeric workload with non-integer accumulators benefits.
8609 if let (Value::Float(a), Value::Float(b)) = (l, r) {
8610 let fast = match op {
8611 ArithOp::Add => Some(Value::Float(a + b)),
8612 ArithOp::Sub => Some(Value::Float(a - b)),
8613 ArithOp::Mul => Some(Value::Float(a * b)),
8614 ArithOp::Div => Some(Value::Float(a / b)),
8615 _ => None,
8616 };
8617 if let Some(v) = fast {
8618 self.set_r(base, inst.a(), v);
8619 return Ok(());
8620 }
8621 }
8622 match self.arith_fast(op, l, r)? {
8623 Some(v) => self.set_r(base, inst.a(), v),
8624 None => {
8625 let mm = self.arith_mm_func(op, l, r)?;
8626 let dst = base + inst.a();
8627 self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8628 }
8629 }
8630 Ok(())
8631 }
8632
8633 /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8634 /// directly, `Ok(None)` when a metamethod is required (the caller decides
8635 /// whether to call it synchronously or yieldably).
8636 fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8637 use ArithOp::*;
8638 match op {
8639 BAnd | BOr | BXor | Shl | Shr => {
8640 // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8641 match (coerce_num(l), coerce_num(r)) {
8642 (Some(a), Some(b)) => {
8643 let to_int = |n: Num| match n {
8644 Num::Int(i) => Some(i),
8645 Num::Float(f) => crate::runtime::value::f2i_exact(f),
8646 };
8647 let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8648 // PUC luaG_tointerror: name the offending operand
8649 return Err(self.no_int_rep_err());
8650 };
8651 let v = match op {
8652 BAnd => a & b,
8653 BOr => a | b,
8654 BXor => a ^ b,
8655 Shl => shift_left(a, b),
8656 Shr => shift_left(a, b.wrapping_neg()),
8657 _ => unreachable!(),
8658 };
8659 return Ok(Some(Value::Int(v)));
8660 }
8661 _ => return Ok(None),
8662 }
8663 }
8664 _ => {}
8665 }
8666 let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8667 (Some(a), Some(b)) => (a, b),
8668 _ => return Ok(None),
8669 };
8670 let v = match (op, ln, rn) {
8671 (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8672 (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8673 (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8674 (IDiv, Num::Int(a), Num::Int(b)) => {
8675 if b == 0 {
8676 return Err(self.rt_err("attempt to divide by zero"));
8677 }
8678 let mut q = a.wrapping_div(b);
8679 if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8680 q -= 1;
8681 }
8682 Value::Int(q)
8683 }
8684 (Mod, Num::Int(a), Num::Int(b)) => {
8685 if b == 0 {
8686 return Err(self.rt_err("attempt to perform 'n%0'"));
8687 }
8688 let mut m = a.wrapping_rem(b);
8689 if m != 0 && (m ^ b) < 0 {
8690 m += b;
8691 }
8692 Value::Int(m)
8693 }
8694 (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8695 (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8696 (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8697 (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8698 (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8699 (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8700 (Mod, a, b) => {
8701 let (x, y) = (a.as_f64(), b.as_f64());
8702 // PUC luai_nummod: correct fmod's sign without the `m*y`
8703 // product, which underflows to 0 for tiny denormals
8704 let mut m = x % y;
8705 if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8706 m += y;
8707 }
8708 Value::Float(m)
8709 }
8710 _ => unreachable!(),
8711 };
8712 Ok(Some(v))
8713 }
8714
8715 pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8716 match v {
8717 Value::Int(i) => Ok(i),
8718 Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8719 Some(i) => Ok(i),
8720 None => Err(self.rt_err("number has no integer representation")),
8721 },
8722 v => Err(self.type_err(what, v)),
8723 }
8724 }
8725
8726 fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
8727 match n {
8728 Num::Int(i) => Ok(i),
8729 Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
8730 Some(i) => Ok(i),
8731 None => Err(self.rt_err("number has no integer representation")),
8732 },
8733 }
8734 }
8735
8736 /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
8737 /// PUC type error when neither operand provides one.
8738 fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
8739 use ArithOp::*;
8740 let event = match op {
8741 Add => Mm::Add,
8742 Sub => Mm::Sub,
8743 Mul => Mm::Mul,
8744 Div => Mm::Div,
8745 Mod => Mm::Mod,
8746 Pow => Mm::Pow,
8747 IDiv => Mm::IDiv,
8748 BAnd => Mm::BAnd,
8749 BOr => Mm::BOr,
8750 BXor => Mm::BXor,
8751 Shl => Mm::Shl,
8752 Shr => Mm::Shr,
8753 };
8754 let mut mm = self.get_mm(l, event);
8755 if mm.is_nil() {
8756 mm = self.get_mm(r, event);
8757 }
8758 if mm.is_nil() {
8759 let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
8760 "perform bitwise operation on"
8761 } else {
8762 "perform arithmetic on"
8763 };
8764 let bad = if coerce_num(l).is_none() { l } else { r };
8765 return Err(self.type_err(what, bad));
8766 }
8767 Ok(mm)
8768 }
8769
8770 // ---- comparison ----
8771
8772 pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
8773 match self.less_step(l, r, or_eq)? {
8774 MmOut::Done(v) => Ok(v.truthy()),
8775 MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
8776 MmOut::CompareSynth { func } => {
8777 // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
8778 // by library code (sort comparator etc.) — no yield expected
8779 // here (a yield would have hit `call_noyield`'s C boundary).
8780 Ok(!self.call_mm1(func, &[r, l])?.truthy())
8781 }
8782 }
8783 }
8784
8785 /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
8786 /// carries the boolean result; `Mm` (for non-number/string operands) carries
8787 /// the metamethod — called with `(l, r)`; raises the PUC compare error when
8788 /// neither operand provides one.
8789 fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
8790 let b = match (l, r) {
8791 (Value::Int(a), Value::Int(b)) => {
8792 if or_eq {
8793 a <= b
8794 } else {
8795 a < b
8796 }
8797 }
8798 (Value::Float(a), Value::Float(b)) => {
8799 if or_eq {
8800 a <= b
8801 } else {
8802 a < b
8803 }
8804 }
8805 (Value::Int(a), Value::Float(b)) => {
8806 if or_eq {
8807 int_le_float(a, b)
8808 } else {
8809 int_lt_float(a, b)
8810 }
8811 }
8812 (Value::Float(a), Value::Int(b)) => {
8813 if a.is_nan() {
8814 false
8815 } else if or_eq {
8816 !int_lt_float(b, a)
8817 } else {
8818 !int_le_float(b, a)
8819 }
8820 }
8821 (Value::Str(a), Value::Str(b)) => {
8822 let (a, b) = (a.as_bytes(), b.as_bytes());
8823 if or_eq { a <= b } else { a < b }
8824 }
8825 (l, r) => {
8826 let event = if or_eq { Mm::Le } else { Mm::Lt };
8827 // PUC 5.1's `get_compTM` rule applies to ordered comparisons
8828 // too: both operands' metatables must expose the same
8829 // implementation for `__lt` / `__le` to fire. events.lua 5.1
8830 // :262 expects `c < d` (where `d` has no metatable) to error
8831 // with the default "attempt to compare two table values"
8832 // rather than running c's `__lt` blindly.
8833 let mm = if self.version() <= LuaVersion::Lua51 {
8834 self.get_comp_mm(l, r, event)
8835 } else {
8836 let mut m = self.get_mm(l, event);
8837 if m.is_nil() {
8838 m = self.get_mm(r, event);
8839 }
8840 m
8841 };
8842 // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
8843 // operand carries `__le`. 5.4 dropped the synthesis (now
8844 // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
8845 // on the synthesis — its metatable defines only `__lt`.
8846 // The fallback calls `__lt(r, l)` synchronously (the suite's
8847 // `__lt` doesn't yield) and negates the result; the yieldable
8848 // `__lt` path stays reserved for the explicit `<` operator.
8849 if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
8850 let lt = Mm::Lt;
8851 let mut mm_lt = self.get_mm(l, lt);
8852 if mm_lt.is_nil() {
8853 mm_lt = self.get_mm(r, lt);
8854 }
8855 if !mm_lt.is_nil() {
8856 return Ok(MmOut::CompareSynth { func: mm_lt });
8857 }
8858 }
8859 if mm.is_nil() {
8860 // PUC luaG_ordererror: "two X values" when the operand
8861 // types match, "X with Y" otherwise (objtypename-aware).
8862 let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
8863 return Err(self.rt_err(&if t1 == t2 {
8864 format!("attempt to compare two {t1} values")
8865 } else {
8866 format!("attempt to compare {t1} with {t2}")
8867 }));
8868 }
8869 return Ok(MmOut::Mm { func: mm, recv: l });
8870 }
8871 };
8872 Ok(MmOut::Done(Value::Bool(b)))
8873 }
8874
8875 // ---- numeric for ----
8876
8877 fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
8878 let a = inst.a();
8879 let init = self.r(base, a);
8880 let limit = self.r(base, a + 1);
8881 let step = self.r(base, a + 2);
8882 let (Some(init_n), Some(limit_n), Some(step_n)) =
8883 (as_num(init), as_num(limit), as_num(step))
8884 else {
8885 // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
8886 // PUC checks limit, then step, then initial value.
8887 let (what, bad) = if as_num(limit).is_none() {
8888 ("limit", limit)
8889 } else if as_num(step).is_none() {
8890 ("step", step)
8891 } else {
8892 ("initial value", init)
8893 };
8894 let tn = self.obj_typename(bad);
8895 return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
8896 };
8897 // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
8898 // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
8899 // first test, so each successful iteration emits a backward `OP_FORLOOP`
8900 // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
8901 // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
8902 // distance in luna's encoding is `loop_pc - prep_pc`; firing
8903 // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
8904 let pre53 = self.version() <= LuaVersion::Lua53;
8905 match (init_n, step_n) {
8906 (Num::Int(i0), Num::Int(st)) => {
8907 if st == 0 {
8908 return Err(self.rt_err("'for' step is zero"));
8909 }
8910 if pre53 {
8911 // PUC 5.3 `forlimit`: int limit passes through; float limit
8912 // gets clamped to MIN/MAX with a `stopnow` flag set only
8913 // when the clamp is unreachable (positive float with a
8914 // negative step → limit=MAX, stopnow; negative float with
8915 // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
8916 // `init = 0` so OP_FORLOOP's first test against the
8917 // unreachable clamp fails cleanly. An ordinary in-range
8918 // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
8919 // lets OP_FORLOOP's natural test reject the first step.
8920 let (lim, stopnow) = match limit_n {
8921 Num::Int(l) => (l, false),
8922 Num::Float(f) => {
8923 if f.is_nan() {
8924 (0, true)
8925 } else if f >= i64::MAX as f64 + 1.0 {
8926 // beyond +MAX: unreachable for a decreasing loop
8927 (i64::MAX, st < 0)
8928 } else if f <= i64::MIN as f64 {
8929 // beyond -MIN: unreachable for an increasing loop
8930 (i64::MIN, st >= 0)
8931 } else if st > 0 {
8932 (f.floor() as i64, false)
8933 } else {
8934 (f.ceil() as i64, false)
8935 }
8936 }
8937 };
8938 let initv = if stopnow { 0 } else { i0 };
8939 let pre = initv.wrapping_sub(st);
8940 self.set_r(base, a, Value::Int(pre));
8941 self.set_r(base, a + 1, Value::Int(lim));
8942 self.set_r(base, a + 2, Value::Int(st));
8943 self.add_pc(inst.bx() as i32 - 1);
8944 return Ok(());
8945 }
8946 let (lim, empty) = int_for_limit(limit_n, i0, st);
8947 if empty {
8948 self.add_pc(inst.bx() as i32);
8949 return Ok(());
8950 }
8951 let count = if st > 0 {
8952 (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
8953 } else {
8954 (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
8955 };
8956 self.set_r(base, a, Value::Int(i0));
8957 self.set_r(base, a + 1, Value::Int(count as i64));
8958 self.set_r(base, a + 2, Value::Int(st));
8959 self.set_r(base, a + 3, Value::Int(i0));
8960 }
8961 _ => {
8962 let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
8963 if st == 0.0 {
8964 return Err(self.rt_err("'for' step is zero"));
8965 }
8966 if pre53 {
8967 let pre = x0 - st;
8968 self.set_r(base, a, Value::Float(pre));
8969 self.set_r(base, a + 1, Value::Float(lim));
8970 self.set_r(base, a + 2, Value::Float(st));
8971 self.add_pc(inst.bx() as i32 - 1);
8972 return Ok(());
8973 }
8974 let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
8975 if !runs {
8976 self.add_pc(inst.bx() as i32);
8977 return Ok(());
8978 }
8979 self.set_r(base, a, Value::Float(x0));
8980 self.set_r(base, a + 1, Value::Float(lim));
8981 self.set_r(base, a + 2, Value::Float(st));
8982 self.set_r(base, a + 3, Value::Float(x0));
8983 }
8984 }
8985 Ok(())
8986 }
8987
8988 #[inline(always)]
8989 fn for_loop(&mut self, inst: Inst, base: u32) {
8990 let a = inst.a();
8991 // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
8992 // directly (R[a+1] holds the limit, *not* a remaining-count) so the
8993 // first iteration's test fires through the same backward-jump path as
8994 // every later iteration. 5.4+ switched to the count-based form luna
8995 // already uses for `Int`; the float branch was already PUC-3.x-style.
8996 let pre53 = self.version() <= LuaVersion::Lua53;
8997 match self.r(base, a) {
8998 Value::Int(cur) if pre53 => {
8999 let Value::Int(lim) = self.r(base, a + 1) else {
9000 unreachable!()
9001 };
9002 let Value::Int(st) = self.r(base, a + 2) else {
9003 unreachable!()
9004 };
9005 let next = cur.wrapping_add(st);
9006 let cont = if st > 0 { next <= lim } else { next >= lim };
9007 if cont {
9008 self.set_r(base, a, Value::Int(next));
9009 self.set_r(base, a + 3, Value::Int(next));
9010 self.add_pc(-(inst.bx() as i32));
9011 }
9012 }
9013 Value::Int(cur) => {
9014 let Value::Int(count) = self.r(base, a + 1) else {
9015 unreachable!()
9016 };
9017 if count > 0 {
9018 let Value::Int(st) = self.r(base, a + 2) else {
9019 unreachable!()
9020 };
9021 let next = cur.wrapping_add(st);
9022 self.set_r(base, a, Value::Int(next));
9023 self.set_r(base, a + 1, Value::Int(count - 1));
9024 self.set_r(base, a + 3, Value::Int(next));
9025 self.add_pc(-(inst.bx() as i32));
9026 }
9027 }
9028 Value::Float(cur) => {
9029 let Value::Float(lim) = self.r(base, a + 1) else {
9030 unreachable!()
9031 };
9032 let Value::Float(st) = self.r(base, a + 2) else {
9033 unreachable!()
9034 };
9035 let next = cur + st;
9036 let cont = if st > 0.0 { next <= lim } else { next >= lim };
9037 if cont {
9038 self.set_r(base, a, Value::Float(next));
9039 self.set_r(base, a + 3, Value::Float(next));
9040 self.add_pc(-(inst.bx() as i32));
9041 }
9042 }
9043 _ => unreachable!("corrupt for-loop state"),
9044 }
9045 }
9046
9047 // ---- native helpers (used by builtins) ----
9048
9049 /// A native function's own captured upvalue (self lives at func_slot).
9050 ///
9051 /// Public so `native_typed` trampolines and embedders authoring
9052 /// stateful natives via `native_with(...)` can read their upvals.
9053 pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
9054 let Value::Native(nc) = self.stack[func_slot as usize] else {
9055 unreachable!("native frame without native closure");
9056 };
9057 nc.upvals[i]
9058 }
9059
9060 /// Number of upvalues captured by the native at `func_slot` (variadic
9061 /// captures such as the `io.lines` format list).
9062 pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
9063 let Value::Native(nc) = self.stack[func_slot as usize] else {
9064 unreachable!("native frame without native closure");
9065 };
9066 nc.upvals.len()
9067 }
9068
9069 /// Write a native function's own upvalue (stateful iterators).
9070 pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
9071 let Value::Native(nc) = self.stack[func_slot as usize] else {
9072 unreachable!("native frame without native closure");
9073 };
9074 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9075 unsafe { nc.as_mut() }.upvals[i] = v;
9076 // NativeClosure.upvals is traced as part of its Trace; a long-lived
9077 // stateful iterator closure (e.g. string.gmatch) sees many writes —
9078 // barrier_back once-and-done is cheaper than per-child forward.
9079 self.heap
9080 .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9081 }
9082
9083 /// Read the i-th positional argument inside a `NativeFn` body
9084 /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9085 /// matching PUC's "missing arg is nil" contract. Public so embedders
9086 /// can author their own natives.
9087 pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9088 if i < nargs {
9089 self.stack[(func_slot + 1 + i) as usize]
9090 } else {
9091 Value::Nil
9092 }
9093 }
9094
9095 /// Push the return values of a `NativeFn` and return their count
9096 /// (analogous to pushing N values then `return N` from a C function).
9097 /// Public so embedders can author their own natives.
9098 pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9099 let need = func_slot as usize + vals.len();
9100 if self.stack.len() < need {
9101 self.stack.resize(need, Value::Nil);
9102 }
9103 for (i, &v) in vals.iter().enumerate() {
9104 self.stack[func_slot as usize + i] = v;
9105 }
9106 vals.len() as u32
9107 }
9108
9109 /// Fast string concatenation of an adjacent pair, or `None` when a
9110 /// `__concat` metamethod is required.
9111 fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9112 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9113 // Length-check fast paths for both string operands BEFORE the
9114 // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9115 // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9116 // overflow on the first pair that would exceed `INT_MAX` instead
9117 // of allocating multi-GB intermediates first.
9118 let max_str = i32::MAX as usize;
9119 if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9120 let a_len = ls.as_bytes().len();
9121 let b_len = rs.as_bytes().len();
9122 let new_len = a_len.checked_add(b_len);
9123 if new_len.is_none() || new_len.unwrap() > max_str {
9124 return Err(self.rt_err("string length overflow"));
9125 }
9126 }
9127 match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9128 (Some(a), Some(b)) => {
9129 // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9130 // concat past it raises "string length overflow"
9131 // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9132 // exactly this wall).
9133 let new_len = a.len().checked_add(b.len());
9134 if new_len.is_none() || new_len.unwrap() > max_str {
9135 return Err(self.rt_err("string length overflow"));
9136 }
9137 let mut combined = a;
9138 combined.extend_from_slice(&b);
9139 Ok(Some(Value::Str(self.heap.intern(&combined))))
9140 }
9141 _ => Ok(None),
9142 }
9143 }
9144
9145 /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9146 /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9147 /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9148 /// call — its `Meta` continuation re-enters here on the metamethod's return.
9149 fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9150 // Sum the lengths of all all-Str operands BEFORE starting the
9151 // right-associative fold so a 129-operand `a..a..…` chain
9152 // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9153 // not after dozens of multi-GB intermediate intern+hash rounds.
9154 // A non-Str operand falls through to the per-pair check.
9155 let max_str = i32::MAX as usize;
9156 let mut total: usize = 0;
9157 let mut all_str = true;
9158 for slot in base_a..self.top {
9159 match self.stack[slot as usize] {
9160 Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9161 Some(t) if t <= max_str => total = t,
9162 _ => return Err(self.rt_err("string length overflow")),
9163 },
9164 _ => {
9165 all_str = false;
9166 break;
9167 }
9168 }
9169 }
9170 let _ = all_str; // discrimination already captured by early returns above
9171 while self.top.saturating_sub(base_a) >= 2 {
9172 let i = self.top - 1; // rightmost operand
9173 let x = self.stack[(i - 1) as usize];
9174 let y = self.stack[i as usize];
9175 match self.concat_pair(x, y)? {
9176 Some(s) => {
9177 self.stack[(i - 1) as usize] = s;
9178 self.top = i; // consumed y
9179 }
9180 None => {
9181 let mut mm = self.get_mm(x, Mm::Concat);
9182 if mm.is_nil() {
9183 mm = self.get_mm(y, Mm::Concat);
9184 }
9185 if mm.is_nil() {
9186 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9187 let bad = if concat_piece(x, legacy).is_none() {
9188 x
9189 } else {
9190 y
9191 };
9192 return Err(self.type_err("concatenate", bad));
9193 }
9194 // result lands at i-1, dropping y (top→i); resume continues.
9195 let dst = i - 1;
9196 self.begin_meta_call(
9197 mm,
9198 &[x, y],
9199 MetaAction::Concat { dst, base_a },
9200 "concat",
9201 )?;
9202 return Ok(());
9203 }
9204 }
9205 }
9206 self.maybe_collect_garbage(base_a + 1);
9207 Ok(())
9208 }
9209
9210 /// tostring with __tostring / __name support.
9211 pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9212 let mm = self.get_mm(v, Mm::ToString);
9213 if !mm.is_nil() {
9214 return match self.call_mm1(mm, &[v])? {
9215 Value::Str(s) => Ok(s.as_bytes().to_vec()),
9216 _ => Err(self.rt_err("'__tostring' must return a string")),
9217 };
9218 }
9219 if let Value::Table(t) = v
9220 && let Value::Str(name) = self.get_mm(v, Mm::Name)
9221 {
9222 let mut out = name.as_bytes().to_vec();
9223 out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9224 return Ok(out);
9225 }
9226 Ok(self.tostring_basic(v))
9227 }
9228
9229 /// Basic tostring (no metamethods).
9230 pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9231 match v {
9232 Value::Nil => b"nil".to_vec(),
9233 Value::Bool(true) => b"true".to_vec(),
9234 Value::Bool(false) => b"false".to_vec(),
9235 Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9236 // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9237 // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9238 // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9239 // concatenating these renderings.
9240 Value::Float(f) => {
9241 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9242 numeric::num_to_string_for(Num::Float(f), legacy).into_bytes()
9243 }
9244 Value::Str(s) => s.as_bytes().to_vec(),
9245 Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9246 Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9247 Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9248 Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9249 // PUC names file handles `file (0x…)`; a bare userdata is
9250 // `userdata: 0x…`. The io library overrides this via __tostring.
9251 Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9252 // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9253 // (the "light" qualifier only appears in `luaL_typeerror`).
9254 Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9255 }
9256 }
9257}
9258
/// Binary arithmetic and bitwise operators handled by the interpreter's
/// arithmetic paths.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `%`
    Mod,
    /// `^`
    Pow,
    /// `/`
    Div,
    /// `//`
    IDiv,
    /// `&`
    BAnd,
    /// `|`
    BOr,
    /// `~` (binary xor)
    BXor,
    /// `<<`
    Shl,
    /// `>>`
    Shr,
}

impl ArithOp {
    /// The metamethod event name without the `__` prefix (`__add` → "add"
    /// etc.), as reported by `debug.getinfo(level, "n")` inside a
    /// metamethod handler.
    fn mm_name(self) -> &'static str {
        use ArithOp::*;
        match self {
            Add => "add",
            Sub => "sub",
            Mul => "mul",
            Mod => "mod",
            Pow => "pow",
            Div => "div",
            IDiv => "idiv",
            BAnd => "band",
            BOr => "bor",
            BXor => "bxor",
            Shl => "shl",
            Shr => "shr",
        }
    }
}
9295
9296fn as_num(v: Value) -> Option<Num> {
9297 match v {
9298 Value::Int(i) => Some(Num::Int(i)),
9299 Value::Float(f) => Some(Num::Float(f)),
9300 // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9301 Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9302 _ => None,
9303 }
9304}
9305
9306/// A concatenable operand's byte form (string, or a number coerced to its
9307/// string), or `None` when only a `__concat` metamethod can handle it.
9308/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9309/// suffix on integer-valued floats) — see `num_to_string_for`.
9310fn concat_piece(v: Value, legacy_float: bool) -> Option<Vec<u8>> {
9311 match v {
9312 Value::Str(s) => Some(s.as_bytes().to_vec()),
9313 Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9314 Value::Float(x) => {
9315 Some(numeric::num_to_string_for(Num::Float(x), legacy_float).into_bytes())
9316 }
9317 _ => None,
9318 }
9319}
9320
9321/// Index into the per-basic-type metatable table for a non-table value
9322/// (None for tables, which carry their own metatable).
9323fn type_mt_slot(v: Value) -> Option<usize> {
9324 match v {
9325 Value::Nil => Some(0),
9326 Value::Bool(_) => Some(1),
9327 Value::Int(_) | Value::Float(_) => Some(2),
9328 Value::Str(_) => Some(3),
9329 Value::Closure(_) | Value::Native(_) => Some(4),
9330 // tables and full userdata carry their own metatable; threads and
9331 // light userdata have none (PUC keeps a shared per-type mt slot for
9332 // light, but luna doesn't expose it — no test gates on it yet).
9333 Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9334 }
9335}
9336
9337/// Number, or string coerced to number (5.5 default string-arith coercion).
9338fn coerce_num(v: Value) -> Option<Num> {
9339 match v {
9340 Value::Int(i) => Some(Num::Int(i)),
9341 Value::Float(f) => Some(Num::Float(f)),
9342 Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9343 _ => None,
9344 }
9345}
9346
/// Lua's `a << b`: a logical shift on the 64-bit pattern of `a`.
/// A negative `b` shifts right instead, and any shift distance of 64 or
/// more (in either direction) yields 0.
fn shift_left(a: i64, b: i64) -> i64 {
    let bits = a as u64;
    match b {
        0..=63 => (bits << (b as u32)) as i64,
        -63..=-1 => (bits >> (b.unsigned_abs() as u32)) as i64,
        _ => 0,
    }
}
9362
/// Exact `i < f` between an integer and a float (PUC LTintfloat shape),
/// without rounding `i` to a float. NaN compares false.
fn int_lt_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    match f {
        x if x.is_nan() => false,
        // at or above 2^63: larger than every i64
        x if x >= TWO_POW_63 => true,
        // below -2^63: smaller than every i64
        x if x < -TWO_POW_63 => false,
        x => {
            let floor = x.floor();
            let whole = floor as i64;
            // For a fractional `f`, `i < f` is the same as `i <= floor(f)`.
            if floor == x { i < whole } else { i <= whole }
        }
    }
}
9378
/// Exact `i <= f` between an integer and a float, without rounding `i`
/// to a float. NaN compares false.
fn int_le_float(i: i64, f: f64) -> bool {
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    match f {
        x if x.is_nan() => false,
        // at or above 2^63: larger than every i64
        x if x >= TWO_POW_63 => true,
        // below -2^63: smaller than every i64
        x if x < -TWO_POW_63 => false,
        // `i <= f` holds exactly when `i <= floor(f)`
        x => i <= x.floor() as i64,
    }
}
9392
9393/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9394/// (clipped limit, loop-is-empty).
9395fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9396 match limit {
9397 Num::Int(l) => {
9398 let empty = if step > 0 { init > l } else { init < l };
9399 (l, empty)
9400 }
9401 Num::Float(f) => {
9402 if f.is_nan() {
9403 return (0, true);
9404 }
9405 if step > 0 {
9406 if f >= 9_223_372_036_854_775_808.0 {
9407 (i64::MAX, false)
9408 } else {
9409 let l = f.floor();
9410 if l < -9_223_372_036_854_775_808.0 {
9411 (i64::MIN, true)
9412 } else {
9413 let li = l as i64;
9414 (li, init > li)
9415 }
9416 }
9417 } else if f <= -9_223_372_036_854_775_808.0 {
9418 (i64::MIN, false)
9419 } else {
9420 let l = f.ceil();
9421 if l >= 9_223_372_036_854_775_808.0 {
9422 // PUC forlimit: a positive limit beyond the integer range
9423 // is unreachable for a decreasing loop — empty.
9424 (i64::MAX, true)
9425 } else {
9426 let li = l as i64;
9427 (li, init < li)
9428 }
9429 }
9430 }
9431 }
9432}
9433
9434/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9435/// `@file` / `=name` markers in `source`).
9436fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9437 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9438 let b = unsafe { crate::runtime::string::bytes_of(p) };
9439 match b.first() {
9440 Some(b'@') | Some(b'=') => &b[1..],
9441 _ => b,
9442 }
9443}
9444
9445impl Vm {
9446 /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9447 /// that called the current native. Returns (closure, current line,
9448 /// extra vararg count).
9449 /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9450 /// function running at `level`, recovered from the caller's call
9451 /// instruction (PUC funcnamefromcode). None for the main chunk or a
9452 /// tail/anonymous call with no recoverable name.
9453 /// A debug-level position: either a real Lua frame (by index) or a synthetic
9454 /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9455 /// coroutine body), which `debug.getinfo` and traceback report as "C".
9456 /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9457 /// frame at `level`'s current pc, as (name, value). Locals are visited in
9458 /// registration order (start pc, then register) to match luaF_getlocalname.
9459 pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9460 if n == 0 {
9461 return None;
9462 }
9463 let fi = match self.dbg_frame(level)? {
9464 DbgKind::Lua(fi) => fi,
9465 // Tail-call placeholder has no real frame backing it — no locals
9466 // exist to read or write here. PUC `findlocal` returns NULL on
9467 // a CIST_TAIL activation.
9468 DbgKind::Tail(_) => return None,
9469 // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9470 // for slot `n` inside the argument window (db.lua :408-:413, and
9471 // the call/return hook reads of math.sin / select args via
9472 // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9473 // meaningful for a C frame here.
9474 DbgKind::C(fi) => {
9475 if n < 1 {
9476 return None;
9477 }
9478 let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9479 if (n as u32) > nargs {
9480 return None;
9481 }
9482 let slot = (func_slot + n as u32) as usize;
9483 let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9484 return Some((self.temporary_locvar_name().to_string(), val));
9485 }
9486 };
9487 let f = self.frames[fi].lua()?;
9488 // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9489 // is the first extra arg passed to the function (`...[1]`), `-2` the
9490 // second, etc. The 5.5 stack layout parks varargs in
9491 // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9492 if n < 0 {
9493 let i = (-n) as u32;
9494 if i == 0 || i > f.n_varargs {
9495 return None;
9496 }
9497 let val = self
9498 .stack
9499 .get((f.func_slot + i) as usize)
9500 .copied()
9501 .unwrap_or(Value::Nil);
9502 return Some((self.vararg_locvar_name().to_string(), val));
9503 }
9504 let proto = f.closure.proto;
9505 // PUC's parser injects a hidden `(vararg table)` locvar for an
9506 // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9507 // right after the fixed parameters (`numparams + 1`). Main chunks
9508 // and `(...t)` named-vararg funcs do NOT get one — gate on the
9509 // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9510 // their declared registers (no shadow slot allocated), so we expose
9511 // that hidden index purely in this debug view.
9512 let num_params = proto.num_params as i64;
9513 let vararg_slot = if proto.has_vararg_table_pseudo {
9514 Some(num_params + 1)
9515 } else {
9516 None
9517 };
9518 if vararg_slot == Some(n) {
9519 return Some(("(vararg table)".to_string(), Value::Nil));
9520 }
9521 let pc = (f.pc as usize).saturating_sub(1);
9522 let mut active: Vec<&crate::runtime::LocVar> = proto
9523 .locvars
9524 .iter()
9525 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9526 .collect();
9527 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9528 let mut idx: i64 = n - 1;
9529 if let Some(vs) = vararg_slot
9530 && n > vs
9531 {
9532 idx -= 1;
9533 }
9534 let idx = idx as usize;
9535 if let Some(lv) = active.get(idx) {
9536 let val = self
9537 .stack
9538 .get((f.base + lv.reg) as usize)
9539 .copied()
9540 .unwrap_or(Value::Nil);
9541 return Some((lv.name.to_string(), val));
9542 }
9543 // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9544 // still inside the frame's live register window — report a
9545 // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9546 // the next frame's func slot (`ci->next->func.p`) so the
9547 // temporary window stops where the callee's frame begins
9548 // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9549 // an out-of-range slot).
9550 let limit = self
9551 .frames
9552 .get(fi + 1)
9553 .and_then(|cf| cf.lua())
9554 .map(|nf| nf.func_slot)
9555 .unwrap_or_else(|| self.top.max(f.base));
9556 let temp_reg = idx as u32;
9557 if f.base + temp_reg < limit {
9558 let val = self
9559 .stack
9560 .get((f.base + temp_reg) as usize)
9561 .copied()
9562 .unwrap_or(Value::Nil);
9563 return Some((self.lua_temporary_locvar_name().to_string(), val));
9564 }
9565 None
9566 }
9567
9568 /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9569 /// the local / vararg name on success, `None` when the slot does not
9570 /// resolve. Mirrors `local_at`'s indexing exactly.
9571 pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9572 if n == 0 {
9573 return None;
9574 }
9575 let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9576 return None;
9577 };
9578 let f = self.frames[fi].lua()?;
9579 if n < 0 {
9580 let i = (-n) as u32;
9581 if i == 0 || i > f.n_varargs {
9582 return None;
9583 }
9584 let slot = (f.func_slot + i) as usize;
9585 if let Some(s) = self.stack.get_mut(slot) {
9586 *s = v;
9587 }
9588 return Some(self.vararg_locvar_name().to_string());
9589 }
9590 let proto = f.closure.proto;
9591 let num_params = proto.num_params as i64;
9592 let vararg_slot = if proto.has_vararg_table_pseudo {
9593 Some(num_params + 1)
9594 } else {
9595 None
9596 };
9597 if vararg_slot == Some(n) {
9598 // hidden (vararg table) slot has no real storage — accept the
9599 // write as a no-op for PUC parity (db.lua doesn't write to it).
9600 return Some("(vararg table)".to_string());
9601 }
9602 let pc = (f.pc as usize).saturating_sub(1);
9603 let mut active: Vec<&crate::runtime::LocVar> = proto
9604 .locvars
9605 .iter()
9606 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9607 .collect();
9608 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9609 let mut idx: i64 = n - 1;
9610 if let Some(vs) = vararg_slot
9611 && n > vs
9612 {
9613 idx -= 1;
9614 }
9615 let idx = idx as usize;
9616 let (name, reg) = if let Some(lv) = active.get(idx) {
9617 (lv.name.to_string(), lv.reg)
9618 } else {
9619 // PUC `luaG_findlocal` fallback into the temporary window —
9620 // bounded by the next frame's func slot (see local_at).
9621 let limit = self
9622 .frames
9623 .get(fi + 1)
9624 .and_then(|cf| cf.lua())
9625 .map(|nf| nf.func_slot)
9626 .unwrap_or_else(|| self.top.max(f.base));
9627 let temp_reg = idx as u32;
9628 if f.base + temp_reg >= limit {
9629 return None;
9630 }
9631 (self.lua_temporary_locvar_name().to_string(), temp_reg)
9632 };
9633 let slot = (f.base + reg) as usize;
9634 if let Some(s) = self.stack.get_mut(slot) {
9635 *s = v;
9636 }
9637 Some(name)
9638 }
9639
9640 /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9641 /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9642 /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9643 /// vararg indexing past `n_varargs`, or for a register past the live
9644 /// window. Naming follows the same priority as `local_at`: named locals,
9645 /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9646 /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9647 /// window.
9648 pub(crate) fn local_at_coro(
9649 &self,
9650 co: Gc<crate::runtime::Coro>,
9651 level: i64,
9652 n: i64,
9653 ) -> Option<(String, Value)> {
9654 if level < 1 || n == 0 {
9655 return None;
9656 }
9657 let frames = &co.frames;
9658 // Logical level: iterate Lua frames from the top.
9659 let lua_indices: Vec<usize> = (0..frames.len())
9660 .rev()
9661 .filter(|&i| frames[i].lua().is_some())
9662 .collect();
9663 let fi = *lua_indices.get((level - 1) as usize)?;
9664 let f = frames[fi].lua()?;
9665 if n < 0 {
9666 let i = (-n) as u32;
9667 if i == 0 || i > f.n_varargs {
9668 return None;
9669 }
9670 let val = co
9671 .stack
9672 .get((f.func_slot + i) as usize)
9673 .copied()
9674 .unwrap_or(Value::Nil);
9675 return Some((self.vararg_locvar_name().to_string(), val));
9676 }
9677 let proto = f.closure.proto;
9678 let num_params = proto.num_params as i64;
9679 let vararg_slot = if proto.has_vararg_table_pseudo {
9680 Some(num_params + 1)
9681 } else {
9682 None
9683 };
9684 if vararg_slot == Some(n) {
9685 return Some(("(vararg table)".to_string(), Value::Nil));
9686 }
9687 let pc = (f.pc as usize).saturating_sub(1);
9688 let mut active: Vec<&crate::runtime::LocVar> = proto
9689 .locvars
9690 .iter()
9691 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9692 .collect();
9693 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9694 let mut idx: i64 = n - 1;
9695 if let Some(vs) = vararg_slot
9696 && n > vs
9697 {
9698 idx -= 1;
9699 }
9700 let idx = idx as usize;
9701 if let Some(lv) = active.get(idx) {
9702 let val = co
9703 .stack
9704 .get((f.base + lv.reg) as usize)
9705 .copied()
9706 .unwrap_or(Value::Nil);
9707 return Some((lv.name.to_string(), val));
9708 }
9709 let limit = frames
9710 .get(fi + 1)
9711 .and_then(|cf| cf.lua())
9712 .map(|nf| nf.func_slot)
9713 .unwrap_or(co.top.max(f.base));
9714 let temp_reg = idx as u32;
9715 if f.base + temp_reg < limit {
9716 let val = co
9717 .stack
9718 .get((f.base + temp_reg) as usize)
9719 .copied()
9720 .unwrap_or(Value::Nil);
9721 return Some((self.lua_temporary_locvar_name().to_string(), val));
9722 }
9723 None
9724 }
9725
9726 /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
9727 /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
9728 pub(crate) fn local_set_coro(
9729 &mut self,
9730 co: Gc<crate::runtime::Coro>,
9731 level: i64,
9732 n: i64,
9733 v: Value,
9734 ) -> Option<String> {
9735 if level < 1 || n == 0 {
9736 return None;
9737 }
9738 let lua_indices: Vec<usize> = (0..co.frames.len())
9739 .rev()
9740 .filter(|&i| co.frames[i].lua().is_some())
9741 .collect();
9742 let fi = *lua_indices.get((level - 1) as usize)?;
9743 let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
9744 let f = co.frames[fi].lua()?;
9745 (
9746 f.func_slot,
9747 f.n_varargs,
9748 f.base,
9749 f.closure.proto,
9750 co.top.max(f.base),
9751 co.frames
9752 .get(fi + 1)
9753 .and_then(|cf| cf.lua())
9754 .map(|nf| nf.func_slot),
9755 )
9756 };
9757 if n < 0 {
9758 let i = (-n) as u32;
9759 if i == 0 || i > n_varargs {
9760 return None;
9761 }
9762 let slot = (func_slot + i) as usize;
9763 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9764 let stack = unsafe { &mut co.as_mut().stack };
9765 if let Some(s) = stack.get_mut(slot) {
9766 *s = v;
9767 }
9768 // co.stack values are traced — once-per-call barrier so propagate
9769 // sees the new value if co was already BLACK this cycle.
9770 self.heap
9771 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9772 return Some(self.vararg_locvar_name().to_string());
9773 }
9774 let num_params = proto.num_params as i64;
9775 let vararg_slot = if proto.has_vararg_table_pseudo {
9776 Some(num_params + 1)
9777 } else {
9778 None
9779 };
9780 if vararg_slot == Some(n) {
9781 return Some("(vararg table)".to_string());
9782 }
9783 let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
9784 let mut active: Vec<&crate::runtime::LocVar> = proto
9785 .locvars
9786 .iter()
9787 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9788 .collect();
9789 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9790 let mut idx: i64 = n - 1;
9791 if let Some(vs) = vararg_slot
9792 && n > vs
9793 {
9794 idx -= 1;
9795 }
9796 let idx = idx as usize;
9797 let (name, reg) = if let Some(lv) = active.get(idx) {
9798 (lv.name.to_string(), lv.reg)
9799 } else {
9800 let limit = next_func_slot.unwrap_or(top_for_temp);
9801 let temp_reg = idx as u32;
9802 if base + temp_reg >= limit {
9803 return None;
9804 }
9805 (self.lua_temporary_locvar_name().to_string(), temp_reg)
9806 };
9807 let slot = (base + reg) as usize;
9808 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9809 let stack = unsafe { &mut co.as_mut().stack };
9810 if let Some(s) = stack.get_mut(slot) {
9811 *s = v;
9812 }
9813 // co.stack values are traced — once-per-call barrier so propagate
9814 // sees the new value if co was already BLACK this cycle.
9815 self.heap
9816 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9817 Some(name)
9818 }
9819
9820 /// Frame info for a level on a suspended coroutine (PUC
9821 /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
9822 /// Returns the closure + currentline + extraargs + istailcall for the
9823 /// level-th Lua activation in `co.frames`. None if level overshoots.
9824 pub(crate) fn coro_frame_info(
9825 &self,
9826 co: Gc<crate::runtime::Coro>,
9827 level: i64,
9828 ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
9829 if level < 1 {
9830 return None;
9831 }
9832 let lua_indices: Vec<usize> = (0..co.frames.len())
9833 .rev()
9834 .filter(|&i| co.frames[i].lua().is_some())
9835 .collect();
9836 let fi = *lua_indices.get((level - 1) as usize)?;
9837 let f = co.frames[fi].lua()?;
9838 let proto = f.closure.proto;
9839 let pc = (f.pc as usize)
9840 .saturating_sub(1)
9841 .min(proto.lines.len().saturating_sub(1));
9842 let line = proto.lines.get(pc).copied().unwrap_or(0);
9843 Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
9844 }
9845
9846 /// Whether `level` resolves to any live activation (PUC lua_getstack).
9847 pub(crate) fn level_in_range(&self, level: i64) -> bool {
9848 self.dbg_frame(level).is_some()
9849 }
9850
9851 /// PUC's debug-API placeholder for an unnamed vararg slot returned by
9852 /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
9853 /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
9854 /// 5.3 :195 / 5.4 :286 baseline on their respective form.
9855 pub(crate) fn vararg_locvar_name(&self) -> &'static str {
9856 if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
9857 "(*vararg)"
9858 } else {
9859 "(vararg)"
9860 }
9861 }
9862
9863 /// PUC's debug-API placeholder for an unnamed temporary on a C
9864 /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
9865 /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
9866 /// their spelling.
9867 pub(crate) fn temporary_locvar_name(&self) -> &'static str {
9868 if matches!(
9869 self.version,
9870 LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9871 ) {
9872 // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
9873 // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
9874 // to `(C temporary)`.
9875 "(*temporary)"
9876 } else {
9877 "(C temporary)"
9878 }
9879 }
9880
9881 /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
9882 /// (an arithmetic intermediate sitting past the last named local on a
9883 /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
9884 /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
9885 /// spelling.
9886 pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
9887 if matches!(
9888 self.version,
9889 LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9890 ) {
9891 "(*temporary)"
9892 } else {
9893 "(temporary)"
9894 }
9895 }
9896
9897 /// The Lua closure running at `level` on the current thread, or `None`
9898 /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
9899 /// need this to reach the function whose env they read or rewrite.
9900 pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
9901 // `DbgKind::Tail` also falls into the else branch — a tail-call
9902 // placeholder has no closure of its own, so PUC's `lua_getstack` +
9903 // `getfunc` for that level returns no function, and `getfenv(level)`
9904 // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
9905 let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9906 return None;
9907 };
9908 Some(self.frames[fi].lua()?.closure)
9909 }
9910
9911 pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
9912 if level < 1 {
9913 return false;
9914 }
9915 let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
9916 (level as usize) <= count
9917 }
9918
9919 pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
9920 if level < 1 {
9921 return None;
9922 }
9923 // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
9924 // activation counts as a level, and each Lua activation's
9925 // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
9926 // dropped the synthetic shape: `istailcall` becomes a flag on the
9927 // real frame and Cont activations no longer count separately.
9928 // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
9929 // `getinfo(2).func == g1` pins the 5.2+ shape.
9930 let v51 = self.version <= LuaVersion::Lua51;
9931 let mut lvl = level;
9932 for fi in (0..self.frames.len()).rev() {
9933 match &self.frames[fi] {
9934 CallFrame::Lua(f) => {
9935 lvl -= 1;
9936 if lvl == 0 {
9937 return Some(DbgKind::Lua(fi));
9938 }
9939 if v51 {
9940 // 5.1 reports one synthetic CIST_TAIL level per
9941 // collapsed tail call (PUC `lua_getstack` subtracts
9942 // `ci->u.l.tailcalls` from the remaining level).
9943 for _ in 0..f.tailcalls {
9944 lvl -= 1;
9945 if lvl == 0 {
9946 return Some(DbgKind::Tail(fi));
9947 }
9948 }
9949 }
9950 if f.from_c {
9951 lvl -= 1;
9952 if lvl == 0 {
9953 return Some(DbgKind::C(fi));
9954 }
9955 }
9956 }
9957 CallFrame::Cont(_) => {
9958 if !v51 {
9959 continue;
9960 }
9961 lvl -= 1;
9962 if lvl == 0 {
9963 let parent = (0..fi)
9964 .rev()
9965 .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
9966 return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
9967 }
9968 }
9969 }
9970 }
9971 None
9972 }
9973
9974 pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9975 let f = self.frames[fi].lua()?;
9976 // metamethod handler frames carry the event tag (e.g. "close" for
9977 // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
9978 if f.is_hook {
9979 return Some(("hook", "?".to_string()));
9980 }
9981 if let Some(tm) = f.tm {
9982 return Some(("metamethod", tm_debug_name(self.version, tm)));
9983 }
9984 // a frame entered across a C boundary has no naming call instruction
9985 if fi == 0 || f.from_c {
9986 return None;
9987 }
9988 // the caller's call instruction names this frame; a continuation frame
9989 // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
9990 // already short-circuits those.
9991 let caller = self.frames[fi - 1].lua()?;
9992 let caller_proto = caller.closure.proto;
9993 let p: &crate::runtime::Proto = &caller_proto;
9994 let call_pc = (caller.pc as usize).checked_sub(1)?;
9995 let instr = *p.code.get(call_pc)?;
9996 match instr.op() {
9997 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9998 Op::TForCall => Some(("for iterator", "for iterator".to_string())),
9999 _ => None,
10000 }
10001 }
10002
10003 /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
10004 /// (PUC names a C function from the call instruction that invoked it). The
10005 /// native was called by the nearest Lua frame below `fi` (skipping pcall/
10006 /// xpcall continuations); that frame's call instruction names it.
10007 pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
10008 // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
10009 // the synthetic C edge between the __gc finalizer (top Lua frame, has
10010 // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
10011 // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
10012 // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
10013 // sets the tag on the handler frame only, so level 2 there still
10014 // names the calling Lua frame's call instruction (5.5 locals.lua
10015 // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
10016 if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
10017 && fr.tm == Some("gc")
10018 {
10019 let name = tm_debug_name(self.version, "gc");
10020 return Some(("metamethod", name));
10021 }
10022 let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
10023 let caller = self.frames[caller_fi].lua()?;
10024 let p = &caller.closure.proto;
10025 let call_pc = (caller.pc as usize).checked_sub(1)?;
10026 let instr = *p.code.get(call_pc)?;
10027 match instr.op() {
10028 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10029 _ => None,
10030 }
10031 }
10032
10033 /// Native value currently sitting on the synthetic C edge identified by
10034 /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
10035 /// above `fi` (each one corresponds to one native pushing the hook) and
10036 /// indexes into `running_natives` from the top, also skipping the caller
10037 /// of `getinfo` itself (the native that is currently asking).
10038 /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
10039 /// expects the just-entered C function.
10040 pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
10041 let idx = self.c_frame_native_idx(fi)?;
10042 Some(Value::Native(self.running_natives[idx]))
10043 }
10044
10045 /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
10046 /// so `local_at` can index the native's argument window like PUC's
10047 /// `(C temporary)` path. Returns `None` when no matching native exists
10048 /// (e.g. the C edge corresponds to a non-native boundary).
10049 pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
10050 let idx = self.c_frame_native_idx(fi)?;
10051 self.running_native_slots.get(idx).copied()
10052 }
10053
10054 fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
10055 let n_above = self.frames[fi..]
10056 .iter()
10057 .filter_map(CallFrame::lua)
10058 .filter(|f| f.from_c)
10059 .count();
10060 if n_above == 0 {
10061 return None;
10062 }
10063 // running_natives.last() is the native currently executing (the one
10064 // that called getinfo). Pop it conceptually, then take the n_above-th
10065 // entry from the top of what remains.
10066 let nr = self.running_natives.len().checked_sub(1)?;
10067 nr.checked_sub(n_above)
10068 }
10069
10070 /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
10071 /// native whose function pointer matches `target`, and return its qualified
10072 /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
10073 /// `None` if no match is found. Used by `arg_error` when the running native
10074 /// was invoked from another native (PUC `ar.name == NULL` at level 0).
10075 pub(crate) fn pushglobalfuncname(
10076 &mut self,
10077 target: crate::runtime::value::NativeFn,
10078 ) -> Option<String> {
10079 let pkg_k = Value::Str(self.heap.intern(b"package"));
10080 let pkg = match self.globals().get(pkg_k) {
10081 Value::Table(t) => t,
10082 _ => return None,
10083 };
10084 let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10085 let loaded = match pkg.get(loaded_k) {
10086 Value::Table(t) => t,
10087 _ => return None,
10088 };
10089 let matches = |v: Value| -> bool {
10090 matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10091 };
10092 let mut k = Value::Nil;
10093 while let Ok(Some((nk, nv))) = loaded.next(k) {
10094 k = nk;
10095 let Value::Str(outer) = nk else { continue };
10096 let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10097 if matches(nv) {
10098 return Some(if outer == "_G" { String::new() } else { outer });
10099 }
10100 if let Value::Table(inner_t) = nv {
10101 let mut k2 = Value::Nil;
10102 while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10103 k2 = nk2;
10104 if matches(nv2)
10105 && let Value::Str(inner) = nk2
10106 {
10107 let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10108 return Some(if outer == "_G" {
10109 inner
10110 } else {
10111 format!("{outer}.{inner}")
10112 });
10113 }
10114 }
10115 }
10116 }
10117 None
10118 }
10119
10120 /// Name and namewhat of the native currently running on behalf of the top
10121 /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10122 /// `luaL_argerror` rewrite a method call's self-argument error.
10123 pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10124 let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10125 let p = &caller.closure.proto;
10126 let call_pc = (caller.pc as usize).checked_sub(1)?;
10127 let instr = *p.code.get(call_pc)?;
10128 match instr.op() {
10129 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10130 _ => None,
10131 }
10132 }
10133
10134 pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10135 let f = self.frames[fi].lua().expect("Lua frame");
10136 let proto = f.closure.proto;
10137 let pc = (f.pc as usize)
10138 .saturating_sub(1)
10139 .min(proto.lines.len().saturating_sub(1));
10140 let line = proto.lines.get(pc).copied().unwrap_or(0);
10141 // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10142 // (independent of any later write to a materialized vararg table's `n`).
10143 // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10144 // any nonzero `tailcalls` count flips it true.
10145 (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10146 }
10147
10148 /// Read an upvalue cell of a closure (debug.getupvalue).
10149 pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10150 match cl.upvals()[idx].state() {
10151 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10152 UpvalState::Closed(v) => v,
10153 }
10154 }
10155
10156 /// Write an upvalue cell of a closure (debug.setupvalue).
10157 pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10158 let uv = cl.upvals()[idx];
10159 match uv.state() {
10160 UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10161 UpvalState::Closed(_) => {
10162 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10163 unsafe { uv.as_mut() }.set_closed(v);
10164 self.heap
10165 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10166 }
10167 }
10168 }
10169
10170 /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10171 /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10172 /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10173 /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10174 /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10175 /// else `"function <src:line_defined>"` for an anonymous Lua function.
10176 /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10177 /// Walks the coroutine's saved frames and prepends a synthetic C-level
10178 /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10179 /// (its `resume_at` marker is set). `level` skips entries from the top
10180 /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10181 /// frame; etc.). db.lua :764-:768 sample several levels.
10182 pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10183 use crate::runtime::CoroStatus;
10184 const LEVELS1: usize = 10;
10185 const LEVELS2: usize = 11;
10186 #[derive(Clone, Copy)]
10187 enum VFrame<'a> {
10188 Lua(&'a crate::runtime::function::Frame),
10189 CPcall,
10190 CXpcall,
10191 CYield,
10192 /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10193 /// call collapsed into the next Lua frame down the chain.
10194 Tail,
10195 }
10196 let v51 = self.version <= LuaVersion::Lua51;
10197 let mut visible: Vec<VFrame<'_>> = Vec::new();
10198 // PUC's level 0 entry on a suspended coroutine is the C call where it
10199 // paused — `coroutine.yield` for a yielded thread.
10200 if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10201 visible.push(VFrame::CYield);
10202 }
10203 for cf in co.frames.iter().rev() {
10204 match cf {
10205 CallFrame::Lua(f) => {
10206 visible.push(VFrame::Lua(f));
10207 if v51 {
10208 for _ in 0..f.tailcalls {
10209 visible.push(VFrame::Tail);
10210 }
10211 }
10212 }
10213 CallFrame::Cont(nc) => match nc.kind {
10214 ContKind::Pcall => visible.push(VFrame::CPcall),
10215 ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10216 _ => {}
10217 },
10218 }
10219 }
10220 if level < 0 {
10221 level = 0;
10222 }
10223 if (level as usize) >= visible.len() {
10224 return Vec::new();
10225 }
10226 let visible = &visible[level as usize..];
10227 let total = visible.len();
10228 let mut out = Vec::new();
10229 // To name a Lua frame, PUC consults the caller's OP_CALL via
10230 // getobjname: find the index `fi` of the current frame in co.frames,
10231 // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10232 let coro_frame_name = |frames: &[CallFrame],
10233 target: &crate::runtime::function::Frame|
10234 -> Option<(&'static str, String)> {
10235 let fi = frames
10236 .iter()
10237 .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10238 if fi == 0 || target.from_c {
10239 return None;
10240 }
10241 let caller = frames[fi - 1].lua()?;
10242 let p = &caller.closure.proto;
10243 let call_pc = (caller.pc as usize).checked_sub(1)?;
10244 let instr = *p.code.get(call_pc)?;
10245 match instr.op() {
10246 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10247 Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10248 _ => None,
10249 }
10250 };
10251 let frames = &co.frames;
10252 let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10253 VFrame::Lua(f) => {
10254 let proto = f.closure.proto;
10255 let src = chunk_display_name(proto.source.as_ptr());
10256 let pc = (f.pc as usize)
10257 .saturating_sub(1)
10258 .min(proto.lines.len().saturating_sub(1));
10259 let line = proto.lines.get(pc).copied().unwrap_or(0);
10260 out.extend_from_slice(b"\n\t");
10261 out.extend_from_slice(src);
10262 out.extend_from_slice(format!(":{line}: in ").as_bytes());
10263 if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10264 out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10265 } else if proto.line_defined == 0 {
10266 out.extend_from_slice(b"main chunk");
10267 } else {
10268 out.extend_from_slice(
10269 format!(
10270 "function <{}:{}>",
10271 String::from_utf8_lossy(src),
10272 proto.line_defined
10273 )
10274 .as_bytes(),
10275 );
10276 }
10277 }
10278 VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10279 VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10280 VFrame::CYield => {
10281 // PUC `pushglobalfuncname` reports `yield` as
10282 // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10283 // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10284 // `'yield'` (5.5 :841).
10285 let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10286 if qualified {
10287 out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10288 } else {
10289 out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10290 }
10291 }
10292 VFrame::Tail => {
10293 // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10294 // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10295 // :403 asserts these appear once per collapsed tail call.
10296 out.extend_from_slice(b"\n\t(...tail calls...)");
10297 }
10298 };
10299 if total <= LEVELS1 + LEVELS2 {
10300 for &v in visible {
10301 emit(&mut out, v);
10302 }
10303 } else {
10304 for &v in &visible[..LEVELS1] {
10305 emit(&mut out, v);
10306 }
10307 let skip = total - LEVELS1 - LEVELS2;
10308 out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10309 for &v in &visible[total - LEVELS2..] {
10310 emit(&mut out, v);
10311 }
10312 }
10313 out
10314 }
10315
10316 pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10317 // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10318 // (11) bottom frames; if there are more, the middle is collapsed into
10319 // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10320 // overflow traceback would balloon to tens of megabytes (errors.lua's
10321 // stack-overflow test ran string.gmatch over the resulting buffer).
10322 const LEVELS1: usize = 10;
10323 const LEVELS2: usize = 11;
10324 // Collect visible frames in top-down order (deepest first). Both Lua
10325 // activations and pcall/xpcall continuations (which stand in for a
10326 // C-level pcall on the stack) are visible; PUC's traceback enumerates
10327 // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10328 #[derive(Clone, Copy)]
10329 enum VFrame {
10330 Lua(usize),
10331 CPcall,
10332 CXpcall,
10333 }
10334 let mut visible: Vec<VFrame> = Vec::new();
10335 for (fi, cf) in self.frames.iter().enumerate().rev() {
10336 match cf {
10337 CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10338 CallFrame::Cont(nc) => match nc.kind {
10339 ContKind::Pcall => visible.push(VFrame::CPcall),
10340 ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10341 _ => {}
10342 },
10343 }
10344 }
10345 // PUC `luaL_traceback` starts enumerating at the given `level` (in
10346 // terms of L1's CallInfo chain). For the running-thread case the C
10347 // frame for debug.traceback itself is level 0 and luna's `visible`
10348 // doesn't include it — so level=1 (PUC default) means "emit from the
10349 // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10350 // the top. level<=0 emits nothing extra here (d_traceback handles the
10351 // "[C]: in function 'traceback'" prefix for level==0 separately).
10352 let skip = (level - 1).max(0) as usize;
10353 if skip >= visible.len() {
10354 return Vec::new();
10355 }
10356 let visible = &visible[skip..];
10357 let total = visible.len();
10358 let mut out = Vec::new();
10359 let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10360 VFrame::Lua(fi) => {
10361 let f = this.frames[fi].lua().expect("Lua frame");
10362 let proto = f.closure.proto;
10363 let src = chunk_display_name(proto.source.as_ptr());
10364 let pc = (f.pc as usize)
10365 .saturating_sub(1)
10366 .min(proto.lines.len().saturating_sub(1));
10367 let line = proto.lines.get(pc).copied().unwrap_or(0);
10368 out.extend_from_slice(b"\n\t");
10369 out.extend_from_slice(src);
10370 out.extend_from_slice(format!(":{line}: in ").as_bytes());
10371 if let Some((namewhat, name)) = this.frame_name(fi) {
10372 out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10373 } else if proto.line_defined == 0 {
10374 out.extend_from_slice(b"main chunk");
10375 } else {
10376 out.extend_from_slice(
10377 format!(
10378 "function <{}:{}>",
10379 String::from_utf8_lossy(src),
10380 proto.line_defined
10381 )
10382 .as_bytes(),
10383 );
10384 }
10385 }
10386 VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10387 VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10388 };
10389 if total <= LEVELS1 + LEVELS2 {
10390 for &v in visible {
10391 emit_frame(&mut out, v, self);
10392 }
10393 } else {
10394 for &v in &visible[..LEVELS1] {
10395 emit_frame(&mut out, v, self);
10396 }
10397 let dropped = total - LEVELS1 - LEVELS2;
10398 out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10399 for &v in &visible[total - LEVELS2..] {
10400 emit_frame(&mut out, v, self);
10401 }
10402 }
10403 out
10404 }
10405}
10406
10407// ────────────────────────────────────────────────────────────────────
10408// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10409//
10410// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10411// trace-meta section after `vm.load`, resolves each entry's
10412// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10413// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10414// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10415// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10416// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10417// mcode without further plumbing — same code path the runtime JIT
10418// uses.
10419//
// Why a separate impl block: keeps the AOT API surface (two fns:
// `install_aot_trace` and `collect_proto_hashes`) easy to locate when
// grep'ing for either name, without having to wade through the
// 8500-line `impl Vm` block above.
10423// ────────────────────────────────────────────────────────────────────
10424
10425impl Vm {
10426 /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10427 /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10428 /// fires it at the trace's `head_pc`. This is the runtime install
10429 /// API the deploy-side `luna-runtime-helpers` resolver calls once
10430 /// per AOT-emitted trace meta entry, after looking up `proto` by
10431 /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10432 ///
10433 /// # What this does
10434 ///
10435 /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10436 /// The trace's `entry` fn ptr must already point at runnable
10437 /// machine code (the AOT linker resolved the symbol at link time;
10438 /// the deploy resolver passes the address verbatim).
10439 ///
10440 /// # What this does NOT do
10441 ///
10442 /// - **No deduplication.** Calling twice with the same `head_pc`
10443 /// pushes two entries; the dispatcher's `find` will pick the
10444 /// first match. The deploy resolver is responsible for not
10445 /// double-installing.
10446 /// - **No invalidation of the runtime JIT cache.** If the runtime
10447 /// JIT later records + compiles a trace for the same
10448 /// `(proto, head_pc)`, both coexist on `proto.traces` and the
10449 /// dispatcher's `find` picks whichever appears first. AOT
10450 /// traces install before any runtime recording is possible
10451 /// (resolver runs before `vm.load` returns its first closure),
10452 /// so AOT traces win the race for the same site.
10453 /// - **No coverage gating.** AOT traces are trusted by
10454 /// construction — they were validated at compile time. Setting
10455 /// `dispatchable: false` on the input would silently disable
10456 /// dispatch; the caller controls that flag.
10457 ///
10458 /// # Safety / soundness
10459 ///
10460 /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10461 /// machine code). Soundness contract:
10462 ///
10463 /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10464 /// In the AOT-binary deploy shape this is trivially satisfied —
10465 /// the fn lives in the binary's `.text`.
10466 /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10467 /// what the trace's IR actually compiled against; the dispatcher
10468 /// uses them to marshal `reg_state` in and out without further
10469 /// validation. A mismatch corrupts vm.stack.
10470 ///
10471 /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10472 /// invariants hold; this fn is a plain push — no validation that
10473 /// would slow the dispatcher's hot path either.
10474 pub fn install_aot_trace(
10475 &mut self,
10476 proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10477 trace: crate::jit::trace::CompiledTrace,
10478 ) {
10479 let _ = self; // resolver passes &mut Vm for symmetry with future
10480 // pending-install + hash-walk variants; nothing on `self` to
10481 // mutate today because the install target lives on the Proto.
10482 proto.traces.borrow_mut().push(TArc::new(trace));
10483 }
10484
10485 /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10486 /// reachable from `root` and return `(proto, stable_hash)` pairs
10487 /// for every Proto found. Used by the deploy-side resolver to
10488 /// match AOT-emitted `proto_hash` keys against the freshly
10489 /// `undump`'d chunk's protos.
10490 ///
10491 /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10492 /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10493 /// multiple nested closures (rare; the cache field would catch
10494 /// the closure-side dedup, not the Proto side) is reported once.
10495 ///
10496 /// # Why on `&Vm` and not a free fn
10497 ///
10498 /// Keeps the AOT install API discoverable on the Vm surface —
10499 /// `vm.collect_proto_hashes(root)` reads naturally next to
10500 /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10501 /// any Vm field, so `&self` (read-only) is enough.
10502 pub fn collect_proto_hashes(
10503 &self,
10504 root: crate::runtime::Gc<crate::runtime::function::Proto>,
10505 ) -> Vec<(
10506 crate::runtime::Gc<crate::runtime::function::Proto>,
10507 [u8; 16],
10508 )> {
10509 let _ = self;
10510 let mut out = Vec::new();
10511 let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10512 std::collections::HashSet::new();
10513 let mut queue: std::collections::VecDeque<
10514 crate::runtime::Gc<crate::runtime::function::Proto>,
10515 > = std::collections::VecDeque::new();
10516 queue.push_back(root);
10517 while let Some(p) = queue.pop_front() {
10518 let key = p.as_ptr() as *const _;
10519 if !seen.insert(key) {
10520 continue;
10521 }
10522 out.push((p, p.stable_hash()));
10523 for &child in p.protos.iter() {
10524 queue.push_back(child);
10525 }
10526 }
10527 out
10528 }
10529}