luna_core/vm/exec.rs
1//! The interpreter. Dispatch is a plain match over opcodes (the P10 ceiling
2//! pass owns dispatch optimization). Lua→Lua calls share one loop and never
3//! recurse the Rust stack; only native↔Lua boundaries do (e.g. pcall).
4//!
5//! Varargs follow 5.5 semantics: a vararg call materializes a vararg table
6//! (fields 1..n plus "n") kept in the function's own stack slot; `...`
7//! expands from it and `...name` binds it. 5.1 LUAI_COMPAT_VARARG also
8//! materializes a local `arg` table (see `proto.has_compat_vararg_arg`).
9
10use crate::compiler::compile_chunk;
11use crate::frontend::{SyntaxError, parse};
12use crate::jit::send_compat::TArc;
13use crate::numeric::{self, Num};
14use crate::runtime::heap::GcHeader;
15use crate::runtime::{
16 AfterClose, CallFrame, CloseCont, ContKind, Coro, CoroStatus, Frame, Gc, Heap, LuaClosure,
17 MetaAction, MetaCont, NativeClosure, NativeCont, Table, TableError, UpvalState, Upvalue, Value,
18};
19use crate::version::LuaVersion;
20use crate::vm::builtins::{nat_pairs, nat_pcall, nat_xpcall};
21use crate::vm::error::LuaError;
22use crate::vm::isa::{Inst, Op};
23
/// A Lua virtual machine: one OS thread's worth of Lua state.
///
/// # Threading model
///
/// `Vm` is **`!Send + !Sync`**. The GC uses `Gc<T> = NonNull<T>` over
/// an intrusive mark-sweep heap (not `Rc<RefCell<T>>`), and the trace
/// JIT side-table uses `Rc<CompiledTrace>` — both single-threaded by
/// design. Embedders that want concurrency spawn one `Vm` per OS
/// thread (or per single-thread Tokio worker) and exchange data via
/// channels. See [`docs/threading.md`](../../docs/threading.md) for
/// canonical embedding patterns including Tokio `current_thread`,
/// `LocalSet` on multi-thread, and `Vm`-per-OS-thread + channels.
///
/// The constraint is enforced at compile time:
///
/// ```compile_fail
/// fn must_be_send<T: Send>() {}
/// must_be_send::<luna_core::Vm>(); // error[E0277]: `Vm` cannot be sent between threads safely
/// ```
///
/// A future `feature = "send"` (post-v1.1 sprint) will gate an
/// opt-in `Arc<RwLock<T>>` mode with a hard ≤8% perf regression
/// budget. See `.dev/rfcs/v1.1-rfc-vm-send-sync.md` for the design.
pub struct Vm {
    /// The GC heap owned by this VM. Embedders normally interact via the
    /// `Vm` methods (`load` / `call_value` / `set_global` / …) rather than
    /// the heap directly.
    pub heap: Heap,
    /// value stack of the running thread; part of the per-thread execution
    /// context that `SavedCtx` carries while a thread is swapped out
    stack: Vec<Value>,
    /// call frames of the running thread; part of the per-thread execution
    /// context that `SavedCtx` carries while a thread is swapped out
    frames: Vec<CallFrame>,
    /// P17-D Week 1 shadow — frames_top mirrors `self.frames.len()`.
    /// Synced on every push/pop in `frames_push_sync`/`frames_pop_sync`
    /// helpers (debug-asserted on use). NOT consumed by readers yet;
    /// week 1 is pure scaffold. Week 2-N migrations replace readers
    /// one slice at a time, then remove `frames: Vec<CallFrame>` in
    /// favour of a flat `[CallFrame; MAX_FRAMES]` indexed by frames_top.
    frames_top: u32,
    /// open upvalues, sorted ascending by stack slot
    open_upvals: Vec<(u32, Gc<Upvalue>)>,
    /// to-be-closed slots, ascending
    tbc: Vec<u32>,
    /// logical stack top for multi-result sequences
    pub(crate) top: u32,
    /// the running thread's globals table (each thread keeps its own; see
    /// `SavedCtx::globals`)
    globals: Gc<Table>,
    /// per-basic-type metatables (PUC luaT): indexed by `type_mt_slot`
    /// (0 nil, 1 boolean, 2 number, 3 string, 4 function); tables carry their
    /// own. Settable via debug.setmetatable. The string slot holds the shared
    /// metatable for all strings (populated by the string lib, P04).
    type_mt: [Option<Gc<Table>>; 5],
    /// pre-interned metamethod event names, indexed by `Mm`
    mm_names: Vec<Gc<crate::runtime::LuaStr>>,
    /// native↔Lua nesting depth (PUC C-stack guard analogue)
    c_depth: u32,
    /// number of live pcall/xpcall continuation frames on the running thread
    /// (PUC counts these against nCcalls). Bounds protected-call recursion the
    /// way `c_depth` bounds call_value recursion. Per-thread: saved/restored
    /// with the coroutine context, since continuations survive a yield.
    pcall_depth: u32,
    /// number of non-yieldable C calls in flight on the running thread (PUC's
    /// `L->nny`). A library callback that runs via synchronous Rust recursion
    /// (sort comparator, gsub replacement) cannot be continued across a yield,
    /// so it bumps this for its duration; `coroutine.yield` inside hits the
    /// C-call boundary and errors. Always 0 at a suspend point (a yield can
    /// never cross such a call), so it needs no per-thread save/restore.
    nny: u32,
    /// Nonzero while an xpcall message handler is on the Rust stack. Used so a
    /// stack-overflow that surfaces *inside* the handler is reported as PUC's
    /// "error in error handling" (LUA_ERRERR + `luaD_seterrorobj`), not the
    /// plain "stack overflow" — errors.lua :606's `checkerr("error handling",
    /// loop)` then matches. PUC tracks this via the soft-cap window
    /// `nCcalls >= MAXCCALLS/10*11`; luna's c_depth is strict, so we mark the
    /// scope explicitly.
    msgh_depth: u32,
    /// set by a coroutine closing itself (`coroutine.close()` on the running
    /// thread): the to-be-closed handlers have already run; the thread must now
    /// terminate. `Some(None)` is a clean close, `Some(Some(e))` a handler
    /// raised `e`. Checked by `exec_with`/`resume_coro` to propagate (not
    /// unwind, so a protecting pcall cannot catch it) the termination.
    terminating: Option<Option<Value>>,
    /// xoshiro256** state (math.random)
    rng: [u64; 4],
    /// VM creation time (os.clock)
    started: std::time::Instant,
    /// the Lua dialect this VM runs; version-dependent behaviour keys off it
    /// (e.g. the MacroLua registry below is pre-populated only when
    /// `version == LuaVersion::MacroLua`)
    version: LuaVersion,
    /// error object being threaded through a chain of __close handlers; a GC
    /// root for the duration (a handler may trigger collection)
    closing_err: Option<Value>,
    /// the coroutine whose context is currently live in the fields above;
    /// `None` while the main thread runs (P05)
    current: Option<Gc<crate::runtime::Coro>>,
    /// the main thread's saved execution context while a coroutine runs
    main_ctx: Option<SavedCtx>,
    /// set by `coroutine.yield` to suspend the running coroutine: the yielded
    /// values plus the slot/result-count needed to finish the yielding call on
    /// the next resume. Checked by `exec` to propagate (not unwind) on yield.
    yielding: Option<(Vec<Value>, u32, i32)>,
    /// results expected by the in-flight native call (so `yield` knows how many
    /// values its call site wants when it suspends)
    native_nresults: i32,
    /// identity object for the main thread, returned by `coroutine.running`
    /// (the main thread's context lives in the VM fields / `main_ctx`, not here)
    main_coro: Option<Gc<Coro>>,
    /// `collectgarbage` mode name ("incremental"/"generational"). The collector
    /// itself is still stop-the-world mark-sweep; this tracks the mode so mode
    /// switches report the previous one, as PUC does.
    gc_mode: &'static str,
    /// the live-register boundary of the running thread for GC rooting (PUC's
    /// `L->top`): set precisely at each GC safe point so freed temporary
    /// registers above it are not rooted. Without this the collector roots the
    /// whole stack window, pinning weak-table values stranded in stale temps
    /// (e.g. closure.lua's `while x[1]` GC-detection loop).
    pub(crate) gc_top: u32,
    /// `collectgarbage("param", name [,value])` pacing parameters. The collector
    /// is still stop-the-world, so these are stored/returned for API fidelity
    /// (PUC round-trips them via `setparam`/`getparam`). Defaults mirror PUC's
    /// `LUAI_GC*` knobs: pause=200, stepmul=100, stepsize=13.
    gc_pause: i64,
    /// pacing parameter "stepmul"; stored for API fidelity only (see `gc_pause`)
    gc_stepmul: i64,
    /// pacing parameter "stepsize"; stored for API fidelity only (see `gc_pause`)
    gc_stepsize: i64,
    /// true while `__gc` finalizers are being run, so a finalizer that calls
    /// `collectgarbage` gets a no-op (PUC's non-reentrancy: lua_gc returns -1 →
    /// `collectgarbage` yields fail).
    gc_finalizing: bool,
    /// C ABI scratch (`capi` module): the host-visible value stack that C
    /// callers operate on via `lua_pushinteger` / `lua_tostring` / etc.
    /// Kept here (instead of in a separate `LuaState` wrapper) so the
    /// trampoline that bridges to a `LuaCFunction` can safely cast the
    /// Vm pointer it already holds to the public `*mut LuaState` type
    /// without any aliasing of `&mut Vm` against `&mut LuaState.vm`.
    pub capi_stack: Vec<crate::runtime::Value>,
    /// Pinned CString backing the pointer last returned by `lua_tostring`;
    /// valid until the next `lua_tostring` on the same Vm.
    pub capi_cstr_pin: Option<std::ffi::CString>,
    /// PUC 5.4+ warning system. Lua manual §6.1 `warn`: emitted messages
    /// concatenate across continuation calls until a non-`tocont` call
    /// flushes; the default warnf recognises `@on`/`@off` control messages
    /// and starts disabled. luna's `emit_warn` mirrors the default warnf
    /// behaviour and 5.4+ `__gc` errors are routed through it (5.1–5.3
    /// keep the older raise semantics).
    pub(crate) warn_state: WarnState,
    /// byte buffer for the warning system above — presumably where the pieces
    /// of a continued (`tocont`) message accumulate until the flush; confirm
    /// against `emit_warn`
    pub(crate) warn_buf: Vec<u8>,
    /// P09 embedding cooperative budget: a per-Vm tick counter that the run
    /// loop decrements once per dispatch turn. When it hits zero the loop
    /// raises a catchable "instruction budget exceeded" error so the embedder
    /// can yield control back to its caller (short-script eval, game
    /// frame budgets). `None` = unbounded; reset on each call via
    /// `set_instr_budget`.
    pub(crate) instr_budget: Option<i64>,
    // v1.1 A2 — JIT-specific fields moved to `JitState` sidecar; see
    // `self.jit` below + `crate::vm::jit_state` for field docs.
    // (Was: jit_enabled here.)
    // v1.1 A2 — was: trace_jit_enabled (moved to JitState).
    // v1.1 A2 — was: p16_self_link_enabled (moved to JitState).
    // v1.1 A2 — was: active_trace, recording_frame_base, trace_max_depth_seen,
    // trace_closed_count, trace_aborted_count, trace_inline_abort_count,
    // trace_dispatch_off_reasons, trace_compile_failed_reasons, trace_closed_lens,
    // trace_compiled_count, trace_compile_failed_count, trace_dispatched_count,
    // trace_deopt_count, trace_side_trace_{started,compiled,shape_mismatch}_count,
    // trace_{sinkable,accum_bufferable}_seen_count, trace_{sunk_alloc,
    // materialize_emit,closure_emit}_count — all moved to JitState.
    /// Bytecode-loading gate. Default `true`. Sandbox embedders should
    /// call `set_bytecode_loading(false)` so `load`/`loadstring` reject
    /// precompiled chunks (which bypass the parser's depth / opcode
    /// limits). When `false`, the loader rejects any source whose first
    /// byte is the bytecode signature `\27` ("`\27Lua`").
    pub(crate) bytecode_loading: bool,
    /// PUC bytecode-loading gate. Default `false` — PUC `.luac` files are
    /// a strictly larger trust surface than luna's own dump format
    /// (third-party toolchain bugs, malformed chunks, unknown opcode
    /// shapes). When `true`, the loader routes `\x1bLua\x{51..55}` inputs
    /// through the per-dialect PUC translators in `crate::vm::dump::puc`
    /// (Phase LB Wave 2 — currently returns "not yet implemented" stubs).
    /// Embedder toggles via `set_puc_bytecode_loading`.
    pub(crate) puc_bytecode_loading: bool,
    /// Byte budget for source fed into `load` / `loadstring` / `Vm::load`.
    /// Default [`Vm::DEFAULT_LOADER_INPUT_BUDGET`] (256 MiB). When the
    /// accumulated reader output (`load(f, ...)`) or a one-shot `&[u8]`
    /// source exceeds this, the loader returns the PUC-shaped
    /// `not enough memory` error before the host allocator is asked to
    /// hold the next chunk. Defends against `heavy.lua::loadrep`-style
    /// 7 GB+ feeder loops that would otherwise SIGSEGV when `Vec::push`
    /// crosses `isize::MAX` or the host runs out of RAM. Tracked at
    /// `.dev/known-bugs/fixed/heavy-lua-sigsegv-under-128mb-loadrep.md`.
    /// Embedders that genuinely need to load > 256 MiB sources widen the
    /// cap via [`Vm::set_loader_input_budget`].
    pub(crate) loader_input_budget: usize,
    /// In-process log of fully-emitted warnings (each entry = one flushed
    /// message, sans the "Lua warning: " prefix and trailing newline). Lets
    /// tests assert what was warned without scraping stderr.
    pub(crate) warn_log: Vec<Vec<u8>>,
    /// PUC's `LUA_REGISTRYINDEX` table — a single Lua table the debug library
    /// exposes via `debug.getregistry`. Used to hold `_HOOKKEY` (the weak-key
    /// table PUC's `db_sethook` keys per-thread hooks under). luna stores hook
    /// state directly in `Vm.hook`/`Coro.hook`, so the entry is largely a
    /// shape stub for db.lua :328; if other registry-keyed APIs land later
    /// they can share this table.
    pub(crate) registry: Option<Gc<Table>>,
    /// the shared `FILE*` metatable for io file handles (PUC's LUA_FILEHANDLE
    /// registry entry); attached to every file userdata the io library makes
    pub(crate) file_mt: Option<Gc<Table>>,
    /// io library default input stream (PUC registry IO_INPUT)
    pub(crate) io_input: Option<Gc<crate::runtime::Userdata>>,
    /// io library default output stream (PUC registry IO_OUTPUT)
    pub(crate) io_output: Option<Gc<crate::runtime::Userdata>>,
    /// the running thread's debug hook state (`debug.sethook`); per-thread,
    /// swapped with the execution context on a coroutine resume/yield
    pub(crate) hook: HookState,
    /// true while the hook itself runs, so its own execution fires no events
    /// (PUC clears the mask for the duration)
    pub(crate) in_hook: bool,
    /// arms the next Lua frame's `tailcalls` count (PUC `ci->u.l.tailcalls`),
    /// consumed by `push_frame`. `OP_TailCall` sets it to the caller's
    /// own tailcalls + 1 before begin_call so deeply tail-recursive chains
    /// accumulate the count instead of capping at 1.
    pub(crate) pending_tailcalls: u32,
    /// Name of the C native that just propagated an error (captured before
    /// the native is popped from `running_natives`). Lets a dying coroutine
    /// preserve `[C]: in function '<name>'` at the top of its traceback
    /// snapshot — PUC walks `luaG_funcnamefrompc` over a still-live ci, but
    /// luna's native frames are off-stack so we stash the name explicitly.
    pub(crate) errored_native: Option<String>,
    /// PUC `CallInfo.u2.transferinfo`: index of the first transferred value
    /// (relative to the activation's func slot). Paired with `hook_ntransfer`.
    /// Set just before firing a call/return hook, read by `getinfo("r")`.
    pub(crate) hook_ftransfer: u16,
    /// PUC `CallInfo.u2.transferinfo`: the number of values transferred,
    /// starting at `hook_ftransfer`. Set and read at the same points.
    pub(crate) hook_ntransfer: u16,
    /// metamethod event tag (e.g. "close") to attach to the next Lua frame
    /// pushed by `push_frame`; `close_slots` sets this before calling a
    /// `__close` handler so `debug.traceback` names it "metamethod 'close'"
    /// (PUC `CallInfo.u.l.tm`). Single-shot: `push_frame` consumes it.
    pending_tm: Option<&'static str>,
    /// `true` when the next `push_frame` is the user hook function itself,
    /// so `debug.getinfo(1).namewhat` resolves to `"hook"` (PUC
    /// `CIST_HOOKED`). `run_hook` arms it before dispatching the hook.
    pending_is_hook: bool,
    /// traceback snapshot taken at the error point (the first `unwind` entry
    /// for the in-flight error), so that an `xpcall` msgh — which runs *after*
    /// the failed frames are popped — can still see the error point's stack
    /// via `debug.traceback`. PUC `luaG_errormsg` instead runs msgh with the
    /// stack intact; we approximate by snapshotting the string and letting
    /// `d_traceback` consume it. Cleared on Cont catch and at host-level
    /// `call_value` entry (`public_call_depth == 0`).
    pub(crate) error_traceback: Option<Vec<u8>>,
    /// nesting depth of public `call_value` entries (host vs. internal). The
    /// outermost entry (depth 0) resets per-error state (`error_traceback`);
    /// internal calls (e.g. xpcall msgh, sort callback) preserve it.
    public_call_depth: u32,
    /// stack of native (`Value::Native`) closures currently running on the
    /// Rust call stack. `begin_call` pushes the closure before invoking
    /// `nc.f` and pops on return. Used by `arg_error` to detect a *nested*
    /// native call (PUC `ar.name == NULL` at level 0 because the level-0
    /// caller is C, not Lua) and qualify the running function's name via
    /// `pushglobalfuncname` (e.g. `'sort'` → `'table.sort'`).
    pub(crate) running_natives: Vec<Gc<NativeClosure>>,
    /// Parallel to `running_natives`: each entry's `(func_slot, nargs)` is
    /// the native's argument-window head and width, so `debug.getlocal`
    /// can index it like PUC's `luaG_findlocal` `(C temporary)` path.
    pub(crate) running_native_slots: Vec<(u32, u32)>,
    // v1.1 A2 — was: jit_pending_err, jit_reg_state_buf, jit_str_buf_pool,
    // jit_str_buf_pool_cap, jit_entry_tags_buf, chunk_compiler,
    // trace_compiler — all moved to JitState. See `jit` below.
    /// v1.1 A2 — JIT sidecar. Always present (never `Option`); inert
    /// when `chunk_compiler` / `trace_compiler` are
    /// [`crate::jit::NullJitBackend`]. See [`crate::vm::jit_state`].
    ///
    /// `#[doc(hidden)] pub` so the `luna` crate's
    /// `extern "C"` JIT helpers can write `vm.jit.pending_err`
    /// directly (same pattern as the pre-A2 `pub Vm::jit_pending_err`
    /// field). Not part of the embedder-facing API surface.
    #[doc(hidden)]
    pub jit: crate::vm::jit_state::JitState,

    /// B12 host roots — append-only `Vec<Value>` traced as an extra
    /// GC root set. `Lua` facade handles (`LuaFunction`, `LuaTable`,
    /// `LuaRoot`) hold indices into this vector so the underlying
    /// `Gc<T>` stays alive across `eval` calls / yield boundaries.
    ///
    /// v1.1 strategy: append-only with explicit `unpin_all` / new Vm.
    /// Slot recycling lands in Phase 3 alongside B8 LuaUserdata, when
    /// the trade-offs between `Drop` plumbing and append-only memory
    /// growth have a richer ergonomics envelope to live in.
    ///
    /// NOTE(review): the element type is now `HostRootSlot` and
    /// `host_roots_free` below recycles slots, so the "append-only
    /// `Vec<Value>`" wording above looks stale — confirm and refresh.
    pub(crate) host_roots: Vec<crate::vm::host_roots::HostRootSlot>,
    /// v1.3 Phase SR — recycled-slot index pool. `pin_host` pops the
    /// back if non-empty, else extends `host_roots`. Generation
    /// overflow at `u32::MAX` retires the slot (NOT pushed here).
    pub(crate) host_roots_free: Vec<u32>,

    /// v2.1 — GC-rooted scratch stack for `table.sort` (and any other
    /// builtin that needs a Rust-side `Vec<Value>` to outlive a user
    /// callback). Each entry is one in-flight working buffer; `gc_roots`
    /// extends with every contained `Value` so a `collectgarbage()`
    /// inside the comparator cannot free strings/tables snapshotted
    /// here. Nested sorts push a new buffer on entry, pop on exit
    /// (sort.lua's `load(..)(); collectgarbage()` compare callback
    /// regression).
    pub(crate) sort_scratch: Vec<Vec<Value>>,

    /// v1.3 Phase ML — MacroLua compile-time macro registry.
    /// Pre-populated with built-in macros (`@quote` / `@unquote` /
    /// `@if` / `@gensym`) at construction time when `version ==
    /// LuaVersion::MacroLua`; embedders register custom macros via
    /// [`Vm::define_macro`]. The expander runs once per `load()` call
    /// between lexing and parsing (only when `is_macro_lua()`).
    pub(crate) macro_registry: crate::frontend::macro_expander::MacroRegistry,

    /// v1.2 Track B — per-Vm cache of `Gc<Table>` metatables keyed
    /// by `TypeId::of::<T>()` for embedder types implementing
    /// [`crate::vm::userdata_trait::LuaUserdata`]. Populated lazily by
    /// [`Vm::register_userdata`]; metatables are pinned via
    /// [`Vm::pin_host`] at registration time so the entry's
    /// `Gc<Table>` stays live for the rest of the Vm's lifetime.
    pub(crate) userdata_metatables:
        std::collections::HashMap<std::any::TypeId, Gc<crate::runtime::table::Table>>,

    /// B6 — classification of the most recent error raised on this Vm.
    /// Embedders read via [`Vm::error_kind`]; the dispatcher sets it
    /// at well-known sites (syntax errors, instr-budget trips, native
    /// callback errors, type errors).
    pub(crate) last_error_kind: crate::vm::error::LuaErrorKind,

    /// B6 — `(source_name, line)` of the most recent error. Set by the
    /// dispatcher / lexer / parser; cleared when a new call_value
    /// enters cleanly.
    pub(crate) last_error_source: Option<(String, u32)>,

    /// v1.1 B10 Stage 1 — when `true`, `instr_budget` exhaustion in
    /// the dispatcher hot loop yields cooperatively (sets
    /// [`Vm::host_yield_pending`] + returns a sentinel `Err` walked up
    /// to `EvalFuture::poll`) instead of returning a real
    /// "instruction budget exceeded" error. Set by [`Vm::eval_async`]
    /// for the duration of the future; restored to `false` on
    /// `Poll::Ready`. The sync `Vm::eval` / `Vm::call_value` paths
    /// leave it `false` so v1.0 behavior is preserved exactly.
    pub(crate) async_mode: bool,

    /// v1.1 B10 Stage 1 — host waker cloned by `EvalFuture::poll`
    /// before driving a slice. The dispatcher itself does not call it
    /// (the future's poll loop does `wake_by_ref` after observing
    /// `BudgetExhausted`), but storing the waker keeps the door open
    /// for Stage 2 async natives to wake the host directly from a
    /// helper future.
    pub(crate) async_waker: Option<std::task::Waker>,

    /// v1.1 B10 Stage 1 — per-poll opcode quota loaded into
    /// `instr_budget` at the start of each `EvalFuture::poll` slice.
    /// Default 10_000 (RFC §D5). Tunable via
    /// [`Vm::set_async_slice`].
    pub(crate) async_slice_size: i64,

    /// v1.1 B10 Stage 1 — set by the dispatcher when an async-mode
    /// budget exhaustion fires; checked by `exec_with` (so the
    /// sentinel propagates without `unwind` running, mirroring
    /// `yielding.is_some()`) and by `call_value_impl` (so the call
    /// frames survive for the next poll). Cleared by `drive_one`
    /// after translating it to `DispatchOutcome::BudgetExhausted`.
    pub(crate) host_yield_pending: bool,

    /// v1.1 B10 Stage 2 — set by the dispatcher's native-call path
    /// when an async-marked [`NativeClosure`] is invoked under
    /// `async_mode`. The Vm pauses the dispatcher (same sentinel-Err
    /// mechanism as `host_yield_pending` — see `exec_with` +
    /// `call_value_impl`), stashes the in-flight future +
    /// post-completion context here, and surfaces them to
    /// `EvalFuture::poll` via `drive_one`. Cleared by `drive_one`
    /// once the future is moved out into a
    /// `DispatchOutcome::AsyncNativeAwaiting`.
    pub(crate) pending_async_native_fut:
        Option<std::pin::Pin<Box<dyn std::future::Future<Output = Result<u32, LuaError>>>>>,

    /// v1.1 B10 Stage 2 — companion to `pending_async_native_fut`:
    /// the `(func_slot, nargs, nresults, gc_top)` quad needed to
    /// commit the future's eventual `Ok(nret)` back into the calling
    /// frame's expected result slots. Recorded by the dispatcher;
    /// consumed by [`Vm::commit_async_native_result`] after the
    /// future resolves.
    pub(crate) pending_async_native_ctx: Option<AsyncNativeCallCtx>,
}
400
/// v1.1 B10 Stage 2 — call-site context an in-flight async native
/// needs preserved across the cooperative-yield boundary.
///
/// The dispatcher records this when it routes a `NativeClosure` with
/// `is_async == true` through the cooperative path; `EvalFuture::poll`
/// hands it back to [`Vm::commit_async_native_result`] once the
/// awaited future resolves so `finish_results` (and the post-call GC
/// checkpoint) can run as if the native had completed synchronously.
///
/// `Copy`, so it can be stashed in the `Vm` and handed back by value.
#[derive(Clone, Copy)]
pub(crate) struct AsyncNativeCallCtx {
    /// Stack slot of the called function — presumably the head of the
    /// native's argument window, as in `Vm::running_native_slots`; confirm
    /// against the dispatcher's native-call path.
    pub func_slot: u32,
    /// Recorded for parity with the sync native-call path's
    /// `native_nresults`/`gc_top` bookkeeping; reserved for Stage 3+
    /// hook firing + traceback shaping. Not yet read in Stage 2.
    #[allow(dead_code)]
    pub nargs: u32,
    /// Number of results the call site expects (signed; the sync path's
    /// counterpart is `Vm::native_nresults`). NOTE(review): a negative
    /// value presumably means "all results" — confirm.
    pub nresults: i32,
    /// Recorded for Stage 3+ traceback + GC-root-window auditing.
    /// Stage 2 reads `Vm.gc_top` directly post-resume, so this is
    /// unread today; carried so a Stage 3 audit can confirm the
    /// pre-suspend root window matches the post-resume one.
    #[allow(dead_code)]
    pub gc_top: u32,
}
425
/// Per-thread debug hook state (PUC `lua_State` hook/hookmask/basehookcount/
/// hookcount). `func` is the Lua hook; the booleans are the PUC mask bits.
///
/// The derived `Default` is the "no hook installed" state: both hook slots
/// `None`, every mask bit `false`, both counters `0`.
#[derive(Clone, Copy, Default)]
pub struct HookState {
    /// the hook function (`None` when no hook is installed)
    pub func: Option<Value>,
    /// v1.1 B11 — Rust-side debug hook. Fires alongside the Lua hook
    /// (Rust first); both can be installed simultaneously, but most
    /// embedders pick one.
    pub rust_func: Option<RustDebugHook>,
    /// LUA_MASKCALL — fire on function entry
    pub call: bool,
    /// LUA_MASKRET — fire on function return
    pub ret: bool,
    /// LUA_MASKLINE — fire on source-line change
    pub line: bool,
    /// LUA_MASKCOUNT — fire every `count_base` instructions
    pub count: bool,
    /// instruction count between count events (PUC basehookcount)
    pub count_base: i64,
    /// instructions left until the next count event (PUC hookcount)
    pub count_left: i64,
}
449
/// Rust-side debug hook callback (B11). Receives the `Vm` plus a
/// classified event. The callback runs synchronously in the
/// dispatcher; the hook flag (`in_hook`) is set for its duration so
/// hook recursion is suppressed.
///
/// This is a plain `fn` pointer rather than a boxed closure, which is what
/// lets it sit inside the `Copy` [`HookState`]; a hook therefore cannot
/// capture environment and must reach any state it needs through the `Vm`.
pub type RustDebugHook = fn(&mut Vm, RustHookEvent);
455
/// Classified debug event delivered to a [`RustDebugHook`].
///
/// Small and `Copy`, so it is passed to the hook by value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RustHookEvent {
    /// Function entry (`hook_call` analogue).
    Call,
    /// Function return (`hook_return` analogue).
    Return,
    /// Tail call entry (PUC 5.2+ separates this from a plain Call).
    /// NOTE(review): there is no dedicated `HOOK_MASK_*` bit for tail calls;
    /// presumably delivered under `HOOK_MASK_CALL` — confirm at the firing site.
    TailCall,
    /// Source-line change (the `u32` is the 1-based line number).
    Line(u32),
    /// Instruction count event (fires every `count_base` instructions).
    Count,
}
470
/// Subscribe to function-call events.
///
/// First of the mask flags accepted by [`Vm::set_rust_debug_hook`]; each
/// flag is a distinct bit, so OR several together to subscribe to multiple
/// event categories with a single hook installation.
pub const HOOK_MASK_CALL: u32 = 0b0001;
/// Subscribe to function-return events.
pub const HOOK_MASK_RETURN: u32 = 0b0010;
/// Subscribe to line-change events.
pub const HOOK_MASK_LINE: u32 = 0b0100;
/// Subscribe to instruction-count events.
pub const HOOK_MASK_COUNT: u32 = 0b1000;
480
/// A thread's swapped-out execution context (PUC per-thread stack state).
///
/// Each field holds the thread's value of the same-named `Vm` field while
/// another thread's context is live in the `Vm` (e.g. `Vm::main_ctx` holds
/// the main thread's context while a coroutine runs).
struct SavedCtx {
    /// the thread's value stack (`Vm::stack`)
    stack: Vec<Value>,
    /// the thread's call frames (`Vm::frames`)
    frames: Vec<CallFrame>,
    /// the thread's open upvalues, keyed by stack slot (`Vm::open_upvals`)
    open_upvals: Vec<(u32, Gc<Upvalue>)>,
    /// the thread's to-be-closed slots (`Vm::tbc`)
    tbc: Vec<u32>,
    /// the thread's logical stack top (`Vm::top`)
    top: u32,
    /// the thread's live pcall/xpcall continuation count (`Vm::pcall_depth`)
    pcall_depth: u32,
    /// the thread's debug hook state (`Vm::hook`)
    hook: HookState,
    /// PUC `L->l_gt` — the thread's own globals table. Carried alongside
    /// the rest of the suspended state so each thread can keep its own
    /// `setfenv(0, env)` rewire without the swap leaking into another
    /// thread (5.1 closure.lua :177).
    globals: Gc<Table>,
}
496
/// Outcome of unwinding the call stack on an error (see `Vm::unwind`).
///
/// The first two variants mean a protecting continuation absorbed the
/// error; the last means nothing caught it within the unwound range.
enum Unwound {
    /// caught by a pcall/xpcall continuation; resume running its caller
    Caught,
    /// caught by a continuation that was the entry-level activation; these are
    /// the call's (wrapped) results
    CaughtReturn(Vec<Value>),
    /// no protecting continuation up to `entry_depth`; propagate the error
    Propagated(LuaError),
}
507
/// A resolved debug stack level: a real Lua frame (by index into `frames`), a
/// synthetic C frame for a call_value boundary, or a synthetic placeholder
/// for an activation that a tail call collapsed.
pub(crate) enum DbgKind {
    /// a real Lua frame; the payload is its index into `frames`
    Lua(usize),
    /// a synthetic C level; the index is the `from_c` Lua frame it sits below,
    /// used to name the native via its invoking call instruction.
    C(usize),
    /// PUC `CIST_TAIL` placeholder — a Lua-to-Lua tail call collapsed the
    /// caller's activation, so `debug.getinfo(level)` at this slot returns
    /// `what = "tail"` / `short_src = "(tail call)"` / `linedefined = -1` /
    /// `func = nil` and `getfenv(level)` errors (5.1 db.lua :336/:341 pin
    /// both shapes). The index points at the *tail-called* frame whose
    /// `is_tail` flag induced this synthetic level.
    Tail(#[allow(dead_code)] usize),
}
523
/// Outcome of an index/newindex/comparison fast path: either a directly
/// computed result, or a metamethod (with the receiver it resolved against) the
/// caller must invoke — synchronously (C context) or yieldably (VM opcode).
///
/// The fast path itself never calls the metamethod; it only reports which
/// one the caller has to run.
enum MmOut {
    /// index → the looked-up value; newindex → done (raw set performed);
    /// comparison → the boolean result already known
    Done(Value),
    /// a metamethod to call; `recv` is the chain element it was found on (the
    /// extra args — key / value — are supplied by the caller)
    Mm { func: Value, recv: Value },
    /// ≤5.3 `a <= b` synthesised via `not __lt(b, a)` when neither operand
    /// carries `__le` — `op_compare` swaps the args and negates the result.
    /// Lives separate from `Mm` so the synth path can stay yieldable without
    /// every other Mm caller learning a swap flag they would never set.
    CompareSynth { func: Value },
}
540
/// Metamethod events; discriminants index `Vm::mm_names`.
///
/// Variant order is load-bearing: with `#[repr(usize)]` and no explicit
/// discriminants, `Mm::X as usize` is the variant's position in this list,
/// and that position must name the same event in [`MM_NAMES`]. When adding
/// a variant, append it here *and* append its `"__event"` spelling to
/// `MM_NAMES`; the compile-time guard below rejects a length mismatch.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
pub(crate) enum Mm {
    Index,
    NewIndex,
    Call,
    ToString,
    Metatable,
    Name,
    Eq,
    Lt,
    Le,
    Concat,
    Len,
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Pow,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
    Unm,
    BNot,
    Close,
    Gc,
    // Must stay the last variant: the guard below derives the variant
    // count from it.
    Pairs,
}

/// Metamethod field names (with the leading `__`), in `Mm` discriminant
/// order: `MM_NAMES[Mm::X as usize]` is the table key for event `X`.
/// Presumably the source the VM interns `Vm::mm_names` from — confirm at
/// the construction site.
const MM_NAMES: [&str; 28] = [
    "__index",
    "__newindex",
    "__call",
    "__tostring",
    "__metatable",
    "__name",
    "__eq",
    "__lt",
    "__le",
    "__concat",
    "__len",
    "__add",
    "__sub",
    "__mul",
    "__div",
    "__mod",
    "__pow",
    "__idiv",
    "__band",
    "__bor",
    "__bxor",
    "__shl",
    "__shr",
    "__unm",
    "__bnot",
    "__close",
    "__gc",
    "__pairs",
];

// Compile-time guard: the name table must have exactly one entry per `Mm`
// variant. Without it, adding a variant but no name would compile cleanly
// and only fail (out-of-bounds index) when that event is first looked up.
const _: () = assert!(
    MM_NAMES.len() == Mm::Pairs as usize + 1,
    "MM_NAMES must have exactly one entry per Mm variant"
);
605
606/// Debug-name spelling for a metamethod event tag (the bare `"index"` /
607/// `"gc"` / … stored in `Frame.tm`), as `getinfo("n").name` reports it.
608///
609/// PUC 5.2/5.3 keep the leading `"__"` for every event; 5.4+ strips it for
610/// every event *except* `__gc` (`funcnamefromcall` returns the literal
611/// `"__gc"` string for `CIST_FIN`, whereas `funcnamefromcode` does
612/// `getstr(tmname[tm]) + 2` to skip the `__`).
613fn tm_debug_name(version: LuaVersion, tm: &str) -> String {
614 if version <= LuaVersion::Lua53 {
615 format!("__{tm}")
616 } else if tm == "gc" {
617 "__gc".to_string()
618 } else {
619 tm.to_string()
620 }
621}
622
623/// The metamethod event an opcode dispatches, without the `__` prefix (PUC
624/// funcnamefromcode), for "(metamethod 'event')" call-error suffixes.
625fn mm_event_name(op: crate::vm::isa::Op) -> Option<&'static str> {
626 use crate::vm::isa::Op;
627 Some(match op {
628 Op::Add => "add",
629 Op::Sub => "sub",
630 Op::Mul => "mul",
631 Op::Div => "div",
632 Op::Mod => "mod",
633 Op::Pow => "pow",
634 Op::IDiv => "idiv",
635 Op::BAnd => "band",
636 Op::BOr => "bor",
637 Op::BXor => "bxor",
638 Op::Shl => "shl",
639 Op::Shr => "shr",
640 Op::Unm => "unm",
641 Op::BNot => "bnot",
642 Op::Concat => "concat",
643 Op::Len => "len",
644 Op::GetField | Op::GetTable | Op::GetI | Op::SelfOp => "index",
645 Op::SetField | Op::SetTable | Op::SetI => "newindex",
646 Op::Eq | Op::EqK => "eq",
647 Op::Lt => "lt",
648 Op::Le => "le",
649 _ => return None,
650 })
651}
652
/// Upper bound on the length of an `__index`/`__newindex` chain (PUC
/// MAXTAGLOOP).
const MAX_TAG_LOOP: u32 = 2_000;
/// Upper bound on a `__call` metamethod chain (PUC `MAXCCMT`, lvm.c). 200
/// matches PUC 5.4/5.5 and is more than any reasonable program needs; the
/// earlier value of `15` was tight enough to fire on calls.lua :194 (N=20).
const MAX_CCMT: u32 = 200;
/// Upper bound on native↔Lua nesting (PUC LUAI_MAXCCALLS analogue).
const MAX_C_DEPTH: u32 = 200;
/// luna's engine-level VM stack cap, in slots (2^20), used by call-site
/// overflow checks. Slightly larger than PUC's `LUAI_MAXSTACK` so engine
/// internals keep a little headroom above any single library push.
const MAX_LUA_STACK: u32 = 1024 * 1024;
/// PUC `LUAI_MAXSTACK` (`luaconf.h`): the cap library code consults via
/// `lua_checkstack` to refuse multi-value pushes (`table.unpack` returning
/// N values, `string.pack` results, etc.). 5.3 coroutine.lua :530 pins
/// this at one million — `for j in {lim-10, …}` expects every j ≥ lim-10
/// to fail because the few slots already consumed in the coroutine push
/// the effective cap below lim-10.
const PUC_MAXSTACK: i64 = 1_000_000;
672
/// State of the PUC 5.4+ default warning function (`warnf`). The base
/// library's `warn` function flips between `Off` and `On` via the `@on` /
/// `@off` control messages; any other `@<word>` control is silently
/// ignored, mirroring `lauxlib.c::checkcontrol`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum WarnState {
    /// `warn` calls are silently dropped. This is the state selected by
    /// `warn("@off")`.
    Off,
    /// `warn` calls are delivered to stderr. This is the state selected by
    /// `warn("@on")`.
    On,
}
683
/// Pull a human-readable message out of a `catch_unwind` payload, on a
/// best-effort basis.
///
/// A payload that is a `String` is cloned, a payload that is a
/// `&'static str` is copied into a new `String`, and every other payload
/// type yields the fixed text `"<non-string panic>"`. The native-call
/// `catch_unwind` uses this to fold a panic into a Lua error.
fn panic_payload_str(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|text| (*text).to_string())
        })
        .unwrap_or_else(|| "<non-string panic>".to_string())
}
697
/// Combined error type returned by [`Vm::eval`] and friends — either the
/// chunk failed to parse / compile, or it raised at runtime.
///
/// Both payload types convert into this enum through `From`, so `?` works
/// on either a `SyntaxError` or a `LuaError`.
#[derive(Debug)]
pub enum Error {
    /// Parse or compile failure.
    Syntax(SyntaxError),
    /// Runtime error raised during execution.
    Runtime(LuaError),
}
707
708impl From<SyntaxError> for Error {
709 fn from(e: SyntaxError) -> Error {
710 Error::Syntax(e)
711 }
712}
713
714impl From<LuaError> for Error {
715 fn from(e: LuaError) -> Error {
716 Error::Runtime(e)
717 }
718}
719
/// State close: dropping a `Vm` queues every registered finalizer on the
/// heap and then runs the queued finalizers.
impl Drop for Vm {
    fn drop(&mut self) {
        // state close: run `__gc` for every still-registered finalizable before
        // the heap frees them (PUC separatetobefnz(g,1) + callallpending). A
        // single pass — objects created by a closing finalizer are not
        // re-finalized (they go to the heap's free list directly).
        self.heap.queue_all_finalizers();
        // Order matters: the queue must be filled before it is drained.
        self.run_finalizers();
    }
}
730
731// P17-D Week 1 scaffold — split-borrow free fn helpers for frames
732// push/pop with shadow counter `frames_top: u32`. Free fns (not Vm
733// methods) so callers can pass `&mut self.frames` + `&mut self.frames_top`
734// as split borrows, allowing other `&mut self.field` reads inside the
735// CallFrame construction (e.g. `std::mem::take(&mut self.pending_tm)`).
736//
737// Week 1 has NO readers yet; the shadow just stays in sync + asserts.
738// Week 2 begins migrating hot-path readers (materialize_frames helper)
739// to consume `frames_top` and a flat array in place of the Vec.
740#[inline(always)]
741fn frames_push_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32, cf: CallFrame) {
742 frames.push(cf);
743 // Shadow maintenance is debug-only: release builds skip the
744 // increment + assertion entirely. The shadow's purpose in Week 1
745 // is to VERIFY the assumed invariant (frames_top == frames.len())
746 // across all push/pop sites; once Week 2+ migrates readers to
747 // consume the shadow, release will run the increment unconditionally.
748 #[cfg(debug_assertions)]
749 {
750 *frames_top += 1;
751 debug_assert_eq!(
752 *frames_top as usize,
753 frames.len(),
754 "P17-D frames_top out of sync after push",
755 );
756 }
757 #[cfg(not(debug_assertions))]
758 let _ = frames_top;
759}
760
761#[inline(always)]
762fn frames_pop_sync(frames: &mut Vec<CallFrame>, frames_top: &mut u32) -> Option<CallFrame> {
763 let r = frames.pop();
764 #[cfg(debug_assertions)]
765 {
766 if r.is_some() {
767 *frames_top = frames_top.saturating_sub(1);
768 }
769 debug_assert_eq!(
770 *frames_top as usize,
771 frames.len(),
772 "P17-D frames_top out of sync after pop",
773 );
774 }
775 #[cfg(not(debug_assertions))]
776 let _ = frames_top;
777 r
778}
779
/// v1.3 Phase AOT Stage 7 sub-piece 4 — one-time env-var read for
/// `LUNA_AOT_PROBE`.
///
/// Returns `true` iff the variable is set to any non-empty value. The
/// value is read with `var_os`, so a non-empty value that is not valid
/// Unicode also counts as "set" (a `var`-based read would report such a
/// value as an error and therefore as "unset").
///
/// The result is cached in a `OnceLock`: the environment is consulted on
/// the first call only and later changes to the variable are not seen.
/// Off by default — production deploys don't bleed diagnostic prints.
fn jit_probe_enabled() -> bool {
    static PROBE_ON: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBE_ON.get_or_init(|| {
        std::env::var_os("LUNA_AOT_PROBE").is_some_and(|value| !value.is_empty())
    })
}
794
795impl Vm {
796 /// P17-D Week 1 — re-sync `frames_top` after a bulk `frames: Vec`
797 /// swap (take_ctx, put_ctx, load_coro_ctx). Must be called after
798 /// the Vec replacement to keep the shadow valid.
799 #[inline(always)]
800 fn frames_resync(&mut self) {
801 // Debug-only Week 1 — see `frames_push_sync` comment.
802 #[cfg(debug_assertions)]
803 {
804 self.frames_top = self.frames.len() as u32;
805 }
806 }
807
808 // ====================================================================
809 // P17-D v2 Phase 2 — stack-inline frame metadata accessors (unused).
810 //
811 // These methods read/write the LJ_FR2 marker slots at `stack[base-2]`
812 // (closure GCRef) and `stack[base-1]` (FrameMarker as i64). Phase 2
813 // ships them WITHOUT call-site usage; Phase 3 migrates push/pop
814 // sites to consume them. Phase 4 removes Vec<CallFrame>.
815 //
816 // Preconditions (debug-asserted):
817 // - base >= 2 (slots base-2 and base-1 must exist below the frame)
818 // - self.stack.len() > base + max_stack (caller has grown stack)
819 // - For Lua frames, stack[base-2] holds Value::Closure(cl)
820 // - For Lua frames, stack[base-1] holds Value::Int(marker.to_raw())
821 //
822 // No release-build cost when unused (LTO strips dead methods).
823 // ====================================================================
824
825 /// Write a Lua frame's closure pointer into `stack[base-2]`.
826 /// The caller must ensure `base >= 2` and the slot is within the
827 /// stack's allocated range.
828 #[inline]
829 #[allow(dead_code)] // Phase 2 — consumer is Phase 3.
830 fn write_frame_closure(&mut self, base: u32, cl: crate::runtime::Gc<LuaClosure>) {
831 debug_assert!(
832 base >= 2,
833 "frame closure slot needs base >= 2; got {}",
834 base
835 );
836 let idx = (base - 2) as usize;
837 debug_assert!(idx < self.stack.len(), "stack[base-2] out of range");
838 self.stack[idx] = Value::Closure(cl);
839 }
840
841 /// Read a Lua frame's closure pointer from `stack[base-2]`.
842 /// Returns `None` if the slot doesn't hold a closure (caller is
843 /// expected to treat that as a corrupt frame).
844 ///
845 /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
846 /// to avoid the enum-match cost on the hot path. Tag check via
847 /// 1-byte load + branch + `as_closure_unchecked` payload load.
848 #[inline]
849 #[allow(dead_code)]
850 fn read_frame_closure(&self, base: u32) -> Option<crate::runtime::Gc<LuaClosure>> {
851 debug_assert!(base >= 2);
852 let v = self.stack.get((base - 2) as usize)?;
853 if v.tag_byte() == crate::runtime::value::tag::CLOSURE {
854 // SAFETY: tag byte just verified == CLOSURE.
855 Some(unsafe { v.as_closure_unchecked() })
856 } else {
857 None
858 }
859 }
860
861 /// Write a packed [`FrameMarker`] into `stack[base-1]`. The marker
862 /// encodes the frame kind (Lua / Cont) + PC-or-delta payload.
863 /// Stored as `Value::Int(marker.to_raw())` so it round-trips
864 /// cleanly through the value stack without losing bits.
865 #[inline]
866 #[allow(dead_code)]
867 fn write_frame_marker(&mut self, base: u32, marker: crate::runtime::frame_marker::FrameMarker) {
868 debug_assert!(base >= 1, "frame marker slot needs base >= 1; got {}", base);
869 let idx = (base - 1) as usize;
870 debug_assert!(idx < self.stack.len(), "stack[base-1] out of range");
871 self.stack[idx] = Value::Int(marker.to_raw());
872 }
873
874 /// Read a packed [`FrameMarker`] from `stack[base-1]`. Returns
875 /// `None` if the slot isn't a `Value::Int` (caller treats as a
876 /// corrupt frame); the kind tag itself may still be invalid, in
877 /// which case [`FrameMarker::kind`] returns `None` on the result.
878 ///
879 /// P17-D v2 Direction E2 — uses E1's [`Value::tag_byte`] fast-path
880 /// for the tag check + `as_int_unchecked` for the payload load.
881 #[inline]
882 #[allow(dead_code)]
883 fn read_frame_marker(&self, base: u32) -> Option<crate::runtime::frame_marker::FrameMarker> {
884 debug_assert!(base >= 1);
885 let v = self.stack.get((base - 1) as usize)?;
886 if v.tag_byte() == crate::runtime::value::tag::INT {
887 // SAFETY: tag byte just verified == INT.
888 Some(crate::runtime::frame_marker::FrameMarker::from_raw(
889 unsafe { v.as_int_unchecked() },
890 ))
891 } else {
892 None
893 }
894 }
895
    /// Build the raw `Vm` struct without main coroutine / RNG seed / library
    /// setup. Private helper shared by `Vm::new` and `Vm::new_minimal`; the
    /// caller is responsible for the rest of the bring-up.
    ///
    /// What this function does itself: creates the heap, sets the two
    /// version-dependent heap flags, allocates the globals table, interns
    /// every name in `MM_NAMES`, and fills every remaining field with its
    /// initial value (the RNG state is all zeroes until the caller seeds it).
    fn new_inner(version: LuaVersion) -> Vm {
        let mut heap = Heap::new();
        // PUC 5.1 had no ephemeron pass — `__mode='k'` tables marked their
        // values strongly. gc.lua's "weak tables" section relies on that.
        heap.no_ephemeron = version <= LuaVersion::Lua51;
        // PUC 5.3 needs two GC cycles to finalize a table caught in a
        // coroutine reference cycle (gc.lua :502); 5.4+ rewrote the GC and
        // finalize in a single cycle (5.4/5.5 gc.lua :544 assert exactly one).
        heap.defer_thread_cycle_finalize = version == LuaVersion::Lua53;
        let globals = heap.new_table();
        // Intern the metamethod names once, in `MM_NAMES` order.
        let mm_names = MM_NAMES.iter().map(|n| heap.intern(n.as_bytes())).collect();

        Vm {
            heap,
            stack: Vec::new(),
            frames: Vec::new(),
            // Shadow of `frames.len()`; starts in sync with the empty Vec.
            frames_top: 0,
            open_upvals: Vec::new(),
            tbc: Vec::new(),
            top: 0,
            globals,
            type_mt: [None; 5],
            mm_names,
            c_depth: 0,
            pcall_depth: 0,
            nny: 0,
            msgh_depth: 0,
            terminating: None,
            // Zeroed placeholder; `new_minimal` seeds it via `rng_seed`.
            rng: [0; 4],
            started: std::time::Instant::now(),
            version,
            closing_err: None,
            current: None,
            main_ctx: None,
            yielding: None,
            native_nresults: -1,
            // Filled in by `new_minimal`.
            main_coro: None,
            gc_mode: "incremental",
            gc_top: 0,
            gc_pause: 200,
            gc_stepmul: 100,
            gc_stepsize: 13,
            gc_finalizing: false,
            capi_stack: Vec::new(),
            capi_cstr_pin: None,
            // Warnings start switched off.
            warn_state: WarnState::Off,
            warn_buf: Vec::new(),
            warn_log: Vec::new(),
            instr_budget: None,
            // Binary chunks are accepted by default; PUC-format ones are not.
            bytecode_loading: true,
            puc_bytecode_loading: false,
            loader_input_budget: Vm::DEFAULT_LOADER_INPUT_BUDGET,
            registry: None,
            file_mt: None,
            io_input: None,
            io_output: None,
            hook: HookState::default(),
            in_hook: false,
            pending_tailcalls: 0,
            errored_native: None,
            hook_ftransfer: 0,
            hook_ntransfer: 0,
            pending_tm: None,
            pending_is_hook: false,
            error_traceback: None,
            public_call_depth: 0,
            running_natives: Vec::new(),
            running_native_slots: Vec::new(),
            // v1.1 A2 — JIT-specific state factored into `JitState`
            // sidecar. The `luna` crate's `Vm::new_minimal_with_jit` /
            // `install_jit_backend` / `luaL_newstate` swap in
            // `CraneliftBackend` for callers that want JIT acceleration.
            jit: crate::vm::jit_state::JitState::with_null_backend(),
            // v1.1 B12 — host roots ticket pool for the `Lua` facade.
            host_roots: Vec::new(),
            // v1.3 Phase ML — MacroLua registry. Pre-populated with
            // built-ins (`@quote` / `@unquote` / `@if` / `@gensym`)
            // when this Vm is constructed under `LuaVersion::MacroLua`.
            macro_registry: if version == LuaVersion::MacroLua {
                crate::frontend::macro_expander::MacroRegistry::with_builtins()
            } else {
                crate::frontend::macro_expander::MacroRegistry::new()
            },
            host_roots_free: Vec::new(),
            sort_scratch: Vec::new(),
            // v1.2 Track B — LuaUserdata trait sugar's per-Vm
            // metatable cache. Populated lazily by register_userdata.
            userdata_metatables: std::collections::HashMap::new(),
            // v1.1 B6 — error classification metadata. Defaults to
            // Runtime; set at known sites (syntax / budget trip /
            // native error / type error).
            last_error_kind: crate::vm::error::LuaErrorKind::default(),
            last_error_source: None,
            // v1.1 B10 Stage 1 — async embedder fields. Defaults
            // preserve sync behavior bit-for-bit (`async_mode = false`
            // means the budget hot loop errors out exactly as v1.0).
            async_mode: false,
            async_waker: None,
            async_slice_size: 10_000,
            host_yield_pending: false,
            // v1.1 B10 Stage 2 — pending async-native state. Empty by
            // default; populated only by the dispatcher when an
            // async-marked NativeClosure is invoked under async_mode.
            pending_async_native_fut: None,
            pending_async_native_ctx: None,
        }
    }
1006
1007 /// Build a fully-loaded Vm — the default for embedders that want PUC's
1008 /// standard library surface. Equivalent to `Vm::new_minimal(version)`
1009 /// followed by `vm.open_all_libs()`.
1010 pub fn new(version: LuaVersion) -> Vm {
1011 let mut vm = Vm::new_minimal(version);
1012 vm.open_all_libs();
1013 vm
1014 }
1015
1016 /// P09 embedding: build a Vm with no standard libraries loaded. Embedders
1017 /// that want a sandbox (Redis-style scripts, in-game scripting with
1018 /// a curated API) call this and then `open_base` / `open_math` / etc.
1019 /// selectively. The Vm is otherwise fully initialized (main coroutine,
1020 /// RNG seed, GC) so `eval` and `call_value` are immediately usable.
1021 pub fn new_minimal(version: LuaVersion) -> Vm {
1022 let mut vm = Vm::new_inner(version);
1023 let mc = vm.heap.new_coro(Value::Nil, vm.globals);
1024 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1025 unsafe { mc.as_mut() }.status = CoroStatus::Running;
1026 vm.main_coro = Some(mc);
1027 let (a, b) = vm.rng_auto_seed();
1028 vm.rng_seed(a as u64, b as u64);
1029 vm
1030 }
1031
1032 /// v1.1 A1 Session C — install a caller-supplied JIT backend. The
1033 /// `luna` crate uses this to swap in its `CraneliftBackend`; tests
1034 /// or third-party backends pass their own [`crate::jit::IntChunkCompiler`] /
1035 /// [`crate::jit::TraceCompiler`] implementations. Re-installing on a Vm whose
1036 /// closures already populated `Proto.jit: JitProtoState::Compiled`
1037 /// does NOT evict those cached entries — call right after
1038 /// construction for a clean swap.
1039 ///
1040 /// Naming: `install_jit_backend` (not `install_default_jit`)
1041 /// because the "default" in luna-core is `NullJitBackend`; the
1042 /// "default JIT" lives in the `luna` crate.
1043 pub fn install_jit_backend<C, T>(&mut self, chunk: C, trace: T)
1044 where
1045 C: crate::jit::IntChunkCompiler + 'static,
1046 T: crate::jit::TraceCompiler + 'static,
1047 {
1048 self.jit.chunk_compiler = Box::new(chunk);
1049 self.jit.trace_compiler = Box::new(trace);
1050 }
1051
1052 /// v2.0 Track J sub-step J-B — install a caller-supplied JIT
1053 /// storage holder. Default is [`crate::jit::NullJitStorage`];
1054 /// the `luna_jit` crate's `install_default_jit` pairs this with
1055 /// `install_jit_backend(CraneliftBackend, CraneliftBackend)` to
1056 /// also install a fresh `CraneliftJitStorage`. Storage holds
1057 /// the per-`Vm` JIT cache + handle collections that used to be
1058 /// `thread_local!`s in `luna_jit::jit_backend`.
1059 ///
1060 /// Idempotency: re-installing storage on a Vm that already
1061 /// holds compiled-trace pointers WILL evict their owners (the
1062 /// old `CraneliftJitStorage`'s `JITModule`s drop their mmap
1063 /// pages). Call right after construction for a clean swap.
1064 pub fn install_jit_storage<S>(&mut self, storage: S)
1065 where
1066 S: crate::jit::JitStorage + 'static,
1067 {
1068 self.jit.storage = Box::new(storage);
1069 }
1070
1071 /// v1.1 A1 Session A — install the no-op JIT backend. `try_compile`
1072 /// reports "skipped" so every closure stays on the interpreter
1073 /// path, and the trace recorder's compile attempt always returns
1074 /// `None`. Intended for tests that want to verify the trait
1075 /// boundary works in a JIT-free configuration, and for the future
1076 /// `luna-core` build path that ships without Cranelift.
1077 ///
1078 /// Calling this on a Vm whose closures already populated
1079 /// `Proto.jit: JitProtoState::Compiled` does NOT evict those
1080 /// cached entries — the dispatcher will still call into them. For
1081 /// a truly JIT-free run, call this immediately after construction.
1082 pub fn install_null_jit(&mut self) {
1083 self.jit.chunk_compiler = Box::new(crate::jit::NullJitBackend);
1084 self.jit.trace_compiler = Box::new(crate::jit::NullJitBackend);
1085 }
1086
1087 /// Open the entire 5.5 standard library on a `new_minimal`-built Vm.
1088 /// `Vm::new` calls this; sandboxed embedders open libraries one at a
1089 /// time instead (`open_base`, `open_math`, `open_table`, …).
1090 pub fn open_all_libs(&mut self) {
1091 self.open_base();
1092 self.open_math();
1093 self.open_table();
1094 self.open_string();
1095 self.open_utf8();
1096 self.open_os_io();
1097 self.open_debug();
1098 self.open_coroutine();
1099 self.open_package();
1100 // PUC 5.2 introduced `bit32` and 5.3 retired it (the native bitwise
1101 // operators replace it on 64-bit integers). Only expose it under 5.2
1102 // so bitwise.lua's first line (`bit32.band(...)`) resolves without
1103 // leaking the global into newer dialects.
1104 if self.version == LuaVersion::Lua52 {
1105 self.open_bit32();
1106 }
1107 }
1108
    /// Install the base library (`print`, `type`, `pairs`, `tostring`,
    /// `pcall`, `error`, `assert`, `select`, `setmetatable`, `getmetatable`,
    /// `rawequal`, `rawget`, `rawset`, `rawlen`, `next`, `tonumber`,
    /// `collectgarbage`, `warn` on 5.4+, `_VERSION`, `_G`, plus 5.1's
    /// retired globals `unpack`, `loadstring`, `setfenv`, `getfenv`,
    /// `newproxy`, `gcinfo` when version == 5.1). Safe to call at most
    /// once per Vm.
    ///
    /// Thin wrapper: the work is done by `crate::vm::builtins::open_base`.
    pub fn open_base(&mut self) {
        crate::vm::builtins::open_base(self);
    }
    /// Install the `math` standard library.
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_math::open_math`.
    pub fn open_math(&mut self) {
        crate::vm::lib_math::open_math(self);
    }
    /// Install the `table` standard library.
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_table::open_table`.
    pub fn open_table(&mut self) {
        crate::vm::lib_table::open_table(self);
    }
    /// Install the `string` standard library (and the shared string metatable).
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_string::open_string`.
    pub fn open_string(&mut self) {
        crate::vm::lib_string::open_string(self);
    }
    /// Install the `utf8` standard library (5.3+).
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_utf8::open_utf8`.
    pub fn open_utf8(&mut self) {
        crate::vm::lib_utf8::open_utf8(self);
    }
    /// Install the `os` and `io` standard libraries together.
    ///
    /// `os` and `io` are merged because file userdata shares state with both
    /// (`io.tmpname` and `os.tmpname` are the same function, `io.popen`
    /// wraps `os.execute`'s shell).
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_os_io::open_os_io`.
    pub fn open_os_io(&mut self) {
        crate::vm::lib_os_io::open_os_io(self);
    }
    /// Install the `debug` standard library (introspection / hooks). Off by
    /// default for sandbox embedders.
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_debug::open_debug`.
    pub fn open_debug(&mut self) {
        crate::vm::lib_debug::open_debug(self);
    }
    /// Install the `coroutine` standard library.
    ///
    /// Thin wrapper: the work is done by
    /// `crate::vm::lib_coroutine::open_coroutine`.
    pub fn open_coroutine(&mut self) {
        crate::vm::lib_coroutine::open_coroutine(self);
    }
    /// Install `package` plus the 5.1-only `module` and `package.seeall`
    /// aliases.
    ///
    /// Thin wrapper: the work is done by `crate::vm::lib_os_io::open_package`
    /// (note: the implementation lives in the `lib_os_io` module).
    pub fn open_package(&mut self) {
        crate::vm::lib_os_io::open_package(self);
    }
    /// Install the 5.2-only `bit32` library (5.3+ retired it in favour of
    /// native bitwise ops on 64-bit integers).
    ///
    /// This method itself performs no version check; `open_all_libs` only
    /// calls it when the Vm's version is 5.2.
    pub fn open_bit32(&mut self) {
        crate::vm::lib_bit32::open_bit32(self);
    }
1159
1160 /// xoshiro256** next.
1161 pub(crate) fn rng_next(&mut self) -> u64 {
1162 let s = &mut self.rng;
1163 let result = s[1].wrapping_mul(5).rotate_left(7).wrapping_mul(9);
1164 let t = s[1] << 17;
1165 s[2] ^= s[0];
1166 s[3] ^= s[1];
1167 s[1] ^= s[2];
1168 s[0] ^= s[3];
1169 s[2] ^= t;
1170 s[3] = s[3].rotate_left(45);
1171 result
1172 }
1173
1174 /// Seed the RNG via splitmix64 expansion (PUC randseed shape).
1175 pub(crate) fn rng_seed(&mut self, a: u64, b: u64) {
1176 // PUC setseed: state = [n1, 0xff, n2, 0] (0xff avoids an all-zero
1177 // state), then 16 discards to spread the seed. Matches PUC's exact
1178 // sequence so the low-level conformance test passes.
1179 self.rng = [a, 0xff, b, 0];
1180 for _ in 0..16 {
1181 self.rng_next();
1182 }
1183 }
1184
    /// Wall-clock time elapsed since this VM was created, measured from the
    /// `started` instant (used as an `os.clock` approximation).
    pub(crate) fn uptime(&self) -> std::time::Duration {
        self.started.elapsed()
    }
1189
1190 /// Entropy for math.randomseed() with no arguments.
1191 pub(crate) fn rng_auto_seed(&mut self) -> (i64, i64) {
1192 let t = std::time::SystemTime::now()
1193 .duration_since(std::time::UNIX_EPOCH)
1194 .map(|d| d.as_nanos() as u64)
1195 .unwrap_or(0);
1196 let addr = &self.rng as *const _ as u64;
1197 (t as i64, addr as i64)
1198 }
1199
1200 /// Allocate a native function object (no upvalues): builtin registration.
1201 pub fn native(&mut self, f: crate::runtime::value::NativeFn) -> Value {
1202 Value::Native(self.heap.new_native(f, Box::new([])))
1203 }
1204
1205 /// Allocate a native function object with captured upvalues.
1206 pub fn native_with(
1207 &mut self,
1208 f: crate::runtime::value::NativeFn,
1209 upvals: Box<[Value]>,
1210 ) -> Value {
1211 Value::Native(self.heap.new_native(f, upvals))
1212 }
1213
    /// Install the shared string metatable (string library, P04).
    ///
    /// The metatable is stored in slot 3 of the per-type metatable array
    /// `type_mt`; passing `None` clears it.
    pub fn set_string_metatable(&mut self, mt: Option<Gc<Table>>) {
        self.type_mt[3] = mt;
    }
1218
    /// The current globals table (`_G` / `_ENV` source for new chunks).
    ///
    /// Returns the handle by value; it reflects whatever table was last
    /// installed through `set_globals`.
    pub fn globals(&self) -> Gc<Table> {
        self.globals
    }
1223
1224 /// Remaining VM stack slots (PUC `L->stack_last - L->top` analogue).
1225 /// Library code that pushes a known number of fresh slots — e.g.
1226 /// `table.unpack` returning N values — consults this to refuse when
1227 /// the push would blow past `LUAI_MAXSTACK`. 5.3 coroutine.lua :530's
1228 /// `for j in {lim-10, lim-5, …}` series pins this contract: the
1229 /// coroutine's already-built table eats a few slots, so an unpack of
1230 /// ~lim values can't fit.
1231 pub(crate) fn stack_room(&self) -> i64 {
1232 PUC_MAXSTACK - (self.stack.len() as i64)
1233 }
1234
    /// Repoint the thread's "global table" used by *future* `Vm::load` calls
    /// for the chunk's `_ENV` upvalue (PUC 5.1 `setfenv(0, env)` rewrites
    /// `L->l_gt`). Already-loaded chunks keep their own snapshot via the
    /// per-closure cell-0 clone in `Op::Closure`, so they are unaffected.
    ///
    /// This only replaces the stored handle; the previous table is not
    /// modified.
    pub(crate) fn set_globals(&mut self, env: Gc<Table>) {
        self.globals = env;
    }
1242
    /// The Lua dialect this VM was constructed for (5.1 / 5.2 / 5.3 / 5.4 /
    /// 5.5). Determines numeric semantics, available standard libraries, and
    /// metamethod behavior.
    ///
    /// Returns the value passed to the constructor.
    pub fn version(&self) -> LuaVersion {
        self.version
    }
1249
1250 /// Set a global by name. `v` may be any `IntoValue`: a primitive
1251 /// (`i64`, `f64`, `bool`, `&str`, `String`, `Vec<u8>`), a `Value`
1252 /// directly, an `Option<T>`, or a `Gc<Table>` / `Gc<LuaClosure>` /
1253 /// `Gc<NativeClosure>` handle.
1254 ///
1255 /// Returns `Err(LuaError)` only if the globals table overflows
1256 /// (extremely unlikely in practice — `MAX_ASIZE = 1 << 27`).
1257 /// String interning + key construction cannot fail.
1258 ///
1259 /// ```
1260 /// # use luna_core::vm::Vm;
1261 /// # use luna_core::version::LuaVersion;
1262 /// let mut vm = Vm::sandbox(LuaVersion::Lua55).open_base().build();
1263 /// vm.set_global("answer", 42).unwrap();
1264 /// vm.set_global("ratio", 0.5_f64).unwrap();
1265 /// vm.set_global("hello", "world").unwrap();
1266 /// let r = vm.eval("return answer, ratio, hello").unwrap();
1267 /// assert_eq!(r.len(), 3);
1268 /// ```
1269 pub fn set_global<V: crate::vm::IntoValue>(
1270 &mut self,
1271 name: &str,
1272 v: V,
1273 ) -> Result<(), LuaError> {
1274 let v = v.into_value(self);
1275 let k = Value::Str(self.heap.intern(name.as_bytes()));
1276 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1277 unsafe { self.globals.as_mut() }.set(&mut self.heap, k, v)?;
1278 self.heap
1279 .barrier_back(self.globals.as_ptr() as *mut crate::runtime::heap::GcHeader);
1280 Ok(())
1281 }
1282
1283 /// Backward write barrier shorthand for native lib code: demote `t` from
1284 /// BLACK back to gray so the next propagate step re-traces its fields.
1285 /// No-op outside Propagate (parent is never BLACK at mutation time).
1286 pub(crate) fn barrier_back_table(&mut self, t: Gc<Table>) {
1287 self.heap
1288 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
1289 }
1290
1291 /// Forward write barrier shorthand: a closed upvalue is a single-slot
1292 /// container — `barrier_forward` is cheaper than `barrier_back` here.
1293 /// No-op outside Propagate.
1294 pub(crate) fn barrier_forward_upvalue(&mut self, uv: Gc<Upvalue>, child: Value) {
1295 self.heap
1296 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, child);
1297 }
1298
    /// v1.3 Phase ML — register a MacroLua macro under `name`. Inert
    /// under non-MacroLua dialects (the macro is stored but the load
    /// path only consults the registry when
    /// `self.version == LuaVersion::MacroLua`).
    ///
    /// `name` is stored without the leading `@` — source code writes
    /// `@double(x)` to invoke a macro registered as `"double"`.
    ///
    /// Forwards directly to the per-Vm macro registry's `register`.
    pub fn define_macro(&mut self, name: &str, m: Box<dyn crate::frontend::macro_expander::Macro>) {
        self.macro_registry.register(name, m);
    }
1309
    /// v1.3 Phase ML — drop all MacroLua macros (built-in + custom) by
    /// clearing the per-Vm macro registry.
    /// Mostly useful for tests / dogfood resets.
    pub fn clear_macros(&mut self) {
        self.macro_registry.clear();
    }
1315
1316 /// Parse + compile a chunk and close it over the globals table.
1317 pub fn load(&mut self, src: &[u8], chunkname: &[u8]) -> Result<Gc<LuaClosure>, SyntaxError> {
1318 // Reject oversize input *before* handing the parser/lexer a
1319 // potentially multi-GB slice. The PUC-shaped `not enough memory`
1320 // message keeps `heavy.lua::loadrep` compatibility: that test
1321 // accepts either `string length overflow` or `not enough memory`
1322 // as the failure mode for a feeder loop that outruns the host
1323 // allocator. See `set_loader_input_budget`.
1324 if src.len() > self.loader_input_budget {
1325 return Err(SyntaxError {
1326 line: 0,
1327 msg: b"not enough memory".to_vec(),
1328 });
1329 }
1330 // a precompiled (binary) chunk is undumped; source is parsed + compiled
1331 let is_bytecode = crate::vm::dump::is_binary_chunk(src);
1332 if is_bytecode && !self.bytecode_loading {
1333 return Err(SyntaxError {
1334 line: 0,
1335 msg: b"attempt to load a binary chunk (bytecode loading disabled)".to_vec(),
1336 });
1337 }
1338 let proto = if is_bytecode {
1339 let allow_puc = self.puc_bytecode_loading;
1340 crate::vm::dump::undump(src, &mut self.heap, self.version, allow_puc).map_err(
1341 |msg| SyntaxError {
1342 line: 0,
1343 msg: msg.into_bytes(),
1344 },
1345 )?
1346 } else if self.version.is_macro_lua() {
1347 // v1.3 Phase ML — MacroLua dialect: drain the lexer into a
1348 // token vec, run the macro expander pre-pass against the
1349 // per-Vm registry, then hand the rewritten stream to
1350 // `parse_tokens`. The AST + compiler are dialect-agnostic
1351 // because by this point all `@`/quote tokens are gone.
1352 let mut lexer = crate::frontend::lexer::Lexer::new(src, self.version);
1353 let mut raw: Vec<crate::frontend::token::TokenInfo> = Vec::new();
1354 loop {
1355 let t = lexer.next_token()?;
1356 let eof = matches!(t.tok, crate::frontend::token::Token::Eof);
1357 raw.push(t);
1358 if eof {
1359 break;
1360 }
1361 }
1362 // Drop the trailing Eof — expander operates on the body and
1363 // `parse_tokens` reinserts Eof when it runs out of tokens.
1364 raw.pop();
1365 let expanded = self.macro_registry.expand(raw)?;
1366 let ast = crate::frontend::parse_tokens(expanded, src, self.version)?;
1367 compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1368 } else {
1369 let ast = parse(src, self.version)?;
1370 compile_chunk(&ast, self.version, chunkname, &mut self.heap)?
1371 };
1372 // PUC `lua_load` (lapi.c) only seeds the loaded closure's first
1373 // upvalue with the globals table when the closure has *exactly* one
1374 // upvalue — that's the main-chunk `_ENV` case. A dumped non-main
1375 // function with two-or-more upvalues keeps every cell at nil; the
1376 // host must use `debug.setupvalue` to wire them up. 5.2 calls.lua
1377 // :293's `assert(x() == nil)` pins this contract.
1378 let n = proto.upvals.len();
1379 let mut ups: Vec<Gc<Upvalue>> = Vec::with_capacity(n.max(1));
1380 if n == 0 {
1381 // synthetic main chunk has no declared upvalues, but the engine
1382 // still expects at least one cell so the host can probe via
1383 // `debug.upvalueid` etc. Match the historical luna shape.
1384 ups.push(
1385 self.heap
1386 .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1387 );
1388 } else if n == 1 {
1389 ups.push(
1390 self.heap
1391 .new_upvalue(UpvalState::Closed(Value::Table(self.globals))),
1392 );
1393 } else {
1394 for _ in 0..n {
1395 ups.push(self.heap.new_upvalue(UpvalState::Closed(Value::Nil)));
1396 }
1397 }
1398 Ok(self.heap.new_closure(proto, ups.into_boxed_slice()))
1399 }
1400
    /// Compile and run `src` as an anonymous chunk; return its results.
    /// Source name in the traceback is `"=eval"`. Syntax errors are
    /// surfaced as `LuaError` carrying the formatted PUC-style message
    /// (interned through the heap so the error value composes with
    /// `pcall` / `error_text` like any runtime error).
    ///
    /// Shorthand for `eval_chunk(src, "=eval")`.
    pub fn eval(&mut self, src: &str) -> Result<Vec<Value>, LuaError> {
        self.eval_chunk(src, "=eval")
    }
1409
1410 /// Render an error value for messages/tests. Non-string errors —
1411 /// `error({code=…})`, `error(42)`, etc. — collapse to a type tag
1412 /// (`"(error object is a table value)"`); embedders that need
1413 /// structured payloads should inspect `e.0` directly. Errors whose
1414 /// text starts with `"native panic:"` indicate a Rust panic
1415 /// crossed `catch_unwind` — the Vm may be inconsistent and should
1416 /// be dropped (do not reuse).
1417 pub fn error_text(&self, e: &LuaError) -> String {
1418 match e.0 {
1419 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
1420 v => format!("(error object is a {} value)", v.type_name()),
1421 }
1422 }
1423
1424 /// Call any callable value from the host (or from natives like pcall).
1425 pub fn call_value(&mut self, f: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
1426 // host-level entry (no enclosing exec): drop any error state from a
1427 // prior call that propagated uncaught (`error_traceback` would
1428 // otherwise leak into the next debug.traceback call).
1429 if self.public_call_depth == 0 {
1430 self.error_traceback = None;
1431 }
1432 self.public_call_depth += 1;
1433 // P11-S2 — JIT fast path. A host call with no args targeting a Lua
1434 // chunk whose body fits the S1 int-arith whitelist short-circuits
1435 // the whole interpreter dispatch and runs straight through the
1436 // mmap'd native code. The lookup is one Cell::get + one match —
1437 // the slow path (compile attempt on first reach) is paid once per
1438 // Proto.
1439 if args.is_empty()
1440 && let Value::Closure(cl) = f
1441 && let Some(vs) = self.try_jit_call(cl)
1442 {
1443 self.public_call_depth -= 1;
1444 return Ok(vs);
1445 }
1446 let r = self.call_value_impl(f, args, true);
1447 self.public_call_depth -= 1;
1448 r
1449 }
1450
1451 /// P11-S2 — peek/populate the Proto's JIT cache slot, returning
1452 /// `Some(values)` when the cached native fn is callable for a
1453 /// zero-arg call. (Non-zero-arg dispatch is handled by
1454 /// `try_jit_call_op` from inside `begin_call`.)
1455 fn try_jit_call(&mut self, cl: Gc<LuaClosure>) -> Option<Vec<Value>> {
1456 use crate::runtime::function::JitProtoState;
1457 if !self.jit.enabled {
1458 return None;
1459 }
1460 let proto = cl.proto;
1461 if let JitProtoState::Untried = proto.jit.get() {
1462 self.populate_jit_cache(proto);
1463 }
1464 match proto.jit.get() {
1465 JitProtoState::Compiled {
1466 entry,
1467 num_args: 0,
1468 returns_one,
1469 arg_float_mask: _,
1470 arg_table_mask: _,
1471 ret_is_float,
1472 ret_is_table,
1473 } => {
1474 // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1475 let f: crate::jit::IntChunkFn = unsafe { std::mem::transmute(entry) };
1476 // P11-S5c / S5d.J — install the active Vm + closure
1477 // for any Rust helper the JIT'd code may call (e.g.
1478 // `luna_jit_new_table`, `luna_jit_upval_get`) via
1479 // cranelift `Linkage::Import`. RAII clear on return.
1480 // Chunks with no upvalue reads don't touch the closure
1481 // slot, paying nothing.
1482 // v1.1 A1 Session A — route through chunk_compiler so
1483 // the NullJitBackend path stays inert. Raw-ptr arg
1484 // avoids the &mut self borrow conflict against the
1485 // shared self.jit.chunk_compiler read.
1486 let vm_ptr: *mut Vm = self;
1487 let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1488 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1489 let r = unsafe { f() };
1490 drop(_jit_vm_guard);
1491 // P11-S5d.E' — a JIT helper may have detected a metatable
1492 // on a table operand and parked a deopt request here.
1493 // Discard the sentinel value and return None so the caller
1494 // re-runs the call through the interpreter, which honours
1495 // __index/__newindex.
1496 if self.jit.pending_err.take().is_some() {
1497 return None;
1498 }
1499 Some(if returns_one {
1500 let v = if ret_is_float {
1501 Value::Float(f64::from_bits(r as u64))
1502 } else if ret_is_table {
1503 Value::Table(crate::runtime::Gc::from_ptr(
1504 r as *mut crate::runtime::Table,
1505 ))
1506 } else {
1507 Value::Int(r)
1508 };
1509 vec![v]
1510 } else {
1511 Vec::new()
1512 })
1513 }
1514 // Non-zero-arg Compiled state: call_value's empty-args
1515 // fast path can't drive it. Op::Call handles those.
1516 JitProtoState::Compiled { .. } | JitProtoState::Failed | JitProtoState::Untried => None,
1517 }
1518 }
1519
1520 /// P11-S2 / S2c — populate the cache slot. Flips `Untried` to either
1521 /// `Compiled { … }` or `Failed`; idempotent on already-populated
1522 /// states (call sites guard with a get before invoking).
1523 ///
1524 /// S4: consults a thread-local cross-`Vm` cache keyed by a hash of
1525 /// `proto.code`. Compiled artefacts live in the thread-local
1526 /// `JITModule` so their mmap pages outlive the `Vm`; subsequent
1527 /// `Vm`s loading the same source skip the cranelift compile step
1528 /// entirely.
1529 fn populate_jit_cache(&mut self, proto: Gc<crate::runtime::function::Proto>) {
1530 use crate::runtime::function::JitProtoState;
1531 let version = self.version();
1532 let pre53 = version <= crate::version::LuaVersion::Lua53;
1533 // P11-S5d.J — 5.1 and 5.2 have no Int subtype (all numbers
1534 // are Float). The JIT's `GetUpval` ValueRead path uses this
1535 // to default-pin upvalue reads to Float without a tag check.
1536 let float_only = version <= crate::version::LuaVersion::Lua52;
1537 // v2.0 Track J sub-step J-B — split-borrow JitState so the
1538 // trait method can take `&mut dyn JitStorage` without
1539 // double-borrowing self.jit.
1540 let jit = &mut self.jit;
1541 let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
1542 match jit
1543 .chunk_compiler
1544 .try_compile(storage, proto, pre53, float_only)
1545 {
1546 crate::jit::CompileResult::Compiled {
1547 entry,
1548 num_args,
1549 returns_one,
1550 arg_float_mask,
1551 arg_table_mask,
1552 ret_is_float,
1553 ret_is_table,
1554 } => {
1555 proto.jit.set(JitProtoState::Compiled {
1556 entry,
1557 num_args,
1558 returns_one,
1559 arg_float_mask,
1560 arg_table_mask,
1561 ret_is_float,
1562 ret_is_table,
1563 });
1564 }
1565 crate::jit::CompileResult::Skipped => {
1566 proto.jit.set(JitProtoState::Failed);
1567 }
1568 }
1569 }
1570
1571 /// P11-S2c.B — `Op::Call` JIT fast path. Run inside `begin_call`
1572 /// before `push_frame`. Returns `true` when the call was handled
1573 /// in-place (no new Lua frame). Constraints: every arg slot must
1574 /// be `Value::Int`, the cached arity must match the call site's
1575 /// `nargs`, the host wanted-count `wanted` is honoured by
1576 /// `finish_results`. Also bails when a debug hook is armed —
1577 /// JIT'd code does not fire line / call / return hooks, so any
1578 /// active hook makes the interpreter the source of truth.
1579 fn try_jit_call_op(
1580 &mut self,
1581 cl: Gc<LuaClosure>,
1582 func_slot: u32,
1583 nargs: u32,
1584 wanted: i32,
1585 ) -> bool {
1586 use crate::runtime::function::JitProtoState;
1587 if !self.jit.enabled {
1588 return false;
1589 }
1590 // Any active debug hook means the interpreter has to run the
1591 // call so the hook gets the expected events.
1592 if self.hook.func.is_some() || self.hook.rust_func.is_some() {
1593 return false;
1594 }
1595 let proto = cl.proto;
1596 if let JitProtoState::Untried = proto.jit.get() {
1597 self.populate_jit_cache(proto);
1598 }
1599 let JitProtoState::Compiled {
1600 entry,
1601 num_args,
1602 returns_one,
1603 arg_float_mask,
1604 arg_table_mask,
1605 ret_is_float,
1606 ret_is_table,
1607 } = proto.jit.get()
1608 else {
1609 return false;
1610 };
1611 if num_args as u32 != nargs {
1612 return false;
1613 }
1614 // Pack args into i64 bit-patterns per the per-slot expected
1615 // kind. A Float-typed slot accepts Value::Float verbatim and
1616 // promotes Value::Int(x) via i64 → f64; a Table-typed slot
1617 // accepts only Value::Table and passes the raw Gc ptr; an
1618 // Int-typed slot accepts only Value::Int. Any other shape
1619 // bails to the interpreter so the call's actual dynamics
1620 // (metamethod dispatch / type-coerce) take over.
1621 let mut args: [i64; crate::jit::MAX_JIT_ARITY as usize] =
1622 [0; crate::jit::MAX_JIT_ARITY as usize];
1623 for i in 0..num_args as usize {
1624 let v = self.stack[(func_slot + 1) as usize + i];
1625 let want_float = (arg_float_mask >> i) & 1 == 1;
1626 let want_table = (arg_table_mask >> i) & 1 == 1;
1627 args[i] = match (want_table, want_float, v) {
1628 (true, _, Value::Table(t)) => t.as_ptr() as i64,
1629 (false, false, Value::Int(x)) => x,
1630 (false, true, Value::Float(f)) => f.to_bits() as i64,
1631 (false, true, Value::Int(x)) => (x as f64).to_bits() as i64,
1632 _ => return false,
1633 };
1634 }
1635 // P11-S5c / S5d.J — Vm + closure pin for helpers; see the
1636 // matching guard in `try_jit_call`.
1637 // v1.1 A1 Session A — route through chunk_compiler.
1638 let vm_ptr: *mut Vm = self;
1639 let _jit_vm_guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
1640 // SAFETY: the source `*const u8` is a JIT-compiled function entry pointer produced by Cranelift with the target `fn`-pointer signature (IntChunkFn / IntFnN); the JitVmGuard above keeps the JIT_VM TLS slot live across the call.
1641 let r = unsafe {
1642 match num_args {
1643 0 => (std::mem::transmute::<*const u8, crate::jit::IntChunkFn>(entry))(),
1644 1 => (std::mem::transmute::<*const u8, crate::jit::IntFn1>(entry))(args[0]),
1645 2 => {
1646 (std::mem::transmute::<*const u8, crate::jit::IntFn2>(entry))(args[0], args[1])
1647 }
1648 3 => (std::mem::transmute::<*const u8, crate::jit::IntFn3>(entry))(
1649 args[0], args[1], args[2],
1650 ),
1651 4 => (std::mem::transmute::<*const u8, crate::jit::IntFn4>(entry))(
1652 args[0], args[1], args[2], args[3],
1653 ),
1654 _ => unreachable!("MAX_JIT_ARITY enforces num_args <= 4"),
1655 }
1656 };
1657 drop(_jit_vm_guard);
1658 // P11-S5d.E' — see matching path in `try_jit_call`. A helper
1659 // flagged a metatable on a table operand; bail to the interpreter
1660 // so `push_frame` runs the call from scratch.
1661 if self.jit.pending_err.take().is_some() {
1662 return false;
1663 }
1664 // Write result at func_slot, replacing the closure value, then
1665 // hand to finish_results to pad/truncate per the call site's
1666 // `wanted` count.
1667 if returns_one {
1668 let v = if ret_is_float {
1669 Value::Float(f64::from_bits(r as u64))
1670 } else if ret_is_table {
1671 Value::Table(crate::runtime::Gc::from_ptr(
1672 r as *mut crate::runtime::Table,
1673 ))
1674 } else {
1675 Value::Int(r)
1676 };
1677 self.stack[func_slot as usize] = v;
1678 self.finish_results(func_slot, 1, wanted);
1679 } else {
1680 self.finish_results(func_slot, 0, wanted);
1681 }
1682 true
1683 }
1684
1685 /// `call_value` with control over the `from_c` debug boundary. A `__close`
1686 /// handler runs *within* the closing Lua frame's activation (PUC luaF_close
1687 /// invokes it inside that ci), so it is called with `from_c = false`: its
1688 /// debug parent is the closing function, not a synthetic C level.
1689 fn call_value_impl(
1690 &mut self,
1691 f: Value,
1692 args: &[Value],
1693 from_c: bool,
1694 ) -> Result<Vec<Value>, LuaError> {
1695 if self.c_depth >= MAX_C_DEPTH {
1696 return Err(self.rt_err("stack overflow"));
1697 }
1698 self.c_depth += 1;
1699 let func_slot = self.stack.len() as u32;
1700 self.stack.push(f);
1701 self.stack.extend_from_slice(args);
1702 self.top = self.stack.len() as u32;
1703 let r = self.call_at(func_slot, args.len() as u32, from_c);
1704 self.c_depth -= 1;
1705 if r.is_err()
1706 && self.yielding.is_none()
1707 && self.terminating.is_none()
1708 && !self.host_yield_pending
1709 && self.pending_async_native_fut.is_none()
1710 {
1711 // A `coroutine.yield` in flight raises a sentinel error to unwind the
1712 // Rust stack, but the suspended coroutine's frames/registers (which
1713 // sit at/above `func_slot`) must survive for the next resume — so we
1714 // only truncate on a real error. A self-close termination is in the
1715 // same boat: the dying thread's state is discarded wholesale.
1716 // v1.1 B10 — a `host_yield_pending` cooperative yield is in
1717 // the same boat as `yielding`: the next `EvalFuture::poll`
1718 // resumes the same call, so the in-flight frames must
1719 // survive.
1720 self.stack.truncate(func_slot as usize);
1721 self.top = func_slot;
1722 }
1723 r
1724 }
1725
1726 /// Invoke `f` with the running thread marked non-yieldable for the duration
1727 /// (PUC `luaD_callnoyield`): a `coroutine.yield` inside `f` hits the C-call
1728 /// boundary and errors instead of suspending. Used by library callbacks
1729 /// (sort comparator, gsub replacement) that run via synchronous Rust
1730 /// recursion and so could not be re-entered after a yield.
1731 pub(crate) fn call_noyield(
1732 &mut self,
1733 f: Value,
1734 args: &[Value],
1735 ) -> Result<Vec<Value>, LuaError> {
1736 self.nny += 1;
1737 let r = self.call_value(f, args);
1738 self.nny -= 1;
1739 r
1740 }
1741
1742 // ---- coroutines (P05) ----
1743
1744 pub(crate) fn new_coro(&mut self, body: Value) -> Gc<Coro> {
1745 // The new coroutine inherits the creating thread's current globals
1746 // (PUC `lua_newthread`: the new state copies `g->mainthread`'s
1747 // `l_gt`). `Vm.globals` always reflects the live thread, so reading
1748 // it here picks the creator regardless of which coro is running.
1749 self.heap.new_coro(body, self.globals)
1750 }
1751
1752 /// Is `t` the thread whose context is currently live in the VM?
1753 pub(crate) fn is_current_thread(&self, t: Option<Gc<Coro>>) -> bool {
1754 match (self.current, t) {
1755 (None, None) => true,
1756 (Some(a), Some(b)) => a.ptr_eq(b),
1757 _ => false,
1758 }
1759 }
1760
1761 /// Read an open-upvalue slot from its owning thread's stack (the live VM
1762 /// stack if that thread is current, else its saved context).
1763 #[doc(hidden)]
1764 pub fn read_slot(&self, slot: u32, thread: Option<Gc<Coro>>) -> Value {
1765 let s = slot as usize;
1766 if self.is_current_thread(thread) {
1767 self.stack[s]
1768 } else {
1769 match thread {
1770 Some(co) => co.stack[s],
1771 None => self.main_ctx.as_ref().expect("main context").stack[s],
1772 }
1773 }
1774 }
1775
1776 fn write_slot(&mut self, slot: u32, thread: Option<Gc<Coro>>, v: Value) {
1777 let s = slot as usize;
1778 if self.is_current_thread(thread) {
1779 self.stack[s] = v;
1780 } else {
1781 match thread {
1782 Some(co) => {
1783 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1784 unsafe { co.as_mut() }.stack[s] = v;
1785 // co.stack is traced by Coro::trace; demote co back to
1786 // gray so propagate re-traces this slot if it was
1787 // already black.
1788 self.heap
1789 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
1790 }
1791 None => self.main_ctx.as_mut().expect("main context").stack[s] = v,
1792 }
1793 }
1794 }
1795
1796 /// Whether `co` is the main thread's identity object.
1797 pub(crate) fn is_main_coro(&self, co: Gc<Coro>) -> bool {
1798 self.main_coro.is_some_and(|m| m.ptr_eq(co))
1799 }
1800
1801 /// The status of `co` from the caller's view. The main thread's identity
1802 /// object has no stored status — it is "running" when nothing else runs,
1803 /// else "normal" (it resumed the active coroutine).
1804 pub(crate) fn effective_coro_status(&self, co: Gc<Coro>) -> CoroStatus {
1805 if self.is_main_coro(co) {
1806 if self.current.is_none() {
1807 CoroStatus::Running
1808 } else {
1809 CoroStatus::Normal
1810 }
1811 } else {
1812 co.status
1813 }
1814 }
1815
1816 /// `coroutine.close` (PUC `lua_closethread`): run the suspended coroutine's
1817 /// pending to-be-closed `__close` handlers, then mark it dead and drop its
1818 /// context. Handlers see the coroutine's death error (if it died by error)
1819 /// or nil; an error they raise propagates out. `Ok(Some(e))` means it died
1820 /// with error `e` and no handler overrode it; `Err` means a handler raised.
1821 pub(crate) fn close_coro(&mut self, co: Gc<Coro>) -> Result<Option<Value>, LuaError> {
1822 // re-entrant close: a __close handler closed its own coroutine while the
1823 // outer close is mid-flight (its context is live). Report success and let
1824 // the outer close finish — re-entering the swap would corrupt the stack.
1825 if self.current.is_some_and(|c| c.ptr_eq(co)) {
1826 return Ok(None);
1827 }
1828 // A chain of coroutines whose `__close` handlers each close the previous
1829 // one recurses on the C stack (PUC `luaD_callnoyield` in `lua_closethread`).
1830 // The calling handler's `call_value` has already pushed `c_depth` to the
1831 // cap, so here it reads as full first — report PUC's "C stack overflow"
1832 // before the next handler call would surface the plainer "stack overflow".
1833 if self.c_depth >= MAX_C_DEPTH {
1834 return Err(self.rt_err("C stack overflow"));
1835 }
1836 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1837 let death_err = unsafe { co.as_mut() }.error_value.take();
1838 // swap the caller's live context out (into a GC-rooted home) and the
1839 // coroutine's in, mirroring resume_coro, so the __close handlers run on
1840 // the coroutine's stack while everything stays rooted.
1841 let resumer = self.current;
1842 let rctx = self.take_ctx();
1843 match resumer {
1844 Some(r) => {
1845 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1846 let m = unsafe { r.as_mut() };
1847 m.stack = rctx.stack;
1848 m.frames = rctx.frames;
1849 m.open_upvals = rctx.open_upvals;
1850 m.tbc = rctx.tbc;
1851 m.top = rctx.top;
1852 m.pcall_depth = rctx.pcall_depth;
1853 }
1854 None => self.main_ctx = Some(rctx),
1855 }
1856 self.load_coro_ctx(co);
1857 self.current = Some(co);
1858 let result = self.close_slots(0, death_err);
1859 // discard the (now-closed) coroutine context and restore the caller
1860 let _ = self.take_ctx();
1861 match resumer {
1862 Some(r) => {
1863 self.load_coro_ctx(r);
1864 self.current = Some(r);
1865 }
1866 None => {
1867 let m = self.main_ctx.take().expect("main context saved");
1868 self.put_ctx(m);
1869 self.current = None;
1870 }
1871 }
1872 {
1873 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1874 let m = unsafe { co.as_mut() };
1875 m.status = CoroStatus::Dead;
1876 m.stack = Vec::new();
1877 m.frames = Vec::new();
1878 m.open_upvals = Vec::new();
1879 m.tbc = Vec::new();
1880 m.top = 0;
1881 m.pcall_depth = 0;
1882 m.resume_at = None;
1883 m.error_value = None;
1884 }
1885 result.map(|()| death_err)
1886 }
1887
1888 /// `coroutine.running`: the running thread plus whether it is the main one.
1889 pub(crate) fn running_thread(&self) -> (Value, bool) {
1890 match self.current {
1891 Some(co) => (Value::Coro(co), false),
1892 None => (Value::Coro(self.main_coro.expect("main coro")), true),
1893 }
1894 }
1895
1896 /// `coroutine.isyieldable([co])`: whether `co` (default: the running
1897 /// thread) can yield. The main thread never can; any other coroutine can
1898 /// unless it is dead.
1899 pub(crate) fn is_yieldable(&self, co: Option<Gc<Coro>>) -> bool {
1900 match co {
1901 Some(c) => !self.main_coro.is_some_and(|m| m.ptr_eq(c)) && c.status != CoroStatus::Dead,
1902 // the running thread can yield only outside any non-yieldable C call
1903 None => self.current.is_some() && self.nny == 0,
1904 }
1905 }
1906
1907 /// Why `coroutine.yield` may not suspend the running thread right now, as a
1908 /// PUC error message — `None` if it may. Distinguishes "not in a coroutine"
1909 /// from "inside an unyieldable C call" (sort/gsub callback).
1910 pub(crate) fn yield_barrier(&self) -> Option<&'static str> {
1911 if self.current.is_none() {
1912 Some("attempt to yield from outside a coroutine")
1913 } else if self.nny > 0 {
1914 Some("attempt to yield across a C-call boundary")
1915 } else {
1916 None
1917 }
1918 }
1919
1920 /// The coroutine whose context is currently live (`None` on the main thread).
1921 pub(crate) fn current_coro(&self) -> Option<Gc<Coro>> {
1922 self.current
1923 }
1924
1925 /// `coroutine.close()` on the *running* thread (PUC 5.5 close-self): run all
1926 /// its pending `__close` handlers, then signal termination. The handlers run
1927 /// here, in place, with the thread still non-yieldable (a yield in one hits
1928 /// the C-call boundary). The returned sentinel unwinds the Rust stack the
1929 /// way a yield does — `exec_with` propagates it past any protecting pcall
1930 /// rather than letting `unwind` catch it — and `resume_coro` turns it into a
1931 /// clean death (or, if a handler raised, the coroutine's error).
1932 pub(crate) fn close_running(&mut self) -> LuaError {
1933 let death = match self.close_slots(0, None) {
1934 Ok(()) => None,
1935 Err(e) => Some(e.0),
1936 };
1937 self.terminating = Some(death);
1938 LuaError(Value::Nil)
1939 }
1940
1941 /// `coroutine.status` as seen by the caller.
1942 pub(crate) fn coro_status_str(&self, co: Gc<Coro>) -> &'static str {
1943 match self.effective_coro_status(co) {
1944 CoroStatus::Suspended => "suspended",
1945 CoroStatus::Running => "running",
1946 CoroStatus::Normal => "normal",
1947 CoroStatus::Dead => "dead",
1948 }
1949 }
1950
1951 fn take_ctx(&mut self) -> SavedCtx {
1952 let saved = SavedCtx {
1953 stack: std::mem::take(&mut self.stack),
1954 frames: std::mem::take(&mut self.frames),
1955 open_upvals: std::mem::take(&mut self.open_upvals),
1956 tbc: std::mem::take(&mut self.tbc),
1957 top: self.top,
1958 pcall_depth: self.pcall_depth,
1959 hook: self.hook,
1960 globals: self.globals,
1961 };
1962 self.frames_resync(); // P17-D Week 1 — frames now empty.
1963 saved
1964 }
1965
1966 fn put_ctx(&mut self, c: SavedCtx) {
1967 self.stack = c.stack;
1968 self.frames = c.frames;
1969 self.open_upvals = c.open_upvals;
1970 self.tbc = c.tbc;
1971 self.top = c.top;
1972 self.pcall_depth = c.pcall_depth;
1973 self.hook = c.hook;
1974 self.globals = c.globals;
1975 self.frames_resync(); // P17-D Week 1 — sync shadow to new Vec.
1976 }
1977
1978 /// Move a coroutine's saved context into the live VM fields.
1979 fn load_coro_ctx(&mut self, co: Gc<Coro>) {
1980 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1981 let m = unsafe { co.as_mut() };
1982 self.stack = std::mem::take(&mut m.stack);
1983 self.frames = std::mem::take(&mut m.frames);
1984 self.open_upvals = std::mem::take(&mut m.open_upvals);
1985 self.tbc = std::mem::take(&mut m.tbc);
1986 self.top = m.top;
1987 self.frames_resync(); // P17-D Week 1 — sync shadow to coro's frames.
1988 self.pcall_depth = m.pcall_depth;
1989 self.hook = m.hook;
1990 self.globals = m.globals;
1991 }
1992
1993 /// Save the live VM context back into a coroutine object.
1994 fn store_coro_ctx(&mut self, co: Gc<Coro>) {
1995 let c = self.take_ctx();
1996 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
1997 let m = unsafe { co.as_mut() };
1998 m.stack = c.stack;
1999 m.frames = c.frames;
2000 m.open_upvals = c.open_upvals;
2001 m.tbc = c.tbc;
2002 m.top = c.top;
2003 m.pcall_depth = c.pcall_depth;
2004 m.hook = c.hook;
2005 m.globals = c.globals;
2006 // bulk-overwrite of every collectable field traced by Coro::trace:
2007 // demote the coro back to gray so propagate re-traces its new state.
2008 self.heap
2009 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2010 }
2011
2012 /// `coroutine.resume` core: drive `co` with `args` until it yields, returns
2013 /// or errors. Ok(values) carries yielded or returned values; Err carries an
2014 /// error raised inside the coroutine (the coroutine becomes dead).
2015 pub(crate) fn resume_coro(
2016 &mut self,
2017 co: Gc<Coro>,
2018 args: Vec<Value>,
2019 ) -> Result<Vec<Value>, LuaError> {
2020 match co.status {
2021 CoroStatus::Suspended => {}
2022 CoroStatus::Dead => return Err(self.rt_err("cannot resume dead coroutine")),
2023 _ => return Err(self.rt_err("cannot resume non-suspended coroutine")),
2024 }
2025 if self.c_depth >= MAX_C_DEPTH {
2026 return Err(self.rt_err("C stack overflow"));
2027 }
2028 self.c_depth += 1;
2029 let resumer = self.current;
2030 // save the resumer's live context away
2031 let rctx = self.take_ctx();
2032 match resumer {
2033 Some(r) => {
2034 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2035 let m = unsafe { r.as_mut() };
2036 m.stack = rctx.stack;
2037 m.frames = rctx.frames;
2038 m.open_upvals = rctx.open_upvals;
2039 m.tbc = rctx.tbc;
2040 m.top = rctx.top;
2041 m.pcall_depth = rctx.pcall_depth;
2042 m.globals = rctx.globals;
2043 m.status = CoroStatus::Normal;
2044 // bulk overwrite of every traced field on r — mirror
2045 // store_coro_ctx's barrier_back so propagate re-traces r.
2046 self.heap
2047 .barrier_back(r.as_ptr() as *mut crate::runtime::heap::GcHeader);
2048 }
2049 None => self.main_ctx = Some(rctx),
2050 }
2051 // swap the coroutine in
2052 self.load_coro_ctx(co);
2053 {
2054 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2055 let m = unsafe { co.as_mut() };
2056 m.status = CoroStatus::Running;
2057 m.resumer = resumer;
2058 }
2059 // co.resumer is a traced Gc field; barrier_back covers the new
2060 // resumer reference and any future field writes during this call.
2061 self.heap
2062 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2063 self.current = Some(co);
2064
2065 // drive it
2066 let drive = if co.started {
2067 self.coro_continue(&args)
2068 } else {
2069 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2070 unsafe { co.as_mut() }.started = true;
2071 self.coro_first(co.body, &args)
2072 };
2073
2074 // classify: a self-close termination or a pending yield each win over
2075 // the (sentinel) error they raised to unwind the Rust stack.
2076 let (outcome, status) = if let Some(death) = self.terminating.take() {
2077 // the coroutine closed itself: it dies now, cleanly or with the
2078 // error a `__close` handler raised.
2079 match death {
2080 Some(e) => {
2081 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2082 unsafe { co.as_mut() }.error_value = Some(e);
2083 self.heap
2084 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2085 (Err(LuaError(e)), CoroStatus::Dead)
2086 }
2087 None => (Ok(Vec::new()), CoroStatus::Dead),
2088 }
2089 } else {
2090 match self.yielding.take() {
2091 Some((vals, fslot, nres)) => {
2092 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2093 unsafe { co.as_mut() }.resume_at = Some((fslot, nres));
2094 (Ok(vals), CoroStatus::Suspended)
2095 }
2096 None => {
2097 // died: a return is clean, an error is remembered so a later
2098 // `coroutine.close` can report it (PUC lua_closethread).
2099 // Capture the error-point traceback (set by `unwind` before
2100 // popping the failing frames) and prepend a synthetic
2101 // top entry for the C native that initiated the error
2102 // (PUC `[C]: in function '<name>'`) so `debug.traceback(co)`
2103 // on the dead coroutine still shows the error site
2104 // (db.lua :848 family).
2105 if drive.is_err() {
2106 let mut tb = self.error_traceback.take().unwrap_or_default();
2107 if let Some(nm) = self.errored_native.take() {
2108 let mut prefixed: Vec<u8> = Vec::new();
2109 prefixed.extend_from_slice(
2110 format!("\n\t[C]: in function '{nm}'").as_bytes(),
2111 );
2112 prefixed.extend(tb);
2113 tb = prefixed;
2114 }
2115 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2116 unsafe { co.as_mut() }.error_traceback = Some(tb);
2117 }
2118 if let Err(e) = drive {
2119 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2120 unsafe { co.as_mut() }.error_value = Some(e.0);
2121 self.heap
2122 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2123 }
2124 (drive, CoroStatus::Dead)
2125 }
2126 }
2127 };
2128
2129 // save the coroutine's context back and restore the resumer
2130 self.store_coro_ctx(co);
2131 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2132 unsafe { co.as_mut() }.status = status;
2133 match resumer {
2134 Some(r) => {
2135 self.load_coro_ctx(r);
2136 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2137 unsafe { r.as_mut() }.status = CoroStatus::Running;
2138 self.current = Some(r);
2139 }
2140 None => {
2141 let m = self.main_ctx.take().expect("main context saved");
2142 self.put_ctx(m);
2143 self.current = None;
2144 }
2145 }
2146 self.c_depth -= 1;
2147 outcome
2148 }
2149
2150 /// First resume: install the body function at slot 0 and run.
2151 fn coro_first(&mut self, body: Value, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2152 self.stack.clear();
2153 self.stack.push(body);
2154 self.stack.extend_from_slice(args);
2155 self.top = self.stack.len() as u32;
2156 match self.begin_call(0, Some(args.len() as u32), -1, true) {
2157 Ok(true) => self.exec_with(1),
2158 Ok(false) => Ok(self.take_results(0)),
2159 Err(e) => Err(e),
2160 }
2161 }
2162
2163 /// Resume after a yield: deliver `args` as the results of the call that
2164 /// yielded, then continue the suspended thread.
2165 fn coro_continue(&mut self, args: &[Value]) -> Result<Vec<Value>, LuaError> {
2166 let (fslot, nres) = self.current.unwrap().resume_at.expect("resume point");
2167 let n = args.len() as u32;
2168 // Restore the full register window of the suspended top frame: a yield
2169 // that unwound through a native (call_value) may have left the stack
2170 // shorter than the frame needs. `base + max_stack` is what push_frame
2171 // allocates; `fslot + n` covers the delivered yield results.
2172 let frame_need = self
2173 .frames
2174 .last()
2175 .and_then(CallFrame::lua)
2176 .map(|f| (f.base + f.closure.proto.max_stack as u32) as usize)
2177 .unwrap_or(0);
2178 let need = frame_need.max((fslot + n) as usize);
2179 if self.stack.len() < need {
2180 self.stack.resize(need, Value::Nil);
2181 }
2182 for (i, &v) in args.iter().enumerate() {
2183 self.stack[fslot as usize + i] = v;
2184 }
2185 self.finish_results(fslot, n, nres);
2186 // the suspended `coroutine.yield` (a C call) now returns its resume
2187 // values: fire the matching "return" hook PUC defers until the resume.
2188 self.hook_return(true, 1, n)?;
2189 self.exec_with(1)
2190 }
2191
2192 /// `coroutine.yield`: suspend the running coroutine, recording where to
2193 /// resume. Errors if called outside a coroutine. Returns a sentinel error
2194 /// that `exec`/`resume_coro` recognise as a yield (never surfaced to Lua).
2195 pub(crate) fn do_yield(&mut self, func_slot: u32, vals: Vec<Value>) -> LuaError {
2196 let nres = self.native_nresults;
2197 self.yielding = Some((vals, func_slot, nres));
2198 // value is irrelevant: resume_coro consults `self.yielding`, not this
2199 LuaError(Value::Nil)
2200 }
2201
2202 /// Install or clear the debug hook on the running thread (`debug.sethook`
2203 /// without a thread argument). Arms the calling frame's `oldpc` to the
2204 /// sethook CALL's own pc (one less than the next-to-execute pc), mirroring
2205 /// PUC `rethook`'s `L->oldpc = pcRel(savedpc, p)` (= savedpc - code - 1) on
2206 /// native return: the very next traceexec compares against the sethook
2207 /// CALL's line. When the install statement and the following statement are
2208 /// on different source lines (db.lua :322), `changedline` fires for that
2209 /// first statement; when they share a line (db.lua :25 wrapper), they do
2210 /// not, so the wrapper line is not re-fired.
2211 pub(crate) fn install_hook(&mut self, hook: HookState) {
2212 self.hook = hook;
2213 if self.hook.line
2214 && let Some(f) = self.frames.last_mut().and_then(CallFrame::lua_mut)
2215 {
2216 f.hook_oldpc = f.pc.saturating_sub(1);
2217 }
2218 }
2219
2220 /// Install a hook on `target` (`None`/current thread → the live VM fields;
2221 /// another, suspended thread → its saved `Coro` state). PUC `debug.sethook`
2222 /// with an optional thread argument.
2223 ///
2224 /// `target == None` means "no explicit thread argument" — PUC binds that
2225 /// to `L` (the running thread). luna's live VM fields (`self.hook`,
2226 /// `self.frames`, `self.stack`) ARE the running thread's state, regardless
2227 /// of whether that's the main thread or a currently-resumed coroutine
2228 /// (save/restore happens at resume/yield boundaries via `load_coro_ctx`/
2229 /// `store_coro_ctx`). So a `None` target should always route to
2230 /// `install_hook` on the live fields. The pre-fix predicate gate
2231 /// `is_current_thread(target)` returned `false` when running inside a
2232 /// coroutine (`self.current = Some(co)`, `target = None` don't match)
2233 /// and silently dropped the hook on the floor — the install happened on
2234 /// no thread at all.
2235 pub(crate) fn set_hook(&mut self, target: Option<Gc<Coro>>, state: HookState) {
2236 if target.is_none() || self.is_current_thread(target) {
2237 self.install_hook(state);
2238 } else if let Some(co) = target {
2239 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
2240 let m = unsafe { co.as_mut() };
2241 m.hook = state;
2242 if state.line
2243 && let Some(f) = m.frames.last_mut().and_then(CallFrame::lua_mut)
2244 {
2245 f.hook_oldpc = u32::MAX;
2246 }
2247 // co.hook.func is a traced Value (Coro::trace covers it); demote
2248 // co back to gray so propagate sees the new hook function.
2249 self.heap
2250 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
2251 }
2252 }
2253
2254 /// The hook state of `target` (`None`/current → the live VM state).
2255 pub(crate) fn get_hook(&self, target: Option<Gc<Coro>>) -> HookState {
2256 match target {
2257 t if self.is_current_thread(t) => self.hook,
2258 Some(co) => co.hook,
2259 None => self.hook,
2260 }
2261 }
2262
2263 /// Invoke the debug hook for `event` (PUC `luaD_hook`). The hook runs with
2264 /// hooks disabled (PUC clears the mask) and its results/stack growth are
2265 /// discarded so the interrupted frame's register window is untouched.
2266 /// `line` is the source line for a "line" event, `None` (→ nil) otherwise.
2267 fn run_hook(
2268 &mut self,
2269 event: &[u8],
2270 line: Option<i64>,
2271 from_native: bool,
2272 ) -> Result<(), LuaError> {
2273 // v1.1 B11 — Rust hook fires first (no Vm reentrancy via call_value;
2274 // synchronous fn pointer call). Both Rust and Lua hooks may be
2275 // installed; both observe each event.
2276 if let Some(rh) = self.hook.rust_func {
2277 let evt = match event {
2278 b"call" => Some(RustHookEvent::Call),
2279 b"return" => Some(RustHookEvent::Return),
2280 b"tail call" | b"tail return" => Some(RustHookEvent::TailCall),
2281 b"line" => Some(RustHookEvent::Line(line.unwrap_or(0).max(0) as u32)),
2282 b"count" => Some(RustHookEvent::Count),
2283 _ => None,
2284 };
2285 if let Some(evt) = evt {
2286 let was_in_hook = self.in_hook;
2287 self.in_hook = true;
2288 rh(self, evt);
2289 self.in_hook = was_in_hook;
2290 }
2291 }
2292 let Some(hook) = self.hook.func else {
2293 return Ok(());
2294 };
2295 let saved_top = self.top;
2296 let saved_len = self.stack.len();
2297 let name = Value::Str(self.heap.intern(event));
2298 let lv = line.map_or(Value::Nil, Value::Int);
2299 self.in_hook = true;
2300 // PUC `db_sethook`'s C trampoline `hookf` sits between the engine and
2301 // the Lua hook — so `getinfo(2)` inside the hook resolves to whatever
2302 // ci sat below `hookf` (the function being hooked). When that hooked
2303 // function is native, no Lua frame for it exists in luna's `frames`;
2304 // model it as a synthetic C level by pushing the hook with
2305 // `from_c = true` (then `c_frame_name` reads the caller's call
2306 // instruction → e.g. `name = "sethook"`). When the hooked function is
2307 // Lua (its frame is still on the stack), push with `from_c = false`
2308 // so the level descent lands on it directly. The hook's own frame
2309 // carries `is_hook = true` so `getinfo(1).namewhat` reports "hook"
2310 // (PUC `CIST_HOOKED`).
2311 self.pending_is_hook = true;
2312 let r = self.call_value_impl(hook, &[name, lv], from_native);
2313 self.pending_is_hook = false;
2314 self.in_hook = false;
2315 self.stack.truncate(saved_len);
2316 self.top = saved_top;
2317 r.map(|_| ())
2318 }
2319
2320 /// Fire the "call" hook on entry to a function, if armed and not already in
2321 /// a hook (PUC clears the mask while a hook runs). PUC's transferinfo for
2322 /// a call hook is the param window: ftransfer = 1, ntransfer = nargs.
2323 /// `is_tail` selects the "tail call" event (PUC `LUA_HOOKTAILCALL`); a
2324 /// tail-call hook has no matching return hook (PUC luaD_pretailcall).
2325 fn hook_call_with(
2326 &mut self,
2327 from_native: bool,
2328 nargs: u32,
2329 is_tail: bool,
2330 ) -> Result<(), LuaError> {
2331 if self.hook.call
2332 && !self.in_hook
2333 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2334 {
2335 self.hook_ftransfer = 1;
2336 self.hook_ntransfer = nargs.min(u16::MAX as u32) as u16;
2337 // PUC 5.1 didn't distinguish tail-call events — every call,
2338 // including tail-calls, fired plain `"call"`. 5.2 introduced
2339 // the separate `"tail call"` event (mask `"c"` covers both).
2340 // 5.1 db.lua :366 pins this with `{"call","call","call","call",
2341 // "return","tail return","return","tail return"}`.
2342 let event: &[u8] = if is_tail && self.version >= LuaVersion::Lua52 {
2343 b"tail call"
2344 } else {
2345 b"call"
2346 };
2347 self.run_hook(event, None, from_native)?;
2348 }
2349 Ok(())
2350 }
2351
2352 pub(crate) fn hook_call(&mut self, from_native: bool, nargs: u32) -> Result<(), LuaError> {
2353 self.hook_call_with(from_native, nargs, false)
2354 }
2355
2356 /// Fire the "return" hook on exit from a function, if armed. ftransfer is
2357 /// the first result slot relative to the activation's func slot, ntransfer
2358 /// the number of results.
2359 pub(crate) fn hook_return(
2360 &mut self,
2361 from_native: bool,
2362 ftransfer: u32,
2363 nresults: u32,
2364 ) -> Result<(), LuaError> {
2365 if self.hook.ret
2366 && !self.in_hook
2367 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2368 {
2369 self.hook_ftransfer = ftransfer.min(u16::MAX as u32) as u16;
2370 self.hook_ntransfer = nresults.min(u16::MAX as u32) as u16;
2371 self.run_hook(b"return", None, from_native)?;
2372 }
2373 Ok(())
2374 }
2375
2376 /// PUC "tail return" event — fires once per tail call that collapsed
2377 /// into the activation now returning, *after* its own "return" event.
2378 /// 5.1 hook mask `"r"` covers both `return` and `tail return`.
2379 fn hook_tail_return(&mut self) -> Result<(), LuaError> {
2380 if self.hook.ret
2381 && !self.in_hook
2382 && (self.hook.func.is_some() || self.hook.rust_func.is_some())
2383 {
2384 self.run_hook(b"tail return", None, false)?;
2385 }
2386 Ok(())
2387 }
2388
2389 /// Call a metamethod with a single expected result.
2390 fn call_mm1(&mut self, f: Value, args: &[Value]) -> Result<Value, LuaError> {
2391 let mut r = self.call_value(f, args)?;
2392 Ok(if r.is_empty() {
2393 Value::Nil
2394 } else {
2395 r.swap_remove(0)
2396 })
2397 }
2398
2399 /// Begin a *yieldable* metamethod call from a VM instruction: `func(args…)`
2400 /// driven through the interpreter loop with a `Meta` continuation, so a
2401 /// `coroutine.yield` inside the metamethod suspends and resumes cleanly.
2402 /// On the metamethod's return the loop head runs `finish_meta(action, …)`.
2403 /// Returns to the caller with the call set up — the opcode arm must do no
2404 /// further work on the running frame and let the loop iterate. `tm` is
2405 /// the metamethod event name (e.g. "index", "add"); a Lua handler frame
2406 /// born from this call inherits it via `pending_tm`, so
2407 /// `debug.getinfo(1).namewhat == "metamethod"` and `.name == tm`
2408 /// (db.lua :878).
2409 fn begin_meta_call(
2410 &mut self,
2411 func: Value,
2412 args: &[Value],
2413 action: MetaAction,
2414 tm: &'static str,
2415 ) -> Result<(), LuaError> {
2416 let saved_top = self.top;
2417 let cont_slot = self.stack.len() as u32;
2418 self.stack.push(func);
2419 self.stack.extend_from_slice(args);
2420 self.top = self.stack.len() as u32;
2421 frames_push_sync(
2422 &mut self.frames,
2423 &mut self.frames_top,
2424 CallFrame::Cont(NativeCont {
2425 kind: ContKind::Meta(MetaCont { action, saved_top }),
2426 func_slot: cont_slot,
2427 nresults: 1,
2428 }),
2429 );
2430 let saved_tm = self.pending_tm.replace(tm);
2431 // begin_call drives a Lua metamethod through the loop (returns true) or
2432 // runs a native one inline (returns false, leaving results at cont_slot
2433 // for the loop head to pick up); either way the Meta cont resolves there.
2434 let r = self.begin_call(cont_slot, Some(args.len() as u32), 1, true);
2435 // Native callees never consumed pending_tm (push_frame is only hit on
2436 // a Lua callee); restore so it doesn't leak to a later push_frame.
2437 self.pending_tm = saved_tm;
2438 r?;
2439 Ok(())
2440 }
2441
2442 /// `R[dst] := t[key]` for a VM read opcode, resolving `__index` yieldably.
2443 fn op_index(&mut self, t: Value, key: Value, dst: u32) -> Result<(), LuaError> {
2444 match self.index_step(t, key)? {
2445 MmOut::Done(v) => self.stack[dst as usize] = v,
2446 MmOut::Mm { func, recv } => {
2447 self.begin_meta_call(func, &[recv, key], MetaAction::Store { dst }, "index")?;
2448 }
2449 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
2450 }
2451 Ok(())
2452 }
2453
2454 /// `t[key] := v` for a VM write opcode, resolving `__newindex` yieldably.
2455 fn op_newindex(&mut self, t: Value, key: Value, v: Value) -> Result<(), LuaError> {
2456 match self.newindex_step(t, key, v)? {
2457 MmOut::Done(_) => {}
2458 MmOut::Mm { func, recv } => {
2459 self.begin_meta_call(func, &[recv, key, v], MetaAction::Discard, "newindex")?;
2460 }
2461 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
2462 }
2463 Ok(())
2464 }
2465
2466 /// Apply a comparison opcode's outcome: a known boolean drives the
2467 /// conditional skip directly; a metamethod is called yieldably, its
2468 /// truthiness driving the skip on return.
2469 fn op_compare(
2470 &mut self,
2471 step: MmOut,
2472 l: Value,
2473 r: Value,
2474 k: bool,
2475 tm: &'static str,
2476 ) -> Result<(), LuaError> {
2477 match step {
2478 MmOut::Done(v) => self.cond_skip(v.truthy(), k),
2479 MmOut::Mm { func, .. } => {
2480 self.begin_meta_call(func, &[l, r], MetaAction::Compare { k, negate: false }, tm)?;
2481 }
2482 MmOut::CompareSynth { func } => {
2483 // ≤5.3 `__le` falls back to `not __lt(r, l)`; the swap and
2484 // negation are driven through `MetaAction::Compare` so the
2485 // metamethod call can yield like any other compare.
2486 self.begin_meta_call(func, &[r, l], MetaAction::Compare { k, negate: true }, "lt")?;
2487 }
2488 }
2489 Ok(())
2490 }
2491
2492 /// Complete a VM instruction whose metamethod just returned `result` (PUC
2493 /// `luaV_finishOp`). The running frame is already back on top.
2494 fn finish_meta(&mut self, action: MetaAction, result: Value) -> Result<(), LuaError> {
2495 match action {
2496 MetaAction::Store { dst } => self.stack[dst as usize] = result,
2497 MetaAction::Discard => {}
2498 MetaAction::Compare { k, negate } => {
2499 let t = if negate {
2500 !result.truthy()
2501 } else {
2502 result.truthy()
2503 };
2504 self.cond_skip(t, k);
2505 }
2506 MetaAction::Concat { dst, base_a } => {
2507 self.stack[dst as usize] = result;
2508 self.top = dst + 1;
2509 self.concat_run(base_a)?;
2510 }
2511 }
2512 Ok(())
2513 }
2514
2515 // ---- metatables ----
2516
2517 pub(crate) fn metatable_of(&self, v: Value) -> Option<Gc<Table>> {
2518 match v {
2519 Value::Table(t) => t.metatable(),
2520 Value::Userdata(u) => u.metatable(),
2521 v => type_mt_slot(v).and_then(|i| self.type_mt[i]),
2522 }
2523 }
2524
2525 /// Set the shared metatable for `v`'s basic type (debug.setmetatable on a
2526 /// non-table). No-op for tables (they carry their own).
2527 pub(crate) fn set_type_metatable(&mut self, v: Value, mt: Option<Gc<Table>>) {
2528 if let Some(i) = type_mt_slot(v) {
2529 self.type_mt[i] = mt;
2530 }
2531 }
2532
2533 /// The metamethod of `v` for `mm`, or nil.
2534 pub(crate) fn get_mm(&self, v: Value, mm: Mm) -> Value {
2535 match self.metatable_of(v) {
2536 Some(mt) => mt.get(Value::Str(self.mm_names[mm as usize])),
2537 None => Value::Nil,
2538 }
2539 }
2540
2541 /// PUC 5.1 `get_compTM`: a comparison metamethod (`__eq` / `__lt` / `__le`)
2542 /// only fires when both operands carry a metatable that exposes the same
2543 /// implementation. Returns the metamethod to call, or `Nil` when no
2544 /// compatible match exists. Used to honour events.lua 5.1 :262's rule
2545 /// that `c == d` (where `d` has no metatable) falls back to raw equality.
2546 pub(crate) fn get_comp_mm(&self, l: Value, r: Value, mm: Mm) -> Value {
2547 let mt1 = self.metatable_of(l);
2548 let Some(mt1) = mt1 else { return Value::Nil };
2549 let key = Value::Str(self.mm_names[mm as usize]);
2550 let tm1 = mt1.get(key);
2551 if tm1.is_nil() {
2552 return Value::Nil;
2553 }
2554 let mt2 = self.metatable_of(r);
2555 let Some(mt2) = mt2 else { return Value::Nil };
2556 if mt1.as_ptr() == mt2.as_ptr() {
2557 return tm1;
2558 }
2559 let tm2 = mt2.get(key);
2560 if tm2.is_nil() {
2561 return Value::Nil;
2562 }
2563 if tm1.raw_eq(tm2) {
2564 return tm1;
2565 }
2566 Value::Nil
2567 }
2568
2569 /// PUC `luaT_objtypename`: the type name shown in error messages. A table
2570 /// or full userdata whose metatable carries a string `__name` reports that
2571 /// (e.g. "FILE*", "My Type") instead of the bare "table"/"userdata".
2572 pub(crate) fn obj_typename(&self, v: Value) -> String {
2573 if matches!(v, Value::Table(_) | Value::Userdata(_))
2574 && let Value::Str(s) = self.get_mm(v, Mm::Name)
2575 {
2576 return String::from_utf8_lossy(s.as_bytes()).into_owned();
2577 }
2578 v.type_name().to_string()
2579 }
2580
2581 fn call_at(
2582 &mut self,
2583 func_slot: u32,
2584 nargs: u32,
2585 from_c: bool,
2586 ) -> Result<Vec<Value>, LuaError> {
2587 if self.begin_call(func_slot, Some(nargs), -1, from_c)? {
2588 self.exec()
2589 } else {
2590 // native completed inline; results at func_slot..top
2591 Ok(self.take_results(func_slot))
2592 }
2593 }
2594
2595 /// Switch the `collectgarbage` mode, returning the previous mode name.
2596 pub(crate) fn gc_switch_mode(&mut self, new: &'static str) -> &'static str {
2597 std::mem::replace(&mut self.gc_mode, new)
2598 }
2599
2600 /// Whether the current `collectgarbage` mode is "generational" (where a
2601 /// "step" is a minor collection — a full atomic pass — rather than a paced
2602 /// incremental sweep).
2603 pub(crate) fn gc_mode_is_generational(&self) -> bool {
2604 self.gc_mode == "generational"
2605 }
2606
2607 /// Current `stepsize` pacing parameter (PUC: 0 means an unbounded step that
2608 /// completes a whole cycle at once).
2609 pub(crate) fn gc_stepsize(&self) -> i64 {
2610 self.gc_stepsize
2611 }
2612
2613 /// `collectgarbage("param", name [,value])`: read (or set, returning the
2614 /// previous value of) a pacing parameter. Returns `None` for an unknown
2615 /// name so the caller can raise PUC's `invalid parameter` error. The
2616 /// collector is stop-the-world, so these only round-trip for API fidelity.
2617 pub(crate) fn gc_param(&mut self, name: &[u8], set: Option<i64>) -> Option<i64> {
2618 let slot = match name {
2619 b"pause" => &mut self.gc_pause,
2620 b"stepmul" => &mut self.gc_stepmul,
2621 b"stepsize" => &mut self.gc_stepsize,
2622 _ => return None,
2623 };
2624 let prev = *slot;
2625 if let Some(v) = set {
2626 *slot = v;
2627 }
2628 Some(prev)
2629 }
2630
2631 /// Interpreter safe-point auto-GC: FULL incremental Propagate + adaptive
2632 /// paced sweep via `Vm::gc_step`.
2633 ///
2634 /// Round 1/2 of this attempt SIGABRT'd under coroutine + finalizer stress
2635 /// (suspected missed barrier). Round 3 (STW-mark + paced sweep) hung
2636 /// heavy.lua. With **born-black during Propagate** landed (@92b22b3) the
2637 /// suspected UAF is structurally closed — born objects no longer become
2638 /// dead-white at atomic flip — so Propagate is safe to re-enable here.
2639 ///
2640 /// Adaptive budget scales with heap size: 100M-object heap (heavy.lua's
2641 /// `loadrep` stress) gets a 25M-object budget so a cycle completes in
2642 /// O(SWEEP_DIVISOR) safe-points regardless of size.
2643 #[inline(always)]
2644 pub(crate) fn maybe_collect_garbage(&mut self, live_top: u32) {
2645 if self.gc_finalizing {
2646 return;
2647 }
2648 if !self.heap.gc_due() {
2649 return;
2650 }
2651 // v2.2 UAF-A fix: the historical `gc_top = live_top` narrowed
2652 // past slots that prior bytecode left holding Gc-bearing
2653 // Values (slots are never auto-cleared on frame pop, only
2654 // overwritten). The narrow GC swept the closure, the slot
2655 // kept the stale `Value::Closure`, and a later wider GC
2656 // OOB'd in `Marker::header`. Use `max(live_top, self.top)`
2657 // — `self.top` is the multi-result top maintained across
2658 // calls/returns, so it leads the live frontier closely
2659 // enough to cover stale closure refs without over-rooting
2660 // the whole `Vec` (which broke gc.lua / db.lua weak-table
2661 // semantics).
2662 self.gc_top = live_top.max(self.top);
2663 // PUC stepmul: % of allocation rate. Higher = more GC work per
2664 // safe-point (lower memory, more CPU). Default 100 = `live / 4` per
2665 // step (~4 safe-points per cycle). stepmul=200 → `live / 2`, etc.
2666 const SWEEP_BASE: usize = 400; // 400 / stepmul=100 = divisor 4
2667 const MIN_BUDGET: usize = 64_000;
2668 let stepmul = self.gc_stepmul.max(1) as usize;
2669 let divisor = (SWEEP_BASE / stepmul).max(1);
2670 let budget = (self.heap.live_objects() / divisor).max(MIN_BUDGET);
2671 if self.gc_step(budget) {
2672 self.heap.rearm_gc_pause(self.gc_pause);
2673 }
2674 }
2675
2676 /// Enumerate the GC roots: first-class `Value` roots plus bare-object
2677 /// roots (open upvalues, which are not first-class Values). Shared by the
2678 /// full collector and the incremental-sweep driver so both snapshot the
2679 /// exact same live set.
2680 fn gc_roots(&self) -> (Vec<Value>, Vec<*mut GcHeader>) {
2681 let mut roots: Vec<Value> = Vec::with_capacity(self.stack.len() + 32);
2682 roots.push(Value::Table(self.globals));
2683 for mt in self.type_mt.into_iter().flatten() {
2684 roots.push(Value::Table(mt));
2685 }
2686 for &n in &self.mm_names {
2687 roots.push(Value::Str(n));
2688 }
2689 // root only the running thread's live registers (PUC marks [stack, top)):
2690 // freed temporaries above `gc_top` are excluded so weak values stranded
2691 // there are not pinned. Suspended threads (main_ctx, other coroutines)
2692 // stay whole-rooted below — safe over-rooting, and they are not the
2693 // thread whose weak-table loop is under test.
2694 let live = (self.gc_top as usize).min(self.stack.len());
2695 roots.extend_from_slice(&self.stack[..live]);
2696 for cf in &self.frames {
2697 match cf {
2698 CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2699 CallFrame::Cont(NativeCont {
2700 kind: ContKind::Xpcall { handler },
2701 ..
2702 }) => roots.push(*handler),
2703 CallFrame::Cont(NativeCont {
2704 kind: ContKind::Close(cc),
2705 ..
2706 }) => {
2707 // Root the error threaded through this close chain so a
2708 // `collectgarbage()` inside a sibling `__close` handler
2709 // does not free it before the next handler is invoked
2710 // (PUC L->ci->u.l.errfunc / the closing_err shadow).
2711 if let Some(e) = cc.pending {
2712 roots.push(e);
2713 }
2714 if let AfterClose::ResumeUnwind { err, .. } = cc.after {
2715 roots.push(err);
2716 }
2717 }
2718 CallFrame::Cont(_) => {}
2719 }
2720 }
2721 if let Some(e) = self.closing_err {
2722 roots.push(e);
2723 }
2724 // B12 host roots — Lua-facade handles keep their referenced
2725 // values alive across calls/yields. Trace the whole vector;
2726 // unused slots (post-`unpin_all`) carry Value::Nil which the
2727 // GC ignores.
2728 for slot in &self.host_roots {
2729 // v1.3 SR — free-list slots carry Value::Nil (GC no-op).
2730 roots.push(slot.value);
2731 }
2732 // v2.1 — `table.sort` and similar builtins stash their working
2733 // `Vec<Value>` here so a `collectgarbage()` invoked inside the
2734 // comparator callback doesn't free strings/tables snapshotted
2735 // off the live table (sort.lua's `load(..)(); collectgarbage()`
2736 // compare regression).
2737 for buf in &self.sort_scratch {
2738 roots.extend_from_slice(buf);
2739 }
2740 // v2.1 — the running-natives chain holds Gc<NativeClosure>s
2741 // mid-execution. Without rooting them here, a `collectgarbage()`
2742 // invoked inside the running native (sort.lua AA `load(..)();
2743 // collectgarbage()` compare callback regression) sweeps the
2744 // closure that's actively executing, leaving `nc.upvals`
2745 // dangling and the Rust local `nc` pointing at recycled memory
2746 // — the SIGSEGV pops on the very next field access or pop.
2747 for &nc in &self.running_natives {
2748 roots.push(Value::Native(nc));
2749 }
2750 // the running thread's debug hook (suspended threads root theirs via
2751 // Coro::trace / the main_ctx sweep below)
2752 if let Some(h) = self.hook.func {
2753 roots.push(h);
2754 }
2755 // the running coroutine (its saved-context fields live in the VM, but
2756 // the object itself + its resumer chain must stay reachable)
2757 if let Some(co) = self.current {
2758 roots.push(Value::Coro(co));
2759 }
2760 if let Some(mc) = self.main_coro {
2761 roots.push(Value::Coro(mc));
2762 }
2763 // debug.getregistry() and io library state
2764 if let Some(r) = self.registry {
2765 roots.push(Value::Table(r));
2766 }
2767 if let Some(mt) = self.file_mt {
2768 roots.push(Value::Table(mt));
2769 }
2770 if let Some(f) = self.io_input {
2771 roots.push(Value::Userdata(f));
2772 }
2773 if let Some(f) = self.io_output {
2774 roots.push(Value::Userdata(f));
2775 }
2776 // the main thread's saved context while a coroutine runs
2777 if let Some(m) = &self.main_ctx {
2778 roots.extend_from_slice(&m.stack);
2779 if let Some(h) = m.hook.func {
2780 roots.push(h);
2781 }
2782 for cf in &m.frames {
2783 match cf {
2784 CallFrame::Lua(f) => roots.push(Value::Closure(f.closure)),
2785 CallFrame::Cont(NativeCont {
2786 kind: ContKind::Xpcall { handler },
2787 ..
2788 }) => roots.push(*handler),
2789 CallFrame::Cont(_) => {}
2790 }
2791 }
2792 }
2793 let mut extra: Vec<*mut GcHeader> = self
2794 .open_upvals
2795 .iter()
2796 .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader)
2797 .collect();
2798 if let Some(m) = &self.main_ctx {
2799 extra.extend(
2800 m.open_upvals
2801 .iter()
2802 .map(|&(_, uv)| uv.as_ptr() as *mut GcHeader),
2803 );
2804 }
2805 (roots, extra)
2806 }
2807
2808 /// Run a full collection with the VM's roots, then run any `__gc`
2809 /// finalizers the collection scheduled. A no-op (returns 0) when already
2810 /// inside a finalizer — the collector is not reentrant (PUC).
2811 pub fn collect_garbage(&mut self) -> usize {
2812 if self.gc_finalizing {
2813 return 0;
2814 }
2815 let (roots, extra) = self.gc_roots();
2816 let freed = self.heap.collect_ex(&roots, &extra);
2817 self.run_finalizers();
2818 freed
2819 }
2820
2821 /// PUC 5.1 `collectgarbage` re-raised the first error a `__gc` finalizer
2822 /// threw; gc.lua's "errors during collection" probe relies on it. This
2823 /// variant runs the same cycle but propagates the captured finalizer
2824 /// error to the explicit caller.
2825 pub(crate) fn collect_garbage_propagating(&mut self) -> Result<usize, LuaError> {
2826 if self.gc_finalizing {
2827 return Ok(0);
2828 }
2829 let (roots, extra) = self.gc_roots();
2830 let freed = self.heap.collect_ex(&roots, &extra);
2831 self.run_finalizers_or_err()?;
2832 Ok(freed)
2833 }
2834
2835 /// Whether a `__gc` finalizer is currently running (so `collectgarbage`
2836 /// should report fail rather than collect).
2837 pub(crate) fn gc_is_finalizing(&self) -> bool {
2838 self.gc_finalizing
2839 }
2840
2841 /// PUC 5.4+ default warnf: emit one piece of a warning message. `to_cont`
2842 /// = true indicates more pieces follow (concatenated until the first
2843 /// `to_cont = false` call flushes the whole line). Mirrors
2844 /// `lauxlib.c::warnfon` + `warnfcont` + `checkcontrol`:
2845 /// * If the buffer is fresh, `to_cont` is false, and the message is
2846 /// `@<word>`, treat as a control message — only `@on` / `@off` are
2847 /// recognised; any other `@…` is silently ignored.
2848 /// * Otherwise, while the state is `Off`, drop the piece; while `On`,
2849 /// accumulate, and flush to stderr + `warn_log` on the
2850 /// non-continuation call.
2851 pub(crate) fn emit_warn(&mut self, msg: &[u8], to_cont: bool) {
2852 if self.warn_buf.is_empty()
2853 && !to_cont
2854 && let Some(b'@') = msg.first().copied()
2855 {
2856 match &msg[1..] {
2857 b"on" => self.warn_state = WarnState::On,
2858 b"off" => self.warn_state = WarnState::Off,
2859 _ => {} // unknown control — silently ignored (PUC checkcontrol)
2860 }
2861 return;
2862 }
2863 if self.warn_state == WarnState::Off {
2864 // drop continuation pieces too — PUC `warnfoff` is the trampoline
2865 return;
2866 }
2867 self.warn_buf.extend_from_slice(msg);
2868 if !to_cont {
2869 let line = std::mem::take(&mut self.warn_buf);
2870 eprintln!("Lua warning: {}", String::from_utf8_lossy(&line));
2871 self.warn_log.push(line);
2872 }
2873 }
2874
2875 /// Drain the in-process warning log (one entry per emitted message, sans
2876 /// `"Lua warning: "` prefix and newline). For test harnesses that want to
2877 /// assert on warn output without scraping stderr.
2878 pub fn warn_log_take(&mut self) -> Vec<Vec<u8>> {
2879 std::mem::take(&mut self.warn_log)
2880 }
2881
2882 /// Arm the cooperative instruction budget (P09 embedding). The run loop
2883 /// decrements this once per dispatch turn; on zero it raises a catchable
2884 /// `"instruction budget exceeded"` error and disarms itself so the host
2885 /// can resume with a fresh budget on the next call. `None` removes the
2886 /// cap. Pass `Some(n)` before `eval`/`call_value` for the embedder's
2887 /// short-script semantics.
2888 pub fn set_instr_budget(&mut self, budget: Option<i64>) {
2889 self.instr_budget = budget;
2890 }
2891
2892 /// Remaining instruction budget (None when unbounded).
2893 pub fn instr_budget_remaining(&self) -> Option<i64> {
2894 self.instr_budget
2895 }
2896
2897 /// Toggle the cranelift JIT (P11). Default `true`. Sandbox embedders
2898 /// **must** disable JIT when relying on `instr_budget` — see the
2899 /// `jit_enabled` field doc for the rationale.
2900 pub fn set_jit_enabled(&mut self, enabled: bool) {
2901 self.jit.enabled = enabled;
2902 }
2903
2904 /// Current JIT enable state.
2905 pub fn jit_enabled(&self) -> bool {
2906 self.jit.enabled
2907 }
2908
2909 /// Toggle the trace JIT (P12). Off by default while the sprint
2910 /// develops. When enabled, hot back-edges are counted on
2911 /// `Proto.trace_hot_count`; once the counter passes
2912 /// `TRACE_HOT_THRESHOLD`, the dispatch loop enters recording
2913 /// mode at the back-edge target. Stays a no-op until S2's
2914 /// trace lowerer and S3's dispatcher land.
2915 pub fn set_trace_jit_enabled(&mut self, enabled: bool) {
2916 self.jit.trace_enabled = enabled;
2917 }
2918
2919 /// P16-A — opt-in flag for the self-link cycle catch. See field
2920 /// docs for the correctness blocker. Default `false`.
2921 pub fn set_p16_self_link_enabled(&mut self, enabled: bool) {
2922 self.jit.p16_self_link_enabled = enabled;
2923 }
2924
2925 /// Current state of the P16-A self-link cycle catch.
2926 pub fn p16_self_link_enabled(&self) -> bool {
2927 self.jit.p16_self_link_enabled
2928 }
2929
2930 /// Current trace-JIT enable state.
2931 pub fn trace_jit_enabled(&self) -> bool {
2932 self.jit.trace_enabled
2933 }
2934
2935 /// Number of traces that have closed cleanly (looped back to the
2936 /// head PC) since this Vm was constructed. Cumulative; used by
2937 /// tests + tuning. Will become the dominant signal once S2's
2938 /// compile + cache lands.
2939 pub fn trace_closed_count(&self) -> u64 {
2940 self.jit.counters.closed
2941 }
2942
2943 /// Number of traces that have aborted (exceeded MAX_TRACE_LEN or
2944 /// hit an un-recordable op — the latter lands at S2).
2945 pub fn trace_aborted_count(&self) -> u64 {
2946 self.jit.counters.aborted
2947 }
2948
2949 /// P13-S13-G v2 — number of compiled traces whose close shape
2950 /// is `TraceEnd::InlineAbort` (depth>0 boundary). Such traces
2951 /// pin `dispatchable=false` because the dispatcher can't
2952 /// resume at a depth>0 PC without the matching CallFrames.
2953 /// S4-step4b's frame-mat helper could synthesise those, but
2954 /// the InlineAbort emit path isn't wired up yet — fresh
2955 /// pickup work for S13-G v2-full.
2956 pub fn trace_inline_abort_count(&self) -> u64 {
2957 self.jit.counters.inline_abort
2958 }
2959
2960 /// P13-S13-G v2.5 — see `JitCounters::dispatch_off_reasons`.
2961 pub fn trace_dispatch_off_reasons(&self) -> &[&'static str] {
2962 &self.jit.counters.dispatch_off_reasons
2963 }
2964
2965 /// P13-S13-G v2.6 — see `JitCounters::compile_failed_reasons`.
2966 pub fn trace_compile_failed_reasons(&self) -> &[&'static str] {
2967 &self.jit.counters.compile_failed_reasons
2968 }
2969
2970 /// P13-S13-H — see `JitCounters::closed_lens`. Returns
2971 /// `(is_call_triggered, ops_len)` for every trace that closed.
2972 pub fn trace_closed_lens(&self) -> &[(bool, usize)] {
2973 &self.jit.counters.closed_lens
2974 }
2975
2976 /// v2.0 Track-R R2 — see [`crate::vm::jit_state::JitCounters::close_cause_counts`].
2977 /// Per-reason close-cause counts (recorder-side abort/discard +
2978 /// lowerer-side dispatch_off labels) keyed by `&'static str`.
2979 pub fn trace_close_cause_counts(&self) -> &std::collections::HashMap<&'static str, u64> {
2980 &self.jit.counters.close_cause_counts
2981 }
2982
2983 /// v2.0 Track-R R3b — number of compiled traces whose
2984 /// `CompiledTrace.downrec_link` is `Some(_)` (lowerer's
2985 /// `downrec_idx_opt` arm emitted the stitch sentinel + caller-pc
2986 /// guard scaffold). R3b regression pin checks `>= 1` on a fib(3)
2987 /// hot loop with p16-on. R3b keeps `dispatchable = false` even
2988 /// when this count bumps; R3d will lift it.
2989 pub fn trace_downrec_link_compiled_count(&self) -> u64 {
2990 self.jit.counters.downrec_link_compiled
2991 }
2992
2993 /// v2.0 Track-R R3c — see
2994 /// [`crate::vm::jit_state::JitCounters::downrec_dispatched`]. Number
2995 /// of times the dispatcher's `is_downrec_sentinel` arm fired and
2996 /// classified the return as a caller-pc-guard HIT.
2997 pub fn trace_downrec_dispatched_count(&self) -> u64 {
2998 self.jit.counters.downrec_dispatched
2999 }
3000
3001 /// v2.0 Track-R R3c — see
3002 /// [`crate::vm::jit_state::JitCounters::downrec_deopt`]. Number of
3003 /// times the dispatcher entered a `downrec_link`-bearing trace and
3004 /// the trace returned via the lowerer's deopt block (caller-pc
3005 /// guard MISS), or the dispatcher itself force-deopted via the
3006 /// stitch-cycle checkpoint.
3007 pub fn trace_downrec_deopt_count(&self) -> u64 {
3008 self.jit.counters.downrec_deopt
3009 }
3010
3011 /// v2.0 Track-R R3d — see
3012 /// [`crate::vm::jit_state::JitCounters::multi_way_guard_emitted`].
3013 /// Number of compiled traces whose lowerer emitted a multi-way
3014 /// caller-pc guard chain (>= 2 distinct `caller_pc` candidates)
3015 /// at the `TraceEnd::DownRec` close + lifted `dispatchable = true`.
3016 pub fn trace_multi_way_guard_emitted_count(&self) -> u64 {
3017 self.jit.counters.multi_way_guard_emitted
3018 }
3019
3020 /// P12-S2.C — number of closed traces the lowerer compiled and
3021 /// parked on `Proto.traces`. Re-records of the same head_pc are
3022 /// deduped (the second close finds the head_pc already cached
3023 /// and skips compile), so this never exceeds `trace_closed_count`.
3024 pub fn trace_compiled_count(&self) -> u64 {
3025 self.jit.counters.compiled
3026 }
3027
3028 /// v2.1 Phase 1I.B — number of times the recorder captured a
3029 /// [`crate::jit::trace_types::FieldIcSnapshot`] under
3030 /// `LUNA_JIT_FIELD_IC=1`. Stays 0 on the env-default path. Used
3031 /// by the Phase 1I.B opt-in fire test to verify the env gate
3032 /// wiring round-trips end-to-end (env -> recorder -> snapshot
3033 /// -> counter -> getter -> assertion).
3034 pub fn trace_field_ic_snapshot_count(&self) -> u64 {
3035 self.jit.counters.field_ic_snapshot_captured
3036 }
3037
3038 /// P12-S2.C — number of closed traces the lowerer rejected
3039 /// (any of the bail conditions in
3040 /// `crate::jit::trace::try_compile_trace`).
3041 pub fn trace_compile_failed_count(&self) -> u64 {
3042 self.jit.counters.compile_failed
3043 }
3044
3045 /// P12-S3 — number of times the dispatcher jumped into a
3046 /// compiled trace. Bumps on every entry; `trace_deopt_count`
3047 /// counts the subset where the trace returned with a parked
3048 /// `jit_pending_err`.
3049 pub fn trace_dispatched_count(&self) -> u64 {
3050 self.jit.counters.dispatched
3051 }
3052
3053 /// P12-S3 — number of trace entries that came back with
3054 /// `jit_pending_err` set (typically a metatable shadowed an
3055 /// index inside a helper, forcing the dispatcher to fall back
3056 /// to the interpreter without committing the trace's result).
3057 pub fn trace_deopt_count(&self) -> u64 {
3058 self.jit.counters.deopt
3059 }
3060
3061 /// P15-A v1 — number of times the dispatcher started a side
3062 /// trace recording (an `exit_hit_counts` slot crossed
3063 /// [`crate::jit::trace::HOTEXIT_THRESHOLD`] while `active_trace`
3064 /// was None and trace JIT was enabled). Each unit is exactly one
3065 /// `start_side_trace` call; the actual compile success counts
3066 /// under [`Self::trace_compiled_count`] like any other trace.
3067 /// Probe use: distinguishes the "side-trace pipeline fired"
3068 /// signal from the "primary back-edge / call-trigger fired"
3069 /// signal so v0-v3 architectural progress is visible without
3070 /// reading per-counter histograms.
3071 pub fn trace_side_trace_started_count(&self) -> u64 {
3072 self.jit.counters.side_trace_started
3073 }
3074
3075 /// P15-A v2-A — number of side-trace recordings that closed,
3076 /// compiled successfully, AND patched their parent's
3077 /// `exit_side_trace_ptrs[exit_idx]`. The parent's IR doesn't
3078 /// dispatch through these ptrs yet (v2-B/C job), but the
3079 /// counter + ptr write proves the compile + link pipeline is
3080 /// complete end-to-end.
3081 pub fn trace_side_trace_compiled_count(&self) -> u64 {
3082 self.jit.counters.side_trace_compiled
3083 }
3084
3085 /// P15-A v2-C-A5-C — number of side traces that compiled
3086 /// successfully but were SHEDDED by the close-handler shape-
3087 /// match gate (`exit_tags_match_entry_tags`). High ratios
3088 /// vs. `trace_side_trace_compiled_count` indicate the
3089 /// architecture is shedding lots of would-be side traces;
3090 /// useful as a tuning probe for future relaxation of the
3091 /// gate or for child-IR re-specialisation against parent's
3092 /// exit shape.
3093 pub fn trace_side_trace_shape_mismatch_count(&self) -> u64 {
3094 self.jit.counters.side_trace_shape_mismatch
3095 }
3096
3097 /// P12-S5-A — sum of NewTable sites the pre-emit escape sweep
3098 /// classified as `crate::jit::trace::EscapeState::Sinkable`
3099 /// across every successfully compiled trace on this Vm. The
3100 /// count is post-demotion: sites pre-emit drops back to Escaped
3101 /// for not meeting v1 sunk-emit criteria are NOT counted.
3102 /// `trace_sunk_alloc_count` matches one-for-one today (every
3103 /// surviving Sinkable site goes through sunk emit).
3104 pub fn trace_sinkable_seen_count(&self) -> u64 {
3105 self.jit.counters.sinkable_seen
3106 }
3107
3108 /// P14-S14-B v1 — see `JitCounters::accum_bufferable_seen`.
3109 pub fn trace_accum_bufferable_seen_count(&self) -> u64 {
3110 self.jit.counters.accum_bufferable_seen
3111 }
3112
3113 /// P15-prep — total dispatch hits across all known traces,
3114 /// broken into hot-exit telemetry (max single-exit count,
3115 /// total dispatches, exit count). Used by probes to identify
3116 /// hot side-exits as side-trace candidates.
3117 ///
3118 /// Walks `cl.proto` AND all nested protos in `cl.proto.protos`
3119 /// recursively, so inner functions' traces are reported.
3120 pub fn trace_exit_hit_summary(
3121 &self,
3122 cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3123 ) -> Vec<(u32, Vec<u32>)> {
3124 fn walk(
3125 proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3126 out: &mut Vec<(u32, Vec<u32>)>,
3127 ) {
3128 for ct in proto.traces.borrow().iter() {
3129 let counts: Vec<u32> = ct.exit_hit_counts.iter().map(|c| c.get()).collect();
3130 out.push((ct.head_pc, counts));
3131 }
3132 for inner in proto.protos.iter() {
3133 walk(*inner, out);
3134 }
3135 }
3136 let mut out: Vec<(u32, Vec<u32>)> = Vec::new();
3137 walk(cl.proto, &mut out);
3138 out
3139 }
3140
3141 /// P15-A v0 — surface every side-exit slot whose hit count is
3142 /// `>= HOTEXIT_THRESHOLD` across every trace reachable from
3143 /// `cl.proto` (recursively walking `proto.protos`). Returned
3144 /// entries are side-trace candidates: each carries the parent
3145 /// trace's `(head_proto, head_pc)`, the exit's index in the
3146 /// parent's `exit_hit_counts`, and the side trace's natural
3147 /// entry shape (`cont_pc` + `exit_tags`).
3148 ///
3149 /// Layout of `exit_hit_counts` (mirrored by the iter):
3150 /// - `[0..per_exit_inline.len())` → `InlineSideExit` (cont_pc +
3151 /// window-sized exit_tags).
3152 /// - `[per_exit_inline.len()..inline.len() + per_exit_tags.len())`
3153 /// → `per_exit_tags[i]` (per-cont_pc caller-window tags).
3154 /// - Last slot → global clean-tail (cont_pc = `head_pc`,
3155 /// exit_tags = `ct.exit_tags`).
3156 pub fn hot_exit_iter(
3157 &self,
3158 cl: crate::runtime::heap::Gc<crate::runtime::function::LuaClosure>,
3159 ) -> Vec<crate::jit::trace::HotExitInfo> {
3160 use crate::jit::trace::{HOTEXIT_THRESHOLD, HotExitInfo};
3161 fn walk(
3162 proto: crate::runtime::heap::Gc<crate::runtime::function::Proto>,
3163 out: &mut Vec<HotExitInfo>,
3164 ) {
3165 for ct in proto.traces.borrow().iter() {
3166 let inline_n = ct.per_exit_inline.len();
3167 let tags_n = ct.per_exit_tags.len();
3168 debug_assert_eq!(
3169 ct.exit_hit_counts.len(),
3170 inline_n + tags_n + 1,
3171 "exit_hit_counts layout invariant violated"
3172 );
3173 for (idx, cell) in ct.exit_hit_counts.iter().enumerate() {
3174 let hits = cell.get();
3175 if hits < HOTEXIT_THRESHOLD {
3176 continue;
3177 }
3178 let (cont_pc, exit_tags) = if idx < inline_n {
3179 let ent = &ct.per_exit_inline[idx];
3180 (ent.cont_pc, ent.exit_tags.clone())
3181 } else if idx < inline_n + tags_n {
3182 let (pc, tags) = &ct.per_exit_tags[idx - inline_n];
3183 (*pc, tags.clone())
3184 } else {
3185 (ct.head_pc, ct.exit_tags.clone())
3186 };
3187 out.push(HotExitInfo {
3188 head_proto: proto,
3189 head_pc: ct.head_pc,
3190 exit_idx: idx,
3191 hits,
3192 cont_pc,
3193 exit_tags,
3194 });
3195 }
3196 }
3197 for inner in proto.protos.iter() {
3198 walk(*inner, out);
3199 }
3200 }
3201 let mut out: Vec<HotExitInfo> = Vec::new();
3202 walk(cl.proto, &mut out);
3203 out
3204 }
3205
3206 /// P12-S5-B — sum of NewTable sites that actually took the
3207 /// sunk-emit path across every successfully compiled trace on
3208 /// this Vm. Each counted site skips its heap `Gc<Table>`
3209 /// allocation per dispatch; the array part lives as Cranelift
3210 /// `Variable`s for the duration of the trace.
3211 pub fn trace_sunk_alloc_count(&self) -> u64 {
3212 self.jit.counters.sunk_alloc
3213 }
3214
3215 /// P12-S5-C — sum of materialise-helper emit sites across every
3216 /// successfully compiled trace on this Vm. Each unit is a
3217 /// (site × cmp side-exit) pair whose IR reconstructs a heap
3218 /// `Gc<Table>` from the virt slots on deopt — proves S5-C
3219 /// emit is wiring materialise into the right side-exits.
3220 pub fn trace_materialize_emit_count(&self) -> u64 {
3221 self.jit.counters.materialize_emit
3222 }
3223
3224 /// P12-S7-A diagnostic — total `Op::Closure` ops the trace JIT
3225 /// lowered to the `luna_jit_op_closure` helper. Each emitted op
3226 /// replaces a `Heap::new_closure_inline` call on the dispatch
3227 /// path; the count is static (one per matching op per compiled
3228 /// trace), summed at compile success.
3229 pub fn trace_closure_emit_count(&self) -> u64 {
3230 self.jit.counters.closure_emit
3231 }
3232
3233 /// v2.0 Stage 7 polish 6 fire experiment — see
3234 /// [`crate::vm::jit_state::JitCounters::per_exit_inline_compiled`].
3235 /// Number of compiled traces whose `per_exit_inline.len() > 0`
3236 /// (depth>0 inlined cmp side-exits emitted).
3237 pub fn trace_per_exit_inline_compiled_count(&self) -> u64 {
3238 self.jit.counters.per_exit_inline_compiled
3239 }
3240
3241 /// v2.0 Stage 7 polish 6 fire experiment — see
3242 /// [`crate::vm::jit_state::JitCounters::per_exit_inline_dispatchable`].
3243 /// Number of compiled traces with `per_exit_inline.len() > 0` AND
3244 /// `dispatchable == true` — i.e. the count of compiled traces
3245 /// that would actually exercise the AOT polish 6 chain-reloc +
3246 /// deploy-resolver path.
3247 pub fn trace_per_exit_inline_dispatchable_count(&self) -> u64 {
3248 self.jit.counters.per_exit_inline_dispatchable
3249 }
3250
3251 /// P12-S4-step1 diagnostic — max `inline_depth` ever seen on any
3252 /// `RecordedOp` pushed by the recorder. Tells tests + tuning
3253 /// whether a self-recursive function actually walked the depth
3254 /// tracker past 0. Saturates at `MAX_INLINE_DEPTH`. Persists
3255 /// across traces and Vm activations; reset only on `Vm::new`.
3256 pub fn trace_max_depth_seen(&self) -> u8 {
3257 self.jit.max_depth_seen
3258 }
3259
3260 /// P12-S4-step4b — last live Lua frame (the trace head's frame at
3261 /// dispatch time). The frame-materialization helper reads `.base`
3262 /// to compute offsets for each inlined frame's window.
3263 #[doc(hidden)]
3264 pub fn jit_last_lua_frame(&self) -> Option<Frame> {
3265 match self.frames.last() {
3266 Some(CallFrame::Lua(f)) => Some(*f),
3267 _ => None,
3268 }
3269 }
3270
3271 /// v2.0 Track TL Phase 2 — read-only borrow of the current call
3272 /// stack, for the [`crate::vm::inspect`] pure-read accessors used
3273 /// by `luna-tools` (`luna-profile`'s sampler walks this from
3274 /// inside a `Count` hook). Sibling-module scope: not part of the
3275 /// public embedder surface, but `inspect::frames_for_profile` is.
3276 #[doc(hidden)]
3277 pub(super) fn inspect_frames(&self) -> &[CallFrame] {
3278 &self.frames
3279 }
3280
3281 /// P12-S4-step4b — ensure the value stack covers indices
3282 /// `[0..need)`. Extends with Nil if shorter. Called by the
3283 /// frame-materialization helper before pushing an inlined frame
3284 /// whose register window may exceed the current stack length.
3285 #[doc(hidden)]
3286 pub fn jit_ensure_stack(&mut self, need: usize) {
3287 if self.stack.len() < need {
3288 self.stack.resize(need, Value::Nil);
3289 }
3290 }
3291
3292 /// P12-S7-C — trace JIT path for `Op::Close A`. Predicts whether
3293 /// `__close` handlers would run (any active tbc slot ≥ from
3294 /// holding a non-nil/false Value); if so, parks a deopt sentinel
3295 /// in `jit_pending_err` and returns 1 (helper-side bool) so the
3296 /// IR branches to the deopt block. Otherwise performs the safe
3297 /// part of close — `close_from(from)` to close open upvals +
3298 /// drop any drained tbc entries ≥ from — and returns 0.
3299 ///
3300 /// Returns are i64-shaped so the cranelift import sig stays
3301 /// trivial (i64 → i64 mapping).
3302 #[doc(hidden)]
3303 pub fn jit_op_close(&mut self, start_offset: u32) -> i64 {
3304 if self.jit.pending_err.is_some() {
3305 return 1;
3306 }
3307 let Some(f) = self.jit_last_lua_frame() else {
3308 self.jit.pending_err = Some(self.rt_err("JIT op_close: no Lua frame"));
3309 return 1;
3310 };
3311 let from = f.base + start_offset;
3312 let has_handler = self.tbc.iter().any(|&s| {
3313 s >= from && {
3314 let v = self.stack[s as usize];
3315 !matches!(v, Value::Nil | Value::Bool(false))
3316 }
3317 });
3318 if has_handler {
3319 self.jit.pending_err =
3320 Some(self.rt_err("JIT deopt: Op::Close with active tbc handler"));
3321 return 1;
3322 }
3323 self.close_from(from);
3324 // Drain any tbc entries ≥ from (they're nil/false stubs the
3325 // interpreter's drive_close would have skipped silently).
3326 while let Some(&s) = self.tbc.last() {
3327 if s < from {
3328 break;
3329 }
3330 self.tbc.pop();
3331 }
3332 0
3333 }
3334
3335 /// P12-S7-B — spill the trace's current value for a register to
3336 /// the underlying `vm.stack[base + slot_offset]`. Required before
3337 /// an `Op::Closure` whose inner proto has an `in_stack: true`
3338 /// upval at `slot_offset` — the helper's `find_or_create_upval`
3339 /// captures a live pointer to `vm.stack[base + slot_offset]`,
3340 /// which must hold the right value at call time (trace IR's
3341 /// Variable hasn't yet been written back).
3342 ///
3343 /// Parameters arrive as i64 from the IR: `slot_offset` is the
3344 /// caller-frame register index (`u32` in practice, depth=0
3345 /// only — S7-B doesn't support depth>0 Closure); `tag` is the
3346 /// `crate::runtime::value::raw` byte for the slot's RegKind;
3347 /// `raw_bits` is the trace Variable's `use_var` payload
3348 /// (i64-shaped — Float is its bit-pattern, Table/Closure is the
3349 /// raw `Gc::as_ptr` cast).
3350 #[doc(hidden)]
3351 pub fn jit_spill_stack(&mut self, slot_offset: u32, tag: u8, raw_bits: u64) {
3352 let Some(f) = self.jit_last_lua_frame() else {
3353 self.jit.pending_err =
3354 Some(self.rt_err("JIT spill: no Lua frame on jit_last_lua_frame()"));
3355 return;
3356 };
3357 let idx = (f.base as usize) + (slot_offset as usize);
3358 if self.stack.len() <= idx {
3359 self.stack.resize(idx + 1, Value::Nil);
3360 }
3361 // SAFETY: caller (trace JIT IR emit) provides matching
3362 // `(tag, raw_bits)` — same shape produced by Value::unpack.
3363 let v = unsafe {
3364 crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3365 };
3366 self.stack[idx] = v;
3367 }
3368
3369 /// P12-S12-B-v2 — trace JIT path for `Op::TForCall A 0 C`.
3370 /// Mirrors the interp arm (this file ~L5316): copies the
3371 /// generator/state/control triple from `R[A..=A+2]` to
3372 /// `R[A+4..=A+6]` (resizing the stack if needed), then enters
3373 /// the iterator function via `begin_call`. v2 only handles
3374 /// `Value::Native` iterators (the canonical `ipairs_iter` /
3375 /// `next` builtins) — a Lua-closure iterator would push a Lua
3376 /// frame mid-trace, breaking `recording_frame_base`, so we
3377 /// deopt by parking a `pending_err` and returning `-1`.
3378 ///
3379 /// `slot_offset` is the caller-frame register index (=
3380 /// `inst.a()` decoded from a u32-wide field). `nvars` is
3381 /// `inst.c() as i32` — the caller's expected return count.
3382 /// P12-S12-C v1 — refresh only the raw payload of
3383 /// `vm.stack[base + slot_offset]`, preserving its existing
3384 /// `Value` tag. The caller (trace JIT Op::Concat body emit)
3385 /// uses this when the slot's `RegKind` is `Unset` (no compile-
3386 /// time tag info; commonly `Str` slots which the trace doesn't
3387 /// model). The interp's previous execution of the same op
3388 /// already populated the slot with the right tag — the trace
3389 /// only needs to swap in its current raw value.
3390 #[doc(hidden)]
3391 pub fn jit_stack_update_raw(&mut self, slot_offset: u32, raw_bits: u64) {
3392 let Some(f) = self.jit_last_lua_frame() else {
3393 return;
3394 };
3395 let idx = (f.base as usize) + (slot_offset as usize);
3396 if idx >= self.stack.len() {
3397 return;
3398 }
3399 let (tag, _) = self.stack[idx].unpack();
3400 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3401 self.stack[idx] = unsafe {
3402 crate::runtime::Value::pack(tag, crate::runtime::value::RawVal { zero: raw_bits })
3403 };
3404 }
3405
3406 /// P12-S12-C v1 — trace JIT path for `Op::Concat A B`.
3407 ///
3408 /// Mirrors the interp arm (this file ~L5112): `self.top =
3409 /// base + a + n; concat_run(base + a)`. Result lands at
3410 /// `vm.stack[base + a]`. Returns `0` on success, `-1` on
3411 /// deopt (any error from `concat_run` OR detection that the
3412 /// metamethod path was taken — `concat_run` returns `Ok(())`
3413 /// after `begin_meta_call` which has pushed a Lua frame the
3414 /// trace can't safely continue past).
3415 ///
3416 /// The frame-push detection uses `pre/post frames.len()` and
3417 /// unwinds any pushed frames before deopting, so the
3418 /// dispatcher's existing deopt path sees a clean stack.
3419 #[doc(hidden)]
3420 pub fn jit_op_concat(&mut self, slot_offset: u32, n: i32) -> i64 {
3421 if self.jit.pending_err.is_some() {
3422 return -1;
3423 }
3424 let Some(f) = self.jit_last_lua_frame() else {
3425 self.jit.pending_err = Some(self.rt_err("JIT Concat: no Lua frame"));
3426 return -1;
3427 };
3428 let abs_a = f.base + slot_offset;
3429 self.top = abs_a + n as u32;
3430 let pre_frames = self.frames.len();
3431 let result = self.concat_run(abs_a);
3432 let post_frames = self.frames.len();
3433 // Frame-push = metamethod path taken (begin_meta_call pushed
3434 // a Lua frame). The trace can't continue past it; unwind +
3435 // deopt so interp redoes Op::Concat in the slow path.
3436 while self.frames.len() > pre_frames {
3437 frames_pop_sync(&mut self.frames, &mut self.frames_top);
3438 }
3439 if let Err(e) = result {
3440 self.jit.pending_err = Some(e);
3441 return -1;
3442 }
3443 if post_frames > pre_frames {
3444 self.jit.pending_err = Some(self.rt_err("JIT Concat: __concat metamethod path"));
3445 return -1;
3446 }
3447 0
3448 }
3449
3450 /// P14-S14-B v2 — pop a reusable `Vec<u8>` from the JIT
3451 /// accumulator buffer pool, returning a raw pointer. The trace
3452 /// fn's IR holds this pointer in a stack slot through the loop
3453 /// and calls `jit_str_buf_extend` per iter. If the pool is
3454 /// empty, allocate fresh.
3455 ///
3456 /// Safety: the returned pointer is valid until
3457 /// `jit_str_buf_release` is called or the Vm is dropped. The
3458 /// caller MUST not retain it across `enter_jit` boundaries.
3459 #[doc(hidden)]
3460 pub fn jit_str_buf_acquire(&mut self) -> *mut Vec<u8> {
3461 let buf = self.jit.str_buf_pool.pop().unwrap_or_default();
3462 // Move into a Box so the pointer is stable until release.
3463 Box::into_raw(Box::new(buf))
3464 }
3465
3466 /// P14-S14-B v2 — return a previously-acquired buffer to the
3467 /// pool, dropping any excess past `jit_str_buf_pool_cap`. The
3468 /// buffer is `clear`ed (capacity retained) so the next acquire
3469 /// gets a ready-to-extend Vec.
3470 ///
3471 /// Safety: `buf` must have been returned by a prior
3472 /// `jit_str_buf_acquire` on the same Vm.
3473 #[doc(hidden)]
3474 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` round-trips through `Box::into_raw`; SAFETY documented below.
3475 pub fn jit_str_buf_release(&mut self, buf: *mut Vec<u8>) {
3476 if buf.is_null() {
3477 return;
3478 }
3479 // SAFETY: `ptr` round-trips through `Box::into_raw` set up earlier in this dispatch (or owned by a long-lived VM handle); ownership re-acquired here.
3480 let mut owned = unsafe { Box::from_raw(buf) };
3481 owned.clear();
3482 if self.jit.str_buf_pool.len() < self.jit.str_buf_pool_cap {
3483 self.jit.str_buf_pool.push(*owned);
3484 }
3485 // Else: drop the buffer.
3486 }
3487
3488 /// P14-S14-B v2 — append a LuaStr's bytes to the accumulator
3489 /// buffer. The trace IR computes the `str_ptr` (= raw bits of
3490 /// the piece slot) and passes it through; we treat it as a
3491 /// `*mut LuaStr` and append its bytes.
3492 ///
3493 /// Returns 0 on success, -1 if the piece isn't a Str (would
3494 /// trip __concat metamethod path → deopt to interp).
3495 ///
3496 /// Safety: `buf` from prior `acquire`; `str_ptr` from the
3497 /// trace's piece slot raw bits.
3498 #[doc(hidden)]
3499 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; `str_ptr` from trace piece slot; SAFETY documented below.
3500 pub fn jit_str_buf_extend(&mut self, buf: *mut Vec<u8>, str_ptr: i64) -> i64 {
3501 if buf.is_null() || str_ptr == 0 {
3502 return -1;
3503 }
3504 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3505 let buf = unsafe { &mut *buf };
3506 let lua_str_ptr = str_ptr as *const crate::runtime::string::LuaStr;
3507 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3508 let bytes = unsafe { crate::runtime::string::bytes_of(lua_str_ptr) };
3509 buf.extend_from_slice(bytes);
3510 0
3511 }
3512
3513 /// P14-S14-B v2 — drain the accumulator buffer into a fresh
3514 /// `LuaStr` via `heap.intern`, returning the raw ptr bits for
3515 /// the trace to write into the accumulator slot.
3516 ///
3517 /// Returns the LuaStr ptr as i64 on success, 0 on overflow
3518 /// (the v2 hard cap; the trace deopts).
3519 ///
3520 /// Safety: `buf` from prior `acquire`. The buffer is left
3521 /// CLEAR (drained) ready for `release`.
3522 #[doc(hidden)]
3523 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `buf` from prior `acquire`; SAFETY documented below.
3524 pub fn jit_str_buf_intern(&mut self, buf: *mut Vec<u8>) -> i64 {
3525 if buf.is_null() {
3526 return 0;
3527 }
3528 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3529 let buf = unsafe { &mut *buf };
3530 let bytes = std::mem::take(buf);
3531 // v2 hard cap at 256KB per RFC Q3.
3532 if bytes.len() > 256 * 1024 {
3533 return 0;
3534 }
3535 let gc = self.heap.intern(&bytes);
3536 gc.as_ptr() as i64
3537 }
3538
3539 /// P12-S12-B v2/v3/v4 — trace JIT helper for `Op::TForCall A 0 C`.
3540 ///
3541 /// v2 base: copy R[A..=A+2] → R[A+4..=A+6] + `begin_call`.
3542 /// v3: ipairs `inext` fast path at the top — skip begin_call
3543 /// when R[A]=Native(ipairs_iter), R[A+1]=Table no-mt,
3544 /// R[A+2]=Int.
3545 /// v4: batched out-ptr writeback — fill ctrl/key/val raws into
3546 /// caller-provided buffers + return R[A+4]'s tag byte. Lets
3547 /// emit skip 3 separate `luna_jit_stack_load` calls and 1
3548 /// `luna_jit_stack_tag` call by reading the buffer via
3549 /// cranelift `stack_load` IR instead. Returns -1 on deopt.
3550 #[doc(hidden)]
3551 #[allow(clippy::not_unsafe_ptr_arg_deref)] // JIT helper: `ctrl_out`/`key_out`/`val_out` are caller-stack buffers from Cranelift-emitted prologue; SAFETY documented below.
3552 pub fn jit_op_tforcall(
3553 &mut self,
3554 slot_offset: u32,
3555 nvars: i32,
3556 ctrl_out: *mut i64,
3557 key_out: *mut i64,
3558 val_out: *mut i64,
3559 ) -> i64 {
3560 if self.jit.pending_err.is_some() {
3561 return -1;
3562 }
3563 let Some(f) = self.jit_last_lua_frame() else {
3564 self.jit.pending_err = Some(self.rt_err("JIT TForCall: no Lua frame"));
3565 return -1;
3566 };
3567 let abs = f.base + slot_offset;
3568 let need = (abs + 7) as usize;
3569 if self.stack.len() < need {
3570 self.stack.resize(need, Value::Nil);
3571 }
3572 // v3 fast path.
3573 let took_fast_path = if let Value::Native(n) = self.stack[abs as usize]
3574 && std::ptr::fn_addr_eq(
3575 n.f,
3576 crate::vm::builtins::ipairs_iter as crate::runtime::value::NativeFn,
3577 )
3578 && let Value::Table(t) = self.stack[(abs + 1) as usize]
3579 && t.metatable().is_none()
3580 && let Value::Int(i) = self.stack[(abs + 2) as usize]
3581 {
3582 let next_i = i.wrapping_add(1);
3583 let v = t.get_int(next_i);
3584 if v.is_nil() {
3585 self.stack[(abs + 4) as usize] = Value::Nil;
3586 } else {
3587 self.stack[(abs + 4) as usize] = Value::Int(next_i);
3588 if (nvars as usize) >= 2 {
3589 self.stack[(abs + 5) as usize] = v;
3590 }
3591 for j in 2..nvars as usize {
3592 let slot = abs + 4 + j as u32;
3593 if (slot as usize) < self.stack.len() {
3594 self.stack[slot as usize] = Value::Nil;
3595 }
3596 }
3597 }
3598 true
3599 } else {
3600 false
3601 };
3602 if !took_fast_path {
3603 // v2 slow path: copy R[A..=A+2] → R[A+4..=A+6], then
3604 // route through begin_call. Lua-closure iters would push
3605 // a Lua frame mid-trace → deopt.
3606 self.stack[(abs + 4) as usize] = self.stack[abs as usize];
3607 self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
3608 self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
3609 if !matches!(self.stack[abs as usize], Value::Native(_)) {
3610 self.jit.pending_err = Some(self.rt_err("JIT TForCall: non-Native iter (v2 only)"));
3611 return -1;
3612 }
3613 if let Err(e) = self.begin_call(abs + 4, Some(2), nvars, false) {
3614 self.jit.pending_err = Some(e);
3615 return -1;
3616 }
3617 }
3618 // v4 batched writeback — fill the caller's buffers with the
3619 // raw bits of R[A+2] / R[A+4] / R[A+5] so the trace IR can
3620 // reload via cranelift `stack_load` instead of separate
3621 // `luna_jit_stack_load` helper calls.
3622 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3623 let ctrl_raw = unsafe { self.stack[(abs + 2) as usize].unpack().1.zero };
3624 let (key_tag, key_rv) = self.stack[(abs + 4) as usize].unpack();
3625 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3626 let key_raw = unsafe { key_rv.zero };
3627 let val_raw = if (nvars as usize) >= 2 {
3628 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3629 unsafe { self.stack[(abs + 5) as usize].unpack().1.zero }
3630 } else {
3631 0u64
3632 };
3633 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3634 unsafe {
3635 ctrl_out.write(ctrl_raw as i64);
3636 key_out.write(key_raw as i64);
3637 val_out.write(val_raw as i64);
3638 }
3639 key_tag as i64
3640 }
3641
3642 /// P12-S12-B-v2 — load the raw `i64` payload of
3643 /// `vm.stack[base + slot_offset]` for the active trace's head
3644 /// Lua frame. Used to reload trace IR `Variable`s after a
3645 /// helper has written to `vm.stack` directly (e.g. TForCall's
3646 /// iter results land at `R[A+4..A+4+nvars]`).
3647 #[doc(hidden)]
3648 pub fn jit_stack_load(&mut self, slot_offset: u32) -> i64 {
3649 let Some(f) = self.jit_last_lua_frame() else {
3650 return 0;
3651 };
3652 let idx = (f.base as usize) + (slot_offset as usize);
3653 if idx >= self.stack.len() {
3654 return 0;
3655 }
3656 let v = self.stack[idx];
3657 let (_, raw) = v.unpack();
3658 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3659 unsafe { raw.zero as i64 }
3660 }
3661
3662 /// P12-S12-B-v2 — read the tag byte of
3663 /// `vm.stack[base + slot_offset]`. Used by `Op::TForLoop` emit
3664 /// to dispatch on the iterator's return-key tag at runtime
3665 /// (`raw::NIL` → loop end exit, `raw::INT` → continue, other →
3666 /// deopt for v2).
3667 #[doc(hidden)]
3668 pub fn jit_stack_tag(&mut self, slot_offset: u32) -> u8 {
3669 let Some(f) = self.jit_last_lua_frame() else {
3670 return crate::runtime::value::raw::NIL;
3671 };
3672 let idx = (f.base as usize) + (slot_offset as usize);
3673 if idx >= self.stack.len() {
3674 return crate::runtime::value::raw::NIL;
3675 }
3676 self.stack[idx].unpack().0
3677 }
3678
3679 /// P12-S4-step4b — push a Lua frame onto the call stack with
3680 /// JIT-known metadata. Used by `luna_jit_trace_materialize_frames`
3681 /// at trace side-exits to recreate the inlined call activations
3682 /// the lowerer compiled past. The contract (enforced by the
3683 /// lowerer's pre-emit pass): `cl.proto` is non-vararg,
3684 /// `nresults` is the caller's expected count (today always 1
3685 /// because the lowerer bails Op::Call C != 2), and the caller
3686 /// has already called `jit_ensure_stack` to cover
3687 /// `[0..base + cl.proto.max_stack)`.
3688 #[doc(hidden)]
3689 pub fn jit_push_inlined_frame(
3690 &mut self,
3691 cl: Gc<LuaClosure>,
3692 base: u32,
3693 pc: u32,
3694 nresults: i32,
3695 ) {
3696 frames_push_sync(
3697 &mut self.frames,
3698 &mut self.frames_top,
3699 CallFrame::Lua(Frame {
3700 closure: cl,
3701 base,
3702 pc,
3703 // Lua call ABI: callee R[0] sits at caller R[A+1], so
3704 // callee.base = caller.base + A + 1; func_slot is
3705 // caller.base + A = callee.base - 1.
3706 func_slot: base - 1,
3707 n_varargs: 0,
3708 nresults,
3709 hook_oldpc: u32::MAX,
3710 from_c: false,
3711 tm: None,
3712 is_hook: false,
3713 tailcalls: 0,
3714 }),
3715 );
3716 }
3717
3718 /// Toggle precompiled-chunk loading. Default `true`. Sandbox embedders
3719 /// should set to `false` so `load`/`loadstring` reject bytecode input
3720 /// (which bypasses parser limits and could exploit verifier gaps).
3721 pub fn set_bytecode_loading(&mut self, enabled: bool) {
3722 self.bytecode_loading = enabled;
3723 }
3724
3725 /// Current bytecode-loading gate state.
3726 pub fn bytecode_loading(&self) -> bool {
3727 self.bytecode_loading
3728 }
3729
3730 /// Toggle PUC `.luac` bytecode loading. Default `false` — PUC
3731 /// bytecode is a strictly larger trust surface than luna's own dump
3732 /// format (third-party toolchain bugs, malformed chunks, unknown
3733 /// opcode shapes). Enable only for trusted PUC chunks. Per-dialect
3734 /// translators (Phase LB Wave 2) live in `crate::vm::dump::puc`.
3735 pub fn set_puc_bytecode_loading(&mut self, enabled: bool) {
3736 self.puc_bytecode_loading = enabled;
3737 }
3738
3739 /// Current PUC bytecode-loading gate state.
3740 pub fn puc_bytecode_loading(&self) -> bool {
3741 self.puc_bytecode_loading
3742 }
3743
/// Default loader input budget: 256 MiB.
///
/// Both `Vm::load` and the Lua-level `load(reader, ...)` refuse
/// sources whose byte length crosses this cap. They return the
/// PUC-shaped `not enough memory` error instead of letting the host
/// allocator try (and crash) to hold the next chunk.
pub const DEFAULT_LOADER_INPUT_BUDGET: usize = 256 << 20; // 256 * 2^20 bytes
3751
3752 /// Set the loader input byte budget (see
3753 /// [`Vm::DEFAULT_LOADER_INPUT_BUDGET`]). Pass `usize::MAX` to
3754 /// effectively disable. Smaller caps are honored verbatim — a 0
3755 /// cap rejects every non-empty source.
3756 pub fn set_loader_input_budget(&mut self, bytes: usize) {
3757 self.loader_input_budget = bytes;
3758 }
3759
3760 /// Current loader input byte budget.
3761 pub fn loader_input_budget(&self) -> usize {
3762 self.loader_input_budget
3763 }
3764
3765 /// Take the error traceback captured at the latest error point and
3766 /// reset it. Embedders should call this immediately after a failed
3767 /// `call_value`/`eval`/`call`/etc. — the next public `call_value`
3768 /// entry clears it. Returns `None` if no error was in flight.
3769 pub fn take_error_traceback(&mut self) -> Option<String> {
3770 self.error_traceback
3771 .take()
3772 .map(|b| String::from_utf8_lossy(&b).into_owned())
3773 }
3774
/// Arm the soft memory cap (P09 embedding). The run loop checks the
/// heap's tracked byte usage between dispatch turns; on overshoot it
/// first runs a full collect, and if `bytes` still exceeds the cap it
/// raises a catchable `"memory cap exceeded"` Lua error and disarms
/// itself (fire-once: re-arm before the next `call_value` if reusing
/// the Vm across requests). `None` removes the cap. The accounting is
/// approximate — internal Vec/Box capacity overhead is not tracked,
/// so embedders should size the cap with ~2× margin over the desired
/// hard limit and additionally bound the Vm's lifetime (drop after
/// each request).
///
/// This method itself only writes `cap` into `self.heap.mem_cap`; the
/// enforcement described above lives elsewhere.
pub fn set_memory_cap(&mut self, cap: Option<usize>) {
    self.heap.mem_cap = cap;
}
3788
/// Approximate bytes the heap is currently holding. Object shells plus
/// every table's internal array/hash boxes (tracked via
/// `Heap::apply_bytes_delta` in `set`/`rehash`/`ensure_*`). Proto
/// bytecode and closure upvalue slices still go uncounted — this is a
/// lower bound, not a precise `malloc_stats`-style total.
///
/// Returns `self.heap.bytes()` unchanged.
pub fn memory_used(&self) -> usize {
    self.heap.bytes()
}
3797
3798 /// Read upvalue slot `i` of the native function currently on top of the
3799 /// dispatch chain (the one whose body is executing). Returns `Value::Nil`
3800 /// when no native is running. Public so the C ABI trampoline can fetch
3801 /// the host C function pointer it stashed there at registration time.
3802 pub fn running_native_upvalue(&self, i: usize) -> Value {
3803 match self.running_natives.last() {
3804 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
3805 Some(nc) => unsafe {
3806 let upvals = &(*nc.as_ptr()).upvals;
3807 upvals.get(i).copied().unwrap_or(Value::Nil)
3808 },
3809 None => Value::Nil,
3810 }
3811 }
3812
3813 /// Register a table for finalization if its (just-set) metatable carries a
3814 /// `__gc` metamethod (PUC luaC_checkfinalizer at setmetatable time — adding
3815 /// `__gc` to the metatable afterwards does not retroactively register).
3816 pub(crate) fn check_finalizer(&mut self, t: Gc<Table>) {
3817 if !self.get_mm(Value::Table(t), Mm::Gc).is_nil() {
3818 self.heap.register_finalizable(t);
3819 }
3820 }
3821
3822 /// Same as [`Self::check_finalizer`] for a userdata. PUC 5.1 attaches the
3823 /// finalizer to the proxy produced by `newproxy(true)` once its metatable
3824 /// gains `__gc`. gc.lua's "testing userdata" section sets `__gc` on the
3825 /// metatable that `newproxy` returned, which then needs to flow through.
3826 /// Kept available for the future 5.2+ `lua_setmetatable` path (which
3827 /// would re-check at metatable-set time); luna's only userdata
3828 /// finalizables today come via `newproxy`, which registers itself.
3829 #[allow(dead_code)]
3830 pub(crate) fn check_finalizer_userdata(&mut self, u: Gc<crate::runtime::Userdata>) {
3831 if !self.get_mm(Value::Userdata(u), Mm::Gc).is_nil() {
3832 self.heap.register_finalizable_userdata(u);
3833 }
3834 }
3835
/// Run pending `__gc` finalizers (objects the collector resurrected for
/// finalization). Finalizer errors are swallowed — PUC turns them into a
/// warning; they must never propagate to the mutator. Reentrancy-guarded.
///
/// Thin wrapper over `run_finalizers_or_err` that deliberately discards
/// its `Result`.
fn run_finalizers(&mut self) {
    let _ = self.run_finalizers_or_err();
}
3842
/// Run pending `__gc` finalizers and report the first error, if any.
///
/// Takes the heap's to-be-finalized list (`take_tobefnz`) and, for every
/// object whose `__gc` metamethod is a closure or native, calls it with
/// the object as its only argument. Returns `Ok(())` straight away when
/// a finalizer pass is already in progress (`gc_finalizing`) or when
/// nothing is pending. `gc_finalizing` is held `true` for the whole pass.
///
/// # Errors
///
/// Below `Lua54`, the first finalizer error is remembered and returned
/// after all pending finalizers have run; from `Lua52` up it is wrapped
/// as `error in __gc metamethod (…)`, before that it is passed through
/// unchanged. From `Lua54` on, errors are routed through `emit_warn`
/// and never returned.
fn run_finalizers_or_err(&mut self) -> Result<(), LuaError> {
    // Re-entrancy guard: a finalizer that triggers another pass is a no-op.
    if self.gc_finalizing {
        return Ok(());
    }
    let pending = self.heap.take_tobefnz();
    if pending.is_empty() {
        return Ok(());
    }
    self.gc_finalizing = true;
    let mut first_err: Option<LuaError> = None;
    for obj in pending {
        let gc = self.get_mm(obj, Mm::Gc);
        // PUC 5.2+ accepts any non-nil `__gc` at setmetatable time to
        // schedule the object for finalization (`__gc = true` is the
        // canonical placeholder); only call it at finalize time when it
        // is actually a function. gc.lua 5.2 :412 wires up exactly this
        // sentinel and then expects no call.
        let callable = matches!(gc, Value::Closure(_) | Value::Native(_));
        if callable {
            // PUC `GCTM` sets `CIST_FIN` on the new ci so
            // `funcnamefromfinalizer` reports `namewhat = "metamethod"`,
            // `name = "__gc"`. luna threads the same outcome through the
            // generic `pending_tm` slot: the Lua frame born from this
            // call consumes it in `push_frame`. Saved/restored around the
            // call in case the handler is a native (which never pops it).
            // Bare event name; `frame_name` / `c_frame_name` add the
            // `"__"` debug prefix for 5.2/5.3, drop it for 5.4+. Matches
            // the convention used by `__close`, `__index`, …
            let saved_tm = self.pending_tm.replace("gc");
            // PUC `GCTM` also sets `CIST_FIN` on the CALLER's ci before
            // pcall, so `getinfo(2).namewhat` inside the finalizer reads
            // "metamethod" (5.3 db.lua :720 wires up exactly this probe).
            // luna mirrors by temporarily tagging the topmost Lua frame's
            // `tm` with the bare event name "gc" for the duration of the
            // call.
            let caller_tm_idx = self
                .frames
                .iter()
                .rposition(|cf| matches!(cf, CallFrame::Lua(_)));
            // `Some(prev)` only when a Lua frame was found and tagged;
            // `prev` is that frame's previous (possibly `None`) tag.
            let saved_caller_tm = caller_tm_idx.and_then(|i| {
                if let CallFrame::Lua(fr) = &mut self.frames[i] {
                    let prev = fr.tm;
                    fr.tm = Some("gc");
                    Some(prev)
                } else {
                    None
                }
            });
            if let Err(e) = self.call_value(gc, &[obj]) {
                // PUC 5.1 GCTM raised the finalizer's error to the
                // explicit `collectgarbage()` caller (`gc.lua 5.1 :255`
                // baselines on `not pcall(collectgarbage)`). 5.2/5.3
                // wrapped it in `error in __gc metamethod (msg)` first
                // (`callGCTM` → `luaG_runerror`) but still raised. 5.4
                // introduced the warning system and switched to "warn
                // then continue" — never re-raise, just route the
                // wrapped message through `warn`. gc.lua 5.5 :378 wires
                // up `_WARN` capture under the `if T then …` block to
                // baseline on the same wrapped string.
                if self.version >= LuaVersion::Lua54 {
                    let inner = self.error_text(&e);
                    let msg = format!("error in __gc metamethod ({inner})");
                    self.emit_warn(msg.as_bytes(), false);
                } else if first_err.is_none() {
                    // Only the first failure is kept; later ones are dropped.
                    let wrapped = if self.version >= LuaVersion::Lua52 {
                        let inner = self.error_text(&e);
                        let msg = format!("error in __gc metamethod ({inner})");
                        let s = Value::Str(self.heap.intern(msg.as_bytes()));
                        LuaError(s)
                    } else {
                        e
                    };
                    first_err = Some(wrapped);
                }
            }
            // Undo both temporary tags regardless of how the call ended.
            self.pending_tm = saved_tm;
            if let (Some(i), Some(prev)) = (caller_tm_idx, saved_caller_tm)
                && let Some(CallFrame::Lua(fr)) = self.frames.get_mut(i)
            {
                fr.tm = prev; // prev is Option<&'static str>; restore exactly
            }
        }
    }
    self.gc_finalizing = false;
    match first_err {
        Some(e) => Err(e),
        None => Ok(()),
    }
}
3931
3932 /// Drive one incremental GC step (PUC `collectgarbage("step", n)`).
3933 /// Crosses up to three phases per call:
3934 /// 1. Pause → seed Propagate (`gc_start_propagate`)
3935 /// 2. Propagate → drain gray up to `budget`; on exhaustion run atomic
3936 /// (`gc_finish_atomic` → tobefnz populated; finalizers
3937 /// run via `run_finalizers`) and enter Sweep
3938 /// 3. Sweep → `gc_sweep_step` up to (residual) `budget`
3939 /// Returns true when this call completed the cycle's sweep (back to
3940 /// Pause). The budget is spent generously across phases — a large `n`
3941 /// can finish a whole cycle in one call (PUC stop-the-world step).
3942 pub(crate) fn gc_step(&mut self, budget: usize) -> bool {
3943 // Re-entry guard: never recurse — `run_finalizers` calls Lua code
3944 // that may hit a safe point and try to step again. Re-entry was OK
3945 // under STW (collect_garbage had its own guard) but here the
3946 // intermediate phase state would corrupt.
3947 if self.gc_finalizing {
3948 return false;
3949 }
3950 if self.heap.gc_phase_is_pause() {
3951 let (roots, extra) = self.gc_roots();
3952 self.heap.gc_start_propagate(&roots, &extra);
3953 }
3954 if self.heap.gc_phase_is_propagate() {
3955 if !self.heap.gc_step_propagate(budget) {
3956 return false;
3957 }
3958 self.heap.gc_finish_atomic();
3959 // any __gc scheduled by atomic — run before sweep so a finalizer
3960 // re-registering `self` re-enters the next cycle, not this sweep
3961 self.run_finalizers();
3962 }
3963 // either we just transitioned, or we entered already in Sweep, or
3964 // a finalizer started a new cycle (gc_sweep_step is a no-op then)
3965 self.heap.gc_sweep_step(budget)
3966 }
3967
3968 // ---- frames & calls ----
3969
/// Begin calling stack[func_slot] with `nargs` (None: up to self.top).
/// Returns true if a Lua frame was pushed (the dispatch loop continues
/// there), false if a native completed inline.
///
/// Other routes through this function:
/// - a Lua closure accepted by `try_jit_call_op` runs in place and also
///   yields `Ok(false)`;
/// - `pcall`, `xpcall`, and `pairs` on a value with `__pairs` are handed
///   to `begin_pcall` / `begin_xpcall` / `begin_pairs`, whose result is
///   returned as-is;
/// - any other value is resolved through its `__call` metamethod: the
///   handler is inserted at `func_slot` (shifting the value into the
///   first-argument position) and the loop retries.
///
/// `nresults` and `from_c` are forwarded to `push_frame` for Lua callees;
/// for natives `nresults` is passed to `finish_results`.
///
/// # Errors
///
/// - the value is not callable and has no `__call` (`call_err`);
/// - the `__call` chain exceeds the version-dependent cap;
/// - an async native is invoked while `async_mode` is off;
/// - a call/return hook fails, or `push_frame` fails;
/// - the native itself returns an error, or panics (surfaced as a
///   `native panic: …` Lua error);
/// - an async native in async mode returns the sentinel
///   `LuaError(Value::Nil)` after stashing its future.
fn begin_call(
    &mut self,
    func_slot: u32,
    nargs: Option<u32>,
    nresults: i32,
    from_c: bool,
) -> Result<bool, LuaError> {
    let mut nargs = match nargs {
        Some(n) => n,
        None => self.top - (func_slot + 1),
    };
    // Consume `pending_tailcalls` at the boundary: a tail-call op sets it
    // only for the immediately-following Lua activation. Native dispatch
    // (or `__call` resolution) below must not let it leak to the next
    // begin_call's frame; restore it just before push_frame for the Lua
    // arm so its meaning is preserved across __call chaining.
    let tailcalls = std::mem::take(&mut self.pending_tailcalls);
    // resolve __call handlers iteratively (PUC tryfuncTM loop): each handler
    // is inserted before the value so it becomes the first argument, and a
    // chain of `__call` tables resolves down to a real function.
    let mut chain = 0u32;
    loop {
        match self.stack[func_slot as usize] {
            Value::Closure(cl) => {
                // P11-S2c.B JIT fast path: if the Proto's body fits
                // the int-arith whitelist, every arg is `Value::Int`,
                // and the cached arity matches, skip frame setup and
                // run the cached native fn in-place.
                if self.try_jit_call_op(cl, func_slot, nargs, nresults) {
                    self.pending_tailcalls = tailcalls;
                    return Ok(false);
                }
                self.pending_tailcalls = tailcalls;
                self.push_frame(cl, func_slot, nargs, nresults, from_c)?;
                // P12-S4-step0 — trace-on-call trigger. The frame
                // we just pushed is the callee whose body the
                // recorder will trace. Bump the per-Proto call
                // counter; once it crosses `CALL_HOT_THRESHOLD`
                // and no other trace is in flight, snapshot the
                // callee's register window (R[0..max_stack]) and
                // begin recording at `pc=0`. This is what unlocks
                // tracing for functions whose body has no negative
                // `Op::Jmp` back-edge (`fib`, recursive helpers).
                //
                // Gated on `self.jit.trace_enabled`, so the default
                // dispatch pays a single not-taken branch.
                if self.jit.trace_enabled {
                    let proto = cl.proto;
                    // `c` is the counter value BEFORE this call's bump;
                    // the increment saturates below `u32::MAX / 2`.
                    let c = proto.call_hot_count.get();
                    if c < u32::MAX / 2 {
                        proto.call_hot_count.set(c + 1);
                    }
                    // P13-S13-H — relaxed call-trigger:
                    // `c >= THRESHOLD` (was `c == THRESHOLD`) +
                    // `!already_cached` short-circuit. Lets a
                    // discarded short call-trigger close retry
                    // on the next call (fib(10/15/20/25)
                    // pathology — first capture is base-case
                    // [Lt,Jmp,Return1]; coverage-heuristic
                    // discards; next call gets to record at a
                    // potentially deeper recursion point).
                    // Without `already_cached`, the relaxed
                    // condition would re-record over a cached
                    // trace every call.
                    //
                    // P13-S13-K — additionally short-circuit on
                    // `proto.trace_gave_up`. The S13-I discard
                    // cap force-compiles a partial trace and
                    // flips this flag; subsequent calls into
                    // this Proto skip the RefCell borrow + Vec
                    // scan entirely.
                    if proto.trace_gave_up.get() {
                        return Ok(true);
                    }
                    let call_already_cached =
                        proto.traces.borrow().iter().any(|t| t.head_pc == 0);
                    if c >= crate::jit::trace::CALL_HOT_THRESHOLD
                        && self.jit.active_trace.is_none()
                        && !call_already_cached
                    {
                        // The new frame is on top: index in
                        // `self.frames` is `len() - 1`.
                        let frame_idx = self.frames.len() - 1;
                        // Snapshot R[0..max_stack] at the callee's
                        // base. `push_frame` resized `self.stack`
                        // to `base + max_stack`, so this window is
                        // guaranteed in-bounds.
                        let f = match &self.frames[frame_idx] {
                            CallFrame::Lua(f) => f,
                            _ => unreachable!("push_frame just pushed a Lua frame"),
                        };
                        let max_stack = cl.proto.max_stack as usize;
                        let base_us = f.base as usize;
                        let mut entry_tags = Vec::with_capacity(max_stack);
                        for i in 0..max_stack {
                            let (tag, _) = self.stack[base_us + i].unpack();
                            entry_tags.push(tag);
                        }
                        self.jit.active_trace =
                            Some(Box::new(crate::jit::trace::TraceRecord::start(
                                cl.proto, 0, entry_tags, true,
                            )));
                        self.jit.recording_frame_base = frame_idx;
                    }
                }
                return Ok(true);
            }
            Value::Native(nc) => {
                // v1.1 B10 Stage 2 — async-marked NativeClosure.
                // Route through the cooperative-yield mechanism
                // when async_mode is on; reject when called from
                // a sync `eval`/`call_value` path (would have no
                // executor to drive the returned future).
                if nc.is_async {
                    if !self.async_mode {
                        let s = Value::Str(
                            self.heap.intern(b"async native called in sync context"),
                        );
                        self.last_error_kind = crate::vm::error::LuaErrorKind::Runtime;
                        return Err(LuaError(s));
                    }
                    // Same root-up bookkeeping as the sync path:
                    // pin args + result-count expectation so a
                    // collection across the suspend boundary
                    // keeps the arg window live.
                    self.native_nresults = nresults;
                    self.gc_top = func_slot + nargs + 1;
                    // v1.3 Phase AS — fire the "call" hook BEFORE
                    // building the future. Mirrors the sync native
                    // path's `hook_call(true, nargs)` site
                    // (`exec.rs` further down) so embedders with a
                    // Rust debug hook installed see a Call event
                    // for async natives identical to the sync
                    // path. The matching "return" hook fires from
                    // `commit_async_native_result` in
                    // `async_drive.rs` after the future resolves.
                    // Placement follows audit §"Open questions"
                    // Q6: after the `native_nresults` / `gc_top`
                    // pin, before the future is constructed, so a
                    // hook body that triggers GC observes the
                    // correct pinned window. On hook error the
                    // sentinel never returns and
                    // `pending_async_native_*` remain `None` —
                    // the executor sees `DispatchOutcome::Error`
                    // (audit §A.1 edge cases).
                    self.hook_call(true, nargs)?;
                    // Transmute the stored NativeFn back to its
                    // real AsyncNativeFn shape. Sound because
                    // `set_async_native` / `create_async_native`
                    // installed an AsyncNativeFn through the
                    // identically-sized fn-pointer slot, and the
                    // `is_async` marker bit is what records that
                    // fact.
                    let async_fn: crate::vm::async_drive::AsyncNativeFn =
                        // SAFETY: same-size fn pointers; provenance
                        // preserved through `mem::transmute`. The
                        // `is_async` marker is the only safe-to-call
                        // gate, set exclusively by
                        // `Vm::create_async_native`.
                        unsafe { std::mem::transmute(nc.f) };
                    let vm_ptr: *mut Vm = self;
                    let fut = async_fn(vm_ptr, func_slot, nargs);
                    // Stash the future + post-call context for
                    // `drive_one` to surface to `EvalFuture::poll`.
                    self.pending_async_native_fut = Some(fut);
                    self.pending_async_native_ctx = Some(AsyncNativeCallCtx {
                        func_slot,
                        nargs,
                        nresults,
                        gc_top: self.gc_top,
                    });
                    // Sentinel Err walked up to `drive_one` (same
                    // shape as `host_yield_pending`'s budget yield).
                    // Value::Nil — never seen by user code.
                    return Err(LuaError(Value::Nil));
                }
                // pcall/xpcall are yieldable: rather than calling the
                // protected function through the Rust stack (which cannot be
                // suspended), push a continuation frame and drive the call
                // through the interpreter loop (PUC lua_pcallk). A yield
                // inside it is preserved with the thread's saved frames.
                use crate::runtime::value::NativeFn;
                if std::ptr::fn_addr_eq(nc.f, nat_pcall as NativeFn) {
                    return self.begin_pcall(func_slot, nargs, nresults);
                }
                if std::ptr::fn_addr_eq(nc.f, nat_xpcall as NativeFn) {
                    return self.begin_xpcall(func_slot, nargs, nresults);
                }
                // pairs(t) with a __pairs metamethod calls it yieldably (PUC
                // luaB_pairs); without one, fall through to the plain native.
                if std::ptr::fn_addr_eq(nc.f, nat_pairs as NativeFn) && nargs >= 1 {
                    let arg = self.stack[(func_slot + 1) as usize];
                    if !self.get_mm(arg, Mm::Pairs).is_nil() {
                        return self.begin_pairs(func_slot, nresults);
                    }
                }
                // a native that collects (e.g. `collectgarbage`) roots up to
                // its own arguments — the caller's live registers all sit
                // below `func_slot` and stay rooted.
                self.native_nresults = nresults;
                self.gc_top = func_slot + nargs + 1;
                // Push the native onto the running-natives chain BEFORE
                // firing the call hook so that `debug.getinfo(level)` and
                // `arg_error` from inside the hook see this native as the
                // currently-running C function (db.lua :344 reads
                // `getinfo(2, "f").func` for the just-entered callee).
                // Popped after the matching return hook fires. There is no
                // scope guard: every exit path below (call-hook error,
                // native error/panic, return-hook path, plain return) pops
                // both `running_natives` and `running_native_slots`
                // explicitly, so new early returns must do the same.
                self.running_natives.push(nc);
                self.running_native_slots.push((func_slot, nargs));
                // PUC luaD_precall fires the "call" hook for C functions too.
                // A yield inside the native (coroutine.yield) propagates an
                // Err and the matching "return" hook fires on resume instead.
                if let Err(e) = self.hook_call(true, nargs) {
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                    return Err(e);
                }
                // P09: trap a Rust panic in the native and surface it as
                // a Lua error rather than letting it unwind through the
                // VM into the embedder. The VM's internal state may still
                // be inconsistent after a panic (half-pushed args,
                // dangling GC references), so embedders that catch this
                // class of error should drop and re-create the Vm — but
                // it's still better than tearing the host process down.
                // `AssertUnwindSafe` is sound because the caller is the
                // dispatch loop and any half-done state is fenced behind
                // the immediate Err return below.
                use std::panic::{AssertUnwindSafe, catch_unwind};
                let result =
                    match catch_unwind(AssertUnwindSafe(|| (nc.f)(self, func_slot, nargs))) {
                        Ok(r) => r,
                        Err(payload) => {
                            let msg = panic_payload_str(&payload);
                            let s = Value::Str(
                                self.heap.intern(format!("native panic: {msg}").as_bytes()),
                            );
                            Err(LuaError(s))
                        }
                    };
                let nret = match result {
                    Ok(n) => n,
                    Err(e) => {
                        // Stash the offending native's name BEFORE the
                        // pop so a dying coroutine's traceback snapshot
                        // can prepend `[C]: in function '<name>'`. Use
                        // pushglobalfuncname (PUC walks package.loaded
                        // to qualify); fall back to "?".
                        self.errored_native =
                            Some(self.pushglobalfuncname(nc.f).unwrap_or_else(|| "?".into()));
                        self.running_natives.pop();
                        self.running_native_slots.pop();
                        return Err(e);
                    }
                };
                // PUC `luaD_poscall` fires the return hook BEFORE moving
                // results into the function's slot — at that point args
                // sit at `[func_slot + 1, func_slot + 1 + nargs)` and
                // results above them at `[func_slot + 1 + nargs, …)`.
                // luna's `nat_return` has already written the results
                // into `[func_slot, func_slot + nret)`, so we replay PUC's
                // layout by copying the results up past the preserved
                // args, firing the hook (with ftransfer = nargs + 1, so
                // `getlocal(2, ftransfer..)` reads results), and then
                // copying back for `finish_results`. db.lua :541 reads
                // `getinfo("r").ftransfer` + `getlocal` to inspect a
                // returning native's results this way.
                if self.hook.ret
                    && !self.in_hook
                    && (self.hook.func.is_some() || self.hook.rust_func.is_some())
                {
                    let res_dst = func_slot + nargs + 1;
                    let need = (res_dst + nret) as usize;
                    if self.stack.len() < need {
                        self.stack.resize(need, Value::Nil);
                    }
                    // Copy high-to-low: source and destination ranges may
                    // overlap and the destination sits above the source.
                    for i in (0..nret).rev() {
                        self.stack[(res_dst + i) as usize] =
                            self.stack[(func_slot + i) as usize];
                    }
                    // widen the C-frame's argument window for getlocal
                    if let Some(slot) = self.running_native_slots.last_mut() {
                        slot.1 = nargs + nret;
                    }
                    // The hook result is held until the stack and the
                    // running-natives chain have been restored.
                    let hr = self.hook_return(true, nargs + 1, nret);
                    if let Some(slot) = self.running_native_slots.last_mut() {
                        slot.1 = nargs;
                    }
                    // restore results into the slot finish_results expects
                    for i in 0..nret {
                        self.stack[(func_slot + i) as usize] =
                            self.stack[(res_dst + i) as usize];
                    }
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                    hr?;
                } else {
                    self.running_natives.pop();
                    self.running_native_slots.pop();
                }
                self.finish_results(func_slot, nret, nresults);
                // the native may have allocated; collect with the results as
                // the live boundary (PUC checks GC after a call returns).
                self.maybe_collect_garbage(self.top);
                return Ok(false);
            }
            v => {
                let mm = self.get_mm(v, Mm::Call);
                if mm.is_nil() {
                    return Err(self.call_err(v));
                }
                chain += 1;
                // PUC 5.5 dropped the chain cap from `MAXTAGRECUR = 200`
                // (the value 5.4's `lvm.c` uses) down to `MAXCCMT = 16`,
                // and the 5.5 test exercises the new tight bound directly
                // (calls.lua :225 builds a 16-deep chain and expects the
                // 16th to error). 5.4 calls.lua :194 instead builds a 20-
                // deep chain and expects it to succeed.
                let cap = if self.version >= crate::version::LuaVersion::Lua55 {
                    15
                } else {
                    MAX_CCMT
                };
                if chain > cap {
                    return Err(self.rt_err("'__call' chain too long"));
                }
                // slots above shift by one; at a call site those are dead
                // temps of the current frame
                self.stack.insert(func_slot as usize, mm);
                if self.top > func_slot {
                    self.top += 1;
                }
                // the original callee is now the handler's first argument
                nargs += 1;
            }
        }
    }
}
4311
/// Push a Lua activation for closure `cl`, whose function value sits at
/// `func_slot` with `nargs` arguments directly above it.
///
/// In order: stack-depth check, vararg rotation (extras end up just
/// below the new `base`), growing and nil-wiping the register window,
/// pushing the `CallFrame::Lua` record (which consumes `pending_tm`,
/// `pending_is_hook` and `pending_tailcalls`), optionally building the
/// 5.1 compat `arg` table, and finally firing the call hook.
///
/// `nresults` and `from_c` are stored in the new frame unchanged.
///
/// # Errors
///
/// - `"stack overflow"` — or `"error in error handling"` while
///   `msgh_depth > 0` — when `func_slot + 256 > MAX_LUA_STACK`; nothing
///   has been pushed in that case.
/// - Any error from the call hook; the frame is already on `self.frames`
///   at that point.
fn push_frame(
    &mut self,
    cl: Gc<LuaClosure>,
    func_slot: u32,
    nargs: u32,
    nresults: i32,
    from_c: bool,
) -> Result<(), LuaError> {
    if func_slot + 256 > MAX_LUA_STACK {
        // PUC `stackerror`: a stack overflow that surfaces while the
        // current activation is inside an xpcall message handler is
        // translated by `luaD_seterrorobj` (LUA_ERRERR) to "error in
        // error handling". errors.lua :606 expects the inner pcall(loop)
        // it runs from within `xpcall(loop, msgh)`'s msgh to fail with a
        // message matching "error handling".
        let msg = if self.msgh_depth > 0 {
            "error in error handling"
        } else {
            "stack overflow"
        };
        return Err(self.rt_err(msg));
    }
    let proto = cl.proto;
    let nparams = proto.num_params as u32;
    // 5.5 vararg layout (PUC luaT_adjustvarargs): the extra args stay on the
    // stack just below the new `base`, so a named vararg can be indexed
    // virtually without allocating a table. Rotate `[p1..pn][e1..em]` to
    // `[e1..em][p1..pn]` so the fixed params land at the new base.
    let n_varargs = if proto.is_vararg {
        nargs.saturating_sub(nparams)
    } else {
        0
    };
    if n_varargs > 0 {
        // `n_varargs > 0` implies `nargs > nparams`, so the rotation
        // amount is within the slice length.
        let s = (func_slot + 1) as usize;
        self.stack[s..s + nargs as usize].rotate_left(nparams as usize);
    }
    let base = func_slot + 1 + n_varargs;
    let need = (base + proto.max_stack as u32) as usize;
    if self.stack.len() < need {
        self.stack.resize(need, Value::Nil);
    }
    // wipe the register window beyond the kept parameters (stale values —
    // required for GC-safety and codegen). The varargs below `base` survive.
    let kept = nargs.saturating_sub(n_varargs).min(nparams);
    // SAFETY: just resized above so `need <= stack.len()`; `base + kept <=
    // need` since `base + nparams <= base + max_stack = need` and `kept <=
    // nparams`. `slice::fill` lowers to a single memset on Copy types.
    unsafe {
        self.stack
            .get_unchecked_mut((base + kept) as usize..need)
            .fill(Value::Nil);
    }
    frames_push_sync(
        &mut self.frames,
        &mut self.frames_top,
        CallFrame::Lua(Frame {
            closure: cl,
            base,
            pc: 0,
            func_slot,
            nresults,
            hook_oldpc: u32::MAX,
            from_c,
            n_varargs,
            // single-shot consume: `close_slots` sets pending_tm before each
            // handler call; the next Lua frame born is that handler's.
            tm: self.pending_tm.take(),
            // `run_hook` sets `pending_is_hook` before dispatching the user
            // hook so its frame reports `namewhat = "hook"` via getinfo.
            is_hook: std::mem::take(&mut self.pending_is_hook),
            // likewise consumed here, leaving the pending counter reset
            tailcalls: std::mem::take(&mut self.pending_tailcalls),
        }),
    );
    // PUC 5.1 `LUAI_COMPAT_VARARG`: populate the hidden `arg` local with
    // `{ n = n_varargs, [1] = e1, [2] = e2, … }`. The compiler reserved
    // the slot at `base + nparams`; the extras sit just below `base` from
    // the vararg rotate above. 5.1 db.lua :279 reads `arg.n` from a line
    // hook; vararg.lua's contradictory expectations were already going to
    // fail either way (some asserts want `arg == nil`).
    if proto.has_compat_vararg_arg {
        let arg_slot = (base + nparams) as usize;
        let t = self.heap.new_table();
        {
            // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
            let tm = unsafe { t.as_mut() };
            for i in 0..n_varargs {
                let v = self.stack[(base - n_varargs + i) as usize];
                // bounded by `n_varargs` (≤ MAXUPVAL territory), well
                // below `MAX_ASIZE`
                let _ = tm.set_int(&mut self.heap, (i + 1) as i64, v);
            }
            let nk = Value::Str(self.heap.intern(b"n"));
            tm.set(&mut self.heap, nk, Value::Int(n_varargs as i64))
                .expect("'n' key");
        }
        // once-per-table barrier mirrors SETLIST: t is born BLACK during
        // Propagate and the bulk `set_int`/`set` calls above don't barrier
        self.heap
            .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
        self.stack[arg_slot] = Value::Table(t);
    }
    // PUC luaD_precall fires the "call" hook with the new frame current, so
    // a hook calling debug.getinfo(2) sees the entered function. For a Lua
    // callee, PUC `luaD_hookcall` passes `p->numparams` as ntransfer (only
    // fixed params count — extras already live below `base`).
    // A frame born via OP_TailCall fires "tail call" instead (PUC
    // luaD_pretailcall) and skips the matching "return" hook on exit.
    // `is_tail` is read back from the frame just pushed above.
    let is_tail = self
        .frames
        .last()
        .and_then(|f| f.lua())
        .is_some_and(|f| f.tailcalls > 0);
    self.hook_call_with(false, nparams, is_tail)?;
    Ok(())
}
4428
4429 /// `pcall(f, ...)` (PUC luaB_pcall): push a continuation frame, then drive
4430 /// the protected call `f` through the interpreter loop. The protected
4431 /// function and its arguments already sit at `func_slot+1..`, so calling `f`
4432 /// at `func_slot+1` lets its results land one slot above the continuation —
4433 /// the loop head then writes `true` at `func_slot` to form `true, results…`.
4434 /// Always returns `Ok(true)`: a continuation is now on the stack to be
4435 /// resolved by the loop (even when `f` is a native that already ran inline).
4436 fn begin_pcall(&mut self, func_slot: u32, nargs: u32, nresults: i32) -> Result<bool, LuaError> {
4437 if nargs == 0 {
4438 return Err(crate::vm::builtins::raise_str(
4439 self,
4440 "bad argument #1 to 'pcall' (value expected)",
4441 ));
4442 }
4443 if self.pcall_depth >= MAX_C_DEPTH {
4444 return Err(self.rt_err("C stack overflow"));
4445 }
4446 self.pcall_depth += 1;
4447 frames_push_sync(
4448 &mut self.frames,
4449 &mut self.frames_top,
4450 CallFrame::Cont(NativeCont {
4451 kind: ContKind::Pcall,
4452 func_slot,
4453 nresults,
4454 }),
4455 );
4456 // call f (slot func_slot+1) with the remaining args, asking for all
4457 // results; a yield or error inside propagates with the continuation kept
4458 // on the stack (caught by `unwind` / preserved across a yield).
4459 self.begin_call(func_slot + 1, Some(nargs - 1), -1, true)?;
4460 Ok(true)
4461 }
4462
4463 /// `xpcall(f, msgh, ...)` (PUC luaB_xpcall): like `begin_pcall`, but the
4464 /// message handler is stashed in the continuation and the arguments are
4465 /// shifted down over the handler's slot so `f`'s args are contiguous.
4466 fn begin_xpcall(
4467 &mut self,
4468 func_slot: u32,
4469 nargs: u32,
4470 nresults: i32,
4471 ) -> Result<bool, LuaError> {
4472 if nargs < 2 {
4473 return Err(crate::vm::builtins::raise_str(
4474 self,
4475 "bad argument #2 to 'xpcall' (value expected)",
4476 ));
4477 }
4478 if self.pcall_depth >= MAX_C_DEPTH {
4479 return Err(self.rt_err("C stack overflow"));
4480 }
4481 self.pcall_depth += 1;
4482 // layout: [xpcall@func_slot, f@+1, msgh@+2, a1@+3, ...]. Stash msgh and
4483 // close its gap so f's args become [f@+1, a1@+2, ...].
4484 let handler = self.stack[(func_slot + 2) as usize];
4485 let nfargs = nargs - 2;
4486 for i in 0..nfargs {
4487 self.stack[(func_slot + 2 + i) as usize] = self.stack[(func_slot + 3 + i) as usize];
4488 }
4489 self.top = func_slot + 2 + nfargs;
4490 frames_push_sync(
4491 &mut self.frames,
4492 &mut self.frames_top,
4493 CallFrame::Cont(NativeCont {
4494 kind: ContKind::Xpcall { handler },
4495 func_slot,
4496 nresults,
4497 }),
4498 );
4499 self.begin_call(func_slot + 1, Some(nfargs), -1, true)?;
4500 Ok(true)
4501 }
4502
4503 /// `pairs(t)` where `t` has a `__pairs` metamethod (PUC luaB_pairs's
4504 /// lua_callk path): drive `__pairs(t)` through the loop with a `Pairs`
4505 /// continuation so a `coroutine.yield` inside it suspends cleanly. The
4506 /// metamethod is called in `pairs`'s own slot, so its (≤4, nil-padded)
4507 /// results land exactly where `pairs`'s results belong.
4508 fn begin_pairs(&mut self, func_slot: u32, nresults: i32) -> Result<bool, LuaError> {
4509 let arg = self.stack[(func_slot + 1) as usize];
4510 let mm = self.get_mm(arg, Mm::Pairs);
4511 // layout becomes [mm@func_slot, t@func_slot+1]; call mm(t) wanting 4.
4512 self.stack[func_slot as usize] = mm;
4513 self.top = func_slot + 2;
4514 frames_push_sync(
4515 &mut self.frames,
4516 &mut self.frames_top,
4517 CallFrame::Cont(NativeCont {
4518 kind: ContKind::Pairs,
4519 func_slot,
4520 nresults,
4521 }),
4522 );
4523 self.begin_call(func_slot, Some(1), 4, true)?;
4524 Ok(true)
4525 }
4526
4527 /// The running (top) Lua frame. The interpreter only reads this while a Lua
4528 /// frame is on top — a continuation frame is never the running frame (it is
4529 /// consumed the instant the call it protects unwinds onto it).
4530 #[inline]
4531 fn top_frame(&self) -> &Frame {
4532 self.frames
4533 .last()
4534 .and_then(CallFrame::lua)
4535 .expect("running Lua frame")
4536 }
4537
4538 #[inline]
4539 fn top_frame_mut(&mut self) -> &mut Frame {
4540 self.frames
4541 .last_mut()
4542 .and_then(CallFrame::lua_mut)
4543 .expect("running Lua frame")
4544 }
4545
4546 /// Pad/announce results sitting at func_slot.
4547 pub(crate) fn finish_results(&mut self, func_slot: u32, nret: u32, wanted: i32) {
4548 // v2.3 P1B-A: capture the call's high-water-mark before
4549 // setting the new top so we can Nil-clear slots that the
4550 // call temporarily wrote but no longer holds — matching
4551 // PUC's `L->top` discipline (slots past L->top are "free"
4552 // and the next push overwrites them). Without this clear,
4553 // a stale `Value::Closure` (e.g. the called function
4554 // itself, when wanted = 0) sits at `func_slot` and a
4555 // later GC with wider `gc_top` traces it after the
4556 // closure has been freed by a previous narrow safe-point
4557 // GC → heap-buffer-overflow in `Marker::header` (UAF-A
4558 // sort.lua AA case).
4559 let prev_top = self.top as usize;
4560 if wanted < 0 {
4561 self.top = func_slot + nret;
4562 } else {
4563 let wanted = wanted as u32;
4564 let need = (func_slot + wanted) as usize;
4565 if self.stack.len() < need {
4566 self.stack.resize(need, Value::Nil);
4567 }
4568 for i in nret..wanted {
4569 self.stack[(func_slot + i) as usize] = Value::Nil;
4570 }
4571 self.top = func_slot + wanted;
4572 }
4573 let new_top = self.top as usize;
4574 let clear_end = prev_top.min(self.stack.len());
4575 if new_top < clear_end {
4576 for slot in &mut self.stack[new_top..clear_end] {
4577 *slot = Value::Nil;
4578 }
4579 }
4580 }
4581
4582 /// v1.1 B10 Stage 1 — current Lua call-frame depth (read-only).
4583 /// Used by `EvalFuture` on the bootstrap poll to compute the
4584 /// `entry_depth` it will pass to subsequent resume slices.
4585 pub(crate) fn frame_count(&self) -> usize {
4586 self.frames.len()
4587 }
4588
4589 fn take_results(&mut self, func_slot: u32) -> Vec<Value> {
4590 let nret = self.top - func_slot;
4591 let out = self.stack[func_slot as usize..(func_slot + nret) as usize].to_vec();
4592 self.stack.truncate(func_slot as usize);
4593 self.top = func_slot;
4594 out
4595 }
4596
4597 // ---- open upvalues ----
4598
4599 #[doc(hidden)]
4600 pub fn find_or_create_upval(&mut self, slot: u32) -> Gc<Upvalue> {
4601 match self.open_upvals.binary_search_by_key(&slot, |&(s, _)| s) {
4602 Ok(i) => self.open_upvals[i].1,
4603 Err(i) => {
4604 let uv = self.heap.new_upvalue(UpvalState::Open {
4605 slot,
4606 thread: self.current,
4607 });
4608 self.open_upvals.insert(i, (slot, uv));
4609 uv
4610 }
4611 }
4612 }
4613
4614 pub(crate) fn close_from(&mut self, slot: u32) {
4615 while let Some(&(s, uv)) = self.open_upvals.last() {
4616 if s < slot {
4617 break;
4618 }
4619 let v = self.stack[s as usize];
4620 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4621 unsafe { uv.as_mut() }.set_closed(v);
4622 self.heap
4623 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4624 self.open_upvals.pop();
4625 }
4626 }
4627
4628 /// Register a to-be-closed slot (TBC op / generic-for closing value).
4629 fn register_tbc(&mut self, slot: u32) -> Result<(), LuaError> {
4630 let v = self.stack[slot as usize];
4631 if matches!(v, Value::Nil | Value::Bool(false)) {
4632 return Ok(()); // nil and false are silently ignored
4633 }
4634 if self.get_mm(v, Mm::Close).is_nil() {
4635 // PUC `checkclosemth`: "variable '<name>' got a non-closable value
4636 // (a <type> value)"; the local's name comes from the running
4637 // frame's locvars at this pc.
4638 let tn = v.type_name();
4639 let f = self.top_frame();
4640 let reg = slot - f.base;
4641 let pc = (f.pc as usize).saturating_sub(1);
4642 let where_ = match crate::vm::objname::getlocalname(&f.closure.proto, reg, pc) {
4643 Some(n) => format!("variable '{n}'"),
4644 None => "to-be-closed slot".to_string(),
4645 };
4646 return Err(self.rt_err(&format!("{where_} got a non-closable value (a {tn} value)")));
4647 }
4648 debug_assert!(self.tbc.last().is_none_or(|&s| s < slot));
4649 self.tbc.push(slot);
4650 Ok(())
4651 }
4652
/// Close upvalues and run `__close` handlers for slots ≥ `from`
/// (handlers in reverse registration order; PUC luaF_close).
///
/// This is the synchronous variant: each handler is invoked through
/// `call_value_impl` and has returned by the time the next one starts.
///
/// * `from` — lowest stack slot to close (inclusive).
/// * `err` — the error object currently propagating, or `None` on a
///   normal (non-error) close. It is handed to the handlers as their
///   second argument.
///
/// Returns `Ok(())` when no handler raised, otherwise the error raised by
/// the most recent failing handler. Note that the incoming `err` itself
/// is never returned from here — only errors produced while closing are.
fn close_slots(&mut self, from: u32, err: Option<Value>) -> Result<(), LuaError> {
    self.close_from(from);
    // PUC: handlers run in reverse declaration order; an error raised by a
    // handler becomes the error object passed to the remaining ones, and
    // the rest are still closed. The last raised error propagates.
    let mut pending = err;
    let mut result = Ok(());
    // Remember the outer rooted error so nested closes restore it on exit.
    let saved_err = self.closing_err;
    // On a normal close the handler runs within the closing function's
    // activation (debug parent = that function); during error unwinding the
    // function's frame is already gone, so the handler sits at the C
    // boundary instead (PUC: luaF_close runs after the ci is restored).
    let error_close = err.is_some();
    while let Some(&s) = self.tbc.last() {
        // Registered slots below `from` belong to enclosing scopes.
        if s < from {
            break;
        }
        self.tbc.pop();
        let v = self.stack[s as usize];
        // A slot that now holds nil/false has nothing to close.
        if matches!(v, Value::Nil | Value::Bool(false)) {
            continue;
        }
        let mm = self.get_mm(v, Mm::Close);
        if mm.is_nil() {
            // PUC `prepclosingmethod`: the __close metamethod was present
            // at OP_TBC (else we would have errored there) but has since
            // been removed/replaced. Treat as a non-callable target.
            let tn = self.obj_typename(v);
            let e = self.rt_err(&format!(
                "attempt to call a {tn} value (metamethod 'close')"
            ));
            // The synthesised error replaces the pending one and is
            // threaded to the remaining handlers.
            pending = Some(e.0);
            result = Err(e);
            continue;
        }
        // root the pending error: a handler may trigger a collection
        self.closing_err = pending;
        // PUC `luaF_close` sets `ci->u.l.tm = TM_CLOSE` so traceback /
        // getinfo report the handler as "in metamethod 'close'". Saved/
        // restored around the call to cover the path where `mm` is a
        // native (`push_frame` never consumes it) or it raises before
        // reaching push_frame.
        let saved_tm = self.pending_tm.replace("close");
        // PUC 5.4 `prepclosingmethod` always pushed (obj, errobj) — errobj
        // is nil on a normal close (5.4 locals.lua :875's
        // `func2close(coroutine.yield)` wrap pins `(self, nil)` back
        // through the yield). PUC 5.5 dropped the trailing nil: a clean
        // close passes only `obj`, the error case still passes both
        // (5.5 locals.lua :314 `select("#", ...) == n` with n=1 for the
        // normal-close arms, n=2 for the error arm).
        let call = match pending {
            Some(e) => self.call_value_impl(mm, &[v, e], error_close),
            None => {
                if self.version >= LuaVersion::Lua55 {
                    self.call_value_impl(mm, &[v], error_close)
                } else {
                    self.call_value_impl(mm, &[v, Value::Nil], error_close)
                }
            }
        };
        self.pending_tm = saved_tm;
        // A failing handler's error supersedes whatever was pending; the
        // handler's return values are otherwise ignored.
        if let Err(e) = call {
            pending = Some(e.0);
            result = Err(e);
        }
    }
    self.closing_err = saved_err;
    result
}
4724
4725 /// Yieldable variant of `close_slots`: drive the chain of `__close`
4726 /// handlers for slots ≥ `from` through the interpreter loop with a
4727 /// `Cont::Close` continuation, so a `coroutine.yield()` inside any handler
4728 /// suspends cleanly (the close iteration's state rides on the thread's
4729 /// frame/stack like any other suspended call) — PUC's `lua_callk` pattern
4730 /// applied to `luaF_close`. `after` runs when every slot is closed; if
4731 /// `after` is `Return` and we've returned past `entry_depth`,
4732 /// `Ok(Some(vals))` carries the result up to the host caller.
4733 fn begin_close(
4734 &mut self,
4735 from: u32,
4736 err: Option<Value>,
4737 after: AfterClose,
4738 entry_depth: usize,
4739 ) -> Result<Option<Vec<Value>>, LuaError> {
4740 self.close_from(from);
4741 self.drive_close(from, err, after, entry_depth)
4742 }
4743
/// Pop tbc slots ≥ `from`, skipping nil/false and synthesising a
/// non-callable-mm error for an `__close` that was reset to a bad value
/// between OP_TBC and now (PUC `prepclosingmethod`). The first real
/// handler pushes a `Cont::Close` + `begin_call` and returns `Ok(None)`;
/// the interpreter then drives the handler and re-enters this driver via
/// the `Cont::Close` consumer in `run()`. When the chain is exhausted,
/// the threaded error (if any) propagates or `after` fires.
///
/// * `from` — lowest to-be-closed stack slot handled by this chain.
/// * `pending` — error object threaded through the handlers (`None` on a
///   clean close so far).
/// * `after` — action performed by `finish_close_after` once drained.
/// * `entry_depth` — forwarded to `finish_close_after`.
///
/// Returns `Ok(None)` after scheduling a handler, the result of
/// `finish_close_after` when no slot is left, or the error from
/// `begin_call` if the handler could not be started.
fn drive_close(
    &mut self,
    from: u32,
    mut pending: Option<Value>,
    after: AfterClose,
    entry_depth: usize,
) -> Result<Option<Vec<Value>>, LuaError> {
    loop {
        // Drained when no registration is left at or above `from`.
        let drained = match self.tbc.last() {
            None => true,
            Some(&s) => s < from,
        };
        if drained {
            return self.finish_close_after(after, pending, entry_depth);
        }
        let s = self.tbc.pop().expect("tbc non-empty");
        let v = self.stack[s as usize];
        if matches!(v, Value::Nil | Value::Bool(false)) {
            continue;
        }
        let mm = self.get_mm(v, Mm::Close);
        if mm.is_nil() {
            // `__close` disappeared since registration: thread a
            // "non-callable" error to the remaining handlers instead of
            // calling anything for this slot.
            let tn = self.obj_typename(v);
            let e = self.rt_err(&format!(
                "attempt to call a {tn} value (metamethod 'close')"
            ));
            pending = Some(e.0);
            continue;
        }
        // A real handler: stage [mm, v, (err?)] above the current top,
        // record the close iteration state in a Cont::Close, and let the
        // interpreter dispatch the handler. On return the run() head
        // re-enters this driver via the Cont::Close consumer.
        let func_slot = self.top;
        let error_close = pending.is_some();
        // Room for the callee plus at most two arguments.
        let need = (func_slot + 3) as usize;
        if self.stack.len() < need {
            self.stack.resize(need, Value::Nil);
        }
        self.stack[func_slot as usize] = mm;
        self.stack[func_slot as usize + 1] = v;
        // PUC 5.4 always passes (obj, errobj=nil) on a normal close;
        // 5.5 drops the trailing nil. 5.4 locals.lua :875 vs 5.5 :314.
        let nargs = match pending {
            Some(e) => {
                self.stack[func_slot as usize + 2] = e;
                2u32
            }
            None => {
                if self.version >= LuaVersion::Lua55 {
                    1u32
                } else {
                    self.stack[func_slot as usize + 2] = Value::Nil;
                    2u32
                }
            }
        };
        self.top = func_slot + 1 + nargs;
        // Root the pending error during the call (a handler may collect).
        // NOTE(review): `closing_err` is restored as soon as `begin_call`
        // returns; if `begin_call` only schedules a Lua handler, rooting
        // while the handler body runs presumably relies on the
        // `CloseCont::pending` copy pushed below — confirm.
        let saved_err = self.closing_err;
        self.closing_err = pending;
        // PUC `luaF_close` flags the handler frame as "metamethod 'close'"
        // for traceback / getinfo.
        let saved_tm = self.pending_tm.replace("close");
        // The continuation carries everything needed to resume the chain
        // after this handler; `nresults: 0` because a handler's return
        // values are not used.
        frames_push_sync(
            &mut self.frames,
            &mut self.frames_top,
            CallFrame::Cont(NativeCont {
                kind: ContKind::Close(CloseCont {
                    from,
                    pending,
                    after,
                }),
                func_slot,
                nresults: 0,
            }),
        );
        // PUC luaF_close runs a normal close *within* the closing
        // function's activation (debug parent = that function); during an
        // error unwind the function's frame is already gone and the
        // handler sits at the C boundary instead.
        let r = self.begin_call(func_slot, Some(nargs), 0, error_close);
        // Restore the saved state before propagating a `begin_call` error.
        self.pending_tm = saved_tm;
        self.closing_err = saved_err;
        r?;
        return Ok(None);
    }
}
4839
4840 /// Fire `after` once every `__close` handler has run. `Block` propagates
4841 /// any remaining error or simply continues; `Return` performs OP_Return's
4842 /// tail (hook + frame pop + result delivery) and may surface results to
4843 /// the host when the function whose return triggered the close was the
4844 /// entry activation, but only on a clean drain — a pending error skips
4845 /// the return tail and propagates instead. `ResumeUnwind` pops the
4846 /// deferred Lua frame and re-raises, letting a handler's own error win
4847 /// over the original propagating one (PUC luaF_close).
4848 fn finish_close_after(
4849 &mut self,
4850 after: AfterClose,
4851 pending: Option<Value>,
4852 entry_depth: usize,
4853 ) -> Result<Option<Vec<Value>>, LuaError> {
4854 match after {
4855 AfterClose::Block => match pending {
4856 Some(e) => Err(LuaError(e)),
4857 None => Ok(None),
4858 },
4859 AfterClose::Return {
4860 abs_a,
4861 nret,
4862 from_native,
4863 } => match pending {
4864 Some(e) => Err(LuaError(e)),
4865 None => self.complete_return(abs_a, nret, from_native, entry_depth),
4866 },
4867 AfterClose::ResumeUnwind { func_slot, err } => {
4868 // The aborting Lua frame was popped before `begin_close`;
4869 // restore the catcher's stack window down to `func_slot` and
4870 // re-raise — preferring a handler-raised error over the
4871 // original (PUC luaF_close).
4872 self.stack.truncate(func_slot as usize);
4873 self.top = func_slot;
4874 self.tbc.retain(|&s| s < func_slot);
4875 Err(LuaError(pending.unwrap_or(err)))
4876 }
4877 }
4878 }
4879
/// OP_Return's post-close tail: fire the "return" hook (frame still
/// current), pop the Lua frame, slide results into `func_slot`, then
/// either hand them to the host (`Ok(Some(vals))` when we've returned
/// past `entry_depth`), leave them contiguous for an exposed
/// pcall/xpcall continuation, or finish into the caller's expected
/// result slot. Mirrors the synchronous OP_Return tail so both paths
/// share semantics — the `from_native` flag selects the right "return"
/// hook context for `hook_return`.
///
/// * `abs_a` — absolute stack slot of the first result.
/// * `nret` — number of results starting at `abs_a`.
/// * `from_native` — forwarded to `hook_return`.
/// * `entry_depth` — frame depth of the host entry; once the frame list
///   is shorter than this after the pop, the results go to the host.
///
/// Returns `Ok(Some(vals))` when the results leave the interpreter,
/// `Ok(None)` when execution continues in a caller, or an error raised by
/// one of the hooks.
///
/// # Panics
///
/// Panics if there is no frame to pop or the popped frame is not a Lua
/// frame.
fn complete_return(
    &mut self,
    abs_a: u32,
    nret: u32,
    from_native: bool,
    entry_depth: usize,
) -> Result<Option<Vec<Value>>, LuaError> {
    // ftransfer is the local index (1-based) of the first result, as
    // `getinfo("r").ftransfer + getlocal(level, k)` consumes it. luna
    // exposes locals starting at `frame.base` (= func_slot + 1 +
    // n_varargs for a vararg call), so the conversion is the absolute
    // result slot minus base, plus one to make it 1-based. db.lua 5.4
    // :542 (`foo1(); on=false; eqseq(out, {10, 0})`) pins the vararg
    // shape end-to-end.
    let ftransfer = self
        .frames
        .last()
        .and_then(CallFrame::lua)
        .map(|fr| {
            let raw = abs_a.saturating_sub(fr.base) + 1;
            // 5.5 anonymous-vararg functions get a `(vararg table)` pseudo
            // local injected at index `numparams + 1`, so getlocal
            // numbering shifts results past it (5.5 db.lua :539
            // `eqseq(out, {10, 0})`). 5.4 and earlier have no such pseudo.
            if fr.closure.proto.has_vararg_table_pseudo {
                raw + 1
            } else {
                raw
            }
        })
        // No Lua frame on top: fall back to index 1.
        .unwrap_or(1);
    // PUC 5.1 `luaD_poscall`: fire one extra "tail return" hook event
    // per tail call that collapsed into this activation, *after* its
    // own "return". `tailcalls` tracks that count exactly (PUC
    // `ci->u.l.tailcalls`). 5.2+ retired LUA_HOOKTAILRET, so the
    // "return" hook fires once even when the activation absorbed
    // multiple tail calls — only `istailcall` on getinfo surfaces the
    // collapse. 5.1 db.lua :366 pins the event ordering.
    let tailcalls = if self.version <= LuaVersion::Lua51 {
        self.frames
            .last()
            .and_then(|f| f.lua())
            .map(|f| f.tailcalls)
            .unwrap_or(0)
    } else {
        0
    };
    // Hooks run while the returning frame is still the top frame; the
    // tail-call count was read above, before the hooks run.
    self.hook_return(from_native, ftransfer, nret)?;
    for _ in 0..tailcalls {
        self.hook_tail_return()?;
    }
    let CallFrame::Lua(fr) =
        frames_pop_sync(&mut self.frames, &mut self.frames_top).expect("no frame")
    else {
        unreachable!("returning from a non-Lua frame")
    };
    // Slide the results down so they start at the callee's own slot.
    for i in 0..nret {
        self.stack[(fr.func_slot + i) as usize] = self.stack[(abs_a + i) as usize];
    }
    if self.frames.len() < entry_depth {
        // Returned past the host entry point: hand the values out.
        self.top = fr.func_slot + nret;
        return Ok(Some(self.take_results(fr.func_slot)));
    } else if matches!(self.frames.last(), Some(CallFrame::Cont(_))) {
        // A continuation frame is now on top: leave all `nret` results
        // contiguous at `func_slot` without padding or trimming.
        self.top = fr.func_slot + nret;
    } else {
        // Ordinary Lua caller: adjust to the count it asked for.
        self.finish_results(fr.func_slot, nret, fr.nresults);
    }
    Ok(None)
}
4957
4958 #[doc(hidden)]
4959 pub fn upval_get(&self, cl: Gc<LuaClosure>, idx: u32) -> Value {
4960 match cl.upvals()[idx as usize].state() {
4961 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
4962 UpvalState::Closed(v) => v,
4963 }
4964 }
4965
4966 fn upval_set(&mut self, cl: Gc<LuaClosure>, idx: u32, v: Value) {
4967 let uv = cl.upvals()[idx as usize];
4968 match uv.state() {
4969 UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
4970 UpvalState::Closed(_) => {
4971 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
4972 unsafe { uv.as_mut() }.set_closed(v);
4973 // forward barrier: a closed upvalue is single-slot, so the
4974 // forward variant is cheaper than barrier_back (PUC uses
4975 // `luaC_barrier_` for upvalues; `luaC_barrierback_` for
4976 // tables / threads).
4977 self.heap
4978 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
4979 }
4980 }
4981 }
4982
4983 // ---- register / error helpers ----
4984
/// Read register `i` of the frame whose register window starts at stack
/// index `base`, without a bounds check.
///
/// Callers must uphold `base + i < self.stack.len()`; see the SAFETY note
/// in the body for why interpreter-generated accesses satisfy this.
#[inline(always)]
fn r(&self, base: u32, i: u32) -> Value {
    // SAFETY: the compiler reserves `proto.max_stack` slots above `base`
    // at frame entry (`push_frame` sizes the stack up to base + max_stack),
    // and every bytecode-generated reference falls within `[0, max_stack)`.
    // PUC's vmfetch uses raw `R(A)` (`s2v(L->base + A)`) for the same
    // reason. The bounds check would re-validate this invariant on every
    // op — the dispatch hot path can't afford it.
    // NOTE(review): this invariant only covers operands that really are
    // registers of the running frame; callers that decode an operand
    // field without checking the opcode are not covered by it.
    unsafe { *self.stack.get_unchecked((base + i) as usize) }
}
4996
/// Write `v` into register `i` of the frame whose register window starts
/// at stack index `base`, without a bounds check.
///
/// Callers must uphold `base + i < self.stack.len()`.
#[inline(always)]
fn set_r(&mut self, base: u32, i: u32, v: Value) {
    // SAFETY: see `r` — `base + i < base + max_stack <= stack.len()` by
    // frame-entry contract.
    unsafe {
        *self.stack.get_unchecked_mut((base + i) as usize) = v;
    }
}
5005
5006 #[doc(hidden)]
5007 pub fn rt_err(&mut self, msg: &str) -> LuaError {
5008 let text = match self.position_prefix() {
5009 Some(p) => format!("{p}{msg}"),
5010 None => msg.to_string(),
5011 };
5012 LuaError(Value::Str(self.heap.intern(text.as_bytes())))
5013 }
5014
5015 pub(crate) fn type_err(&mut self, what: &str, v: Value) -> LuaError {
5016 let extra = self.subject_varinfo(v);
5017 let tn = self.obj_typename(v);
5018 self.rt_err(&format!("attempt to {what} a {tn} value{extra}"))
5019 }
5020
5021 /// Name the offending operand of the current instruction (PUC varinfo) for
5022 /// a type error, e.g. " (global 'x')". The faulting value `bad` is matched
5023 /// to the instruction's subject register(s); a native-raised error whose
5024 /// current instruction doesn't hold `bad` simply yields "".
5025 fn subject_varinfo(&self, bad: Value) -> String {
5026 use crate::vm::isa::Op;
5027 let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5028 return String::new();
5029 };
5030 let proto = f.closure.proto;
5031 let p: &crate::runtime::Proto = &proto;
5032 let pc = f.pc as usize;
5033 if pc == 0 || pc > p.code.len() {
5034 return String::new();
5035 }
5036 let instr = p.code[pc - 1];
5037 let mut cands: Vec<u32> = Vec::new();
5038 match instr.op() {
5039 // indexed reads / length / method: the table/object is in B
5040 Op::GetField | Op::GetI | Op::GetTable | Op::SelfOp | Op::Len => {
5041 cands.push(instr.b());
5042 }
5043 // indexed writes / calls: the table/function is in A
5044 Op::SetField | Op::SetI | Op::SetTable | Op::Call | Op::TailCall => {
5045 cands.push(instr.a());
5046 }
5047 // arithmetic/bitwise: a register operand (B, and C unless constant)
5048 Op::Add
5049 | Op::Sub
5050 | Op::Mul
5051 | Op::Div
5052 | Op::Mod
5053 | Op::Pow
5054 | Op::IDiv
5055 | Op::BAnd
5056 | Op::BOr
5057 | Op::BXor
5058 | Op::Shl
5059 | Op::Shr => {
5060 cands.push(instr.b());
5061 if !instr.k() {
5062 cands.push(instr.c());
5063 }
5064 }
5065 Op::Unm | Op::BNot => cands.push(instr.b()),
5066 Op::Concat => {
5067 let a = instr.a();
5068 for r in a..a + instr.b() {
5069 cands.push(r);
5070 }
5071 }
5072 _ => {}
5073 }
5074 for reg in cands {
5075 if self.r(f.base, reg).raw_eq(bad) {
5076 return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5077 Some((kind, name)) => format!(" ({kind} '{name}')"),
5078 None => String::new(),
5079 };
5080 }
5081 }
5082 String::new()
5083 }
5084
5085 /// "attempt to call a X value", enriched (PUC luaG_callerror) with a name
5086 /// for the call target: "(global 'f')" for a direct call, or "(metamethod
5087 /// 'add')" when the call is a metamethod dispatched by the current opcode.
5088 fn call_err(&mut self, v: Value) -> LuaError {
5089 let extra = self.call_target_varinfo(v);
5090 let tn = self.obj_typename(v);
5091 self.rt_err(&format!("attempt to call a {tn} value{extra}"))
5092 }
5093
5094 /// Name the offending call target. A metamethod dispatch pushes a `Cont`
5095 /// frame before the call, so the opcode that triggered it lives in the
5096 /// nearest *Lua* frame — read that instruction: OP_CALL names the function
5097 /// register, any metamethod-bearing opcode yields "(metamethod 'event')".
5098 fn call_target_varinfo(&self, bad: Value) -> String {
5099 use crate::vm::isa::Op;
5100 let Some(f) = self.frames.iter().rev().find_map(CallFrame::lua) else {
5101 return String::new();
5102 };
5103 let proto = f.closure.proto;
5104 let p: &crate::runtime::Proto = &proto;
5105 let pc = f.pc as usize;
5106 if pc == 0 || pc > p.code.len() {
5107 return String::new();
5108 }
5109 let instr = p.code[pc - 1];
5110 match instr.op() {
5111 Op::Call | Op::TailCall => {
5112 let reg = instr.a();
5113 if self.r(f.base, reg).raw_eq(bad) {
5114 match crate::vm::objname::getobjname(p, pc - 1, reg) {
5115 Some((kind, name)) => format!(" ({kind} '{name}')"),
5116 None => String::new(),
5117 }
5118 } else {
5119 String::new()
5120 }
5121 }
5122 op => match mm_event_name(op) {
5123 Some(ev) => format!(" (metamethod '{ev}')"),
5124 None => String::new(),
5125 },
5126 }
5127 }
5128
5129 /// "number has no integer representation", enriched (PUC luaG_tointerror)
5130 /// with a "(field 'x')"-style suffix naming the offending operand of the
5131 /// current arithmetic instruction when it can be recovered from bytecode.
5132 fn no_int_rep_err(&mut self) -> LuaError {
5133 let extra = self.bad_operand_varinfo();
5134 self.rt_err(&format!("number{extra} has no integer representation"))
5135 }
5136
5137 /// Inspect the current frame's faulting instruction: find the register
5138 /// operand holding a float with no integer representation and name it.
5139 fn bad_operand_varinfo(&self) -> String {
5140 let Some(f) = self.frames.last().and_then(CallFrame::lua) else {
5141 return String::new();
5142 };
5143 let proto = f.closure.proto;
5144 let p: &crate::runtime::Proto = &proto;
5145 let pc = f.pc as usize;
5146 if pc == 0 || pc > p.code.len() {
5147 return String::new();
5148 }
5149 let instr = p.code[pc - 1];
5150 let mut regs = vec![instr.b()];
5151 if !instr.k() {
5152 regs.push(instr.c());
5153 }
5154 for reg in regs {
5155 let v = self.r(f.base, reg);
5156 if matches!(v, Value::Float(x) if crate::runtime::value::f2i_exact(x).is_none()) {
5157 return match crate::vm::objname::getobjname(p, pc - 1, reg) {
5158 Some((kind, name)) => format!(" ({kind} '{name}')"),
5159 None => String::new(),
5160 };
5161 }
5162 }
5163 String::new()
5164 }
5165
5166 /// Position prefix of the currently executing Lua frame. PUC `luaL_error`
5167 /// calls `luaL_where(L, 1)` which reads `L->ci->previous`. When the prior
5168 /// frame is a C function (e.g. a pcall Cont parked above `require`'s
5169 /// native call), PUC pushes no prefix — match that by looking only at the
5170 /// topmost frame directly and bailing if it is anything but a Lua frame.
5171 pub(crate) fn position_prefix(&self) -> Option<String> {
5172 let f = self.frames.last().and_then(CallFrame::lua)?;
5173 let proto = f.closure.proto;
5174 if proto.source.as_bytes().is_empty() {
5175 return Some(self.stripped_prefix());
5176 }
5177 if proto.lines.is_empty() {
5178 return None;
5179 }
5180 let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5181 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5182 let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5183 let display = crate::vm::lib_debug::chunk_id(raw);
5184 let src = String::from_utf8_lossy(&display).into_owned();
5185 Some(format!("{src}:{line}: "))
5186 }
5187
5188 /// PUC `luaG_addinfo` prefix for a stripped chunk. 5.5 substitutes "=?"
5189 /// for the source and renders the line as "?" (so the prefix reads
5190 /// `?:?: `). 5.4 and below leave the source NULL ("?") and use the raw
5191 /// `getfuncline = -1`, so the prefix reads `?:-1: ` (5.4 errors.lua :282
5192 /// matches `^%?:%-1:`).
5193 fn stripped_prefix(&self) -> String {
5194 if self.version >= crate::version::LuaVersion::Lua55 {
5195 "?:?: ".to_string()
5196 } else {
5197 "?:-1: ".to_string()
5198 }
5199 }
5200
5201 /// Position prefix of the Lua frame `level` steps up from the running C
5202 /// function (PUC `luaL_where(L, level)`): `level == 1` is the immediate
5203 /// Lua caller (skipping Cont/C-boundary frames the way `dbg_frame` does),
5204 /// `level == 2` its caller, and so on. Used by `error(msg, level)` so the
5205 /// caller's frame is reported even across pcall/xpcall continuations.
5206 pub(crate) fn position_prefix_at_level(&self, level: i64) -> Option<String> {
5207 let fi = match self.dbg_frame(level)? {
5208 DbgKind::Lua(fi) => fi,
5209 DbgKind::C(_) | DbgKind::Tail(_) => return None,
5210 };
5211 let f = self.frames[fi].lua()?;
5212 let proto = f.closure.proto;
5213 // PUC luaG_addinfo: a stripped chunk has no source — see
5214 // `stripped_prefix` for the per-version wording (5.5 vs ≤5.4).
5215 if proto.source.as_bytes().is_empty() {
5216 return Some(self.stripped_prefix());
5217 }
5218 // a stripped chunk carries no per-instruction line info
5219 if proto.lines.is_empty() {
5220 return None;
5221 }
5222 let line = proto.lines[(f.pc as usize).saturating_sub(1).min(proto.lines.len() - 1)];
5223 // PUC `luaG_addinfo` renders source via `luaO_chunkid` (LUA_IDSIZE=60),
5224 // not the raw chunk name — handles `@file`/`=name` sigils + truncation.
5225 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
5226 let raw = unsafe { crate::runtime::string::bytes_of(proto.source.as_ptr()) };
5227 let display = crate::vm::lib_debug::chunk_id(raw);
5228 let src = String::from_utf8_lossy(&display).into_owned();
5229 Some(format!("{src}:{line}: "))
5230 }
5231
5232 // ---- the interpreter ----
5233
5234 fn exec(&mut self) -> Result<Vec<Value>, LuaError> {
5235 let entry_depth = self.frames.len();
5236 self.exec_with(entry_depth)
5237 }
5238
5239 /// Run from the current top frame down to (but not past) `entry_depth`
5240 /// frames. Coroutine driving passes `entry_depth = 1` so the whole thread
5241 /// runs to completion or a yield.
5242 /// v1.1 B10 Stage 1 — resume the dispatcher from the saved
5243 /// `entry_depth` (captured pre-yield by `drive_one`). Called by
5244 /// `EvalFuture::poll` on every poll after the first to walk the
5245 /// existing call frames until the next `BudgetExhausted` or
5246 /// terminal `Ok`/`Err`. Not a public-API surface in Stage 1; the
5247 /// embedder reaches it through `Vm::eval_async`.
5248 pub(crate) fn exec_with_async(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5249 self.exec_with(entry_depth)
5250 }
5251
5252 fn exec_with(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5253 loop {
5254 let r = self.run(entry_depth);
5255 if r.is_err()
5256 && (self.yielding.is_some()
5257 || self.terminating.is_some()
5258 || self.host_yield_pending
5259 || self.pending_async_native_fut.is_some())
5260 {
5261 // a `coroutine.yield` is in flight: keep the frames intact (they
5262 // are the suspended coroutine's saved state) and propagate to
5263 // resume. A self-close termination propagates the same way, so a
5264 // protecting pcall on the way out cannot catch (unwind) it.
5265 // v1.1 B10 — `host_yield_pending` is the async-mode
5266 // analogue: the sentinel must reach `drive_one` without
5267 // a protecting `pcall` swallowing it.
5268 return r;
5269 }
5270 match r {
5271 Ok(vals) => return Ok(vals),
5272 // unwind toward `entry_depth`. A protecting pcall/xpcall
5273 // continuation caught along the way turns the error into
5274 // `false, msg` and the loop resumes running its caller; an
5275 // uncaught error propagates out.
5276 Err(e) => match self.unwind(e.0, entry_depth) {
5277 Unwound::Caught => continue,
5278 Unwound::CaughtReturn(vals) => return Ok(vals),
5279 Unwound::Propagated(err) => return Err(err),
5280 },
5281 }
5282 }
5283 }
5284
5285 /// Unwind the call stack from the error point toward `entry_depth`, running
5286 /// `__close` handlers on each Lua frame. Stops at the first pcall/xpcall
5287 /// continuation frame at/above `entry_depth` (the error is *caught*: its
5288 /// slot receives `false, msg`); if none is reached, the error propagates.
5289 fn unwind(&mut self, mut err: Value, entry_depth: usize) -> Unwound {
5290 // PUC 5.5 `luaG_errormsg` substitutes "<no error object>" when the
5291 // error object is nil — so `pcall(function() error(nil) end)` returns
5292 // that string instead of nil, and `assert(nil, nil)` (whose path
5293 // throws nil via `lua_settop(L, 1)`) also surfaces a string. Earlier
5294 // dialects (5.4 and below) keep the nil — 5.4 errors.lua :49 asserts
5295 // `doit("error()") == nil` and luna would fail that if it always
5296 // substituted. luna's native `error()` still does its own conversion
5297 // for direct callers.
5298 if matches!(err, Value::Nil) && self.version >= crate::version::LuaVersion::Lua55 {
5299 err = Value::Str(self.heap.intern(b"<no error object>"));
5300 }
5301 // The protected call runs in-place among the caller frames' registers,
5302 // so truncating the failed frames here cuts into caller windows below
5303 // the catcher. Snapshot the live length: at the error point the stack
5304 // already spans every surviving frame's window, so restoring it after a
5305 // catch reinstates them all (the reclaimed slots above are dead temps).
5306 // PUC handles overflow recovery via a separate EXTRA_STACK reserve;
5307 // we instead clamp the restore to the catcher's caller window when the
5308 // error point was at the stack limit (cause: the next `call_value_impl`
5309 // picks `func_slot = stack.len()` which would otherwise re-overflow).
5310 let saved_len = self.stack.len();
5311 // Snapshot the traceback at the error point — before any frame is
5312 // popped — so an `xpcall` msgh (which runs after the failed frames are
5313 // gone) can still describe the error site. The handler frame about to
5314 // be popped (e.g. a `__close` handler with `tm = Some("close")`) is
5315 // visible here; once popped, `debug.traceback` would miss it.
5316 // PUC instead runs msgh with the failed stack intact (luaG_errormsg);
5317 // but doing so when the stack is near `MAX_LUA_STACK` (true overflow
5318 // recovery — locals.lua:659) re-overflows. Capture-once propagates
5319 // through nested unwinds (inner→outer) without re-running msgh.
5320 if self.error_traceback.is_none() {
5321 self.error_traceback = Some(self.traceback_bytes(1));
5322 }
5323 while self.frames.len() >= entry_depth {
5324 match *self.frames.last().expect("frame") {
5325 // a yieldable-metamethod continuation does not catch: discard the
5326 // abandoned instruction and keep unwinding (PUC drops the partial
5327 // op on error).
5328 CallFrame::Cont(NativeCont {
5329 kind: ContKind::Meta(mc),
5330 func_slot,
5331 ..
5332 }) => {
5333 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5334 self.stack.truncate(func_slot as usize);
5335 self.top = mc.saved_top.min(func_slot);
5336 self.tbc.retain(|&s| s < func_slot);
5337 }
5338 // a __pairs continuation does not catch either: an error inside
5339 // the metamethod propagates past `pairs`.
5340 CallFrame::Cont(NativeCont {
5341 kind: ContKind::Pairs,
5342 func_slot,
5343 ..
5344 }) => {
5345 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5346 self.stack.truncate(func_slot as usize);
5347 self.top = func_slot;
5348 self.tbc.retain(|&s| s < func_slot);
5349 }
5350 // a __close continuation does not catch: drop the half-run
5351 // handler's window, then continue the close yieldably with
5352 // the new error threaded as `pending`. Preserve `cc.after`
5353 // verbatim — `Return`/`Block` originating from an aborting
5354 // OP_Return/OP_Close will be short-circuited by
5355 // `finish_close_after` (pending propagates as Err); a
5356 // `ResumeUnwind` originated by our own Lua-frame handler
5357 // must keep its deferred frame-pop semantics so that frame
5358 // is not orphaned. If a fresh handler yields, `drive_close`
5359 // pushes another `Cont::Close` and we return `Caught` so
5360 // `exec_with` re-enters the run loop.
5361 CallFrame::Cont(NativeCont {
5362 kind: ContKind::Close(cc),
5363 func_slot,
5364 ..
5365 }) => {
5366 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5367 self.stack.truncate(func_slot as usize);
5368 self.top = func_slot;
5369 self.tbc.retain(|&s| s < func_slot);
5370 match self.drive_close(cc.from, Some(err), cc.after, entry_depth) {
5371 Ok(Some(_)) => {
5372 unreachable!(
5373 "Block / Return / ResumeUnwind never return host values mid-unwind"
5374 )
5375 }
5376 Ok(None) => return Unwound::Caught,
5377 Err(e) => {
5378 err = e.0;
5379 continue;
5380 }
5381 }
5382 }
5383 CallFrame::Cont(nc) => {
5384 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5385 self.pcall_depth -= 1;
5386 let result = match nc.kind {
5387 ContKind::Pcall => err,
5388 ContKind::Xpcall { handler } => {
5389 // PUC keeps `L->errfunc` set across the handler's
5390 // call: `luaG_errormsg` re-fires the handler when
5391 // it raises (so `xpcall(error, err, 170)` lets the
5392 // chain bottom out at err(0) → "END"). luna mirrors
5393 // that by looping until the handler returns or
5394 // luna's `iters` cap forces termination.
5395 //
5396 // The cap models PUC's nCcalls soft window
5397 // (MAXCCALLS/10*11): once tripped, `stackerror`
5398 // raises "C stack overflow" via `luaG_runerror`
5399 // which itself re-enters `luaG_errormsg`, so the
5400 // handler runs once more with that string and
5401 // naturally returns it (errors.lua :637 at N=300).
5402 // We count iterations per Cont::Xpcall rather than
5403 // a global counter — nested xpcalls each get their
5404 // own budget, matching the way PUC's stack frames
5405 // accumulate per dispatch path.
5406 const MSGH_CAP: u32 = MAX_C_DEPTH;
5407 let mut cur_err = err;
5408 let mut iters: u32 = 0;
5409 let mut capped = false;
5410 loop {
5411 if iters >= MSGH_CAP && !capped {
5412 cur_err = Value::Str(self.heap.intern(b"C stack overflow"));
5413 capped = true;
5414 }
5415 iters += 1;
5416 self.msgh_depth += 1;
5417 let r = self.call_value(handler, &[cur_err]);
5418 self.msgh_depth -= 1;
5419 match r {
5420 Ok(hr) => {
5421 break hr.first().copied().unwrap_or(Value::Nil);
5422 }
5423 Err(_) if capped => {
5424 // the handler still errored on the
5425 // synthesized "C stack overflow"; fall
5426 // back to PUC's LUA_ERRERR string.
5427 break Value::Str(
5428 self.heap.intern(b"error in error handling"),
5429 );
5430 }
5431 Err(e) => {
5432 cur_err = e.0;
5433 }
5434 }
5435 }
5436 }
5437 ContKind::Meta(_) | ContKind::Pairs | ContKind::Close(_) => {
5438 unreachable!("Meta/Pairs/Close cont handled above")
5439 }
5440 };
5441 // the error has been caught (pcall/xpcall): the captured
5442 // traceback was for that error and is no longer in flight.
5443 self.error_traceback = None;
5444 let fs = nc.func_slot as usize;
5445 if self.stack.len() < fs + 2 {
5446 self.stack.resize(fs + 2, Value::Nil);
5447 }
5448 self.stack[fs] = Value::Bool(false);
5449 self.stack[fs + 1] = result;
5450 self.top = nc.func_slot + 2;
5451 self.tbc.retain(|&s| s < nc.func_slot);
5452 if self.frames.len() < entry_depth {
5453 return Unwound::CaughtReturn(self.take_results(nc.func_slot));
5454 }
5455 self.finish_results(nc.func_slot, 2, nc.nresults);
5456 // Reinstate the caller's register window that the unwind
5457 // truncated into: opcodes index that window directly, so
5458 // it must be fully allocated before the run loop resumes.
5459 //
5460 // The target is the nearest surviving Lua frame's `base` +
5461 // `max_stack` plus a 256-slot reserve (presumably the
5462 // `MIN_STACK` reserve of earlier notes — TODO confirm);
5463 // with no Lua frame left it falls back to `saved_len`, the
5464 // length snapshotted at the error point. The stack is
5465 // always resized to exactly this target, up or down.
5466 //
5467 // An earlier version only clamped `saved_len` from above.
5468 // Sizing to the window directly covers both cases:
5469 // - `saved_len` above the window: after an overflow-recovery
5470 // catch the stack would stay near `MAX_LUA_STACK`, and the
5471 // next `call_value_impl` (e.g. a `__close` in the catcher's
5472 // errorh, locals.lua:659) would pick `func_slot =
5473 // stack.len()` above the limit and re-overflow.
5474 // - `saved_len` below the window: a yieldable-unwind re-entry after a prior `ResumeUnwind` truncated.
5475 let restore = self
5476 .frames
5477 .iter()
5478 .rev()
5479 .find_map(CallFrame::lua)
5480 .map(|c| (c.base + c.closure.proto.max_stack as u32) as usize + 256)
5481 .unwrap_or(saved_len);
5482 if self.stack.len() < restore {
5483 self.stack.resize(restore, Value::Nil);
5484 } else if self.stack.len() > restore {
5485 self.stack.truncate(restore);
5486 }
5487 return Unwound::Caught;
5488 }
5489 CallFrame::Lua(f) => {
5490 // Yieldable error-unwind close, PUC luaG_errormsg shape:
5491 // (1) pop the Lua frame immediately so each `__close`
5492 // handler runs at the C boundary above — `debug.getinfo`
5493 // sees the next outer Lua frame's call site (typically
5494 // `pcall`), not this aborting function (locals.lua:480).
5495 // (2) drive the close yieldably with
5496 // `AfterClose::ResumeUnwind { func_slot, err }`; on drain
5497 // it truncates to `func_slot` and re-raises (letting a
5498 // handler-raised error win over `err`). If a handler
5499 // yields, `drive_close` pushes `Cont::Close` and we
5500 // return `Caught` so `exec_with` re-enters the run loop;
5501 // a synchronous drain returns Err exactly as the old
5502 // path did.
5503 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5504 let after = AfterClose::ResumeUnwind {
5505 func_slot: f.func_slot,
5506 err,
5507 };
5508 match self.begin_close(f.base, Some(err), after, entry_depth) {
5509 Ok(Some(_)) => {
5510 unreachable!("ResumeUnwind never returns host values")
5511 }
5512 Ok(None) => return Unwound::Caught,
5513 Err(e) => {
5514 err = e.0;
5515 continue;
5516 }
5517 }
5518 }
5519 }
5520 }
5521 Unwound::Propagated(LuaError(err))
5522 }
5523
5524 fn run(&mut self, entry_depth: usize) -> Result<Vec<Value>, LuaError> {
5525 loop {
5526 // Fast-path slow-check gate: most embedders run with both
5527 // `instr_budget` and `mem_cap` as None, so a single combined
5528 // is_some test lets the hot loop skip both branches with one
5529 // load + branch instead of two.
5530 if self.instr_budget.is_some() || self.heap.mem_cap.is_some() {
5531 if let Some(b) = self.instr_budget.as_mut() {
5532 *b -= 1;
5533 if *b <= 0 {
5534 self.instr_budget = None;
5535 // v1.1 B10 Stage 1 — async-mode cooperative
5536 // yield. Set a sentinel flag so `exec_with`
5537 // propagates the Err without `unwind` running
5538 // (mirroring the `yielding.is_some()` path),
5539 // and `call_value_impl` preserves the call
5540 // frames for the next `poll`. Translation back
5541 // to `DispatchOutcome::BudgetExhausted` happens
5542 // in `drive_one`. The Err value itself is
5543 // `Value::Nil` — a pure sentinel, never seen by
5544 // user code.
5545 if self.async_mode {
5546 self.host_yield_pending = true;
5547 return Err(LuaError(Value::Nil));
5548 }
5549 // B6: classify the trip so embedders can
5550 // distinguish budget exhaustion from a
5551 // generic Runtime error and retry / give up
5552 // accordingly.
5553 self.last_error_kind = crate::vm::error::LuaErrorKind::InstrBudget;
5554 let s = Value::Str(self.heap.intern(b"instruction budget exceeded"));
5555 return Err(LuaError(s));
5556 }
5557 }
5558 if let Some(cap) = self.heap.mem_cap
5559 && self.heap.bytes() > cap
5560 {
5561 // First try a full collect — embedders set tight caps
5562 // and the overshoot may be reclaimable (closures kept
5563 // by short-lived frames, intermediate strings). Only
5564 // disarm + raise if the cap is still breached after
5565 // collection. PUC's `LUA_GCEMERGENCY` path matches.
5566 //
5567 // v2.2 UAF-B fix: the historical `gc_top = self.top`
5568 // under-rooted a Lua-level `a[i] = i` loop's `a`
5569 // table — `a` sits at a slot above the multi-result
5570 // `self.top`, so cap-fire collect swept `a`'s
5571 // internal buckets and the next bytecode read them
5572 // → heap-use-after-free in `Table::try_set_existing`.
5573 // Use `self.stack.len()` here (full over-root) — the
5574 // cap-fire path is rare + a memory cap takes priority
5575 // over weak-table precision (the fire-once semantics
5576 // means a wrong-collected weak ref is recoverable;
5577 // a UAF in a table mutation is not).
5578 self.gc_top = self.stack.len() as u32;
5579 self.collect_garbage();
5580 if self.heap.bytes() > cap {
5581 self.heap.mem_cap = None;
5582 let s = Value::Str(self.heap.intern(b"memory cap exceeded"));
5583 return Err(LuaError(s));
5584 }
5585 }
5586 }
5587 // Single combined frame fetch: continuation arm OR Lua arm. Saves
5588 // a second `self.frames.last()` slice access vs the prior split
5589 // form (LLVM doesn't always CSE these across the cont branch).
5590 // A continuation frame on top means the call it protected just
5591 // delivered its results — wrap as `true, results…` and hand to
5592 // the pcall/xpcall caller. The error path is handled by `unwind`;
5593 // this branch is only reached on success/resume completion.
5594 // SAFETY: `unwrap_unchecked` is sound only while `self.frames` is non-empty. NOTE(review): confirm every entry into `run` has pushed at least one frame and that no arm below pops the last frame before looping back here.
5595 let frame_peek = unsafe { self.frames.last().unwrap_unchecked() };
5596 if let &CallFrame::Cont(nc) = frame_peek {
5597 // a yieldable metamethod returned: complete the interrupted
5598 // instruction (PUC luaV_finishOp) and resume the running frame.
5599 if let ContKind::Meta(mc) = nc.kind {
5600 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5601 let result = if self.top > nc.func_slot {
5602 self.stack[nc.func_slot as usize]
5603 } else {
5604 Value::Nil
5605 };
5606 self.stack.truncate(nc.func_slot as usize);
5607 self.top = mc.saved_top;
5608 self.finish_meta(mc.action, result)?;
5609 continue;
5610 }
5611 // a __close handler returned successfully: discard its
5612 // results, restore `top` to the slot the handler was called
5613 // at (the surrounding frame's register window above this slot
5614 // must stay alloc'd — never truncate the underlying stack),
5615 // then continue the close chain (next slot, or fire
5616 // AfterClose). When the close ends an entry activation,
5617 // drive_close hands the results up to exec_with directly.
5618 if let ContKind::Close(cc) = nc.kind {
5619 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5620 self.top = nc.func_slot;
5621 if let Some(vals) =
5622 self.drive_close(cc.from, cc.pending, cc.after, entry_depth)?
5623 {
5624 return Ok(vals);
5625 }
5626 continue;
5627 }
5628 // __pairs returned: normalize its results to exactly four
5629 // (iterator, state, control, closing) at pairs's slot, where
5630 // the metamethod was called, and hand them to pairs's caller.
5631 if let ContKind::Pairs = nc.kind {
5632 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5633 let total = 4u32;
5634 let need = (nc.func_slot + total) as usize;
5635 if self.stack.len() < need {
5636 self.stack.resize(need, Value::Nil);
5637 }
5638 for s in self.top..(nc.func_slot + total) {
5639 self.stack[s as usize] = Value::Nil;
5640 }
5641 self.top = nc.func_slot + total;
5642 if self.frames.len() < entry_depth {
5643 return Ok(self.take_results(nc.func_slot));
5644 }
5645 self.finish_results(nc.func_slot, total, nc.nresults);
5646 continue;
5647 }
5648 frames_pop_sync(&mut self.frames, &mut self.frames_top);
5649 self.pcall_depth -= 1;
5650 // f's results sit at nc.func_slot+1.. (f was called one slot
5651 // above the continuation), so writing `true` at the slot makes
5652 // `true, results…` already contiguous.
5653 let nret = self.top - (nc.func_slot + 1);
5654 self.stack[nc.func_slot as usize] = Value::Bool(true);
5655 let total = 1 + nret;
5656 self.top = nc.func_slot + total;
5657 if self.frames.len() < entry_depth {
5658 return Ok(self.take_results(nc.func_slot));
5659 }
5660 self.finish_results(nc.func_slot, total, nc.nresults);
5661 continue;
5662 }
5663 // GC runs only at the allocation safe points below (PUC's
5664 // `luaC_checkGC` sites), each with a precise `gc_top`; the loop head
5665 // no longer collects, so a stale full-window `gc_top` cannot leak in.
5666 //
5667 // Hot-path frame fetch: the Cont arm above continues the loop,
5668 // so reaching here means `frame_peek` is the Lua frame. Reuse it
5669 // rather than re-fetching `self.frames.last()`.
5670 let f = match frame_peek {
5671 CallFrame::Lua(f) => f,
5672 _ => unreachable!("Cont frame survived the dispatch loop head"),
5673 };
5674 let cl = f.closure;
5675 let base = f.base;
5676 let func_slot = f.func_slot;
5677 let n_varargs = f.n_varargs;
5678 let pc = f.pc;
5679 let oldpc = f.hook_oldpc;
5680
5681 // SAFETY: `pc` is bounded by the compiler against `proto.code.len()`
5682 // — every branch / call op only sets `pc` to a valid index, and
5683 // function entry initialises pc=0 with a non-empty body. PUC's
5684 // `vmfetch` uses the equivalent unchecked load.
5685 let inst = unsafe { *cl.proto.code.get_unchecked(pc as usize) };
5686
5687 // P12-S1.C/D — trace recording append + close detection.
5688 // Gated on `trace_jit_enabled` + `active_trace.is_some()`
5689 // so default dispatch keeps a single not-taken branch.
5690 //
5691 // - At the head PC with a non-empty record, the trace has
5692 // looped back to its start: mark `closed = true` and
5693 // take the record (S2 will compile + cache).
5694 // - Otherwise, capture the op. If the record overflows
5695 // MAX_TRACE_LEN, abort by dropping it.
5696 if self.jit.trace_enabled
5697 && let Some(_rec) = self.jit.active_trace.as_mut()
5698 {
5699 // P12-S4 — depth tracking. The trace head's frame is
5700 // at index `recording_frame_base`; every Op::Call that
5701 // pushes a new frame bumps the live depth, every
5702 // Op::Return that pops one decrements it.
5703 //
5704 // **Three clean-close conditions** (P12-S4-step4a):
5705 // - `at_head`: cur_depth == 0 AND about-to-execute the
5706 // trace's head_pc on its head_proto (loop closed back
5707 // to start). Same for loop-triggered and call-triggered
5708 // traces — step4a unified the gating so call-triggered
5709 // no longer closes on the first re-entry (that left
5710 // fib's body at 7 depth=0 ops; step4a lets it inline
5711 // up to MAX_INLINE_DEPTH levels before any close).
5712 // - `returned_past_head`: trace head's frame is gone
5713 // (callee returned past it, or the call-trigger
5714 // started a recording inside a callee that has now
5715 // returned). Whatever ops were recorded form the
5716 // trace body; the lowerer treats the partial trace
5717 // the same as InlineAbort (dispatchable=false until
5718 // step4b's frame materialization lands).
5719 // - `depth_cap_hit`: cur_depth > MAX_INLINE_DEPTH.
5720 // Recording any deeper would just bloat the IR; close
5721 // with the body we have. Lowerer's existing length
5722 // gate + InlineAbort path handles short bodies.
5723 let returned_past_head = self.frames.len() <= self.jit.recording_frame_base;
5724 let cur_depth = if returned_past_head {
5725 0
5726 } else {
5727 self.frames.len() - 1 - self.jit.recording_frame_base
5728 };
5729 let depth_cap_hit = cur_depth > crate::jit::trace::MAX_INLINE_DEPTH as usize;
5730 let rec = self.jit.active_trace.as_mut().expect("just checked Some");
5731 let at_head_loop = cur_depth == 0
5732 && !rec.ops.is_empty()
5733 && !returned_past_head
5734 && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5735 && pc == rec.head_pc;
5736 // P16-A — self-link cycle catch (mirrors LuaJIT's
5737 // `check_call_unroll` at `lj_record.c:1869`). Trips when:
5738 // 1. We're about to execute the head_pc on head_proto
5739 // at depth > 0 (we're re-entering the trace head
5740 // from inside an inlined recursion level — UpRec).
5741 // 2. The count of ancestor frames in the recording
5742 // window that share `head_proto` exceeds
5743 // [`RECUNROLL_THRESHOLD`] (default 2).
5744 // For fib(N): head_pc=0, head_proto=fib. After 2 inline
5745 // recursion levels are captured, the recorder enters
5746 // the 3rd nested fib frame, sees cur_depth=3 > 2, and
5747 // trips this catch — closing with `SelfRecKind::UpRec`.
5748 // The lowerer's `TraceEnd::SelfLink` tail emits the
5749 // bump-base + branch-to-self loop body.
5750 //
5751 // TailRec vs UpRec: LJ distinguishes via
5752 // `framedepth + retdepth == 0`. luna doesn't track
5753 // retdepth separately; cur_depth == 0 with a non-empty
5754 // call chain in tail position is rare (would require
5755 // explicit Lua TCO). We use cur_depth > 0 as the UpRec
5756 // condition (fib's case); cur_depth == 0 with positive
5757 // ancestor count would route to TailRec, but luna's
5758 // recorder doesn't currently produce that shape because
5759 // tail-call elision pops the caller frame and we'd
5760 // hit `at_head_loop` instead.
5761 let self_link_trip: Option<crate::jit::trace::SelfRecKind> = {
5762 if self.jit.p16_self_link_enabled
5763 && !returned_past_head
5764 && std::ptr::eq(cl.proto.as_ptr(), rec.head_proto.as_ptr())
5765 && pc == rec.head_pc
5766 && cur_depth > 0
5767 {
5768 // Count ancestor frames sharing head_proto.
5769 // self.frames[recording_frame_base..] currently
5770 // includes the just-pushed frame at the top
5771 // (the one about to execute head_pc). Ancestors
5772 // = the slice excluding the top frame.
5773 let head_proto_ptr = rec.head_proto.as_ptr();
5774 let last_idx = self.frames.len() - 1;
5775 let mut count = 0usize;
5776 for i in self.jit.recording_frame_base..last_idx {
5777 if let CallFrame::Lua(f) = &self.frames[i]
5778 && std::ptr::eq(f.closure.proto.as_ptr(), head_proto_ptr)
5779 {
5780 count += 1;
5781 }
5782 }
5783 if count > crate::jit::trace::RECUNROLL_THRESHOLD {
5784 // cur_depth > 0 → UpRec (fib pattern).
5785 // cur_depth == 0 wouldn't reach this arm.
5786 Some(crate::jit::trace::SelfRecKind::UpRec)
5787 } else {
5788 None
5789 }
5790 } else {
5791 None
5792 }
5793 };
5794 if let Some(kind) = self_link_trip {
5795 // v2.0 Track-R R3.3+ sub-0 — SelfLink relax for
5796 // self-recursive patterns at frame depth >= 2.
5797 //
5798 // Pre sub-0: a SelfLink trip at the head_pc re-entry
5799 // unconditionally stamped `self_link_kind`. The
5800 // R3a `downrec_close` marker can only fire from the
5801 // depth>0 Op::Return path (`rec.retfs` chain),
5802 // which never reaches the recorder for fib(28)-like
5803 // shapes that hit the SelfLink cycle catch BEFORE
5804 // any base-case Return — leaving `downrec_close`
5805 // None and routing the trace through R1's safe
5806 // `dispatchable=false` `"self-link-retf-r1"` path
5807 // (audit measured `trace_dispatched = 0`).
5808 //
5809 // Sub-0 lift: when the SelfLink trip fires AND
5810 // `cur_depth >= 2` (the count > RECUNROLL_THRESHOLD
5811 // gate already requires this — kept explicit as a
5812 // safety floor), route the close through `downrec_
5813 // close` INSTEAD of `self_link_kind`. The recorder
5814 // synthesises the close marker from the most
5815 // recent Op::Call at depth `cur_depth - 1`:
5816 // - `return_pc` = `call.pc + 1` (caller's resume
5817 // PC after the recursive call returns; mirror
5818 // of R3a's `caller_pc` derivation at the
5819 // depth>0 Op::Return capture path below).
5820 // - `target_proto` = `call.proto` (caller's
5821 // proto; equals `rec.head_proto` for self-
5822 // recursion).
5823 // - `depth_delta` = `1` (today's recorder always
5824 // unrolls one level; R3a uses the same
5825 // constant).
5826 //
5827 // The lowerer's `end_idx` picker (`trace.rs:3729`)
5828 // routes through `TraceEnd::DownRec` ahead of the
5829 // `self_link_kind` arm; the R3b/R3d lowerer arm
5830 // emits the stitch-sentinel + caller-pc-guard
5831 // scaffold. Single-candidate guard chain (sub-0's
5832 // recorder produces 1 caller_pc candidate because
5833 // `rec.retfs` is empty) keeps `dispatchable=false`
5834 // + `"downrec-stitch-pending"` label (per R3d's
5835 // `multi_way_candidate_count >= 2` gate at
5836 // `trace.rs:7385`). Net behaviour: trace compiles
5837 // under DownRec routing; interp runs the
5838 // recursion naturally → result 317811.
5839 //
5840 // The `cur_depth >= 2` gate is automatically
5841 // satisfied by the count > RECUNROLL_THRESHOLD=2
5842 // trip condition (3 ancestor frames sharing
5843 // head_proto implies cur_depth >= 3), kept
5844 // explicit so a future RECUNROLL_THRESHOLD tweak
5845 // doesn't silently flip shallow-recursion
5846 // shapes (cur_depth == 1) onto the DownRec arm.
5847 //
5848 // R3.3+ sub-1/2/3/4 will replace the depth-baked
5849 // op_offsets[] addressing with runtime base_var
5850 // threading so the trace's recorded body is
5851 // depth-relative and the DownRec dispatch
5852 // becomes wall-clock-positive. Sub-0 is the
5853 // routing scaffold; it does not aim for gain.
5854 let _ = kind;
5855 let relaxed_to_downrec = cur_depth >= 2 && rec.downrec_close.is_none() && {
5856 let caller_depth_u8 = (cur_depth - 1) as u8;
5857 if let Some(call_op) = rec.ops.iter().rev().find(|r| {
5858 r.inline_depth == caller_depth_u8
5859 && matches!(r.inst.op(), crate::vm::isa::Op::Call)
5860 }) {
5861 rec.downrec_close = Some(crate::jit::trace::DownRecClose {
5862 return_pc: call_op.pc + 1,
5863 target_proto: call_op.proto,
5864 depth_delta: 1,
5865 });
5866 true
5867 } else {
5868 false
5869 }
5870 };
5871 if relaxed_to_downrec {
5872 // R2 close-cause taxonomy: tag the lift so
5873 // probes can tally the fire rate. Mirrors
5874 // R3a's `"downrec-restart"` bump for the
5875 // depth>0 Op::Return path (different trip
5876 // origin, same downstream routing). The
5877 // existing `"self-link-retf-r1"` label still
5878 // fires for trips that DON'T relax (no
5879 // candidate Op::Call ancestor in rec.ops, or
5880 // cur_depth < 2) via the lowerer's
5881 // dispatch_off_reason mirror at the close
5882 // handler — kept as a regression safety net.
5883 self.jit
5884 .counters
5885 .bump_close_cause("selflink-yields-to-downrec");
5886 } else {
5887 rec.self_link_kind = Some(kind);
5888 }
5889 }
5890 let should_close =
5891 at_head_loop || returned_past_head || depth_cap_hit || self_link_trip.is_some();
5892 if should_close {
5893 // P13-S13-H — long-trace bias: a call-triggered
5894 // recording that closed with a very short body
5895 // (fib base case: `Lt`/`Jmp`/`Return1` = 3 ops,
5896 // binary_trees `make(0)`: 4 ops) is pathological.
5897 // Compiling + caching it pins `Proto.traces` to a
5898 // trace that the length gate will refuse to
5899 // dispatch (per `MIN_DISPATCHABLE_TRUNC_BODY_FLOOR
5900 // = 40`), AND blocks the back-edge / longer-call
5901 // path from re-recording the same head_pc (the
5902 // dedup `already_cached` check below short-
5903 // circuits). The fix: discard the short call-
5904 // triggered recording WITHOUT caching. (An earlier
5905 // draft also biased the proto's `call_hot_count`
5906 // back below the threshold to force a retry; the
5907 // code below does not — see the `call_hot_count`
5908 // note at the end of this comment block.)
5909 //
5910 // Back-edge triggered traces are exempt — a
5911 // tight numeric-for loop's body is legitimately
5912 // 3 ops (`Add`, ForLoop) and DOES dispatch
5913 // usefully when re-entered many times.
5914 // P13-S13-H — coverage heuristic to detect
5915 // pathologically partial call-triggered traces:
5916 // for self-recursive / branchy protos like
5917 // `fib` (~17 bytecode ops) or
5918 // `binary_trees.make` (~26 ops), the recorder
5919 // can fire at a BASE-case entry (`fib(0)` or
5920 // `make(0)`) producing a 3–4 op trace that
5921 // covers a tiny fraction of the proto's code.
5922 // That trace is doomed by the length gate
5923 // post-compile AND blocks any longer follow-up
5924 // (the dedup `already_cached` check below). The
5925 // fix: discard call-triggered closes where
5926 // `rec.ops.len() * 2 < head_proto.code.len()`
5927 // (less than half the proto's bytecode), so the
5928 // back-edge / longer call path can take over.
5929 //
5930 // Why coverage > raw length: protos with
5931 // intrinsically short bodies (closure
5932 // factories: `Closure + Return1` = 2 ops,
5933 // simple wrappers: `LoadI + Return1` = 2 ops)
5934 // record 100% coverage even at length 2 — those
5935 // ARE legitimately short and the closure /
5936 // sunk-emit lowering paths (S7-A / S9-C) make
5937 // them worth compiling. The heuristic admits
5938 // them. fib's `[Lt, Jmp, Return1]` (3 of ~17)
5939 // and make's `[Lt, Jmp, LoadI, Return1]` (4 of
5940 // ~26) get discarded.
5941 //
5942 // Back-edge triggered traces are unaffected —
5943 // a tight numeric-for body legitimately covers
5944 // 3 of ~3 proto ops it can dispatch from
5945 // (`Add + ForLoop`) and the recorder fires on
5946 // the back-edge, not call entry.
5947 //
5948 // `call_hot_count` is intentionally NOT reset
5949 // (an earlier draft tried `THRESHOLD - 32` but
5950 // caused active_trace contention with the
5951 // outer back-edge trigger — see
5952 // setlist_b_zero_with_call_c_zero_sunk_emits).
5953 // We give up on dispatching the pathological
5954 // shape on the same proto; the back-edge or a
5955 // longer call path on a deeper recursion point
5956 // can still record + cache a real trace.
5957 let proto_code_len = rec.head_proto.code.len();
5958 let is_partial_coverage = rec.ops.len() * 2 < proto_code_len;
5959 // P13-S13-I — per-Proto discard cap. The S13-H
5960 // relaxed trigger condition (`c >= THRESHOLD &&
5961 // !already_cached`) means a Proto whose every
5962 // recording is partial-coverage will re-fire the
5963 // trigger every call indefinitely (1500+ in
5964 // `binary_trees`-pattern test). The cap stops
5965 // discarding after `MAX_DISCARDS_PER_PROTO` —
5966 // the next close falls through to compile (even
5967 // if partial), caches the trace, and the
5968 // `already_cached` short-circuit kills the
5969 // storm. Dispatch may still be refused
5970 // post-compile (length gate), but the recorder
5971 // stops churning.
5972 const MAX_DISCARDS_PER_PROTO: u32 = 5;
5973 let prior_discards = rec.head_proto.trace_discard_count.get();
5974 let cap_reached = prior_discards >= MAX_DISCARDS_PER_PROTO;
5975 // P13-S13-K — flip the `gave_up` flag the
5976 // moment cap is reached (BEFORE the close-
5977 // dispatching branch below). The trigger gates
5978 // short-circuit on this flag, skipping the
5979 // RefCell + linear `already_cached` scan on
5980 // every subsequent call to this Proto. Useful
5981 // for `binary_trees_pattern`-class loads where
5982 // a single Proto sees ~20k calls post-cap.
5983 if cap_reached
5984 && rec.is_call_triggered
5985 && is_partial_coverage
5986 && !rec.head_proto.trace_gave_up.get()
5987 {
5988 rec.head_proto.trace_gave_up.set(true);
5989 }
5990 if rec.is_call_triggered && is_partial_coverage && !cap_reached {
5991 // Tally as closed (for visibility) but DROP
5992 // without compile/cache. Use the existing
5993 // closed-lens accumulator so probes can
5994 // observe the discarded shape.
5995 // P13-S13-I — bump discard count BEFORE
5996 // dropping the recording so the next
5997 // close sees the updated counter.
5998 rec.head_proto.trace_discard_count.set(prior_discards + 1);
5999 self.jit.counters.closed += 1;
6000 self.jit
6001 .counters
6002 .closed_lens
6003 .push((rec.is_call_triggered, rec.ops.len()));
6004 // v2.0 Track-R R2 — partial-coverage discard
6005 // close path. Pre-R2 this site bumped `closed`
6006 // + `closed_lens` (visibility) but no per-
6007 // reason label, so probes couldn't separate a
6008 // real successful close from a discard tally.
6009 // Tag explicitly to make the recorder-side
6010 // close-cause taxonomy single-source.
6011 self.jit
6012 .counters
6013 .bump_close_cause("partial-coverage-discard");
6014 self.jit.active_trace = None;
6015 // Continue with interp loop — don't
6016 // fall through to compile path.
6017 // The op at `pc` hasn't dispatched yet;
6018 // the outer loop iteration handles it.
6019 } else {
6020 rec.closed = true;
6021 // P12-S2.C — detach the closed record, then try
6022 // to compile it. Dedup by `head_pc`: a Proto
6023 // already carrying a CompiledTrace for this PC
6024 // skips recompile (the hot counter caps
6025 // re-recording at `u32::MAX / 2` anyway, but
6026 // explicit dedup keeps `Proto.traces` short
6027 // for the S3 dispatcher's linear scan).
6028 //
6029 // No `Vm::run` change for failure: we just bump
6030 // the failed counter and drop the record. S3
6031 // will read `Proto.traces` to decide whether to
6032 // dispatch — until then, this is bookkeeping.
6033 let head_pc_val = rec.head_pc;
6034 let closed_record = self
6035 .jit
6036 .active_trace
6037 .take()
6038 .expect("active_trace was Some this branch");
6039 self.jit.counters.closed += 1;
6040 self.jit
6041 .counters
6042 .closed_lens
6043 .push((closed_record.is_call_triggered, closed_record.ops.len()));
6044 // P12-S5-B fix: cache the trace on the
6045 // recorder's *head proto*, not the current
6046 // closure's proto. For non-recursive
6047 // call-triggered traces, close fires after
6048 // `Return1` pops the callee frame — `cl` at
6049 // that point is the CALLER's closure, while
6050 // `closed_record.head_proto` is the CALLEE's
6051 // proto (the one we actually want the trace
6052 // to be discoverable from on the next call).
6053 // Self-recursive fib closed via depth-cap
6054 // mid-recursion so `cl.proto == head_proto`
6055 // happened to coincide — this fix makes that
6056 // accidental coincidence intentional.
6057 let head_proto = closed_record.head_proto;
6058 let already_cached = head_proto
6059 .traces
6060 .borrow()
6061 .iter()
6062 .any(|t| t.head_pc == head_pc_val);
6063 if !already_cached {
6064 // Internal-loop = true: the trace runs in
6065 // a native loop until a cmp side-exits, so
6066 // the dispatcher's per-entry marshal cost
6067 // amortizes across the whole run of
6068 // iterations the loop's recorded direction
6069 // stays valid. The lowerer auto-downgrades
6070 // to one-shot for cmp-less or Call-truncating
6071 // traces.
6072 // P15-A v2-C-A6-5 — side traces MUST NOT
6073 // internal-loop. The parent's recorded prefix
6074 // (ops at PCs < side trace's head_pc) defines
6075 // values for registers the child's body reads
6076 // without re-writing each iter — e.g. for
6077 // s12_step_b, parent's `pc=19 Add R[12] = R[1]
6078 // + R[11]` sets R[12], and the child trace
6079 // (head_pc=24) re-runs `pc=20 Move R[1] =
6080 // R[12]` each iter via its outer ForLoop
6081 // internal-loop, ALWAYS reading the stale
6082 // entry-time R[12]. The parent's Add never
6083 // re-runs during child's loop, so R[1] gets
6084 // pinned to one stale value. Force one-shot
6085 // for side traces: each parent-exit round-
6086 // trips through dispatcher → parent's Add
6087 // runs → side trace runs ONE iter → return.
6088 let opts = crate::jit::trace::CompileOptions {
6089 internal_loop: closed_record.side_trace_parent.is_none(),
6090 pre53: self.version() <= LuaVersion::Lua53,
6091 aot: false,
6092 };
6093 // v1.1 A1 Session A — route through trace_compiler.
6094 // v2.0 Track J sub-step J-B — split-borrow JitState
6095 // so the trait method can take `&mut dyn JitStorage`.
6096 let result = {
6097 let jit = &mut self.jit;
6098 let storage: &mut dyn crate::jit::JitStorage = jit.storage.as_mut();
6099 jit.trace_compiler
6100 .try_compile_trace(storage, &closed_record, opts)
6101 };
6102 match result {
6103 Some(mut ct) => {
6104 // P12-S5-A/B/C — tally Sinkable sites
6105 // + actually-sunk-emit sites + materialise
6106 // emit sites before moving `ct` into
6107 // Proto.traces.
6108 self.jit.counters.sinkable_seen +=
6109 ct.sinkable_sites_seen as u64;
6110 self.jit.counters.accum_bufferable_seen +=
6111 ct.accum_bufferable_seen as u64;
6112 self.jit.counters.sunk_alloc += ct.sunk_alloc_seen as u64;
6113 self.jit.counters.materialize_emit +=
6114 ct.materialize_emit_count as u64;
6115 self.jit.counters.closure_emit += ct.closure_seen as u64;
6116 if ct.is_inline_abort_close {
6117 self.jit.counters.inline_abort += 1;
6118 }
6119 // v2.0 Stage 7 polish 6 fire
6120 // experiment — split tally so a
6121 // probe can answer the AOT
6122 // `accepted_with_per_exit_inline`
6123 // gate's question at the JIT
6124 // surface too: how many compiled
6125 // traces emitted depth>0 cmp
6126 // side-exits, and how many of
6127 // those survived all the
6128 // `dispatchable = false` pins
6129 // (`InlineAbort-gate`,
6130 // `self-link-retf-r1`,
6131 // `downrec-stitch-pending`, etc.).
6132 if !ct.per_exit_inline.is_empty() {
6133 self.jit.counters.per_exit_inline_compiled += 1;
6134 if ct.dispatchable {
6135 self.jit.counters.per_exit_inline_dispatchable += 1;
6136 }
6137 }
6138 if let Some(reason) = ct.dispatch_off_reason {
6139 self.jit.counters.dispatch_off_reasons.push(reason);
6140 // v2.0 Track-R R2 — mirror
6141 // the ordered Vec push into
6142 // the per-reason HashMap so
6143 // probes can answer "how many
6144 // of each dispatch_off label
6145 // fired" in O(1) without
6146 // walking the Vec. Same
6147 // bucket as the recorder-side
6148 // abort/discard tags above.
6149 self.jit.counters.bump_close_cause(reason);
6150 }
6151 // v2.0 Track-R R3b — count
6152 // compiled traces that carry a
6153 // down-recursion stitch link.
6154 // Bumped here (not at the lowerer
6155 // emit site) because the Vm's
6156 // JitCounters live on the Vm,
6157 // and the lowerer doesn't have a
6158 // Vm handle. R3b's regression
6159 // pin reads this via
6160 // `Vm::trace_downrec_link_compiled_count`.
6161 if ct.downrec_link.is_some() {
6162 self.jit.counters.downrec_link_compiled += 1;
6163 }
6164 // v2.0 Track-R R3d — multi-way
6165 // guard emit counter. Bumped when
6166 // the lowerer's R3d arm collected
6167 // >= 2 distinct caller_pc candidates
6168 // and lifted `dispatchable=true`.
6169 // R3c's single-CMP shape stores
6170 // `1` here without bumping; non-
6171 // DownRec closes store `0`.
6172 if ct.downrec_multi_way_count >= 2 {
6173 self.jit.counters.multi_way_guard_emitted += 1;
6174 }
6175 // P15-A v2-A — side-trace finalisation.
6176 // Pin `dispatchable=false` so the
6177 // primary lookup `traces.find(|t|
6178 // t.head_pc == pc && t.dispatchable)`
6179 // never matches this entry — the
6180 // side trace is meant to be entered
6181 // ONLY through the parent's exit
6182 // indirection (v2-B/C IR), not the
6183 // back-edge / call-trigger paths.
6184 // Then write the entry fn ptr into
6185 // the parent's `exit_side_trace_ptrs`
6186 // slot so v2-B/C IR can read it.
6187 if let Some((parent_proto, parent_head_pc, parent_exit_idx)) =
6188 closed_record.side_trace_parent
6189 {
6190 ct.dispatchable = false;
6191 let entry_ptr = ct.entry as *const () as *const u8;
6192 let _side_trace_head_pc = closed_record.head_pc;
6193 let parent_traces = parent_proto.traces.borrow();
6194 if let Some(parent_ct) = parent_traces
6195 .iter()
6196 .find(|t| t.head_pc == parent_head_pc)
6197 {
6198 // P15-A v2-C-A5-C — shape-match
6199 // gate. Find the parent's per-exit
6200 // tag snapshot at the wired exit
6201 // (inline / tag / global) and
6202 // check the child's entry_tags
6203 // match. If not, leave the cell
6204 // null + skip cache populate so
6205 // the future v2-C-A2 IR's
6206 // `call_indirect` stays inert at
6207 // this exit (the child's
6208 // shape-specialised IR would
6209 // mis-interpret raw bits the
6210 // parent writes to reg_state).
6211 let inline_n = parent_ct.per_exit_inline.len();
6212 let tags_n = parent_ct.per_exit_tags.len();
6213 let parent_exit_tags_slice: &[
6214 crate::jit::trace::ExitTag
6215 ] = if parent_exit_idx < inline_n {
6216 &parent_ct.per_exit_inline
6217 [parent_exit_idx]
6218 .exit_tags
6219 } else if parent_exit_idx
6220 < inline_n + tags_n
6221 {
6222 &parent_ct.per_exit_tags
6223 [parent_exit_idx - inline_n]
6224 .1
6225 } else {
6226 &parent_ct.exit_tags
6227 };
6228 let shape_ok =
6229 crate::jit::trace::exit_tags_match_entry_tags(
6230 &ct.entry_tags,
6231 parent_exit_tags_slice,
6232 &parent_ct.entry_tags,
6233 );
6234 if !shape_ok {
6235 self.jit.counters.side_trace_shape_mismatch += 1;
6236 }
6237 // P15-A v2-C-A4 — write the child's
6238 // entry fn ptr to BOTH the legacy
6239 // v2-A `exit_side_trace_ptrs[idx]`
6240 // cell (kept so v2-A's
6241 // walk_any_side_ptr_non_null tests
6242 // stay green) AND the per-kind cell
6243 // whose heap address the parent's
6244 // IR baked (v2-C-A2). The IR-baked
6245 // cell is what the call_indirect
6246 // gate actually reads. Only write
6247 // when A5-C shape gate passes.
6248 if shape_ok {
6249 if let Some(cell) = parent_ct
6250 .exit_side_trace_ptrs
6251 .get(parent_exit_idx)
6252 {
6253 cell.set(entry_ptr);
6254 }
6255 // Compute (kind, local) for the
6256 // IR-baked cell. Layout follows
6257 // exit_hit_counts: inline first,
6258 // then per_exit_tags, then the
6259 // global tail slot.
6260 let (sent_kind, sent_local) = if parent_exit_idx
6261 < inline_n
6262 {
6263 parent_ct.per_exit_inline[parent_exit_idx]
6264 .side_trace_ptr
6265 .set(entry_ptr);
6266 (
6267 crate::jit::trace::SIDE_SENT_KIND_INLINE,
6268 parent_exit_idx as u32,
6269 )
6270 } else if parent_exit_idx < inline_n + tags_n {
6271 let local = parent_exit_idx - inline_n;
6272 if let Some(b) =
6273 parent_ct.tags_side_trace_ptrs.get(local)
6274 {
6275 b.set(entry_ptr);
6276 }
6277 (
6278 crate::jit::trace::SIDE_SENT_KIND_TAG,
6279 local as u32,
6280 )
6281 } else {
6282 parent_ct.global_side_trace_ptr.set(entry_ptr);
6283 (crate::jit::trace::SIDE_SENT_KIND_GLOBAL, 0)
6284 };
6285 self.jit.counters.side_trace_compiled += 1;
6286 // P15-A v2-D-A8 — flip the
6287 // parent's fast-path hint so
6288 // the dispatcher knows to do
6289 // the tentative decode + cell
6290 // check on subsequent
6291 // dispatches. Set once and
6292 // stays true (we never unwire
6293 // a side trace today).
6294 parent_ct.has_any_side_wired.set(true);
6295
6296 // P15-A v2-C-A1/A4 — populate
6297 // the O(1) lookup cache the
6298 // dispatcher consults on
6299 // sentinel-bit-set returns.
6300 // Key is the encoded sentinel
6301 // (same encoding the IR ORs
6302 // into bits 56..=62 of the
6303 // child's i64 return).
6304 let sentinel =
6305 crate::jit::trace::encode_side_sentinel(
6306 sent_kind, sent_local,
6307 );
6308 let predicted_idx = if std::ptr::eq(
6309 parent_proto.as_ptr(),
6310 head_proto.as_ptr(),
6311 ) {
6312 parent_traces.len() as u32
6313 } else {
6314 head_proto.traces.borrow().len() as u32
6315 };
6316 parent_ct
6317 .side_trace_cache
6318 .borrow_mut()
6319 .insert(sentinel, predicted_idx);
6320 }
6321 }
6322 drop(parent_traces);
6323 }
6324 head_proto.traces.borrow_mut().push(TArc::new(ct));
6325 self.jit.counters.compiled += 1;
6326 }
6327 None => {
6328 self.jit.counters.compile_failed += 1;
6329 self.jit
6330 .counters
6331 .compile_failed_reasons
6332 .push(self.jit.trace_compiler.last_compile_checkpoint());
6333 }
6334 }
6335 }
6336 } // P13-S13-H — close the long-trace-bias else branch
6337 } else {
6338 // P12-S4-step1 + step4a — depth-aware push at the
6339 // current `cur_depth`. The `depth_cap_hit` /
6340 // `returned_past_head` early-exit is handled by
6341 // the `should_close` branch above; reaching here
6342 // means `cur_depth <= MAX_INLINE_DEPTH` and the
6343 // trace head's frame is still live.
6344 let depth_u8 = cur_depth as u8;
6345 if depth_u8 > self.jit.max_depth_seen {
6346 self.jit.max_depth_seen = depth_u8;
6347 }
6348 // P12-S9-A — fix up a prior `Op::Call C=0` (multi-
6349 // return / variable return count). Recorder pushed
6350 // it with var_count=None before the call dispatched;
6351 // now that the call has returned and we're about to
6352 // push the next op, top reflects the actual return
6353 // count. Snapshot top - (caller.base + call.a).
6354 if let Some(last) = rec.ops.last_mut()
6355 && matches!(last.inst.op(), crate::vm::isa::Op::Call)
6356 && last.inst.c() == 0
6357 && last.var_count.is_none()
6358 && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6359 {
6360 let from = f.base + last.inst.a();
6361 if self.top >= from {
6362 last.var_count = Some(self.top - from);
6363 }
6364 }
6365 // P12-S9-A/C — for SetList B=0, snapshot the source
6366 // count = top - A - 1 (mirrors Lua's `n = top - ra
6367 // - 1` from lvm.c OP_SETLIST). Sources are
6368 // R[A+1..top), exclusive top. For Call C=0's
6369 // var_count (the return count = top - A inclusive),
6370 // see the prior-op fix-up above; here we
6371 // initialise the current Call op to None and let
6372 // the fix-up on the next op's push populate it.
6373 let var_count = if matches!(inst.op(), crate::vm::isa::Op::SetList)
6374 && inst.b() == 0
6375 && let Some(f) = self.frames.last().and_then(CallFrame::lua)
6376 {
6377 let from = f.base + inst.a();
6378 if self.top > from {
6379 Some(self.top - from - 1)
6380 } else {
6381 None
6382 }
6383 } else {
6384 None
6385 };
6386 let op = crate::jit::trace::RecordedOp {
6387 proto: cl.proto,
6388 pc,
6389 inst,
6390 inline_depth: depth_u8,
6391 var_count,
6392 };
6393 // v2.0 Track-R R1 — depth>0 Return0/Return1 mirrors
6394 // LuaJIT's `IR_RETF` (lj_record.c:922+ lj_record_ret).
6395 // Captured as a side-channel `RetfRecord` parallel to
6396 // `ops` when `p16_self_link_enabled` is on. R3's
6397 // down-rec stitch consumes these to guard side-trace
6398 // inlined-frame topology against the recorded shape.
6399 // Gated on the same flag as the cycle catch so the
6400 // ship-default path (p16 off) sees zero behavior
6401 // change. `caller_pc` is the recorded enclosing Call's
6402 // pc + 1 — interp's resume point after the inlined
6403 // frame pops.
6404 if self.jit.p16_self_link_enabled
6405 && depth_u8 > 0
6406 && matches!(
6407 inst.op(),
6408 crate::vm::isa::Op::Return0 | crate::vm::isa::Op::Return1
6409 )
6410 {
6411 let results: u8 = match inst.op() {
6412 crate::vm::isa::Op::Return0 => 0,
6413 crate::vm::isa::Op::Return1 => 1,
6414 _ => 0,
6415 };
6416 // Most recent Op::Call recorded at the caller's
6417 // depth (`depth_u8 - 1`) is the frame this Return
6418 // is unwinding from. Reverse scan stops at the
6419 // first match.
6420 let caller_depth = depth_u8 - 1;
6421 let caller_call = rec.ops.iter().rev().find(|r| {
6422 r.inline_depth == caller_depth
6423 && matches!(r.inst.op(), crate::vm::isa::Op::Call)
6424 });
6425 let caller_pc = caller_call.map(|r| r.pc + 1).unwrap_or(pc);
6426 // v2.0 Track-R R3a — capture the caller's proto
6427 // for the RetfRecord. LuaJIT `IR_RETF.op1`
6428 // equivalent. For fib(28) the caller's proto
6429 // equals the trace head; for future mutual
6430 // recursion the recorded Op::Call's proto is the
6431 // right target. Fallback to head_proto when no
6432 // enclosing Call op was captured (mirrors
6433 // `caller_pc`'s fallback to the Return's own pc).
6434 let caller_proto = caller_call.map(|r| r.proto).unwrap_or(rec.head_proto);
6435 rec.retfs.push(crate::jit::trace::RetfRecord {
6436 from_depth: depth_u8,
6437 to_depth: caller_depth,
6438 results,
6439 caller_pc,
6440 proto: caller_proto,
6441 });
6442 // v2.0 Track-R R3a — DownRec close trigger:
6443 // count RetfRecords on this recording whose
6444 // `proto` matches `caller_proto` (LuaJIT
6445 // `check_downrec_unroll` chain filter
6446 // `op1 == ptref`). Threshold mirrors
6447 // RECUNROLL_THRESHOLD; first trip stamps the
6448 // `downrec_close` marker, subsequent retfs
6449 // keep the marker without overwrite. The
6450 // lowerer's end_idx picker routes through
6451 // TraceEnd::DownRec when the marker is set;
6452 // R3a's tail emit still falls through to R1's
6453 // safe deopt path so fib(28) result stays
6454 // 317_811. R3b lifts.
6455 if rec.downrec_close.is_none() {
6456 let caller_proto_ptr = caller_proto.as_ptr();
6457 let prior_match_count = rec
6458 .retfs
6459 .iter()
6460 .filter(|r| r.proto.as_ptr() == caller_proto_ptr)
6461 .count();
6462 // Strictly-greater-than threshold matches
6463 // LuaJIT `count + J->tailcalled > recunroll`.
6464 // The newly-pushed retf is already counted.
6465 if prior_match_count > crate::jit::trace::RECUNROLL_THRESHOLD {
6466 rec.downrec_close = Some(crate::jit::trace::DownRecClose {
6467 return_pc: caller_pc,
6468 target_proto: caller_proto,
6469 depth_delta: 1,
6470 });
6471 // R2 close-cause taxonomy: tag the
6472 // restart with `"downrec-restart"`. R3b
6473 // adds `"downrec-stitch-failed"` when
6474 // the lifted back-edge falls back to
6475 // deopt.
6476 self.jit.counters.bump_close_cause("downrec-restart");
6477 }
6478 }
6479 }
6480 // v2.1 Phase 1I.B — capture FieldIcSnapshot for the
6481 // FIRST eligible Op::GetField site under env-gate
6482 // LUNA_JIT_FIELD_IC=1. "Eligible" means:
6483 // - R[B] is Value::Table with metatable.is_none()
6484 // - K[C] is Value::Str
6485 // - The string key actually occupies a hash slot
6486 // (so the IC's slot_idx is a real index, not
6487 // a probe sentinel).
6488 // Once captured, subsequent GetFields skip this
6489 // logic (rec.field_ic_snapshot.is_some() short-
6490 // circuits). Env-OFF short-circuits on the cached
6491 // atomic check inside field_ic_enabled().
6492 if rec.field_ic_snapshot.is_none()
6493 && matches!(inst.op(), crate::vm::isa::Op::GetField)
6494 && crate::jit::trace_types::field_ic_enabled()
6495 {
6496 let b = inst.b();
6497 let c_idx = inst.c() as usize;
6498 let r_b = self.stack[(base + b) as usize];
6499 if let Value::Table(g) = r_b
6500 && g.metatable().is_none()
6501 && c_idx < cl.proto.consts.len()
6502 && let Value::Str(s) = cl.proto.consts[c_idx]
6503 {
6504 let key = Value::Str(s);
6505 let tbl_ref = &*g;
6506 if let Some(slot_idx) = tbl_ref.find_node_idx(key)
6507 && let Some(val) = tbl_ref.node_val_at(slot_idx)
6508 {
6509 let op_idx = rec.ops.len() as u32;
6510 rec.field_ic_snapshot =
6511 Some(crate::jit::trace_types::FieldIcSnapshot {
6512 op_idx,
6513 nodes_len: tbl_ref.nodes_capacity() as u64,
6514 slot_idx: slot_idx as u64,
6515 key_ptr_bits: s.as_ptr() as u64,
6516 cached_val_tag: val.tag_byte(),
6517 });
6518 self.jit.counters.field_ic_snapshot_captured += 1;
6519 }
6520 }
6521 }
6522 if !rec.push(op) {
6523 // v2.0 Track-R R2 — recorder overflow
6524 // (MAX_TRACE_LEN). Pre-R2 this site bumped
6525 // `aborted` with no reason label, leaving the
6526 // overflow indistinguishable from any other
6527 // abort cause that might be added later.
6528 // Tag it explicitly under the close-cause
6529 // bucket so probes can tally overflow vs
6530 // other abort causes in O(1).
6531 self.jit.active_trace = None;
6532 self.jit.counters.aborted += 1;
6533 self.jit.counters.bump_close_cause("trace-overflow");
6534 }
6535 }
6536 }
6537
6538 // P12-S3 — trace JIT dispatcher.
6539 //
6540 // When the dispatch loop is about to execute the op at
6541 // `pc` and there's a `numeric_only` CompiledTrace cached
6542 // for that `head_pc`, marshal the live regs into an
6543 // i64 buffer, jump into the trace, and resume the
6544 // interpreter at the returned continuation PC.
6545 //
6546 // Skipped (zero overhead) when `trace_jit_enabled` is
6547 // false; the lookup is a borrow + scan over
6548 // `cl.proto.traces`, which is a `Vec` whose size is at
6549 // most one entry per back-edge per Proto in practice.
6550 //
6551 // Marshalling contract — the reg_state ABI is `*mut i64`
6552 // with no tag info, so a slot is marshalled only when its
6553 // tag is one the loop below admits (Int, Float, Table,
6554 // Closure, Native, Str, Nil) and matches the trace's
6555 // compile-time entry tag. Any other slot forces a skip;
6556 // interp takes over for one op and the back-edge brings
6557 // us back to try again on a later pass.
6558 //
6559 // A trace that comes back with `vm.jit.pending_err`
6560 // parked is treated as a deopt: clear the err, leave
6561 // the stack as the trace wrote it, and let the
6562 // interpreter run from the same `pc`. The trace itself
6563 // is left cached — a future entry might find no
6564 // metatable in the way and succeed.
6565 // P17-A1 (Path C #3) — single Rc<CompiledTrace> clone instead
6566 // of 6 per-field Rc clones. proto.traces is now
6567 // Vec<Rc<CompiledTrace>>; the dispatcher clones ONE Rc and
6568 // reads fields via auto-deref. fib_28 saves ~5 Rc::clone
6569 // operations per dispatch × 434k = ~2.2M Rc atomic ops
6570 // (~1-2% gain measured separately).
6571 // v2.0 Track-R R3c — one-shot consume of the
6572 // `suppress_downrec_admit_once` flag. Set by the R3c
6573 // downrec post-invoke arm below when it force-deopts the
6574 // trace (caller-pc guard miss OR cycle-budget exhausted)
6575 // so the NEXT interpreter loop iteration skips the
6576 // downrec admit, lets interp run the op at `head_pc`,
6577 // advances `pc` past `head_pc`, and breaks the otherwise-
6578 // infinite admit loop. Reading + clearing here means a
6579 // single dispatch tick consumes the suppression — the
6580 // following tick re-admits naturally (with the budget
6581 // also reset by the deopt site).
6582 let downrec_admit_blocked = self.jit.suppress_downrec_admit_once;
6583 self.jit.suppress_downrec_admit_once = false;
6584 if self.jit.trace_enabled
6585 && let Some(ct) = {
6586 let traces = cl.proto.traces.borrow();
6587 traces
6588 .iter()
6589 .find(|t| {
6590 if t.head_pc != pc {
6591 return false;
6592 }
6593 let is_downrec = t.downrec_link.is_some();
6594 // v2.0 Track-R R3c — the one-shot suppress
6595 // flag blocks any admit (primary or fallback)
6596 // for `downrec_link`-bearing traces so the
6597 // next interp iter can run the natural op
6598 // at `head_pc` and advance past it. R3d's
6599 // `dispatchable=true` lift means the suppress
6600 // must also cover the primary `t.dispatchable`
6601 // arm — otherwise the lifted lookup would
6602 // immediately re-admit after a force-deopt
6603 // and the infinite loop returns.
6604 if is_downrec && downrec_admit_blocked {
6605 return false;
6606 }
6607 // Primary arm: `dispatchable=true` traces
6608 // (R3d-lifted DownRec or normal traces).
6609 // Fallback arm: R3c-shape `dispatchable=false`
6610 // DownRec traces (single-CMP guard kept
6611 // pinned because the 90% miss-rate would
6612 // make blind admit perf-negative).
6613 t.dispatchable || is_downrec
6614 })
6615 .cloned()
6616 }
6617 {
6618 // Path C #6 — borrow Rc<[T]> fields as &Rc<[T]> instead
6619 // of cloning. The outer `ct: Rc<CompiledTrace>` is held
6620 // across the entire dispatch block so the fields outlive
6621 // all consumers. Saves 5 Rc::clone per dispatch.
6622 let entry_fn = ct.entry;
6623 let head_pc_val = ct.head_pc;
6624 let window_size = ct.window_size;
6625 let exit_tags = &ct.exit_tags;
6626 let per_exit_tags = &ct.per_exit_tags;
6627 let per_exit_inline = &ct.per_exit_inline;
6628 let compile_entry_tags = &ct.entry_tags;
6629 let global_tag_res_kind = ct.global_tag_res_kind;
6630 let exit_hit_counts = &ct.exit_hit_counts;
6631 let max_stack = cl.proto.max_stack as usize;
6632 let window_size_us = window_size as usize;
6633 let base_us = base as usize;
6634 // P12-S4-step3a — `reg_state` sized to the trace's
6635 // `window_size`, which today equals max_stack but
6636 // S4-step3b will expand for inlined frames.
6637 // Marshal-in still only writes [0..max_stack); slots
6638 // [max_stack..window_size) are zero-initialised and
6639 // filled by the trace's own GetUpval / arith.
6640 // P13-S13-D — reuse the Vm's amortised buffers
6641 // instead of allocating fresh Vecs each dispatch.
6642 // mem::take leaves an empty placeholder we restore
6643 // at the end of the dispatch block (success +
6644 // deopt paths both fall through to the restore).
6645 let mut entry_tags: Vec<u8> = std::mem::take(&mut self.jit.entry_tags_buf);
6646 entry_tags.clear();
6647 entry_tags.reserve(max_stack);
6648 // v2.0 Track-R R3c — this trace was admitted via the
6649 // `downrec_link.is_some()` arm rather than the normal
6650 // `dispatchable=true` arm. The pre-invoke path
6651 // populates a reserved saved-PC slot just past the
6652 // normal register window so R3b's lowerer guard load
6653 // (`reg_state[window_size]`) compares the runtime
6654 // saved caller PC against the recorded `dr_return_pc`.
6655 //
6656 // v2.0 Track-R R3d — drop the `!ct.dispatchable`
6657 // gate. After R3d lifts `dispatchable = true` for
6658 // multi-way guards, the trace's body still emits the
6659 // R3b/R3d sentinel shape on return — the saved-PC slot
6660 // and post-invoke classifier must keep firing.
6661 // `downrec_link.is_some()` is the unique structural
6662 // signal that the trace closes via DownRec.
6663 let is_downrec_entry = ct.downrec_link.is_some();
6664 let mut reg_state: Vec<i64> = std::mem::take(&mut self.jit.reg_state_buf);
6665 reg_state.clear();
6666 // v2.0 Track-R R3c — when admitting a downrec trace,
6667 // size the buffer to `window_size + 1` so the lowerer
6668 // can `load(I64, ..., reg_state, window_size * 8)`
6669 // for the saved caller PC guard input. The extra slot
6670 // is the LAST element so cranelift's existing
6671 // `0..window_size` accesses are unaffected.
6672 let reg_state_len = if is_downrec_entry {
6673 window_size_us + 1
6674 } else {
6675 window_size_us
6676 };
6677 reg_state.resize(reg_state_len, 0i64);
6678 let mut dispatch_ok = true;
6679 for i in 0..max_stack {
6680 let v = self.stack[base_us + i];
6681 let (tag, raw) = v.unpack();
6682 entry_tags.push(tag);
6683 // P12-S12-C v3 — entry tag guard. The trace's IR
6684 // is specialised to the compile-time entry tags
6685 // (via current_kinds propagation from
6686 // from_entry_tag). A runtime tag mismatch means
6687 // body ops would mis-interpret raw bits (e.g.
6688 // treat a Str pointer as Int payload → garbage).
6689 // Skip dispatch on mismatch so interp handles
6690 // this entry shape; the trace stays cached for
6691 // future entries that match.
6692 if i < compile_entry_tags.len() && tag != compile_entry_tags[i] {
6693 dispatch_ok = false;
6694 break;
6695 }
6696 match tag {
6697 // Int / Float / Table / Nil all marshal
6698 // to raw payload cleanly; the trace's IR
6699 // treats the 8-byte slot as an i64 (with
6700 // f64 ops bitcasting around the boundary).
6701 crate::runtime::value::raw::INT
6702 | crate::runtime::value::raw::FLOAT
6703 | crate::runtime::value::raw::TABLE
6704 | crate::runtime::value::raw::CLOSURE
6705 // P12-S12-B-v2 — Native iter slots (e.g.
6706 // R[A] = ipairs_iter) are present in
6707 // generic-for traces; the raw bits are a
6708 // valid `*mut NativeClosure` and round-trip
6709 // cleanly.
6710 | crate::runtime::value::raw::NATIVE
6711 // P12-S12-C v1 — Str slots show up in
6712 // string-concat traces; raw bits = `*mut
6713 // LuaStr` (interned, GC-managed). Round-
6714 // trips cleanly as a heap pointer.
6715 | crate::runtime::value::raw::STR
6716 | crate::runtime::value::raw::NIL => {
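6717 // SAFETY: `raw` is the payload half of `v.unpack()`; reading its `zero` field reinterprets that payload as an integer, which is how the trace ABI consumes every tag admitted by this arm. NOTE(review): assumes `zero` is the integer view of a payload union — confirm against the payload type's definition.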
6718 reg_state[i] = unsafe { raw.zero as i64 };
6719 }
6720 _ => {
6721 dispatch_ok = false;
6722 break;
6723 }
6724 }
6725 }
6726
6727 if dispatch_ok {
6728 debug_assert_eq!(head_pc_val, pc, "trace cache hit's head_pc != pc");
6729 self.jit.pending_err = None;
6730 // P12-S4-step4b-C-2 — snapshot the pre-entry frame
6731 // count. A cmp@d>0 side-exit calls the materialize
6732 // helper which pushes inlined frames onto
6733 // `vm.frames`; on deopt those frames must be popped
6734 // before falling through to the interpreter, else
6735 // the stack grows unboundedly per deopted dispatch.
6736 let pre_frames = self.frames.len();
6737 // v2.0 Track-R R3c — saved-PC slot population. The
6738 // recorded `dr_return_pc` on the closing trace is
6739 // the caller's resume PC captured at a depth>0
6740 // Return push (recorder push site, see R3a verdict
6741 // §3). The natural runtime analogue for self-
6742 // stitch is the dispatching frame's PARENT frame's
6743 // PC: the trace's head_pc sits inside a Lua frame,
6744 // and the parent (caller) frame's `pc` is what
6745 // luna would observe as `[base-8]` in the LJ
6746 // `asm_retf` shape (`lj_asm_arm64.h:565`). When
6747 // the parent isn't a Lua frame (top-level dispatch
6748 // — first invocation through `call_value`), no
6749 // saved PC exists; we write 0, which always
6750 // mismatches the recorded `dr_return_pc != 0`
6751 // invariant pinned by R3b
6752 // (`crates/luna-jit/src/jit_backend/trace.rs:7206
6753 // debug_assert!(dr_return_pc != 0, ...)`).
6754 if is_downrec_entry {
6755 let saved_pc: i64 = if pre_frames >= 2 {
6756 match &self.frames[pre_frames - 2] {
6757 CallFrame::Lua(parent) => parent.pc as i64,
6758 CallFrame::Cont(_) => 0,
6759 }
6760 } else {
6761 0
6762 };
6763 reg_state[window_size_us] = saved_pc;
6764 }
6765 // v1.3 Phase AOT Stage 7 sub-piece 4 — `LUNA_AOT_PROBE`
6766 // diagnostic hook. Both JIT- and AOT-origin traces go through
6767 // this arm, so the AOT smoke test can use the probe to verify
6768 // mcode actually executed. `jit_probe_enabled()` is expected
6769 // to cache the env read (`OnceLock`) so it is a one-time cost
6770 // per process. The `dispatched == 0` check limits the probe to
6771 // dispatches made while that counter is still zero (it is bumped
6772 // right after the trace returns): `aot_trace_fired pc=N` once.
6773 if jit_probe_enabled() && self.jit.counters.dispatched == 0 {
6774 eprintln!("luna-runtime-helpers: aot_trace_fired pc={head_pc_val}");
6775 }
6776 let continuation_pc = {
6777 // v1.1 A1 Session A — chunk_compiler.enter
6778 // (CraneliftBackend delegates to enter_jit;
6779 // NullJitBackend returns an inert guard).
6780 let vm_ptr: *mut Vm = self;
6781 let _guard = self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
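6782 // SAFETY: `entry_fn` is `ct.entry`, and `ct` is held for the whole dispatch block; `reg_state` was resized above to `window_size` slots (+1 saved-PC slot for downrec entries) and outlives the call. NOTE(review): assumes the compiled trace only touches slots inside that range — confirm against the lowerer.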
6783 unsafe { entry_fn(reg_state.as_mut_ptr()) }
6784 };
6785 self.jit.counters.dispatched += 1;
6786
6787 if self.jit.pending_err.is_some() {
6788 self.jit.pending_err = None;
6789 self.jit.counters.deopt += 1;
6790 // P12-S4-step4b-C-2 — unwind any helper-pushed
6791 // inlined frames before the interpreter resumes.
6792 // Don't restore reg_state — the trace's partial
6793 // writes are discarded; interp re-executes from
6794 // the original `pc`.
6795 while self.frames.len() > pre_frames {
6796 frames_pop_sync(&mut self.frames, &mut self.frames_top);
6797 }
6798 if is_downrec_entry {
6799 // v2.0 Track-R R3c — pending_err observed
6800 // mid-trace inside a downrec admit. Treat
6801 // it as a guard miss: bump `downrec_deopt`
6802 // and suppress the next downrec admit so
6803 // interp can advance past `head_pc` and
6804 // the same trace doesn't immediately re-
6805 // fire on the next loop iteration.
6806 self.jit.counters.downrec_deopt += 1;
6807 self.jit.suppress_downrec_admit_once = true;
6808 }
6809 } else if is_downrec_entry && {
6810 // v2.0 Track-R R3d — only enter the R3c/R3d
6811 // downrec classifier for returns whose shape
6812 // matches the lowerer's `downrec_idx_opt` tail
6813 // emit: either the stitch_blk DOWNREC sentinel
6814 // (HIT) or the deopt_blk GLOBAL-sentinel-with-
6815 // body==head_pc (MISS via guard fail). Any
6816 // other return from a downrec trace (intermediate
6817 // body cmp side-exit, GetField inference fail,
6818 // etc.) carries a different sentinel/body shape
6819 // and means the body exited BEFORE reaching the
6820 // downrec close — classify those through the
6821 // normal decode path (else branch below) so
6822 // reg_state restores + pc advances correctly.
6823 // The pre-R3d behavior (R3c) classified them all
6824 // as MISS and skipped the normal restore, which
6825 // inflated `downrec_deopt` with non-downrec
6826 // events and lost the trace's mid-flight writes.
6827 let raw_ret = continuation_pc as u64;
6828 let from_side_trace = (raw_ret >> 63) & 1 == 1;
6829 let sentinel_code = if from_side_trace {
6830 ((raw_ret >> 56) & 0x7F) as u32
6831 } else {
6832 0
6833 };
6834 let raw_body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6835 let global_deopt_code = crate::jit::trace_types::encode_side_sentinel(
6836 crate::jit::trace_types::SIDE_SENT_KIND_GLOBAL,
6837 0,
6838 );
6839 from_side_trace
6840 && (crate::jit::trace_types::is_downrec_sentinel(sentinel_code)
6841 || (sentinel_code == global_deopt_code
6842 && raw_body == head_pc_val as u64))
6843 } {
6844 // R3d downrec event classifier.
6845 let raw_ret = continuation_pc as u64;
6846 let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6847 if crate::jit::trace_types::is_downrec_sentinel(sentinel_code) {
6848 // Guard HIT — saved_pc matched one of the
6849 // baked candidates and the trace's
6850 // `stitch_blk` arm returned the DOWNREC
6851 // sentinel. Cycle-safety checkpoint:
6852 // decrement budget; on underflow,
6853 // reclassify as deopt + reset budget.
6854 // R3d's `STITCH_DEPTH_DEFAULT = 32` lets
6855 // ~all natural HITs in a hot loop fire
6856 // before reset pressure.
6857 if self.jit.stitch_depth_remaining > 0 {
6858 self.jit.stitch_depth_remaining -= 1;
6859 self.jit.counters.downrec_dispatched += 1;
6860 } else {
6861 self.jit.counters.downrec_deopt += 1;
6862 self.jit.stitch_depth_remaining =
6863 crate::vm::jit_state::JitState::STITCH_DEPTH_DEFAULT;
6864 }
6865 } else {
6866 // Guard MISS via the lowerer's deopt_blk
6867 // arm (GLOBAL sentinel + body == head_pc).
6868 // The deopt_blk emit performs the
6869 // store-back via `emit_store_back_and_return_pc`,
6870 // so the live stack already reflects the
6871 // body's writes; no extra restore needed
6872 // from the dispatcher side.
6873 self.jit.counters.downrec_deopt += 1;
6874 }
6875 self.jit.suppress_downrec_admit_once = true;
6876 // Pop helper-pushed inlined frames (defensive —
6877 // R3d's emit shape doesn't push frames in the
6878 // tail, but a body side-exit before reaching
6879 // the tail may have via the materialize helper).
6880 while self.frames.len() > pre_frames {
6881 frames_pop_sync(&mut self.frames, &mut self.frames_top);
6882 }
6883 self.jit.reg_state_buf = reg_state;
6884 self.jit.entry_tags_buf = entry_tags;
6885 continue;
6886 } else {
6887 // Restore each slot using the trace's
6888 // exit-tag analysis (see ExitTag docs).
6889 // P12-S4-step4b-C-2 — decode the IR's
6890 // side-exit shape. Upper 32 bits = (site_idx
6891 // + 1) for inline cmp side-exits, 0 for
6892 // legacy clean-tail / non-inline exits.
6893 // P15-A v2-C-A0 — decode lives in
6894 // `crate::jit::trace::decode_exit_shape` so
6895 // v2-C-A3 can reuse it with the SIDE TRACE's
6896 // shape inputs when the sentinel bit
6897 // (v2-C-A2) is set on `raw_ret`.
6898 let raw_ret = continuation_pc as u64;
6899 // P15-A v2-C-A3 — side-trace return decode.
6900 // Bit 63 of `raw_ret` is the side-trace
6901 // marker the parent's IR OR'd in when it
6902 // tail-called into a wired child trace.
6903 // Bits 56..=62 carry the sentinel code (the
6904 // cache key into the parent's
6905 // `side_trace_cache`); bits 0..=55 are the
6906 // child's own return value (encoded site or
6907 // plain cont_pc) which we MUST decode using
6908 // the CHILD's per_exit_inline / per_exit_tags
6909 // / exit_tags / exit_hit_counts — not the
6910 // parent's. The dispatcher snapshot read
6911 // above holds the parent's shapes; when bit
6912 // 63 is set we re-fetch the child's via the
6913 // sentinel-keyed cache.
6914 let from_side_trace = (raw_ret >> 63) & 1 == 1;
6915 let (
6916 decode_inline,
6917 decode_tags,
6918 decode_exit_tags,
6919 decode_hit_counts,
6920 decode_body,
6921 ) = if from_side_trace {
6922 let sentinel_code = ((raw_ret >> 56) & 0x7F) as u32;
6923 let body = raw_ret & 0x00FF_FFFF_FFFF_FFFFu64;
6924 let traces = cl.proto.traces.borrow();
6925 let child_idx = traces
6926 .iter()
6927 .find(|t| t.head_pc == head_pc_val)
6928 .and_then(|pct| {
6929 pct.side_trace_cache.borrow().get(&sentinel_code).copied()
6930 });
6931 if let Some(idx) = child_idx
6932 && let Some(child) = traces.get(idx as usize)
6933 {
6934 if crate::jit::trace::v2c_probe_enabled() {
6935 eprintln!(
6936 "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child_idx={} child.n_ops={} child.head_pc={} child.window_size={} parent.pc={} parent.window_size={} child.dispatchable={} child.inline_abort={}",
6937 sentinel_code,
6938 body,
6939 idx,
6940 child.n_ops,
6941 child.head_pc,
6942 child.window_size,
6943 pc,
6944 window_size,
6945 child.dispatchable,
6946 child.is_inline_abort_close,
6947 );
6948 }
6949 (
6950 child.per_exit_inline.clone(),
6951 child.per_exit_tags.clone(),
6952 child.exit_tags.clone(),
6953 child.exit_hit_counts.clone(),
6954 body,
6955 )
6956 } else {
6957 if crate::jit::trace::v2c_probe_enabled() {
6958 eprintln!(
6959 "[v2c-A3-decode] sentinel={:#04x} body={:#018x} child MISS (fallback parent shapes)",
6960 sentinel_code, body,
6961 );
6962 }
6963 // Cache miss — fall back to parent
6964 // shapes with the body bits. Best-
6965 // effort; the trace_side_trace_
6966 // shape_mismatch_count records this
6967 // path indirectly (close-handler
6968 // skips wiring on mismatch so we
6969 // shouldn't reach here when shape
6970 // gate held).
6971 (
6972 per_exit_inline.clone(),
6973 per_exit_tags.clone(),
6974 exit_tags.clone(),
6975 exit_hit_counts.clone(),
6976 body,
6977 )
6978 }
6979 } else {
6980 // P15-A v2-D — dispatcher-level side-trace
6981 // invocation. Replaces v2-C's universal IR
6982 // gate (`load + icmp + brif` at every
6983 // emit_store_back callsite, which A6/A7
6984 // measured as a net perf regression).
6985 // A8 fast-path: skip the tentative decode +
6986 // child lookup entirely when `has_any_side
6987 // _wired == false` (the common case until
6988 // the first side trace compiles for this
6989 // parent). For fib_10_x10k and other tight
6990 // short-trace workloads where most parent
6991 // traces never get a wired child, this
6992 // collapses the v2-D overhead to a single
6993 // `Cell::get()` on the cold path.
6994 // A8-revert: A8 had `parent_has_side` short-
6995 // circuit + snapshot hoist; mini N=3 showed
6996 // A8 lost the btrees_d8 1.02× win (dropped
6997 // to 0.95×) WITHOUT helping fib_10 (same
6998 // 0.86×). Drop A8 — accept the always-run
6999 // v2-D path; the tentative decode + cell
7000 // load is cheaper than the cost A8 added.
7001 {
7002 let tentative = crate::jit::trace::decode_exit_shape(
7003 raw_ret,
7004 per_exit_inline,
7005 per_exit_tags,
7006 exit_tags,
7007 );
7008 let tentative_exit_idx = tentative.exit_hit_idx;
7009 let child_invoke = {
7010 let traces = cl.proto.traces.borrow();
7011 traces.iter().find(|t| t.head_pc == head_pc_val).and_then(
7012 |pct| {
7013 let cell =
7014 pct.exit_side_trace_ptrs.get(tentative_exit_idx)?;
7015 let fn_ptr = cell.get();
7016 if fn_ptr.is_null() {
7017 return None;
7018 }
7019 traces
7020 .iter()
7021 .find(|t| {
7022 t.entry as *const () as *const u8 == fn_ptr
7023 })
7024 .map(|child| {
7025 (
7026 child.entry,
7027 child.per_exit_inline.clone(),
7028 child.per_exit_tags.clone(),
7029 child.exit_tags.clone(),
7030 child.exit_hit_counts.clone(),
7031 )
7032 })
7033 },
7034 )
7035 };
7036 if let Some((cent, cpi, cpt, cet, chc)) = child_invoke {
7037 let child_raw_ret = {
7038 // v1.1 A1 Session A — chunk_compiler.enter
7039 // (side-trace entry).
7040 let vm_ptr: *mut Vm = self;
7041 let _guard =
7042 self.jit.chunk_compiler.enter(vm_ptr, Some(cl));
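7043 // SAFETY: `cent` is the `entry` fn pointer of the child trace matched against `exit_side_trace_ptrs` in `cl.proto.traces` above, called with the dispatch `reg_state` buffer. NOTE(review): assumes the child's compiled code is still alive and that `reg_state` is large enough for the child's window — confirm.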
7044 unsafe { cent(reg_state.as_mut_ptr()) }
7045 };
7046 (cpi, cpt, cet, chc, child_raw_ret as u64)
7047 } else {
7048 (
7049 per_exit_inline.clone(),
7050 per_exit_tags.clone(),
7051 exit_tags.clone(),
7052 exit_hit_counts.clone(),
7053 raw_ret,
7054 )
7055 }
7056 }
7057 };
7058 let decoded = crate::jit::trace::decode_exit_shape(
7059 decode_body,
7060 &decode_inline,
7061 &decode_tags,
7062 &decode_exit_tags,
7063 );
7064 let site_id = decoded.site_id;
7065 let cont_pc = decoded.cont_pc;
7066 let exit_hit_idx = decoded.exit_hit_idx;
7067 let exit_tags_for_pc = decoded.exit_tags_for_pc;
7068 // P15-A v2-C-A3 — for side-trace returns
7069 // force using_global_exit_tags=false so the
7070 // restore loop always takes the per-tag slow
7071 // path (the child's global_tag_res_kind
7072 // classification isn't plumbed through yet
7073 // — TODO for a future polish step).
7074 let using_global_exit_tags = if from_side_trace {
7075 false
7076 } else {
7077 decoded.using_global_exit_tags
7078 };
7079 // P15-prep — increment the counter (saturate
7080 // at u32::MAX to avoid wrap on long runs).
7081 // P15-A v1 — track whether this increment is
7082 // the one that crossed `HOTEXIT_THRESHOLD`
7083 // (transition: previous v < threshold, new v
7084 // == threshold). The side-trace start is
7085 // deferred to just before `continue;` so
7086 // vm.stack and frame.pc are fully restored
7087 // (the snapshot reads post-restore values).
7088 let mut side_trace_should_start = false;
7089 // P15-A v2-C-A3 — for side-trace returns the
7090 // counter to bump is the CHILD's (decoded
7091 // shape lookup) — `exit_hit_idx` is into the
7092 // decoded layout, so use the matching
7093 // `decode_hit_counts`. For parent decode
7094 // they're aliased (clone of the parent's
7095 // own Rc).
7096 if let Some(c) = decode_hit_counts.get(exit_hit_idx) {
7097 let v = c.get();
7098 if v < u32::MAX {
7099 c.set(v + 1);
7100 }
7101 if v + 1 == crate::jit::trace::HOTEXIT_THRESHOLD
7102 && self.jit.active_trace.is_none()
7103 && self.jit.trace_enabled
7104 {
7105 side_trace_should_start = true;
7106 }
7107 }
7108 // P12-S4-step4b-C-2 — at an inline cmp@d>0
7109 // side-exit, the helper has pushed N frames on
7110 // top of the trace head's frame and
7111 // `exit_tags_for_pc.len()` covers the full
7112 // window (caller + each inlined frame's
7113 // window). Slots beyond `max_stack` belong to
7114 // an inlined frame: their `Untouched` entries
7115 // default to Nil (no entry-tag fallback —
7116 // marshal-in only captured caller slots) and
7117 // we write to interp stack at `base + i` which
7118 // mirrors `op_offsets`-derived layout.
7119 let slot_count = exit_tags_for_pc.len();
7120 // P12-S4-step4b-C-2 — the helper only extends
7121 // vm.stack up to the deepest pushed frame's
7122 // window, but the exit_tags snapshot covers
7123 // the trace's full `window_size` (which
7124 // includes depth-N+1 scratch slots that the
7125 // trace's IR may have written without a
7126 // matching pushed frame). Extend with Nil so
7127 // the write at the tail doesn't panic; these
7128 // slots get overwritten by the writeback loop
7129 // and won't leak meaningful data past the
7130 // pushed frames' R[0..max_stack) windows.
7131 if self.stack.len() < base_us + slot_count {
7132 self.stack
7133 .resize(base_us + slot_count, crate::runtime::Value::Nil);
7134 }
7135 // P13-S13-E — fast-path restore loop. When
7136 // we landed on the global `exit_tags`,
7137 // dispatch on the compile-time
7138 // classification: skip the loop entirely
7139 // for `AllUntouched`, do a tag-free
7140 // `Value::Int(...)` write per slot for
7141 // `AllInt`, otherwise fall through to the
7142 // general match-arm loop. site_id > 0
7143 // (inline frame mat) and per_exit_tags
7144 // hits always take the general path —
7145 // their per-side-exit shapes aren't
7146 // pre-classified yet.
7147 let fast_path_taken = if using_global_exit_tags {
7148 match global_tag_res_kind {
7149 crate::jit::trace::TagResKind::AllUntouched => {
7150 // No-op: vm.stack already
7151 // matches the trace's post-
7152 // entry state for these
7153 // slots (entry values not
7154 // overridden, or already
7155 // spilled by helpers).
7156 true
7157 }
7158 crate::jit::trace::TagResKind::AllInt => {
7159 for i in 0..slot_count {
7160 self.stack[base_us + i] =
7161 crate::runtime::Value::Int(reg_state[i]);
7162 }
7163 true
7164 }
7165 crate::jit::trace::TagResKind::Mixed => false,
7166 }
7167 } else {
7168 false
7169 };
7170 if !fast_path_taken {
7171 for i in 0..slot_count {
7172 let tag = match exit_tags_for_pc[i] {
7173 crate::jit::trace::ExitTag::Untouched => {
7174 if i < max_stack {
7175 entry_tags[i]
7176 } else {
7177 crate::runtime::value::raw::NIL
7178 }
7179 }
7180 crate::jit::trace::ExitTag::Int => {
7181 crate::runtime::value::raw::INT
7182 }
7183 crate::jit::trace::ExitTag::Float => {
7184 crate::runtime::value::raw::FLOAT
7185 }
7186 crate::jit::trace::ExitTag::Table => {
7187 crate::runtime::value::raw::TABLE
7188 }
7189 crate::jit::trace::ExitTag::Closure => {
7190 crate::runtime::value::raw::CLOSURE
7191 }
7192 // P12-S6-A1 — trace actively wrote Nil
7193 // to this slot (e.g. via Op::LoadNil).
7194 // Restore as Nil regardless of the entry
7195 // tag, since the i64 payload is 0 and
7196 // packing as the entry tag (e.g. INT)
7197 // would mis-type the slot.
7198 crate::jit::trace::ExitTag::Nil => {
7199 crate::runtime::value::raw::NIL
7200 }
7201 // P12-S12-C v2 — trace wrote a Str ptr
7202 // to this slot (LoadK Str / Move from
7203 // Str / Concat result). Restore as
7204 // Value::Str with raw bits round-
7205 // tripped.
7206 crate::jit::trace::ExitTag::Str => {
7207 crate::runtime::value::raw::STR
7208 }
7209 };
7210 // SAFETY: tag is from a verified slot
7211 // (entry validated above) or pinned by
7212 // the exit-tag match above (INT, FLOAT, TABLE, CLOSURE, NIL or STR).
7213 // The raw payload sits in reg_state[i].
7214 // Stack was extended by the materialize
7215 // helper for inline frames.
7216 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7217 self.stack[base_us + i] = unsafe {
7218 Value::pack(
7219 tag,
7220 crate::runtime::value::RawVal {
7221 zero: reg_state[i] as u64,
7222 },
7223 )
7224 };
7225 }
7226 }
7227 // P12-S4-step4b-C-2 — for non-inline exits the
7228 // helper was never called (no metas chain for
7229 // this cont_pc), so `frames.last()` is the
7230 // trace head's frame and we set its pc to
7231 // cont_pc as before. For inline exits the
7232 // helper baked the side-exit PC into the
7233 // innermost frame's `pc` at push time
7234 // (chain.last().pc was overridden at emit),
7235 // so this assignment to `frames.last_mut().pc
7236 // = cont_pc` is a redundant-but-correct
7237 // confirmation.
7238 let _ = &per_exit_inline; // hold the Rc alive across dispatch
7239 // P12-S4-step4b-C-2 — for inline side-exits the
7240 // helper has pushed N frames on top. The trace
7241 // head frame is at `pre_frames - 1`; set its
7242 // pc to `head_resume_pc` so when the chain
7243 // eventually pops back to it, interp resumes
7244 // PAST the trace's depth-0 Op::Call instead of
7245 // restarting from `head_pc` and re-triggering
7246 // dispatch (infinite loop). The innermost
7247 // (helper-pushed) frame already has its pc
7248 // baked in at compile time, but we still
7249 // assign `cont_pc` below for parity with the
7250 // non-inline path (no-op).
7251 if site_id > 0 {
7252 let idx = (site_id - 1) as usize;
7253 let head_resume_pc = decode_inline[idx].head_resume_pc;
7254 if pre_frames > 0 {
7255 if let CallFrame::Lua(f) = &mut self.frames[pre_frames - 1] {
7256 f.pc = head_resume_pc;
7257 }
7258 }
7259 }
7260 let frames_len_now = self.frames.len();
7261 // SAFETY: `unwrap_unchecked` is sound only if `self.frames` is non-empty: trace dispatch runs on behalf of the executing Lua frame and nothing in this arm pops frames. NOTE(review): assumes `pre_frames >= 1` — confirm at the dispatch entry.
7262 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7263 CallFrame::Lua(fmut) => {
7264 if crate::jit::trace::v2c_probe_enabled() {
7265 eprintln!(
7266 "[v2c-set-pc] from_side={} sentinel_or_raw={:#018x} prev_pc={} new_cont_pc={} site_id={} frames.len={} pre_frames={} max_stack={}",
7267 from_side_trace,
7268 raw_ret,
7269 fmut.pc,
7270 cont_pc,
7271 site_id,
7272 frames_len_now,
7273 pre_frames,
7274 max_stack,
7275 );
7276 }
7277 fmut.pc = cont_pc;
7278 }
7279 _ => unreachable!("Cont frame at trace dispatch"),
7280 }
7281 // P15-A v1 — deferred side-trace start. The
7282 // increment block above flagged this exit's
7283 // hit count crossing HOTEXIT_THRESHOLD; now
7284 // that vm.stack is restored and frame.pc is
7285 // settled, snapshot entry_tags from the
7286 // resume frame's window and create the
7287 // recorder. The recorder's first push fires
7288 // on the next interp iteration at cont_pc.
7289 //
7290 // `head_proto` for the side trace = cl.proto
7291 // (trace JIT only inlines self-recursive
7292 // calls today, so cont_pc always lands in
7293 // the same proto as the parent). Frame base
7294 // is the resume frame (top of `self.frames`
7295 // — inline-pushed frames moved this).
7296 if side_trace_should_start {
7297 let (resume_base, resume_proto) = match self.frames.last() {
7298 Some(CallFrame::Lua(f)) => (f.base as usize, f.closure.proto),
7299 _ => (base_us, cl.proto),
7300 };
7301 let resume_max_stack = resume_proto.max_stack as usize;
7302 let mut side_entry_tags: Vec<u8> = Vec::with_capacity(resume_max_stack);
7303 // Extend stack if cont_pc's frame window
7304 // overhangs the current stack len (rare,
7305 // but inline-pushed frame stack writes
7306 // only covered the trace's writeback).
7307 if self.stack.len() < resume_base + resume_max_stack {
7308 self.stack.resize(
7309 resume_base + resume_max_stack,
7310 crate::runtime::Value::Nil,
7311 );
7312 }
7313 for i in 0..resume_max_stack {
7314 let (tag, _) = self.stack[resume_base + i].unpack();
7315 side_entry_tags.push(tag);
7316 }
7317 self.jit.active_trace =
7318 Some(Box::new(crate::jit::trace::TraceRecord::start_side_trace(
7319 resume_proto,
7320 cont_pc,
7321 side_entry_tags,
7322 cl.proto,
7323 head_pc_val,
7324 exit_hit_idx,
7325 )));
7326 self.jit.recording_frame_base = self.frames.len() - 1;
7327 self.jit.counters.side_trace_started += 1;
7328 }
7329 // P13-S13-D — put the dispatch buffers back
7330 // before the `continue;` so the next
7331 // dispatch picks up the same allocation.
7332 self.jit.reg_state_buf = reg_state;
7333 self.jit.entry_tags_buf = entry_tags;
7334 continue;
7335 }
7336 }
7337 // P13-S13-D — !dispatch_ok / deopt path / non-cont
7338 // exit also restore the buffers before falling
7339 // through to the interp.
7340 self.jit.reg_state_buf = reg_state;
7341 self.jit.entry_tags_buf = entry_tags;
7342 }
7343
7344 // PUC `vmfetch` increments savedpc BEFORE firing traceexec, so
7345 // hook code that consults `currentpc = savedpc - 1` lands on the
7346 // instruction now executing. luna mirrors that by advancing
7347 // `f.pc` to `pc + 1` before the hook block — local_at /
7348 // getinfo / line attribution all read f.pc, and the existing
7349 // `pc - 1` convention in those helpers then yields the current
7350 // instruction's pc (db.lua :696: local `A` visible at the
7351 // chunk's return line once OP_CLOSURE has advanced pc).
7352 //
7353 // Inline `top_frame_mut` for the hot path: top is guaranteed Lua
7354 // (cont frames drained above) so the and_then/Option layers are
7355 // dead weight.
7356 // SAFETY: `self.frames` is non-empty here — the loop is executing the top Lua frame (cont frames drained above) — so `last_mut()` is `Some` and `unwrap_unchecked` cannot observe `None`.
7357 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
7358 CallFrame::Lua(fmut) => fmut.pc = pc + 1,
7359 _ => unreachable!("Cont frame at pc bump"),
7360 }
7361
7362 // count + line hooks (PUC traceexec): before executing the
7363 // instruction. Skipped while the hook itself runs.
7364 // (Parens here are load-bearing — without them `&&` binds tighter
7365 // than `||` and the `!in_hook` guard only gates the rust-hook arm,
7366 // letting a Lua line hook recurse into itself → stack overflow
7367 // on db.lua line-hook assertions. Matches the `hook_call_with` /
7368 // `hook_return` predicate shape at lines 2245 / 2279 / 2294 / 4023.)
7369 if !self.in_hook && (self.hook.func.is_some() || self.hook.rust_func.is_some()) {
7370 let lines = &cl.proto.lines;
7371 let cur_line = if lines.is_empty() {
7372 None
7373 } else {
7374 Some(lines[(pc as usize).min(lines.len() - 1)] as i64)
7375 };
7376 // count hook: fire every `count_base` instructions
7377 if self.hook.count {
7378 self.hook.count_left -= 1;
7379 if self.hook.count_left <= 0 {
7380 self.hook.count_left = self.hook.count_base;
7381 // hooked function is the running Lua frame: its frame
7382 // is on the stack, so no synthetic C level is needed.
7383 self.run_hook(b"count", cur_line, false)?;
7384 }
7385 }
7386 // line hook: fire on a fresh frame, a backward jump (loop), or a
7387 // change of source line.
7388 if self.hook.line {
7389 if lines.is_empty() {
7390 // PUC: a stripped chunk has no line info, so
7391 // `getfuncline` returns -1. The line hook still fires
7392 // on the first instruction of the new frame (where
7393 // `npci <= oldpc` holds at oldpc=0), with the line
7394 // pushed as `nil` instead of an integer (db.lua :1030
7395 // "hook called without debug info for 1st instruction").
7396 if oldpc == u32::MAX {
7397 self.run_hook(b"line", None, false)?;
7398 self.top_frame_mut().hook_oldpc = pc;
7399 }
7400 } else {
7401 let newline = lines[(pc as usize).min(lines.len() - 1)];
7402 // PUC `traceexec`: fire on frame entry (`oldpc == MAX`),
7403 // on a backward jump (`pc < oldpc` — strict; an equal pc
7404 // would re-fire the install-site after `oldpc = pc`),
7405 // or when the source line changes.
7406 let fire = oldpc == u32::MAX
7407 || pc < oldpc
7408 || newline != lines[(oldpc as usize).min(lines.len() - 1)];
7409 if fire {
7410 self.run_hook(b"line", Some(newline as i64), false)?;
7411 }
7412 self.top_frame_mut().hook_oldpc = pc;
7413 }
7414 }
7415 }
7416
7417 match inst.op() {
7418 Op::Move => {
7419 let v = self.r(base, inst.b());
7420 self.set_r(base, inst.a(), v);
7421 }
7422 Op::LoadI => self.set_r(base, inst.a(), Value::Int(inst.sbx() as i64)),
7423 Op::LoadF => self.set_r(base, inst.a(), Value::Float(inst.sbx() as f64)),
7424 Op::LoadK => {
7425 let v = cl.proto.consts[inst.bx() as usize];
7426 self.set_r(base, inst.a(), v);
7427 }
7428 Op::LoadKx => {
7429 let extra = cl.proto.code[self.pc_of_top() as usize];
7430 self.bump_pc();
7431 let v = cl.proto.consts[extra.ax() as usize];
7432 self.set_r(base, inst.a(), v);
7433 }
7434 Op::LoadFalse => self.set_r(base, inst.a(), Value::Bool(false)),
7435 Op::LFalseSkip => {
7436 self.set_r(base, inst.a(), Value::Bool(false));
7437 self.bump_pc();
7438 }
7439 Op::LoadTrue => self.set_r(base, inst.a(), Value::Bool(true)),
7440 Op::LoadNil => {
7441 let a = inst.a();
7442 for i in 0..=inst.b() {
7443 self.set_r(base, a + i, Value::Nil);
7444 }
7445 }
7446 Op::GetUpval => {
7447 let v = self.upval_get(cl, inst.b());
7448 self.set_r(base, inst.a(), v);
7449 }
7450 Op::SetUpval => {
7451 let v = self.r(base, inst.a());
7452 self.upval_set(cl, inst.b(), v);
7453 }
7454 Op::GetTabUp => {
7455 let t = self.upval_get(cl, inst.b());
7456 let key = cl.proto.consts[inst.c() as usize];
7457 self.op_index(t, key, base + inst.a())?;
7458 }
7459 Op::GetTable => {
7460 let t = self.r(base, inst.b());
7461 let key = self.r(base, inst.c());
7462 self.op_index(t, key, base + inst.a())?;
7463 }
7464 Op::GetI => {
7465 let t = self.r(base, inst.b());
7466 self.op_index(t, Value::Int(inst.c() as i64), base + inst.a())?;
7467 }
7468 Op::GetField => {
7469 let t = self.r(base, inst.b());
7470 let key = cl.proto.consts[inst.c() as usize];
7471 // v1.2 D4 A1 — fast path: known-Str const key + no
7472 // metatable on the table → skip `op_index` /
7473 // `index_step`'s MAX_TAG_LOOP setup and the outer
7474 // `Value` match. Falls through to the slow path
7475 // unchanged when either invariant breaks (so
7476 // `__index` metamethods, non-Table receivers, and
7477 // non-Str keys behave exactly as before).
7478 if let Value::Table(tb) = t
7479 && tb.metatable().is_none()
7480 && let Value::Str(s) = key
7481 {
7482 let v = tb.get_str(s);
7483 self.stack[(base + inst.a()) as usize] = v;
7484 } else {
7485 self.op_index(t, key, base + inst.a())?;
7486 }
7487 }
7488 Op::SetTabUp => {
7489 let t = self.upval_get(cl, inst.a());
7490 let key = cl.proto.consts[inst.b() as usize];
7491 let v = self.r(base, inst.c());
7492 self.op_newindex(t, key, v)?;
7493 }
7494 Op::SetTable => {
7495 let t = self.r(base, inst.a());
7496 let key = self.r(base, inst.b());
7497 let v = self.r(base, inst.c());
7498 self.op_newindex(t, key, v)?;
7499 }
7500 Op::SetI => {
7501 let t = self.r(base, inst.a());
7502 let v = self.r(base, inst.c());
7503 self.op_newindex(t, Value::Int(inst.b() as i64), v)?;
7504 }
7505 Op::SetField => {
7506 let t = self.r(base, inst.a());
7507 let key = cl.proto.consts[inst.b() as usize];
7508 let v = self.r(base, inst.c());
7509 self.op_newindex(t, key, v)?;
7510 }
7511 Op::NewTable => {
7512 let t = self.heap.new_table();
7513 self.set_r(base, inst.a(), Value::Table(t));
7514 self.maybe_collect_garbage(base + inst.a() + 1);
7515 }
7516 Op::SetList => {
7517 let a = inst.a();
7518 let abs_a = base + a;
7519 let n = if inst.b() == 0 {
7520 self.top - (abs_a + 1)
7521 } else {
7522 inst.b()
7523 };
7524 let offset = if inst.k() {
7525 let extra = cl.proto.code[self.pc_of_top() as usize];
7526 self.bump_pc();
7527 extra.ax() as i64
7528 } else {
7529 inst.c() as i64
7530 };
7531 let Value::Table(t) = self.r(base, a) else {
7532 unreachable!("SETLIST on non-table");
7533 };
7534 for i in 1..=n {
7535 let v = self.r(base, a + i);
7536 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
7537 if let Err(TableError::Overflow) =
7538 unsafe { t.as_mut() }.set_int(&mut self.heap, offset + i as i64, v)
7539 {
7540 return Err(self.rt_err("table overflow"));
7541 }
7542 }
7543 // one barrier_back covers every store this op did — PUC's
7544 // `luaC_barrierback_` once-per-table optimisation
7545 self.heap
7546 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
7547 // the element temps above the table are now consumed
7548 self.maybe_collect_garbage(base + a + 1);
7549 }
7550 Op::SelfOp => {
7551 let o = self.r(base, inst.b());
7552 self.set_r(base, inst.a() + 1, o);
7553 // PUC OP_SELF's C is a constant index when the k-flag is
7554 // set; otherwise it points to a register that holds the
7555 // (constant-loaded) key. luna's compiler falls back to the
7556 // register form when the constant index exceeds OP_SELF's
7557 // 8-bit C field (5.1 big.lua's `a:findfield(...)` against
7558 // a table with 250+ string keys, where "findfield" lands
7559 // past const #255). The exec must honour the same split.
7560 let key = if inst.k() {
7561 cl.proto.consts[inst.c() as usize]
7562 } else {
7563 self.r(base, inst.c())
7564 };
7565 self.op_index(o, key, base + inst.a())?;
7566 }
7567 Op::Add => self.arith_rr(inst, base, ArithOp::Add)?,
7568 Op::Sub => self.arith_rr(inst, base, ArithOp::Sub)?,
7569 Op::Mul => self.arith_rr(inst, base, ArithOp::Mul)?,
7570 Op::Mod => self.arith_rr(inst, base, ArithOp::Mod)?,
7571 Op::Pow => self.arith_rr(inst, base, ArithOp::Pow)?,
7572 Op::Div => self.arith_rr(inst, base, ArithOp::Div)?,
7573 Op::IDiv => self.arith_rr(inst, base, ArithOp::IDiv)?,
7574 Op::BAnd => self.arith_rr(inst, base, ArithOp::BAnd)?,
7575 Op::BOr => self.arith_rr(inst, base, ArithOp::BOr)?,
7576 Op::BXor => self.arith_rr(inst, base, ArithOp::BXor)?,
7577 Op::Shl => self.arith_rr(inst, base, ArithOp::Shl)?,
7578 Op::Shr => self.arith_rr(inst, base, ArithOp::Shr)?,
7579 Op::Unm => {
7580 let v = self.r(base, inst.b());
7581 match coerce_num(v) {
7582 Some(Num::Int(i)) => {
7583 self.set_r(base, inst.a(), Value::Int(i.wrapping_neg()))
7584 }
7585 Some(Num::Float(f)) => self.set_r(base, inst.a(), Value::Float(-f)),
7586 None => {
7587 let mm = self.get_mm(v, Mm::Unm);
7588 if mm.is_nil() {
7589 return Err(self.type_err("perform arithmetic on", v));
7590 }
7591 let dst = base + inst.a();
7592 self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "unm")?;
7593 }
7594 }
7595 }
7596 Op::BNot => {
7597 let v = self.r(base, inst.b());
7598 match coerce_num(v) {
7599 Some(n) => {
7600 let i = self.int_from_num(n)?;
7601 self.set_r(base, inst.a(), Value::Int(!i));
7602 }
7603 None => {
7604 let mm = self.get_mm(v, Mm::BNot);
7605 if mm.is_nil() {
7606 return Err(self.type_err("perform bitwise operation on", v));
7607 }
7608 let dst = base + inst.a();
7609 self.begin_meta_call(mm, &[v, v], MetaAction::Store { dst }, "bnot")?;
7610 }
7611 }
7612 }
7613 Op::Not => {
7614 let v = self.r(base, inst.b());
7615 self.set_r(base, inst.a(), Value::Bool(!v.truthy()));
7616 }
7617 Op::Len => {
7618 let v = self.r(base, inst.b());
7619 match self.len_step(v)? {
7620 MmOut::Done(r) => self.set_r(base, inst.a(), r),
7621 MmOut::Mm { func, recv } => {
7622 let dst = base + inst.a();
7623 self.begin_meta_call(
7624 func,
7625 &[recv, recv],
7626 MetaAction::Store { dst },
7627 "len",
7628 )?;
7629 }
7630 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
7631 }
7632 }
7633 Op::Concat => {
7634 // right-associative fold over operands at base+a .. base+a+n,
7635 // in place on the stack so a yielding __concat can suspend.
7636 let a = inst.a();
7637 let n = inst.b();
7638 self.top = base + a + n;
7639 self.concat_run(base + a)?;
7640 }
7641 Op::Close => {
7642 // Yieldable: drive __close handlers through the
7643 // interpreter loop so a coroutine.yield() inside a
7644 // handler suspends cleanly (locals.lua block-end yield).
7645 // `drive_close` parks the handler call at `self.top`, so
7646 // raise `top` past this frame's full register window
7647 // first — a goto out of a nested for-loop can fire
7648 // OP_Close while `self.top` still sits at the inner
7649 // body's working top, which would let `push_frame`'s
7650 // wipe clobber the outer tbc slot before it could be
7651 // closed (locals.lua:1219 nested-for goto regression).
7652 self.top = self.top.max(base + cl.proto.max_stack as u32);
7653 let _ =
7654 self.begin_close(base + inst.a(), None, AfterClose::Block, entry_depth)?;
7655 }
7656 Op::Tbc => {
7657 self.register_tbc(base + inst.a())?;
7658 }
7659 Op::Jmp => {
7660 let off = inst.sj();
7661 // P12-S1.B — trace JIT back-edge counter. A negative
7662 // jump offset is a loop back-edge (the only canonical
7663 // backward jumps the compiler emits — `while`, `for`,
7664 // `repeat`). Tick the per-Proto counter and, once it
7665 // reaches the threshold with no recorder active and no
7666 // trace cached for the target, start recording. The
7667 // whole block is gated on `self.jit.trace_enabled` so
7668 // existing benches see one branch-not-taken and no
7669 // counter writes.
7670 if self.jit.trace_enabled && off < 0 {
7671 let proto = cl.proto;
7672 let c = proto.trace_hot_count.get();
7673 if c < u32::MAX / 2 {
7674 proto.trace_hot_count.set(c + 1);
7675 }
7676 // P13-S13-H — relaxed back-edge trigger:
7677 // `c >= THRESHOLD` (was `c == THRESHOLD`) so
7678 // a missed crossing (active_trace busy with
7679 // a call-trigger, or the recorder slot
7680 // happened to be in use) doesn't permanently
7681 // lock this back-edge target out. The
7682 // `already_cached` short-circuit prevents
7683 // duplicate recordings: once a trace is
7684 // cached for this target, subsequent
7685 // crossings skip the start. This pairs with
7686 // S13-H's discard-on-partial-coverage close
7687 // handling — when a short call-trigger is
7688 // discarded, the back-edge can still find an
7689 // open slot at the next iteration.
7690 let target_pc = (pc as i32 + 1 + off as i32).max(0) as u32;
7691 // P13-S13-K — gave-up short-circuit. Skip
7692 // the RefCell borrow + scan when the
7693 // S13-I cap force-compiled a partial
7694 // trace on this Proto.
7695 let back_edge_already_cached = if proto.trace_gave_up.get() {
7696 true
7697 } else {
7698 proto.traces.borrow().iter().any(|t| t.head_pc == target_pc)
7699 };
7700 if c >= crate::jit::trace::TRACE_HOT_THRESHOLD
7701 && self.jit.active_trace.is_none()
7702 && !back_edge_already_cached
7703 {
7704 // Back-edge target = pc after `add_pc(off)`,
7705 // i.e. current `pc + 1 + off` (the dispatch
7706 // loop has already advanced f.pc to pc+1).
7707 let target = (pc as i32 + 1 + off as i32).max(0) as u32;
7708 // Snapshot per-slot Value tag at trace
7709 // entry so the lowerer's kind tracker
7710 // knows which arith path to lower
7711 // (iadd vs fadd, etc.).
7712 let max_stack = cl.proto.max_stack as usize;
7713 let base_us = base as usize;
7714 let mut entry_tags = Vec::with_capacity(max_stack);
7715 for i in 0..max_stack {
7716 let (tag, _) = self.stack[base_us + i].unpack();
7717 entry_tags.push(tag);
7718 }
7719 self.jit.active_trace =
7720 Some(Box::new(crate::jit::trace::TraceRecord::start(
7721 cl.proto, target, entry_tags, false,
7722 )));
7723 // P12-S4 — record the frame the trace
7724 // started in. `self.frames.len() - 1`
7725 // since we're inside the currently-running
7726 // Lua frame's dispatch.
7727 self.jit.recording_frame_base = self.frames.len() - 1;
7728 }
7729 }
7730 self.add_pc(off);
7731 }
7732 Op::Eq => {
7733 let l = self.r(base, inst.a());
7734 let r = self.r(base, inst.b());
7735 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7736 if (a == b) != inst.k() {
7737 self.bump_pc();
7738 }
7739 } else {
7740 let step = self.eq_step(l, r);
7741 self.op_compare(step, l, r, inst.k(), "eq")?;
7742 }
7743 }
7744 Op::EqK => {
7745 let l = self.r(base, inst.a());
7746 let r = cl.proto.consts[inst.b() as usize];
7747 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7748 if (a == b) != inst.k() {
7749 self.bump_pc();
7750 }
7751 } else {
7752 let step = self.eq_step(l, r);
7753 self.op_compare(step, l, r, inst.k(), "eq")?;
7754 }
7755 }
7756 Op::Lt => {
7757 let l = self.r(base, inst.a());
7758 let r = self.r(base, inst.b());
7759 // hot path: Int < Int — drops the MmOut + op_compare match
7760 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7761 if (a < b) != inst.k() {
7762 self.bump_pc();
7763 }
7764 } else {
7765 let step = self.less_step(l, r, false)?;
7766 self.op_compare(step, l, r, inst.k(), "lt")?;
7767 }
7768 }
7769 Op::Le => {
7770 let l = self.r(base, inst.a());
7771 let r = self.r(base, inst.b());
7772 if let (Value::Int(a), Value::Int(b)) = (l, r) {
7773 if (a <= b) != inst.k() {
7774 self.bump_pc();
7775 }
7776 } else {
7777 let step = self.less_step(l, r, true)?;
7778 self.op_compare(step, l, r, inst.k(), "le")?;
7779 }
7780 }
7781 Op::Test => {
7782 let cond = self.r(base, inst.a()).truthy();
7783 self.cond_skip(cond, inst.k());
7784 }
7785 Op::TestSet => {
7786 let v = self.r(base, inst.b());
7787 if v.truthy() == inst.k() {
7788 self.set_r(base, inst.a(), v);
7789 } else {
7790 self.bump_pc();
7791 }
7792 }
7793 Op::Call => {
7794 let abs = base + inst.a();
7795 let nargs = if inst.b() == 0 {
7796 None
7797 } else {
7798 Some(inst.b() - 1)
7799 };
7800 let wanted = inst.c() as i32 - 1;
7801 self.begin_call(abs, nargs, wanted, false)?;
7802 }
7803 Op::TailCall => {
7804 let fr = *self.top_frame();
7805 let abs = base + inst.a();
7806 let mut nargs = if inst.b() == 0 {
7807 self.top - (abs + 1)
7808 } else {
7809 inst.b() - 1
7810 };
7811 // A tail call pops this frame before begin_call, so a
7812 // non-callable target would lose its name/position. Report
7813 // it now (PUC reads funcname from the still-current ci),
7814 // while the frame is intact, for "(field 'x')"-style info.
7815 let mut func = self.stack[abs as usize];
7816 if !matches!(func, Value::Closure(_) | Value::Native(_))
7817 && self.get_mm(func, Mm::Call).is_nil()
7818 {
7819 return Err(self.call_err(func));
7820 }
7821 // PUC `luaD_pretailcall` resolves a chain of `__call`
7822 // metamethods *in place* before deciding whether to
7823 // collapse this frame. Without that, each __call hop
7824 // would push a fresh Lua frame and a 10000-deep
7825 // tail-recursion through a 100-deep __call chain
7826 // (5.4 calls.lua :172) blows up. Mirror the PUC loop:
7827 // shift args right, install the handler at `abs`, retry.
7828 // Chain depth limit matches the call-site `begin_call`
7829 // version cap (5.5 calls.lua :223 — 15 max, then "too
7830 // long"; 16th wrap fails the call). An infinite
7831 // self-referential `__call` would otherwise spin.
7832 let chain_cap = if self.version >= LuaVersion::Lua55 {
7833 15
7834 } else {
7835 MAX_CCMT
7836 };
7837 let mut chain = 0u32;
7838 while !matches!(func, Value::Closure(_) | Value::Native(_)) {
7839 let mm = self.get_mm(func, Mm::Call);
7840 if mm.is_nil() {
7841 return Err(self.call_err(func));
7842 }
7843 chain += 1;
7844 if chain > chain_cap {
7845 return Err(self.rt_err("'__call' chain too long"));
7846 }
7847 let end = (abs + 1 + nargs) as usize;
7848 if self.stack.len() < end + 1 {
7849 self.stack.resize(end + 1, Value::Nil);
7850 }
7851 for i in (0..=nargs).rev() {
7852 self.stack[(abs + 1 + i) as usize] = self.stack[(abs + i) as usize];
7853 }
7854 self.stack[abs as usize] = mm;
7855 nargs += 1;
7856 self.top = abs + 1 + nargs;
7857 func = mm;
7858 }
7859 // PUC's tail-call collapse is Lua→Lua only. A tail call to
7860 // a C function runs the C function under the *current* Lua
7861 // activation (no frame fold — a C frame has nothing to
7862 // collapse into); after the C function returns, the
7863 // calling Lua function returns those results normally.
7864 // Mirror that: keep our Lua frame on the stack, call the
7865 // target through `begin_call(abs, …)` as a regular call,
7866 // and let the fallback `Op::Return` that the compiler
7867 // emits right after `Op::TailCall` forward the results.
7868 // 5.1 closure.lua :177's `return getfenv()` from inside
7869 // foo needs level 1 to resolve to foo, not to the
7870 // thread's globals fallback that happens when no Lua
7871 // frame is on the stack.
7872 let lua_target = matches!(func, Value::Closure(_));
7873 if lua_target {
7874 self.close_slots(fr.base, None)?;
7875 for i in 0..=nargs {
7876 self.stack[(fr.func_slot + i) as usize] =
7877 self.stack[(abs + i) as usize];
7878 }
7879 // PUC `CIST_TAIL`: the new Lua activation inherits
7880 // the popped frame's tailcalls count plus one for
7881 // this collapse. 5.1 db.lua :372 hammers 30000
7882 // recursive tail calls and expects to see the
7883 // synthetic tail level for every one of them.
7884 self.pending_tailcalls = fr.tailcalls.saturating_add(1);
7885 frames_pop_sync(&mut self.frames, &mut self.frames_top);
7886 if !self.begin_call(fr.func_slot, Some(nargs), fr.nresults, false)?
7887 && self.frames.len() < entry_depth
7888 {
7889 // a native completed what was this function's result
7890 return Ok(self.take_results(fr.func_slot));
7891 }
7892 } else {
7893 // Native (or __call-bearing) target: regular call. The
7894 // results land at `abs..self.top` and the next op (the
7895 // fallback `Op::Return`) forwards them. `wanted = -1`
7896 // because the caller will multret them through Return.
7897 self.begin_call(abs, Some(nargs), -1, false)?;
7898 }
7899 }
7900 Op::Return | Op::Return0 | Op::Return1 => {
7901 let (abs_a, nret) = match inst.op() {
7902 Op::Return0 => (base, 0),
7903 Op::Return1 => (base + inst.a(), 1),
7904 _ => {
7905 let abs_a = base + inst.a();
7906 let nret = if inst.b() == 0 {
7907 self.top - abs_a
7908 } else {
7909 inst.b() - 1
7910 };
7911 (abs_a, nret)
7912 }
7913 };
7914 // close before moving results: __close handlers run above
7915 // the stack top, so the result region [abs_a..abs_a+nret)
7916 // stays intact across any yields the close performs.
7917 // Fixed-count returns may leave `self.top` below the last
7918 // result slot (the compiler does not always re-bump it);
7919 // raise it past the result region so `drive_close` parks
7920 // the handler call *above* — landing at `self.top` would
7921 // otherwise clobber a result with the handler closure.
7922 self.top = self.top.max(abs_a + nret);
7923 if let Some(vals) = self.begin_close(
7924 base,
7925 None,
7926 AfterClose::Return {
7927 abs_a,
7928 nret,
7929 from_native: false,
7930 },
7931 entry_depth,
7932 )? {
7933 return Ok(vals);
7934 }
7935 }
7936 Op::ForPrep => self.for_prep(inst, base)?,
7937 Op::ForLoop => {
7938 // P12 — trace JIT back-edge counter on the
7939 // numeric-for back-edge. ForLoop is always at
7940 // a back-edge position (when it continues);
7941 // for the trace recorder we treat it as the
7942 // close-detection equivalent of `Op::Jmp` with
7943 // negative offset. Counter only ticks when the
7944 // back-edge will actually fire (count > 0 in
7945 // the 5.4+ Int form, comparable predicates in
7946 // pre-5.3 / Float). The cheap check up front
7947 // matches the for_loop helper's branch.
7948 if self.jit.trace_enabled {
7949 let a = inst.a();
7950 let pre53 = self.version() <= LuaVersion::Lua53;
7951 let take_back_edge =
7952 match (self.r(base, a), self.r(base, a + 1), self.r(base, a + 2)) {
7953 (Value::Int(_), Value::Int(count), Value::Int(_)) if !pre53 => {
7954 count > 0
7955 }
7956 (Value::Int(cur), Value::Int(lim), Value::Int(st)) if pre53 => {
7957 let next = cur.wrapping_add(st);
7958 if st > 0 { next <= lim } else { next >= lim }
7959 }
7960 (Value::Float(cur), Value::Float(lim), Value::Float(st)) => {
7961 let next = cur + st;
7962 if st > 0.0 { next <= lim } else { next >= lim }
7963 }
7964 _ => false,
7965 };
7966 if take_back_edge {
7967 let proto = cl.proto;
7968 let c = proto.trace_hot_count.get();
7969 if c < u32::MAX / 2 {
7970 proto.trace_hot_count.set(c + 1);
7971 }
7972 if c == crate::jit::trace::TRACE_HOT_THRESHOLD
7973 && self.jit.active_trace.is_none()
7974 {
7975 // ForLoop's back-edge target = pc
7976 // after `add_pc(-bx)` runs from the
7977 // already-bumped f.pc (= pc + 1).
7978 // So target = (pc + 1) - bx.
7979 let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
7980 let max_stack = cl.proto.max_stack as usize;
7981 let base_us = base as usize;
7982 let mut entry_tags = Vec::with_capacity(max_stack);
7983 for i in 0..max_stack {
7984 let (tag, _) = self.stack[base_us + i].unpack();
7985 entry_tags.push(tag);
7986 }
7987 self.jit.active_trace =
7988 Some(Box::new(crate::jit::trace::TraceRecord::start(
7989 cl.proto, target, entry_tags, false,
7990 )));
7991 // P12-S4 — record the frame the trace
7992 // started in. The currently-running
7993 // Lua frame is at len() - 1.
7994 self.jit.recording_frame_base = self.frames.len() - 1;
7995 }
7996 }
7997 }
7998 self.for_loop(inst, base);
7999 }
8000 Op::TForPrep => {
8001 // the 4th control slot is the iterator's closing value
8002 self.register_tbc(base + inst.a() + 3)?;
8003 self.add_pc(inst.bx() as i32);
8004 }
8005 Op::TForCall => {
8006 let abs = base + inst.a();
8007 let need = (abs + 7) as usize;
8008 if self.stack.len() < need {
8009 self.stack.resize(need, Value::Nil);
8010 }
8011 self.stack[(abs + 4) as usize] = self.stack[abs as usize];
8012 self.stack[(abs + 5) as usize] = self.stack[(abs + 1) as usize];
8013 self.stack[(abs + 6) as usize] = self.stack[(abs + 2) as usize];
8014 let nvars = inst.c() as i32;
8015 self.begin_call(abs + 4, Some(2), nvars, false)?;
8016 }
8017 Op::TForLoop => {
8018 let a = inst.a();
8019 let ctrl = self.r(base, a + 4);
8020 if !ctrl.is_nil() {
8021 // P12-S12-B v1 — trace JIT back-edge counter on
8022 // generic-for back-edge. TForLoop sits at the
8023 // tail of `for k,v in expr do ... end`; recorder
8024 // treats it as the close-detection equivalent of
8025 // a negative Op::Jmp. Gate on `take_back_edge`
8026 // (= `ctrl != nil`) so empty-iter loops don't
8027 // pollute hot_count. v1 only adds the trigger;
8028 // whitelist + helper + emit live in v2.
8029 if self.jit.trace_enabled {
8030 let proto = cl.proto;
8031 let c = proto.trace_hot_count.get();
8032 if c < u32::MAX / 2 {
8033 proto.trace_hot_count.set(c + 1);
8034 }
8035 if c == crate::jit::trace::TRACE_HOT_THRESHOLD
8036 && self.jit.active_trace.is_none()
8037 {
8038 // TForLoop back-edge target = pc after
8039 // `add_pc(-bx)` runs from the already-
8040 // bumped f.pc (= pc + 1). So target =
8041 // (pc + 1) - bx, normally landing on
8042 // body_top (the op right after TForPrep).
8043 let target = (pc as i32 + 1 - inst.bx() as i32).max(0) as u32;
8044 let max_stack = cl.proto.max_stack as usize;
8045 let base_us = base as usize;
8046 let mut entry_tags = Vec::with_capacity(max_stack);
8047 for i in 0..max_stack {
8048 let (tag, _) = self.stack[base_us + i].unpack();
8049 entry_tags.push(tag);
8050 }
8051 // P12-S12-B-v5 — snapshot the iter
8052 // fn's address if Native, so the
8053 // lowerer can specialise ipairs into
8054 // inline Table aget IR.
8055 let iter_ptr =
8056 if let Value::Native(n) = self.stack[base_us + a as usize] {
8057 Some(n.f as usize)
8058 } else {
8059 None
8060 };
8061 // P12-S12-C v3 — snapshot R[A+5]'s
8062 // tag (= current iter's val from
8063 // the just-fired TForCall). The v5
8064 // inline aget fast_blk emits a
8065 // runtime guard against this tag;
8066 // mixed-tag arrays deopt rather
8067 // than producing garbage pointers
8068 // through the v2 spill path.
8069 let val_slot = base_us + (a as usize) + 5;
8070 let val_tag = if val_slot < self.stack.len() {
8071 Some(self.stack[val_slot].unpack().0)
8072 } else {
8073 None
8074 };
8075 let mut rec = crate::jit::trace::TraceRecord::start(
8076 cl.proto, target, entry_tags, false,
8077 );
8078 rec.tfor_iter_ptr = iter_ptr;
8079 rec.tfor_val_tag = val_tag;
8080 self.jit.active_trace = Some(Box::new(rec));
8081 self.jit.recording_frame_base = self.frames.len() - 1;
8082 }
8083 }
8084 self.set_r(base, a + 2, ctrl);
8085 self.add_pc(-(inst.bx() as i32));
8086 }
8087 }
8088 Op::Closure => {
8089 let proto = cl.proto.protos[inst.bx() as usize];
8090 let n_ups = proto.upvals.len();
8091 // P11-S5d.M — build upvals on the stack for small
8092 // closures, skipping the per-call Vec/Box alloc
8093 // that closure_alloc's 10k iters pay. INLINE_UPVALS_N
8094 // = 2 covers most Lua source (1 captured local, or
8095 // _ENV + a single capture). Beyond that, fall back
8096 // to a heap Vec.
8097 use crate::runtime::function::INLINE_UPVALS_N;
8098 let mut stack_buf: [std::mem::MaybeUninit<
8099 Gc<crate::runtime::function::Upvalue>,
8100 >; INLINE_UPVALS_N] = [std::mem::MaybeUninit::uninit(); INLINE_UPVALS_N];
8101 let mut heap_buf: Vec<Gc<crate::runtime::function::Upvalue>> = Vec::new();
8102 let use_inline = n_ups <= INLINE_UPVALS_N;
8103 if !use_inline {
8104 heap_buf.reserve_exact(n_ups);
8105 }
8106 for (i, d) in proto.upvals.iter().enumerate() {
8107 let uv = if d.in_stack {
8108 self.find_or_create_upval(base + d.index as u32)
8109 } else {
8110 cl.upvals()[d.index as usize]
8111 };
8112 if use_inline {
8113 stack_buf[i] = std::mem::MaybeUninit::new(uv);
8114 } else {
8115 heap_buf.push(uv);
8116 }
8117 }
8118 // Tiny shim around the two paths so the 5.1 _ENV
8119 // clone + cache check below see one uniform
8120 // `&mut [Gc<Upvalue>]`. The stack_buf slice points
8121 // into the local frame (still valid through the
8122 // rest of this Op::Closure handler).
8123 let ups: &mut [Gc<crate::runtime::function::Upvalue>] = if use_inline {
8124 // SAFETY: the first n_ups slots of stack_buf
8125 // were initialised above; we hand out a slice
8126 // covering exactly them.
8127 unsafe {
8128 std::slice::from_raw_parts_mut(
8129 stack_buf.as_mut_ptr()
8130 as *mut Gc<crate::runtime::function::Upvalue>,
8131 n_ups,
8132 )
8133 }
8134 } else {
8135 &mut heap_buf[..]
8136 };
8137 // PUC 5.1 had per-function environments: every Lua
8138 // function carried its own `env` slot, snapshotted from
8139 // the creating function's env at closure time, so a
8140 // `setfenv` on one closure never bled into a sibling.
8141 // luna models that by giving the 5.1 closure a *fresh*
8142 // closed upvalue for whichever cell holds `_ENV`, seeded
8143 // from the parent's current env value. Only that cell is
8144 // cloned — every other upvalue keeps its open/shared
8145 // identity (so e.g. `local function range(...) ...
8146 // range(...) ... end` still sees its self-reference). 5.2+
8147 // keeps the shared-upval model (and the proto cache that
8148 // depends on it).
8149 let v51 = self.version() <= LuaVersion::Lua51;
8150 if v51 && proto.env_upval_idx != u8::MAX {
8151 let i = proto.env_upval_idx as usize;
8152 let cur = match ups[i].state() {
8153 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
8154 UpvalState::Closed(v) => v,
8155 };
8156 ups[i] = self.heap.new_upvalue(UpvalState::Closed(cur));
8157 }
8158 let ups_slice: &[Gc<crate::runtime::function::Upvalue>] = ups;
8159 // PUC 5.2+ `getcached`: a Proto remembers its last LClosure
8160 // and reuses it when every fresh-upvalue binding still
8161 // points to the same Upvalue object as the cached one.
8162 // That keeps `function() return outer end` repeated in a
8163 // loop comparing equal across iterations (the captured
8164 // outer is a shared open upvalue), while `function()
8165 // return loop_var end` gets a fresh closure each round
8166 // because the loop var is re-created per iteration. PUC
8167 // 5.1 predated the cache, and the per-closure `_ENV`
8168 // clone above would defeat it anyway, so skip it.
8169 let nc = if v51 {
8170 self.heap.new_closure_inline(proto, ups_slice)
8171 } else {
8172 let cached = proto.cache.get().filter(|c| {
8173 c.upvals().len() == ups_slice.len()
8174 && c.upvals()
8175 .iter()
8176 .zip(ups_slice.iter())
8177 .all(|(a, b)| std::ptr::eq(a.as_ptr(), b.as_ptr()))
8178 });
8179 match cached {
8180 Some(c) => c,
8181 None => {
8182 let n = self.heap.new_closure_inline(proto, ups_slice);
8183 proto.cache.set(Some(n));
8184 n
8185 }
8186 }
8187 };
8188 self.set_r(base, inst.a(), Value::Closure(nc));
8189 self.maybe_collect_garbage(base + inst.a() + 1);
8190 }
8191 Op::Vararg => {
8192 let abs_a = base + inst.a();
8193 let wanted = inst.c() as i32 - 1;
8194 // A materialized named vararg lives in func_slot (its writes
8195 // must be visible to `...`); otherwise spread the extra args
8196 // straight off the stack at func_slot+1 .. +n_varargs.
8197 let vt = match self.stack[func_slot as usize] {
8198 Value::Table(t) => Some(t),
8199 _ => None,
8200 };
8201 let n = match vt {
8202 Some(t) => {
8203 let n_key = Value::Str(self.heap.intern(b"n"));
8204 // PUC getnumargs: a named vararg `t.n` set out of the
8205 // integer range [0, INT_MAX/2] is rejected here
8206 match t.get(n_key) {
8207 Value::Int(n) if (n as u64) <= (i32::MAX as u64 / 2) => n as u32,
8208 _ => return Err(self.rt_err("vararg table has no proper 'n'")),
8209 }
8210 }
8211 None => n_varargs,
8212 };
8213 let count = if wanted < 0 { n } else { wanted as u32 };
8214 let need = (abs_a + count) as usize;
8215 if self.stack.len() < need {
8216 self.stack.resize(need, Value::Nil);
8217 }
8218 for i in 0..count {
8219 let v = if i >= n {
8220 Value::Nil
8221 } else if let Some(t) = vt {
8222 t.get_int(i as i64 + 1)
8223 } else {
8224 self.stack[(func_slot + 1 + i) as usize]
8225 };
8226 self.stack[(abs_a + i) as usize] = v;
8227 }
8228 if wanted < 0 {
8229 self.top = abs_a + count;
8230 }
8231 }
8232 Op::GetVarg => {
8233 // materialize the vararg table (PUC table.pack shape) from the
8234 // stack varargs — used when the named vararg is written /
8235 // escapes / is `_ENV`. It is kept BOTH in func_slot (so `...`
8236 // sees later writes) and in the local register R[A].
8237 let n = n_varargs;
8238 let t = self.heap.new_table();
8239 {
8240 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8241 let tm = unsafe { t.as_mut() };
8242 for i in 0..n {
8243 let _ = tm.set_int(
8244 &mut self.heap,
8245 i as i64 + 1,
8246 self.stack[(func_slot + 1 + i) as usize],
8247 );
8248 }
8249 }
8250 let n_key = Value::Str(self.heap.intern(b"n"));
8251 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8252 unsafe { t.as_mut() }
8253 .set(&mut self.heap, n_key, Value::Int(n as i64))
8254 .expect("'n' is a valid key");
8255 // once-per-table barrier (mirror SETLIST): t is born BLACK
8256 // during Propagate; the bulk inserts above don't barrier.
8257 self.heap
8258 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8259 self.stack[func_slot as usize] = Value::Table(t);
8260 self.set_r(base, inst.a(), Value::Table(t));
8261 }
8262 Op::VargIdx => {
8263 // R[A] := vararg[R[C]] without allocating: integer key in
8264 // [1,n] → that vararg, "n" → the count, else nil.
8265 let key = self.r(base, inst.c());
8266 let n = n_varargs;
8267 let v = match key {
8268 Value::Int(k) if k >= 1 && (k as u64) <= n as u64 => {
8269 self.stack[(func_slot + k as u32) as usize]
8270 }
8271 Value::Float(f) if f.fract() == 0.0 && f >= 1.0 && f <= n as f64 => {
8272 self.stack[(func_slot + f as u32) as usize]
8273 }
8274 Value::Str(s) if s.as_bytes() == b"n" => Value::Int(n as i64),
8275 _ => Value::Nil,
8276 };
8277 self.set_r(base, inst.a(), v);
8278 }
8279 Op::ErrNNil => {
8280 let v = self.r(base, inst.a());
8281 if !matches!(v, Value::Nil) {
8282 let bx = inst.bx();
8283 let name = if bx == 0 {
8284 "?".to_string()
8285 } else {
8286 match cl.proto.consts[(bx - 1) as usize] {
8287 Value::Str(s) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
8288 _ => "?".to_string(),
8289 }
8290 };
8291 return Err(self.rt_err(&format!("global '{name}' already defined")));
8292 }
8293 }
8294 Op::ExtraArg => unreachable!("EXTRAARG executed directly"),
8295 }
8296 }
8297 }
8298
8299 #[inline(always)]
8300 fn pc_of_top(&self) -> u32 {
8301 self.top_frame().pc
8302 }
8303
8304 #[inline(always)]
8305 fn bump_pc(&mut self) {
8306 // Inline `top_frame_mut`: top is guaranteed Lua (continuation frames
8307 // drained at dispatch loop head). Avoids the and_then/lua_mut Option
8308 // layers — bump_pc fires per Jmp / cond_skip miss, so the savings add
8309 // up over `fib_28`'s ~500k jumps.
8310 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8311 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8312 CallFrame::Lua(f) => f.pc += 1,
8313 _ => unreachable!("Cont frame at bump_pc"),
8314 }
8315 }
8316
8317 #[inline(always)]
8318 fn add_pc(&mut self, d: i32) {
8319 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8320 match unsafe { self.frames.last_mut().unwrap_unchecked() } {
8321 CallFrame::Lua(f) => f.pc = (f.pc as i64 + d as i64) as u32,
8322 _ => unreachable!("Cont frame at add_pc"),
8323 }
8324 }
8325
8326 /// PUC conditional-skip convention: the JMP that follows is executed when
8327 /// `cond == k`; otherwise it is skipped.
8328 #[inline(always)]
8329 fn cond_skip(&mut self, cond: bool, k: bool) {
8330 if cond != k {
8331 self.bump_pc();
8332 }
8333 }
8334
8335 // ---- indexing (with __index/__newindex chains) ----
8336
8337 /// The `#` length operation: string byte length, `__len` if present, else
8338 /// the raw table border. Returns the raw length value (may be non-integer
8339 /// when `__len` is exotic).
8340 pub(crate) fn len_value(&mut self, v: Value) -> Result<Value, LuaError> {
8341 match self.len_step(v)? {
8342 MmOut::Done(n) => Ok(n),
8343 // PUC calls unary metamethods with the operand twice
8344 MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, recv]),
8345 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from len_step"),
8346 }
8347 }
8348
8349 /// Length fast path: a string's byte count or a table's raw border when no
8350 /// `__len` is present (`Done`); otherwise the `__len` metamethod (`Mm`),
8351 /// called with the operand twice. Errors for a non-table with no `__len`.
8352 fn len_step(&mut self, v: Value) -> Result<MmOut, LuaError> {
8353 match v {
8354 Value::Str(s) => Ok(MmOut::Done(Value::Int(s.len() as i64))),
8355 Value::Table(t) => {
8356 let mm = self.get_mm(v, Mm::Len);
8357 if mm.is_nil() {
8358 Ok(MmOut::Done(Value::Int(t.len())))
8359 } else {
8360 Ok(MmOut::Mm { func: mm, recv: v })
8361 }
8362 }
8363 _ => {
8364 let mm = self.get_mm(v, Mm::Len);
8365 if mm.is_nil() {
8366 Err(self.type_err("get length of", v))
8367 } else {
8368 Ok(MmOut::Mm { func: mm, recv: v })
8369 }
8370 }
8371 }
8372 }
8373
8374 /// PUC luaL_len: the length as an integer, erroring if `__len` returned a
8375 /// value with no integer representation.
8376 pub(crate) fn checked_len(&mut self, v: Value) -> Result<i64, LuaError> {
8377 match self.len_value(v)? {
8378 Value::Int(i) => Ok(i),
8379 Value::Float(f) => crate::runtime::value::f2i_exact(f)
8380 .ok_or_else(|| self.rt_err("object length is not an integer")),
8381 _ => Err(self.rt_err("object length is not an integer")),
8382 }
8383 }
8384
8385 pub(crate) fn index_value(&mut self, t: Value, key: Value) -> Result<Value, LuaError> {
8386 match self.index_step(t, key)? {
8387 MmOut::Done(v) => Ok(v),
8388 MmOut::Mm { func, recv } => self.call_mm1(func, &[recv, key]),
8389 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from index_step"),
8390 }
8391 }
8392
8393 /// Resolve `t[key]` through the `__index` chain, stopping at the first raw
8394 /// hit (`Done`) or function metamethod (`Mm`). Table-valued `__index` links
8395 /// are followed inline (no yield possible); only a function link can yield.
8396 fn index_step(&mut self, t: Value, key: Value) -> Result<MmOut, LuaError> {
8397 let mut cur = t;
8398 for _ in 0..MAX_TAG_LOOP {
8399 let mm = match cur {
8400 Value::Table(tb) => {
8401 let v = tb.get(key);
8402 if !v.is_nil() {
8403 return Ok(MmOut::Done(v));
8404 }
8405 let mm = self.get_mm(cur, Mm::Index);
8406 if mm.is_nil() {
8407 return Ok(MmOut::Done(Value::Nil));
8408 }
8409 mm
8410 }
8411 v => {
8412 let mm = self.get_mm(v, Mm::Index);
8413 if mm.is_nil() {
8414 return Err(self.type_err("index", v));
8415 }
8416 mm
8417 }
8418 };
8419 match mm {
8420 Value::Closure(_) | Value::Native(_) => {
8421 return Ok(MmOut::Mm {
8422 func: mm,
8423 recv: cur,
8424 });
8425 }
8426 next => cur = next,
8427 }
8428 }
8429 Err(self.rt_err("'__index' chain too long; possible loop"))
8430 }
8431
8432 pub(crate) fn newindex_value(
8433 &mut self,
8434 t: Value,
8435 key: Value,
8436 v: Value,
8437 ) -> Result<(), LuaError> {
8438 match self.newindex_step(t, key, v)? {
8439 MmOut::Done(_) => Ok(()),
8440 MmOut::Mm { func, recv } => {
8441 self.call_value(func, &[recv, key, v])?;
8442 Ok(())
8443 }
8444 MmOut::CompareSynth { .. } => unreachable!("CompareSynth from newindex_step"),
8445 }
8446 }
8447
8448 /// Resolve `t[key] = v` through the `__newindex` chain. A raw assignment is
8449 /// performed inline (returning `Done`); only a function metamethod (`Mm`)
8450 /// needs an actual call — which the caller may run yieldably.
8451 fn newindex_step(&mut self, t: Value, key: Value, v: Value) -> Result<MmOut, LuaError> {
8452 let mut cur = t;
8453 for _ in 0..MAX_TAG_LOOP {
8454 let mm = match cur {
8455 Value::Table(tb) => {
8456 // PI-A3 single-walk collapse — Table::try_set_existing
8457 // fuses the prior `tb.get(key).is_nil()` gate and
8458 // `raw_set` walk into one chain traversal when the
8459 // key is already present with a non-nil value. The
8460 // __newindex chain semantics are preserved by the
8461 // identity (slot_nil ⇔ fire_newindex); see
8462 // .dev/rfcs/v2.0-pi-phase2-a3-audit.md §4.
8463 //
8464 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the
8465 // heap is single-threaded and the pointer is live as
8466 // long as it is reachable from active roots (see
8467 // heap.rs:5-7). Mirrors the raw_set wrapper below.
8468 if unsafe { tb.as_mut() }.try_set_existing(key, v) {
8469 self.heap
8470 .barrier_back(tb.as_ptr() as *mut crate::runtime::heap::GcHeader);
8471 return Ok(MmOut::Done(Value::Nil));
8472 }
8473 let mm = self.get_mm(cur, Mm::NewIndex);
8474 if mm.is_nil() {
8475 self.raw_set(tb, key, v)?;
8476 return Ok(MmOut::Done(Value::Nil));
8477 }
8478 mm
8479 }
8480 bad => {
8481 let mm = self.get_mm(bad, Mm::NewIndex);
8482 if mm.is_nil() {
8483 return Err(self.type_err("index", bad));
8484 }
8485 mm
8486 }
8487 };
8488 match mm {
8489 Value::Closure(_) | Value::Native(_) => {
8490 return Ok(MmOut::Mm {
8491 func: mm,
8492 recv: cur,
8493 });
8494 }
8495 next => cur = next,
8496 }
8497 }
8498 Err(self.rt_err("'__newindex' chain too long; possible loop"))
8499 }
8500
8501 fn raw_set(&mut self, t: Gc<Table>, key: Value, v: Value) -> Result<(), LuaError> {
8502 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
8503 match unsafe { t.as_mut() }.set(&mut self.heap, key, v) {
8504 Ok(()) => {
8505 self.heap
8506 .barrier_back(t.as_ptr() as *mut crate::runtime::heap::GcHeader);
8507 Ok(())
8508 }
8509 Err(TableError::NilIndex) => Err(self.rt_err("table index is nil")),
8510 Err(TableError::NanIndex) => Err(self.rt_err("table index is NaN")),
8511 Err(TableError::Overflow) => Err(self.rt_err("table overflow")),
8512 Err(TableError::InvalidNext) => unreachable!(),
8513 }
8514 }
8515
8516 /// Decide equality, or surface the `__eq` metamethod to call. `Done` carries
8517 /// the boolean result; `Mm` (when raw equality fails and both are tables
8518 /// with an `__eq`) carries the metamethod — called with `(l, r)`.
8519 fn eq_step(&mut self, l: Value, r: Value) -> MmOut {
8520 if l.raw_eq(r) {
8521 return MmOut::Done(Value::Bool(true));
8522 }
8523 if let (Value::Table(_), Value::Table(_)) | (Value::Userdata(_), Value::Userdata(_)) =
8524 (l, r)
8525 {
8526 // PUC 5.2+ accepts any `__eq` reachable from either operand; 5.1
8527 // (and earlier) required the two operands' metatables to expose a
8528 // matching `__eq` (`get_compTM`) — `c == d` where `d` has no
8529 // metatable falls straight back to raw inequality. events.lua 5.1
8530 // :262 bakes this in.
8531 let mm = if self.version() <= LuaVersion::Lua51 {
8532 self.get_comp_mm(l, r, Mm::Eq)
8533 } else {
8534 let mut m = self.get_mm(l, Mm::Eq);
8535 if m.is_nil() {
8536 m = self.get_mm(r, Mm::Eq);
8537 }
8538 m
8539 };
8540 if !mm.is_nil() {
8541 return MmOut::Mm { func: mm, recv: l };
8542 }
8543 }
8544 MmOut::Done(Value::Bool(false))
8545 }
8546
8547 // ---- arithmetic ----
8548
8549 #[inline(always)]
8550 fn arith_rr(&mut self, inst: Inst, base: u32, op: ArithOp) -> Result<(), LuaError> {
8551 let l = self.r(base, inst.b());
8552 let r = self.r(base, inst.c());
8553 // hot path: Int + Int for Add / Sub / Mul — fib_28, loop_int_1m,
8554 // binary_trees all hammer these. Skipping coerce_num + the big
8555 // arith_fast match shaves several conditional moves per op.
8556 if let (Value::Int(a), Value::Int(b)) = (l, r) {
8557 let fast = match op {
8558 ArithOp::Add => Some(Value::Int(a.wrapping_add(b))),
8559 ArithOp::Sub => Some(Value::Int(a.wrapping_sub(b))),
8560 ArithOp::Mul => Some(Value::Int(a.wrapping_mul(b))),
8561 _ => None,
8562 };
8563 if let Some(v) = fast {
8564 self.set_r(base, inst.a(), v);
8565 return Ok(());
8566 }
8567 }
8568 // hot path: Float + Float for Add / Sub / Mul / Div — math_loop_100k
8569 // and any numeric workload with non-integer accumulators benefits.
8570 if let (Value::Float(a), Value::Float(b)) = (l, r) {
8571 let fast = match op {
8572 ArithOp::Add => Some(Value::Float(a + b)),
8573 ArithOp::Sub => Some(Value::Float(a - b)),
8574 ArithOp::Mul => Some(Value::Float(a * b)),
8575 ArithOp::Div => Some(Value::Float(a / b)),
8576 _ => None,
8577 };
8578 if let Some(v) = fast {
8579 self.set_r(base, inst.a(), v);
8580 return Ok(());
8581 }
8582 }
8583 match self.arith_fast(op, l, r)? {
8584 Some(v) => self.set_r(base, inst.a(), v),
8585 None => {
8586 let mm = self.arith_mm_func(op, l, r)?;
8587 let dst = base + inst.a();
8588 self.begin_meta_call(mm, &[l, r], MetaAction::Store { dst }, op.mm_name())?;
8589 }
8590 }
8591 Ok(())
8592 }
8593
8594 /// Fast path for an arithmetic/bitwise op: `Ok(Some(v))` when computed
8595 /// directly, `Ok(None)` when a metamethod is required (the caller decides
8596 /// whether to call it synchronously or yieldably).
8597 fn arith_fast(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
8598 use ArithOp::*;
8599 match op {
8600 BAnd | BOr | BXor | Shl | Shr => {
8601 // strings coerce for bitwise too (PUC tointegerns via cvt2num)
8602 match (coerce_num(l), coerce_num(r)) {
8603 (Some(a), Some(b)) => {
8604 let to_int = |n: Num| match n {
8605 Num::Int(i) => Some(i),
8606 Num::Float(f) => crate::runtime::value::f2i_exact(f),
8607 };
8608 let (Some(a), Some(b)) = (to_int(a), to_int(b)) else {
8609 // PUC luaG_tointerror: name the offending operand
8610 return Err(self.no_int_rep_err());
8611 };
8612 let v = match op {
8613 BAnd => a & b,
8614 BOr => a | b,
8615 BXor => a ^ b,
8616 Shl => shift_left(a, b),
8617 Shr => shift_left(a, b.wrapping_neg()),
8618 _ => unreachable!(),
8619 };
8620 return Ok(Some(Value::Int(v)));
8621 }
8622 _ => return Ok(None),
8623 }
8624 }
8625 _ => {}
8626 }
8627 let (ln, rn) = match (coerce_num(l), coerce_num(r)) {
8628 (Some(a), Some(b)) => (a, b),
8629 _ => return Ok(None),
8630 };
8631 let v = match (op, ln, rn) {
8632 (Add, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_add(b)),
8633 (Sub, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_sub(b)),
8634 (Mul, Num::Int(a), Num::Int(b)) => Value::Int(a.wrapping_mul(b)),
8635 (IDiv, Num::Int(a), Num::Int(b)) => {
8636 if b == 0 {
8637 return Err(self.rt_err("attempt to divide by zero"));
8638 }
8639 let mut q = a.wrapping_div(b);
8640 if (a ^ b) < 0 && q.wrapping_mul(b) != a {
8641 q -= 1;
8642 }
8643 Value::Int(q)
8644 }
8645 (Mod, Num::Int(a), Num::Int(b)) => {
8646 if b == 0 {
8647 return Err(self.rt_err("attempt to perform 'n%0'"));
8648 }
8649 let mut m = a.wrapping_rem(b);
8650 if m != 0 && (m ^ b) < 0 {
8651 m += b;
8652 }
8653 Value::Int(m)
8654 }
8655 (Add, a, b) => Value::Float(a.as_f64() + b.as_f64()),
8656 (Sub, a, b) => Value::Float(a.as_f64() - b.as_f64()),
8657 (Mul, a, b) => Value::Float(a.as_f64() * b.as_f64()),
8658 (Div, a, b) => Value::Float(a.as_f64() / b.as_f64()),
8659 (Pow, a, b) => Value::Float(a.as_f64().powf(b.as_f64())),
8660 (IDiv, a, b) => Value::Float((a.as_f64() / b.as_f64()).floor()),
8661 (Mod, a, b) => {
8662 let (x, y) = (a.as_f64(), b.as_f64());
8663 // PUC luai_nummod: correct fmod's sign without the `m*y`
8664 // product, which underflows to 0 for tiny denormals
8665 let mut m = x % y;
8666 if (m > 0.0 && y < 0.0) || (m < 0.0 && y > 0.0) {
8667 m += y;
8668 }
8669 Value::Float(m)
8670 }
8671 _ => unreachable!(),
8672 };
8673 Ok(Some(v))
8674 }
8675
8676 pub(crate) fn int_from(&mut self, v: Value, what: &str) -> Result<i64, LuaError> {
8677 match v {
8678 Value::Int(i) => Ok(i),
8679 Value::Float(f) => match crate::runtime::value::f2i_exact(f) {
8680 Some(i) => Ok(i),
8681 None => Err(self.rt_err("number has no integer representation")),
8682 },
8683 v => Err(self.type_err(what, v)),
8684 }
8685 }
8686
8687 fn int_from_num(&mut self, n: Num) -> Result<i64, LuaError> {
8688 match n {
8689 Num::Int(i) => Ok(i),
8690 Num::Float(f) => match crate::runtime::value::f2i_exact(f) {
8691 Some(i) => Ok(i),
8692 None => Err(self.rt_err("number has no integer representation")),
8693 },
8694 }
8695 }
8696
8697 /// Find the arithmetic/bitwise metamethod (left operand first), or raise the
8698 /// PUC type error when neither operand provides one.
8699 fn arith_mm_func(&mut self, op: ArithOp, l: Value, r: Value) -> Result<Value, LuaError> {
8700 use ArithOp::*;
8701 let event = match op {
8702 Add => Mm::Add,
8703 Sub => Mm::Sub,
8704 Mul => Mm::Mul,
8705 Div => Mm::Div,
8706 Mod => Mm::Mod,
8707 Pow => Mm::Pow,
8708 IDiv => Mm::IDiv,
8709 BAnd => Mm::BAnd,
8710 BOr => Mm::BOr,
8711 BXor => Mm::BXor,
8712 Shl => Mm::Shl,
8713 Shr => Mm::Shr,
8714 };
8715 let mut mm = self.get_mm(l, event);
8716 if mm.is_nil() {
8717 mm = self.get_mm(r, event);
8718 }
8719 if mm.is_nil() {
8720 let what = if matches!(op, BAnd | BOr | BXor | Shl | Shr) {
8721 "perform bitwise operation on"
8722 } else {
8723 "perform arithmetic on"
8724 };
8725 let bad = if coerce_num(l).is_none() { l } else { r };
8726 return Err(self.type_err(what, bad));
8727 }
8728 Ok(mm)
8729 }
8730
8731 // ---- comparison ----
8732
8733 pub(crate) fn less_than(&mut self, l: Value, r: Value, or_eq: bool) -> Result<bool, LuaError> {
8734 match self.less_step(l, r, or_eq)? {
8735 MmOut::Done(v) => Ok(v.truthy()),
8736 MmOut::Mm { func, .. } => Ok(self.call_mm1(func, &[l, r])?.truthy()),
8737 MmOut::CompareSynth { func } => {
8738 // ≤5.3 `__le` via `not __lt(r, l)`. Synchronous helper used
8739 // by library code (sort comparator etc.) — no yield expected
8740 // here (a yield would have hit `call_noyield`'s C boundary).
8741 Ok(!self.call_mm1(func, &[r, l])?.truthy())
8742 }
8743 }
8744 }
8745
8746 /// Decide `l < r` / `l <= r`, or surface the `__lt`/`__le` metamethod. `Done`
8747 /// carries the boolean result; `Mm` (for non-number/string operands) carries
8748 /// the metamethod — called with `(l, r)`; raises the PUC compare error when
8749 /// neither operand provides one.
8750 fn less_step(&mut self, l: Value, r: Value, or_eq: bool) -> Result<MmOut, LuaError> {
8751 let b = match (l, r) {
8752 (Value::Int(a), Value::Int(b)) => {
8753 if or_eq {
8754 a <= b
8755 } else {
8756 a < b
8757 }
8758 }
8759 (Value::Float(a), Value::Float(b)) => {
8760 if or_eq {
8761 a <= b
8762 } else {
8763 a < b
8764 }
8765 }
8766 (Value::Int(a), Value::Float(b)) => {
8767 if or_eq {
8768 int_le_float(a, b)
8769 } else {
8770 int_lt_float(a, b)
8771 }
8772 }
8773 (Value::Float(a), Value::Int(b)) => {
8774 if a.is_nan() {
8775 false
8776 } else if or_eq {
8777 !int_lt_float(b, a)
8778 } else {
8779 !int_le_float(b, a)
8780 }
8781 }
8782 (Value::Str(a), Value::Str(b)) => {
8783 let (a, b) = (a.as_bytes(), b.as_bytes());
8784 if or_eq { a <= b } else { a < b }
8785 }
8786 (l, r) => {
8787 let event = if or_eq { Mm::Le } else { Mm::Lt };
8788 // PUC 5.1's `get_compTM` rule applies to ordered comparisons
8789 // too: both operands' metatables must expose the same
8790 // implementation for `__lt` / `__le` to fire. events.lua 5.1
8791 // :262 expects `c < d` (where `d` has no metatable) to error
8792 // with the default "attempt to compare two table values"
8793 // rather than running c's `__lt` blindly.
8794 let mm = if self.version() <= LuaVersion::Lua51 {
8795 self.get_comp_mm(l, r, event)
8796 } else {
8797 let mut m = self.get_mm(l, event);
8798 if m.is_nil() {
8799 m = self.get_mm(r, event);
8800 }
8801 m
8802 };
8803 // PUC ≤5.3: `a <= b` falls back to `not (b < a)` when neither
8804 // operand carries `__le`. 5.4 dropped the synthesis (now
8805 // requires an explicit `__le`). events.lua 5.2/5.3 :172 relies
8806 // on the synthesis — its metatable defines only `__lt`.
8807 // The fallback calls `__lt(r, l)` synchronously (the suite's
8808 // `__lt` doesn't yield) and negates the result; the yieldable
8809 // `__lt` path stays reserved for the explicit `<` operator.
8810 if mm.is_nil() && or_eq && self.version <= crate::version::LuaVersion::Lua53 {
8811 let lt = Mm::Lt;
8812 let mut mm_lt = self.get_mm(l, lt);
8813 if mm_lt.is_nil() {
8814 mm_lt = self.get_mm(r, lt);
8815 }
8816 if !mm_lt.is_nil() {
8817 return Ok(MmOut::CompareSynth { func: mm_lt });
8818 }
8819 }
8820 if mm.is_nil() {
8821 // PUC luaG_ordererror: "two X values" when the operand
8822 // types match, "X with Y" otherwise (objtypename-aware).
8823 let (t1, t2) = (self.obj_typename(l), self.obj_typename(r));
8824 return Err(self.rt_err(&if t1 == t2 {
8825 format!("attempt to compare two {t1} values")
8826 } else {
8827 format!("attempt to compare {t1} with {t2}")
8828 }));
8829 }
8830 return Ok(MmOut::Mm { func: mm, recv: l });
8831 }
8832 };
8833 Ok(MmOut::Done(Value::Bool(b)))
8834 }
8835
8836 // ---- numeric for ----
8837
8838 fn for_prep(&mut self, inst: Inst, base: u32) -> Result<(), LuaError> {
8839 let a = inst.a();
8840 let init = self.r(base, a);
8841 let limit = self.r(base, a + 1);
8842 let step = self.r(base, a + 2);
8843 let (Some(init_n), Some(limit_n), Some(step_n)) =
8844 (as_num(init), as_num(limit), as_num(step))
8845 else {
8846 // PUC luaG_forerror: "bad 'for' <what> (number expected, got <type>)".
8847 // PUC checks limit, then step, then initial value.
8848 let (what, bad) = if as_num(limit).is_none() {
8849 ("limit", limit)
8850 } else if as_num(step).is_none() {
8851 ("step", step)
8852 } else {
8853 ("initial value", init)
8854 };
8855 let tn = self.obj_typename(bad);
8856 return Err(self.rt_err(&format!("bad 'for' {what} (number expected, got {tn})")));
8857 };
8858 // PUC 5.1–5.3 `OP_FORPREP` stores `i = init - step` and *unconditionally*
8859 // jumps to the matching `OP_FORLOOP` — the body never runs ahead of the
8860 // first test, so each successful iteration emits a backward `OP_FORLOOP`
8861 // jump (db.lua's `for i=1,4 do a=1 end` ↦ 5 line-hook events instead of
8862 // 5.4's 4). 5.4+ collapsed that to a count-based fall-through. The skip
8863 // distance in luna's encoding is `loop_pc - prep_pc`; firing
8864 // `add_pc(bx - 1)` lands the running pc on OP_FORLOOP itself.
8865 let pre53 = self.version() <= LuaVersion::Lua53;
8866 match (init_n, step_n) {
8867 (Num::Int(i0), Num::Int(st)) => {
8868 if st == 0 {
8869 return Err(self.rt_err("'for' step is zero"));
8870 }
8871 if pre53 {
8872 // PUC 5.3 `forlimit`: int limit passes through; float limit
8873 // gets clamped to MIN/MAX with a `stopnow` flag set only
8874 // when the clamp is unreachable (positive float with a
8875 // negative step → limit=MAX, stopnow; negative float with
8876 // step>=0 → limit=MIN, stopnow). On `stopnow` PUC rewrites
8877 // `init = 0` so OP_FORLOOP's first test against the
8878 // unreachable clamp fails cleanly. An ordinary in-range
8879 // empty loop (e.g. `for i = 1, 0`) is *not* `stopnow` — it
8880 // lets OP_FORLOOP's natural test reject the first step.
8881 let (lim, stopnow) = match limit_n {
8882 Num::Int(l) => (l, false),
8883 Num::Float(f) => {
8884 if f.is_nan() {
8885 (0, true)
8886 } else if f >= i64::MAX as f64 + 1.0 {
8887 // beyond +MAX: unreachable for a decreasing loop
8888 (i64::MAX, st < 0)
8889 } else if f <= i64::MIN as f64 {
8890 // beyond -MIN: unreachable for an increasing loop
8891 (i64::MIN, st >= 0)
8892 } else if st > 0 {
8893 (f.floor() as i64, false)
8894 } else {
8895 (f.ceil() as i64, false)
8896 }
8897 }
8898 };
8899 let initv = if stopnow { 0 } else { i0 };
8900 let pre = initv.wrapping_sub(st);
8901 self.set_r(base, a, Value::Int(pre));
8902 self.set_r(base, a + 1, Value::Int(lim));
8903 self.set_r(base, a + 2, Value::Int(st));
8904 self.add_pc(inst.bx() as i32 - 1);
8905 return Ok(());
8906 }
8907 let (lim, empty) = int_for_limit(limit_n, i0, st);
8908 if empty {
8909 self.add_pc(inst.bx() as i32);
8910 return Ok(());
8911 }
8912 let count = if st > 0 {
8913 (lim as u64).wrapping_sub(i0 as u64) / (st as u64)
8914 } else {
8915 (i0 as u64).wrapping_sub(lim as u64) / (st as i128).unsigned_abs() as u64
8916 };
8917 self.set_r(base, a, Value::Int(i0));
8918 self.set_r(base, a + 1, Value::Int(count as i64));
8919 self.set_r(base, a + 2, Value::Int(st));
8920 self.set_r(base, a + 3, Value::Int(i0));
8921 }
8922 _ => {
8923 let (x0, lim, st) = (init_n.as_f64(), limit_n.as_f64(), step_n.as_f64());
8924 if st == 0.0 {
8925 return Err(self.rt_err("'for' step is zero"));
8926 }
8927 if pre53 {
8928 let pre = x0 - st;
8929 self.set_r(base, a, Value::Float(pre));
8930 self.set_r(base, a + 1, Value::Float(lim));
8931 self.set_r(base, a + 2, Value::Float(st));
8932 self.add_pc(inst.bx() as i32 - 1);
8933 return Ok(());
8934 }
8935 let runs = if st > 0.0 { x0 <= lim } else { x0 >= lim };
8936 if !runs {
8937 self.add_pc(inst.bx() as i32);
8938 return Ok(());
8939 }
8940 self.set_r(base, a, Value::Float(x0));
8941 self.set_r(base, a + 1, Value::Float(lim));
8942 self.set_r(base, a + 2, Value::Float(st));
8943 self.set_r(base, a + 3, Value::Float(x0));
8944 }
8945 }
8946 Ok(())
8947 }
8948
8949 #[inline(always)]
8950 fn for_loop(&mut self, inst: Inst, base: u32) {
8951 let a = inst.a();
8952 // PUC 5.1–5.3 `OP_FORLOOP` compares the post-step `i` to `limit`
8953 // directly (R[a+1] holds the limit, *not* a remaining-count) so the
8954 // first iteration's test fires through the same backward-jump path as
8955 // every later iteration. 5.4+ switched to the count-based form luna
8956 // already uses for `Int`; the float branch was already PUC-3.x-style.
8957 let pre53 = self.version() <= LuaVersion::Lua53;
8958 match self.r(base, a) {
8959 Value::Int(cur) if pre53 => {
8960 let Value::Int(lim) = self.r(base, a + 1) else {
8961 unreachable!()
8962 };
8963 let Value::Int(st) = self.r(base, a + 2) else {
8964 unreachable!()
8965 };
8966 let next = cur.wrapping_add(st);
8967 let cont = if st > 0 { next <= lim } else { next >= lim };
8968 if cont {
8969 self.set_r(base, a, Value::Int(next));
8970 self.set_r(base, a + 3, Value::Int(next));
8971 self.add_pc(-(inst.bx() as i32));
8972 }
8973 }
8974 Value::Int(cur) => {
8975 let Value::Int(count) = self.r(base, a + 1) else {
8976 unreachable!()
8977 };
8978 if count > 0 {
8979 let Value::Int(st) = self.r(base, a + 2) else {
8980 unreachable!()
8981 };
8982 let next = cur.wrapping_add(st);
8983 self.set_r(base, a, Value::Int(next));
8984 self.set_r(base, a + 1, Value::Int(count - 1));
8985 self.set_r(base, a + 3, Value::Int(next));
8986 self.add_pc(-(inst.bx() as i32));
8987 }
8988 }
8989 Value::Float(cur) => {
8990 let Value::Float(lim) = self.r(base, a + 1) else {
8991 unreachable!()
8992 };
8993 let Value::Float(st) = self.r(base, a + 2) else {
8994 unreachable!()
8995 };
8996 let next = cur + st;
8997 let cont = if st > 0.0 { next <= lim } else { next >= lim };
8998 if cont {
8999 self.set_r(base, a, Value::Float(next));
9000 self.set_r(base, a + 3, Value::Float(next));
9001 self.add_pc(-(inst.bx() as i32));
9002 }
9003 }
9004 _ => unreachable!("corrupt for-loop state"),
9005 }
9006 }
9007
9008 // ---- native helpers (used by builtins) ----
9009
9010 /// A native function's own captured upvalue (self lives at func_slot).
9011 ///
9012 /// Public so `native_typed` trampolines and embedders authoring
9013 /// stateful natives via `native_with(...)` can read their upvals.
9014 pub fn nat_upval(&self, func_slot: u32, i: usize) -> Value {
9015 let Value::Native(nc) = self.stack[func_slot as usize] else {
9016 unreachable!("native frame without native closure");
9017 };
9018 nc.upvals[i]
9019 }
9020
9021 /// Number of upvalues captured by the native at `func_slot` (variadic
9022 /// captures such as the `io.lines` format list).
9023 pub(crate) fn nat_upcount(&self, func_slot: u32) -> usize {
9024 let Value::Native(nc) = self.stack[func_slot as usize] else {
9025 unreachable!("native frame without native closure");
9026 };
9027 nc.upvals.len()
9028 }
9029
9030 /// Write a native function's own upvalue (stateful iterators).
9031 pub(crate) fn nat_set_upval(&mut self, func_slot: u32, i: usize, v: Value) {
9032 let Value::Native(nc) = self.stack[func_slot as usize] else {
9033 unreachable!("native frame without native closure");
9034 };
9035 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9036 unsafe { nc.as_mut() }.upvals[i] = v;
9037 // NativeClosure.upvals is traced as part of its Trace; a long-lived
9038 // stateful iterator closure (e.g. string.gmatch) sees many writes —
9039 // barrier_back once-and-done is cheaper than per-child forward.
9040 self.heap
9041 .barrier_back(nc.as_ptr() as *mut crate::runtime::heap::GcHeader);
9042 }
9043
9044 /// Read the i-th positional argument inside a `NativeFn` body
9045 /// (analogous to `lua_tovalue(L, i + 1)`). `i >= nargs` yields `Nil`,
9046 /// matching PUC's "missing arg is nil" contract. Public so embedders
9047 /// can author their own natives.
9048 pub fn nat_arg(&self, func_slot: u32, nargs: u32, i: u32) -> Value {
9049 if i < nargs {
9050 self.stack[(func_slot + 1 + i) as usize]
9051 } else {
9052 Value::Nil
9053 }
9054 }
9055
9056 /// Push the return values of a `NativeFn` and return their count
9057 /// (analogous to pushing N values then `return N` from a C function).
9058 /// Public so embedders can author their own natives.
9059 pub fn nat_return(&mut self, func_slot: u32, vals: &[Value]) -> u32 {
9060 let need = func_slot as usize + vals.len();
9061 if self.stack.len() < need {
9062 self.stack.resize(need, Value::Nil);
9063 }
9064 for (i, &v) in vals.iter().enumerate() {
9065 self.stack[func_slot as usize + i] = v;
9066 }
9067 vals.len() as u32
9068 }
9069
9070 /// Fast string concatenation of an adjacent pair, or `None` when a
9071 /// `__concat` metamethod is required.
9072 fn concat_pair(&mut self, l: Value, r: Value) -> Result<Option<Value>, LuaError> {
9073 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9074 // Length-check fast paths for both string operands BEFORE the
9075 // (expensive) copy in `concat_piece`, so a runaway `a..a..a..…`
9076 // chain (5.1 big.lua / 5.5 heavy.lua's `teststring`) raises the
9077 // overflow on the first pair that would exceed `INT_MAX` instead
9078 // of allocating multi-GB intermediates first.
9079 let max_str = i32::MAX as usize;
9080 if let (Value::Str(ls), Value::Str(rs)) = (l, r) {
9081 let a_len = ls.as_bytes().len();
9082 let b_len = rs.as_bytes().len();
9083 let new_len = a_len.checked_add(b_len);
9084 if new_len.is_none() || new_len.unwrap() > max_str {
9085 return Err(self.rt_err("string length overflow"));
9086 }
9087 }
9088 match (concat_piece(l, legacy), concat_piece(r, legacy)) {
9089 (Some(a), Some(b)) => {
9090 // PUC `MAX_SIZE` for Lua strings is `INT_MAX`; an attempt to
9091 // concat past it raises "string length overflow"
9092 // (5.5 heavy.lua `teststring` doubles `a..a..…` until it hits
9093 // exactly this wall).
9094 let new_len = a.len().checked_add(b.len());
9095 if new_len.is_none() || new_len.unwrap() > max_str {
9096 return Err(self.rt_err("string length overflow"));
9097 }
9098 let mut combined = a;
9099 combined.extend_from_slice(&b);
9100 Ok(Some(Value::Str(self.heap.intern(&combined))))
9101 }
9102 _ => Ok(None),
9103 }
9104 }
9105
9106 /// Fold the concat operands occupying `[base_a .. self.top)` right-to-left
9107 /// into a single result at `base_a` (PUC `luaV_concat`). Returns after
9108 /// either finishing (result at `base_a`) or arming a yieldable `__concat`
9109 /// call — its `Meta` continuation re-enters here on the metamethod's return.
9110 fn concat_run(&mut self, base_a: u32) -> Result<(), LuaError> {
9111 // Sum the lengths of all all-Str operands BEFORE starting the
9112 // right-associative fold so a 129-operand `a..a..…` chain
9113 // (5.1 big.lua's `rep129(longs)`) raises overflow immediately,
9114 // not after dozens of multi-GB intermediate intern+hash rounds.
9115 // A non-Str operand falls through to the per-pair check.
9116 let max_str = i32::MAX as usize;
9117 let mut total: usize = 0;
9118 let mut all_str = true;
9119 for slot in base_a..self.top {
9120 match self.stack[slot as usize] {
9121 Value::Str(s) => match total.checked_add(s.as_bytes().len()) {
9122 Some(t) if t <= max_str => total = t,
9123 _ => return Err(self.rt_err("string length overflow")),
9124 },
9125 _ => {
9126 all_str = false;
9127 break;
9128 }
9129 }
9130 }
9131 let _ = all_str; // discrimination already captured by early returns above
9132 while self.top.saturating_sub(base_a) >= 2 {
9133 let i = self.top - 1; // rightmost operand
9134 let x = self.stack[(i - 1) as usize];
9135 let y = self.stack[i as usize];
9136 match self.concat_pair(x, y)? {
9137 Some(s) => {
9138 self.stack[(i - 1) as usize] = s;
9139 self.top = i; // consumed y
9140 }
9141 None => {
9142 let mut mm = self.get_mm(x, Mm::Concat);
9143 if mm.is_nil() {
9144 mm = self.get_mm(y, Mm::Concat);
9145 }
9146 if mm.is_nil() {
9147 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9148 let bad = if concat_piece(x, legacy).is_none() {
9149 x
9150 } else {
9151 y
9152 };
9153 return Err(self.type_err("concatenate", bad));
9154 }
9155 // result lands at i-1, dropping y (top→i); resume continues.
9156 let dst = i - 1;
9157 self.begin_meta_call(
9158 mm,
9159 &[x, y],
9160 MetaAction::Concat { dst, base_a },
9161 "concat",
9162 )?;
9163 return Ok(());
9164 }
9165 }
9166 }
9167 self.maybe_collect_garbage(base_a + 1);
9168 Ok(())
9169 }
9170
9171 /// tostring with __tostring / __name support.
9172 pub(crate) fn tostring_value(&mut self, v: Value) -> Result<Vec<u8>, LuaError> {
9173 let mm = self.get_mm(v, Mm::ToString);
9174 if !mm.is_nil() {
9175 return match self.call_mm1(mm, &[v])? {
9176 Value::Str(s) => Ok(s.as_bytes().to_vec()),
9177 _ => Err(self.rt_err("'__tostring' must return a string")),
9178 };
9179 }
9180 if let Value::Table(t) = v
9181 && let Value::Str(name) = self.get_mm(v, Mm::Name)
9182 {
9183 let mut out = name.as_bytes().to_vec();
9184 out.extend_from_slice(format!(": {:p}", t.as_ptr()).as_bytes());
9185 return Ok(out);
9186 }
9187 Ok(self.tostring_basic(v))
9188 }
9189
9190 /// Basic tostring (no metamethods).
9191 pub(crate) fn tostring_basic(&mut self, v: Value) -> Vec<u8> {
9192 match v {
9193 Value::Nil => b"nil".to_vec(),
9194 Value::Bool(true) => b"true".to_vec(),
9195 Value::Bool(false) => b"false".to_vec(),
9196 Value::Int(i) => numeric::num_to_string(Num::Int(i)).into_bytes(),
9197 // PUC ≤5.2 has no integer subtype — `tostring(2.0)` is `"2"`, not
9198 // `"2.0"`. The 5.3+ split needs the suffix so `print(2.0)` is
9199 // distinguishable from `print(2)`. pm.lua :13 builds patterns by
9200 // concatenating these renderings.
9201 Value::Float(f) => {
9202 let legacy = self.version <= crate::version::LuaVersion::Lua52;
9203 numeric::num_to_string_for(Num::Float(f), legacy).into_bytes()
9204 }
9205 Value::Str(s) => s.as_bytes().to_vec(),
9206 Value::Table(t) => format!("table: {:p}", t.as_ptr()).into_bytes(),
9207 Value::Closure(c) => format!("function: {:p}", c.as_ptr()).into_bytes(),
9208 Value::Native(n) => format!("function: builtin: {:p}", n.as_ptr()).into_bytes(),
9209 Value::Coro(co) => format!("thread: {:p}", co.as_ptr()).into_bytes(),
9210 // PUC names file handles `file (0x…)`; a bare userdata is
9211 // `userdata: 0x…`. The io library overrides this via __tostring.
9212 Value::Userdata(u) => format!("userdata: {:p}", u.as_ptr()).into_bytes(),
9213 // PUC `lua_topointer`/tostring on light udata: "userdata: 0x…"
9214 // (the "light" qualifier only appears in `luaL_typeerror`).
9215 Value::LightUserdata(p) => format!("userdata: {p:p}").into_bytes(),
9216 }
9217 }
9218}
9219
/// The binary arithmetic and bitwise operators that share the interpreter's
/// arithmetic fast path and metamethod fallback.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ArithOp {
    Add,
    Sub,
    Mul,
    Mod,
    Pow,
    Div,
    IDiv,
    BAnd,
    BOr,
    BXor,
    Shl,
    Shr,
}

impl ArithOp {
    /// The operator's PUC metamethod event name without the leading
    /// underscores (`__add` → "add" etc.), as reported by
    /// `debug.getinfo(level, "n")` inside a metamethod handler.
    fn mm_name(self) -> &'static str {
        use ArithOp::*;
        match self {
            Add => "add",
            Sub => "sub",
            Mul => "mul",
            Mod => "mod",
            Pow => "pow",
            Div => "div",
            IDiv => "idiv",
            BAnd => "band",
            BOr => "bor",
            BXor => "bxor",
            Shl => "shl",
            Shr => "shr",
        }
    }
}
9256
9257fn as_num(v: Value) -> Option<Num> {
9258 match v {
9259 Value::Int(i) => Some(Num::Int(i)),
9260 Value::Float(f) => Some(Num::Float(f)),
9261 // PUC forprep coerces numeric strings (`for i = "10", "1", "-2"`).
9262 Value::Str(s) => crate::numeric::str2num(s.as_bytes(), true, true),
9263 _ => None,
9264 }
9265}
9266
9267/// A concatenable operand's byte form (string, or a number coerced to its
9268/// string), or `None` when only a `__concat` metamethod can handle it.
9269/// `legacy_float = true` follows PUC ≤5.2's `%.14g` rendering (no `.0`
9270/// suffix on integer-valued floats) — see `num_to_string_for`.
9271fn concat_piece(v: Value, legacy_float: bool) -> Option<Vec<u8>> {
9272 match v {
9273 Value::Str(s) => Some(s.as_bytes().to_vec()),
9274 Value::Int(x) => Some(numeric::num_to_string(Num::Int(x)).into_bytes()),
9275 Value::Float(x) => {
9276 Some(numeric::num_to_string_for(Num::Float(x), legacy_float).into_bytes())
9277 }
9278 _ => None,
9279 }
9280}
9281
9282/// Index into the per-basic-type metatable table for a non-table value
9283/// (None for tables, which carry their own metatable).
9284fn type_mt_slot(v: Value) -> Option<usize> {
9285 match v {
9286 Value::Nil => Some(0),
9287 Value::Bool(_) => Some(1),
9288 Value::Int(_) | Value::Float(_) => Some(2),
9289 Value::Str(_) => Some(3),
9290 Value::Closure(_) | Value::Native(_) => Some(4),
9291 // tables and full userdata carry their own metatable; threads and
9292 // light userdata have none (PUC keeps a shared per-type mt slot for
9293 // light, but luna doesn't expose it — no test gates on it yet).
9294 Value::Table(_) | Value::Coro(_) | Value::Userdata(_) | Value::LightUserdata(_) => None,
9295 }
9296}
9297
9298/// Number, or string coerced to number (5.5 default string-arith coercion).
9299fn coerce_num(v: Value) -> Option<Num> {
9300 match v {
9301 Value::Int(i) => Some(Num::Int(i)),
9302 Value::Float(f) => Some(Num::Float(f)),
9303 Value::Str(s) => numeric::str2num(s.as_bytes(), true, true),
9304 _ => None,
9305 }
9306}
9307
/// Lua's `<<`: a logical shift over the 64-bit pattern of `a`.
///
/// A negative `b` shifts right by `|b|` instead, and any shift of 64 bits
/// or more (in either direction) yields 0.
fn shift_left(a: i64, b: i64) -> i64 {
    let bits = a as u64;
    let shifted = match b {
        // every bit is shifted out
        i64::MIN..=-64 | 64..=i64::MAX => 0,
        // negative amount: logical right shift by the magnitude (1..=63)
        n if n < 0 => bits >> n.unsigned_abs(),
        n => bits << n.unsigned_abs(),
    };
    shifted as i64
}
9323
/// Exact `i < f` between an integer and a float (PUC LTintfloat shape),
/// without first rounding `i` to a float. NaN compares false.
fn int_lt_float(i: i64, f: f64) -> bool {
    // 2^63: one past `i64::MAX`; its negation is exactly `i64::MIN`.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;

    if f.is_nan() || f < -TWO_POW_63 {
        // unordered, or below every integer
        return false;
    }
    if f >= TWO_POW_63 {
        // above every integer
        return true;
    }
    // `f` is now within the i64 range, so its floor converts exactly.
    let floor = f.floor();
    let whole = floor as i64;
    if floor == f {
        i < whole
    } else {
        // fractional `f`: i < f  ⇔  i <= floor(f)
        i <= whole
    }
}
9339
/// Exact `i <= f` between an integer and a float, without first rounding
/// `i` to a float. NaN compares false.
fn int_le_float(i: i64, f: f64) -> bool {
    // 2^63: one past `i64::MAX`; its negation is exactly `i64::MIN`.
    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;

    if f >= TWO_POW_63 {
        // above every integer
        true
    } else if f >= -TWO_POW_63 {
        // in range: i <= f  ⇔  i <= floor(f)
        i <= f.floor() as i64
    } else {
        // below every integer, or NaN (which fails both tests above)
        false
    }
}
9353
9354/// Clip a numeric `for` limit to the integer range (PUC forlimit). Returns
9355/// (clipped limit, loop-is-empty).
9356fn int_for_limit(limit: Num, init: i64, step: i64) -> (i64, bool) {
9357 match limit {
9358 Num::Int(l) => {
9359 let empty = if step > 0 { init > l } else { init < l };
9360 (l, empty)
9361 }
9362 Num::Float(f) => {
9363 if f.is_nan() {
9364 return (0, true);
9365 }
9366 if step > 0 {
9367 if f >= 9_223_372_036_854_775_808.0 {
9368 (i64::MAX, false)
9369 } else {
9370 let l = f.floor();
9371 if l < -9_223_372_036_854_775_808.0 {
9372 (i64::MIN, true)
9373 } else {
9374 let li = l as i64;
9375 (li, init > li)
9376 }
9377 }
9378 } else if f <= -9_223_372_036_854_775_808.0 {
9379 (i64::MIN, false)
9380 } else {
9381 let l = f.ceil();
9382 if l >= 9_223_372_036_854_775_808.0 {
9383 // PUC forlimit: a positive limit beyond the integer range
9384 // is unreachable for a decreasing loop — empty.
9385 (i64::MAX, true)
9386 } else {
9387 let li = l as i64;
9388 (li, init < li)
9389 }
9390 }
9391 }
9392 }
9393}
9394
9395/// Strip the load-prefix sigil from a chunk name for messages (PUC keeps
9396/// `@file` / `=name` markers in `source`).
9397fn chunk_display_name(p: *const crate::runtime::LuaStr) -> &'static [u8] {
9398 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9399 let b = unsafe { crate::runtime::string::bytes_of(p) };
9400 match b.first() {
9401 Some(b'@') | Some(b'=') => &b[1..],
9402 _ => b,
9403 }
9404}
9405
9406impl Vm {
9407 /// Frame introspection for debug.getinfo: `level` 1 = the Lua function
9408 /// that called the current native. Returns (closure, current line,
9409 /// extra vararg count).
9410 /// Name (and kind: local/global/field/upvalue/method/for iterator) of the
9411 /// function running at `level`, recovered from the caller's call
9412 /// instruction (PUC funcnamefromcode). None for the main chunk or a
9413 /// tail/anonymous call with no recoverable name.
9414 /// A debug-level position: either a real Lua frame (by index) or a synthetic
9415 /// C frame standing for a call_value boundary (metamethod / pcall / __close /
9416 /// coroutine body), which `debug.getinfo` and traceback report as "C".
9417 /// PUC lua_getlocal: the `n`-th (1-based) local variable active at the Lua
9418 /// frame at `level`'s current pc, as (name, value). Locals are visited in
9419 /// registration order (start pc, then register) to match luaF_getlocalname.
9420 pub(crate) fn local_at(&self, level: i64, n: i64) -> Option<(String, Value)> {
9421 if n == 0 {
9422 return None;
9423 }
9424 let fi = match self.dbg_frame(level)? {
9425 DbgKind::Lua(fi) => fi,
9426 // Tail-call placeholder has no real frame backing it — no locals
9427 // exist to read or write here. PUC `findlocal` returns NULL on
9428 // a CIST_TAIL activation.
9429 DbgKind::Tail(_) => return None,
9430 // PUC's `luaG_findlocal` on a C activation returns `(C temporary)`
9431 // for slot `n` inside the argument window (db.lua :408-:413, and
9432 // the call/return hook reads of math.sin / select args via
9433 // `getinfo("r")` + `getlocal`). Negative `n` (vararg) is not
9434 // meaningful for a C frame here.
9435 DbgKind::C(fi) => {
9436 if n < 1 {
9437 return None;
9438 }
9439 let (func_slot, nargs) = self.c_frame_native_slots(fi)?;
9440 if (n as u32) > nargs {
9441 return None;
9442 }
9443 let slot = (func_slot + n as u32) as usize;
9444 let val = self.stack.get(slot).copied().unwrap_or(Value::Nil);
9445 return Some((self.temporary_locvar_name().to_string(), val));
9446 }
9447 };
9448 let f = self.frames[fi].lua()?;
9449 // PUC `lua_getlocal` with a negative `n` indexes the varargs: `-1`
9450 // is the first extra arg passed to the function (`...[1]`), `-2` the
9451 // second, etc. The 5.5 stack layout parks varargs in
9452 // [func_slot + 1, base), so the i-th is at `func_slot + i`.
9453 if n < 0 {
9454 let i = (-n) as u32;
9455 if i == 0 || i > f.n_varargs {
9456 return None;
9457 }
9458 let val = self
9459 .stack
9460 .get((f.func_slot + i) as usize)
9461 .copied()
9462 .unwrap_or(Value::Nil);
9463 return Some((self.vararg_locvar_name().to_string(), val));
9464 }
9465 let proto = f.closure.proto;
9466 // PUC's parser injects a hidden `(vararg table)` locvar for an
9467 // anonymous-vararg function (lparser.c new_localvarliteral), sitting
9468 // right after the fixed parameters (`numparams + 1`). Main chunks
9469 // and `(...t)` named-vararg funcs do NOT get one — gate on the
9470 // compiler-set flag, not on `is_vararg`. luna keeps user locals in
9471 // their declared registers (no shadow slot allocated), so we expose
9472 // that hidden index purely in this debug view.
9473 let num_params = proto.num_params as i64;
9474 let vararg_slot = if proto.has_vararg_table_pseudo {
9475 Some(num_params + 1)
9476 } else {
9477 None
9478 };
9479 if vararg_slot == Some(n) {
9480 return Some(("(vararg table)".to_string(), Value::Nil));
9481 }
9482 let pc = (f.pc as usize).saturating_sub(1);
9483 let mut active: Vec<&crate::runtime::LocVar> = proto
9484 .locvars
9485 .iter()
9486 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9487 .collect();
9488 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9489 let mut idx: i64 = n - 1;
9490 if let Some(vs) = vararg_slot
9491 && n > vs
9492 {
9493 idx -= 1;
9494 }
9495 let idx = idx as usize;
9496 if let Some(lv) = active.get(idx) {
9497 let val = self
9498 .stack
9499 .get((f.base + lv.reg) as usize)
9500 .copied()
9501 .unwrap_or(Value::Nil);
9502 return Some((lv.name.to_string(), val));
9503 }
9504 // PUC `luaG_findlocal` fallback: `n` is past the named locals but
9505 // still inside the frame's live register window — report a
9506 // "(temporary)" (e.g. an arithmetic intermediate). The limit is
9507 // the next frame's func slot (`ci->next->func.p`) so the
9508 // temporary window stops where the callee's frame begins
9509 // (db.lua :416/:417 distinguish a live temporary `(a+1)` from
9510 // an out-of-range slot).
9511 let limit = self
9512 .frames
9513 .get(fi + 1)
9514 .and_then(|cf| cf.lua())
9515 .map(|nf| nf.func_slot)
9516 .unwrap_or_else(|| self.top.max(f.base));
9517 let temp_reg = idx as u32;
9518 if f.base + temp_reg < limit {
9519 let val = self
9520 .stack
9521 .get((f.base + temp_reg) as usize)
9522 .copied()
9523 .unwrap_or(Value::Nil);
9524 return Some((self.lua_temporary_locvar_name().to_string(), val));
9525 }
9526 None
9527 }
9528
9529 /// `debug.setlocal`'s underlying write (PUC `lua_setlocal`). Returns
9530 /// the local / vararg name on success, `None` when the slot does not
9531 /// resolve. Mirrors `local_at`'s indexing exactly.
9532 pub(crate) fn local_set(&mut self, level: i64, n: i64, v: Value) -> Option<String> {
9533 if n == 0 {
9534 return None;
9535 }
9536 let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9537 return None;
9538 };
9539 let f = self.frames[fi].lua()?;
9540 if n < 0 {
9541 let i = (-n) as u32;
9542 if i == 0 || i > f.n_varargs {
9543 return None;
9544 }
9545 let slot = (f.func_slot + i) as usize;
9546 if let Some(s) = self.stack.get_mut(slot) {
9547 *s = v;
9548 }
9549 return Some(self.vararg_locvar_name().to_string());
9550 }
9551 let proto = f.closure.proto;
9552 let num_params = proto.num_params as i64;
9553 let vararg_slot = if proto.has_vararg_table_pseudo {
9554 Some(num_params + 1)
9555 } else {
9556 None
9557 };
9558 if vararg_slot == Some(n) {
9559 // hidden (vararg table) slot has no real storage — accept the
9560 // write as a no-op for PUC parity (db.lua doesn't write to it).
9561 return Some("(vararg table)".to_string());
9562 }
9563 let pc = (f.pc as usize).saturating_sub(1);
9564 let mut active: Vec<&crate::runtime::LocVar> = proto
9565 .locvars
9566 .iter()
9567 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9568 .collect();
9569 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9570 let mut idx: i64 = n - 1;
9571 if let Some(vs) = vararg_slot
9572 && n > vs
9573 {
9574 idx -= 1;
9575 }
9576 let idx = idx as usize;
9577 let (name, reg) = if let Some(lv) = active.get(idx) {
9578 (lv.name.to_string(), lv.reg)
9579 } else {
9580 // PUC `luaG_findlocal` fallback into the temporary window —
9581 // bounded by the next frame's func slot (see local_at).
9582 let limit = self
9583 .frames
9584 .get(fi + 1)
9585 .and_then(|cf| cf.lua())
9586 .map(|nf| nf.func_slot)
9587 .unwrap_or_else(|| self.top.max(f.base));
9588 let temp_reg = idx as u32;
9589 if f.base + temp_reg >= limit {
9590 return None;
9591 }
9592 (self.lua_temporary_locvar_name().to_string(), temp_reg)
9593 };
9594 let slot = (f.base + reg) as usize;
9595 if let Some(s) = self.stack.get_mut(slot) {
9596 *s = v;
9597 }
9598 Some(name)
9599 }
9600
9601 /// `debug.getlocal(thread, level, n)`: read frame `level` of the suspended
9602 /// coroutine `co`. Walks `co.frames` (the saved Lua activation stack) and
9603 /// reads from `co.stack`. Returns `None` for out-of-range, for negative
9604 /// vararg indexing past `n_varargs`, or for a register past the live
9605 /// window. Naming follows the same priority as `local_at`: named locals,
9606 /// then `(vararg)` for negative `n`, then `(vararg table)` for the
9607 /// explicit-`(...)` pseudo, else `(temporary)` in the live register
9608 /// window.
9609 pub(crate) fn local_at_coro(
9610 &self,
9611 co: Gc<crate::runtime::Coro>,
9612 level: i64,
9613 n: i64,
9614 ) -> Option<(String, Value)> {
9615 if level < 1 || n == 0 {
9616 return None;
9617 }
9618 let frames = &co.frames;
9619 // Logical level: iterate Lua frames from the top.
9620 let lua_indices: Vec<usize> = (0..frames.len())
9621 .rev()
9622 .filter(|&i| frames[i].lua().is_some())
9623 .collect();
9624 let fi = *lua_indices.get((level - 1) as usize)?;
9625 let f = frames[fi].lua()?;
9626 if n < 0 {
9627 let i = (-n) as u32;
9628 if i == 0 || i > f.n_varargs {
9629 return None;
9630 }
9631 let val = co
9632 .stack
9633 .get((f.func_slot + i) as usize)
9634 .copied()
9635 .unwrap_or(Value::Nil);
9636 return Some((self.vararg_locvar_name().to_string(), val));
9637 }
9638 let proto = f.closure.proto;
9639 let num_params = proto.num_params as i64;
9640 let vararg_slot = if proto.has_vararg_table_pseudo {
9641 Some(num_params + 1)
9642 } else {
9643 None
9644 };
9645 if vararg_slot == Some(n) {
9646 return Some(("(vararg table)".to_string(), Value::Nil));
9647 }
9648 let pc = (f.pc as usize).saturating_sub(1);
9649 let mut active: Vec<&crate::runtime::LocVar> = proto
9650 .locvars
9651 .iter()
9652 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9653 .collect();
9654 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9655 let mut idx: i64 = n - 1;
9656 if let Some(vs) = vararg_slot
9657 && n > vs
9658 {
9659 idx -= 1;
9660 }
9661 let idx = idx as usize;
9662 if let Some(lv) = active.get(idx) {
9663 let val = co
9664 .stack
9665 .get((f.base + lv.reg) as usize)
9666 .copied()
9667 .unwrap_or(Value::Nil);
9668 return Some((lv.name.to_string(), val));
9669 }
9670 let limit = frames
9671 .get(fi + 1)
9672 .and_then(|cf| cf.lua())
9673 .map(|nf| nf.func_slot)
9674 .unwrap_or(co.top.max(f.base));
9675 let temp_reg = idx as u32;
9676 if f.base + temp_reg < limit {
9677 let val = co
9678 .stack
9679 .get((f.base + temp_reg) as usize)
9680 .copied()
9681 .unwrap_or(Value::Nil);
9682 return Some((self.lua_temporary_locvar_name().to_string(), val));
9683 }
9684 None
9685 }
9686
9687 /// `debug.setlocal(thread, level, n, value)`: write into frame `level` of
9688 /// suspended `co`. Mirrors `local_at_coro`'s indexing exactly.
9689 pub(crate) fn local_set_coro(
9690 &mut self,
9691 co: Gc<crate::runtime::Coro>,
9692 level: i64,
9693 n: i64,
9694 v: Value,
9695 ) -> Option<String> {
9696 if level < 1 || n == 0 {
9697 return None;
9698 }
9699 let lua_indices: Vec<usize> = (0..co.frames.len())
9700 .rev()
9701 .filter(|&i| co.frames[i].lua().is_some())
9702 .collect();
9703 let fi = *lua_indices.get((level - 1) as usize)?;
9704 let (func_slot, n_varargs, base, proto, top_for_temp, next_func_slot) = {
9705 let f = co.frames[fi].lua()?;
9706 (
9707 f.func_slot,
9708 f.n_varargs,
9709 f.base,
9710 f.closure.proto,
9711 co.top.max(f.base),
9712 co.frames
9713 .get(fi + 1)
9714 .and_then(|cf| cf.lua())
9715 .map(|nf| nf.func_slot),
9716 )
9717 };
9718 if n < 0 {
9719 let i = (-n) as u32;
9720 if i == 0 || i > n_varargs {
9721 return None;
9722 }
9723 let slot = (func_slot + i) as usize;
9724 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9725 let stack = unsafe { &mut co.as_mut().stack };
9726 if let Some(s) = stack.get_mut(slot) {
9727 *s = v;
9728 }
9729 // co.stack values are traced — once-per-call barrier so propagate
9730 // sees the new value if co was already BLACK this cycle.
9731 self.heap
9732 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9733 return Some(self.vararg_locvar_name().to_string());
9734 }
9735 let num_params = proto.num_params as i64;
9736 let vararg_slot = if proto.has_vararg_table_pseudo {
9737 Some(num_params + 1)
9738 } else {
9739 None
9740 };
9741 if vararg_slot == Some(n) {
9742 return Some("(vararg table)".to_string());
9743 }
9744 let pc = (co.frames[fi].lua().unwrap().pc as usize).saturating_sub(1);
9745 let mut active: Vec<&crate::runtime::LocVar> = proto
9746 .locvars
9747 .iter()
9748 .filter(|lv| (lv.start_pc as usize) <= pc && pc < lv.end_pc as usize)
9749 .collect();
9750 active.sort_by_key(|lv| (lv.start_pc, lv.reg));
9751 let mut idx: i64 = n - 1;
9752 if let Some(vs) = vararg_slot
9753 && n > vs
9754 {
9755 idx -= 1;
9756 }
9757 let idx = idx as usize;
9758 let (name, reg) = if let Some(lv) = active.get(idx) {
9759 (lv.name.to_string(), lv.reg)
9760 } else {
9761 let limit = next_func_slot.unwrap_or(top_for_temp);
9762 let temp_reg = idx as u32;
9763 if base + temp_reg >= limit {
9764 return None;
9765 }
9766 (self.lua_temporary_locvar_name().to_string(), temp_reg)
9767 };
9768 let slot = (base + reg) as usize;
9769 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
9770 let stack = unsafe { &mut co.as_mut().stack };
9771 if let Some(s) = stack.get_mut(slot) {
9772 *s = v;
9773 }
9774 // co.stack values are traced — once-per-call barrier so propagate
9775 // sees the new value if co was already BLACK this cycle.
9776 self.heap
9777 .barrier_back(co.as_ptr() as *mut crate::runtime::heap::GcHeader);
9778 Some(name)
9779 }
9780
9781 /// Frame info for a level on a suspended coroutine (PUC
9782 /// `lua_getinfo(L1, "Sl...", &ar)` after `lua_getstack(L1, level, &ar)`).
9783 /// Returns the closure + currentline + extraargs + istailcall for the
9784 /// level-th Lua activation in `co.frames`. None if level overshoots.
9785 pub(crate) fn coro_frame_info(
9786 &self,
9787 co: Gc<crate::runtime::Coro>,
9788 level: i64,
9789 ) -> Option<(Gc<LuaClosure>, u32, i64, bool)> {
9790 if level < 1 {
9791 return None;
9792 }
9793 let lua_indices: Vec<usize> = (0..co.frames.len())
9794 .rev()
9795 .filter(|&i| co.frames[i].lua().is_some())
9796 .collect();
9797 let fi = *lua_indices.get((level - 1) as usize)?;
9798 let f = co.frames[fi].lua()?;
9799 let proto = f.closure.proto;
9800 let pc = (f.pc as usize)
9801 .saturating_sub(1)
9802 .min(proto.lines.len().saturating_sub(1));
9803 let line = proto.lines.get(pc).copied().unwrap_or(0);
9804 Some((f.closure, line, f.n_varargs as i64, f.tailcalls > 0))
9805 }
9806
9807 /// Whether `level` resolves to any live activation (PUC lua_getstack).
9808 pub(crate) fn level_in_range(&self, level: i64) -> bool {
9809 self.dbg_frame(level).is_some()
9810 }
9811
9812 /// PUC's debug-API placeholder for an unnamed vararg slot returned by
9813 /// `debug.getlocal(_, -n)`. 5.2/5.3 spelled it `"(*vararg)"`; 5.4
9814 /// dropped the asterisk in favour of `"(vararg)"`. db.lua 5.2 :189 /
9815 /// 5.3 :195 / 5.4 :286 baseline on their respective form.
9816 pub(crate) fn vararg_locvar_name(&self) -> &'static str {
9817 if matches!(self.version, LuaVersion::Lua52 | LuaVersion::Lua53) {
9818 "(*vararg)"
9819 } else {
9820 "(vararg)"
9821 }
9822 }
9823
9824 /// PUC's debug-API placeholder for an unnamed temporary on a C
9825 /// activation. 5.2/5.3 reported `"(*temporary)"`; 5.4 switched to
9826 /// `"(C temporary)"`. db.lua 5.2 :288, 5.3 :312, 5.4 :404 each pin
9827 /// their spelling.
9828 pub(crate) fn temporary_locvar_name(&self) -> &'static str {
9829 if matches!(
9830 self.version,
9831 LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9832 ) {
9833 // PUC 5.1's `findlocal` C-frame branch reported `(*temporary)`
9834 // (db.lua :228 pins it). 5.2/5.3 kept the spelling, 5.4 changed
9835 // to `(C temporary)`.
9836 "(*temporary)"
9837 } else {
9838 "(C temporary)"
9839 }
9840 }
9841
9842 /// PUC's debug-API placeholder for an unnamed Lua-frame temporary
9843 /// (an arithmetic intermediate sitting past the last named local on a
9844 /// live register slot). 5.2/5.3 reported `"(*temporary)"`; 5.4 dropped
9845 /// the asterisk to `"(temporary)"`. db.lua 5.3 :786, 5.4 :966 pin the
9846 /// spelling.
9847 pub(crate) fn lua_temporary_locvar_name(&self) -> &'static str {
9848 if matches!(
9849 self.version,
9850 LuaVersion::Lua51 | LuaVersion::Lua52 | LuaVersion::Lua53
9851 ) {
9852 "(*temporary)"
9853 } else {
9854 "(temporary)"
9855 }
9856 }
9857
9858 /// The Lua closure running at `level` on the current thread, or `None`
9859 /// when the frame is a synthetic C boundary. PUC 5.1 `getfenv`/`setfenv`
9860 /// need this to reach the function whose env they read or rewrite.
9861 pub(crate) fn lua_closure_at_level(&self, level: i64) -> Option<Gc<LuaClosure>> {
9862 // `DbgKind::Tail` also falls into the else branch — a tail-call
9863 // placeholder has no closure of its own, so PUC's `lua_getstack` +
9864 // `getfunc` for that level returns no function, and `getfenv(level)`
9865 // / `setfenv(level)` raise an error (5.1 db.lua :336/:341).
9866 let DbgKind::Lua(fi) = self.dbg_frame(level)? else {
9867 return None;
9868 };
9869 Some(self.frames[fi].lua()?.closure)
9870 }
9871
9872 pub(crate) fn coro_level_in_range(&self, co: Gc<crate::runtime::Coro>, level: i64) -> bool {
9873 if level < 1 {
9874 return false;
9875 }
9876 let count = co.frames.iter().filter(|cf| cf.lua().is_some()).count();
9877 (level as usize) <= count
9878 }
9879
9880 pub(crate) fn dbg_frame(&self, level: i64) -> Option<DbgKind> {
9881 if level < 1 {
9882 return None;
9883 }
9884 // PUC 5.1's `lua_getstack` walks the full `ci` chain — each C
9885 // activation counts as a level, and each Lua activation's
9886 // `tailcalls` adds an extra synthetic level (CIST_TAIL). 5.2+
9887 // dropped the synthetic shape: `istailcall` becomes a flag on the
9888 // real frame and Cont activations no longer count separately.
9889 // 5.1 db.lua :336-:343 pin the 5.1 shape; 5.2/5.3/5.5 db.lua's
9890 // `getinfo(2).func == g1` pins the 5.2+ shape.
9891 let v51 = self.version <= LuaVersion::Lua51;
9892 let mut lvl = level;
9893 for fi in (0..self.frames.len()).rev() {
9894 match &self.frames[fi] {
9895 CallFrame::Lua(f) => {
9896 lvl -= 1;
9897 if lvl == 0 {
9898 return Some(DbgKind::Lua(fi));
9899 }
9900 if v51 {
9901 // 5.1 reports one synthetic CIST_TAIL level per
9902 // collapsed tail call (PUC `lua_getstack` subtracts
9903 // `ci->u.l.tailcalls` from the remaining level).
9904 for _ in 0..f.tailcalls {
9905 lvl -= 1;
9906 if lvl == 0 {
9907 return Some(DbgKind::Tail(fi));
9908 }
9909 }
9910 }
9911 if f.from_c {
9912 lvl -= 1;
9913 if lvl == 0 {
9914 return Some(DbgKind::C(fi));
9915 }
9916 }
9917 }
9918 CallFrame::Cont(_) => {
9919 if !v51 {
9920 continue;
9921 }
9922 lvl -= 1;
9923 if lvl == 0 {
9924 let parent = (0..fi)
9925 .rev()
9926 .find(|&j| matches!(self.frames[j], CallFrame::Lua(_)));
9927 return Some(DbgKind::C(parent.unwrap_or(fi.saturating_sub(1))));
9928 }
9929 }
9930 }
9931 }
9932 None
9933 }
9934
9935 pub(crate) fn frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9936 let f = self.frames[fi].lua()?;
9937 // metamethod handler frames carry the event tag (e.g. "close" for
9938 // `__close`); PUC `funcnamefromcall` reads `ci->u.l.tm`.
9939 if f.is_hook {
9940 return Some(("hook", "?".to_string()));
9941 }
9942 if let Some(tm) = f.tm {
9943 return Some(("metamethod", tm_debug_name(self.version, tm)));
9944 }
9945 // a frame entered across a C boundary has no naming call instruction
9946 if fi == 0 || f.from_c {
9947 return None;
9948 }
9949 // the caller's call instruction names this frame; a continuation frame
9950 // just below (pcall/xpcall) is itself a C boundary, so f.from_c above
9951 // already short-circuits those.
9952 let caller = self.frames[fi - 1].lua()?;
9953 let caller_proto = caller.closure.proto;
9954 let p: &crate::runtime::Proto = &caller_proto;
9955 let call_pc = (caller.pc as usize).checked_sub(1)?;
9956 let instr = *p.code.get(call_pc)?;
9957 match instr.op() {
9958 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9959 Op::TForCall => Some(("for iterator", "for iterator".to_string())),
9960 _ => None,
9961 }
9962 }
9963
9964 /// Name the synthetic C level sitting below the `from_c` Lua frame at `fi`
9965 /// (PUC names a C function from the call instruction that invoked it). The
9966 /// native was called by the nearest Lua frame below `fi` (skipping pcall/
9967 /// xpcall continuations); that frame's call instruction names it.
9968 pub(crate) fn c_frame_name(&self, fi: usize) -> Option<(&'static str, String)> {
9969 // PUC `GCTM` sets `CIST_FIN` on the calling ci, so when getinfo names
9970 // the synthetic C edge between the __gc finalizer (top Lua frame, has
9971 // `tm = "gc"`) and its triggering Lua frame it reports "metamethod"
9972 // "__gc" — 5.3 db.lua :720's `getinfo(2).namewhat == "metamethod"`
9973 // pin. Restricted to the `__gc` event: `__close` (`tm = "close"`)
9974 // sets the tag on the handler frame only, so level 2 there still
9975 // names the calling Lua frame's call instruction (5.5 locals.lua
9976 // :514 pins `getinfo(2).name == "pcall"` from a __close handler).
9977 if let Some(fr) = self.frames.get(fi).and_then(|cf| cf.lua())
9978 && fr.tm == Some("gc")
9979 {
9980 let name = tm_debug_name(self.version, "gc");
9981 return Some(("metamethod", name));
9982 }
9983 let caller_fi = (0..fi).rev().find(|&i| self.frames[i].lua().is_some())?;
9984 let caller = self.frames[caller_fi].lua()?;
9985 let p = &caller.closure.proto;
9986 let call_pc = (caller.pc as usize).checked_sub(1)?;
9987 let instr = *p.code.get(call_pc)?;
9988 match instr.op() {
9989 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
9990 _ => None,
9991 }
9992 }
9993
9994 /// Native value currently sitting on the synthetic C edge identified by
9995 /// `DbgKind::C(fi)`. The walk counts how many `from_c` Lua frames live
9996 /// above `fi` (each one corresponds to one native pushing the hook) and
9997 /// indexes into `running_natives` from the top, also skipping the caller
9998 /// of `getinfo` itself (the native that is currently asking).
9999 /// db.lua :344 reads `debug.getinfo(2, "f").func` from a call hook and
10000 /// expects the just-entered C function.
10001 pub(crate) fn c_frame_func(&self, fi: usize) -> Option<Value> {
10002 let idx = self.c_frame_native_idx(fi)?;
10003 Some(Value::Native(self.running_natives[idx]))
10004 }
10005
10006 /// `(func_slot, nargs)` for the synthetic C edge identified by `C(fi)`,
10007 /// so `local_at` can index the native's argument window like PUC's
10008 /// `(C temporary)` path. Returns `None` when no matching native exists
10009 /// (e.g. the C edge corresponds to a non-native boundary).
10010 pub(crate) fn c_frame_native_slots(&self, fi: usize) -> Option<(u32, u32)> {
10011 let idx = self.c_frame_native_idx(fi)?;
10012 self.running_native_slots.get(idx).copied()
10013 }
10014
10015 fn c_frame_native_idx(&self, fi: usize) -> Option<usize> {
10016 let n_above = self.frames[fi..]
10017 .iter()
10018 .filter_map(CallFrame::lua)
10019 .filter(|f| f.from_c)
10020 .count();
10021 if n_above == 0 {
10022 return None;
10023 }
10024 // running_natives.last() is the native currently executing (the one
10025 // that called getinfo). Pop it conceptually, then take the n_above-th
10026 // entry from the top of what remains.
10027 let nr = self.running_natives.len().checked_sub(1)?;
10028 nr.checked_sub(n_above)
10029 }
10030
10031 /// PUC `pushglobalfuncname`: walk `package.loaded` to depth 2 looking for a
10032 /// native whose function pointer matches `target`, and return its qualified
10033 /// name (e.g. `"table.sort"`). A `_G.X` match is stripped to `"X"`. Returns
10034 /// `None` if no match is found. Used by `arg_error` when the running native
10035 /// was invoked from another native (PUC `ar.name == NULL` at level 0).
10036 pub(crate) fn pushglobalfuncname(
10037 &mut self,
10038 target: crate::runtime::value::NativeFn,
10039 ) -> Option<String> {
10040 let pkg_k = Value::Str(self.heap.intern(b"package"));
10041 let pkg = match self.globals().get(pkg_k) {
10042 Value::Table(t) => t,
10043 _ => return None,
10044 };
10045 let loaded_k = Value::Str(self.heap.intern(b"loaded"));
10046 let loaded = match pkg.get(loaded_k) {
10047 Value::Table(t) => t,
10048 _ => return None,
10049 };
10050 let matches = |v: Value| -> bool {
10051 matches!(v, Value::Native(nc) if std::ptr::fn_addr_eq(nc.f, target))
10052 };
10053 let mut k = Value::Nil;
10054 while let Ok(Some((nk, nv))) = loaded.next(k) {
10055 k = nk;
10056 let Value::Str(outer) = nk else { continue };
10057 let outer = String::from_utf8_lossy(outer.as_bytes()).into_owned();
10058 if matches(nv) {
10059 return Some(if outer == "_G" { String::new() } else { outer });
10060 }
10061 if let Value::Table(inner_t) = nv {
10062 let mut k2 = Value::Nil;
10063 while let Ok(Some((nk2, nv2))) = inner_t.next(k2) {
10064 k2 = nk2;
10065 if matches(nv2)
10066 && let Value::Str(inner) = nk2
10067 {
10068 let inner = String::from_utf8_lossy(inner.as_bytes()).into_owned();
10069 return Some(if outer == "_G" {
10070 inner
10071 } else {
10072 format!("{outer}.{inner}")
10073 });
10074 }
10075 }
10076 }
10077 }
10078 None
10079 }
10080
10081 /// Name and namewhat of the native currently running on behalf of the top
10082 /// Lua frame's call instruction (PUC `lua_getinfo("n")` at level 0). Lets
10083 /// `luaL_argerror` rewrite a method call's self-argument error.
10084 pub(crate) fn running_call_name(&self) -> Option<(&'static str, String)> {
10085 let caller = self.frames.iter().rev().find_map(CallFrame::lua)?;
10086 let p = &caller.closure.proto;
10087 let call_pc = (caller.pc as usize).checked_sub(1)?;
10088 let instr = *p.code.get(call_pc)?;
10089 match instr.op() {
10090 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10091 _ => None,
10092 }
10093 }
10094
10095 pub(crate) fn frame_info(&mut self, fi: usize) -> (Gc<LuaClosure>, u32, i64, bool) {
10096 let f = self.frames[fi].lua().expect("Lua frame");
10097 let proto = f.closure.proto;
10098 let pc = (f.pc as usize)
10099 .saturating_sub(1)
10100 .min(proto.lines.len().saturating_sub(1));
10101 let line = proto.lines.get(pc).copied().unwrap_or(0);
10102 // PUC CallInfo.nextraargs: the original extra-arg count, fixed at call
10103 // (independent of any later write to a materialized vararg table's `n`).
10104 // `istailcall` mirrors PUC `CIST_TAIL` for `debug.getinfo(_, "t")` —
10105 // any nonzero `tailcalls` count flips it true.
10106 (f.closure, line, f.n_varargs as i64, f.tailcalls > 0)
10107 }
10108
10109 /// Read an upvalue cell of a closure (debug.getupvalue).
10110 pub(crate) fn upvalue_value(&self, cl: Gc<LuaClosure>, idx: usize) -> Value {
10111 match cl.upvals()[idx].state() {
10112 UpvalState::Open { slot, thread } => self.read_slot(slot, thread),
10113 UpvalState::Closed(v) => v,
10114 }
10115 }
10116
10117 /// Write an upvalue cell of a closure (debug.setupvalue).
10118 pub(crate) fn upvalue_set_value(&mut self, cl: Gc<LuaClosure>, idx: usize, v: Value) {
10119 let uv = cl.upvals()[idx];
10120 match uv.state() {
10121 UpvalState::Open { slot, thread } => self.write_slot(slot, thread, v),
10122 UpvalState::Closed(_) => {
10123 // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
10124 unsafe { uv.as_mut() }.set_closed(v);
10125 self.heap
10126 .barrier_forward(uv.as_ptr() as *mut crate::runtime::heap::GcHeader, v);
10127 }
10128 }
10129 }
10130
10131 /// Lines for debug.traceback (PUC `luaL_traceback` / `pushfuncname`).
10132 /// Per Lua frame, emits `"\n\t<src>:<line>: in <funcname>"` where
10133 /// `<funcname>` is, in priority order: `"metamethod 'event'"` if the frame
10134 /// is a metamethod handler (e.g. `__close`); else `"<namewhat> '<name>'"`
10135 /// from the caller's call instruction (`getobjname`); else `"main chunk"`;
10136 /// else `"function <src:line_defined>"` for an anonymous Lua function.
10137 /// Traceback of a suspended coroutine (PUC `debug.traceback(L1, msg, lvl)`).
10138 /// Walks the coroutine's saved frames and prepends a synthetic C-level
10139 /// `'yield'` entry when the coroutine paused at a `coroutine.yield` call
10140 /// (its `resume_at` marker is set). `level` skips entries from the top
10141 /// (level 0 includes the yield frame; level 1 starts at the deepest Lua
10142 /// frame; etc.). db.lua :764-:768 sample several levels.
10143 pub(crate) fn coro_traceback(&self, co: Gc<crate::runtime::Coro>, mut level: i64) -> Vec<u8> {
10144 use crate::runtime::CoroStatus;
10145 const LEVELS1: usize = 10;
10146 const LEVELS2: usize = 11;
10147 #[derive(Clone, Copy)]
10148 enum VFrame<'a> {
10149 Lua(&'a crate::runtime::function::Frame),
10150 CPcall,
10151 CXpcall,
10152 CYield,
10153 /// Synthetic CIST_TAIL placeholder under 5.1 — one per tail
10154 /// call collapsed into the next Lua frame down the chain.
10155 Tail,
10156 }
10157 let v51 = self.version <= LuaVersion::Lua51;
10158 let mut visible: Vec<VFrame<'_>> = Vec::new();
10159 // PUC's level 0 entry on a suspended coroutine is the C call where it
10160 // paused — `coroutine.yield` for a yielded thread.
10161 if matches!(co.status, CoroStatus::Suspended) && co.resume_at.is_some() {
10162 visible.push(VFrame::CYield);
10163 }
10164 for cf in co.frames.iter().rev() {
10165 match cf {
10166 CallFrame::Lua(f) => {
10167 visible.push(VFrame::Lua(f));
10168 if v51 {
10169 for _ in 0..f.tailcalls {
10170 visible.push(VFrame::Tail);
10171 }
10172 }
10173 }
10174 CallFrame::Cont(nc) => match nc.kind {
10175 ContKind::Pcall => visible.push(VFrame::CPcall),
10176 ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10177 _ => {}
10178 },
10179 }
10180 }
10181 if level < 0 {
10182 level = 0;
10183 }
10184 if (level as usize) >= visible.len() {
10185 return Vec::new();
10186 }
10187 let visible = &visible[level as usize..];
10188 let total = visible.len();
10189 let mut out = Vec::new();
10190 // To name a Lua frame, PUC consults the caller's OP_CALL via
10191 // getobjname: find the index `fi` of the current frame in co.frames,
10192 // then look at frames[fi-1] (the caller) and read its `code[pc-1]`.
10193 let coro_frame_name = |frames: &[CallFrame],
10194 target: &crate::runtime::function::Frame|
10195 -> Option<(&'static str, String)> {
10196 let fi = frames
10197 .iter()
10198 .position(|cf| matches!(cf, CallFrame::Lua(f) if std::ptr::eq(f, target)))?;
10199 if fi == 0 || target.from_c {
10200 return None;
10201 }
10202 let caller = frames[fi - 1].lua()?;
10203 let p = &caller.closure.proto;
10204 let call_pc = (caller.pc as usize).checked_sub(1)?;
10205 let instr = *p.code.get(call_pc)?;
10206 match instr.op() {
10207 Op::Call | Op::TailCall => crate::vm::objname::getobjname(p, call_pc, instr.a()),
10208 Op::TForCall => Some(("for iterator", "for iterator".to_string())),
10209 _ => None,
10210 }
10211 };
10212 let frames = &co.frames;
10213 let emit = |out: &mut Vec<u8>, v: VFrame<'_>| match v {
10214 VFrame::Lua(f) => {
10215 let proto = f.closure.proto;
10216 let src = chunk_display_name(proto.source.as_ptr());
10217 let pc = (f.pc as usize)
10218 .saturating_sub(1)
10219 .min(proto.lines.len().saturating_sub(1));
10220 let line = proto.lines.get(pc).copied().unwrap_or(0);
10221 out.extend_from_slice(b"\n\t");
10222 out.extend_from_slice(src);
10223 out.extend_from_slice(format!(":{line}: in ").as_bytes());
10224 if let Some((namewhat, name)) = coro_frame_name(frames, f) {
10225 out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10226 } else if proto.line_defined == 0 {
10227 out.extend_from_slice(b"main chunk");
10228 } else {
10229 out.extend_from_slice(
10230 format!(
10231 "function <{}:{}>",
10232 String::from_utf8_lossy(src),
10233 proto.line_defined
10234 )
10235 .as_bytes(),
10236 );
10237 }
10238 }
10239 VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10240 VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10241 VFrame::CYield => {
10242 // PUC `pushglobalfuncname` reports `yield` as
10243 // `'coroutine.yield'` under 5.3 and 5.4 (5.3 :566 / 5.4 :830
10244 // `checktraceback` baselines). 5.1/5.2/5.5 emit the bare
10245 // `'yield'` (5.5 :841).
10246 let qualified = matches!(self.version, LuaVersion::Lua53 | LuaVersion::Lua54);
10247 if qualified {
10248 out.extend_from_slice(b"\n\t[C]: in function 'coroutine.yield'");
10249 } else {
10250 out.extend_from_slice(b"\n\t[C]: in function 'yield'");
10251 }
10252 }
10253 VFrame::Tail => {
10254 // 5.1 traceback synthetic CIST_TAIL entry — luaG_addinfo
10255 // / luaO_chunkid format: `(...tail calls...)`. 5.1 db.lua
10256 // :403 asserts these appear once per collapsed tail call.
10257 out.extend_from_slice(b"\n\t(...tail calls...)");
10258 }
10259 };
10260 if total <= LEVELS1 + LEVELS2 {
10261 for &v in visible {
10262 emit(&mut out, v);
10263 }
10264 } else {
10265 for &v in &visible[..LEVELS1] {
10266 emit(&mut out, v);
10267 }
10268 let skip = total - LEVELS1 - LEVELS2;
10269 out.extend_from_slice(format!("\n\t...\t(skipping {skip} levels)").as_bytes());
10270 for &v in &visible[total - LEVELS2..] {
10271 emit(&mut out, v);
10272 }
10273 }
10274 out
10275 }
10276
10277 pub(crate) fn traceback_bytes(&self, level: i64) -> Vec<u8> {
10278 // PUC `luaL_traceback` shows up to LEVELS1 (10) top frames + LEVELS2
10279 // (11) bottom frames; if there are more, the middle is collapsed into
10280 // a `"...\t(skipping N levels)"` marker. Without this, a stack-
10281 // overflow traceback would balloon to tens of megabytes (errors.lua's
10282 // stack-overflow test ran string.gmatch over the resulting buffer).
10283 const LEVELS1: usize = 10;
10284 const LEVELS2: usize = 11;
10285 // Collect visible frames in top-down order (deepest first). Both Lua
10286 // activations and pcall/xpcall continuations (which stand in for a
10287 // C-level pcall on the stack) are visible; PUC's traceback enumerates
10288 // both via lua_getstack. db.lua :715 expects "pcall" to appear.
10289 #[derive(Clone, Copy)]
10290 enum VFrame {
10291 Lua(usize),
10292 CPcall,
10293 CXpcall,
10294 }
10295 let mut visible: Vec<VFrame> = Vec::new();
10296 for (fi, cf) in self.frames.iter().enumerate().rev() {
10297 match cf {
10298 CallFrame::Lua(_) => visible.push(VFrame::Lua(fi)),
10299 CallFrame::Cont(nc) => match nc.kind {
10300 ContKind::Pcall => visible.push(VFrame::CPcall),
10301 ContKind::Xpcall { .. } => visible.push(VFrame::CXpcall),
10302 _ => {}
10303 },
10304 }
10305 }
10306 // PUC `luaL_traceback` starts enumerating at the given `level` (in
10307 // terms of L1's CallInfo chain). For the running-thread case the C
10308 // frame for debug.traceback itself is level 0 and luna's `visible`
10309 // doesn't include it — so level=1 (PUC default) means "emit from the
10310 // innermost Lua frame" (visible[0..]); level=k skips k-1 frames from
10311 // the top. level<=0 emits nothing extra here (d_traceback handles the
10312 // "[C]: in function 'traceback'" prefix for level==0 separately).
10313 let skip = (level - 1).max(0) as usize;
10314 if skip >= visible.len() {
10315 return Vec::new();
10316 }
10317 let visible = &visible[skip..];
10318 let total = visible.len();
10319 let mut out = Vec::new();
10320 let emit_frame = |out: &mut Vec<u8>, v: VFrame, this: &Vm| match v {
10321 VFrame::Lua(fi) => {
10322 let f = this.frames[fi].lua().expect("Lua frame");
10323 let proto = f.closure.proto;
10324 let src = chunk_display_name(proto.source.as_ptr());
10325 let pc = (f.pc as usize)
10326 .saturating_sub(1)
10327 .min(proto.lines.len().saturating_sub(1));
10328 let line = proto.lines.get(pc).copied().unwrap_or(0);
10329 out.extend_from_slice(b"\n\t");
10330 out.extend_from_slice(src);
10331 out.extend_from_slice(format!(":{line}: in ").as_bytes());
10332 if let Some((namewhat, name)) = this.frame_name(fi) {
10333 out.extend_from_slice(format!("{namewhat} '{name}'").as_bytes());
10334 } else if proto.line_defined == 0 {
10335 out.extend_from_slice(b"main chunk");
10336 } else {
10337 out.extend_from_slice(
10338 format!(
10339 "function <{}:{}>",
10340 String::from_utf8_lossy(src),
10341 proto.line_defined
10342 )
10343 .as_bytes(),
10344 );
10345 }
10346 }
10347 VFrame::CPcall => out.extend_from_slice(b"\n\t[C]: in function 'pcall'"),
10348 VFrame::CXpcall => out.extend_from_slice(b"\n\t[C]: in function 'xpcall'"),
10349 };
10350 if total <= LEVELS1 + LEVELS2 {
10351 for &v in visible {
10352 emit_frame(&mut out, v, self);
10353 }
10354 } else {
10355 for &v in &visible[..LEVELS1] {
10356 emit_frame(&mut out, v, self);
10357 }
10358 let dropped = total - LEVELS1 - LEVELS2;
10359 out.extend_from_slice(format!("\n\t...\t(skipping {dropped} levels)").as_bytes());
10360 for &v in &visible[total - LEVELS2..] {
10361 emit_frame(&mut out, v, self);
10362 }
10363 }
10364 out
10365 }
10366}
10367
10368// ────────────────────────────────────────────────────────────────────
10369// v1.3 Phase AOT Stage 7 sub-piece 4 — AOT trace dispatch install.
10370//
10371// The deploy-side resolver in `luna-runtime-helpers` walks the binary's
10372// trace-meta section after `vm.load`, resolves each entry's
10373// `(proto_hash, head_pc, fn_ptr)` triple against the loaded chunk's
10374// proto tree, and pushes a `CompiledTrace` onto the matching Proto's
10375// `traces` Vec via [`Vm::install_aot_trace`] below. The existing
10376// trace-dispatch loop (this file's `cl.proto.traces.borrow().iter()
10377// .find(|t| t.head_pc == pc && t.dispatchable)`) then fires the AOT
10378// mcode without further plumbing — same code path the runtime JIT
10379// uses.
10380//
// Why a separate impl block: it keeps the AOT API surface (one fn) easy
// to locate when grepping for `install_aot_trace`, without having to
// wade through the 8500-line `impl Vm` block above.
10384// ────────────────────────────────────────────────────────────────────
10385
10386impl Vm {
10387 /// v1.3 Phase AOT Stage 7 sub-piece 4 — install a precompiled
10388 /// `CompiledTrace` onto `proto.traces` so the interp dispatcher
10389 /// fires it at the trace's `head_pc`. This is the runtime install
10390 /// API the deploy-side `luna-runtime-helpers` resolver calls once
10391 /// per AOT-emitted trace meta entry, after looking up `proto` by
10392 /// stable hash (see `crate::runtime::function::Proto::stable_hash`).
10393 ///
10394 /// # What this does
10395 ///
10396 /// Pushes `trace` onto `proto.traces` via the existing `RefCell`.
10397 /// The trace's `entry` fn ptr must already point at runnable
10398 /// machine code (the AOT linker resolved the symbol at link time;
10399 /// the deploy resolver passes the address verbatim).
10400 ///
10401 /// # What this does NOT do
10402 ///
10403 /// - **No deduplication.** Calling twice with the same `head_pc`
10404 /// pushes two entries; the dispatcher's `find` will pick the
10405 /// first match. The deploy resolver is responsible for not
10406 /// double-installing.
10407 /// - **No invalidation of the runtime JIT cache.** If the runtime
10408 /// JIT later records + compiles a trace for the same
10409 /// `(proto, head_pc)`, both coexist on `proto.traces` and the
10410 /// dispatcher's `find` picks whichever appears first. AOT
10411 /// traces install before any runtime recording is possible
10412 /// (resolver runs before `vm.load` returns its first closure),
10413 /// so AOT traces win the race for the same site.
10414 /// - **No coverage gating.** AOT traces are trusted by
10415 /// construction — they were validated at compile time. Setting
10416 /// `dispatchable: false` on the input would silently disable
10417 /// dispatch; the caller controls that flag.
10418 ///
10419 /// # Safety / soundness
10420 ///
10421 /// `trace.entry` is an `unsafe extern "C" fn` (mmap'd or linked
10422 /// machine code). Soundness contract:
10423 ///
10424 /// - The fn pointer must remain valid for the `Vm`'s lifetime.
10425 /// In the AOT-binary deploy shape this is trivially satisfied —
10426 /// the fn lives in the binary's `.text`.
10427 /// - `trace.entry_tags` / `exit_tags` / `window_size` must match
10428 /// what the trace's IR actually compiled against; the dispatcher
10429 /// uses them to marshal `reg_state` in and out without further
10430 /// validation. A mismatch corrupts vm.stack.
10431 ///
10432 /// The AOT pipeline (`luna-aot`) is responsible for ensuring these
10433 /// invariants hold; this fn is a plain push — no validation that
10434 /// would slow the dispatcher's hot path either.
10435 pub fn install_aot_trace(
10436 &mut self,
10437 proto: crate::runtime::Gc<crate::runtime::function::Proto>,
10438 trace: crate::jit::trace::CompiledTrace,
10439 ) {
10440 let _ = self; // resolver passes &mut Vm for symmetry with future
10441 // pending-install + hash-walk variants; nothing on `self` to
10442 // mutate today because the install target lives on the Proto.
10443 proto.traces.borrow_mut().push(TArc::new(trace));
10444 }
10445
10446 /// v1.3 Phase AOT Stage 7 sub-piece 4 — walk the proto tree
10447 /// reachable from `root` and return `(proto, stable_hash)` pairs
10448 /// for every Proto found. Used by the deploy-side resolver to
10449 /// match AOT-emitted `proto_hash` keys against the freshly
10450 /// `undump`'d chunk's protos.
10451 ///
10452 /// The walk is BFS over `Proto.protos`. Same-Proto deduplication
10453 /// is done via `Gc::as_ptr` identity — a Proto re-referenced from
10454 /// multiple nested closures (rare; the cache field would catch
10455 /// the closure-side dedup, not the Proto side) is reported once.
10456 ///
10457 /// # Why on `&Vm` and not a free fn
10458 ///
10459 /// Keeps the AOT install API discoverable on the Vm surface —
10460 /// `vm.collect_proto_hashes(root)` reads naturally next to
10461 /// `vm.install_aot_trace(proto, trace)`. Doesn't actually touch
10462 /// any Vm field, so `&self` (read-only) is enough.
10463 pub fn collect_proto_hashes(
10464 &self,
10465 root: crate::runtime::Gc<crate::runtime::function::Proto>,
10466 ) -> Vec<(
10467 crate::runtime::Gc<crate::runtime::function::Proto>,
10468 [u8; 16],
10469 )> {
10470 let _ = self;
10471 let mut out = Vec::new();
10472 let mut seen: std::collections::HashSet<*const crate::runtime::function::Proto> =
10473 std::collections::HashSet::new();
10474 let mut queue: std::collections::VecDeque<
10475 crate::runtime::Gc<crate::runtime::function::Proto>,
10476 > = std::collections::VecDeque::new();
10477 queue.push_back(root);
10478 while let Some(p) = queue.pop_front() {
10479 let key = p.as_ptr() as *const _;
10480 if !seen.insert(key) {
10481 continue;
10482 }
10483 out.push((p, p.stable_hash()));
10484 for &child in p.protos.iter() {
10485 queue.push_back(child);
10486 }
10487 }
10488 out
10489 }
10490}