Skip to main content

luna_core/runtime/
function.rs

//! Function objects: compiled prototypes, Lua closures, upvalues.
2
3use crate::runtime::heap::{Gc, GcHeader, Marker};
4use crate::runtime::string::LuaStr;
5use crate::runtime::value::Value;
6use crate::vm::isa::Inst;
7
8/// An activation record on a thread's call stack. Pure data (closure handle +
9/// stack offsets), so it lives in `runtime` where the GC can trace a suspended
10/// coroutine's frames.
11#[derive(Clone, Copy)]
12pub struct Frame {
13    /// Currently executing closure.
14    pub closure: Gc<LuaClosure>,
15    /// stack index of register 0
16    pub base: u32,
17    /// Program counter (index into `closure.proto.code`).
18    pub pc: u32,
19    /// stack slot of the function (results land here)
20    pub func_slot: u32,
21    /// number of extra (vararg) arguments, living on the stack just below `base`
22    /// at `func_slot+1 .. func_slot+1+n_varargs` (PUC `CallInfo.u.l.nextraargs`).
23    /// `OP_VARARG`/`OP_VARGIDX` read them there; a named vararg only materializes
24    /// a heap table when it is written / escapes / is `_ENV`.
25    pub n_varargs: u32,
26    /// results expected by the caller (-1 = all)
27    pub nresults: i32,
28    /// pc the line hook last observed in this frame (PUC CallInfo `oldpc`);
29    /// `u32::MAX` on a fresh frame so its first instruction fires a line event
30    pub hook_oldpc: u32,
31    /// true if this Lua frame was entered across a C boundary (call_value: a
32    /// metamethod, pcall, __close handler, or a coroutine body). Debug level
33    /// traversal (`debug.getinfo`/traceback) inserts a synthetic C frame below it.
34    pub from_c: bool,
35    /// the metamethod event this frame is handling (e.g. "close" for a `__close`
36    /// handler call), so `debug.traceback`/getinfo can name it "metamethod
37    /// 'close'" (PUC `CallInfo.u.l.tm`/`luaG_funcnamefromtm`).
38    pub tm: Option<&'static str>,
39    /// true when this frame is the hook function itself (PUC sets
40    /// `CIST_HOOKED`). `debug.getinfo(1).namewhat` returns `"hook"` for it.
41    pub is_hook: bool,
42    /// PUC `ci->u.l.tailcalls` — how many tail calls have collapsed into
43    /// this activation slot. Each `OP_TailCall` chain adds one. 5.1
44    /// `lua_getstack` reports a synthetic `CIST_TAIL` level per count
45    /// (so a deeply tail-recursive function shows `tailcalls` extra
46    /// levels between itself and its real caller — 5.1 db.lua :372 walks
47    /// `getinfo(2..lim)` and expects each to be `"tail"`). The 5.2+
48    /// `istailcall` boolean is `tailcalls > 0`.
49    pub tailcalls: u32,
50}
51
52/// An entry on a thread's call stack: either a Lua activation record or a
53/// continuation frame standing in for a *yieldable native* (pcall/xpcall).
54///
55/// A `Cont` sits just below the call it protects. When that call returns,
56/// yields-to-completion, or errors, the interpreter consumes the `Cont` to wrap
57/// the outcome — the analogue of PUC `lua_pcallk`'s continuation `k`. Keeping it
58/// on the same stack as Lua frames means a `coroutine.yield` crossing it is
59/// preserved and restored automatically with the thread's saved context.
60#[derive(Clone, Copy)]
61pub enum CallFrame {
62    /// A Lua activation record.
63    Lua(
64        /// The activation record.
65        Frame,
66    ),
67    /// A continuation guarding a yieldable native call (pcall / xpcall /
68    /// metamethod / `__close` / `__pairs`).
69    Cont(
70        /// The continuation record.
71        NativeCont,
72    ),
73}
74
75impl CallFrame {
76    /// Borrow the inner Lua frame if this is a `Lua` variant.
77    #[inline]
78    pub fn lua(&self) -> Option<&Frame> {
79        match self {
80            CallFrame::Lua(f) => Some(f),
81            CallFrame::Cont(_) => None,
82        }
83    }
84
85    /// Mutably borrow the inner Lua frame if this is a `Lua` variant.
86    #[inline]
87    pub fn lua_mut(&mut self) -> Option<&mut Frame> {
88        match self {
89            CallFrame::Lua(f) => Some(f),
90            CallFrame::Cont(_) => None,
91        }
92    }
93}
94
95/// A continuation frame for `pcall`/`xpcall`: where its wrapped result lands and
96/// how to wrap it. Lives on the call stack below the protected call (see
97/// [`CallFrame`]).
98#[derive(Clone, Copy)]
99pub struct NativeCont {
100    /// What kind of protection this continuation represents.
101    pub kind: ContKind,
102    /// the protecting native's own stack slot — the wrapped status + values
103    /// (`true, …` / `false, msg`) land here
104    pub func_slot: u32,
105    /// results the caller of pcall/xpcall expects (-1 = all)
106    pub nresults: i32,
107}
108
109/// Continuation kind for yieldable native dispatch.
110#[derive(Clone, Copy)]
111pub enum ContKind {
112    /// `pcall(f, ...)` — wraps the result as `(true, ...)` / `(false, msg)`.
113    Pcall,
114    /// xpcall: the message handler to run if the protected call errors
115    Xpcall {
116        /// Message handler function invoked on error.
117        handler: Value,
118    },
119    /// a yieldable metamethod call triggered by a VM instruction (PUC's
120    /// `luaV_finishOp`): on the metamethod's return the interrupted instruction
121    /// is completed per `MetaCont`. A `coroutine.yield` inside the metamethod is
122    /// preserved on the thread's frame stack like any other call.
123    Meta(
124        /// Continuation describing how to finish the interrupted op.
125        MetaCont,
126    ),
127    /// a yieldable `__pairs` metamethod call from `pairs()` (PUC luaB_pairs uses
128    /// lua_callk): on return, its (≤4, nil-padded) results are `pairs`'s own
129    /// results. A `coroutine.yield` inside `__pairs` is preserved like pcall's.
130    Pairs,
131    /// a yieldable `__close` handler call driven by `begin_close` (PUC's
132    /// `luaF_close` + `lua_callk` continuation). On the handler's return or
133    /// error, the close iteration resumes from `CloseCont`'s state and either
134    /// invokes the next handler (pushing a fresh Cont::Close) or executes the
135    /// recorded `AfterClose` action.
136    Close(
137        /// Per-iteration close state.
138        CloseCont,
139    ),
140}
141
142/// Per-iteration state for a chain of `__close` handlers driven through the
143/// interpreter loop. When a handler is pushed onto the call stack, this rides
144/// in a `Cont::Close` frame underneath it so a `coroutine.yield` from the
145/// handler preserves the close iteration with the rest of the thread.
146#[derive(Clone, Copy)]
147pub struct CloseCont {
148    /// the close threshold: keep closing tbc slots ≥ from until exhausted
149    pub from: u32,
150    /// the error object threaded through subsequent handlers, if any
151    pub pending: Option<Value>,
152    /// what to do once every slot ≥ from is closed
153    pub after: AfterClose,
154}
155
156/// What to run once `begin_close` has drained every tbc slot.
157#[derive(Clone, Copy)]
158pub enum AfterClose {
159    /// `OP_Close` (block-end close): nothing else; next instruction continues.
160    Block,
161    /// `OP_Return*`: pop the Lua frame whose `OP_Return` triggered the close
162    /// and deliver `nret` results from `[abs_a, abs_a + nret)` to the frame's
163    /// `func_slot`. `from_native` mirrors the original op's hook flag.
164    Return {
165        /// Absolute stack index of the first return value.
166        abs_a: u32,
167        /// Number of return values.
168        nret: u32,
169        /// Mirrors the original op's hook-fired flag.
170        from_native: bool,
171    },
172    /// Error unwind: the close runs while unwinding a Lua frame. When every
173    /// handler is done, pop the deferred Lua frame, truncate to `func_slot`,
174    /// and re-raise — preferring a handler-raised error over `err` (PUC
175    /// luaF_close).
176    ResumeUnwind {
177        /// Slot to truncate the value stack to before re-raising.
178        func_slot: u32,
179        /// Original error value to re-raise (or replaced by a handler raise).
180        err: Value,
181    },
182}
183
184/// How to complete a VM instruction once its metamethod returns.
185#[derive(Clone, Copy)]
186pub struct MetaCont {
187    /// What to do with the metamethod's return value.
188    pub action: MetaAction,
189    /// the interrupted frame's `top` to restore after the metamethod returns
190    pub saved_top: u32,
191}
192
193/// Per-op finishing action for a yielded metamethod call.
194#[derive(Clone, Copy)]
195pub enum MetaAction {
196    /// arithmetic / index / unary / length: store the single result at `dst`
197    Store {
198        /// Destination register receiving the metamethod's first result.
199        dst: u32,
200    },
201    /// `__newindex`: the metamethod has no result to keep
202    Discard,
203    /// comparison (`__eq`/`__lt`/`__le`): the truthiness of the result feeds the
204    /// conditional skip — the following JMP runs iff `result.truthy() == k`.
205    /// `negate=true` flips the truthiness first, for the ≤5.3 `__le` →
206    /// `not __lt(b, a)` synthesis path where the metamethod is `__lt` but
207    /// the operator was `<=`.
208    Compare {
209        /// Sense of the conditional skip the comparison op was emitted for.
210        k: bool,
211        /// True when the 5.3 `__le → not __lt(b,a)` synthesis is in effect.
212        negate: bool,
213    },
214    /// `__concat`: store the result at `dst`, set `top = dst + 1`, then continue
215    /// folding the operands still at `[base_a .. top)` (PUC finishOp re-runs).
216    Concat {
217        /// Destination register for the metamethod's result.
218        dst: u32,
219        /// First operand register of the original concat span.
220        base_a: u32,
221    },
222}
223
/// Where a closure's upvalue is captured from, relative to the *enclosing*
/// function (PUC Upvaldesc).
#[derive(Clone, Debug)]
pub struct UpvalDesc {
    /// Captured from the enclosing frame's registers (true) or from the
    /// enclosing closure's own upvalues (false).
    pub in_stack: bool,
    /// Index in the enclosing frame's register file (when `in_stack`) or
    /// in the enclosing closure's upvalue array (otherwise).
    pub index: u8,
    /// Variable name, for error messages and debug info.
    pub name: Box<str>,
    /// The captured variable is `<const>` (5.5): assignment through this
    /// upvalue is a compile-time error.
    pub read_only: bool,
}
240
/// Debug record for a local variable: its name and the pc range over which it
/// occupies register `reg`. Used to name registers in error messages and
/// debug.getinfo (PUC LocVar).
#[derive(Clone, Debug)]
pub struct LocVar {
    /// Local-variable name.
    pub name: Box<str>,
    /// Register holding the variable while in scope.
    pub reg: u32,
    /// First pc where the variable is live (inclusive).
    pub start_pc: u32,
    /// Pc one past the last where the variable is live (exclusive).
    pub end_pc: u32,
}
255
256/// A compiled function (PUC Proto). Immutable after compilation.
257#[repr(C)]
258pub struct Proto {
259    pub(crate) hdr: GcHeader,
260    /// Bytecode instructions, in execution order.
261    pub code: Box<[Inst]>,
262    /// Constant table referenced by `LoadK` / `*K` opcodes.
263    pub consts: Box<[Value]>,
264    /// Nested prototypes referenced by `Closure`.
265    pub protos: Box<[Gc<Proto>]>,
266    /// Upvalue descriptors (one per upvalue this function captures).
267    pub upvals: Box<[UpvalDesc]>,
268    /// Fixed parameter count.
269    pub num_params: u8,
270    /// Whether the function accepts `...`.
271    pub is_vararg: bool,
272    /// PUC `lparser.c` emits a hidden `(vararg table)` locvar for a function
273    /// declared with an explicit anonymous `(...)` (and NOT for a main chunk's
274    /// implicit vararg, nor for `(...t)` which becomes a named local). When
275    /// true, `debug.getlocal` exposes the pseudo at `num_params + 1`.
276    pub has_vararg_table_pseudo: bool,
277    /// PUC 5.1 `LUAI_COMPAT_VARARG`: the function declared `...` and so gets a
278    /// hidden local named `arg` at `num_params` populated at entry with the
279    /// extra args as `{n = count, [1] = e1, [2] = e2, …}`. The slot keeps the
280    /// shape across resumes; user code can reassign it. 5.1 db.lua :279 reads
281    /// `arg.n` from inside a `line` hook walking `debug.getlocal(2, i)`.
282    pub has_compat_vararg_arg: bool,
283    /// registers needed by a frame of this function
284    pub max_stack: u8,
285    /// line of each instruction (same length as `code`)
286    pub lines: Box<[u32]>,
287    /// chunk name, for error messages
288    pub source: Gc<LuaStr>,
289    /// Source line where the function was defined.
290    pub line_defined: u32,
291    /// line of the function's closing `end` (PUC `lastlinedefined`); 0 for the
292    /// main chunk
293    pub last_line_defined: u32,
294    /// local-variable debug records (name + live pc range)
295    pub locvars: Box<[LocVar]>,
296    /// PUC 5.2+ closure cache (`Proto.cache`): the last LClosure built from
297    /// this Proto. When OP_CLOSURE fires, the VM compares each candidate
298    /// upvalue to the cached closure's same-slot upvalue (`getcached`); on a
299    /// full match the cached closure is reused, so two `function() ... end`
300    /// literals reached from the same source compile but with identical
301    /// upvalue bindings compare equal. closure.lua's `for i=1,5 do
302    /// a[i]=function(x) return x+a+_ENV end end` asserts that subsequent
303    /// iterations reuse the closure; capturing `i` instead defeats the cache.
304    pub cache: std::cell::Cell<Option<Gc<LuaClosure>>>,
305    /// Index into `upvals` of the `_ENV` upvalue (5.1 per-function-env
306    /// model needs to clone-on-closure), or `u8::MAX` for "no _ENV
307    /// upval". Computed once at Proto construction so `Op::Closure`'s
308    /// 5.1 path doesn't string-compare across `upvals` per closure.
309    pub env_upval_idx: u8,
310    /// P11-S2 — JIT cache slot. `Untried` on Proto creation; the first
311    /// `Vm::call_value` on a closure whose body fits the S1 whitelist
312    /// flips it to `Compiled(fn ptr)` and the `JitHandle` that backs
313    /// the mmap is parked on the `Vm.jit_handles` Vec for the Vm's
314    /// lifetime. `Failed` records the whitelist miss so subsequent
315    /// calls skip the compile attempt.
316    pub jit: std::cell::Cell<JitProtoState>,
317    /// P12-S1 — trace JIT hot-loop detector. Incremented by `Vm::run`
318    /// on each backward-jump dispatched within this Proto. Once the
319    /// counter passes `TRACE_HOT_THRESHOLD`, the next visit to the
320    /// backward-jump target promotes that PC to a trace head and
321    /// begins recording (S2+). `Cell<u32>` matches the interp's
322    /// single-threaded dispatch and pays no atomic cost. Cap at
323    /// `u32::MAX / 2` to leave headroom above the threshold.
324    pub trace_hot_count: std::cell::Cell<u32>,
325    /// P12-S4 — trace-on-call counter. Incremented by `begin_call` on
326    /// every Lua-callee push into this Proto. Once it passes
327    /// `CALL_HOT_THRESHOLD`, the next call into this Proto promotes
328    /// `pc=0` to a trace head and begins recording. Lets the trace
329    /// JIT cover self-recursive functions whose body holds no
330    /// negative `Op::Jmp` (`fib`, recursive `make`/`check` in
331    /// `binary_trees`), where the back-edge counter never triggers.
332    pub call_hot_count: std::cell::Cell<u32>,
333    /// P13-S13-I — count of S13-H "partial-coverage" discards on
334    /// this Proto's call-triggered recordings. Each discard is a
335    /// new opportunity for the recorder to record a different
336    /// (hopefully longer) trace at a deeper recursion point; the
337    /// trigger condition re-uses `c >= THRESHOLD &&
338    /// !already_cached` (S13-H) so the next call retries. Without
339    /// a cap, pathologically-branchy workloads like binary_trees
340    /// (`make` body contains 2 nested self-recursive calls)
341    /// produce a 1500+ discard storm — the recorder never
342    /// captures a covered trace because every base / shallow-
343    /// depth entry caught yields a partial path. The S13-I cap
344    /// bounds the storm: after `MAX_DISCARDS = 5` discards, the
345    /// next close skips the coverage check and compiles + caches
346    /// whatever shape it has (length gate will likely refuse
347    /// dispatch but at least the trigger stops firing).
348    pub trace_discard_count: std::cell::Cell<u32>,
349    /// P13-S13-K — once the S13-I discard cap forces a compile on
350    /// this Proto (the recorder gave up trying to capture a
351    /// covered trace and just compiled whatever shape it had), set
352    /// this flag to `true`. Both trigger gates (back-edge in
353    /// `Op::Jmp` and call in `begin_call`) short-circuit on
354    /// `gave_up` BEFORE doing the `proto.traces.borrow()` +
355    /// linear-scan `already_cached` check. Each post-cap call into
356    /// such a Proto avoids the RefCell borrow + Vec scan
357    /// (`binary_trees_pattern`'s 20k make + 20k check calls per
358    /// run = 40k RefCell borrows saved). The `gave_up` flag never
359    /// flips back to `false` within a Vm — gave-up is permanent
360    /// on the Proto, mirroring the `JitProtoState::Failed`
361    /// invariant.
362    pub trace_gave_up: std::cell::Cell<bool>,
363    /// P12-S2 — compiled trace cache for this Proto. A successful
364    /// `compile_trace(record)` (S2.B) parks its `CompiledTrace` here;
365    /// `Vm::run`'s S3 dispatcher (next phase) iterates this on each
366    /// back-edge target visit. `RefCell` because compile is invoked
367    /// from inside `Vm::run` and may need to push while another op
368    /// is mid-dispatch in the same Proto. Empty `Vec` until S2 lands.
369    pub traces: crate::jit::send_compat::TRefLock<
370        Vec<crate::jit::send_compat::TArc<crate::jit::trace::CompiledTrace>>,
371    >,
372}
373
/// P11-S2 / S2c — per-Proto JIT cache state. Copy so it fits a plain
/// `Cell` on the dispatch hot path (no `RefCell` borrow check); the
/// fn pointer's mmap is kept alive by `Vm.jit_handles`.
#[derive(Clone, Copy, Debug)]
pub enum JitProtoState {
    /// Compilation hasn't been attempted yet.
    Untried,
    /// Compilation was attempted and the body fell outside the whitelist;
    /// subsequent calls skip the attempt.
    Failed,
    /// Native code is installed and callable through the recorded entry.
    Compiled {
        /// Raw mmap'd code address. Transmute to the
        /// `unsafe extern "C" fn(i64, …) -> i64` shape matching
        /// `num_args` at the call site.
        entry: *const u8,
        /// 0..=MAX_JIT_ARITY. Picks the transmute target.
        num_args: u8,
        /// True when the Lua chunk terminates with `Return1` (single
        /// observable return value). False means the chunk only
        /// side-effects + `Return0` — host gets an empty `Vec<Value>`
        /// from `Vm::call_value`, an interpreter `Op::Call` gets
        /// zero results pushed (PUC nresults handling).
        returns_one: bool,
        /// P11-S3 — per-arg Float bit. Bit `i = 1` ↔ arg slot `i`
        /// is f64 (passed as i64 bit-pattern across the ABI, bitcast
        /// inside the JIT). Bit `i = 0` ↔ Int. Bits ≥ MAX_JIT_ARITY
        /// are zero.
        arg_float_mask: u8,
        /// P11-S5d — per-arg Table bit. Bit `i = 1` ↔ arg slot `i`
        /// is `Gc<Table>` raw ptr (passed as the i64 pointer value
        /// directly, since `Gc<Table>` is `NonNull<Table>` =
        /// pointer-shaped). Mutually exclusive with `arg_float_mask`
        /// for the same bit. Required so `try_jit_call_op`'s arg
        /// marshalling can accept `Value::Table(t)` and pack
        /// `t.as_ptr() as i64`; without it a Table arg would fall
        /// into the dispatcher's default-deny match arm and the
        /// callee couldn't be reached via JIT.
        arg_table_mask: u8,
        /// P11-S3 — true iff the chunk's `Return1` value is f64.
        /// Dispatcher wraps `r` as `Value::Float(f64::from_bits(r))`
        /// vs `Value::Int(r)` accordingly. Meaningful only when
        /// `returns_one == true`.
        ret_is_float: bool,
        /// P11-S5d — true iff the chunk's `Return1` value is a
        /// `Gc<Table>` ptr. Mutually exclusive with `ret_is_float`.
        /// Dispatcher wraps `r` as
        /// `Value::Table(Gc::from_ptr(r as *mut Table))`.
        ret_is_table: bool,
    },
}
425
// `JitProtoState::Compiled` holds a raw `*const u8`, so the enum — and the
// `Cell<JitProtoState>` stored on `Proto` — is automatically `!Send`/`!Sync`.
// That is intentional: keep these on a single-threaded runtime. The Vm itself
// is already `!Send` (Heap holds raw GcHeader pointers), so no auto-trait
// gymnastics are needed — this comment exists so a future audit doesn't try
// to flip the trait without thinking.
431
/// v1.3 Phase AOT Stage 7 sub-piece 4 — hand-rolled FNV-1a-128 state.
/// Used by [`Proto::stable_hash`] to fingerprint a Proto without
/// pulling a third-party hash crate (`luna-core` 0-dep contract).
///
/// FNV-1a is not cryptographic; collision-resistance suffices for the
/// AOT proto-ID use case because a collision would surface as a
/// trace-vs-proto mismatch and the dispatcher's existing tag/shape
/// guards would deopt to interp rather than corrupt state.
struct FnvHash128 {
    /// Running 128-bit hash state.
    state: u128,
}

impl FnvHash128 {
    /// Standard FNV-1a-128 offset basis.
    const OFFSET_BASIS: u128 = 0x6c62_272e_07bb_0142_62b8_2175_6295_c58d;
    /// Standard FNV-1a-128 prime (2^88 + 0x13b).
    const PRIME: u128 = 0x0000_0000_0100_0000_0000_0000_0000_013b;

    /// Start a fresh hash seeded with the offset basis.
    fn new() -> Self {
        Self {
            state: Self::OFFSET_BASIS,
        }
    }

    /// Absorb `bytes` into the running hash. FNV-1a: per byte, XOR
    /// into the low octet of state, then multiply by the prime (wrap).
    /// Splitting the input across several calls gives the same result
    /// as one call over the concatenation.
    fn update(&mut self, bytes: &[u8]) {
        self.state = bytes.iter().fold(self.state, |acc, &byte| {
            (acc ^ u128::from(byte)).wrapping_mul(Self::PRIME)
        });
    }

    /// Finalise to 16 big-endian bytes (network order — stable across
    /// platforms; the LE/BE choice is cosmetic since the only consumer
    /// is byte-equality, but BE matches the canonical FNV-1a-128
    /// reference output if anyone cross-checks).
    fn finish(self) -> [u8; 16] {
        self.state.to_be_bytes()
    }
}
475
476impl Proto {
477    /// v1.3 Phase AOT Stage 7 sub-piece 4 — stable 128-bit hash over a
478    /// Proto's identity-defining bytes. Two `Proto`s whose Lua source +
479    /// dialect compile to the same bytecode hash to the same digest;
480    /// distinct sources hash distinct. The digest is stable across
481    /// `dump` / `undump` round-trips and across separate process runs,
482    /// so an AOT pipeline (which fingerprints protos at compile time)
483    /// and the deploy `Vm` (which fingerprints the same protos after
484    /// undumping the embedded bytecode) agree on which `(Proto, pc)`
485    /// site a precompiled trace targets.
486    ///
487    /// # What's fed into the hash
488    ///
489    /// - `code`: the raw u32 packed words, in order.
490    /// - `consts`: per entry, a one-byte discriminant + payload bytes
491    ///   (Int/Float as raw 8-byte LE; Str as `[len_u32_le | bytes]`;
492    ///   Nil/Bool as discriminant alone). Heap-pointer variants in
493    ///   `Value` (Table / Closure / Native / Coro / Userdata /
494    ///   LightUserdata) never appear in a Proto's constant table —
495    ///   constants are restricted to nil / bool / number / string by
496    ///   the Lua compiler — so a `debug_assert!` catches the contract
497    ///   if a future refactor changes that.
498    /// - `upvals`: per descriptor, `in_stack` byte + `index` byte +
499    ///   `read_only` byte + name bytes (length-prefixed u32 LE).
500    /// - `num_params`, `is_vararg`, `max_stack`: single-byte each.
501    ///
502    /// # What's NOT fed in
503    ///
504    /// - Nested `protos`: each nested Proto has its own `stable_hash`;
505    ///   parent identity is determined by its own immediate bytes only.
506    ///   Callers that need a "whole tree" identity should hash the
507    ///   roots they care about.
508    /// - `lines`, `locvars`, `source`, `line_defined`,
509    ///   `last_line_defined`: debug metadata. A `.lua` source edited
510    ///   to add a comment shouldn't invalidate AOT traces — bytecode
511    ///   is the identity, not the editor cursor.
512    /// - JIT cache fields (`jit`, `traces`, `trace_hot_count`, …),
513    ///   `cache`, `has_vararg_table_pseudo`, `has_compat_vararg_arg`,
514    ///   `env_upval_idx`: runtime-only state derived from the
515    ///   load-bearing fields above.
516    ///
517    /// # Algorithm
518    ///
519    /// Hand-rolled FNV-1a-128 (no third-party deps — `luna-core` 0-dep
520    /// contract is hard). The standard 128-bit constants:
521    ///
522    /// - offset basis = `0x6c62272e07bb014262b821756295c58d`
523    /// - prime        = `0x0000000001000000000000000000013b`
524    ///
525    /// Collision resistance suffices for AOT proto ID — collisions
526    /// would manifest as a precompiled trace dispatched against the
527    /// wrong Proto, but the dispatcher's existing guards (entry_tags
528    /// match, head_pc match, register types match) would deopt to
529    /// interp on a mismatch rather than corrupt state.
530    pub fn stable_hash(&self) -> [u8; 16] {
531        let mut h = FnvHash128::new();
532        // 1. Bytecode words — `Inst` is `repr(transparent)` over u32;
533        //    feed the raw little-endian bytes so the hash matches
534        //    cross-platform (luna only targets little-endian platforms
535        //    today, but the explicit LE serialization future-proofs).
536        for inst in self.code.iter() {
537            h.update(&inst.0.to_le_bytes());
538        }
539        // 2. Constants — discriminant + payload. Keep the discriminant
540        //    byte values stable: bumping the `Value` enum order would
541        //    invalidate AOT cache files, but that's the same constraint
542        //    as `Value::tag_byte` already imposes.
543        for c in self.consts.iter() {
544            match c {
545                Value::Nil => h.update(&[0u8]),
546                Value::Bool(b) => {
547                    h.update(&[1u8, *b as u8]);
548                }
549                Value::Int(i) => {
550                    h.update(&[2u8]);
551                    h.update(&i.to_le_bytes());
552                }
553                Value::Float(f) => {
554                    // Hash the bit pattern so +0.0 / -0.0 don't
555                    // collide and NaNs are stable across runs.
556                    h.update(&[3u8]);
557                    h.update(&f.to_bits().to_le_bytes());
558                }
559                Value::Str(s) => {
560                    h.update(&[4u8]);
561                    let bytes = s.as_bytes();
562                    h.update(&(bytes.len() as u32).to_le_bytes());
563                    h.update(bytes);
564                }
565                // Heap-pointer constants are not produced by the Lua
566                // compiler. A debug_assert keeps the contract honest
567                // without paying a runtime cost in release.
568                Value::Table(_)
569                | Value::Closure(_)
570                | Value::Native(_)
571                | Value::Coro(_)
572                | Value::Userdata(_)
573                | Value::LightUserdata(_) => {
574                    debug_assert!(
575                        false,
576                        "Proto::stable_hash: unexpected heap-pointer constant \
577                         (kind={}); luna's compiler only emits nil/bool/number/string \
578                         constants",
579                        c.type_name()
580                    );
581                    // Fall-through default: treat as a NUL byte. Won't
582                    // happen in practice (compiler invariant), so the
583                    // exact behaviour doesn't matter.
584                    h.update(&[255u8]);
585                }
586            }
587        }
588        // 3. Upvalue descriptors — `name` bytes affect debug.getinfo
589        //    only, but they're cheap and bytecode-equivalent compiles
590        //    always produce equal names, so include them.
591        for u in self.upvals.iter() {
592            h.update(&[u.in_stack as u8, u.index, u.read_only as u8]);
593            let name_bytes = u.name.as_bytes();
594            h.update(&(name_bytes.len() as u32).to_le_bytes());
595            h.update(name_bytes);
596        }
597        // 4. Signature bytes.
598        h.update(&[self.num_params, self.is_vararg as u8, self.max_stack]);
599        h.finish()
600    }
601
602    pub(crate) fn trace(&self, m: &mut Marker) {
603        for &k in self.consts.iter() {
604            m.value(k);
605        }
606        for &p in self.protos.iter() {
607            m.header(p.as_ptr() as *mut GcHeader);
608        }
609        m.header(self.source.as_ptr() as *mut GcHeader);
610        // PUC `traverseproto`: the closure cache is a *weak* reference — if
611        // the cached LClosure is unmarked at sweep time, clear the slot
612        // instead of marking it. Queue self for the post-mark cleanup pass
613        // so a closure whose only remaining live reference is the cache
614        // becomes collectable (gc.lua's `__gc` finalisers inside `do ... end`
615        // blocks rely on this).
616        if self.cache.get().is_some() {
617            m.cached_protos.push(self as *const Proto as *mut Proto);
618        }
619    }
620}
621
/// P11-S5d.M — closures with `≤ INLINE_UPVALS_N` upvalues skip the
/// per-closure upvals Box. The `Op::Closure` handler builds upvals
/// into a stack array and calls `Heap::new_closure_inline(&[Gc<…>])`,
/// which writes them straight into `inline_storage` — no caller-side
/// Vec/Box. `closure_alloc`-style benchmarks create 10k single-upval
/// closures per iter; eliminating the 24-byte Vec alloc shaves ~300µs.
pub const INLINE_UPVALS_N: usize = 2;
629
630/// A Lua closure: a `Proto` paired with its captured upvalues.
631#[repr(C)]
632pub struct LuaClosure {
633    /// read through raw casts by the GC, not by field access
634    #[allow(dead_code)]
635    pub(crate) hdr: GcHeader,
636    /// The compiled function body this closure binds.
637    pub proto: Gc<Proto>,
638    /// Single source of truth for "where are the upvals?". Points to
639    /// either `inline_storage` (when `upvals_len <= INLINE_UPVALS_N`)
640    /// or `overflow.as_mut_ptr()` (otherwise). Set up by
641    /// `Heap::new_closure*` after the LuaClosure reaches its stable
642    /// heap address.
643    pub(crate) upvals_ptr: *mut Gc<Upvalue>,
644    pub(crate) upvals_len: u32,
645    /// Inline storage for small closures. Only the first
646    /// `upvals_len.min(INLINE_UPVALS_N)` slots are initialised.
647    /// `Gc<Upvalue>` is `Copy` so no explicit `Drop` pass is needed.
648    pub(crate) inline_storage: [std::mem::MaybeUninit<Gc<Upvalue>>; INLINE_UPVALS_N],
649    /// Overflow box for closures with `> INLINE_UPVALS_N` upvalues.
650    /// Empty box (dangling, no allocation) otherwise.
651    pub(crate) overflow: Box<[Gc<Upvalue>]>,
652}
653
654// SAFETY: `upvals_ptr` always refers to memory the same LuaClosure
655// owns (its own inline_storage or its `overflow` Box). The closure is
656// heap-allocated and never moves post-adoption.
657unsafe impl Send for LuaClosure {}
658unsafe impl Sync for LuaClosure {}
659
660impl LuaClosure {
661    /// View of all upvalues as a `&[Gc<Upvalue>]`. Backed by inline
662    /// storage when `upvals_len <= INLINE_UPVALS_N`, else by overflow.
663    #[inline(always)]
664    pub fn upvals(&self) -> &[Gc<Upvalue>] {
665        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
666        unsafe { std::slice::from_raw_parts(self.upvals_ptr, self.upvals_len as usize) }
667    }
668
669    #[inline(always)]
670    pub(crate) fn upvals_mut(&mut self) -> &mut [Gc<Upvalue>] {
671        // SAFETY: Gc<T> is NonNull<T> over the GC heap; the heap is single-threaded and the pointer is live as long as it is reachable from active roots (see heap.rs:5-7).
672        unsafe { std::slice::from_raw_parts_mut(self.upvals_ptr, self.upvals_len as usize) }
673    }
674
675    /// Wire `upvals_ptr` to the active backing storage. Called by the
676    /// Heap closure constructors once the LuaClosure is at its stable
677    /// heap address (inline_storage's address is only valid after the
678    /// Box::new move into the heap).
679    pub(crate) fn init_upvals_ptr(&mut self) {
680        if self.upvals_len as usize <= INLINE_UPVALS_N {
681            self.upvals_ptr = self.inline_storage.as_mut_ptr() as *mut Gc<Upvalue>;
682        } else {
683            self.upvals_ptr = self.overflow.as_mut_ptr();
684        }
685    }
686
687    pub(crate) fn trace(&self, m: &mut Marker) {
688        m.header(self.proto.as_ptr() as *mut GcHeader);
689        for &uv in self.upvals().iter() {
690            m.header(uv.as_ptr() as *mut GcHeader);
691        }
692    }
693}
694
695/// A native (host) function with captured upvalues — the analogue of PUC C
696/// closures. Builtins are allocated once at registration so identity is
697/// stable; stateful iterators (gmatch) mutate their upvalues via `as_mut`.
698#[repr(C)]
699pub struct NativeClosure {
700    /// read through raw casts by the GC, not by field access
701    #[allow(dead_code)]
702    pub(crate) hdr: GcHeader,
703    /// The host function pointer this closure dispatches to.
704    pub f: crate::runtime::value::NativeFn,
705    /// Captured upvalues, visible inside `f` via the Vm's call API.
706    pub upvals: Box<[Value]>,
707    /// v1.1 B10 Stage 2 — marker bit for async natives. When `true`,
708    /// `f` is actually an `crate::vm::async_drive::AsyncNativeFn`
709    /// (same pointer width, transmuted at the call site) returning a
710    /// `Pin<Box<dyn Future>>`. The dispatcher's native-call path checks
711    /// this bit and routes through the cooperative-yield mechanism
712    /// instead of invoking `f` synchronously. Default `false` (sync
713    /// native) for all v1.0 / v1.1-Stage-1 construction sites.
714    pub is_async: bool,
715}
716
717impl NativeClosure {
718    pub(crate) fn trace(&self, m: &mut Marker) {
719        for &v in self.upvals.iter() {
720            m.value(v);
721        }
722    }
723}
724
725/// An upvalue cell. Open: refers to a live VM stack slot (the stack is a GC
726/// root, so open cells trace nothing). Closed: owns the value inline.
727#[repr(C)]
728pub struct Upvalue {
729    /// read through raw casts by the GC, not by field access
730    #[allow(dead_code)]
731    pub(crate) hdr: GcHeader,
732    pub(crate) state: UpvalState,
733}
734
735/// Open / closed state of an upvalue cell.
736#[derive(Clone, Copy)]
737pub enum UpvalState {
738    /// references slot `slot` of `thread`'s value stack (`None` = the main
739    /// thread). The owning thread is tracked so the cell still resolves to the
740    /// right stack after a coroutine swap (P05).
741    Open {
742        /// Stack slot of the captured local on the owning thread.
743        slot: u32,
744        /// Owning thread, or `None` for the main thread.
745        thread: Option<Gc<crate::runtime::coroutine::Coro>>,
746    },
747    /// Captured value has been hoisted into the cell.
748    Closed(
749        /// The closed-over value.
750        Value,
751    ),
752}
753
754impl Upvalue {
755    /// Return the upvalue's current state (open / closed).
756    pub fn state(&self) -> UpvalState {
757        self.state
758    }
759
760    pub(crate) fn set_closed(&mut self, v: Value) {
761        self.state = UpvalState::Closed(v);
762    }
763
764    pub(crate) fn trace(&self, m: &mut Marker) {
765        match self.state {
766            UpvalState::Closed(v) => {
767                m.value(v);
768            }
769            UpvalState::Open {
770                thread: Some(co), ..
771            } => {
772                m.header(co.as_ptr() as *mut GcHeader);
773            }
774            UpvalState::Open { thread: None, .. } => {}
775        }
776    }
777}