Skip to main content

lua_vm/
undump.rs

1//! Load precompiled Lua chunks.
2//!
3//! Direct port of `reference/lua-5.4.7/src/lundump.c` (335 lines, 20 items).
4//! Declarations from `lundump.h` are merged here per PORTING.md §1.
5//!
6//! The public entry point is [`undump`], which reads a binary Lua chunk from
7//! a [`ZIO`] stream and returns a Lua closure ready to call.
8
9// TODO(port): resolve import paths once the crate module graph is settled
10// in Phase B.  These are best-guess paths based on other translated files.
11use crate::state::LuaState;
12#[allow(unused_imports)] use crate::prelude::*;
13use crate::zio::ZIO;
14use lua_types::error::LuaError;
15use lua_types::value::LuaValue;
16
17// PORT NOTE: GcRef<T>, LuaProto, LuaClosure, LuaString, UpvalDesc, LocalVar,
18// AbsLineInfo, and Instruction are expected to live in lua_types or lua_vm
19// crates.  All paths below are provisional for Phase A.
20use lua_types::proto::{LuaProto, UpvalDesc, LocalVar, AbsLineInfo};
21use lua_types::closure::LuaLClosure;
22use lua_types::string::LuaString;
23use lua_types::gc::GcRef;
24use lua_types::opcode::Instruction;
25
26// ── Constants (from lundump.h) ─────────────────────────────────────────────
27
/// Data marker embedded in the chunk header to catch conversion errors
/// (six bytes: `19 93 0D 0A 1A 0A`).
const LUAC_DATA: &[u8] = &[0x19, 0x93, b'\r', b'\n', 0x1a, b'\n'];

/// Integer sentinel stored in the header; a mismatch reveals a different
/// integer size or endianness.
const LUAC_INT: i64 = 0x5678;

/// Float sentinel stored in the header; a mismatch reveals a different float
/// format.  (C wraps the literal in `cast_num`; here it is a plain `f64`.)
const LUAC_NUM: f64 = 370.5;

/// One-byte version tag: major version in the upper nibble, minor version in
/// the lower one.  For `LUA_VERSION_NUM = 504` this is `(5 << 4) | 4 = 0x54`.
const LUAC_VERSION: u8 = (5 << 4) | 4;

/// Chunk format number compared against the header's format byte.
const LUAC_FORMAT: u8 = 0;

/// Signature that opens every binary chunk: ESC (`0x1b`) followed by `Lua`.
const LUA_SIGNATURE: &[u8] = &[0x1b, b'L', b'u', b'a'];

/// Longest length, in bytes, still treated as a "short" string
/// (`LUAI_MAXSHORTLEN` in C).
const MAX_SHORT_LEN: usize = 40;
49
// ── Constant-pool type tags (from lobject.h makevariant) ───────────────────
//
// Each entry of the constants array starts with one of these tag bytes, as
// written by ldump.c.  A tag combines a base type with a variant number:
// makevariant(t, v) = t | (v << 4).
//
// PORT NOTE: types.tsv maps LUA_VNIL → LuaValue::Nil etc., but the binary
// format carries the raw tag integers from lobject.h, so they are spelled out
// here as u8 constants to keep the match in load_constants readable.

/// Combine a base type tag with a variant number, like C's `makevariant`.
const fn make_variant(base: u8, variant: u8) -> u8 {
    base | (variant << 4)
}

const TAG_NIL: u8 = make_variant(0, 0);
const TAG_FALSE: u8 = make_variant(1, 0);
const TAG_TRUE: u8 = make_variant(1, 1);
const TAG_INT: u8 = make_variant(3, 0);
const TAG_FLOAT: u8 = make_variant(3, 1);
const TAG_SHORT_STR: u8 = make_variant(4, 0);
const TAG_LONG_STR: u8 = make_variant(4, 1);
67
68// ── LoadState ──────────────────────────────────────────────────────────────
69
70/// Loader state bundled for convenience: Lua state, input stream, and the
71/// chunk name used in error messages.
72///
73/// # C mapping
74/// ```c
75///
76/// ```
77///
78/// PORT NOTE: In C, `LoadState` holds raw pointers to `lua_State` and `ZIO`.
79/// In Rust these become references with a shared lifetime `'a`.  The struct is
80/// always stack-allocated inside [`undump`] and never escapes the call.
81struct LoadState<'a> {
82    state: &'a mut LuaState,
83    z: &'a mut ZIO,
84}
85
86// ── Error helper ───────────────────────────────────────────────────────────
87
88/// Build a syntax error for a malformed binary chunk.
89///
90/// # C source
91/// ```c
92///
93/// //   luaO_pushfstring(S->L, "%s: bad binary format (%s)", S->name, why);
94/// //   luaD_throw(S->L, LUA_ERRSYNTAX);
95/// // }
96/// ```
97///
98/// PORT NOTE: `l_noret` in C (diverges via `longjmp`).  In Rust we return
99/// `LuaError` and the caller does `return Err(load_error(...))`.  The C
100/// pattern `luaO_pushfstring + luaD_throw(LUA_ERRSYNTAX)` collapses to a
101/// single `LuaError::syntax` per error_sites.tsv.
102///
103/// TODO(port): `s.name` is `Vec<u8>`; `LuaError::syntax` takes `format_args!`
104/// which requires an `std::fmt::Display` implementor.  `Vec<u8>` does not
105/// implement `Display`.  Phase B should add a byte-string formatting path to
106/// `LuaError::syntax_bytes` or similar, so the chunk name is included verbatim
107/// in the message.
108fn load_error(_s: &LoadState<'_>, why: &'static str) -> LuaError {
109    LuaError::syntax(format_args!("bad binary format ({})", why))
110}
111
112// ── Low-level I/O ──────────────────────────────────────────────────────────
113
114/// Read exactly `buf.len()` bytes from the stream into `buf`.
115///
116/// # C source
117/// ```c
118///
119/// //   if (luaZ_read(S->Z, b, size) != 0)
120/// //     error(S, "truncated chunk");
121/// // }
122/// ```
123///
124/// PORT NOTE: C takes `void *b` + explicit `size`.  In Rust we use `&mut [u8]`
125/// whose length encodes the byte count.  `luaZ_read` returns the number of
126/// bytes NOT read (0 = success), matching `ZIO::read`'s contract.
127fn load_block(s: &mut LoadState<'_>, buf: &mut [u8]) -> Result<(), LuaError> {
128    // macros.tsv: luaZ_read → z.read(buf)  (returns usize unread)
129    if s.z.read(buf) != 0 {
130        return Err(load_error(s, "truncated chunk"));
131    }
132    Ok(())
133}
134
135/// Read a single byte from the stream.
136///
137/// # C source
138/// ```c
139///
140/// //   int b = zgetc(S->Z);
141/// //   if (b == EOZ)
142/// //     error(S, "truncated chunk");
143/// //   return cast_byte(b);
144/// // }
145/// ```
146///
147/// PORT NOTE: `cast_byte` → `as u8` per macros.tsv; `zgetc` → `z.getc()`.
148fn load_byte(s: &mut LoadState<'_>) -> Result<u8, LuaError> {
149    // macros.tsv: zgetc → z.getc()  returning i32
150    let b = s.z.getc();
151    if b == crate::zio::EOZ {
152        return Err(load_error(s, "truncated chunk"));
153    }
154    // macros.tsv: cast_byte → x as u8
155    Ok(b as u8)
156}
157
158/// Read a variable-length unsigned integer (7 bits per byte, big-endian,
159/// MSB-first continuation flag).
160///
161/// # C source
162/// ```c
163///
164/// //   size_t x = 0;
165/// //   int b;
166/// //   limit >>= 7;
167/// //   do {
168/// //     b = loadByte(S);
169/// //     if (x >= limit)
170/// //       error(S, "integer overflow");
171/// //     x = (x << 7) | (b & 0x7f);
172/// //   } while ((b & 0x80) == 0);
173/// //   return x;
174/// // }
175/// ```
176///
177/// PORT NOTE: The encoding terminates when a byte with the high bit set is
178/// seen (the *last* byte has bit 7 = 1).  That is the opposite of the more
179/// common LEB128 where the continuation bit means "more follows".
180fn load_unsigned(s: &mut LoadState<'_>, limit: usize) -> Result<usize, LuaError> {
181    let mut x: usize = 0;
182    let limit = limit >> 7;
183    loop {
184        let b = load_byte(s)? as usize;
185        if x >= limit {
186            return Err(load_error(s, "integer overflow"));
187        }
188        x = (x << 7) | (b & 0x7f);
189        if (b & 0x80) != 0 {
190            break;
191        }
192    }
193    Ok(x)
194}
195
196/// Read a `size_t`-sized unsigned value.
197///
198/// # C source
199/// ```c
200///
201/// //   return loadUnsigned(S, MAX_SIZET);
202/// // }
203/// ```
204///
205/// PORT NOTE: `MAX_SIZET` → `usize::MAX` per macros.tsv.
206fn load_size(s: &mut LoadState<'_>) -> Result<usize, LuaError> {
207    // macros.tsv: MAX_SIZET → usize::MAX
208    load_unsigned(s, usize::MAX)
209}
210
211/// Read a signed `int`-sized value.
212///
213/// # C source
214/// ```c
215///
216/// //   return cast_int(loadUnsigned(S, INT_MAX));
217/// // }
218/// ```
219///
220/// PORT NOTE: `cast_int` → `x as i32` per macros.tsv.  `INT_MAX` → `i32::MAX
221/// as usize`.
222fn load_int(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
223    // macros.tsv: cast_int → x as i32
224    let v = load_unsigned(s, i32::MAX as usize)?;
225    Ok(v as i32)
226}
227
228/// Read a `lua_Number` (f64) as eight raw native-endian bytes.
229///
230/// # C source
231/// ```c
232///
233/// //   lua_Number x;
234/// //   loadVar(S, x);   /* expands to loadBlock(S, &x, sizeof(x)) */
235/// //   return x;
236/// // }
237/// ```
238///
239/// PORT NOTE: `loadVar` reads `sizeof(lua_Number) = 8` raw bytes directly
240/// into the value.  In Rust we use `f64::from_ne_bytes` (native endian) to
241/// reconstruct the value from the eight bytes.  The binary format is host-
242/// endian for these fields; the header check verifies endianness compatibility
243/// via `LUAC_INT` and `LUAC_NUM` sentinels.
244fn load_number(s: &mut LoadState<'_>) -> Result<f64, LuaError> {
245    let mut buf = [0u8; 8];
246    load_block(s, &mut buf)?;
247    // PERF(port): f64::from_ne_bytes is zero-cost — same as C's union cast
248    Ok(f64::from_ne_bytes(buf))
249}
250
251/// Read a `lua_Integer` (i64) as eight raw native-endian bytes.
252///
253/// # C source
254/// ```c
255///
256/// //   lua_Integer x;
257/// //   loadVar(S, x);   /* expands to loadBlock(S, &x, sizeof(x)) */
258/// //   return x;
259/// // }
260/// ```
261///
262/// PORT NOTE: Same reasoning as [`load_number`] — uses `i64::from_ne_bytes`.
263fn load_integer(s: &mut LoadState<'_>) -> Result<i64, LuaError> {
264    let mut buf = [0u8; 8];
265    load_block(s, &mut buf)?;
266    Ok(i64::from_ne_bytes(buf))
267}
268
269// ── String loading ─────────────────────────────────────────────────────────
270
271/// Load a nullable string.  Returns `None` if the stored size is zero.
272///
273/// # C source
274/// ```c
275///
276/// //   lua_State *L = S->L;
277/// //   TString *ts;
278/// //   size_t size = loadSize(S);
279/// //   if (size == 0) return NULL;
280/// //   else if (--size <= LUAI_MAXSHORTLEN) {  /* short string? */
281/// //     char buff[LUAI_MAXSHORTLEN];
282/// //     loadVector(S, buff, size);
283/// //     ts = luaS_newlstr(L, buff, size);
284/// //   } else {  /* long string */
285/// //     ts = luaS_createlngstrobj(L, size);
286/// //     setsvalue2s(L, L->top.p, ts);  /* anchor it (loadVector can GC) */
287/// //     luaD_inctop(L);
288/// //     loadVector(S, getlngstr(ts), size);
289/// //     L->top.p--;
290/// //   }
291/// //   luaC_objbarrier(L, p, ts);
292/// //   return ts;
293/// // }
294/// ```
295///
296/// PORT NOTE: The Lua binary format stores `actual_length + 1` so that size=0
297/// is the null-string sentinel.  After reading `raw_size`, the actual byte
298/// count is `raw_size - 1`.
299///
300/// PORT NOTE: In C, long strings are created first (to anchor them from GC)
301/// and then filled in-place via `getlngstr`.  In Rust, GC anchoring is not
302/// needed in Phase A–C (Rc keeps objects alive); we read into a buffer and
303/// then create the string.
304///
305/// TODO(port): `luaS_newlstr` interns the string (short strings only);
306/// `luaS_createlngstrobj` does NOT intern.  Phase A uses `state.intern_str()`
307/// for both.  Phase B should add a `state.create_long_str()` path that skips
308/// the intern table, matching C semantics.
309///
310/// PORT NOTE: The `_proto` parameter corresponds to C's `Proto *p` used only
311/// for `luaC_objbarrier(L, p, ts)`.  The barrier is a no-op in Phase A–C
312/// (macros.tsv: `luaC_objbarrier → state.gc().obj_barrier(p, o)` no-op).
313fn load_string_n(
314    s: &mut LoadState<'_>,
315    _proto: &LuaProto,
316) -> Result<Option<GcRef<LuaString>>, LuaError> {
317    let raw_size = load_size(s)?;
318    if raw_size == 0 {
319        return Ok(None);
320    }
321    let size = raw_size - 1;
322
323    // Read the raw bytes regardless of short/long distinction.
324    let mut buf = vec![0u8; size];
325
326    if size <= MAX_SHORT_LEN {
327        load_block(s, &mut buf)?;
328    } else {
329        load_block(s, &mut buf)?;
330    }
331
332    // macros.tsv: luaS_newlstr → state.intern_str(&s[..n])
333    // TODO(port): long strings should not be interned; see doc-comment above.
334    let ts = s.state.intern_str(&buf)?;
335
336    // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
337    // (dropped — Phase A GC is Rc, no barrier needed)
338
339    Ok(Some(ts))
340}
341
342/// Load a non-nullable string; error if the stream encodes a null string.
343///
344/// # C source
345/// ```c
346///
347/// //   TString *st = loadStringN(S, p);
348/// //   if (st == NULL)
349/// //     error(S, "bad format for constant string");
350/// //   return st;
351/// // }
352/// ```
353fn load_string(
354    s: &mut LoadState<'_>,
355    proto: &LuaProto,
356) -> Result<GcRef<LuaString>, LuaError> {
357    match load_string_n(s, proto)? {
358        Some(ts) => Ok(ts),
359        None => Err(load_error(s, "bad format for constant string")),
360    }
361}
362
363// ── Proto-field loaders ────────────────────────────────────────────────────
364
365/// Load the bytecode instruction array into a prototype.
366///
367/// # C source
368/// ```c
369///
370/// //   int n = loadInt(S);
371/// //   f->code = luaM_newvectorchecked(S->L, n, Instruction);
372/// //   f->sizecode = n;
373/// //   loadVector(S, f->code, n);
374/// // }
375/// ```
376///
377/// PORT NOTE: `loadVector(S, f->code, n)` expands to
378/// `loadBlock(S, f->code, n * sizeof(Instruction))` — `n` raw 4-byte words.
379/// We read each `u32` in native-endian order, consistent with how
380/// [`load_number`] and [`load_integer`] work.
381///
382/// PORT NOTE: `f->sizecode` is removed in Rust — `Vec::len()` covers it
383/// (types.tsv: `Proto.sizecode → removed`).
384fn load_code(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
385    let n = load_int(s)? as usize;
386    // macros.tsv: luaM_newvectorchecked → vec_checked::<T>(n)?
387    // PORT NOTE: Phase A uses Vec directly; overflow check omitted for brevity.
388    // TODO(port): add overflow / OOM check matching luaM_newvectorchecked.
389    let mut code = Vec::with_capacity(n);
390    for _ in 0..n {
391        let mut buf = [0u8; 4];
392        load_block(s, &mut buf)?;
393        // Instruction is a u32 newtype per types.tsv
394        code.push(Instruction(u32::from_ne_bytes(buf)));
395    }
396    f.code = code;
397    Ok(())
398}
399
400/// Load the constant pool into a prototype.
401///
402/// # C source
403/// ```c
404///
405/// //   int i; int n = loadInt(S);
406/// //   f->k = luaM_newvectorchecked(S->L, n, TValue);
407/// //   f->sizek = n;
408/// //   for (i = 0; i < n; i++) setnilvalue(&f->k[i]);
409/// //   for (i = 0; i < n; i++) {
410/// //     TValue *o = &f->k[i];
411/// //     int t = loadByte(S);
412/// //     switch (t) {
413/// //       case LUA_VNIL:    setnilvalue(o); break;
414/// //       case LUA_VFALSE:  setbfvalue(o); break;
415/// //       case LUA_VTRUE:   setbtvalue(o); break;
416/// //       case LUA_VNUMFLT: setfltvalue(o, loadNumber(S)); break;
417/// //       case LUA_VNUMINT: setivalue(o, loadInteger(S)); break;
418/// //       case LUA_VSHRSTR:
419/// //       case LUA_VLNGSTR: setsvalue2n(S->L, o, loadString(S, f)); break;
420/// //       default: lua_assert(0);
421/// //     }
422/// //   }
423/// // }
424/// ```
425///
426/// PORT NOTE: The initial `setnilvalue` loop initialises the vector for GC
427/// safety in C.  In Rust, `Vec` is always in a valid state; we skip it.
428fn load_constants(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
429    let n = load_int(s)? as usize;
430    // TODO(port): add overflow / OOM check.
431    let mut k = Vec::with_capacity(n);
432
433    // Dropped — Rust Vec elements are never uninitialized.
434
435    for _ in 0..n {
436        let t = load_byte(s)?;
437        let val = match t {
438            // macros.tsv: setnilvalue → *o = LuaValue::Nil
439            TAG_NIL => LuaValue::Nil,
440
441            // macros.tsv: setbfvalue → *o = LuaValue::Bool(false)
442            TAG_FALSE => LuaValue::Bool(false),
443
444            // macros.tsv: setbtvalue → *o = LuaValue::Bool(true)
445            TAG_TRUE => LuaValue::Bool(true),
446
447            // macros.tsv: setfltvalue → *o = LuaValue::Float(x)
448            TAG_FLOAT => LuaValue::Float(load_number(s)?),
449
450            // macros.tsv: setivalue → *o = LuaValue::Int(x)
451            TAG_INT => LuaValue::Int(load_integer(s)?),
452
453            // macros.tsv: setsvalue2n → *dst = LuaValue::Str(s.clone())
454            TAG_SHORT_STR | TAG_LONG_STR => {
455                let ts = load_string(s, f)?;
456                LuaValue::Str(ts)
457            }
458
459            // macros.tsv: lua_assert → debug_assert!
460            _ => {
461                debug_assert!(false, "unknown constant type tag {:#04x}", t);
462                LuaValue::Nil
463            }
464        };
465        k.push(val);
466    }
467
468    f.k = k;
469    Ok(())
470}
471
472/// Load nested function prototypes into a prototype.
473///
474/// # C source
475/// ```c
476///
477/// //   int i; int n = loadInt(S);
478/// //   f->p = luaM_newvectorchecked(S->L, n, Proto *);
479/// //   f->sizep = n;
480/// //   for (i = 0; i < n; i++) f->p[i] = NULL;
481/// //   for (i = 0; i < n; i++) {
482/// //     f->p[i] = luaF_newproto(S->L);
483/// //     luaC_objbarrier(S->L, f, f->p[i]);
484/// //     loadFunction(S, f->p[i], f->source);
485/// //   }
486/// // }
487/// ```
488///
489/// PORT NOTE: C creates the proto first (for GC anchor) then fills it.  In
490/// Rust we create a default `LuaProto`, fill it, then wrap in `GcRef`.
491/// `f->sizep` is removed per types.tsv (`Proto.sizep → removed`).
492fn load_protos(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
493    let n = load_int(s)? as usize;
494    // TODO(port): add overflow / OOM check.
495    let mut protos = Vec::with_capacity(n);
496
497
498    for _ in 0..n {
499        let mut sub = LuaProto::placeholder();
500
501        // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
502
503        // Pass parent source as fallback.
504        let parent_source = f.source.clone();
505        load_function(s, &mut sub, parent_source)?;
506
507        // Wrap in GcRef after loading.
508        // PORT NOTE: In C f->p[i] is a Proto * held by the proto's GC roots.
509        // In Rust Phase A it becomes Rc<LuaProto>.
510        // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
511        let sub_ref = GcRef::new(sub);
512        sub_ref.account_buffer(sub_ref.buffer_bytes() as isize);
513        protos.push(sub_ref);
514    }
515
516    f.p = protos;
517    Ok(())
518}
519
520/// Load upvalue descriptors into a prototype.
521///
522/// # C source
523/// ```c
524///
525/// //   int i, n;
526/// //   n = loadInt(S);
527/// //   f->upvalues = luaM_newvectorchecked(S->L, n, Upvaldesc);
528/// //   f->sizeupvalues = n;
529/// //   for (i = 0; i < n; i++)
530/// //     f->upvalues[i].name = NULL;  /* make array valid for GC */
531/// //   for (i = 0; i < n; i++) {
532/// //     f->upvalues[i].instack = loadByte(S);
533/// //     f->upvalues[i].idx    = loadByte(S);
534/// //     f->upvalues[i].kind   = loadByte(S);
535/// //   }
536/// // }
537/// ```
538///
539/// PORT NOTE: The C comment says names must be filled first for GC safety.
540/// In Rust we build `UpvalDesc` values with `name: None` and fill names later
541/// in [`load_debug`].  This requires `UpvalDesc.name` to be
542/// `Option<GcRef<LuaString>>` rather than `GcRef<LuaString>` as listed in
543/// types.tsv.  Phase B should reconcile the types.tsv entry.
544///
545/// PORT NOTE: `f->sizeupvalues` is removed per types.tsv.
546fn load_upvalues(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
547    let n = load_int(s)? as usize;
548    // TODO(port): add overflow / OOM check.
549
550    // In Rust: construct with name = None.
551
552    let mut upvalues = Vec::with_capacity(n);
553    for _ in 0..n {
554        let instack_raw = load_byte(s)?;
555        let idx = load_byte(s)?;
556        let kind = load_byte(s)?;
557
558        // types.tsv: Upvaldesc.instack → bool (stored as lu_byte in C)
559        upvalues.push(UpvalDesc {
560            name: None,           // filled by load_debug
561            instack: instack_raw != 0,
562            idx,
563            kind,
564        });
565    }
566
567    f.upvalues = upvalues;
568    Ok(())
569}
570
571/// Load debug information into a prototype.
572///
573/// # C source
574/// ```c
575///
576/// //   int i, n;
577/// //   n = loadInt(S);
578/// //   f->lineinfo = luaM_newvectorchecked(S->L, n, ls_byte);
579/// //   f->sizelineinfo = n;
580/// //   loadVector(S, f->lineinfo, n);
581/// //   n = loadInt(S);
582/// //   f->abslineinfo = luaM_newvectorchecked(S->L, n, AbsLineInfo);
583/// //   f->sizeabslineinfo = n;
584/// //   for (i = 0; i < n; i++) {
585/// //     f->abslineinfo[i].pc   = loadInt(S);
586/// //     f->abslineinfo[i].line = loadInt(S);
587/// //   }
588/// //   n = loadInt(S);
589/// //   f->locvars = luaM_newvectorchecked(S->L, n, LocVar);
590/// //   f->sizelocvars = n;
591/// //   for (i = 0; i < n; i++) f->locvars[i].varname = NULL;
592/// //   for (i = 0; i < n; i++) {
593/// //     f->locvars[i].varname = loadStringN(S, f);
594/// //     f->locvars[i].startpc = loadInt(S);
595/// //     f->locvars[i].endpc   = loadInt(S);
596/// //   }
597/// //   n = loadInt(S);
598/// //   if (n != 0)  /* does it have debug information? */
599/// //     n = f->sizeupvalues;  /* must be this many */
600/// //   for (i = 0; i < n; i++)
601/// //     f->upvalues[i].name = loadStringN(S, f);
602/// // }
603/// ```
604///
605/// PORT NOTE: `ls_byte` (signed byte) maps to `i8` per types.tsv.
606/// `loadVector(S, f->lineinfo, n)` reads `n * sizeof(ls_byte) = n` bytes.
607/// We read them as `u8` then reinterpret as `i8` via cast.
608///
609/// PORT NOTE: Size companion fields (`sizelineinfo`, `sizeabslineinfo`,
610/// `sizelocvars`) are all removed per types.tsv — `Vec::len()` covers them.
611///
612/// PORT NOTE: `LocalVar.varname` and `UpvalDesc.name` are both
613/// `Option<GcRef<LuaString>>` here because `loadStringN` can return `None`.
614/// See also the note on [`load_upvalues`].
615fn load_debug(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
616    let n = load_int(s)? as usize;
617    let mut lineinfo = vec![0i8; n];
618    // Read as u8 slice then cast — safe because i8 and u8 have the same
619    // in-memory representation and we're casting a byte from the binary stream.
620    // SAFETY(port): this would need `unsafe` for the slice transmute in real
621    // code; for Phase A we read byte-by-byte.
622    // TODO(port): replace the loop with a single load_block into a u8 buffer
623    //             followed by an i8 transmute in Phase B (or use bytemuck).
624    for item in lineinfo.iter_mut() {
625        *item = load_byte(s)? as i8;
626    }
627    f.lineinfo = lineinfo;
628
629    let n = load_int(s)? as usize;
630    let mut abslineinfo = Vec::with_capacity(n);
631    for _ in 0..n {
632        abslineinfo.push(AbsLineInfo {
633            pc: load_int(s)?,
634            line: load_int(s)?,
635        });
636    }
637    f.abslineinfo = abslineinfo;
638
639    let n = load_int(s)? as usize;
640
641    let mut locvars = Vec::with_capacity(n);
642    for _ in 0..n {
643        let varname = load_string_n(s, f)?;
644        let startpc = load_int(s)?;
645        let endpc = load_int(s)?;
646        let varname = match varname {
647            Some(v) => v,
648            None => s.state.new_string(b"")?,
649        };
650        locvars.push(LocalVar { varname, startpc, endpc });
651    }
652    f.locvars = locvars;
653
654    // PORT NOTE: if n == 0 then there is no upvalue name info (stripped).
655    let has_names = load_int(s)?;
656    if has_names != 0 {
657        let n_upvals = f.upvalues.len();
658        for i in 0..n_upvals {
659            let name = load_string_n(s, f)?;
660            f.upvalues[i].name = name;
661        }
662    }
663
664    Ok(())
665}
666
667// ── Function loader ────────────────────────────────────────────────────────
668
669/// Load a complete function prototype from the stream.
670///
671/// # C source
672/// ```c
673///
674/// //   f->source = loadStringN(S, f);
675/// //   if (f->source == NULL) f->source = psource;
676/// //   f->linedefined    = loadInt(S);
677/// //   f->lastlinedefined = loadInt(S);
678/// //   f->numparams   = loadByte(S);
679/// //   f->is_vararg   = loadByte(S);
680/// //   f->maxstacksize = loadByte(S);
681/// //   loadCode(S, f);
682/// //   loadConstants(S, f);
683/// //   loadUpvalues(S, f);
684/// //   loadProtos(S, f);
685/// //   loadDebug(S, f);
686/// // }
687/// ```
688///
689/// PORT NOTE: `TString *psource` becomes `Option<GcRef<LuaString>>` because
690/// the top-level call passes `NULL` (mapped to `None`).  `f->source` in `LuaProto`
691/// is typed `GcRef<LuaString>` in types.tsv, but the undump path needs
692/// `Option<GcRef<LuaString>>` to express "inherited from parent".  Phase B
693/// should align types.tsv or add a dedicated `Option` wrapper there.
694///
695/// PORT NOTE: `f->is_vararg` is stored as `lu_byte` in C but `bool` in
696/// types.tsv.  We read the raw byte and convert to `bool` via `!= 0`.
697fn load_function(
698    s: &mut LoadState<'_>,
699    f: &mut LuaProto,
700    psource: Option<GcRef<LuaString>>,
701) -> Result<(), LuaError> {
702    let source = load_string_n(s, f)?;
703    f.source = source.or(psource);
704
705    f.linedefined = load_int(s)?;
706    f.lastlinedefined = load_int(s)?;
707    f.numparams = load_byte(s)?;
708    // types.tsv: Proto.is_vararg → bool (stored as lu_byte in C)
709    f.is_vararg = load_byte(s)? != 0;
710    f.maxstacksize = load_byte(s)?;
711    load_code(s, f)?;
712    reconstruct_vararg_table_reg(f);
713    load_constants(s, f)?;
714    load_upvalues(s, f)?;
715    load_protos(s, f)?;
716    load_debug(s, f)?;
717
718    Ok(())
719}
720
721/// Recover `LuaProto.vararg_table_reg` from the loaded bytecode instead of from
722/// the wire format, so a precompiled chunk keeps Lua 5.5 named-vararg aliasing
723/// (`function f(...t)`) without lua-rs's `string.dump` output diverging from
724/// C's bytecode layout (which the structural oracle compares).
725///
726/// A named-vararg function emits exactly one `OP_VARARGPACK` (opcode 84) at
727/// entry; its A operand is the register holding the shared vararg table. The
728/// opcode occupies the low 7 bits of the instruction word and A the next 8.
729fn reconstruct_vararg_table_reg(f: &mut LuaProto) {
730    const OP_VARARGPACK: u32 = 84;
731    const OPCODE_MASK: u32 = 0x7F;
732    f.vararg_table_reg = f.code.iter().find_map(|inst| {
733        let raw = inst.raw();
734        (raw & OPCODE_MASK == OP_VARARGPACK).then(|| ((raw >> 7) & 0xFF) as u8)
735    });
736}
737
738// ── Header validation ──────────────────────────────────────────────────────
739
740/// Verify that the next `expected.len()` bytes in the stream match `expected`.
741///
742/// # C source
743/// ```c
744///
745/// //   char buff[sizeof(LUA_SIGNATURE) + sizeof(LUAC_DATA)];
746/// //   size_t len = strlen(s);
747/// //   loadVector(S, buff, len);
748/// //   if (memcmp(s, buff, len) != 0)
749/// //     error(S, msg);
750/// // }
751/// ```
752///
753/// PORT NOTE: `strlen` on a `const char *` becomes `.len()` on a `&[u8]`.
754/// `memcmp` becomes slice equality.
755fn check_literal(
756    s: &mut LoadState<'_>,
757    expected: &[u8],
758    msg: &'static str,
759) -> Result<(), LuaError> {
760    let mut buf = vec![0u8; expected.len()];
761    load_block(s, &mut buf)?;
762    if buf != expected {
763        return Err(load_error(s, msg));
764    }
765    Ok(())
766}
767
768/// Verify that the next byte in the stream equals `expected_size`.
769///
770/// # C source
771/// ```c
772///
773/// //   if (loadByte(S) != size)
774/// //     error(S, luaO_pushfstring(S->L, "%s size mismatch", tname));
775/// // }
776/// ```
777///
778/// PORT NOTE: `luaO_pushfstring` is used here as a message formatter, not as
779/// a throw site.  We inline the message directly.  `tname` is always a Rust
780/// type-name string literal (ASCII) from the call sites; using `&'static str`
781/// is appropriate here (not Lua data).
782fn fcheck_size(
783    s: &mut LoadState<'_>,
784    expected_size: usize,
785    tname: &'static str,
786) -> Result<(), LuaError> {
787    let b = load_byte(s)? as usize;
788    if b != expected_size {
789        // PORT NOTE: We build the error message inline rather than using
790        // luaO_pushfstring to avoid a stack push just for error formatting.
791        // TODO(port): include `tname` in the error message once LuaError::syntax
792        // supports composing byte-string and &str fragments.
793        return Err(LuaError::syntax(format_args!(
794            "{} size mismatch",
795            tname
796        )));
797    }
798    Ok(())
799}
800
801/// Validate the binary chunk header.
802///
803/// # C source
804/// ```c
805///
806/// //   checkliteral(S, &LUA_SIGNATURE[1], "not a binary chunk");
807/// //   if (loadByte(S) != LUAC_VERSION) error(S, "version mismatch");
808/// //   if (loadByte(S) != LUAC_FORMAT)  error(S, "format mismatch");
809/// //   checkliteral(S, LUAC_DATA, "corrupted chunk");
810/// //   checksize(S, Instruction);
811/// //   checksize(S, lua_Integer);
812/// //   checksize(S, lua_Number);
813/// //   if (loadInteger(S) != LUAC_INT) error(S, "integer format mismatch");
814/// //   if (loadNumber(S)  != LUAC_NUM) error(S, "float format mismatch");
815/// // }
816/// ```
817///
818/// PORT NOTE: `checksize(S, T)` expands to `fchecksize(S, sizeof(T), #T)`.
819/// We emit the three concrete sizes inline.
820/// - `sizeof(Instruction)` = 4 (u32)
821/// - `sizeof(lua_Integer)` = 8 (i64)
822/// - `sizeof(lua_Number)` = 8 (f64)
823///
824/// PORT NOTE: The first byte of `LUA_SIGNATURE` (`\x1b`) is already consumed
825/// by the caller before `checkHeader` is invoked, so we check only bytes 1..
826/// of the signature (`"Lua"`).
827fn check_header(s: &mut LoadState<'_>) -> Result<(), LuaError> {
828    // Skip LUA_SIGNATURE[0] (\x1b) — already consumed by the caller.
829    check_literal(s, &LUA_SIGNATURE[1..], "not a binary chunk")?;
830
831    let ver = load_byte(s)?;
832    if ver != LUAC_VERSION {
833        return Err(load_error(s, "version mismatch"));
834    }
835
836    let fmt = load_byte(s)?;
837    if fmt != LUAC_FORMAT {
838        return Err(load_error(s, "format mismatch"));
839    }
840
841    check_literal(s, LUAC_DATA, "corrupted chunk")?;
842
843    fcheck_size(s, 4, "Instruction")?;
844
845    fcheck_size(s, 8, "lua_Integer")?;
846
847    fcheck_size(s, 8, "lua_Number")?;
848
849    let int_check = load_integer(s)?;
850    if int_check != LUAC_INT {
851        return Err(load_error(s, "integer format mismatch"));
852    }
853
854    let num_check = load_number(s)?;
855    if num_check != LUAC_NUM {
856        return Err(load_error(s, "float format mismatch"));
857    }
858
859    Ok(())
860}
861
862// ── Public entry point ─────────────────────────────────────────────────────
863
864/// Load a precompiled Lua chunk and return the top-level Lua closure.
865///
866/// This is the Rust equivalent of `luaU_undump` — the single public function
867/// exported by `lundump.c`.
868///
869/// # C source
870/// ```c
871///
872/// //   LoadState S;
873/// //   LClosure *cl;
874/// //   if (*name == '@' || *name == '=')
875/// //     S.name = name + 1;
876/// //   else if (*name == LUA_SIGNATURE[0])
877/// //     S.name = "binary string";
878/// //   else
879/// //     S.name = name;
880/// //   S.L = L; S.Z = Z;
881/// //   checkHeader(&S);
882/// //   cl = luaF_newLclosure(L, loadByte(&S));
883/// //   setclLvalue2s(L, L->top.p, cl);
884/// //   luaD_inctop(L);
885/// //   cl->p = luaF_newproto(L);
886/// //   luaC_objbarrier(L, cl, cl->p);
887/// //   loadFunction(&S, cl->p, NULL);
888/// //   lua_assert(cl->nupvalues == cl->p->sizeupvalues);
889/// //   luai_verifycode(L, cl->p);
890/// //   return cl;
891/// // }
892/// ```
893///
894/// # Parameters
895/// - `state` — the Lua thread state.
896/// - `z` — input stream positioned at the start of the binary chunk
897///   (the first byte `\x1b` of `LUA_SIGNATURE` must still be present).
898/// - `name` — chunk name for error messages.  Stripped per Lua convention:
899///   - `@…` → filename (strip `@`)
900///   - `=…` → literal name (strip `=`)
901///   - starts with `\x1b` → `"binary string"`
902///   - otherwise used as-is.
903///
904/// PORT NOTE: The C function returns `LClosure *`.  In Rust we return
905/// `GcRef<LuaLClosure>` (the Lua-closure variant of `LuaClosure`).  The
906/// closure is also pushed onto the stack for GC anchoring, matching the C
907/// behaviour (`setclLvalue2s + luaD_inctop`).  The caller is responsible for
908/// popping it when done (consistent with C).
909///
910/// PORT NOTE: `luai_verifycode` is a no-op in the default build
911/// (`#define luai_verifycode(L,f)  /* empty */`); dropped here.
912///
913/// PORT NOTE: `cl->nupvalues == cl->p->sizeupvalues` — in Rust the nupvalues
914/// count is implicit in `cl.upvals.len()` and `f.upvalues.len()`; the
915/// assertion becomes `debug_assert_eq!`.
916pub(crate) fn undump(
917    state: &mut LuaState,
918    z: &mut ZIO,
919    _name: &[u8],
920) -> Result<GcRef<LuaLClosure>, LuaError> {
921    let mut s = LoadState {
922        state,
923        z,
924    };
925
926    check_header(&mut s)?;
927
928    // loadByte(&S) reads the number of upvalues for the top-level closure.
929    let nupvalues = load_byte(&mut s)?;
930    // PORT NOTE: `luaF_newLclosure` allocates a closure with `nupvalues`
931    // upvalue slots.  In Rust Phase A we construct the struct directly; the
932    // GcRef wrapping happens after the proto is loaded.
933    // TODO(port): use the proper lfunc::new_lua_closure(state, nupvalues) API
934    // once lfunc.rs is translated and the API is settled.
935    let mut cl = LuaLClosure::placeholder();
936    let mut upvals_vec = Vec::with_capacity(nupvalues as usize);
937    for _ in 0..nupvalues as usize {
938        upvals_vec.push(std::cell::Cell::new(s.state.new_upval_closed(LuaValue::Nil)));
939    }
940    cl.upvals = upvals_vec;
941
942    // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(cl)))
943    // macros.tsv: luaD_inctop → (state.push already increments; use state.push)
944    // PORT NOTE: We push a placeholder Nil first; the real closure value is
945    // set after the proto is loaded.  This mirrors the C "anchor for GC"
946    // pattern.  In Phase A-C GC anchoring via the stack is not strictly
947    // necessary (Rc keeps things alive) but we preserve the stack discipline
948    // for behavioural parity.
949    // TODO(port): once GcRef<LuaLClosure> is cloneable into LuaValue, push
950    // the real value here instead of a placeholder.
951    s.state.push(LuaValue::Nil); // placeholder; replaced below
952
953    let mut proto = LuaProto::placeholder();
954
955    // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
956
957    load_function(&mut s, &mut proto, None)?;
958
959    // Wrap the proto in a GcRef and attach it to the closure.
960    // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
961    let proto_ref = GcRef::new(proto);
962    proto_ref.account_buffer(proto_ref.buffer_bytes() as isize);
963
964    // macros.tsv: lua_assert → debug_assert!
965    // nupvalues is the byte we read; sizeupvalues = proto_ref.upvalues.len()
966    debug_assert_eq!(
967        nupvalues as usize,
968        proto_ref.upvalues.len(),
969        "upvalue count mismatch between closure header and prototype"
970    );
971
972    // The macro is defined as `/* empty */` in the default build; dropped.
973
974    // Attach the loaded proto to the closure.
975    cl.proto = proto_ref;
976
977    // Wrap the closure in GcRef.
978    // TODO(D-1c-bridge): wraps fully-populated LuaLClosure value; state.new_lclosure makes Nil-filled upvals
979    let cl_ref = GcRef::new(cl);
980    cl_ref.account_buffer(cl_ref.buffer_bytes() as isize);
981
982    // Replace the stack placeholder with the real closure value.
983    // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(...)))
984    // TODO(port): replace the placeholder at the correct stack slot.
985    // For now the top slot holds Nil; Phase B must fix this once
986    // GcRef<LuaLClosure> → LuaValue conversion is defined.
987    // TODO(port): update the stack slot pushed above with the real cl_ref value.
988
989    Ok(cl_ref)
990}
991
992// ──────────────────────────────────────────────────────────────────────────
993// PORT STATUS
994//   source:        src/lundump.c  (335 lines, 20 functions/items)
995//                  src/lundump.h  (35 lines, merged)
996//   target_crate:  lua-vm
997//   confidence:    medium
998//   todos:         15
999//   port_notes:    39
1000//   unsafe_blocks: 0   (must be 0 outside explicit unsafe-budget crates)
1001//   notes:         Logic is faithful to the C.  The main open items for Phase B
1002//                  are: (1) import paths for GcRef/LuaProto/LuaClosure/etc.;
1003//                  (2) LuaError::syntax byte-string formatting for the chunk
1004//                  name in load_error; (3) long-string vs short-string intern
1005//                  distinction in load_string_n; (4) the stack placeholder in
1006//                  undump must be replaced with the real GcRef<LuaLClosure>
1007//                  value once LuaValue conversion is defined; (5) UpvalDesc.name
1008//                  and LocalVar.varname need Option<GcRef<LuaString>> in the
1009//                  proto type to match the two-pass load order here.
1010// ──────────────────────────────────────────────────────────────────────────