Skip to main content

lua_vm/
undump.rs

1//! Load precompiled Lua chunks.
2//!
3//! Direct port of `reference/lua-5.4.7/src/lundump.c` (335 lines, 20 items).
4//! Declarations from `lundump.h` are merged here per PORTING.md §1.
5//!
6//! The public entry point is [`undump`], which reads a binary Lua chunk from
7//! a [`ZIO`] stream and returns a Lua closure ready to call.
8
9// TODO(port): resolve import paths once the crate module graph is settled
10// in Phase B.  These are best-guess paths based on other translated files.
11use crate::state::LuaState;
12#[allow(unused_imports)] use crate::prelude::*;
13use crate::zio::ZIO;
14use lua_types::error::LuaError;
15use lua_types::value::LuaValue;
16
17// PORT NOTE: GcRef<T>, LuaProto, LuaClosure, LuaString, UpvalDesc, LocalVar,
18// AbsLineInfo, and Instruction are expected to live in lua_types or lua_vm
19// crates.  All paths below are provisional for Phase A.
20use lua_types::proto::{LuaProto, UpvalDesc, LocalVar, AbsLineInfo};
21use lua_types::closure::LuaLClosure;
22use lua_types::string::LuaString;
23use lua_types::gc::GcRef;
24use lua_types::opcode::Instruction;
25
26// ── Constants (from lundump.h) ─────────────────────────────────────────────
27
28/// Six-byte data marker in the chunk header used to catch conversion errors.
const LUAC_DATA: &[u8] = b"\x19\x93\r\n\x1a\n";

/// Reference integer written in the header to detect integer endianness/size
/// mismatches.
const LUAC_INT: i64 = 0x5678;

// macros.tsv: cast_num → x as f64
/// Reference float written in the header to detect float format mismatches.
const LUAC_NUM: f64 = 370.5;

// LUA_VERSION_NUM = 504 → ((5 * 16) + 4) = 0x54 = 84
/// One-byte version tag: upper nibble = major, lower nibble = minor.
const LUAC_VERSION: u8 = 0x54;

/// Format byte expected right after the version byte; `check_header` rejects
/// any other value with "format mismatch".
const LUAC_FORMAT: u8 = 0;

/// Leading signature of a binary chunk: ESC followed by `Lua`.
/// `check_header` compares only bytes `1..` of it.
const LUA_SIGNATURE: &[u8] = b"\x1bLua";

// macros.tsv: LUAI_MAXSHORTLEN → const MAX_SHORT_LEN: usize = 40
/// Length threshold (in bytes) that `load_string_n` compares a decoded string
/// length against to tell short strings from long ones.
const MAX_SHORT_LEN: usize = 40;
49
50// ── Constant-pool type tags (from lobject.h makevariant) ───────────────────
51//
52// These are the byte values written by ldump.c into the constants array.
53// makevariant(t, v) = t | (v << 4).
54//
55// PORT NOTE: types.tsv maps LUA_VNIL → LuaValue::Nil etc. but the *byte
56// values* used in the binary format are the raw tag integers from lobject.h.
57// We define them here as u8 constants so the match in load_constants is
58// self-documenting.
59
/// `LUA_VNIL`: decoded by `load_constants` as `LuaValue::Nil`.
const TAG_NIL: u8 = 0x00;
/// `LUA_VFALSE`: decoded as `LuaValue::Bool(false)`.
const TAG_FALSE: u8 = 0x01;
/// `LUA_VTRUE`: decoded as `LuaValue::Bool(true)`.
const TAG_TRUE: u8 = 0x11;
/// `LUA_VNUMINT`: followed by an integer payload read with `load_integer`.
const TAG_INT: u8 = 0x03;
/// `LUA_VNUMFLT`: followed by a float payload read with `load_number`.
const TAG_FLOAT: u8 = 0x13;
/// `LUA_VSHRSTR`: followed by a string payload read with `load_string`.
const TAG_SHORT_STR: u8 = 0x04;
/// `LUA_VLNGSTR`: followed by a string payload read with `load_string`.
const TAG_LONG_STR: u8 = 0x14;
67
68// ── LoadState ──────────────────────────────────────────────────────────────
69
/// Loader state bundled for convenience: the Lua state and the input stream.
/// (The C `LoadState` also carries the chunk name used in error messages;
/// this port does not store it yet.)
72///
73/// # C mapping
74/// ```c
75///
76/// ```
77///
78/// PORT NOTE: In C, `LoadState` holds raw pointers to `lua_State` and `ZIO`.
79/// In Rust these become references with a shared lifetime `'a`.  The struct is
80/// always stack-allocated inside [`undump`] and never escapes the call.
struct LoadState<'a> {
    /// Lua state; the loaders call into it to create strings, upvalues and
    /// to push onto the stack.
    state: &'a mut LuaState,
    /// Input stream the chunk bytes are pulled from (via `read` / `getc`).
    z: &'a mut ZIO,
}
85
86// ── Error helper ───────────────────────────────────────────────────────────
87
88/// Build a syntax error for a malformed binary chunk.
89///
90/// # C source
91/// ```c
92///
93/// //   luaO_pushfstring(S->L, "%s: bad binary format (%s)", S->name, why);
94/// //   luaD_throw(S->L, LUA_ERRSYNTAX);
95/// // }
96/// ```
97///
98/// PORT NOTE: `l_noret` in C (diverges via `longjmp`).  In Rust we return
99/// `LuaError` and the caller does `return Err(load_error(...))`.  The C
100/// pattern `luaO_pushfstring + luaD_throw(LUA_ERRSYNTAX)` collapses to a
101/// single `LuaError::syntax` per error_sites.tsv.
102///
103/// TODO(port): `s.name` is `Vec<u8>`; `LuaError::syntax` takes `format_args!`
104/// which requires an `std::fmt::Display` implementor.  `Vec<u8>` does not
105/// implement `Display`.  Phase B should add a byte-string formatting path to
106/// `LuaError::syntax_bytes` or similar, so the chunk name is included verbatim
107/// in the message.
108fn load_error(_s: &LoadState<'_>, why: &'static str) -> LuaError {
109    LuaError::syntax(format_args!("bad binary format ({})", why))
110}
111
112// ── Low-level I/O ──────────────────────────────────────────────────────────
113
114/// Read exactly `buf.len()` bytes from the stream into `buf`.
115///
116/// # C source
117/// ```c
118///
119/// //   if (luaZ_read(S->Z, b, size) != 0)
120/// //     error(S, "truncated chunk");
121/// // }
122/// ```
123///
124/// PORT NOTE: C takes `void *b` + explicit `size`.  In Rust we use `&mut [u8]`
125/// whose length encodes the byte count.  `luaZ_read` returns the number of
126/// bytes NOT read (0 = success), matching `ZIO::read`'s contract.
127fn load_block(s: &mut LoadState<'_>, buf: &mut [u8]) -> Result<(), LuaError> {
128    // macros.tsv: luaZ_read → z.read(buf)  (returns usize unread)
129    if s.z.read(buf) != 0 {
130        return Err(load_error(s, "truncated chunk"));
131    }
132    Ok(())
133}
134
135/// Read a single byte from the stream.
136///
137/// # C source
138/// ```c
139///
140/// //   int b = zgetc(S->Z);
141/// //   if (b == EOZ)
142/// //     error(S, "truncated chunk");
143/// //   return cast_byte(b);
144/// // }
145/// ```
146///
147/// PORT NOTE: `cast_byte` → `as u8` per macros.tsv; `zgetc` → `z.getc()`.
148fn load_byte(s: &mut LoadState<'_>) -> Result<u8, LuaError> {
149    // macros.tsv: zgetc → z.getc()  returning i32
150    let b = s.z.getc();
151    if b == crate::zio::EOZ {
152        return Err(load_error(s, "truncated chunk"));
153    }
154    // macros.tsv: cast_byte → x as u8
155    Ok(b as u8)
156}
157
/// Read a variable-length unsigned integer: 7 payload bits per byte, most
/// significant group first; the byte with its high bit set is the last one.
160///
161/// # C source
162/// ```c
163///
164/// //   size_t x = 0;
165/// //   int b;
166/// //   limit >>= 7;
167/// //   do {
168/// //     b = loadByte(S);
169/// //     if (x >= limit)
170/// //       error(S, "integer overflow");
171/// //     x = (x << 7) | (b & 0x7f);
172/// //   } while ((b & 0x80) == 0);
173/// //   return x;
174/// // }
175/// ```
176///
177/// PORT NOTE: The encoding terminates when a byte with the high bit set is
178/// seen (the *last* byte has bit 7 = 1).  That is the opposite of the more
179/// common LEB128 where the continuation bit means "more follows".
180fn load_unsigned(s: &mut LoadState<'_>, limit: usize) -> Result<usize, LuaError> {
181    let mut x: usize = 0;
182    let limit = limit >> 7;
183    loop {
184        let b = load_byte(s)? as usize;
185        if x >= limit {
186            return Err(load_error(s, "integer overflow"));
187        }
188        x = (x << 7) | (b & 0x7f);
189        if (b & 0x80) != 0 {
190            break;
191        }
192    }
193    Ok(x)
194}
195
196/// Read a `size_t`-sized unsigned value.
197///
198/// # C source
199/// ```c
200///
201/// //   return loadUnsigned(S, MAX_SIZET);
202/// // }
203/// ```
204///
205/// PORT NOTE: `MAX_SIZET` → `usize::MAX` per macros.tsv.
206fn load_size(s: &mut LoadState<'_>) -> Result<usize, LuaError> {
207    // macros.tsv: MAX_SIZET → usize::MAX
208    load_unsigned(s, usize::MAX)
209}
210
211/// Read a signed `int`-sized value.
212///
213/// # C source
214/// ```c
215///
216/// //   return cast_int(loadUnsigned(S, INT_MAX));
217/// // }
218/// ```
219///
220/// PORT NOTE: `cast_int` → `x as i32` per macros.tsv.  `INT_MAX` → `i32::MAX
221/// as usize`.
222fn load_int(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
223    // macros.tsv: cast_int → x as i32
224    let v = load_unsigned(s, i32::MAX as usize)?;
225    Ok(v as i32)
226}
227
228/// Read a `lua_Number` (f64) as eight raw native-endian bytes.
229///
230/// # C source
231/// ```c
232///
233/// //   lua_Number x;
234/// //   loadVar(S, x);   /* expands to loadBlock(S, &x, sizeof(x)) */
235/// //   return x;
236/// // }
237/// ```
238///
239/// PORT NOTE: `loadVar` reads `sizeof(lua_Number) = 8` raw bytes directly
240/// into the value.  In Rust we use `f64::from_ne_bytes` (native endian) to
241/// reconstruct the value from the eight bytes.  The binary format is host-
242/// endian for these fields; the header check verifies endianness compatibility
243/// via `LUAC_INT` and `LUAC_NUM` sentinels.
244fn load_number(s: &mut LoadState<'_>) -> Result<f64, LuaError> {
245    let mut buf = [0u8; 8];
246    load_block(s, &mut buf)?;
247    // PERF(port): f64::from_ne_bytes is zero-cost — same as C's union cast
248    Ok(f64::from_ne_bytes(buf))
249}
250
251/// Read a `lua_Integer` (i64) as eight raw native-endian bytes.
252///
253/// # C source
254/// ```c
255///
256/// //   lua_Integer x;
257/// //   loadVar(S, x);   /* expands to loadBlock(S, &x, sizeof(x)) */
258/// //   return x;
259/// // }
260/// ```
261///
262/// PORT NOTE: Same reasoning as [`load_number`] — uses `i64::from_ne_bytes`.
263fn load_integer(s: &mut LoadState<'_>) -> Result<i64, LuaError> {
264    let mut buf = [0u8; 8];
265    load_block(s, &mut buf)?;
266    Ok(i64::from_ne_bytes(buf))
267}
268
269// ── String loading ─────────────────────────────────────────────────────────
270
271/// Load a nullable string.  Returns `None` if the stored size is zero.
272///
273/// # C source
274/// ```c
275///
276/// //   lua_State *L = S->L;
277/// //   TString *ts;
278/// //   size_t size = loadSize(S);
279/// //   if (size == 0) return NULL;
280/// //   else if (--size <= LUAI_MAXSHORTLEN) {  /* short string? */
281/// //     char buff[LUAI_MAXSHORTLEN];
282/// //     loadVector(S, buff, size);
283/// //     ts = luaS_newlstr(L, buff, size);
284/// //   } else {  /* long string */
285/// //     ts = luaS_createlngstrobj(L, size);
286/// //     setsvalue2s(L, L->top.p, ts);  /* anchor it (loadVector can GC) */
287/// //     luaD_inctop(L);
288/// //     loadVector(S, getlngstr(ts), size);
289/// //     L->top.p--;
290/// //   }
291/// //   luaC_objbarrier(L, p, ts);
292/// //   return ts;
293/// // }
294/// ```
295///
296/// PORT NOTE: The Lua binary format stores `actual_length + 1` so that size=0
297/// is the null-string sentinel.  After reading `raw_size`, the actual byte
298/// count is `raw_size - 1`.
299///
300/// PORT NOTE: In C, long strings are created first (to anchor them from GC)
301/// and then filled in-place via `getlngstr`.  In Rust, GC anchoring is not
302/// needed in Phase A–C (Rc keeps objects alive); we read into a buffer and
303/// then create the string.
304///
305/// TODO(port): `luaS_newlstr` interns the string (short strings only);
306/// `luaS_createlngstrobj` does NOT intern.  Phase A uses `state.intern_str()`
307/// for both.  Phase B should add a `state.create_long_str()` path that skips
308/// the intern table, matching C semantics.
309///
310/// PORT NOTE: The `_proto` parameter corresponds to C's `Proto *p` used only
311/// for `luaC_objbarrier(L, p, ts)`.  The barrier is a no-op in Phase A–C
312/// (macros.tsv: `luaC_objbarrier → state.gc().obj_barrier(p, o)` no-op).
313fn load_string_n(
314    s: &mut LoadState<'_>,
315    _proto: &LuaProto,
316) -> Result<Option<GcRef<LuaString>>, LuaError> {
317    let raw_size = load_size(s)?;
318    if raw_size == 0 {
319        return Ok(None);
320    }
321    let size = raw_size - 1;
322
323    // Read the raw bytes regardless of short/long distinction.
324    let mut buf = vec![0u8; size];
325
326    if size <= MAX_SHORT_LEN {
327        load_block(s, &mut buf)?;
328    } else {
329        load_block(s, &mut buf)?;
330    }
331
332    // macros.tsv: luaS_newlstr → state.intern_str(&s[..n])
333    // TODO(port): long strings should not be interned; see doc-comment above.
334    let ts = s.state.intern_str(&buf)?;
335
336    // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
337    // (dropped — Phase A GC is Rc, no barrier needed)
338
339    Ok(Some(ts))
340}
341
342/// Load a non-nullable string; error if the stream encodes a null string.
343///
344/// # C source
345/// ```c
346///
347/// //   TString *st = loadStringN(S, p);
348/// //   if (st == NULL)
349/// //     error(S, "bad format for constant string");
350/// //   return st;
351/// // }
352/// ```
353fn load_string(
354    s: &mut LoadState<'_>,
355    proto: &LuaProto,
356) -> Result<GcRef<LuaString>, LuaError> {
357    match load_string_n(s, proto)? {
358        Some(ts) => Ok(ts),
359        None => Err(load_error(s, "bad format for constant string")),
360    }
361}
362
363// ── Proto-field loaders ────────────────────────────────────────────────────
364
365/// Load the bytecode instruction array into a prototype.
366///
367/// # C source
368/// ```c
369///
370/// //   int n = loadInt(S);
371/// //   f->code = luaM_newvectorchecked(S->L, n, Instruction);
372/// //   f->sizecode = n;
373/// //   loadVector(S, f->code, n);
374/// // }
375/// ```
376///
377/// PORT NOTE: `loadVector(S, f->code, n)` expands to
378/// `loadBlock(S, f->code, n * sizeof(Instruction))` — `n` raw 4-byte words.
379/// We read each `u32` in native-endian order, consistent with how
380/// [`load_number`] and [`load_integer`] work.
381///
382/// PORT NOTE: `f->sizecode` is removed in Rust — `Vec::len()` covers it
383/// (types.tsv: `Proto.sizecode → removed`).
384fn load_code(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
385    let n = load_int(s)? as usize;
386    // macros.tsv: luaM_newvectorchecked → vec_checked::<T>(n)?
387    // PORT NOTE: Phase A uses Vec directly; overflow check omitted for brevity.
388    // TODO(port): add overflow / OOM check matching luaM_newvectorchecked.
389    let mut code = Vec::with_capacity(n);
390    for _ in 0..n {
391        let mut buf = [0u8; 4];
392        load_block(s, &mut buf)?;
393        // Instruction is a u32 newtype per types.tsv
394        code.push(Instruction(u32::from_ne_bytes(buf)));
395    }
396    f.code = code;
397    Ok(())
398}
399
400/// Load the constant pool into a prototype.
401///
402/// # C source
403/// ```c
404///
405/// //   int i; int n = loadInt(S);
406/// //   f->k = luaM_newvectorchecked(S->L, n, TValue);
407/// //   f->sizek = n;
408/// //   for (i = 0; i < n; i++) setnilvalue(&f->k[i]);
409/// //   for (i = 0; i < n; i++) {
410/// //     TValue *o = &f->k[i];
411/// //     int t = loadByte(S);
412/// //     switch (t) {
413/// //       case LUA_VNIL:    setnilvalue(o); break;
414/// //       case LUA_VFALSE:  setbfvalue(o); break;
415/// //       case LUA_VTRUE:   setbtvalue(o); break;
416/// //       case LUA_VNUMFLT: setfltvalue(o, loadNumber(S)); break;
417/// //       case LUA_VNUMINT: setivalue(o, loadInteger(S)); break;
418/// //       case LUA_VSHRSTR:
419/// //       case LUA_VLNGSTR: setsvalue2n(S->L, o, loadString(S, f)); break;
420/// //       default: lua_assert(0);
421/// //     }
422/// //   }
423/// // }
424/// ```
425///
426/// PORT NOTE: The initial `setnilvalue` loop initialises the vector for GC
427/// safety in C.  In Rust, `Vec` is always in a valid state; we skip it.
428fn load_constants(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
429    let n = load_int(s)? as usize;
430    // TODO(port): add overflow / OOM check.
431    let mut k = Vec::with_capacity(n);
432
433    // Dropped — Rust Vec elements are never uninitialized.
434
435    for _ in 0..n {
436        let t = load_byte(s)?;
437        let val = match t {
438            // macros.tsv: setnilvalue → *o = LuaValue::Nil
439            TAG_NIL => LuaValue::Nil,
440
441            // macros.tsv: setbfvalue → *o = LuaValue::Bool(false)
442            TAG_FALSE => LuaValue::Bool(false),
443
444            // macros.tsv: setbtvalue → *o = LuaValue::Bool(true)
445            TAG_TRUE => LuaValue::Bool(true),
446
447            // macros.tsv: setfltvalue → *o = LuaValue::Float(x)
448            TAG_FLOAT => LuaValue::Float(load_number(s)?),
449
450            // macros.tsv: setivalue → *o = LuaValue::Int(x)
451            TAG_INT => LuaValue::Int(load_integer(s)?),
452
453            // macros.tsv: setsvalue2n → *dst = LuaValue::Str(s.clone())
454            TAG_SHORT_STR | TAG_LONG_STR => {
455                let ts = load_string(s, f)?;
456                LuaValue::Str(ts)
457            }
458
459            // macros.tsv: lua_assert → debug_assert!
460            _ => {
461                debug_assert!(false, "unknown constant type tag {:#04x}", t);
462                LuaValue::Nil
463            }
464        };
465        k.push(val);
466    }
467
468    f.k = k;
469    Ok(())
470}
471
472/// Load nested function prototypes into a prototype.
473///
474/// # C source
475/// ```c
476///
477/// //   int i; int n = loadInt(S);
478/// //   f->p = luaM_newvectorchecked(S->L, n, Proto *);
479/// //   f->sizep = n;
480/// //   for (i = 0; i < n; i++) f->p[i] = NULL;
481/// //   for (i = 0; i < n; i++) {
482/// //     f->p[i] = luaF_newproto(S->L);
483/// //     luaC_objbarrier(S->L, f, f->p[i]);
484/// //     loadFunction(S, f->p[i], f->source);
485/// //   }
486/// // }
487/// ```
488///
489/// PORT NOTE: C creates the proto first (for GC anchor) then fills it.  In
490/// Rust we create a default `LuaProto`, fill it, then wrap in `GcRef`.
491/// `f->sizep` is removed per types.tsv (`Proto.sizep → removed`).
492fn load_protos(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
493    let n = load_int(s)? as usize;
494    // TODO(port): add overflow / OOM check.
495    let mut protos = Vec::with_capacity(n);
496
497
498    for _ in 0..n {
499        let mut sub = LuaProto::placeholder();
500
501        // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
502
503        // Pass parent source as fallback.
504        let parent_source = f.source.clone();
505        load_function(s, &mut sub, parent_source)?;
506
507        // Wrap in GcRef after loading.
508        // PORT NOTE: In C f->p[i] is a Proto * held by the proto's GC roots.
509        // In Rust Phase A it becomes Rc<LuaProto>.
510        // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
511        protos.push(GcRef::new(sub));
512    }
513
514    f.p = protos;
515    Ok(())
516}
517
518/// Load upvalue descriptors into a prototype.
519///
520/// # C source
521/// ```c
522///
523/// //   int i, n;
524/// //   n = loadInt(S);
525/// //   f->upvalues = luaM_newvectorchecked(S->L, n, Upvaldesc);
526/// //   f->sizeupvalues = n;
527/// //   for (i = 0; i < n; i++)
528/// //     f->upvalues[i].name = NULL;  /* make array valid for GC */
529/// //   for (i = 0; i < n; i++) {
530/// //     f->upvalues[i].instack = loadByte(S);
531/// //     f->upvalues[i].idx    = loadByte(S);
532/// //     f->upvalues[i].kind   = loadByte(S);
533/// //   }
534/// // }
535/// ```
536///
537/// PORT NOTE: The C comment says names must be filled first for GC safety.
538/// In Rust we build `UpvalDesc` values with `name: None` and fill names later
539/// in [`load_debug`].  This requires `UpvalDesc.name` to be
540/// `Option<GcRef<LuaString>>` rather than `GcRef<LuaString>` as listed in
541/// types.tsv.  Phase B should reconcile the types.tsv entry.
542///
543/// PORT NOTE: `f->sizeupvalues` is removed per types.tsv.
544fn load_upvalues(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
545    let n = load_int(s)? as usize;
546    // TODO(port): add overflow / OOM check.
547
548    // In Rust: construct with name = None.
549
550    let mut upvalues = Vec::with_capacity(n);
551    for _ in 0..n {
552        let instack_raw = load_byte(s)?;
553        let idx = load_byte(s)?;
554        let kind = load_byte(s)?;
555
556        // types.tsv: Upvaldesc.instack → bool (stored as lu_byte in C)
557        upvalues.push(UpvalDesc {
558            name: None,           // filled by load_debug
559            instack: instack_raw != 0,
560            idx,
561            kind,
562        });
563    }
564
565    f.upvalues = upvalues;
566    Ok(())
567}
568
569/// Load debug information into a prototype.
570///
571/// # C source
572/// ```c
573///
574/// //   int i, n;
575/// //   n = loadInt(S);
576/// //   f->lineinfo = luaM_newvectorchecked(S->L, n, ls_byte);
577/// //   f->sizelineinfo = n;
578/// //   loadVector(S, f->lineinfo, n);
579/// //   n = loadInt(S);
580/// //   f->abslineinfo = luaM_newvectorchecked(S->L, n, AbsLineInfo);
581/// //   f->sizeabslineinfo = n;
582/// //   for (i = 0; i < n; i++) {
583/// //     f->abslineinfo[i].pc   = loadInt(S);
584/// //     f->abslineinfo[i].line = loadInt(S);
585/// //   }
586/// //   n = loadInt(S);
587/// //   f->locvars = luaM_newvectorchecked(S->L, n, LocVar);
588/// //   f->sizelocvars = n;
589/// //   for (i = 0; i < n; i++) f->locvars[i].varname = NULL;
590/// //   for (i = 0; i < n; i++) {
591/// //     f->locvars[i].varname = loadStringN(S, f);
592/// //     f->locvars[i].startpc = loadInt(S);
593/// //     f->locvars[i].endpc   = loadInt(S);
594/// //   }
595/// //   n = loadInt(S);
596/// //   if (n != 0)  /* does it have debug information? */
597/// //     n = f->sizeupvalues;  /* must be this many */
598/// //   for (i = 0; i < n; i++)
599/// //     f->upvalues[i].name = loadStringN(S, f);
600/// // }
601/// ```
602///
603/// PORT NOTE: `ls_byte` (signed byte) maps to `i8` per types.tsv.
604/// `loadVector(S, f->lineinfo, n)` reads `n * sizeof(ls_byte) = n` bytes.
605/// We read them as `u8` then reinterpret as `i8` via cast.
606///
607/// PORT NOTE: Size companion fields (`sizelineinfo`, `sizeabslineinfo`,
608/// `sizelocvars`) are all removed per types.tsv — `Vec::len()` covers them.
609///
610/// PORT NOTE: `LocalVar.varname` and `UpvalDesc.name` are both
611/// `Option<GcRef<LuaString>>` here because `loadStringN` can return `None`.
612/// See also the note on [`load_upvalues`].
613fn load_debug(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
614    let n = load_int(s)? as usize;
615    let mut lineinfo = vec![0i8; n];
616    // Read as u8 slice then cast — safe because i8 and u8 have the same
617    // in-memory representation and we're casting a byte from the binary stream.
618    // SAFETY(port): this would need `unsafe` for the slice transmute in real
619    // code; for Phase A we read byte-by-byte.
620    // TODO(port): replace the loop with a single load_block into a u8 buffer
621    //             followed by an i8 transmute in Phase B (or use bytemuck).
622    for item in lineinfo.iter_mut() {
623        *item = load_byte(s)? as i8;
624    }
625    f.lineinfo = lineinfo;
626
627    let n = load_int(s)? as usize;
628    let mut abslineinfo = Vec::with_capacity(n);
629    for _ in 0..n {
630        abslineinfo.push(AbsLineInfo {
631            pc: load_int(s)?,
632            line: load_int(s)?,
633        });
634    }
635    f.abslineinfo = abslineinfo;
636
637    let n = load_int(s)? as usize;
638
639    let mut locvars = Vec::with_capacity(n);
640    for _ in 0..n {
641        let varname = load_string_n(s, f)?;
642        let startpc = load_int(s)?;
643        let endpc = load_int(s)?;
644        let varname = match varname {
645            Some(v) => v,
646            None => s.state.new_string(b"")?,
647        };
648        locvars.push(LocalVar { varname, startpc, endpc });
649    }
650    f.locvars = locvars;
651
652    // PORT NOTE: if n == 0 then there is no upvalue name info (stripped).
653    let has_names = load_int(s)?;
654    if has_names != 0 {
655        let n_upvals = f.upvalues.len();
656        for i in 0..n_upvals {
657            let name = load_string_n(s, f)?;
658            f.upvalues[i].name = name;
659        }
660    }
661
662    Ok(())
663}
664
665// ── Function loader ────────────────────────────────────────────────────────
666
667/// Load a complete function prototype from the stream.
668///
669/// # C source
670/// ```c
671///
672/// //   f->source = loadStringN(S, f);
673/// //   if (f->source == NULL) f->source = psource;
674/// //   f->linedefined    = loadInt(S);
675/// //   f->lastlinedefined = loadInt(S);
676/// //   f->numparams   = loadByte(S);
677/// //   f->is_vararg   = loadByte(S);
678/// //   f->maxstacksize = loadByte(S);
679/// //   loadCode(S, f);
680/// //   loadConstants(S, f);
681/// //   loadUpvalues(S, f);
682/// //   loadProtos(S, f);
683/// //   loadDebug(S, f);
684/// // }
685/// ```
686///
687/// PORT NOTE: `TString *psource` becomes `Option<GcRef<LuaString>>` because
688/// the top-level call passes `NULL` (mapped to `None`).  `f->source` in `LuaProto`
689/// is typed `GcRef<LuaString>` in types.tsv, but the undump path needs
690/// `Option<GcRef<LuaString>>` to express "inherited from parent".  Phase B
691/// should align types.tsv or add a dedicated `Option` wrapper there.
692///
693/// PORT NOTE: `f->is_vararg` is stored as `lu_byte` in C but `bool` in
694/// types.tsv.  We read the raw byte and convert to `bool` via `!= 0`.
695fn load_function(
696    s: &mut LoadState<'_>,
697    f: &mut LuaProto,
698    psource: Option<GcRef<LuaString>>,
699) -> Result<(), LuaError> {
700    let source = load_string_n(s, f)?;
701    f.source = source.or(psource);
702
703    f.linedefined = load_int(s)?;
704    f.lastlinedefined = load_int(s)?;
705    f.numparams = load_byte(s)?;
706    // types.tsv: Proto.is_vararg → bool (stored as lu_byte in C)
707    f.is_vararg = load_byte(s)? != 0;
708    f.maxstacksize = load_byte(s)?;
709    load_code(s, f)?;
710    load_constants(s, f)?;
711    load_upvalues(s, f)?;
712    load_protos(s, f)?;
713    load_debug(s, f)?;
714
715    Ok(())
716}
717
718// ── Header validation ──────────────────────────────────────────────────────
719
720/// Verify that the next `expected.len()` bytes in the stream match `expected`.
721///
722/// # C source
723/// ```c
724///
725/// //   char buff[sizeof(LUA_SIGNATURE) + sizeof(LUAC_DATA)];
726/// //   size_t len = strlen(s);
727/// //   loadVector(S, buff, len);
728/// //   if (memcmp(s, buff, len) != 0)
729/// //     error(S, msg);
730/// // }
731/// ```
732///
733/// PORT NOTE: `strlen` on a `const char *` becomes `.len()` on a `&[u8]`.
734/// `memcmp` becomes slice equality.
735fn check_literal(
736    s: &mut LoadState<'_>,
737    expected: &[u8],
738    msg: &'static str,
739) -> Result<(), LuaError> {
740    let mut buf = vec![0u8; expected.len()];
741    load_block(s, &mut buf)?;
742    if buf != expected {
743        return Err(load_error(s, msg));
744    }
745    Ok(())
746}
747
748/// Verify that the next byte in the stream equals `expected_size`.
749///
750/// # C source
751/// ```c
752///
753/// //   if (loadByte(S) != size)
754/// //     error(S, luaO_pushfstring(S->L, "%s size mismatch", tname));
755/// // }
756/// ```
757///
758/// PORT NOTE: `luaO_pushfstring` is used here as a message formatter, not as
759/// a throw site.  We inline the message directly.  `tname` is always a Rust
760/// type-name string literal (ASCII) from the call sites; using `&'static str`
761/// is appropriate here (not Lua data).
762fn fcheck_size(
763    s: &mut LoadState<'_>,
764    expected_size: usize,
765    tname: &'static str,
766) -> Result<(), LuaError> {
767    let b = load_byte(s)? as usize;
768    if b != expected_size {
769        // PORT NOTE: We build the error message inline rather than using
770        // luaO_pushfstring to avoid a stack push just for error formatting.
771        // TODO(port): include `tname` in the error message once LuaError::syntax
772        // supports composing byte-string and &str fragments.
773        return Err(LuaError::syntax(format_args!(
774            "{} size mismatch",
775            tname
776        )));
777    }
778    Ok(())
779}
780
781/// Validate the binary chunk header.
782///
783/// # C source
784/// ```c
785///
786/// //   checkliteral(S, &LUA_SIGNATURE[1], "not a binary chunk");
787/// //   if (loadByte(S) != LUAC_VERSION) error(S, "version mismatch");
788/// //   if (loadByte(S) != LUAC_FORMAT)  error(S, "format mismatch");
789/// //   checkliteral(S, LUAC_DATA, "corrupted chunk");
790/// //   checksize(S, Instruction);
791/// //   checksize(S, lua_Integer);
792/// //   checksize(S, lua_Number);
793/// //   if (loadInteger(S) != LUAC_INT) error(S, "integer format mismatch");
794/// //   if (loadNumber(S)  != LUAC_NUM) error(S, "float format mismatch");
795/// // }
796/// ```
797///
798/// PORT NOTE: `checksize(S, T)` expands to `fchecksize(S, sizeof(T), #T)`.
799/// We emit the three concrete sizes inline.
800/// - `sizeof(Instruction)` = 4 (u32)
801/// - `sizeof(lua_Integer)` = 8 (i64)
802/// - `sizeof(lua_Number)` = 8 (f64)
803///
804/// PORT NOTE: The first byte of `LUA_SIGNATURE` (`\x1b`) is already consumed
805/// by the caller before `checkHeader` is invoked, so we check only bytes 1..
806/// of the signature (`"Lua"`).
807fn check_header(s: &mut LoadState<'_>) -> Result<(), LuaError> {
808    // Skip LUA_SIGNATURE[0] (\x1b) — already consumed by the caller.
809    check_literal(s, &LUA_SIGNATURE[1..], "not a binary chunk")?;
810
811    let ver = load_byte(s)?;
812    if ver != LUAC_VERSION {
813        return Err(load_error(s, "version mismatch"));
814    }
815
816    let fmt = load_byte(s)?;
817    if fmt != LUAC_FORMAT {
818        return Err(load_error(s, "format mismatch"));
819    }
820
821    check_literal(s, LUAC_DATA, "corrupted chunk")?;
822
823    fcheck_size(s, 4, "Instruction")?;
824
825    fcheck_size(s, 8, "lua_Integer")?;
826
827    fcheck_size(s, 8, "lua_Number")?;
828
829    let int_check = load_integer(s)?;
830    if int_check != LUAC_INT {
831        return Err(load_error(s, "integer format mismatch"));
832    }
833
834    let num_check = load_number(s)?;
835    if num_check != LUAC_NUM {
836        return Err(load_error(s, "float format mismatch"));
837    }
838
839    Ok(())
840}
841
842// ── Public entry point ─────────────────────────────────────────────────────
843
844/// Load a precompiled Lua chunk and return the top-level Lua closure.
845///
846/// This is the Rust equivalent of `luaU_undump` — the single public function
847/// exported by `lundump.c`.
848///
849/// # C source
850/// ```c
851///
852/// //   LoadState S;
853/// //   LClosure *cl;
854/// //   if (*name == '@' || *name == '=')
855/// //     S.name = name + 1;
856/// //   else if (*name == LUA_SIGNATURE[0])
857/// //     S.name = "binary string";
858/// //   else
859/// //     S.name = name;
860/// //   S.L = L; S.Z = Z;
861/// //   checkHeader(&S);
862/// //   cl = luaF_newLclosure(L, loadByte(&S));
863/// //   setclLvalue2s(L, L->top.p, cl);
864/// //   luaD_inctop(L);
865/// //   cl->p = luaF_newproto(L);
866/// //   luaC_objbarrier(L, cl, cl->p);
867/// //   loadFunction(&S, cl->p, NULL);
868/// //   lua_assert(cl->nupvalues == cl->p->sizeupvalues);
869/// //   luai_verifycode(L, cl->p);
870/// //   return cl;
871/// // }
872/// ```
873///
874/// # Parameters
875/// - `state` — the Lua thread state.
/// - `z` — input stream positioned just after the first signature byte
///   (`\x1b`), which the caller has already consumed; `check_header` verifies
///   only the remaining `"Lua"` bytes.
878/// - `name` — chunk name for error messages.  Stripped per Lua convention:
879///   - `@…` → filename (strip `@`)
880///   - `=…` → literal name (strip `=`)
881///   - starts with `\x1b` → `"binary string"`
882///   - otherwise used as-is.
883///
884/// PORT NOTE: The C function returns `LClosure *`.  In Rust we return
885/// `GcRef<LuaLClosure>` (the Lua-closure variant of `LuaClosure`).  The
886/// closure is also pushed onto the stack for GC anchoring, matching the C
887/// behaviour (`setclLvalue2s + luaD_inctop`).  The caller is responsible for
888/// popping it when done (consistent with C).
889///
890/// PORT NOTE: `luai_verifycode` is a no-op in the default build
891/// (`#define luai_verifycode(L,f)  /* empty */`); dropped here.
892///
893/// PORT NOTE: `cl->nupvalues == cl->p->sizeupvalues` — in Rust the nupvalues
894/// count is implicit in `cl.upvals.len()` and `f.upvalues.len()`; the
895/// assertion becomes `debug_assert_eq!`.
896pub(crate) fn undump(
897    state: &mut LuaState,
898    z: &mut ZIO,
899    _name: &[u8],
900) -> Result<GcRef<LuaLClosure>, LuaError> {
901    let mut s = LoadState {
902        state,
903        z,
904    };
905
906    check_header(&mut s)?;
907
908    // loadByte(&S) reads the number of upvalues for the top-level closure.
909    let nupvalues = load_byte(&mut s)?;
910    // PORT NOTE: `luaF_newLclosure` allocates a closure with `nupvalues`
911    // upvalue slots.  In Rust Phase A we construct the struct directly; the
912    // GcRef wrapping happens after the proto is loaded.
913    // TODO(port): use the proper lfunc::new_lua_closure(state, nupvalues) API
914    // once lfunc.rs is translated and the API is settled.
915    let mut cl = LuaLClosure::placeholder();
916    let mut upvals_vec = Vec::with_capacity(nupvalues as usize);
917    for _ in 0..nupvalues as usize {
918        upvals_vec.push(std::cell::Cell::new(s.state.new_upval_closed(LuaValue::Nil)));
919    }
920    cl.upvals = upvals_vec;
921
922    // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(cl)))
923    // macros.tsv: luaD_inctop → (state.push already increments; use state.push)
924    // PORT NOTE: We push a placeholder Nil first; the real closure value is
925    // set after the proto is loaded.  This mirrors the C "anchor for GC"
926    // pattern.  In Phase A-C GC anchoring via the stack is not strictly
927    // necessary (Rc keeps things alive) but we preserve the stack discipline
928    // for behavioural parity.
929    // TODO(port): once GcRef<LuaLClosure> is cloneable into LuaValue, push
930    // the real value here instead of a placeholder.
931    s.state.push(LuaValue::Nil); // placeholder; replaced below
932
933    let mut proto = LuaProto::placeholder();
934
935    // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
936
937    load_function(&mut s, &mut proto, None)?;
938
939    // Wrap the proto in a GcRef and attach it to the closure.
940    // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
941    let proto_ref = GcRef::new(proto);
942
943    // macros.tsv: lua_assert → debug_assert!
944    // nupvalues is the byte we read; sizeupvalues = proto_ref.upvalues.len()
945    debug_assert_eq!(
946        nupvalues as usize,
947        proto_ref.upvalues.len(),
948        "upvalue count mismatch between closure header and prototype"
949    );
950
951    // The macro is defined as `/* empty */` in the default build; dropped.
952
953    // Attach the loaded proto to the closure.
954    cl.proto = proto_ref;
955
956    // Wrap the closure in GcRef.
957    // TODO(D-1c-bridge): wraps fully-populated LuaLClosure value; state.new_lclosure makes Nil-filled upvals
958    let cl_ref = GcRef::new(cl);
959
960    // Replace the stack placeholder with the real closure value.
961    // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(...)))
962    // TODO(port): replace the placeholder at the correct stack slot.
963    // For now the top slot holds Nil; Phase B must fix this once
964    // GcRef<LuaLClosure> → LuaValue conversion is defined.
965    // TODO(port): update the stack slot pushed above with the real cl_ref value.
966
967    Ok(cl_ref)
968}
969
970// ──────────────────────────────────────────────────────────────────────────
971// PORT STATUS
972//   source:        src/lundump.c  (335 lines, 20 functions/items)
973//                  src/lundump.h  (35 lines, merged)
974//   target_crate:  lua-vm
975//   confidence:    medium
976//   todos:         15
977//   port_notes:    39
978//   unsafe_blocks: 0   (must be 0 outside explicit unsafe-budget crates)
979//   notes:         Logic is faithful to the C.  The main open items for Phase B
980//                  are: (1) import paths for GcRef/LuaProto/LuaClosure/etc.;
981//                  (2) LuaError::syntax byte-string formatting for the chunk
982//                  name in load_error; (3) long-string vs short-string intern
983//                  distinction in load_string_n; (4) the stack placeholder in
984//                  undump must be replaced with the real GcRef<LuaLClosure>
985//                  value once LuaValue conversion is defined; (5) UpvalDesc.name
986//                  and LocalVar.varname need Option<GcRef<LuaString>> in the
987//                  proto type to match the two-pass load order here.
988// ──────────────────────────────────────────────────────────────────────────