Skip to main content

lua_vm/
undump.rs

1//! Load precompiled Lua chunks.
2//!
3//! Direct port of `reference/lua-5.4.7/src/lundump.c` (335 lines, 20 items).
4//! Declarations from `lundump.h` are merged here per PORTING.md §1.
5//!
6//! The public entry point is [`undump`], which reads a binary Lua chunk from
7//! a [`ZIO`] stream and returns a Lua closure ready to call.
8
9// TODO(port): resolve import paths once the crate module graph is settled
10// in Phase B.  These are best-guess paths based on other translated files.
11use crate::state::LuaState;
12#[allow(unused_imports)] use crate::prelude::*;
13use crate::zio::ZIO;
14use lua_types::error::LuaError;
15use lua_types::value::LuaValue;
16
17// PORT NOTE: GcRef<T>, LuaProto, LuaClosure, LuaString, UpvalDesc, LocalVar,
18// AbsLineInfo, and Instruction are expected to live in lua_types or lua_vm
19// crates.  All paths below are provisional for Phase A.
20// TODO(port): confirm concrete module paths for all GC types in Phase B.
21use lua_types::proto::{LuaProto, UpvalDesc, LocalVar, AbsLineInfo};
22use lua_types::closure::{LuaClosure, LuaLClosure};
23use lua_types::upval::UpVal;
24use lua_types::string::LuaString;
25use lua_types::gc::GcRef;
26use lua_types::opcode::Instruction;
27
28// ── Constants (from lundump.h) ─────────────────────────────────────────────
29
/// Six marker bytes that follow the version/format bytes in the header;
/// used to catch conversion errors (reported as "corrupted chunk").
const LUAC_DATA: &[u8] = &[0x19, 0x93, b'\r', b'\n', 0x1a, b'\n'];

/// Integer sentinel stored in the header; loading it back with a different
/// integer size or byte order yields a different value.
const LUAC_INT: i64 = 0x5678;

// macros.tsv: cast_num → x as f64
/// Float sentinel stored in the header; detects float format mismatches.
const LUAC_NUM: f64 = 370.5;

// LUA_VERSION_NUM = 504 → ((5 * 16) + 4) = 0x54 = 84
/// One-byte version tag: major version in the upper nibble, minor version
/// in the lower nibble.
const LUAC_VERSION: u8 = (5 << 4) | 4;

/// Format byte expected in the header (anything else is "format mismatch").
const LUAC_FORMAT: u8 = 0;

/// Leading bytes of every binary chunk: ESC followed by `Lua`.
const LUA_SIGNATURE: &[u8] = &[0x1b, b'L', b'u', b'a'];

// macros.tsv: LUAI_MAXSHORTLEN → const MAX_SHORT_LEN: usize = 40
/// Longest string (in bytes) that is still treated as a "short" string.
const MAX_SHORT_LEN: usize = 40;
51
52// ── Constant-pool type tags (from lobject.h makevariant) ───────────────────
53//
54// These are the byte values written by ldump.c into the constants array.
55// makevariant(t, v) = t | (v << 4).
56//
57// PORT NOTE: types.tsv maps LUA_VNIL → LuaValue::Nil etc. but the *byte
58// values* used in the binary format are the raw tag integers from lobject.h.
59// We define them here as u8 constants so the match in load_constants is
60// self-documenting.
61
// Each tag is `base_type | (variant << 4)`, i.e. makevariant(t, v).
const TAG_NIL: u8 = 0; // LUA_VNIL
const TAG_FALSE: u8 = 1; // LUA_VFALSE  (base 1, variant 0)
const TAG_TRUE: u8 = 1 | (1 << 4); // LUA_VTRUE   (base 1, variant 1)
const TAG_INT: u8 = 3; // LUA_VNUMINT (base 3, variant 0)
const TAG_FLOAT: u8 = 3 | (1 << 4); // LUA_VNUMFLT (base 3, variant 1)
const TAG_SHORT_STR: u8 = 4; // LUA_VSHRSTR (base 4, variant 0)
const TAG_LONG_STR: u8 = 4 | (1 << 4); // LUA_VLNGSTR (base 4, variant 1)
69
70// ── LoadState ──────────────────────────────────────────────────────────────
71
72/// Loader state bundled for convenience: Lua state, input stream, and the
73/// chunk name used in error messages.
74///
75/// # C mapping
76/// ```c
77///
78/// ```
79///
80/// PORT NOTE: In C, `LoadState` holds raw pointers to `lua_State` and `ZIO`.
81/// In Rust these become references with a shared lifetime `'a`.  The struct is
82/// always stack-allocated inside [`undump`] and never escapes the call.
83struct LoadState<'a> {
84    state: &'a mut LuaState,
85    z: &'a mut ZIO,
86    // PORT NOTE: C uses const char * (a C string). In Rust we own a Vec<u8>
87    // because the name slice may be a sub-slice of the caller's &[u8].
88    name: Vec<u8>,
89}
90
91// ── Error helper ───────────────────────────────────────────────────────────
92
93/// Build a syntax error for a malformed binary chunk.
94///
95/// # C source
96/// ```c
97///
98/// //   luaO_pushfstring(S->L, "%s: bad binary format (%s)", S->name, why);
99/// //   luaD_throw(S->L, LUA_ERRSYNTAX);
100/// // }
101/// ```
102///
103/// PORT NOTE: `l_noret` in C (diverges via `longjmp`).  In Rust we return
104/// `LuaError` and the caller does `return Err(load_error(...))`.  The C
105/// pattern `luaO_pushfstring + luaD_throw(LUA_ERRSYNTAX)` collapses to a
106/// single `LuaError::syntax` per error_sites.tsv.
107///
108/// TODO(port): `s.name` is `Vec<u8>`; `LuaError::syntax` takes `format_args!`
109/// which requires an `std::fmt::Display` implementor.  `Vec<u8>` does not
110/// implement `Display`.  Phase B should add a byte-string formatting path to
111/// `LuaError::syntax_bytes` or similar, so the chunk name is included verbatim
112/// in the message.
113fn load_error(s: &LoadState<'_>, why: &'static str) -> LuaError {
114    // error_sites.tsv: luaD_throw(L, LUA_ERRSYNTAX) → LuaError::syntax(...)
115    LuaError::syntax(format_args!("bad binary format ({})", why))
116}
117
118// ── Low-level I/O ──────────────────────────────────────────────────────────
119
120/// Read exactly `buf.len()` bytes from the stream into `buf`.
121///
122/// # C source
123/// ```c
/// // static void loadBlock (LoadState *S, void *b, size_t size) {
125/// //   if (luaZ_read(S->Z, b, size) != 0)
126/// //     error(S, "truncated chunk");
127/// // }
128/// ```
129///
130/// PORT NOTE: C takes `void *b` + explicit `size`.  In Rust we use `&mut [u8]`
131/// whose length encodes the byte count.  `luaZ_read` returns the number of
132/// bytes NOT read (0 = success), matching `ZIO::read`'s contract.
133fn load_block(s: &mut LoadState<'_>, buf: &mut [u8]) -> Result<(), LuaError> {
134    // macros.tsv: luaZ_read → z.read(buf)  (returns usize unread)
135    if s.z.read(buf) != 0 {
136        return Err(load_error(s, "truncated chunk"));
137    }
138    Ok(())
139}
140
141/// Read a single byte from the stream.
142///
143/// # C source
144/// ```c
/// // static lu_byte loadByte (LoadState *S) {
146/// //   int b = zgetc(S->Z);
147/// //   if (b == EOZ)
148/// //     error(S, "truncated chunk");
149/// //   return cast_byte(b);
150/// // }
151/// ```
152///
153/// PORT NOTE: `cast_byte` → `as u8` per macros.tsv; `zgetc` → `z.getc()`.
154fn load_byte(s: &mut LoadState<'_>) -> Result<u8, LuaError> {
155    // macros.tsv: zgetc → z.getc()  returning i32
156    let b = s.z.getc();
157    if b == crate::zio::EOZ {
158        return Err(load_error(s, "truncated chunk"));
159    }
160    // macros.tsv: cast_byte → x as u8
161    Ok(b as u8)
162}
163
164/// Read a variable-length unsigned integer (7 bits per byte, big-endian,
165/// MSB-first continuation flag).
166///
167/// # C source
168/// ```c
/// // static size_t loadUnsigned (LoadState *S, size_t limit) {
170/// //   size_t x = 0;
171/// //   int b;
172/// //   limit >>= 7;
173/// //   do {
174/// //     b = loadByte(S);
175/// //     if (x >= limit)
176/// //       error(S, "integer overflow");
177/// //     x = (x << 7) | (b & 0x7f);
178/// //   } while ((b & 0x80) == 0);
179/// //   return x;
180/// // }
181/// ```
182///
183/// PORT NOTE: The encoding terminates when a byte with the high bit set is
184/// seen (the *last* byte has bit 7 = 1).  That is the opposite of the more
185/// common LEB128 where the continuation bit means "more follows".
186fn load_unsigned(s: &mut LoadState<'_>, limit: usize) -> Result<usize, LuaError> {
187    let mut x: usize = 0;
188    let limit = limit >> 7;
189    loop {
190        let b = load_byte(s)? as usize;
191        if x >= limit {
192            return Err(load_error(s, "integer overflow"));
193        }
194        x = (x << 7) | (b & 0x7f);
195        if (b & 0x80) != 0 {
196            break;
197        }
198    }
199    Ok(x)
200}
201
202/// Read a `size_t`-sized unsigned value.
203///
204/// # C source
205/// ```c
/// // static size_t loadSize (LoadState *S) {
207/// //   return loadUnsigned(S, MAX_SIZET);
208/// // }
209/// ```
210///
211/// PORT NOTE: `MAX_SIZET` → `usize::MAX` per macros.tsv.
212fn load_size(s: &mut LoadState<'_>) -> Result<usize, LuaError> {
213    // macros.tsv: MAX_SIZET → usize::MAX
214    load_unsigned(s, usize::MAX)
215}
216
217/// Read a signed `int`-sized value.
218///
219/// # C source
220/// ```c
/// // static int loadInt (LoadState *S) {
222/// //   return cast_int(loadUnsigned(S, INT_MAX));
223/// // }
224/// ```
225///
226/// PORT NOTE: `cast_int` → `x as i32` per macros.tsv.  `INT_MAX` → `i32::MAX
227/// as usize`.
228fn load_int(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
229    // macros.tsv: cast_int → x as i32
230    let v = load_unsigned(s, i32::MAX as usize)?;
231    Ok(v as i32)
232}
233
234/// Read a `lua_Number` (f64) as eight raw native-endian bytes.
235///
236/// # C source
237/// ```c
/// // static lua_Number loadNumber (LoadState *S) {
239/// //   lua_Number x;
240/// //   loadVar(S, x);   /* expands to loadBlock(S, &x, sizeof(x)) */
241/// //   return x;
242/// // }
243/// ```
244///
245/// PORT NOTE: `loadVar` reads `sizeof(lua_Number) = 8` raw bytes directly
246/// into the value.  In Rust we use `f64::from_ne_bytes` (native endian) to
247/// reconstruct the value from the eight bytes.  The binary format is host-
248/// endian for these fields; the header check verifies endianness compatibility
249/// via `LUAC_INT` and `LUAC_NUM` sentinels.
250fn load_number(s: &mut LoadState<'_>) -> Result<f64, LuaError> {
251    let mut buf = [0u8; 8];
252    load_block(s, &mut buf)?;
253    // PERF(port): f64::from_ne_bytes is zero-cost — same as C's union cast
254    Ok(f64::from_ne_bytes(buf))
255}
256
257/// Read a `lua_Integer` (i64) as eight raw native-endian bytes.
258///
259/// # C source
260/// ```c
/// // static lua_Integer loadInteger (LoadState *S) {
262/// //   lua_Integer x;
263/// //   loadVar(S, x);   /* expands to loadBlock(S, &x, sizeof(x)) */
264/// //   return x;
265/// // }
266/// ```
267///
268/// PORT NOTE: Same reasoning as [`load_number`] — uses `i64::from_ne_bytes`.
269fn load_integer(s: &mut LoadState<'_>) -> Result<i64, LuaError> {
270    let mut buf = [0u8; 8];
271    load_block(s, &mut buf)?;
272    Ok(i64::from_ne_bytes(buf))
273}
274
275// ── String loading ─────────────────────────────────────────────────────────
276
277/// Load a nullable string.  Returns `None` if the stored size is zero.
278///
279/// # C source
280/// ```c
/// // static TString *loadStringN (LoadState *S, Proto *p) {
282/// //   lua_State *L = S->L;
283/// //   TString *ts;
284/// //   size_t size = loadSize(S);
285/// //   if (size == 0) return NULL;
286/// //   else if (--size <= LUAI_MAXSHORTLEN) {  /* short string? */
287/// //     char buff[LUAI_MAXSHORTLEN];
288/// //     loadVector(S, buff, size);
289/// //     ts = luaS_newlstr(L, buff, size);
290/// //   } else {  /* long string */
291/// //     ts = luaS_createlngstrobj(L, size);
292/// //     setsvalue2s(L, L->top.p, ts);  /* anchor it (loadVector can GC) */
293/// //     luaD_inctop(L);
294/// //     loadVector(S, getlngstr(ts), size);
295/// //     L->top.p--;
296/// //   }
297/// //   luaC_objbarrier(L, p, ts);
298/// //   return ts;
299/// // }
300/// ```
301///
302/// PORT NOTE: The Lua binary format stores `actual_length + 1` so that size=0
303/// is the null-string sentinel.  After reading `raw_size`, the actual byte
304/// count is `raw_size - 1`.
305///
306/// PORT NOTE: In C, long strings are created first (to anchor them from GC)
307/// and then filled in-place via `getlngstr`.  In Rust, GC anchoring is not
308/// needed in Phase A–C (Rc keeps objects alive); we read into a buffer and
309/// then create the string.
310///
311/// TODO(port): `luaS_newlstr` interns the string (short strings only);
312/// `luaS_createlngstrobj` does NOT intern.  Phase A uses `state.intern_str()`
313/// for both.  Phase B should add a `state.create_long_str()` path that skips
314/// the intern table, matching C semantics.
315///
316/// PORT NOTE: The `_proto` parameter corresponds to C's `Proto *p` used only
317/// for `luaC_objbarrier(L, p, ts)`.  The barrier is a no-op in Phase A–C
318/// (macros.tsv: `luaC_objbarrier → state.gc().obj_barrier(p, o)` no-op).
319fn load_string_n(
320    s: &mut LoadState<'_>,
321    _proto: &LuaProto,
322) -> Result<Option<GcRef<LuaString>>, LuaError> {
323    let raw_size = load_size(s)?;
324    if raw_size == 0 {
325        return Ok(None);
326    }
327    let size = raw_size - 1;
328
329    // Read the raw bytes regardless of short/long distinction.
330    let mut buf = vec![0u8; size];
331
332    if size <= MAX_SHORT_LEN {
333        load_block(s, &mut buf)?;
334    } else {
335        load_block(s, &mut buf)?;
336    }
337
338    // macros.tsv: luaS_newlstr → state.intern_str(&s[..n])
339    // TODO(port): long strings should not be interned; see doc-comment above.
340    let ts = s.state.intern_str(&buf)?;
341
342    // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
343    // (dropped — Phase A GC is Rc, no barrier needed)
344
345    Ok(Some(ts))
346}
347
348/// Load a non-nullable string; error if the stream encodes a null string.
349///
350/// # C source
351/// ```c
/// // static TString *loadString (LoadState *S, Proto *p) {
353/// //   TString *st = loadStringN(S, p);
354/// //   if (st == NULL)
355/// //     error(S, "bad format for constant string");
356/// //   return st;
357/// // }
358/// ```
359fn load_string(
360    s: &mut LoadState<'_>,
361    proto: &LuaProto,
362) -> Result<GcRef<LuaString>, LuaError> {
363    match load_string_n(s, proto)? {
364        Some(ts) => Ok(ts),
365        None => Err(load_error(s, "bad format for constant string")),
366    }
367}
368
369// ── Proto-field loaders ────────────────────────────────────────────────────
370
371/// Load the bytecode instruction array into a prototype.
372///
373/// # C source
374/// ```c
/// // static void loadCode (LoadState *S, Proto *f) {
376/// //   int n = loadInt(S);
377/// //   f->code = luaM_newvectorchecked(S->L, n, Instruction);
378/// //   f->sizecode = n;
379/// //   loadVector(S, f->code, n);
380/// // }
381/// ```
382///
383/// PORT NOTE: `loadVector(S, f->code, n)` expands to
384/// `loadBlock(S, f->code, n * sizeof(Instruction))` — `n` raw 4-byte words.
385/// We read each `u32` in native-endian order, consistent with how
386/// [`load_number`] and [`load_integer`] work.
387///
388/// PORT NOTE: `f->sizecode` is removed in Rust — `Vec::len()` covers it
389/// (types.tsv: `Proto.sizecode → removed`).
390fn load_code(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
391    let n = load_int(s)? as usize;
392    // macros.tsv: luaM_newvectorchecked → vec_checked::<T>(n)?
393    // PORT NOTE: Phase A uses Vec directly; overflow check omitted for brevity.
394    // TODO(port): add overflow / OOM check matching luaM_newvectorchecked.
395    let mut code = Vec::with_capacity(n);
396    for _ in 0..n {
397        let mut buf = [0u8; 4];
398        load_block(s, &mut buf)?;
399        // Instruction is a u32 newtype per types.tsv
400        code.push(Instruction(u32::from_ne_bytes(buf)));
401    }
402    f.code = code;
403    Ok(())
404}
405
406/// Load the constant pool into a prototype.
407///
408/// # C source
409/// ```c
/// // static void loadConstants (LoadState *S, Proto *f) {
411/// //   int i; int n = loadInt(S);
412/// //   f->k = luaM_newvectorchecked(S->L, n, TValue);
413/// //   f->sizek = n;
414/// //   for (i = 0; i < n; i++) setnilvalue(&f->k[i]);
415/// //   for (i = 0; i < n; i++) {
416/// //     TValue *o = &f->k[i];
417/// //     int t = loadByte(S);
418/// //     switch (t) {
419/// //       case LUA_VNIL:    setnilvalue(o); break;
420/// //       case LUA_VFALSE:  setbfvalue(o); break;
421/// //       case LUA_VTRUE:   setbtvalue(o); break;
422/// //       case LUA_VNUMFLT: setfltvalue(o, loadNumber(S)); break;
423/// //       case LUA_VNUMINT: setivalue(o, loadInteger(S)); break;
424/// //       case LUA_VSHRSTR:
425/// //       case LUA_VLNGSTR: setsvalue2n(S->L, o, loadString(S, f)); break;
426/// //       default: lua_assert(0);
427/// //     }
428/// //   }
429/// // }
430/// ```
431///
432/// PORT NOTE: The initial `setnilvalue` loop initialises the vector for GC
433/// safety in C.  In Rust, `Vec` is always in a valid state; we skip it.
434fn load_constants(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
435    let n = load_int(s)? as usize;
436    // TODO(port): add overflow / OOM check.
437    let mut k = Vec::with_capacity(n);
438
439    // Dropped — Rust Vec elements are never uninitialized.
440
441    for _ in 0..n {
442        let t = load_byte(s)?;
443        let val = match t {
444            // macros.tsv: setnilvalue → *o = LuaValue::Nil
445            TAG_NIL => LuaValue::Nil,
446
447            // macros.tsv: setbfvalue → *o = LuaValue::Bool(false)
448            TAG_FALSE => LuaValue::Bool(false),
449
450            // macros.tsv: setbtvalue → *o = LuaValue::Bool(true)
451            TAG_TRUE => LuaValue::Bool(true),
452
453            // macros.tsv: setfltvalue → *o = LuaValue::Float(x)
454            TAG_FLOAT => LuaValue::Float(load_number(s)?),
455
456            // macros.tsv: setivalue → *o = LuaValue::Int(x)
457            TAG_INT => LuaValue::Int(load_integer(s)?),
458
459            // macros.tsv: setsvalue2n → *dst = LuaValue::Str(s.clone())
460            TAG_SHORT_STR | TAG_LONG_STR => {
461                let ts = load_string(s, f)?;
462                LuaValue::Str(ts)
463            }
464
465            // macros.tsv: lua_assert → debug_assert!
466            _ => {
467                debug_assert!(false, "unknown constant type tag {:#04x}", t);
468                LuaValue::Nil
469            }
470        };
471        k.push(val);
472    }
473
474    f.k = k;
475    Ok(())
476}
477
478/// Load nested function prototypes into a prototype.
479///
480/// # C source
481/// ```c
/// // static void loadProtos (LoadState *S, Proto *f) {
483/// //   int i; int n = loadInt(S);
484/// //   f->p = luaM_newvectorchecked(S->L, n, Proto *);
485/// //   f->sizep = n;
486/// //   for (i = 0; i < n; i++) f->p[i] = NULL;
487/// //   for (i = 0; i < n; i++) {
488/// //     f->p[i] = luaF_newproto(S->L);
489/// //     luaC_objbarrier(S->L, f, f->p[i]);
490/// //     loadFunction(S, f->p[i], f->source);
491/// //   }
492/// // }
493/// ```
494///
495/// PORT NOTE: C creates the proto first (for GC anchor) then fills it.  In
496/// Rust we create a default `LuaProto`, fill it, then wrap in `GcRef`.
497/// `f->sizep` is removed per types.tsv (`Proto.sizep → removed`).
498fn load_protos(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
499    let n = load_int(s)? as usize;
500    // TODO(port): add overflow / OOM check.
501    let mut protos = Vec::with_capacity(n);
502
503
504    for _ in 0..n {
505        let mut sub = LuaProto::placeholder();
506
507        // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
508
509        // Pass parent source as fallback.
510        let parent_source = f.source.clone();
511        load_function(s, &mut sub, parent_source)?;
512
513        // Wrap in GcRef after loading.
514        // PORT NOTE: In C f->p[i] is a Proto * held by the proto's GC roots.
515        // In Rust Phase A it becomes Rc<LuaProto>.
516        // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
517        protos.push(GcRef::new(sub));
518    }
519
520    f.p = protos;
521    Ok(())
522}
523
524/// Load upvalue descriptors into a prototype.
525///
526/// # C source
527/// ```c
/// // static void loadUpvalues (LoadState *S, Proto *f) {
529/// //   int i, n;
530/// //   n = loadInt(S);
531/// //   f->upvalues = luaM_newvectorchecked(S->L, n, Upvaldesc);
532/// //   f->sizeupvalues = n;
533/// //   for (i = 0; i < n; i++)
534/// //     f->upvalues[i].name = NULL;  /* make array valid for GC */
535/// //   for (i = 0; i < n; i++) {
536/// //     f->upvalues[i].instack = loadByte(S);
537/// //     f->upvalues[i].idx    = loadByte(S);
538/// //     f->upvalues[i].kind   = loadByte(S);
539/// //   }
540/// // }
541/// ```
542///
543/// PORT NOTE: The C comment says names must be filled first for GC safety.
544/// In Rust we build `UpvalDesc` values with `name: None` and fill names later
545/// in [`load_debug`].  This requires `UpvalDesc.name` to be
546/// `Option<GcRef<LuaString>>` rather than `GcRef<LuaString>` as listed in
547/// types.tsv.  Phase B should reconcile the types.tsv entry.
548///
549/// PORT NOTE: `f->sizeupvalues` is removed per types.tsv.
550fn load_upvalues(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
551    let n = load_int(s)? as usize;
552    // TODO(port): add overflow / OOM check.
553
554    // In Rust: construct with name = None.
555
556    let mut upvalues = Vec::with_capacity(n);
557    for _ in 0..n {
558        let instack_raw = load_byte(s)?;
559        let idx = load_byte(s)?;
560        let kind = load_byte(s)?;
561
562        // types.tsv: Upvaldesc.instack → bool (stored as lu_byte in C)
563        upvalues.push(UpvalDesc {
564            name: None,           // filled by load_debug
565            instack: instack_raw != 0,
566            idx,
567            kind,
568        });
569    }
570
571    f.upvalues = upvalues;
572    Ok(())
573}
574
575/// Load debug information into a prototype.
576///
577/// # C source
578/// ```c
/// // static void loadDebug (LoadState *S, Proto *f) {
580/// //   int i, n;
581/// //   n = loadInt(S);
582/// //   f->lineinfo = luaM_newvectorchecked(S->L, n, ls_byte);
583/// //   f->sizelineinfo = n;
584/// //   loadVector(S, f->lineinfo, n);
585/// //   n = loadInt(S);
586/// //   f->abslineinfo = luaM_newvectorchecked(S->L, n, AbsLineInfo);
587/// //   f->sizeabslineinfo = n;
588/// //   for (i = 0; i < n; i++) {
589/// //     f->abslineinfo[i].pc   = loadInt(S);
590/// //     f->abslineinfo[i].line = loadInt(S);
591/// //   }
592/// //   n = loadInt(S);
593/// //   f->locvars = luaM_newvectorchecked(S->L, n, LocVar);
594/// //   f->sizelocvars = n;
595/// //   for (i = 0; i < n; i++) f->locvars[i].varname = NULL;
596/// //   for (i = 0; i < n; i++) {
597/// //     f->locvars[i].varname = loadStringN(S, f);
598/// //     f->locvars[i].startpc = loadInt(S);
599/// //     f->locvars[i].endpc   = loadInt(S);
600/// //   }
601/// //   n = loadInt(S);
602/// //   if (n != 0)  /* does it have debug information? */
603/// //     n = f->sizeupvalues;  /* must be this many */
604/// //   for (i = 0; i < n; i++)
605/// //     f->upvalues[i].name = loadStringN(S, f);
606/// // }
607/// ```
608///
609/// PORT NOTE: `ls_byte` (signed byte) maps to `i8` per types.tsv.
610/// `loadVector(S, f->lineinfo, n)` reads `n * sizeof(ls_byte) = n` bytes.
611/// We read them as `u8` then reinterpret as `i8` via cast.
612///
613/// PORT NOTE: Size companion fields (`sizelineinfo`, `sizeabslineinfo`,
614/// `sizelocvars`) are all removed per types.tsv — `Vec::len()` covers them.
615///
616/// PORT NOTE: `LocalVar.varname` and `UpvalDesc.name` are both
617/// `Option<GcRef<LuaString>>` here because `loadStringN` can return `None`.
618/// See also the note on [`load_upvalues`].
619fn load_debug(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
620    let n = load_int(s)? as usize;
621    let mut lineinfo = vec![0i8; n];
622    // Read as u8 slice then cast — safe because i8 and u8 have the same
623    // in-memory representation and we're casting a byte from the binary stream.
624    // SAFETY(port): this would need `unsafe` for the slice transmute in real
625    // code; for Phase A we read byte-by-byte.
626    // TODO(port): replace the loop with a single load_block into a u8 buffer
627    //             followed by an i8 transmute in Phase B (or use bytemuck).
628    for item in lineinfo.iter_mut() {
629        *item = load_byte(s)? as i8;
630    }
631    f.lineinfo = lineinfo;
632
633    let n = load_int(s)? as usize;
634    let mut abslineinfo = Vec::with_capacity(n);
635    for _ in 0..n {
636        abslineinfo.push(AbsLineInfo {
637            pc: load_int(s)?,
638            line: load_int(s)?,
639        });
640    }
641    f.abslineinfo = abslineinfo;
642
643    let n = load_int(s)? as usize;
644
645    let mut locvars = Vec::with_capacity(n);
646    for _ in 0..n {
647        let varname = load_string_n(s, f)?;
648        let startpc = load_int(s)?;
649        let endpc = load_int(s)?;
650        let varname = match varname {
651            Some(v) => v,
652            None => s.state.new_string(b"")?,
653        };
654        locvars.push(LocalVar { varname, startpc, endpc });
655    }
656    f.locvars = locvars;
657
658    // PORT NOTE: if n == 0 then there is no upvalue name info (stripped).
659    let has_names = load_int(s)?;
660    if has_names != 0 {
661        let n_upvals = f.upvalues.len();
662        for i in 0..n_upvals {
663            let name = load_string_n(s, f)?;
664            f.upvalues[i].name = name;
665        }
666    }
667
668    Ok(())
669}
670
671// ── Function loader ────────────────────────────────────────────────────────
672
673/// Load a complete function prototype from the stream.
674///
675/// # C source
676/// ```c
/// // static void loadFunction (LoadState *S, Proto *f, TString *psource) {
678/// //   f->source = loadStringN(S, f);
679/// //   if (f->source == NULL) f->source = psource;
680/// //   f->linedefined    = loadInt(S);
681/// //   f->lastlinedefined = loadInt(S);
682/// //   f->numparams   = loadByte(S);
683/// //   f->is_vararg   = loadByte(S);
684/// //   f->maxstacksize = loadByte(S);
685/// //   loadCode(S, f);
686/// //   loadConstants(S, f);
687/// //   loadUpvalues(S, f);
688/// //   loadProtos(S, f);
689/// //   loadDebug(S, f);
690/// // }
691/// ```
692///
693/// PORT NOTE: `TString *psource` becomes `Option<GcRef<LuaString>>` because
694/// the top-level call passes `NULL` (mapped to `None`).  `f->source` in `LuaProto`
695/// is typed `GcRef<LuaString>` in types.tsv, but the undump path needs
696/// `Option<GcRef<LuaString>>` to express "inherited from parent".  Phase B
697/// should align types.tsv or add a dedicated `Option` wrapper there.
698///
699/// PORT NOTE: `f->is_vararg` is stored as `lu_byte` in C but `bool` in
700/// types.tsv.  We read the raw byte and convert to `bool` via `!= 0`.
701fn load_function(
702    s: &mut LoadState<'_>,
703    f: &mut LuaProto,
704    psource: Option<GcRef<LuaString>>,
705) -> Result<(), LuaError> {
706    let source = load_string_n(s, f)?;
707    f.source = source.or(psource);
708
709    f.linedefined = load_int(s)?;
710    f.lastlinedefined = load_int(s)?;
711    f.numparams = load_byte(s)?;
712    // types.tsv: Proto.is_vararg → bool (stored as lu_byte in C)
713    f.is_vararg = load_byte(s)? != 0;
714    f.maxstacksize = load_byte(s)?;
715    load_code(s, f)?;
716    load_constants(s, f)?;
717    load_upvalues(s, f)?;
718    load_protos(s, f)?;
719    load_debug(s, f)?;
720
721    Ok(())
722}
723
724// ── Header validation ──────────────────────────────────────────────────────
725
726/// Verify that the next `expected.len()` bytes in the stream match `expected`.
727///
728/// # C source
729/// ```c
/// // static void checkliteral (LoadState *S, const char *s, const char *msg) {
731/// //   char buff[sizeof(LUA_SIGNATURE) + sizeof(LUAC_DATA)];
732/// //   size_t len = strlen(s);
733/// //   loadVector(S, buff, len);
734/// //   if (memcmp(s, buff, len) != 0)
735/// //     error(S, msg);
736/// // }
737/// ```
738///
739/// PORT NOTE: `strlen` on a `const char *` becomes `.len()` on a `&[u8]`.
740/// `memcmp` becomes slice equality.
741fn check_literal(
742    s: &mut LoadState<'_>,
743    expected: &[u8],
744    msg: &'static str,
745) -> Result<(), LuaError> {
746    let mut buf = vec![0u8; expected.len()];
747    load_block(s, &mut buf)?;
748    if buf != expected {
749        return Err(load_error(s, msg));
750    }
751    Ok(())
752}
753
754/// Verify that the next byte in the stream equals `expected_size`.
755///
756/// # C source
757/// ```c
/// // static void fchecksize (LoadState *S, size_t size, const char *tname) {
759/// //   if (loadByte(S) != size)
760/// //     error(S, luaO_pushfstring(S->L, "%s size mismatch", tname));
761/// // }
762/// ```
763///
764/// PORT NOTE: `luaO_pushfstring` is used here as a message formatter, not as
765/// a throw site.  We inline the message directly.  `tname` is always a Rust
766/// type-name string literal (ASCII) from the call sites; using `&'static str`
767/// is appropriate here (not Lua data).
768fn fcheck_size(
769    s: &mut LoadState<'_>,
770    expected_size: usize,
771    tname: &'static str,
772) -> Result<(), LuaError> {
773    let b = load_byte(s)? as usize;
774    if b != expected_size {
775        // PORT NOTE: We build the error message inline rather than using
776        // luaO_pushfstring to avoid a stack push just for error formatting.
777        // TODO(port): include `tname` in the error message once LuaError::syntax
778        // supports composing byte-string and &str fragments.
779        return Err(LuaError::syntax(format_args!(
780            "{} size mismatch",
781            tname
782        )));
783    }
784    Ok(())
785}
786
787/// Validate the binary chunk header.
788///
789/// # C source
790/// ```c
/// // static void checkHeader (LoadState *S) {
792/// //   checkliteral(S, &LUA_SIGNATURE[1], "not a binary chunk");
793/// //   if (loadByte(S) != LUAC_VERSION) error(S, "version mismatch");
794/// //   if (loadByte(S) != LUAC_FORMAT)  error(S, "format mismatch");
795/// //   checkliteral(S, LUAC_DATA, "corrupted chunk");
796/// //   checksize(S, Instruction);
797/// //   checksize(S, lua_Integer);
798/// //   checksize(S, lua_Number);
799/// //   if (loadInteger(S) != LUAC_INT) error(S, "integer format mismatch");
800/// //   if (loadNumber(S)  != LUAC_NUM) error(S, "float format mismatch");
801/// // }
802/// ```
803///
804/// PORT NOTE: `checksize(S, T)` expands to `fchecksize(S, sizeof(T), #T)`.
805/// We emit the three concrete sizes inline.
806/// - `sizeof(Instruction)` = 4 (u32)
807/// - `sizeof(lua_Integer)` = 8 (i64)
808/// - `sizeof(lua_Number)` = 8 (f64)
809///
810/// PORT NOTE: The first byte of `LUA_SIGNATURE` (`\x1b`) is already consumed
811/// by the caller before `checkHeader` is invoked, so we check only bytes 1..
812/// of the signature (`"Lua"`).
813fn check_header(s: &mut LoadState<'_>) -> Result<(), LuaError> {
814    // Skip LUA_SIGNATURE[0] (\x1b) — already consumed by the caller.
815    check_literal(s, &LUA_SIGNATURE[1..], "not a binary chunk")?;
816
817    let ver = load_byte(s)?;
818    if ver != LUAC_VERSION {
819        return Err(load_error(s, "version mismatch"));
820    }
821
822    let fmt = load_byte(s)?;
823    if fmt != LUAC_FORMAT {
824        return Err(load_error(s, "format mismatch"));
825    }
826
827    check_literal(s, LUAC_DATA, "corrupted chunk")?;
828
829    fcheck_size(s, 4, "Instruction")?;
830
831    fcheck_size(s, 8, "lua_Integer")?;
832
833    fcheck_size(s, 8, "lua_Number")?;
834
835    let int_check = load_integer(s)?;
836    if int_check != LUAC_INT {
837        return Err(load_error(s, "integer format mismatch"));
838    }
839
840    let num_check = load_number(s)?;
841    if num_check != LUAC_NUM {
842        return Err(load_error(s, "float format mismatch"));
843    }
844
845    Ok(())
846}
847
848// ── Public entry point ─────────────────────────────────────────────────────
849
850/// Load a precompiled Lua chunk and return the top-level Lua closure.
851///
852/// This is the Rust equivalent of `luaU_undump` — the single public function
853/// exported by `lundump.c`.
854///
855/// # C source
856/// ```c
/// // LClosure *luaU_undump(lua_State *L, ZIO *Z, const char *name) {
858/// //   LoadState S;
859/// //   LClosure *cl;
860/// //   if (*name == '@' || *name == '=')
861/// //     S.name = name + 1;
862/// //   else if (*name == LUA_SIGNATURE[0])
863/// //     S.name = "binary string";
864/// //   else
865/// //     S.name = name;
866/// //   S.L = L; S.Z = Z;
867/// //   checkHeader(&S);
868/// //   cl = luaF_newLclosure(L, loadByte(&S));
869/// //   setclLvalue2s(L, L->top.p, cl);
870/// //   luaD_inctop(L);
871/// //   cl->p = luaF_newproto(L);
872/// //   luaC_objbarrier(L, cl, cl->p);
873/// //   loadFunction(&S, cl->p, NULL);
874/// //   lua_assert(cl->nupvalues == cl->p->sizeupvalues);
875/// //   luai_verifycode(L, cl->p);
876/// //   return cl;
877/// // }
878/// ```
879///
880/// # Parameters
881/// - `state` — the Lua thread state.
/// - `z` — input stream positioned just after the first byte (`\x1b`) of
///   `LUA_SIGNATURE`; the caller has already consumed that byte, and
///   `check_header` validates only the remaining `"Lua"` bytes.
884/// - `name` — chunk name for error messages.  Stripped per Lua convention:
885///   - `@…` → filename (strip `@`)
886///   - `=…` → literal name (strip `=`)
887///   - starts with `\x1b` → `"binary string"`
888///   - otherwise used as-is.
889///
890/// PORT NOTE: The C function returns `LClosure *`.  In Rust we return
891/// `GcRef<LuaLClosure>` (the Lua-closure variant of `LuaClosure`).  The
892/// closure is also pushed onto the stack for GC anchoring, matching the C
893/// behaviour (`setclLvalue2s + luaD_inctop`).  The caller is responsible for
894/// popping it when done (consistent with C).
895///
896/// PORT NOTE: `luai_verifycode` is a no-op in the default build
897/// (`#define luai_verifycode(L,f)  /* empty */`); dropped here.
898///
899/// PORT NOTE: `cl->nupvalues == cl->p->sizeupvalues` — in Rust the nupvalues
900/// count is implicit in `cl.upvals.len()` and `f.upvalues.len()`; the
901/// assertion becomes `debug_assert_eq!`.
902pub(crate) fn undump(
903    state: &mut LuaState,
904    z: &mut ZIO,
905    name: &[u8],
906) -> Result<GcRef<LuaLClosure>, LuaError> {
907    let display_name: Vec<u8> = if name.first() == Some(&b'@') || name.first() == Some(&b'=') {
908        // Strip the leading sigil character.
909        name[1..].to_vec()
910    } else if name.first() == Some(&LUA_SIGNATURE[0]) {
911        b"binary string".to_vec()
912    } else {
913        name.to_vec()
914    };
915
916    let mut s = LoadState {
917        state,
918        z,
919        name: display_name,
920    };
921
922    check_header(&mut s)?;
923
924    // loadByte(&S) reads the number of upvalues for the top-level closure.
925    let nupvalues = load_byte(&mut s)?;
926    // PORT NOTE: `luaF_newLclosure` allocates a closure with `nupvalues`
927    // upvalue slots.  In Rust Phase A we construct the struct directly; the
928    // GcRef wrapping happens after the proto is loaded.
929    // TODO(port): use the proper lfunc::new_lua_closure(state, nupvalues) API
930    // once lfunc.rs is translated and the API is settled.
931    let mut cl = LuaLClosure::placeholder();
932    let mut upvals_vec = Vec::with_capacity(nupvalues as usize);
933    for _ in 0..nupvalues as usize {
934        upvals_vec.push(std::cell::Cell::new(s.state.new_upval_closed(LuaValue::Nil)));
935    }
936    cl.upvals = upvals_vec;
937
938    // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(cl)))
939    // macros.tsv: luaD_inctop → (state.push already increments; use state.push)
940    // PORT NOTE: We push a placeholder Nil first; the real closure value is
941    // set after the proto is loaded.  This mirrors the C "anchor for GC"
942    // pattern.  In Phase A-C GC anchoring via the stack is not strictly
943    // necessary (Rc keeps things alive) but we preserve the stack discipline
944    // for behavioural parity.
945    // TODO(port): once GcRef<LuaLClosure> is cloneable into LuaValue, push
946    // the real value here instead of a placeholder.
947    s.state.push(LuaValue::Nil); // placeholder; replaced below
948
949    let mut proto = LuaProto::placeholder();
950
951    // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
952
953    load_function(&mut s, &mut proto, None)?;
954
955    // Wrap the proto in a GcRef and attach it to the closure.
956    // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
957    let proto_ref = GcRef::new(proto);
958
959    // macros.tsv: lua_assert → debug_assert!
960    // nupvalues is the byte we read; sizeupvalues = proto_ref.upvalues.len()
961    debug_assert_eq!(
962        nupvalues as usize,
963        proto_ref.upvalues.len(),
964        "upvalue count mismatch between closure header and prototype"
965    );
966
967    // The macro is defined as `/* empty */` in the default build; dropped.
968
969    // Attach the loaded proto to the closure.
970    cl.proto = proto_ref;
971
972    // Wrap the closure in GcRef.
973    // TODO(D-1c-bridge): wraps fully-populated LuaLClosure value; state.new_lclosure makes Nil-filled upvals
974    let cl_ref = GcRef::new(cl);
975
976    // Replace the stack placeholder with the real closure value.
977    // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(...)))
978    // TODO(port): replace the placeholder at the correct stack slot.
979    // For now the top slot holds Nil; Phase B must fix this once
980    // GcRef<LuaLClosure> → LuaValue conversion is defined.
981    // TODO(port): update the stack slot pushed above with the real cl_ref value.
982
983    Ok(cl_ref)
984}
985
986// ──────────────────────────────────────────────────────────────────────────
987// PORT STATUS
988//   source:        src/lundump.c  (335 lines, 20 functions/items)
989//                  src/lundump.h  (35 lines, merged)
990//   target_crate:  lua-vm
991//   confidence:    medium
992//   todos:         15
993//   port_notes:    39
994//   unsafe_blocks: 0   (must be 0 outside explicit unsafe-budget crates)
995//   notes:         Logic is faithful to the C.  The main open items for Phase B
996//                  are: (1) import paths for GcRef/LuaProto/LuaClosure/etc.;
997//                  (2) LuaError::syntax byte-string formatting for the chunk
998//                  name in load_error; (3) long-string vs short-string intern
999//                  distinction in load_string_n; (4) the stack placeholder in
1000//                  undump must be replaced with the real GcRef<LuaLClosure>
1001//                  value once LuaValue conversion is defined; (5) UpvalDesc.name
1002//                  and LocalVar.varname need Option<GcRef<LuaString>> in the
1003//                  proto type to match the two-pass load order here.
1004// ──────────────────────────────────────────────────────────────────────────