Skip to main content

lua_vm/
undump.rs

1//! Load precompiled Lua chunks.
2//!
3//! Direct port of `reference/lua-5.4.7/src/lundump.c` (335 lines, 20 items).
4//! Declarations from `lundump.h` are merged here per PORTING.md §1.
5//!
6//! The public entry point is [`undump`], which reads a binary Lua chunk from
7//! a [`ZIO`] stream and returns a Lua closure ready to call.
8
9// TODO(port): resolve import paths once the crate module graph is settled
10// in Phase B.  These are best-guess paths based on other translated files.
11#[allow(unused_imports)]
12use crate::prelude::*;
13use crate::state::LuaState;
14use crate::zio::ZIO;
15use lua_types::error::LuaError;
16use lua_types::value::LuaValue;
17
18// PORT NOTE: GcRef<T>, LuaProto, LuaClosure, LuaString, UpvalDesc, LocalVar,
19// AbsLineInfo, and Instruction are expected to live in lua_types or lua_vm
20// crates.  All paths below are provisional for Phase A.
21use lua_types::closure::LuaLClosure;
22use lua_types::gc::GcRef;
23use lua_types::opcode::Instruction;
24use lua_types::proto::{AbsLineInfo, LocalVar, LuaProto, UpvalDesc};
25use lua_types::string::LuaString;
26use lua_types::LuaVersion;
27
28// ── Constants (from lundump.h) ─────────────────────────────────────────────
29
/// Six-byte data marker in the chunk header used to catch conversion errors.
const LUAC_DATA: &[u8] = b"\x19\x93\r\n\x1a\n";

/// Reference integer written in the header to detect integer endianness/size
/// mismatches.
const LUAC_INT: i64 = 0x5678;

// macros.tsv: cast_num → x as f64
/// Reference float written in the header to detect float format mismatches.
const LUAC_NUM: f64 = 370.5;

/// Reference integer expected in a `LuaVersion::V55` header.  `check_header`
/// compares it against both the raw 4-byte `int` sentinel (narrowed to `i32`)
/// and the 8-byte Lua integer sentinel.
const LUAC_INT_55: i64 = -0x5678;

/// Reference instruction word expected in a `LuaVersion::V55` header.
const LUAC_INST_55: u32 = 0x12345678;

/// Reference float expected in a `LuaVersion::V55` header.
const LUAC_NUM_55: f64 = -370.5;

// LUA_VERSION_NUM = 504 → ((5 * 16) + 4) = 0x54 = 84
/// One-byte version tag: upper nibble = major, lower nibble = minor.
const LUAC_VERSION_54: u8 = 0x54;
/// Version tag expected when the state's version is `LuaVersion::V55`.
const LUAC_VERSION_55: u8 = 0x55;

/// Format byte that follows the version tag; `check_header` rejects any other
/// value with "format mismatch".
const LUAC_FORMAT: u8 = 0;

/// Leading signature of a binary chunk.  `check_header` verifies only bytes
/// `1..` and treats the first byte as already consumed by its caller.
const LUA_SIGNATURE: &[u8] = b"\x1bLua";

// macros.tsv: LUAI_MAXSHORTLEN → const MAX_SHORT_LEN: usize = 40
/// Longest string length, in bytes, that `load_string_n` treats as "short".
const MAX_SHORT_LEN: usize = 40;

// ── Constant-pool type tags (from lobject.h makevariant) ───────────────────
//
// These are the byte values written by ldump.c into the constants array.
// makevariant(t, v) = t | (v << 4).
//
// PORT NOTE: types.tsv maps LUA_VNIL → LuaValue::Nil etc. but the *byte
// values* used in the binary format are the raw tag integers from lobject.h.
// We define them here as u8 constants so the match in load_constants is
// self-documenting.

// Base type 0 (nil), variant 0.
const TAG_NIL: u8 = 0x00;
// Base type 1 (boolean), variant 0 = false.
const TAG_FALSE: u8 = 0x01;
// Base type 1 (boolean), variant 1 = true.
const TAG_TRUE: u8 = 0x11;
// Base type 3 (number), variant 0 = integer.
const TAG_INT: u8 = 0x03;
// Base type 3 (number), variant 1 = float.
const TAG_FLOAT: u8 = 0x13;
// Base type 4 (string), variant 0 = short string.
const TAG_SHORT_STR: u8 = 0x04;
// Base type 4 (string), variant 1 = long string.
const TAG_LONG_STR: u8 = 0x14;
76
77// ── LoadState ──────────────────────────────────────────────────────────────
78
/// Loader state bundled for convenience: the Lua state and the input stream.
///
/// # C mapping
/// ```c
/// // typedef struct {
/// //   lua_State *L;
/// //   ZIO *Z;
/// //   const char *name;
/// // } LoadState;
/// ```
///
/// PORT NOTE: In C, `LoadState` holds raw pointers to `lua_State` and `ZIO`.
/// In Rust these become references with a shared lifetime `'a`.  The struct is
/// always stack-allocated inside [`undump`] and never escapes the call.
///
/// PORT NOTE: The C struct also carries `name` (the chunk name used as the
/// prefix of error messages).  This struct has no such field, which is why
/// [`load_error`] cannot include the chunk name in its message.
struct LoadState<'a> {
    // The Lua thread state (C: `S->L`); used for string creation and for
    // reading the configured Lua version.
    state: &'a mut LuaState,
    // The input stream the chunk is read from (C: `S->Z`).
    z: &'a mut ZIO,
}
94
95// ── Error helper ───────────────────────────────────────────────────────────
96
/// Build a syntax error for a malformed binary chunk.
///
/// The resulting message has the form `bad binary format (<why>)`.
///
/// # C source
/// ```c
/// // static l_noret error (LoadState *S, const char *why) {
/// //   luaO_pushfstring(S->L, "%s: bad binary format (%s)", S->name, why);
/// //   luaD_throw(S->L, LUA_ERRSYNTAX);
/// // }
/// ```
///
/// PORT NOTE: `l_noret` in C (diverges via `longjmp`).  In Rust we return
/// `LuaError` and the caller does `return Err(load_error(...))`.  The C
/// pattern `luaO_pushfstring + luaD_throw(LUA_ERRSYNTAX)` collapses to a
/// single `LuaError::syntax` per error_sites.tsv.
///
/// TODO(port): the C message is prefixed with `S->name`, but `LoadState` has
/// no `name` field, so the chunk name is currently omitted and `_s` is
/// unused.  Chunk names are Lua byte strings, which do not implement
/// `std::fmt::Display`; Phase B should add the field together with a
/// byte-string formatting path (`LuaError::syntax_bytes` or similar) so the
/// name is included verbatim in the message.
fn load_error(_s: &LoadState<'_>, why: &'static str) -> LuaError {
    LuaError::syntax(format_args!("bad binary format ({})", why))
}
120
121// ── Low-level I/O ──────────────────────────────────────────────────────────
122
123/// Read exactly `buf.len()` bytes from the stream into `buf`.
124///
125/// # C source
126/// ```c
127///
128/// //   if (luaZ_read(S->Z, b, size) != 0)
129/// //     error(S, "truncated chunk");
130/// // }
131/// ```
132///
133/// PORT NOTE: C takes `void *b` + explicit `size`.  In Rust we use `&mut [u8]`
134/// whose length encodes the byte count.  `luaZ_read` returns the number of
135/// bytes NOT read (0 = success), matching `ZIO::read`'s contract.
136fn load_block(s: &mut LoadState<'_>, buf: &mut [u8]) -> Result<(), LuaError> {
137    // macros.tsv: luaZ_read → z.read(buf)  (returns usize unread)
138    if s.z.read(buf) != 0 {
139        return Err(load_error(s, "truncated chunk"));
140    }
141    Ok(())
142}
143
144/// Read a single byte from the stream.
145///
146/// # C source
147/// ```c
148///
149/// //   int b = zgetc(S->Z);
150/// //   if (b == EOZ)
151/// //     error(S, "truncated chunk");
152/// //   return cast_byte(b);
153/// // }
154/// ```
155///
156/// PORT NOTE: `cast_byte` → `as u8` per macros.tsv; `zgetc` → `z.getc()`.
157fn load_byte(s: &mut LoadState<'_>) -> Result<u8, LuaError> {
158    // macros.tsv: zgetc → z.getc()  returning i32
159    let b = s.z.getc();
160    if b == crate::zio::EOZ {
161        return Err(load_error(s, "truncated chunk"));
162    }
163    // macros.tsv: cast_byte → x as u8
164    Ok(b as u8)
165}
166
167/// Read a variable-length unsigned integer (7 bits per byte, big-endian,
168/// MSB-first continuation flag).
169///
170/// # C source
171/// ```c
172///
173/// //   size_t x = 0;
174/// //   int b;
175/// //   limit >>= 7;
176/// //   do {
177/// //     b = loadByte(S);
178/// //     if (x >= limit)
179/// //       error(S, "integer overflow");
180/// //     x = (x << 7) | (b & 0x7f);
181/// //   } while ((b & 0x80) == 0);
182/// //   return x;
183/// // }
184/// ```
185///
186/// PORT NOTE: The encoding terminates when a byte with the high bit set is
187/// seen (the *last* byte has bit 7 = 1).  That is the opposite of the more
188/// common LEB128 where the continuation bit means "more follows".
189fn load_unsigned(s: &mut LoadState<'_>, limit: usize) -> Result<usize, LuaError> {
190    let mut x: usize = 0;
191    let limit = limit >> 7;
192    loop {
193        let b = load_byte(s)? as usize;
194        if x >= limit {
195            return Err(load_error(s, "integer overflow"));
196        }
197        x = (x << 7) | (b & 0x7f);
198        if (b & 0x80) != 0 {
199            break;
200        }
201    }
202    Ok(x)
203}
204
205/// Read a `size_t`-sized unsigned value.
206///
207/// # C source
208/// ```c
209///
210/// //   return loadUnsigned(S, MAX_SIZET);
211/// // }
212/// ```
213///
214/// PORT NOTE: `MAX_SIZET` → `usize::MAX` per macros.tsv.
215fn load_size(s: &mut LoadState<'_>) -> Result<usize, LuaError> {
216    // macros.tsv: MAX_SIZET → usize::MAX
217    load_unsigned(s, usize::MAX)
218}
219
220/// Read a signed `int`-sized value.
221///
222/// # C source
223/// ```c
224///
225/// //   return cast_int(loadUnsigned(S, INT_MAX));
226/// // }
227/// ```
228///
229/// PORT NOTE: `cast_int` → `x as i32` per macros.tsv.  `INT_MAX` → `i32::MAX
230/// as usize`.
231fn load_int(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
232    // macros.tsv: cast_int → x as i32
233    let v = load_unsigned(s, i32::MAX as usize)?;
234    Ok(v as i32)
235}
236
237/// Read a `lua_Number` (f64) as eight raw native-endian bytes.
238///
239/// # C source
240/// ```c
241///
242/// //   lua_Number x;
243/// //   loadVar(S, x);   /* expands to loadBlock(S, &x, sizeof(x)) */
244/// //   return x;
245/// // }
246/// ```
247///
248/// PORT NOTE: `loadVar` reads `sizeof(lua_Number) = 8` raw bytes directly
249/// into the value.  In Rust we use `f64::from_ne_bytes` (native endian) to
250/// reconstruct the value from the eight bytes.  The binary format is host-
251/// endian for these fields; the header check verifies endianness compatibility
252/// via `LUAC_INT` and `LUAC_NUM` sentinels.
253fn load_number(s: &mut LoadState<'_>) -> Result<f64, LuaError> {
254    let mut buf = [0u8; 8];
255    load_block(s, &mut buf)?;
256    // PERF(port): f64::from_ne_bytes is zero-cost — same as C's union cast
257    Ok(f64::from_ne_bytes(buf))
258}
259
260/// Read a `lua_Integer` (i64) as eight raw native-endian bytes.
261///
262/// # C source
263/// ```c
264///
265/// //   lua_Integer x;
266/// //   loadVar(S, x);   /* expands to loadBlock(S, &x, sizeof(x)) */
267/// //   return x;
268/// // }
269/// ```
270///
271/// PORT NOTE: Same reasoning as [`load_number`] — uses `i64::from_ne_bytes`.
272fn load_integer(s: &mut LoadState<'_>) -> Result<i64, LuaError> {
273    let mut buf = [0u8; 8];
274    load_block(s, &mut buf)?;
275    Ok(i64::from_ne_bytes(buf))
276}
277
278fn load_raw_i32(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
279    let mut buf = [0u8; 4];
280    load_block(s, &mut buf)?;
281    Ok(i32::from_ne_bytes(buf))
282}
283
284fn load_raw_u32(s: &mut LoadState<'_>) -> Result<u32, LuaError> {
285    let mut buf = [0u8; 4];
286    load_block(s, &mut buf)?;
287    Ok(u32::from_ne_bytes(buf))
288}
289
290// ── String loading ─────────────────────────────────────────────────────────
291
292/// Load a nullable string.  Returns `None` if the stored size is zero.
293///
294/// # C source
295/// ```c
296///
297/// //   lua_State *L = S->L;
298/// //   TString *ts;
299/// //   size_t size = loadSize(S);
300/// //   if (size == 0) return NULL;
301/// //   else if (--size <= LUAI_MAXSHORTLEN) {  /* short string? */
302/// //     char buff[LUAI_MAXSHORTLEN];
303/// //     loadVector(S, buff, size);
304/// //     ts = luaS_newlstr(L, buff, size);
305/// //   } else {  /* long string */
306/// //     ts = luaS_createlngstrobj(L, size);
307/// //     setsvalue2s(L, L->top.p, ts);  /* anchor it (loadVector can GC) */
308/// //     luaD_inctop(L);
309/// //     loadVector(S, getlngstr(ts), size);
310/// //     L->top.p--;
311/// //   }
312/// //   luaC_objbarrier(L, p, ts);
313/// //   return ts;
314/// // }
315/// ```
316///
317/// PORT NOTE: The Lua binary format stores `actual_length + 1` so that size=0
318/// is the null-string sentinel.  After reading `raw_size`, the actual byte
319/// count is `raw_size - 1`.
320///
321/// PORT NOTE: In C, long strings are created first (to anchor them from GC)
322/// and then filled in-place via `getlngstr`.  In Rust, GC anchoring is not
323/// needed in Phase A–C (Rc keeps objects alive); we read into a buffer and
324/// then create the string.
325///
326/// TODO(port): `luaS_newlstr` interns the string (short strings only);
327/// `luaS_createlngstrobj` does NOT intern.  Phase A uses `state.intern_str()`
328/// for both.  Phase B should add a `state.create_long_str()` path that skips
329/// the intern table, matching C semantics.
330///
331/// PORT NOTE: The `_proto` parameter corresponds to C's `Proto *p` used only
332/// for `luaC_objbarrier(L, p, ts)`.  The barrier is a no-op in Phase A–C
333/// (macros.tsv: `luaC_objbarrier → state.gc().obj_barrier(p, o)` no-op).
334fn load_string_n(
335    s: &mut LoadState<'_>,
336    _proto: &LuaProto,
337) -> Result<Option<GcRef<LuaString>>, LuaError> {
338    let raw_size = load_size(s)?;
339    if raw_size == 0 {
340        return Ok(None);
341    }
342    let size = raw_size - 1;
343
344    // Read the raw bytes regardless of short/long distinction.
345    let mut buf = vec![0u8; size];
346
347    if size <= MAX_SHORT_LEN {
348        load_block(s, &mut buf)?;
349    } else {
350        load_block(s, &mut buf)?;
351    }
352
353    // macros.tsv: luaS_newlstr → state.intern_str(&s[..n])
354    // TODO(port): long strings should not be interned; see doc-comment above.
355    let ts = s.state.intern_str(&buf)?;
356
357    // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
358    // (dropped — Phase A GC is Rc, no barrier needed)
359
360    Ok(Some(ts))
361}
362
363/// Load a non-nullable string; error if the stream encodes a null string.
364///
365/// # C source
366/// ```c
367///
368/// //   TString *st = loadStringN(S, p);
369/// //   if (st == NULL)
370/// //     error(S, "bad format for constant string");
371/// //   return st;
372/// // }
373/// ```
374fn load_string(s: &mut LoadState<'_>, proto: &LuaProto) -> Result<GcRef<LuaString>, LuaError> {
375    match load_string_n(s, proto)? {
376        Some(ts) => Ok(ts),
377        None => Err(load_error(s, "bad format for constant string")),
378    }
379}
380
381// ── Proto-field loaders ────────────────────────────────────────────────────
382
383/// Load the bytecode instruction array into a prototype.
384///
385/// # C source
386/// ```c
387///
388/// //   int n = loadInt(S);
389/// //   f->code = luaM_newvectorchecked(S->L, n, Instruction);
390/// //   f->sizecode = n;
391/// //   loadVector(S, f->code, n);
392/// // }
393/// ```
394///
395/// PORT NOTE: `loadVector(S, f->code, n)` expands to
396/// `loadBlock(S, f->code, n * sizeof(Instruction))` — `n` raw 4-byte words.
397/// We read each `u32` in native-endian order, consistent with how
398/// [`load_number`] and [`load_integer`] work.
399///
400/// PORT NOTE: `f->sizecode` is removed in Rust — `Vec::len()` covers it
401/// (types.tsv: `Proto.sizecode → removed`).
402fn load_code(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
403    let n = load_int(s)? as usize;
404    // macros.tsv: luaM_newvectorchecked → vec_checked::<T>(n)?
405    // PORT NOTE: Phase A uses Vec directly; overflow check omitted for brevity.
406    // TODO(port): add overflow / OOM check matching luaM_newvectorchecked.
407    let mut code = Vec::with_capacity(n);
408    for _ in 0..n {
409        let mut buf = [0u8; 4];
410        load_block(s, &mut buf)?;
411        // Instruction is a u32 newtype per types.tsv
412        code.push(Instruction(u32::from_ne_bytes(buf)));
413    }
414    f.code = code;
415    Ok(())
416}
417
418/// Load the constant pool into a prototype.
419///
420/// # C source
421/// ```c
422///
423/// //   int i; int n = loadInt(S);
424/// //   f->k = luaM_newvectorchecked(S->L, n, TValue);
425/// //   f->sizek = n;
426/// //   for (i = 0; i < n; i++) setnilvalue(&f->k[i]);
427/// //   for (i = 0; i < n; i++) {
428/// //     TValue *o = &f->k[i];
429/// //     int t = loadByte(S);
430/// //     switch (t) {
431/// //       case LUA_VNIL:    setnilvalue(o); break;
432/// //       case LUA_VFALSE:  setbfvalue(o); break;
433/// //       case LUA_VTRUE:   setbtvalue(o); break;
434/// //       case LUA_VNUMFLT: setfltvalue(o, loadNumber(S)); break;
435/// //       case LUA_VNUMINT: setivalue(o, loadInteger(S)); break;
436/// //       case LUA_VSHRSTR:
437/// //       case LUA_VLNGSTR: setsvalue2n(S->L, o, loadString(S, f)); break;
438/// //       default: lua_assert(0);
439/// //     }
440/// //   }
441/// // }
442/// ```
443///
444/// PORT NOTE: The initial `setnilvalue` loop initialises the vector for GC
445/// safety in C.  In Rust, `Vec` is always in a valid state; we skip it.
446fn load_constants(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
447    let n = load_int(s)? as usize;
448    // TODO(port): add overflow / OOM check.
449    let mut k = Vec::with_capacity(n);
450
451    // Dropped — Rust Vec elements are never uninitialized.
452
453    for _ in 0..n {
454        let t = load_byte(s)?;
455        let val = match t {
456            // macros.tsv: setnilvalue → *o = LuaValue::Nil
457            TAG_NIL => LuaValue::Nil,
458
459            // macros.tsv: setbfvalue → *o = LuaValue::Bool(false)
460            TAG_FALSE => LuaValue::Bool(false),
461
462            // macros.tsv: setbtvalue → *o = LuaValue::Bool(true)
463            TAG_TRUE => LuaValue::Bool(true),
464
465            // macros.tsv: setfltvalue → *o = LuaValue::Float(x)
466            TAG_FLOAT => LuaValue::Float(load_number(s)?),
467
468            // macros.tsv: setivalue → *o = LuaValue::Int(x)
469            TAG_INT => LuaValue::Int(load_integer(s)?),
470
471            // macros.tsv: setsvalue2n → *dst = LuaValue::Str(s.clone())
472            TAG_SHORT_STR | TAG_LONG_STR => {
473                let ts = load_string(s, f)?;
474                LuaValue::Str(ts)
475            }
476
477            // macros.tsv: lua_assert → debug_assert!
478            _ => {
479                debug_assert!(false, "unknown constant type tag {:#04x}", t);
480                LuaValue::Nil
481            }
482        };
483        k.push(val);
484    }
485
486    f.k = k;
487    Ok(())
488}
489
490/// Load nested function prototypes into a prototype.
491///
492/// # C source
493/// ```c
494///
495/// //   int i; int n = loadInt(S);
496/// //   f->p = luaM_newvectorchecked(S->L, n, Proto *);
497/// //   f->sizep = n;
498/// //   for (i = 0; i < n; i++) f->p[i] = NULL;
499/// //   for (i = 0; i < n; i++) {
500/// //     f->p[i] = luaF_newproto(S->L);
501/// //     luaC_objbarrier(S->L, f, f->p[i]);
502/// //     loadFunction(S, f->p[i], f->source);
503/// //   }
504/// // }
505/// ```
506///
507/// PORT NOTE: C creates the proto first (for GC anchor) then fills it.  In
508/// Rust we create a default `LuaProto`, fill it, then wrap in `GcRef`.
509/// `f->sizep` is removed per types.tsv (`Proto.sizep → removed`).
510fn load_protos(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
511    let n = load_int(s)? as usize;
512    // TODO(port): add overflow / OOM check.
513    let mut protos = Vec::with_capacity(n);
514
515    for _ in 0..n {
516        let mut sub = LuaProto::placeholder();
517
518        // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
519
520        // Pass parent source as fallback.
521        let parent_source = f.source.clone();
522        load_function(s, &mut sub, parent_source)?;
523
524        // Wrap in GcRef after loading.
525        // PORT NOTE: In C f->p[i] is a Proto * held by the proto's GC roots.
526        // In Rust Phase A it becomes Rc<LuaProto>.
527        // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
528        let sub_ref = GcRef::new(sub);
529        sub_ref.account_buffer(sub_ref.buffer_bytes() as isize);
530        protos.push(sub_ref);
531    }
532
533    f.p = protos;
534    Ok(())
535}
536
537/// Load upvalue descriptors into a prototype.
538///
539/// # C source
540/// ```c
541///
542/// //   int i, n;
543/// //   n = loadInt(S);
544/// //   f->upvalues = luaM_newvectorchecked(S->L, n, Upvaldesc);
545/// //   f->sizeupvalues = n;
546/// //   for (i = 0; i < n; i++)
547/// //     f->upvalues[i].name = NULL;  /* make array valid for GC */
548/// //   for (i = 0; i < n; i++) {
549/// //     f->upvalues[i].instack = loadByte(S);
550/// //     f->upvalues[i].idx    = loadByte(S);
551/// //     f->upvalues[i].kind   = loadByte(S);
552/// //   }
553/// // }
554/// ```
555///
556/// PORT NOTE: The C comment says names must be filled first for GC safety.
557/// In Rust we build `UpvalDesc` values with `name: None` and fill names later
558/// in [`load_debug`].  This requires `UpvalDesc.name` to be
559/// `Option<GcRef<LuaString>>` rather than `GcRef<LuaString>` as listed in
560/// types.tsv.  Phase B should reconcile the types.tsv entry.
561///
562/// PORT NOTE: `f->sizeupvalues` is removed per types.tsv.
563fn load_upvalues(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
564    let n = load_int(s)? as usize;
565    // TODO(port): add overflow / OOM check.
566
567    // In Rust: construct with name = None.
568
569    let mut upvalues = Vec::with_capacity(n);
570    for _ in 0..n {
571        let instack_raw = load_byte(s)?;
572        let idx = load_byte(s)?;
573        let kind = load_byte(s)?;
574
575        // types.tsv: Upvaldesc.instack → bool (stored as lu_byte in C)
576        upvalues.push(UpvalDesc {
577            name: None, // filled by load_debug
578            instack: instack_raw != 0,
579            idx,
580            kind,
581        });
582    }
583
584    f.upvalues = upvalues;
585    Ok(())
586}
587
588/// Load debug information into a prototype.
589///
590/// # C source
591/// ```c
592///
593/// //   int i, n;
594/// //   n = loadInt(S);
595/// //   f->lineinfo = luaM_newvectorchecked(S->L, n, ls_byte);
596/// //   f->sizelineinfo = n;
597/// //   loadVector(S, f->lineinfo, n);
598/// //   n = loadInt(S);
599/// //   f->abslineinfo = luaM_newvectorchecked(S->L, n, AbsLineInfo);
600/// //   f->sizeabslineinfo = n;
601/// //   for (i = 0; i < n; i++) {
602/// //     f->abslineinfo[i].pc   = loadInt(S);
603/// //     f->abslineinfo[i].line = loadInt(S);
604/// //   }
605/// //   n = loadInt(S);
606/// //   f->locvars = luaM_newvectorchecked(S->L, n, LocVar);
607/// //   f->sizelocvars = n;
608/// //   for (i = 0; i < n; i++) f->locvars[i].varname = NULL;
609/// //   for (i = 0; i < n; i++) {
610/// //     f->locvars[i].varname = loadStringN(S, f);
611/// //     f->locvars[i].startpc = loadInt(S);
612/// //     f->locvars[i].endpc   = loadInt(S);
613/// //   }
614/// //   n = loadInt(S);
615/// //   if (n != 0)  /* does it have debug information? */
616/// //     n = f->sizeupvalues;  /* must be this many */
617/// //   for (i = 0; i < n; i++)
618/// //     f->upvalues[i].name = loadStringN(S, f);
619/// // }
620/// ```
621///
622/// PORT NOTE: `ls_byte` (signed byte) maps to `i8` per types.tsv.
623/// `loadVector(S, f->lineinfo, n)` reads `n * sizeof(ls_byte) = n` bytes.
624/// We read them as `u8` then reinterpret as `i8` via cast.
625///
626/// PORT NOTE: Size companion fields (`sizelineinfo`, `sizeabslineinfo`,
627/// `sizelocvars`) are all removed per types.tsv — `Vec::len()` covers them.
628///
629/// PORT NOTE: `LocalVar.varname` and `UpvalDesc.name` are both
630/// `Option<GcRef<LuaString>>` here because `loadStringN` can return `None`.
631/// See also the note on [`load_upvalues`].
632fn load_debug(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
633    let n = load_int(s)? as usize;
634    let mut lineinfo = vec![0i8; n];
635    // Read as u8 slice then cast — safe because i8 and u8 have the same
636    // in-memory representation and we're casting a byte from the binary stream.
637    // SAFETY(port): this would need `unsafe` for the slice transmute in real
638    // code; for Phase A we read byte-by-byte.
639    // TODO(port): replace the loop with a single load_block into a u8 buffer
640    //             followed by an i8 transmute in Phase B (or use bytemuck).
641    for item in lineinfo.iter_mut() {
642        *item = load_byte(s)? as i8;
643    }
644    f.lineinfo = lineinfo;
645
646    let n = load_int(s)? as usize;
647    let mut abslineinfo = Vec::with_capacity(n);
648    for _ in 0..n {
649        abslineinfo.push(AbsLineInfo {
650            pc: load_int(s)?,
651            line: load_int(s)?,
652        });
653    }
654    f.abslineinfo = abslineinfo;
655
656    let n = load_int(s)? as usize;
657
658    let mut locvars = Vec::with_capacity(n);
659    for _ in 0..n {
660        let varname = load_string_n(s, f)?;
661        let startpc = load_int(s)?;
662        let endpc = load_int(s)?;
663        let varname = match varname {
664            Some(v) => v,
665            None => s.state.new_string(b"")?,
666        };
667        locvars.push(LocalVar {
668            varname,
669            startpc,
670            endpc,
671        });
672    }
673    f.locvars = locvars;
674
675    // PORT NOTE: if n == 0 then there is no upvalue name info (stripped).
676    let has_names = load_int(s)?;
677    if has_names != 0 {
678        let n_upvals = f.upvalues.len();
679        for i in 0..n_upvals {
680            let name = load_string_n(s, f)?;
681            f.upvalues[i].name = name;
682        }
683    }
684
685    Ok(())
686}
687
688// ── Function loader ────────────────────────────────────────────────────────
689
690/// Load a complete function prototype from the stream.
691///
692/// # C source
693/// ```c
694///
695/// //   f->source = loadStringN(S, f);
696/// //   if (f->source == NULL) f->source = psource;
697/// //   f->linedefined    = loadInt(S);
698/// //   f->lastlinedefined = loadInt(S);
699/// //   f->numparams   = loadByte(S);
700/// //   f->is_vararg   = loadByte(S);
701/// //   f->maxstacksize = loadByte(S);
702/// //   loadCode(S, f);
703/// //   loadConstants(S, f);
704/// //   loadUpvalues(S, f);
705/// //   loadProtos(S, f);
706/// //   loadDebug(S, f);
707/// // }
708/// ```
709///
710/// PORT NOTE: `TString *psource` becomes `Option<GcRef<LuaString>>` because
711/// the top-level call passes `NULL` (mapped to `None`).  `f->source` in `LuaProto`
712/// is typed `GcRef<LuaString>` in types.tsv, but the undump path needs
713/// `Option<GcRef<LuaString>>` to express "inherited from parent".  Phase B
714/// should align types.tsv or add a dedicated `Option` wrapper there.
715///
716/// PORT NOTE: `f->is_vararg` is stored as `lu_byte` in C but `bool` in
717/// types.tsv.  We read the raw byte and convert to `bool` via `!= 0`.
718fn load_function(
719    s: &mut LoadState<'_>,
720    f: &mut LuaProto,
721    psource: Option<GcRef<LuaString>>,
722) -> Result<(), LuaError> {
723    let source = load_string_n(s, f)?;
724    f.source = source.or(psource);
725
726    f.linedefined = load_int(s)?;
727    f.lastlinedefined = load_int(s)?;
728    f.numparams = load_byte(s)?;
729    // types.tsv: Proto.is_vararg → bool (stored as lu_byte in C)
730    f.is_vararg = load_byte(s)? != 0;
731    f.maxstacksize = load_byte(s)?;
732    load_code(s, f)?;
733    reconstruct_vararg_table_reg(f);
734    load_constants(s, f)?;
735    load_upvalues(s, f)?;
736    load_protos(s, f)?;
737    load_debug(s, f)?;
738
739    Ok(())
740}
741
742/// Recover `LuaProto.vararg_table_reg` from the loaded bytecode instead of from
743/// the wire format, so a precompiled chunk keeps Lua 5.5 named-vararg aliasing
744/// (`function f(...t)`) without lua-rs's `string.dump` output diverging from
745/// C's bytecode layout (which the structural oracle compares).
746///
747/// A named-vararg function emits exactly one `OP_VARARGPACK` (opcode 84) at
748/// entry; its A operand is the register holding the shared vararg table. Its
749/// k bit records whether the table must be materialized.
750fn reconstruct_vararg_table_reg(f: &mut LuaProto) {
751    const OP_VARARGPACK: u32 = 84;
752    const OPCODE_MASK: u32 = 0x7F;
753    const POS_K: u32 = 15;
754    if let Some((reg, needed)) = f.code.iter().find_map(|inst| {
755        let raw = inst.raw();
756        (raw & OPCODE_MASK == OP_VARARGPACK).then(|| {
757            let reg = ((raw >> 7) & 0xFF) as u8;
758            let needed = ((raw >> POS_K) & 1) != 0;
759            (reg, needed)
760        })
761    }) {
762        f.vararg_table_reg = Some(reg);
763        f.vararg_table_needed = needed;
764    }
765}
766
767// ── Header validation ──────────────────────────────────────────────────────
768
769/// Verify that the next `expected.len()` bytes in the stream match `expected`.
770///
771/// # C source
772/// ```c
773///
774/// //   char buff[sizeof(LUA_SIGNATURE) + sizeof(LUAC_DATA)];
775/// //   size_t len = strlen(s);
776/// //   loadVector(S, buff, len);
777/// //   if (memcmp(s, buff, len) != 0)
778/// //     error(S, msg);
779/// // }
780/// ```
781///
782/// PORT NOTE: `strlen` on a `const char *` becomes `.len()` on a `&[u8]`.
783/// `memcmp` becomes slice equality.
784fn check_literal(
785    s: &mut LoadState<'_>,
786    expected: &[u8],
787    msg: &'static str,
788) -> Result<(), LuaError> {
789    let mut buf = vec![0u8; expected.len()];
790    load_block(s, &mut buf)?;
791    if buf != expected {
792        return Err(load_error(s, msg));
793    }
794    Ok(())
795}
796
797/// Verify that the next byte in the stream equals `expected_size`.
798///
799/// # C source
800/// ```c
801///
802/// //   if (loadByte(S) != size)
803/// //     error(S, luaO_pushfstring(S->L, "%s size mismatch", tname));
804/// // }
805/// ```
806///
807/// PORT NOTE: `luaO_pushfstring` is used here as a message formatter, not as
808/// a throw site.  We inline the message directly.  `tname` is always a Rust
809/// type-name string literal (ASCII) from the call sites; using `&'static str`
810/// is appropriate here (not Lua data).
811fn fcheck_size(
812    s: &mut LoadState<'_>,
813    expected_size: usize,
814    tname: &'static str,
815) -> Result<(), LuaError> {
816    let b = load_byte(s)? as usize;
817    if b != expected_size {
818        // PORT NOTE: We build the error message inline rather than using
819        // luaO_pushfstring to avoid a stack push just for error formatting.
820        // TODO(port): include `tname` in the error message once LuaError::syntax
821        // supports composing byte-string and &str fragments.
822        return Err(LuaError::syntax(format_args!("{} size mismatch", tname)));
823    }
824    Ok(())
825}
826
827/// Validate the binary chunk header.
828///
829/// # C source
830/// ```c
831///
832/// //   checkliteral(S, &LUA_SIGNATURE[1], "not a binary chunk");
833/// //   if (loadByte(S) != LUAC_VERSION) error(S, "version mismatch");
834/// //   if (loadByte(S) != LUAC_FORMAT)  error(S, "format mismatch");
835/// //   checkliteral(S, LUAC_DATA, "corrupted chunk");
836/// //   checksize(S, Instruction);
837/// //   checksize(S, lua_Integer);
838/// //   checksize(S, lua_Number);
839/// //   if (loadInteger(S) != LUAC_INT) error(S, "integer format mismatch");
840/// //   if (loadNumber(S)  != LUAC_NUM) error(S, "float format mismatch");
841/// // }
842/// ```
843///
844/// PORT NOTE: `checksize(S, T)` expands to `fchecksize(S, sizeof(T), #T)`.
845/// We emit the three concrete sizes inline.
846/// - `sizeof(Instruction)` = 4 (u32)
847/// - `sizeof(lua_Integer)` = 8 (i64)
848/// - `sizeof(lua_Number)` = 8 (f64)
849///
850/// PORT NOTE: The first byte of `LUA_SIGNATURE` (`\x1b`) is already consumed
851/// by the caller before `checkHeader` is invoked, so we check only bytes 1..
852/// of the signature (`"Lua"`).
853fn check_header(s: &mut LoadState<'_>) -> Result<(), LuaError> {
854    // Skip LUA_SIGNATURE[0] (\x1b) — already consumed by the caller.
855    check_literal(s, &LUA_SIGNATURE[1..], "not a binary chunk")?;
856
857    let version = s.state.global().lua_version;
858    let expected_version = if matches!(version, LuaVersion::V55) {
859        LUAC_VERSION_55
860    } else {
861        LUAC_VERSION_54
862    };
863    let ver = load_byte(s)?;
864    if ver != expected_version {
865        return Err(load_error(s, "version mismatch"));
866    }
867
868    let fmt = load_byte(s)?;
869    if fmt != LUAC_FORMAT {
870        return Err(load_error(s, "format mismatch"));
871    }
872
873    check_literal(s, LUAC_DATA, "corrupted chunk")?;
874
875    if matches!(version, LuaVersion::V55) {
876        fcheck_size(s, 4, "int")?;
877        if load_raw_i32(s)? != LUAC_INT_55 as i32 {
878            return Err(load_error(s, "int format mismatch"));
879        }
880
881        fcheck_size(s, 4, "instruction")?;
882        if load_raw_u32(s)? != LUAC_INST_55 {
883            return Err(load_error(s, "instruction format mismatch"));
884        }
885
886        fcheck_size(s, 8, "Lua integer")?;
887        if load_integer(s)? != LUAC_INT_55 {
888            return Err(load_error(s, "Lua integer format mismatch"));
889        }
890
891        fcheck_size(s, 8, "Lua number")?;
892        if load_number(s)? != LUAC_NUM_55 {
893            return Err(load_error(s, "Lua number format mismatch"));
894        }
895    } else {
896        fcheck_size(s, 4, "Instruction")?;
897
898        fcheck_size(s, 8, "lua_Integer")?;
899
900        fcheck_size(s, 8, "lua_Number")?;
901
902        let int_check = load_integer(s)?;
903        if int_check != LUAC_INT {
904            return Err(load_error(s, "integer format mismatch"));
905        }
906
907        let num_check = load_number(s)?;
908        if num_check != LUAC_NUM {
909            return Err(load_error(s, "float format mismatch"));
910        }
911    }
912
913    Ok(())
914}
915
916// ── Public entry point ─────────────────────────────────────────────────────
917
918/// Load a precompiled Lua chunk and return the top-level Lua closure.
919///
920/// This is the Rust equivalent of `luaU_undump` — the single public function
921/// exported by `lundump.c`.
922///
923/// # C source
924/// ```c
925///
926/// //   LoadState S;
927/// //   LClosure *cl;
928/// //   if (*name == '@' || *name == '=')
929/// //     S.name = name + 1;
930/// //   else if (*name == LUA_SIGNATURE[0])
931/// //     S.name = "binary string";
932/// //   else
933/// //     S.name = name;
934/// //   S.L = L; S.Z = Z;
935/// //   checkHeader(&S);
936/// //   cl = luaF_newLclosure(L, loadByte(&S));
937/// //   setclLvalue2s(L, L->top.p, cl);
938/// //   luaD_inctop(L);
939/// //   cl->p = luaF_newproto(L);
940/// //   luaC_objbarrier(L, cl, cl->p);
941/// //   loadFunction(&S, cl->p, NULL);
942/// //   lua_assert(cl->nupvalues == cl->p->sizeupvalues);
943/// //   luai_verifycode(L, cl->p);
944/// //   return cl;
945/// // }
946/// ```
947///
948/// # Parameters
949/// - `state` — the Lua thread state.
950/// - `z` — input stream positioned at the start of the binary chunk
951///   (the first byte `\x1b` of `LUA_SIGNATURE` must still be present).
952/// - `name` — chunk name for error messages.  Stripped per Lua convention:
953///   - `@…` → filename (strip `@`)
954///   - `=…` → literal name (strip `=`)
955///   - starts with `\x1b` → `"binary string"`
956///   - otherwise used as-is.
957///
958/// PORT NOTE: The C function returns `LClosure *`.  In Rust we return
959/// `GcRef<LuaLClosure>` (the Lua-closure variant of `LuaClosure`).  The
960/// closure is also pushed onto the stack for GC anchoring, matching the C
961/// behaviour (`setclLvalue2s + luaD_inctop`).  The caller is responsible for
962/// popping it when done (consistent with C).
963///
964/// PORT NOTE: `luai_verifycode` is a no-op in the default build
965/// (`#define luai_verifycode(L,f)  /* empty */`); dropped here.
966///
967/// PORT NOTE: `cl->nupvalues == cl->p->sizeupvalues` — in Rust the nupvalues
968/// count is implicit in `cl.upvals.len()` and `f.upvalues.len()`; the
969/// assertion becomes `debug_assert_eq!`.
970pub(crate) fn undump(
971    state: &mut LuaState,
972    z: &mut ZIO,
973    _name: &[u8],
974) -> Result<GcRef<LuaLClosure>, LuaError> {
975    let mut s = LoadState { state, z };
976
977    check_header(&mut s)?;
978
979    // loadByte(&S) reads the number of upvalues for the top-level closure.
980    let nupvalues = load_byte(&mut s)?;
981    // PORT NOTE: `luaF_newLclosure` allocates a closure with `nupvalues`
982    // upvalue slots.  In Rust Phase A we construct the struct directly; the
983    // GcRef wrapping happens after the proto is loaded.
984    // TODO(port): use the proper lfunc::new_lua_closure(state, nupvalues) API
985    // once lfunc.rs is translated and the API is settled.
986    let mut cl = LuaLClosure::placeholder();
987    let mut upvals_vec = Vec::with_capacity(nupvalues as usize);
988    for _ in 0..nupvalues as usize {
989        upvals_vec.push(std::cell::Cell::new(
990            s.state.new_upval_closed(LuaValue::Nil),
991        ));
992    }
993    cl.upvals = upvals_vec;
994
995    // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(cl)))
996    // macros.tsv: luaD_inctop → (state.push already increments; use state.push)
997    // PORT NOTE: We push a placeholder Nil first; the real closure value is
998    // set after the proto is loaded.  This mirrors the C "anchor for GC"
999    // pattern.  In Phase A-C GC anchoring via the stack is not strictly
1000    // necessary (Rc keeps things alive) but we preserve the stack discipline
1001    // for behavioural parity.
1002    // TODO(port): once GcRef<LuaLClosure> is cloneable into LuaValue, push
1003    // the real value here instead of a placeholder.
1004    s.state.push(LuaValue::Nil); // placeholder; replaced below
1005
1006    let mut proto = LuaProto::placeholder();
1007
1008    // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o)  no-op Phase A
1009
1010    load_function(&mut s, &mut proto, None)?;
1011
1012    // Wrap the proto in a GcRef and attach it to the closure.
1013    // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
1014    let proto_ref = GcRef::new(proto);
1015    proto_ref.account_buffer(proto_ref.buffer_bytes() as isize);
1016
1017    // macros.tsv: lua_assert → debug_assert!
1018    // nupvalues is the byte we read; sizeupvalues = proto_ref.upvalues.len()
1019    debug_assert_eq!(
1020        nupvalues as usize,
1021        proto_ref.upvalues.len(),
1022        "upvalue count mismatch between closure header and prototype"
1023    );
1024
1025    // The macro is defined as `/* empty */` in the default build; dropped.
1026
1027    // Attach the loaded proto to the closure.
1028    cl.proto = proto_ref;
1029
1030    // Wrap the closure in GcRef.
1031    // TODO(D-1c-bridge): wraps fully-populated LuaLClosure value; state.new_lclosure makes Nil-filled upvals
1032    let cl_ref = GcRef::new(cl);
1033    cl_ref.account_buffer(cl_ref.buffer_bytes() as isize);
1034
1035    // Replace the stack placeholder with the real closure value.
1036    // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(...)))
1037    // TODO(port): replace the placeholder at the correct stack slot.
1038    // For now the top slot holds Nil; Phase B must fix this once
1039    // GcRef<LuaLClosure> → LuaValue conversion is defined.
1040    // TODO(port): update the stack slot pushed above with the real cl_ref value.
1041
1042    Ok(cl_ref)
1043}
1044
1045// ──────────────────────────────────────────────────────────────────────────
1046// PORT STATUS
1047//   source:        src/lundump.c  (335 lines, 20 functions/items)
1048//                  src/lundump.h  (35 lines, merged)
1049//   target_crate:  lua-vm
1050//   confidence:    medium
1051//   todos:         15
1052//   port_notes:    39
1053//   unsafe_blocks: 0   (must be 0 outside explicit unsafe-budget crates)
1054//   notes:         Logic is faithful to the C.  The main open items for Phase B
1055//                  are: (1) import paths for GcRef/LuaProto/LuaClosure/etc.;
1056//                  (2) LuaError::syntax byte-string formatting for the chunk
1057//                  name in load_error; (3) long-string vs short-string intern
1058//                  distinction in load_string_n; (4) the stack placeholder in
1059//                  undump must be replaced with the real GcRef<LuaLClosure>
1060//                  value once LuaValue conversion is defined; (5) UpvalDesc.name
1061//                  and LocalVar.varname need Option<GcRef<LuaString>> in the
1062//                  proto type to match the two-pass load order here.
1063// ──────────────────────────────────────────────────────────────────────────