lua_vm/undump.rs
1//! Load precompiled Lua chunks.
2//!
3//! Direct port of `reference/lua-5.4.7/src/lundump.c` (335 lines, 20 items).
4//! Declarations from `lundump.h` are merged here per PORTING.md §1.
5//!
6//! The public entry point is [`undump`], which reads a binary Lua chunk from
7//! a [`ZIO`] stream and returns a Lua closure ready to call.
8
9// TODO(port): resolve import paths once the crate module graph is settled
10// in Phase B. These are best-guess paths based on other translated files.
11use crate::state::LuaState;
12#[allow(unused_imports)] use crate::prelude::*;
13use crate::zio::ZIO;
14use lua_types::error::LuaError;
15use lua_types::value::LuaValue;
16
17// PORT NOTE: GcRef<T>, LuaProto, LuaClosure, LuaString, UpvalDesc, LocalVar,
18// AbsLineInfo, and Instruction are expected to live in lua_types or lua_vm
19// crates. All paths below are provisional for Phase A.
20use lua_types::proto::{LuaProto, UpvalDesc, LocalVar, AbsLineInfo};
21use lua_types::closure::LuaLClosure;
22use lua_types::string::LuaString;
23use lua_types::gc::GcRef;
24use lua_types::opcode::Instruction;
25
26// ── Constants (from lundump.h) ─────────────────────────────────────────────
27
/// Six-byte data marker in the chunk header used to catch conversion errors.
/// `check_header` compares it verbatim and reports "corrupted chunk" on mismatch.
const LUAC_DATA: &[u8] = b"\x19\x93\r\n\x1a\n";

/// Reference integer written in the header to detect integer endianness/size
/// mismatches.
const LUAC_INT: i64 = 0x5678;

// macros.tsv: cast_num → x as f64
/// Reference float written in the header to detect float format mismatches.
const LUAC_NUM: f64 = 370.5;

// LUA_VERSION_NUM = 504 → ((5 * 16) + 4) = 0x54 = 84
/// One-byte version tag: upper nibble = major, lower nibble = minor.
const LUAC_VERSION: u8 = 0x54;

/// One-byte format tag read right after the version byte; `check_header`
/// rejects any other value with "format mismatch".
const LUAC_FORMAT: u8 = 0;

/// Chunk signature: ESC (`\x1b`) followed by `"Lua"`. `check_header` verifies
/// only bytes `1..` of it.
const LUA_SIGNATURE: &[u8] = b"\x1bLua";

// macros.tsv: LUAI_MAXSHORTLEN → const MAX_SHORT_LEN: usize = 40
/// Largest byte length (inclusive) that `load_string_n` classifies as a short
/// string.
const MAX_SHORT_LEN: usize = 40;
49
50// ── Constant-pool type tags (from lobject.h makevariant) ───────────────────
51//
52// These are the byte values written by ldump.c into the constants array.
53// makevariant(t, v) = t | (v << 4).
54//
55// PORT NOTE: types.tsv maps LUA_VNIL → LuaValue::Nil etc. but the *byte
56// values* used in the binary format are the raw tag integers from lobject.h.
57// We define them here as u8 constants so the match in load_constants is
58// self-documenting.
59
// Each value below is `makevariant(t, v) = t | (v << 4)`; the low nibble is the
// base type and the high nibble is the variant.

/// Nil constant: `makevariant(0, 0)`.
const TAG_NIL: u8 = 0x00;
/// Boolean `false` constant: `makevariant(1, 0)`.
const TAG_FALSE: u8 = 0x01;
/// Boolean `true` constant: `makevariant(1, 1)`.
const TAG_TRUE: u8 = 0x11;
/// Integer constant: `makevariant(3, 0)`; followed by a raw `i64` in the stream.
const TAG_INT: u8 = 0x03;
/// Float constant: `makevariant(3, 1)`; followed by a raw `f64` in the stream.
const TAG_FLOAT: u8 = 0x13;
/// Short-string constant: `makevariant(4, 0)`; followed by a sized string.
const TAG_SHORT_STR: u8 = 0x04;
/// Long-string constant: `makevariant(4, 1)`; followed by a sized string.
const TAG_LONG_STR: u8 = 0x14;
67
68// ── LoadState ──────────────────────────────────────────────────────────────
69
/// Loader state bundled for convenience: the Lua state and the input stream.
///
/// PORT NOTE: In C, `LoadState` holds raw pointers to `lua_State` and `ZIO`.
/// In Rust these become references with a shared lifetime `'a`. The struct is
/// always stack-allocated inside [`undump`] and never escapes the call.
///
/// NOTE(review): the C loader also records the chunk name (`S.name` in the
/// `luaU_undump` snippet quoted on [`undump`]). This struct has no such field
/// yet, so errors built by `load_error` do not include the chunk name.
struct LoadState<'a> {
    /// Lua state used to create strings and upvalues while loading.
    state: &'a mut LuaState,
    /// Input stream the chunk bytes are read from.
    z: &'a mut ZIO,
}
85
86// ── Error helper ───────────────────────────────────────────────────────────
87
88/// Build a syntax error for a malformed binary chunk.
89///
90/// # C source
91/// ```c
92///
93/// // luaO_pushfstring(S->L, "%s: bad binary format (%s)", S->name, why);
94/// // luaD_throw(S->L, LUA_ERRSYNTAX);
95/// // }
96/// ```
97///
98/// PORT NOTE: `l_noret` in C (diverges via `longjmp`). In Rust we return
99/// `LuaError` and the caller does `return Err(load_error(...))`. The C
100/// pattern `luaO_pushfstring + luaD_throw(LUA_ERRSYNTAX)` collapses to a
101/// single `LuaError::syntax` per error_sites.tsv.
102///
103/// TODO(port): `s.name` is `Vec<u8>`; `LuaError::syntax` takes `format_args!`
104/// which requires an `std::fmt::Display` implementor. `Vec<u8>` does not
105/// implement `Display`. Phase B should add a byte-string formatting path to
106/// `LuaError::syntax_bytes` or similar, so the chunk name is included verbatim
107/// in the message.
108fn load_error(_s: &LoadState<'_>, why: &'static str) -> LuaError {
109 LuaError::syntax(format_args!("bad binary format ({})", why))
110}
111
112// ── Low-level I/O ──────────────────────────────────────────────────────────
113
114/// Read exactly `buf.len()` bytes from the stream into `buf`.
115///
116/// # C source
117/// ```c
118///
119/// // if (luaZ_read(S->Z, b, size) != 0)
120/// // error(S, "truncated chunk");
121/// // }
122/// ```
123///
124/// PORT NOTE: C takes `void *b` + explicit `size`. In Rust we use `&mut [u8]`
125/// whose length encodes the byte count. `luaZ_read` returns the number of
126/// bytes NOT read (0 = success), matching `ZIO::read`'s contract.
127fn load_block(s: &mut LoadState<'_>, buf: &mut [u8]) -> Result<(), LuaError> {
128 // macros.tsv: luaZ_read → z.read(buf) (returns usize unread)
129 if s.z.read(buf) != 0 {
130 return Err(load_error(s, "truncated chunk"));
131 }
132 Ok(())
133}
134
135/// Read a single byte from the stream.
136///
137/// # C source
138/// ```c
139///
140/// // int b = zgetc(S->Z);
141/// // if (b == EOZ)
142/// // error(S, "truncated chunk");
143/// // return cast_byte(b);
144/// // }
145/// ```
146///
147/// PORT NOTE: `cast_byte` → `as u8` per macros.tsv; `zgetc` → `z.getc()`.
148fn load_byte(s: &mut LoadState<'_>) -> Result<u8, LuaError> {
149 // macros.tsv: zgetc → z.getc() returning i32
150 let b = s.z.getc();
151 if b == crate::zio::EOZ {
152 return Err(load_error(s, "truncated chunk"));
153 }
154 // macros.tsv: cast_byte → x as u8
155 Ok(b as u8)
156}
157
158/// Read a variable-length unsigned integer (7 bits per byte, big-endian,
159/// MSB-first continuation flag).
160///
161/// # C source
162/// ```c
163///
164/// // size_t x = 0;
165/// // int b;
166/// // limit >>= 7;
167/// // do {
168/// // b = loadByte(S);
169/// // if (x >= limit)
170/// // error(S, "integer overflow");
171/// // x = (x << 7) | (b & 0x7f);
172/// // } while ((b & 0x80) == 0);
173/// // return x;
174/// // }
175/// ```
176///
177/// PORT NOTE: The encoding terminates when a byte with the high bit set is
178/// seen (the *last* byte has bit 7 = 1). That is the opposite of the more
179/// common LEB128 where the continuation bit means "more follows".
180fn load_unsigned(s: &mut LoadState<'_>, limit: usize) -> Result<usize, LuaError> {
181 let mut x: usize = 0;
182 let limit = limit >> 7;
183 loop {
184 let b = load_byte(s)? as usize;
185 if x >= limit {
186 return Err(load_error(s, "integer overflow"));
187 }
188 x = (x << 7) | (b & 0x7f);
189 if (b & 0x80) != 0 {
190 break;
191 }
192 }
193 Ok(x)
194}
195
196/// Read a `size_t`-sized unsigned value.
197///
198/// # C source
199/// ```c
200///
201/// // return loadUnsigned(S, MAX_SIZET);
202/// // }
203/// ```
204///
205/// PORT NOTE: `MAX_SIZET` → `usize::MAX` per macros.tsv.
206fn load_size(s: &mut LoadState<'_>) -> Result<usize, LuaError> {
207 // macros.tsv: MAX_SIZET → usize::MAX
208 load_unsigned(s, usize::MAX)
209}
210
211/// Read a signed `int`-sized value.
212///
213/// # C source
214/// ```c
215///
216/// // return cast_int(loadUnsigned(S, INT_MAX));
217/// // }
218/// ```
219///
220/// PORT NOTE: `cast_int` → `x as i32` per macros.tsv. `INT_MAX` → `i32::MAX
221/// as usize`.
222fn load_int(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
223 // macros.tsv: cast_int → x as i32
224 let v = load_unsigned(s, i32::MAX as usize)?;
225 Ok(v as i32)
226}
227
228/// Read a `lua_Number` (f64) as eight raw native-endian bytes.
229///
230/// # C source
231/// ```c
232///
233/// // lua_Number x;
234/// // loadVar(S, x); /* expands to loadBlock(S, &x, sizeof(x)) */
235/// // return x;
236/// // }
237/// ```
238///
239/// PORT NOTE: `loadVar` reads `sizeof(lua_Number) = 8` raw bytes directly
240/// into the value. In Rust we use `f64::from_ne_bytes` (native endian) to
241/// reconstruct the value from the eight bytes. The binary format is host-
242/// endian for these fields; the header check verifies endianness compatibility
243/// via `LUAC_INT` and `LUAC_NUM` sentinels.
244fn load_number(s: &mut LoadState<'_>) -> Result<f64, LuaError> {
245 let mut buf = [0u8; 8];
246 load_block(s, &mut buf)?;
247 // PERF(port): f64::from_ne_bytes is zero-cost — same as C's union cast
248 Ok(f64::from_ne_bytes(buf))
249}
250
251/// Read a `lua_Integer` (i64) as eight raw native-endian bytes.
252///
253/// # C source
254/// ```c
255///
256/// // lua_Integer x;
257/// // loadVar(S, x); /* expands to loadBlock(S, &x, sizeof(x)) */
258/// // return x;
259/// // }
260/// ```
261///
262/// PORT NOTE: Same reasoning as [`load_number`] — uses `i64::from_ne_bytes`.
263fn load_integer(s: &mut LoadState<'_>) -> Result<i64, LuaError> {
264 let mut buf = [0u8; 8];
265 load_block(s, &mut buf)?;
266 Ok(i64::from_ne_bytes(buf))
267}
268
269// ── String loading ─────────────────────────────────────────────────────────
270
271/// Load a nullable string. Returns `None` if the stored size is zero.
272///
273/// # C source
274/// ```c
275///
276/// // lua_State *L = S->L;
277/// // TString *ts;
278/// // size_t size = loadSize(S);
279/// // if (size == 0) return NULL;
280/// // else if (--size <= LUAI_MAXSHORTLEN) { /* short string? */
281/// // char buff[LUAI_MAXSHORTLEN];
282/// // loadVector(S, buff, size);
283/// // ts = luaS_newlstr(L, buff, size);
284/// // } else { /* long string */
285/// // ts = luaS_createlngstrobj(L, size);
286/// // setsvalue2s(L, L->top.p, ts); /* anchor it (loadVector can GC) */
287/// // luaD_inctop(L);
288/// // loadVector(S, getlngstr(ts), size);
289/// // L->top.p--;
290/// // }
291/// // luaC_objbarrier(L, p, ts);
292/// // return ts;
293/// // }
294/// ```
295///
296/// PORT NOTE: The Lua binary format stores `actual_length + 1` so that size=0
297/// is the null-string sentinel. After reading `raw_size`, the actual byte
298/// count is `raw_size - 1`.
299///
300/// PORT NOTE: In C, long strings are created first (to anchor them from GC)
301/// and then filled in-place via `getlngstr`. In Rust, GC anchoring is not
302/// needed in Phase A–C (Rc keeps objects alive); we read into a buffer and
303/// then create the string.
304///
305/// TODO(port): `luaS_newlstr` interns the string (short strings only);
306/// `luaS_createlngstrobj` does NOT intern. Phase A uses `state.intern_str()`
307/// for both. Phase B should add a `state.create_long_str()` path that skips
308/// the intern table, matching C semantics.
309///
310/// PORT NOTE: The `_proto` parameter corresponds to C's `Proto *p` used only
311/// for `luaC_objbarrier(L, p, ts)`. The barrier is a no-op in Phase A–C
312/// (macros.tsv: `luaC_objbarrier → state.gc().obj_barrier(p, o)` no-op).
313fn load_string_n(
314 s: &mut LoadState<'_>,
315 _proto: &LuaProto,
316) -> Result<Option<GcRef<LuaString>>, LuaError> {
317 let raw_size = load_size(s)?;
318 if raw_size == 0 {
319 return Ok(None);
320 }
321 let size = raw_size - 1;
322
323 // Read the raw bytes regardless of short/long distinction.
324 let mut buf = vec![0u8; size];
325
326 if size <= MAX_SHORT_LEN {
327 load_block(s, &mut buf)?;
328 } else {
329 load_block(s, &mut buf)?;
330 }
331
332 // macros.tsv: luaS_newlstr → state.intern_str(&s[..n])
333 // TODO(port): long strings should not be interned; see doc-comment above.
334 let ts = s.state.intern_str(&buf)?;
335
336 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
337 // (dropped — Phase A GC is Rc, no barrier needed)
338
339 Ok(Some(ts))
340}
341
342/// Load a non-nullable string; error if the stream encodes a null string.
343///
344/// # C source
345/// ```c
346///
347/// // TString *st = loadStringN(S, p);
348/// // if (st == NULL)
349/// // error(S, "bad format for constant string");
350/// // return st;
351/// // }
352/// ```
353fn load_string(
354 s: &mut LoadState<'_>,
355 proto: &LuaProto,
356) -> Result<GcRef<LuaString>, LuaError> {
357 match load_string_n(s, proto)? {
358 Some(ts) => Ok(ts),
359 None => Err(load_error(s, "bad format for constant string")),
360 }
361}
362
363// ── Proto-field loaders ────────────────────────────────────────────────────
364
365/// Load the bytecode instruction array into a prototype.
366///
367/// # C source
368/// ```c
369///
370/// // int n = loadInt(S);
371/// // f->code = luaM_newvectorchecked(S->L, n, Instruction);
372/// // f->sizecode = n;
373/// // loadVector(S, f->code, n);
374/// // }
375/// ```
376///
377/// PORT NOTE: `loadVector(S, f->code, n)` expands to
378/// `loadBlock(S, f->code, n * sizeof(Instruction))` — `n` raw 4-byte words.
379/// We read each `u32` in native-endian order, consistent with how
380/// [`load_number`] and [`load_integer`] work.
381///
382/// PORT NOTE: `f->sizecode` is removed in Rust — `Vec::len()` covers it
383/// (types.tsv: `Proto.sizecode → removed`).
384fn load_code(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
385 let n = load_int(s)? as usize;
386 // macros.tsv: luaM_newvectorchecked → vec_checked::<T>(n)?
387 // PORT NOTE: Phase A uses Vec directly; overflow check omitted for brevity.
388 // TODO(port): add overflow / OOM check matching luaM_newvectorchecked.
389 let mut code = Vec::with_capacity(n);
390 for _ in 0..n {
391 let mut buf = [0u8; 4];
392 load_block(s, &mut buf)?;
393 // Instruction is a u32 newtype per types.tsv
394 code.push(Instruction(u32::from_ne_bytes(buf)));
395 }
396 f.code = code;
397 Ok(())
398}
399
400/// Load the constant pool into a prototype.
401///
402/// # C source
403/// ```c
404///
405/// // int i; int n = loadInt(S);
406/// // f->k = luaM_newvectorchecked(S->L, n, TValue);
407/// // f->sizek = n;
408/// // for (i = 0; i < n; i++) setnilvalue(&f->k[i]);
409/// // for (i = 0; i < n; i++) {
410/// // TValue *o = &f->k[i];
411/// // int t = loadByte(S);
412/// // switch (t) {
413/// // case LUA_VNIL: setnilvalue(o); break;
414/// // case LUA_VFALSE: setbfvalue(o); break;
415/// // case LUA_VTRUE: setbtvalue(o); break;
416/// // case LUA_VNUMFLT: setfltvalue(o, loadNumber(S)); break;
417/// // case LUA_VNUMINT: setivalue(o, loadInteger(S)); break;
418/// // case LUA_VSHRSTR:
419/// // case LUA_VLNGSTR: setsvalue2n(S->L, o, loadString(S, f)); break;
420/// // default: lua_assert(0);
421/// // }
422/// // }
423/// // }
424/// ```
425///
426/// PORT NOTE: The initial `setnilvalue` loop initialises the vector for GC
427/// safety in C. In Rust, `Vec` is always in a valid state; we skip it.
428fn load_constants(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
429 let n = load_int(s)? as usize;
430 // TODO(port): add overflow / OOM check.
431 let mut k = Vec::with_capacity(n);
432
433 // Dropped — Rust Vec elements are never uninitialized.
434
435 for _ in 0..n {
436 let t = load_byte(s)?;
437 let val = match t {
438 // macros.tsv: setnilvalue → *o = LuaValue::Nil
439 TAG_NIL => LuaValue::Nil,
440
441 // macros.tsv: setbfvalue → *o = LuaValue::Bool(false)
442 TAG_FALSE => LuaValue::Bool(false),
443
444 // macros.tsv: setbtvalue → *o = LuaValue::Bool(true)
445 TAG_TRUE => LuaValue::Bool(true),
446
447 // macros.tsv: setfltvalue → *o = LuaValue::Float(x)
448 TAG_FLOAT => LuaValue::Float(load_number(s)?),
449
450 // macros.tsv: setivalue → *o = LuaValue::Int(x)
451 TAG_INT => LuaValue::Int(load_integer(s)?),
452
453 // macros.tsv: setsvalue2n → *dst = LuaValue::Str(s.clone())
454 TAG_SHORT_STR | TAG_LONG_STR => {
455 let ts = load_string(s, f)?;
456 LuaValue::Str(ts)
457 }
458
459 // macros.tsv: lua_assert → debug_assert!
460 _ => {
461 debug_assert!(false, "unknown constant type tag {:#04x}", t);
462 LuaValue::Nil
463 }
464 };
465 k.push(val);
466 }
467
468 f.k = k;
469 Ok(())
470}
471
472/// Load nested function prototypes into a prototype.
473///
474/// # C source
475/// ```c
476///
477/// // int i; int n = loadInt(S);
478/// // f->p = luaM_newvectorchecked(S->L, n, Proto *);
479/// // f->sizep = n;
480/// // for (i = 0; i < n; i++) f->p[i] = NULL;
481/// // for (i = 0; i < n; i++) {
482/// // f->p[i] = luaF_newproto(S->L);
483/// // luaC_objbarrier(S->L, f, f->p[i]);
484/// // loadFunction(S, f->p[i], f->source);
485/// // }
486/// // }
487/// ```
488///
489/// PORT NOTE: C creates the proto first (for GC anchor) then fills it. In
490/// Rust we create a default `LuaProto`, fill it, then wrap in `GcRef`.
491/// `f->sizep` is removed per types.tsv (`Proto.sizep → removed`).
492fn load_protos(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
493 let n = load_int(s)? as usize;
494 // TODO(port): add overflow / OOM check.
495 let mut protos = Vec::with_capacity(n);
496
497
498 for _ in 0..n {
499 let mut sub = LuaProto::placeholder();
500
501 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
502
503 // Pass parent source as fallback.
504 let parent_source = f.source.clone();
505 load_function(s, &mut sub, parent_source)?;
506
507 // Wrap in GcRef after loading.
508 // PORT NOTE: In C f->p[i] is a Proto * held by the proto's GC roots.
509 // In Rust Phase A it becomes Rc<LuaProto>.
510 // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
511 protos.push(GcRef::new(sub));
512 }
513
514 f.p = protos;
515 Ok(())
516}
517
518/// Load upvalue descriptors into a prototype.
519///
520/// # C source
521/// ```c
522///
523/// // int i, n;
524/// // n = loadInt(S);
525/// // f->upvalues = luaM_newvectorchecked(S->L, n, Upvaldesc);
526/// // f->sizeupvalues = n;
527/// // for (i = 0; i < n; i++)
528/// // f->upvalues[i].name = NULL; /* make array valid for GC */
529/// // for (i = 0; i < n; i++) {
530/// // f->upvalues[i].instack = loadByte(S);
531/// // f->upvalues[i].idx = loadByte(S);
532/// // f->upvalues[i].kind = loadByte(S);
533/// // }
534/// // }
535/// ```
536///
537/// PORT NOTE: The C comment says names must be filled first for GC safety.
538/// In Rust we build `UpvalDesc` values with `name: None` and fill names later
539/// in [`load_debug`]. This requires `UpvalDesc.name` to be
540/// `Option<GcRef<LuaString>>` rather than `GcRef<LuaString>` as listed in
541/// types.tsv. Phase B should reconcile the types.tsv entry.
542///
543/// PORT NOTE: `f->sizeupvalues` is removed per types.tsv.
544fn load_upvalues(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
545 let n = load_int(s)? as usize;
546 // TODO(port): add overflow / OOM check.
547
548 // In Rust: construct with name = None.
549
550 let mut upvalues = Vec::with_capacity(n);
551 for _ in 0..n {
552 let instack_raw = load_byte(s)?;
553 let idx = load_byte(s)?;
554 let kind = load_byte(s)?;
555
556 // types.tsv: Upvaldesc.instack → bool (stored as lu_byte in C)
557 upvalues.push(UpvalDesc {
558 name: None, // filled by load_debug
559 instack: instack_raw != 0,
560 idx,
561 kind,
562 });
563 }
564
565 f.upvalues = upvalues;
566 Ok(())
567}
568
569/// Load debug information into a prototype.
570///
571/// # C source
572/// ```c
573///
574/// // int i, n;
575/// // n = loadInt(S);
576/// // f->lineinfo = luaM_newvectorchecked(S->L, n, ls_byte);
577/// // f->sizelineinfo = n;
578/// // loadVector(S, f->lineinfo, n);
579/// // n = loadInt(S);
580/// // f->abslineinfo = luaM_newvectorchecked(S->L, n, AbsLineInfo);
581/// // f->sizeabslineinfo = n;
582/// // for (i = 0; i < n; i++) {
583/// // f->abslineinfo[i].pc = loadInt(S);
584/// // f->abslineinfo[i].line = loadInt(S);
585/// // }
586/// // n = loadInt(S);
587/// // f->locvars = luaM_newvectorchecked(S->L, n, LocVar);
588/// // f->sizelocvars = n;
589/// // for (i = 0; i < n; i++) f->locvars[i].varname = NULL;
590/// // for (i = 0; i < n; i++) {
591/// // f->locvars[i].varname = loadStringN(S, f);
592/// // f->locvars[i].startpc = loadInt(S);
593/// // f->locvars[i].endpc = loadInt(S);
594/// // }
595/// // n = loadInt(S);
596/// // if (n != 0) /* does it have debug information? */
597/// // n = f->sizeupvalues; /* must be this many */
598/// // for (i = 0; i < n; i++)
599/// // f->upvalues[i].name = loadStringN(S, f);
600/// // }
601/// ```
602///
603/// PORT NOTE: `ls_byte` (signed byte) maps to `i8` per types.tsv.
604/// `loadVector(S, f->lineinfo, n)` reads `n * sizeof(ls_byte) = n` bytes.
605/// We read them as `u8` then reinterpret as `i8` via cast.
606///
607/// PORT NOTE: Size companion fields (`sizelineinfo`, `sizeabslineinfo`,
608/// `sizelocvars`) are all removed per types.tsv — `Vec::len()` covers them.
609///
610/// PORT NOTE: `LocalVar.varname` and `UpvalDesc.name` are both
611/// `Option<GcRef<LuaString>>` here because `loadStringN` can return `None`.
612/// See also the note on [`load_upvalues`].
613fn load_debug(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
614 let n = load_int(s)? as usize;
615 let mut lineinfo = vec![0i8; n];
616 // Read as u8 slice then cast — safe because i8 and u8 have the same
617 // in-memory representation and we're casting a byte from the binary stream.
618 // SAFETY(port): this would need `unsafe` for the slice transmute in real
619 // code; for Phase A we read byte-by-byte.
620 // TODO(port): replace the loop with a single load_block into a u8 buffer
621 // followed by an i8 transmute in Phase B (or use bytemuck).
622 for item in lineinfo.iter_mut() {
623 *item = load_byte(s)? as i8;
624 }
625 f.lineinfo = lineinfo;
626
627 let n = load_int(s)? as usize;
628 let mut abslineinfo = Vec::with_capacity(n);
629 for _ in 0..n {
630 abslineinfo.push(AbsLineInfo {
631 pc: load_int(s)?,
632 line: load_int(s)?,
633 });
634 }
635 f.abslineinfo = abslineinfo;
636
637 let n = load_int(s)? as usize;
638
639 let mut locvars = Vec::with_capacity(n);
640 for _ in 0..n {
641 let varname = load_string_n(s, f)?;
642 let startpc = load_int(s)?;
643 let endpc = load_int(s)?;
644 let varname = match varname {
645 Some(v) => v,
646 None => s.state.new_string(b"")?,
647 };
648 locvars.push(LocalVar { varname, startpc, endpc });
649 }
650 f.locvars = locvars;
651
652 // PORT NOTE: if n == 0 then there is no upvalue name info (stripped).
653 let has_names = load_int(s)?;
654 if has_names != 0 {
655 let n_upvals = f.upvalues.len();
656 for i in 0..n_upvals {
657 let name = load_string_n(s, f)?;
658 f.upvalues[i].name = name;
659 }
660 }
661
662 Ok(())
663}
664
665// ── Function loader ────────────────────────────────────────────────────────
666
667/// Load a complete function prototype from the stream.
668///
669/// # C source
670/// ```c
671///
672/// // f->source = loadStringN(S, f);
673/// // if (f->source == NULL) f->source = psource;
674/// // f->linedefined = loadInt(S);
675/// // f->lastlinedefined = loadInt(S);
676/// // f->numparams = loadByte(S);
677/// // f->is_vararg = loadByte(S);
678/// // f->maxstacksize = loadByte(S);
679/// // loadCode(S, f);
680/// // loadConstants(S, f);
681/// // loadUpvalues(S, f);
682/// // loadProtos(S, f);
683/// // loadDebug(S, f);
684/// // }
685/// ```
686///
687/// PORT NOTE: `TString *psource` becomes `Option<GcRef<LuaString>>` because
688/// the top-level call passes `NULL` (mapped to `None`). `f->source` in `LuaProto`
689/// is typed `GcRef<LuaString>` in types.tsv, but the undump path needs
690/// `Option<GcRef<LuaString>>` to express "inherited from parent". Phase B
691/// should align types.tsv or add a dedicated `Option` wrapper there.
692///
693/// PORT NOTE: `f->is_vararg` is stored as `lu_byte` in C but `bool` in
694/// types.tsv. We read the raw byte and convert to `bool` via `!= 0`.
695fn load_function(
696 s: &mut LoadState<'_>,
697 f: &mut LuaProto,
698 psource: Option<GcRef<LuaString>>,
699) -> Result<(), LuaError> {
700 let source = load_string_n(s, f)?;
701 f.source = source.or(psource);
702
703 f.linedefined = load_int(s)?;
704 f.lastlinedefined = load_int(s)?;
705 f.numparams = load_byte(s)?;
706 // types.tsv: Proto.is_vararg → bool (stored as lu_byte in C)
707 f.is_vararg = load_byte(s)? != 0;
708 f.maxstacksize = load_byte(s)?;
709 load_code(s, f)?;
710 load_constants(s, f)?;
711 load_upvalues(s, f)?;
712 load_protos(s, f)?;
713 load_debug(s, f)?;
714
715 Ok(())
716}
717
718// ── Header validation ──────────────────────────────────────────────────────
719
720/// Verify that the next `expected.len()` bytes in the stream match `expected`.
721///
722/// # C source
723/// ```c
724///
725/// // char buff[sizeof(LUA_SIGNATURE) + sizeof(LUAC_DATA)];
726/// // size_t len = strlen(s);
727/// // loadVector(S, buff, len);
728/// // if (memcmp(s, buff, len) != 0)
729/// // error(S, msg);
730/// // }
731/// ```
732///
733/// PORT NOTE: `strlen` on a `const char *` becomes `.len()` on a `&[u8]`.
734/// `memcmp` becomes slice equality.
735fn check_literal(
736 s: &mut LoadState<'_>,
737 expected: &[u8],
738 msg: &'static str,
739) -> Result<(), LuaError> {
740 let mut buf = vec![0u8; expected.len()];
741 load_block(s, &mut buf)?;
742 if buf != expected {
743 return Err(load_error(s, msg));
744 }
745 Ok(())
746}
747
748/// Verify that the next byte in the stream equals `expected_size`.
749///
750/// # C source
751/// ```c
752///
753/// // if (loadByte(S) != size)
754/// // error(S, luaO_pushfstring(S->L, "%s size mismatch", tname));
755/// // }
756/// ```
757///
758/// PORT NOTE: `luaO_pushfstring` is used here as a message formatter, not as
759/// a throw site. We inline the message directly. `tname` is always a Rust
760/// type-name string literal (ASCII) from the call sites; using `&'static str`
761/// is appropriate here (not Lua data).
762fn fcheck_size(
763 s: &mut LoadState<'_>,
764 expected_size: usize,
765 tname: &'static str,
766) -> Result<(), LuaError> {
767 let b = load_byte(s)? as usize;
768 if b != expected_size {
769 // PORT NOTE: We build the error message inline rather than using
770 // luaO_pushfstring to avoid a stack push just for error formatting.
771 // TODO(port): include `tname` in the error message once LuaError::syntax
772 // supports composing byte-string and &str fragments.
773 return Err(LuaError::syntax(format_args!(
774 "{} size mismatch",
775 tname
776 )));
777 }
778 Ok(())
779}
780
781/// Validate the binary chunk header.
782///
783/// # C source
784/// ```c
785///
786/// // checkliteral(S, &LUA_SIGNATURE[1], "not a binary chunk");
787/// // if (loadByte(S) != LUAC_VERSION) error(S, "version mismatch");
788/// // if (loadByte(S) != LUAC_FORMAT) error(S, "format mismatch");
789/// // checkliteral(S, LUAC_DATA, "corrupted chunk");
790/// // checksize(S, Instruction);
791/// // checksize(S, lua_Integer);
792/// // checksize(S, lua_Number);
793/// // if (loadInteger(S) != LUAC_INT) error(S, "integer format mismatch");
794/// // if (loadNumber(S) != LUAC_NUM) error(S, "float format mismatch");
795/// // }
796/// ```
797///
798/// PORT NOTE: `checksize(S, T)` expands to `fchecksize(S, sizeof(T), #T)`.
799/// We emit the three concrete sizes inline.
800/// - `sizeof(Instruction)` = 4 (u32)
801/// - `sizeof(lua_Integer)` = 8 (i64)
802/// - `sizeof(lua_Number)` = 8 (f64)
803///
804/// PORT NOTE: The first byte of `LUA_SIGNATURE` (`\x1b`) is already consumed
805/// by the caller before `checkHeader` is invoked, so we check only bytes 1..
806/// of the signature (`"Lua"`).
807fn check_header(s: &mut LoadState<'_>) -> Result<(), LuaError> {
808 // Skip LUA_SIGNATURE[0] (\x1b) — already consumed by the caller.
809 check_literal(s, &LUA_SIGNATURE[1..], "not a binary chunk")?;
810
811 let ver = load_byte(s)?;
812 if ver != LUAC_VERSION {
813 return Err(load_error(s, "version mismatch"));
814 }
815
816 let fmt = load_byte(s)?;
817 if fmt != LUAC_FORMAT {
818 return Err(load_error(s, "format mismatch"));
819 }
820
821 check_literal(s, LUAC_DATA, "corrupted chunk")?;
822
823 fcheck_size(s, 4, "Instruction")?;
824
825 fcheck_size(s, 8, "lua_Integer")?;
826
827 fcheck_size(s, 8, "lua_Number")?;
828
829 let int_check = load_integer(s)?;
830 if int_check != LUAC_INT {
831 return Err(load_error(s, "integer format mismatch"));
832 }
833
834 let num_check = load_number(s)?;
835 if num_check != LUAC_NUM {
836 return Err(load_error(s, "float format mismatch"));
837 }
838
839 Ok(())
840}
841
842// ── Public entry point ─────────────────────────────────────────────────────
843
844/// Load a precompiled Lua chunk and return the top-level Lua closure.
845///
846/// This is the Rust equivalent of `luaU_undump` — the single public function
847/// exported by `lundump.c`.
848///
849/// # C source
850/// ```c
851///
852/// // LoadState S;
853/// // LClosure *cl;
854/// // if (*name == '@' || *name == '=')
855/// // S.name = name + 1;
856/// // else if (*name == LUA_SIGNATURE[0])
857/// // S.name = "binary string";
858/// // else
859/// // S.name = name;
860/// // S.L = L; S.Z = Z;
861/// // checkHeader(&S);
862/// // cl = luaF_newLclosure(L, loadByte(&S));
863/// // setclLvalue2s(L, L->top.p, cl);
864/// // luaD_inctop(L);
865/// // cl->p = luaF_newproto(L);
866/// // luaC_objbarrier(L, cl, cl->p);
867/// // loadFunction(&S, cl->p, NULL);
868/// // lua_assert(cl->nupvalues == cl->p->sizeupvalues);
869/// // luai_verifycode(L, cl->p);
870/// // return cl;
871/// // }
872/// ```
873///
874/// # Parameters
875/// - `state` — the Lua thread state.
876/// - `z` — input stream positioned at the start of the binary chunk
877/// (the first byte `\x1b` of `LUA_SIGNATURE` must still be present).
878/// - `name` — chunk name for error messages. Stripped per Lua convention:
879/// - `@…` → filename (strip `@`)
880/// - `=…` → literal name (strip `=`)
881/// - starts with `\x1b` → `"binary string"`
882/// - otherwise used as-is.
883///
884/// PORT NOTE: The C function returns `LClosure *`. In Rust we return
885/// `GcRef<LuaLClosure>` (the Lua-closure variant of `LuaClosure`). The
886/// closure is also pushed onto the stack for GC anchoring, matching the C
887/// behaviour (`setclLvalue2s + luaD_inctop`). The caller is responsible for
888/// popping it when done (consistent with C).
889///
890/// PORT NOTE: `luai_verifycode` is a no-op in the default build
891/// (`#define luai_verifycode(L,f) /* empty */`); dropped here.
892///
893/// PORT NOTE: `cl->nupvalues == cl->p->sizeupvalues` — in Rust the nupvalues
894/// count is implicit in `cl.upvals.len()` and `f.upvalues.len()`; the
895/// assertion becomes `debug_assert_eq!`.
896pub(crate) fn undump(
897 state: &mut LuaState,
898 z: &mut ZIO,
899 _name: &[u8],
900) -> Result<GcRef<LuaLClosure>, LuaError> {
901 let mut s = LoadState {
902 state,
903 z,
904 };
905
906 check_header(&mut s)?;
907
908 // loadByte(&S) reads the number of upvalues for the top-level closure.
909 let nupvalues = load_byte(&mut s)?;
910 // PORT NOTE: `luaF_newLclosure` allocates a closure with `nupvalues`
911 // upvalue slots. In Rust Phase A we construct the struct directly; the
912 // GcRef wrapping happens after the proto is loaded.
913 // TODO(port): use the proper lfunc::new_lua_closure(state, nupvalues) API
914 // once lfunc.rs is translated and the API is settled.
915 let mut cl = LuaLClosure::placeholder();
916 let mut upvals_vec = Vec::with_capacity(nupvalues as usize);
917 for _ in 0..nupvalues as usize {
918 upvals_vec.push(std::cell::Cell::new(s.state.new_upval_closed(LuaValue::Nil)));
919 }
920 cl.upvals = upvals_vec;
921
922 // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(cl)))
923 // macros.tsv: luaD_inctop → (state.push already increments; use state.push)
924 // PORT NOTE: We push a placeholder Nil first; the real closure value is
925 // set after the proto is loaded. This mirrors the C "anchor for GC"
926 // pattern. In Phase A-C GC anchoring via the stack is not strictly
927 // necessary (Rc keeps things alive) but we preserve the stack discipline
928 // for behavioural parity.
929 // TODO(port): once GcRef<LuaLClosure> is cloneable into LuaValue, push
930 // the real value here instead of a placeholder.
931 s.state.push(LuaValue::Nil); // placeholder; replaced below
932
933 let mut proto = LuaProto::placeholder();
934
935 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
936
937 load_function(&mut s, &mut proto, None)?;
938
939 // Wrap the proto in a GcRef and attach it to the closure.
940 // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
941 let proto_ref = GcRef::new(proto);
942
943 // macros.tsv: lua_assert → debug_assert!
944 // nupvalues is the byte we read; sizeupvalues = proto_ref.upvalues.len()
945 debug_assert_eq!(
946 nupvalues as usize,
947 proto_ref.upvalues.len(),
948 "upvalue count mismatch between closure header and prototype"
949 );
950
951 // The macro is defined as `/* empty */` in the default build; dropped.
952
953 // Attach the loaded proto to the closure.
954 cl.proto = proto_ref;
955
956 // Wrap the closure in GcRef.
957 // TODO(D-1c-bridge): wraps fully-populated LuaLClosure value; state.new_lclosure makes Nil-filled upvals
958 let cl_ref = GcRef::new(cl);
959
960 // Replace the stack placeholder with the real closure value.
961 // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(...)))
962 // TODO(port): replace the placeholder at the correct stack slot.
963 // For now the top slot holds Nil; Phase B must fix this once
964 // GcRef<LuaLClosure> → LuaValue conversion is defined.
965 // TODO(port): update the stack slot pushed above with the real cl_ref value.
966
967 Ok(cl_ref)
968}
969
970// ──────────────────────────────────────────────────────────────────────────
971// PORT STATUS
972// source: src/lundump.c (335 lines, 20 functions/items)
973// src/lundump.h (35 lines, merged)
974// target_crate: lua-vm
975// confidence: medium
976// todos: 15
977// port_notes: 39
978// unsafe_blocks: 0 (must be 0 outside explicit unsafe-budget crates)
979// notes: Logic is faithful to the C. The main open items for Phase B
980// are: (1) import paths for GcRef/LuaProto/LuaClosure/etc.;
981// (2) LuaError::syntax byte-string formatting for the chunk
982// name in load_error; (3) long-string vs short-string intern
983// distinction in load_string_n; (4) the stack placeholder in
984// undump must be replaced with the real GcRef<LuaLClosure>
985// value once LuaValue conversion is defined; (5) UpvalDesc.name
986// and LocalVar.varname need Option<GcRef<LuaString>> in the
987// proto type to match the two-pass load order here.
988// ──────────────────────────────────────────────────────────────────────────