lua_vm/undump.rs
1//! Load precompiled Lua chunks.
2//!
3//! Direct port of `reference/lua-5.4.7/src/lundump.c` (335 lines, 20 items).
4//! Declarations from `lundump.h` are merged here per PORTING.md §1.
5//!
6//! The public entry point is [`undump`], which reads a binary Lua chunk from
7//! a [`ZIO`] stream and returns a Lua closure ready to call.
8
9// TODO(port): resolve import paths once the crate module graph is settled
10// in Phase B. These are best-guess paths based on other translated files.
11use crate::state::LuaState;
12#[allow(unused_imports)] use crate::prelude::*;
13use crate::zio::ZIO;
14use lua_types::error::LuaError;
15use lua_types::value::LuaValue;
16
17// PORT NOTE: GcRef<T>, LuaProto, LuaClosure, LuaString, UpvalDesc, LocalVar,
18// AbsLineInfo, and Instruction are expected to live in lua_types or lua_vm
19// crates. All paths below are provisional for Phase A.
20use lua_types::proto::{LuaProto, UpvalDesc, LocalVar, AbsLineInfo};
21use lua_types::closure::LuaLClosure;
22use lua_types::string::LuaString;
23use lua_types::gc::GcRef;
24use lua_types::opcode::Instruction;
25
26// ── Constants (from lundump.h) ─────────────────────────────────────────────
27
28/// Six-byte data marker in the chunk header used to catch conversion errors.
const LUAC_DATA: &[u8] = b"\x19\x93\r\n\x1a\n";

/// Reference integer written in the header to detect integer endianness/size
/// mismatches.
const LUAC_INT: i64 = 0x5678;

// macros.tsv: cast_num → x as f64
/// Reference float written in the header to detect float format mismatches.
const LUAC_NUM: f64 = 370.5;

// LUA_VERSION_NUM = 504 → ((5 * 16) + 4) = 0x54 = 84
/// One-byte version tag: upper nibble = major, lower nibble = minor.
const LUAC_VERSION: u8 = 0x54;

/// Format byte expected in the header; `check_header` rejects any other value
/// with "format mismatch".
const LUAC_FORMAT: u8 = 0;

/// Chunk signature: ESC (`0x1b`) followed by the ASCII bytes `Lua`.
/// `check_header` only compares bytes `1..` of this constant.
const LUA_SIGNATURE: &[u8] = b"\x1bLua";

// macros.tsv: LUAI_MAXSHORTLEN → const MAX_SHORT_LEN: usize = 40
/// Length threshold that `load_string_n` compares a decoded string length
/// against to tell short strings from long ones.
const MAX_SHORT_LEN: usize = 40;
49
50// ── Constant-pool type tags (from lobject.h makevariant) ───────────────────
51//
52// These are the byte values written by ldump.c into the constants array.
53// makevariant(t, v) = t | (v << 4).
54//
55// PORT NOTE: types.tsv maps LUA_VNIL → LuaValue::Nil etc. but the *byte
56// values* used in the binary format are the raw tag integers from lobject.h.
57// We define them here as u8 constants so the match in load_constants is
58// self-documenting.
59
// Each value below is `base_type | (variant << 4)`, as described above.
const TAG_NIL: u8 = 0x00; // base 0, variant 0 → LuaValue::Nil
const TAG_FALSE: u8 = 0x01; // base 1, variant 0 → LuaValue::Bool(false)
const TAG_TRUE: u8 = 0x11; // base 1, variant 1 → LuaValue::Bool(true)
const TAG_INT: u8 = 0x03; // base 3, variant 0 → LuaValue::Int
const TAG_FLOAT: u8 = 0x13; // base 3, variant 1 → LuaValue::Float
const TAG_SHORT_STR: u8 = 0x04; // base 4, variant 0 → LuaValue::Str
const TAG_LONG_STR: u8 = 0x14; // base 4, variant 1 → LuaValue::Str
67
68// ── LoadState ──────────────────────────────────────────────────────────────
69
/// Loader state bundled for convenience: the Lua state and the input stream.
/// The chunk name that C keeps in `LoadState.name` is not stored here yet, so
/// error messages currently omit it (see `load_error`).
72///
73/// # C mapping
74/// ```c
75///
76/// ```
77///
78/// PORT NOTE: In C, `LoadState` holds raw pointers to `lua_State` and `ZIO`.
79/// In Rust these become references with a shared lifetime `'a`. The struct is
80/// always stack-allocated inside [`undump`] and never escapes the call.
struct LoadState<'a> {
    /// Lua state the loaders call into (string interning, upvalue creation,
    /// stack pushes).
    state: &'a mut LuaState,
    /// Input stream the chunk bytes are read from (`read` / `getc`).
    z: &'a mut ZIO,
}
85
86// ── Error helper ───────────────────────────────────────────────────────────
87
88/// Build a syntax error for a malformed binary chunk.
89///
90/// # C source
91/// ```c
92///
93/// // luaO_pushfstring(S->L, "%s: bad binary format (%s)", S->name, why);
94/// // luaD_throw(S->L, LUA_ERRSYNTAX);
95/// // }
96/// ```
97///
98/// PORT NOTE: `l_noret` in C (diverges via `longjmp`). In Rust we return
99/// `LuaError` and the caller does `return Err(load_error(...))`. The C
100/// pattern `luaO_pushfstring + luaD_throw(LUA_ERRSYNTAX)` collapses to a
101/// single `LuaError::syntax` per error_sites.tsv.
102///
103/// TODO(port): `s.name` is `Vec<u8>`; `LuaError::syntax` takes `format_args!`
104/// which requires an `std::fmt::Display` implementor. `Vec<u8>` does not
105/// implement `Display`. Phase B should add a byte-string formatting path to
106/// `LuaError::syntax_bytes` or similar, so the chunk name is included verbatim
107/// in the message.
108fn load_error(_s: &LoadState<'_>, why: &'static str) -> LuaError {
109 LuaError::syntax(format_args!("bad binary format ({})", why))
110}
111
112// ── Low-level I/O ──────────────────────────────────────────────────────────
113
114/// Read exactly `buf.len()` bytes from the stream into `buf`.
115///
116/// # C source
117/// ```c
118///
119/// // if (luaZ_read(S->Z, b, size) != 0)
120/// // error(S, "truncated chunk");
121/// // }
122/// ```
123///
124/// PORT NOTE: C takes `void *b` + explicit `size`. In Rust we use `&mut [u8]`
125/// whose length encodes the byte count. `luaZ_read` returns the number of
126/// bytes NOT read (0 = success), matching `ZIO::read`'s contract.
127fn load_block(s: &mut LoadState<'_>, buf: &mut [u8]) -> Result<(), LuaError> {
128 // macros.tsv: luaZ_read → z.read(buf) (returns usize unread)
129 if s.z.read(buf) != 0 {
130 return Err(load_error(s, "truncated chunk"));
131 }
132 Ok(())
133}
134
135/// Read a single byte from the stream.
136///
137/// # C source
138/// ```c
139///
140/// // int b = zgetc(S->Z);
141/// // if (b == EOZ)
142/// // error(S, "truncated chunk");
143/// // return cast_byte(b);
144/// // }
145/// ```
146///
147/// PORT NOTE: `cast_byte` → `as u8` per macros.tsv; `zgetc` → `z.getc()`.
148fn load_byte(s: &mut LoadState<'_>) -> Result<u8, LuaError> {
149 // macros.tsv: zgetc → z.getc() returning i32
150 let b = s.z.getc();
151 if b == crate::zio::EOZ {
152 return Err(load_error(s, "truncated chunk"));
153 }
154 // macros.tsv: cast_byte → x as u8
155 Ok(b as u8)
156}
157
/// Read a variable-length unsigned integer: 7 payload bits per byte, most
/// significant group first; the final byte is the one with its high bit set.
160///
161/// # C source
162/// ```c
163///
164/// // size_t x = 0;
165/// // int b;
166/// // limit >>= 7;
167/// // do {
168/// // b = loadByte(S);
169/// // if (x >= limit)
170/// // error(S, "integer overflow");
171/// // x = (x << 7) | (b & 0x7f);
172/// // } while ((b & 0x80) == 0);
173/// // return x;
174/// // }
175/// ```
176///
177/// PORT NOTE: The encoding terminates when a byte with the high bit set is
178/// seen (the *last* byte has bit 7 = 1). That is the opposite of the more
179/// common LEB128 where the continuation bit means "more follows".
180fn load_unsigned(s: &mut LoadState<'_>, limit: usize) -> Result<usize, LuaError> {
181 let mut x: usize = 0;
182 let limit = limit >> 7;
183 loop {
184 let b = load_byte(s)? as usize;
185 if x >= limit {
186 return Err(load_error(s, "integer overflow"));
187 }
188 x = (x << 7) | (b & 0x7f);
189 if (b & 0x80) != 0 {
190 break;
191 }
192 }
193 Ok(x)
194}
195
196/// Read a `size_t`-sized unsigned value.
197///
198/// # C source
199/// ```c
200///
201/// // return loadUnsigned(S, MAX_SIZET);
202/// // }
203/// ```
204///
205/// PORT NOTE: `MAX_SIZET` → `usize::MAX` per macros.tsv.
206fn load_size(s: &mut LoadState<'_>) -> Result<usize, LuaError> {
207 // macros.tsv: MAX_SIZET → usize::MAX
208 load_unsigned(s, usize::MAX)
209}
210
211/// Read a signed `int`-sized value.
212///
213/// # C source
214/// ```c
215///
216/// // return cast_int(loadUnsigned(S, INT_MAX));
217/// // }
218/// ```
219///
220/// PORT NOTE: `cast_int` → `x as i32` per macros.tsv. `INT_MAX` → `i32::MAX
221/// as usize`.
222fn load_int(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
223 // macros.tsv: cast_int → x as i32
224 let v = load_unsigned(s, i32::MAX as usize)?;
225 Ok(v as i32)
226}
227
228/// Read a `lua_Number` (f64) as eight raw native-endian bytes.
229///
230/// # C source
231/// ```c
232///
233/// // lua_Number x;
234/// // loadVar(S, x); /* expands to loadBlock(S, &x, sizeof(x)) */
235/// // return x;
236/// // }
237/// ```
238///
239/// PORT NOTE: `loadVar` reads `sizeof(lua_Number) = 8` raw bytes directly
240/// into the value. In Rust we use `f64::from_ne_bytes` (native endian) to
241/// reconstruct the value from the eight bytes. The binary format is host-
242/// endian for these fields; the header check verifies endianness compatibility
243/// via `LUAC_INT` and `LUAC_NUM` sentinels.
244fn load_number(s: &mut LoadState<'_>) -> Result<f64, LuaError> {
245 let mut buf = [0u8; 8];
246 load_block(s, &mut buf)?;
247 // PERF(port): f64::from_ne_bytes is zero-cost — same as C's union cast
248 Ok(f64::from_ne_bytes(buf))
249}
250
251/// Read a `lua_Integer` (i64) as eight raw native-endian bytes.
252///
253/// # C source
254/// ```c
255///
256/// // lua_Integer x;
257/// // loadVar(S, x); /* expands to loadBlock(S, &x, sizeof(x)) */
258/// // return x;
259/// // }
260/// ```
261///
262/// PORT NOTE: Same reasoning as [`load_number`] — uses `i64::from_ne_bytes`.
263fn load_integer(s: &mut LoadState<'_>) -> Result<i64, LuaError> {
264 let mut buf = [0u8; 8];
265 load_block(s, &mut buf)?;
266 Ok(i64::from_ne_bytes(buf))
267}
268
269// ── String loading ─────────────────────────────────────────────────────────
270
271/// Load a nullable string. Returns `None` if the stored size is zero.
272///
273/// # C source
274/// ```c
275///
276/// // lua_State *L = S->L;
277/// // TString *ts;
278/// // size_t size = loadSize(S);
279/// // if (size == 0) return NULL;
280/// // else if (--size <= LUAI_MAXSHORTLEN) { /* short string? */
281/// // char buff[LUAI_MAXSHORTLEN];
282/// // loadVector(S, buff, size);
283/// // ts = luaS_newlstr(L, buff, size);
284/// // } else { /* long string */
285/// // ts = luaS_createlngstrobj(L, size);
286/// // setsvalue2s(L, L->top.p, ts); /* anchor it (loadVector can GC) */
287/// // luaD_inctop(L);
288/// // loadVector(S, getlngstr(ts), size);
289/// // L->top.p--;
290/// // }
291/// // luaC_objbarrier(L, p, ts);
292/// // return ts;
293/// // }
294/// ```
295///
296/// PORT NOTE: The Lua binary format stores `actual_length + 1` so that size=0
297/// is the null-string sentinel. After reading `raw_size`, the actual byte
298/// count is `raw_size - 1`.
299///
300/// PORT NOTE: In C, long strings are created first (to anchor them from GC)
301/// and then filled in-place via `getlngstr`. In Rust, GC anchoring is not
302/// needed in Phase A–C (Rc keeps objects alive); we read into a buffer and
303/// then create the string.
304///
305/// TODO(port): `luaS_newlstr` interns the string (short strings only);
306/// `luaS_createlngstrobj` does NOT intern. Phase A uses `state.intern_str()`
307/// for both. Phase B should add a `state.create_long_str()` path that skips
308/// the intern table, matching C semantics.
309///
310/// PORT NOTE: The `_proto` parameter corresponds to C's `Proto *p` used only
311/// for `luaC_objbarrier(L, p, ts)`. The barrier is a no-op in Phase A–C
312/// (macros.tsv: `luaC_objbarrier → state.gc().obj_barrier(p, o)` no-op).
313fn load_string_n(
314 s: &mut LoadState<'_>,
315 _proto: &LuaProto,
316) -> Result<Option<GcRef<LuaString>>, LuaError> {
317 let raw_size = load_size(s)?;
318 if raw_size == 0 {
319 return Ok(None);
320 }
321 let size = raw_size - 1;
322
323 // Read the raw bytes regardless of short/long distinction.
324 let mut buf = vec![0u8; size];
325
326 if size <= MAX_SHORT_LEN {
327 load_block(s, &mut buf)?;
328 } else {
329 load_block(s, &mut buf)?;
330 }
331
332 // macros.tsv: luaS_newlstr → state.intern_str(&s[..n])
333 // TODO(port): long strings should not be interned; see doc-comment above.
334 let ts = s.state.intern_str(&buf)?;
335
336 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
337 // (dropped — Phase A GC is Rc, no barrier needed)
338
339 Ok(Some(ts))
340}
341
342/// Load a non-nullable string; error if the stream encodes a null string.
343///
344/// # C source
345/// ```c
346///
347/// // TString *st = loadStringN(S, p);
348/// // if (st == NULL)
349/// // error(S, "bad format for constant string");
350/// // return st;
351/// // }
352/// ```
353fn load_string(
354 s: &mut LoadState<'_>,
355 proto: &LuaProto,
356) -> Result<GcRef<LuaString>, LuaError> {
357 match load_string_n(s, proto)? {
358 Some(ts) => Ok(ts),
359 None => Err(load_error(s, "bad format for constant string")),
360 }
361}
362
363// ── Proto-field loaders ────────────────────────────────────────────────────
364
365/// Load the bytecode instruction array into a prototype.
366///
367/// # C source
368/// ```c
369///
370/// // int n = loadInt(S);
371/// // f->code = luaM_newvectorchecked(S->L, n, Instruction);
372/// // f->sizecode = n;
373/// // loadVector(S, f->code, n);
374/// // }
375/// ```
376///
377/// PORT NOTE: `loadVector(S, f->code, n)` expands to
378/// `loadBlock(S, f->code, n * sizeof(Instruction))` — `n` raw 4-byte words.
379/// We read each `u32` in native-endian order, consistent with how
380/// [`load_number`] and [`load_integer`] work.
381///
382/// PORT NOTE: `f->sizecode` is removed in Rust — `Vec::len()` covers it
383/// (types.tsv: `Proto.sizecode → removed`).
384fn load_code(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
385 let n = load_int(s)? as usize;
386 // macros.tsv: luaM_newvectorchecked → vec_checked::<T>(n)?
387 // PORT NOTE: Phase A uses Vec directly; overflow check omitted for brevity.
388 // TODO(port): add overflow / OOM check matching luaM_newvectorchecked.
389 let mut code = Vec::with_capacity(n);
390 for _ in 0..n {
391 let mut buf = [0u8; 4];
392 load_block(s, &mut buf)?;
393 // Instruction is a u32 newtype per types.tsv
394 code.push(Instruction(u32::from_ne_bytes(buf)));
395 }
396 f.code = code;
397 Ok(())
398}
399
400/// Load the constant pool into a prototype.
401///
402/// # C source
403/// ```c
404///
405/// // int i; int n = loadInt(S);
406/// // f->k = luaM_newvectorchecked(S->L, n, TValue);
407/// // f->sizek = n;
408/// // for (i = 0; i < n; i++) setnilvalue(&f->k[i]);
409/// // for (i = 0; i < n; i++) {
410/// // TValue *o = &f->k[i];
411/// // int t = loadByte(S);
412/// // switch (t) {
413/// // case LUA_VNIL: setnilvalue(o); break;
414/// // case LUA_VFALSE: setbfvalue(o); break;
415/// // case LUA_VTRUE: setbtvalue(o); break;
416/// // case LUA_VNUMFLT: setfltvalue(o, loadNumber(S)); break;
417/// // case LUA_VNUMINT: setivalue(o, loadInteger(S)); break;
418/// // case LUA_VSHRSTR:
419/// // case LUA_VLNGSTR: setsvalue2n(S->L, o, loadString(S, f)); break;
420/// // default: lua_assert(0);
421/// // }
422/// // }
423/// // }
424/// ```
425///
426/// PORT NOTE: The initial `setnilvalue` loop initialises the vector for GC
427/// safety in C. In Rust, `Vec` is always in a valid state; we skip it.
428fn load_constants(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
429 let n = load_int(s)? as usize;
430 // TODO(port): add overflow / OOM check.
431 let mut k = Vec::with_capacity(n);
432
433 // Dropped — Rust Vec elements are never uninitialized.
434
435 for _ in 0..n {
436 let t = load_byte(s)?;
437 let val = match t {
438 // macros.tsv: setnilvalue → *o = LuaValue::Nil
439 TAG_NIL => LuaValue::Nil,
440
441 // macros.tsv: setbfvalue → *o = LuaValue::Bool(false)
442 TAG_FALSE => LuaValue::Bool(false),
443
444 // macros.tsv: setbtvalue → *o = LuaValue::Bool(true)
445 TAG_TRUE => LuaValue::Bool(true),
446
447 // macros.tsv: setfltvalue → *o = LuaValue::Float(x)
448 TAG_FLOAT => LuaValue::Float(load_number(s)?),
449
450 // macros.tsv: setivalue → *o = LuaValue::Int(x)
451 TAG_INT => LuaValue::Int(load_integer(s)?),
452
453 // macros.tsv: setsvalue2n → *dst = LuaValue::Str(s.clone())
454 TAG_SHORT_STR | TAG_LONG_STR => {
455 let ts = load_string(s, f)?;
456 LuaValue::Str(ts)
457 }
458
459 // macros.tsv: lua_assert → debug_assert!
460 _ => {
461 debug_assert!(false, "unknown constant type tag {:#04x}", t);
462 LuaValue::Nil
463 }
464 };
465 k.push(val);
466 }
467
468 f.k = k;
469 Ok(())
470}
471
472/// Load nested function prototypes into a prototype.
473///
474/// # C source
475/// ```c
476///
477/// // int i; int n = loadInt(S);
478/// // f->p = luaM_newvectorchecked(S->L, n, Proto *);
479/// // f->sizep = n;
480/// // for (i = 0; i < n; i++) f->p[i] = NULL;
481/// // for (i = 0; i < n; i++) {
482/// // f->p[i] = luaF_newproto(S->L);
483/// // luaC_objbarrier(S->L, f, f->p[i]);
484/// // loadFunction(S, f->p[i], f->source);
485/// // }
486/// // }
487/// ```
488///
489/// PORT NOTE: C creates the proto first (for GC anchor) then fills it. In
490/// Rust we create a default `LuaProto`, fill it, then wrap in `GcRef`.
491/// `f->sizep` is removed per types.tsv (`Proto.sizep → removed`).
492fn load_protos(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
493 let n = load_int(s)? as usize;
494 // TODO(port): add overflow / OOM check.
495 let mut protos = Vec::with_capacity(n);
496
497
498 for _ in 0..n {
499 let mut sub = LuaProto::placeholder();
500
501 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
502
503 // Pass parent source as fallback.
504 let parent_source = f.source.clone();
505 load_function(s, &mut sub, parent_source)?;
506
507 // Wrap in GcRef after loading.
508 // PORT NOTE: In C f->p[i] is a Proto * held by the proto's GC roots.
509 // In Rust Phase A it becomes Rc<LuaProto>.
510 // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
511 let sub_ref = GcRef::new(sub);
512 sub_ref.account_buffer(sub_ref.buffer_bytes() as isize);
513 protos.push(sub_ref);
514 }
515
516 f.p = protos;
517 Ok(())
518}
519
520/// Load upvalue descriptors into a prototype.
521///
522/// # C source
523/// ```c
524///
525/// // int i, n;
526/// // n = loadInt(S);
527/// // f->upvalues = luaM_newvectorchecked(S->L, n, Upvaldesc);
528/// // f->sizeupvalues = n;
529/// // for (i = 0; i < n; i++)
530/// // f->upvalues[i].name = NULL; /* make array valid for GC */
531/// // for (i = 0; i < n; i++) {
532/// // f->upvalues[i].instack = loadByte(S);
533/// // f->upvalues[i].idx = loadByte(S);
534/// // f->upvalues[i].kind = loadByte(S);
535/// // }
536/// // }
537/// ```
538///
539/// PORT NOTE: The C comment says names must be filled first for GC safety.
540/// In Rust we build `UpvalDesc` values with `name: None` and fill names later
541/// in [`load_debug`]. This requires `UpvalDesc.name` to be
542/// `Option<GcRef<LuaString>>` rather than `GcRef<LuaString>` as listed in
543/// types.tsv. Phase B should reconcile the types.tsv entry.
544///
545/// PORT NOTE: `f->sizeupvalues` is removed per types.tsv.
546fn load_upvalues(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
547 let n = load_int(s)? as usize;
548 // TODO(port): add overflow / OOM check.
549
550 // In Rust: construct with name = None.
551
552 let mut upvalues = Vec::with_capacity(n);
553 for _ in 0..n {
554 let instack_raw = load_byte(s)?;
555 let idx = load_byte(s)?;
556 let kind = load_byte(s)?;
557
558 // types.tsv: Upvaldesc.instack → bool (stored as lu_byte in C)
559 upvalues.push(UpvalDesc {
560 name: None, // filled by load_debug
561 instack: instack_raw != 0,
562 idx,
563 kind,
564 });
565 }
566
567 f.upvalues = upvalues;
568 Ok(())
569}
570
571/// Load debug information into a prototype.
572///
573/// # C source
574/// ```c
575///
576/// // int i, n;
577/// // n = loadInt(S);
578/// // f->lineinfo = luaM_newvectorchecked(S->L, n, ls_byte);
579/// // f->sizelineinfo = n;
580/// // loadVector(S, f->lineinfo, n);
581/// // n = loadInt(S);
582/// // f->abslineinfo = luaM_newvectorchecked(S->L, n, AbsLineInfo);
583/// // f->sizeabslineinfo = n;
584/// // for (i = 0; i < n; i++) {
585/// // f->abslineinfo[i].pc = loadInt(S);
586/// // f->abslineinfo[i].line = loadInt(S);
587/// // }
588/// // n = loadInt(S);
589/// // f->locvars = luaM_newvectorchecked(S->L, n, LocVar);
590/// // f->sizelocvars = n;
591/// // for (i = 0; i < n; i++) f->locvars[i].varname = NULL;
592/// // for (i = 0; i < n; i++) {
593/// // f->locvars[i].varname = loadStringN(S, f);
594/// // f->locvars[i].startpc = loadInt(S);
595/// // f->locvars[i].endpc = loadInt(S);
596/// // }
597/// // n = loadInt(S);
598/// // if (n != 0) /* does it have debug information? */
599/// // n = f->sizeupvalues; /* must be this many */
600/// // for (i = 0; i < n; i++)
601/// // f->upvalues[i].name = loadStringN(S, f);
602/// // }
603/// ```
604///
605/// PORT NOTE: `ls_byte` (signed byte) maps to `i8` per types.tsv.
606/// `loadVector(S, f->lineinfo, n)` reads `n * sizeof(ls_byte) = n` bytes.
607/// We read them as `u8` then reinterpret as `i8` via cast.
608///
609/// PORT NOTE: Size companion fields (`sizelineinfo`, `sizeabslineinfo`,
610/// `sizelocvars`) are all removed per types.tsv — `Vec::len()` covers them.
611///
/// PORT NOTE: `UpvalDesc.name` is `Option<GcRef<LuaString>>` because
/// `loadStringN` can return `None`. `LocalVar.varname` is not optional here:
/// a missing name is replaced with a newly created empty string.
/// See also the note on [`load_upvalues`].
615fn load_debug(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
616 let n = load_int(s)? as usize;
617 let mut lineinfo = vec![0i8; n];
618 // Read as u8 slice then cast — safe because i8 and u8 have the same
619 // in-memory representation and we're casting a byte from the binary stream.
620 // SAFETY(port): this would need `unsafe` for the slice transmute in real
621 // code; for Phase A we read byte-by-byte.
622 // TODO(port): replace the loop with a single load_block into a u8 buffer
623 // followed by an i8 transmute in Phase B (or use bytemuck).
624 for item in lineinfo.iter_mut() {
625 *item = load_byte(s)? as i8;
626 }
627 f.lineinfo = lineinfo;
628
629 let n = load_int(s)? as usize;
630 let mut abslineinfo = Vec::with_capacity(n);
631 for _ in 0..n {
632 abslineinfo.push(AbsLineInfo {
633 pc: load_int(s)?,
634 line: load_int(s)?,
635 });
636 }
637 f.abslineinfo = abslineinfo;
638
639 let n = load_int(s)? as usize;
640
641 let mut locvars = Vec::with_capacity(n);
642 for _ in 0..n {
643 let varname = load_string_n(s, f)?;
644 let startpc = load_int(s)?;
645 let endpc = load_int(s)?;
646 let varname = match varname {
647 Some(v) => v,
648 None => s.state.new_string(b"")?,
649 };
650 locvars.push(LocalVar { varname, startpc, endpc });
651 }
652 f.locvars = locvars;
653
654 // PORT NOTE: if n == 0 then there is no upvalue name info (stripped).
655 let has_names = load_int(s)?;
656 if has_names != 0 {
657 let n_upvals = f.upvalues.len();
658 for i in 0..n_upvals {
659 let name = load_string_n(s, f)?;
660 f.upvalues[i].name = name;
661 }
662 }
663
664 Ok(())
665}
666
667// ── Function loader ────────────────────────────────────────────────────────
668
669/// Load a complete function prototype from the stream.
670///
671/// # C source
672/// ```c
673///
674/// // f->source = loadStringN(S, f);
675/// // if (f->source == NULL) f->source = psource;
676/// // f->linedefined = loadInt(S);
677/// // f->lastlinedefined = loadInt(S);
678/// // f->numparams = loadByte(S);
679/// // f->is_vararg = loadByte(S);
680/// // f->maxstacksize = loadByte(S);
681/// // loadCode(S, f);
682/// // loadConstants(S, f);
683/// // loadUpvalues(S, f);
684/// // loadProtos(S, f);
685/// // loadDebug(S, f);
686/// // }
687/// ```
688///
689/// PORT NOTE: `TString *psource` becomes `Option<GcRef<LuaString>>` because
690/// the top-level call passes `NULL` (mapped to `None`). `f->source` in `LuaProto`
691/// is typed `GcRef<LuaString>` in types.tsv, but the undump path needs
692/// `Option<GcRef<LuaString>>` to express "inherited from parent". Phase B
693/// should align types.tsv or add a dedicated `Option` wrapper there.
694///
695/// PORT NOTE: `f->is_vararg` is stored as `lu_byte` in C but `bool` in
696/// types.tsv. We read the raw byte and convert to `bool` via `!= 0`.
697fn load_function(
698 s: &mut LoadState<'_>,
699 f: &mut LuaProto,
700 psource: Option<GcRef<LuaString>>,
701) -> Result<(), LuaError> {
702 let source = load_string_n(s, f)?;
703 f.source = source.or(psource);
704
705 f.linedefined = load_int(s)?;
706 f.lastlinedefined = load_int(s)?;
707 f.numparams = load_byte(s)?;
708 // types.tsv: Proto.is_vararg → bool (stored as lu_byte in C)
709 f.is_vararg = load_byte(s)? != 0;
710 f.maxstacksize = load_byte(s)?;
711 load_code(s, f)?;
712 reconstruct_vararg_table_reg(f);
713 load_constants(s, f)?;
714 load_upvalues(s, f)?;
715 load_protos(s, f)?;
716 load_debug(s, f)?;
717
718 Ok(())
719}
720
721/// Recover `LuaProto.vararg_table_reg` from the loaded bytecode instead of from
722/// the wire format, so a precompiled chunk keeps Lua 5.5 named-vararg aliasing
723/// (`function f(...t)`) without lua-rs's `string.dump` output diverging from
724/// C's bytecode layout (which the structural oracle compares).
725///
726/// A named-vararg function emits exactly one `OP_VARARGPACK` (opcode 84) at
727/// entry; its A operand is the register holding the shared vararg table. The
728/// opcode occupies the low 7 bits of the instruction word and A the next 8.
729fn reconstruct_vararg_table_reg(f: &mut LuaProto) {
730 const OP_VARARGPACK: u32 = 84;
731 const OPCODE_MASK: u32 = 0x7F;
732 f.vararg_table_reg = f.code.iter().find_map(|inst| {
733 let raw = inst.raw();
734 (raw & OPCODE_MASK == OP_VARARGPACK).then(|| ((raw >> 7) & 0xFF) as u8)
735 });
736}
737
738// ── Header validation ──────────────────────────────────────────────────────
739
740/// Verify that the next `expected.len()` bytes in the stream match `expected`.
741///
742/// # C source
743/// ```c
744///
745/// // char buff[sizeof(LUA_SIGNATURE) + sizeof(LUAC_DATA)];
746/// // size_t len = strlen(s);
747/// // loadVector(S, buff, len);
748/// // if (memcmp(s, buff, len) != 0)
749/// // error(S, msg);
750/// // }
751/// ```
752///
753/// PORT NOTE: `strlen` on a `const char *` becomes `.len()` on a `&[u8]`.
754/// `memcmp` becomes slice equality.
755fn check_literal(
756 s: &mut LoadState<'_>,
757 expected: &[u8],
758 msg: &'static str,
759) -> Result<(), LuaError> {
760 let mut buf = vec![0u8; expected.len()];
761 load_block(s, &mut buf)?;
762 if buf != expected {
763 return Err(load_error(s, msg));
764 }
765 Ok(())
766}
767
768/// Verify that the next byte in the stream equals `expected_size`.
769///
770/// # C source
771/// ```c
772///
773/// // if (loadByte(S) != size)
774/// // error(S, luaO_pushfstring(S->L, "%s size mismatch", tname));
775/// // }
776/// ```
777///
778/// PORT NOTE: `luaO_pushfstring` is used here as a message formatter, not as
779/// a throw site. We inline the message directly. `tname` is always a Rust
780/// type-name string literal (ASCII) from the call sites; using `&'static str`
781/// is appropriate here (not Lua data).
782fn fcheck_size(
783 s: &mut LoadState<'_>,
784 expected_size: usize,
785 tname: &'static str,
786) -> Result<(), LuaError> {
787 let b = load_byte(s)? as usize;
788 if b != expected_size {
789 // PORT NOTE: We build the error message inline rather than using
790 // luaO_pushfstring to avoid a stack push just for error formatting.
791 // TODO(port): include `tname` in the error message once LuaError::syntax
792 // supports composing byte-string and &str fragments.
793 return Err(LuaError::syntax(format_args!(
794 "{} size mismatch",
795 tname
796 )));
797 }
798 Ok(())
799}
800
801/// Validate the binary chunk header.
802///
803/// # C source
804/// ```c
805///
806/// // checkliteral(S, &LUA_SIGNATURE[1], "not a binary chunk");
807/// // if (loadByte(S) != LUAC_VERSION) error(S, "version mismatch");
808/// // if (loadByte(S) != LUAC_FORMAT) error(S, "format mismatch");
809/// // checkliteral(S, LUAC_DATA, "corrupted chunk");
810/// // checksize(S, Instruction);
811/// // checksize(S, lua_Integer);
812/// // checksize(S, lua_Number);
813/// // if (loadInteger(S) != LUAC_INT) error(S, "integer format mismatch");
814/// // if (loadNumber(S) != LUAC_NUM) error(S, "float format mismatch");
815/// // }
816/// ```
817///
818/// PORT NOTE: `checksize(S, T)` expands to `fchecksize(S, sizeof(T), #T)`.
819/// We emit the three concrete sizes inline.
820/// - `sizeof(Instruction)` = 4 (u32)
821/// - `sizeof(lua_Integer)` = 8 (i64)
822/// - `sizeof(lua_Number)` = 8 (f64)
823///
824/// PORT NOTE: The first byte of `LUA_SIGNATURE` (`\x1b`) is already consumed
825/// by the caller before `checkHeader` is invoked, so we check only bytes 1..
826/// of the signature (`"Lua"`).
827fn check_header(s: &mut LoadState<'_>) -> Result<(), LuaError> {
828 // Skip LUA_SIGNATURE[0] (\x1b) — already consumed by the caller.
829 check_literal(s, &LUA_SIGNATURE[1..], "not a binary chunk")?;
830
831 let ver = load_byte(s)?;
832 if ver != LUAC_VERSION {
833 return Err(load_error(s, "version mismatch"));
834 }
835
836 let fmt = load_byte(s)?;
837 if fmt != LUAC_FORMAT {
838 return Err(load_error(s, "format mismatch"));
839 }
840
841 check_literal(s, LUAC_DATA, "corrupted chunk")?;
842
843 fcheck_size(s, 4, "Instruction")?;
844
845 fcheck_size(s, 8, "lua_Integer")?;
846
847 fcheck_size(s, 8, "lua_Number")?;
848
849 let int_check = load_integer(s)?;
850 if int_check != LUAC_INT {
851 return Err(load_error(s, "integer format mismatch"));
852 }
853
854 let num_check = load_number(s)?;
855 if num_check != LUAC_NUM {
856 return Err(load_error(s, "float format mismatch"));
857 }
858
859 Ok(())
860}
861
862// ── Public entry point ─────────────────────────────────────────────────────
863
864/// Load a precompiled Lua chunk and return the top-level Lua closure.
865///
866/// This is the Rust equivalent of `luaU_undump` — the single public function
867/// exported by `lundump.c`.
868///
869/// # C source
870/// ```c
871///
872/// // LoadState S;
873/// // LClosure *cl;
874/// // if (*name == '@' || *name == '=')
875/// // S.name = name + 1;
876/// // else if (*name == LUA_SIGNATURE[0])
877/// // S.name = "binary string";
878/// // else
879/// // S.name = name;
880/// // S.L = L; S.Z = Z;
881/// // checkHeader(&S);
882/// // cl = luaF_newLclosure(L, loadByte(&S));
883/// // setclLvalue2s(L, L->top.p, cl);
884/// // luaD_inctop(L);
885/// // cl->p = luaF_newproto(L);
886/// // luaC_objbarrier(L, cl, cl->p);
887/// // loadFunction(&S, cl->p, NULL);
888/// // lua_assert(cl->nupvalues == cl->p->sizeupvalues);
889/// // luai_verifycode(L, cl->p);
890/// // return cl;
891/// // }
892/// ```
893///
894/// # Parameters
895/// - `state` — the Lua thread state.
/// - `z` — input stream positioned just after the first signature byte
///   (`\x1b`); `check_header` verifies only the remaining `"Lua"` bytes.
/// - `name` — chunk name. It is currently unused (`_name`); the C code derives
///   the name shown in error messages from it as follows:
///   - `@…` → filename (strip `@`)
///   - `=…` → literal name (strip `=`)
///   - starts with `\x1b` → `"binary string"`
///   - otherwise used as-is.
903///
/// PORT NOTE: The C function returns `LClosure *`. In Rust we return
/// `GcRef<LuaLClosure>` (the Lua-closure variant of `LuaClosure`). Where C
/// anchors the closure on the stack (`setclLvalue2s + luaD_inctop`), this port
/// currently pushes a `Nil` placeholder and never replaces it with the
/// closure. The caller is responsible for popping that slot when done
/// (consistent with C).
909///
910/// PORT NOTE: `luai_verifycode` is a no-op in the default build
911/// (`#define luai_verifycode(L,f) /* empty */`); dropped here.
912///
913/// PORT NOTE: `cl->nupvalues == cl->p->sizeupvalues` — in Rust the nupvalues
914/// count is implicit in `cl.upvals.len()` and `f.upvalues.len()`; the
915/// assertion becomes `debug_assert_eq!`.
916pub(crate) fn undump(
917 state: &mut LuaState,
918 z: &mut ZIO,
919 _name: &[u8],
920) -> Result<GcRef<LuaLClosure>, LuaError> {
921 let mut s = LoadState {
922 state,
923 z,
924 };
925
926 check_header(&mut s)?;
927
928 // loadByte(&S) reads the number of upvalues for the top-level closure.
929 let nupvalues = load_byte(&mut s)?;
930 // PORT NOTE: `luaF_newLclosure` allocates a closure with `nupvalues`
931 // upvalue slots. In Rust Phase A we construct the struct directly; the
932 // GcRef wrapping happens after the proto is loaded.
933 // TODO(port): use the proper lfunc::new_lua_closure(state, nupvalues) API
934 // once lfunc.rs is translated and the API is settled.
935 let mut cl = LuaLClosure::placeholder();
936 let mut upvals_vec = Vec::with_capacity(nupvalues as usize);
937 for _ in 0..nupvalues as usize {
938 upvals_vec.push(std::cell::Cell::new(s.state.new_upval_closed(LuaValue::Nil)));
939 }
940 cl.upvals = upvals_vec;
941
942 // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(cl)))
943 // macros.tsv: luaD_inctop → (state.push already increments; use state.push)
944 // PORT NOTE: We push a placeholder Nil first; the real closure value is
945 // set after the proto is loaded. This mirrors the C "anchor for GC"
946 // pattern. In Phase A-C GC anchoring via the stack is not strictly
947 // necessary (Rc keeps things alive) but we preserve the stack discipline
948 // for behavioural parity.
949 // TODO(port): once GcRef<LuaLClosure> is cloneable into LuaValue, push
950 // the real value here instead of a placeholder.
951 s.state.push(LuaValue::Nil); // placeholder; replaced below
952
953 let mut proto = LuaProto::placeholder();
954
955 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
956
957 load_function(&mut s, &mut proto, None)?;
958
959 // Wrap the proto in a GcRef and attach it to the closure.
960 // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
961 let proto_ref = GcRef::new(proto);
962 proto_ref.account_buffer(proto_ref.buffer_bytes() as isize);
963
964 // macros.tsv: lua_assert → debug_assert!
965 // nupvalues is the byte we read; sizeupvalues = proto_ref.upvalues.len()
966 debug_assert_eq!(
967 nupvalues as usize,
968 proto_ref.upvalues.len(),
969 "upvalue count mismatch between closure header and prototype"
970 );
971
972 // The macro is defined as `/* empty */` in the default build; dropped.
973
974 // Attach the loaded proto to the closure.
975 cl.proto = proto_ref;
976
977 // Wrap the closure in GcRef.
978 // TODO(D-1c-bridge): wraps fully-populated LuaLClosure value; state.new_lclosure makes Nil-filled upvals
979 let cl_ref = GcRef::new(cl);
980 cl_ref.account_buffer(cl_ref.buffer_bytes() as isize);
981
982 // Replace the stack placeholder with the real closure value.
983 // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(...)))
984 // TODO(port): replace the placeholder at the correct stack slot.
985 // For now the top slot holds Nil; Phase B must fix this once
986 // GcRef<LuaLClosure> → LuaValue conversion is defined.
987 // TODO(port): update the stack slot pushed above with the real cl_ref value.
988
989 Ok(cl_ref)
990}
991
992// ──────────────────────────────────────────────────────────────────────────
993// PORT STATUS
994// source: src/lundump.c (335 lines, 20 functions/items)
995// src/lundump.h (35 lines, merged)
996// target_crate: lua-vm
997// confidence: medium
998// todos: 15
999// port_notes: 39
1000// unsafe_blocks: 0 (must be 0 outside explicit unsafe-budget crates)
1001// notes: Logic is faithful to the C. The main open items for Phase B
1002// are: (1) import paths for GcRef/LuaProto/LuaClosure/etc.;
1003// (2) LuaError::syntax byte-string formatting for the chunk
1004// name in load_error; (3) long-string vs short-string intern
1005// distinction in load_string_n; (4) the stack placeholder in
1006// undump must be replaced with the real GcRef<LuaLClosure>
1007// value once LuaValue conversion is defined; (5) UpvalDesc.name
1008// and LocalVar.varname need Option<GcRef<LuaString>> in the
1009// proto type to match the two-pass load order here.
1010// ──────────────────────────────────────────────────────────────────────────