lua_vm/undump.rs
1//! Load precompiled Lua chunks.
2//!
3//! Direct port of `reference/lua-5.4.7/src/lundump.c` (335 lines, 20 items).
4//! Declarations from `lundump.h` are merged here per PORTING.md §1.
5//!
6//! The public entry point is [`undump`], which reads a binary Lua chunk from
7//! a [`ZIO`] stream and returns a Lua closure ready to call.
8
9// TODO(port): resolve import paths once the crate module graph is settled
10// in Phase B. These are best-guess paths based on other translated files.
11use crate::state::LuaState;
12#[allow(unused_imports)] use crate::prelude::*;
13use crate::zio::ZIO;
14use lua_types::error::LuaError;
15use lua_types::value::LuaValue;
16
17// PORT NOTE: GcRef<T>, LuaProto, LuaClosure, LuaString, UpvalDesc, LocalVar,
18// AbsLineInfo, and Instruction are expected to live in lua_types or lua_vm
19// crates. All paths below are provisional for Phase A.
20// TODO(port): confirm concrete module paths for all GC types in Phase B.
21use lua_types::proto::{LuaProto, UpvalDesc, LocalVar, AbsLineInfo};
22use lua_types::closure::{LuaClosure, LuaLClosure};
23use lua_types::upval::UpVal;
24use lua_types::string::LuaString;
25use lua_types::gc::GcRef;
26use lua_types::opcode::Instruction;
27
28// ── Constants (from lundump.h) ─────────────────────────────────────────────
29
/// Six-byte marker stored in the chunk header; used to catch conversion
/// errors.
const LUAC_DATA: &[u8] = &[0x19, 0x93, b'\r', b'\n', 0x1a, b'\n'];

/// Reference integer stored in the header so that integer endianness/size
/// mismatches are detected.
const LUAC_INT: i64 = 0x5678;

/// Reference float stored in the header so that float format mismatches are
/// detected (`cast_num(370.5)` in C).
const LUAC_NUM: f64 = 370.5;

/// One-byte version tag: major version in the upper nibble, minor version in
/// the lower nibble (Lua 5.4 → `0x54`).
const LUAC_VERSION: u8 = (5 << 4) | 4;

/// Format byte expected in the chunk header.
const LUAC_FORMAT: u8 = 0;

/// Leading signature of a binary chunk: ESC followed by `"Lua"`.
const LUA_SIGNATURE: &[u8] = &[0x1b, b'L', b'u', b'a'];

/// Longest string (in bytes) treated as a "short" string
/// (`LUAI_MAXSHORTLEN` in C).
const MAX_SHORT_LEN: usize = 40;
51
52// ── Constant-pool type tags (from lobject.h makevariant) ───────────────────
53//
54// These are the byte values written by ldump.c into the constants array.
55// makevariant(t, v) = t | (v << 4).
56//
57// PORT NOTE: types.tsv maps LUA_VNIL → LuaValue::Nil etc. but the *byte
58// values* used in the binary format are the raw tag integers from lobject.h.
59// We define them here as u8 constants so the match in load_constants is
60// self-documenting.
61
/// Combine a base type `t` with a variant number `v` into the one-byte tag
/// used in the constant pool (`makevariant(t, v) = t | (v << 4)`).
const fn make_variant(t: u8, v: u8) -> u8 {
    t | (v << 4)
}

/// `nil` constant.
const TAG_NIL: u8 = make_variant(0, 0);
/// Boolean `false` constant.
const TAG_FALSE: u8 = make_variant(1, 0);
/// Boolean `true` constant.
const TAG_TRUE: u8 = make_variant(1, 1);
/// Integer constant.
const TAG_INT: u8 = make_variant(3, 0);
/// Float constant.
const TAG_FLOAT: u8 = make_variant(3, 1);
/// Short-string constant.
const TAG_SHORT_STR: u8 = make_variant(4, 0);
/// Long-string constant.
const TAG_LONG_STR: u8 = make_variant(4, 1);
69
70// ── LoadState ──────────────────────────────────────────────────────────────
71
72/// Loader state bundled for convenience: Lua state, input stream, and the
73/// chunk name used in error messages.
74///
75/// # C mapping
76/// ```c
77///
78/// ```
79///
80/// PORT NOTE: In C, `LoadState` holds raw pointers to `lua_State` and `ZIO`.
81/// In Rust these become references with a shared lifetime `'a`. The struct is
82/// always stack-allocated inside [`undump`] and never escapes the call.
83struct LoadState<'a> {
84 state: &'a mut LuaState,
85 z: &'a mut ZIO,
86 // PORT NOTE: C uses const char * (a C string). In Rust we own a Vec<u8>
87 // because the name slice may be a sub-slice of the caller's &[u8].
88 name: Vec<u8>,
89}
90
91// ── Error helper ───────────────────────────────────────────────────────────
92
93/// Build a syntax error for a malformed binary chunk.
94///
95/// # C source
96/// ```c
97///
98/// // luaO_pushfstring(S->L, "%s: bad binary format (%s)", S->name, why);
99/// // luaD_throw(S->L, LUA_ERRSYNTAX);
100/// // }
101/// ```
102///
103/// PORT NOTE: `l_noret` in C (diverges via `longjmp`). In Rust we return
104/// `LuaError` and the caller does `return Err(load_error(...))`. The C
105/// pattern `luaO_pushfstring + luaD_throw(LUA_ERRSYNTAX)` collapses to a
106/// single `LuaError::syntax` per error_sites.tsv.
107///
108/// TODO(port): `s.name` is `Vec<u8>`; `LuaError::syntax` takes `format_args!`
109/// which requires an `std::fmt::Display` implementor. `Vec<u8>` does not
110/// implement `Display`. Phase B should add a byte-string formatting path to
111/// `LuaError::syntax_bytes` or similar, so the chunk name is included verbatim
112/// in the message.
113fn load_error(s: &LoadState<'_>, why: &'static str) -> LuaError {
114 // error_sites.tsv: luaD_throw(L, LUA_ERRSYNTAX) → LuaError::syntax(...)
115 LuaError::syntax(format_args!("bad binary format ({})", why))
116}
117
118// ── Low-level I/O ──────────────────────────────────────────────────────────
119
120/// Read exactly `buf.len()` bytes from the stream into `buf`.
121///
122/// # C source
123/// ```c
124///
125/// // if (luaZ_read(S->Z, b, size) != 0)
126/// // error(S, "truncated chunk");
127/// // }
128/// ```
129///
130/// PORT NOTE: C takes `void *b` + explicit `size`. In Rust we use `&mut [u8]`
131/// whose length encodes the byte count. `luaZ_read` returns the number of
132/// bytes NOT read (0 = success), matching `ZIO::read`'s contract.
133fn load_block(s: &mut LoadState<'_>, buf: &mut [u8]) -> Result<(), LuaError> {
134 // macros.tsv: luaZ_read → z.read(buf) (returns usize unread)
135 if s.z.read(buf) != 0 {
136 return Err(load_error(s, "truncated chunk"));
137 }
138 Ok(())
139}
140
141/// Read a single byte from the stream.
142///
143/// # C source
144/// ```c
145///
146/// // int b = zgetc(S->Z);
147/// // if (b == EOZ)
148/// // error(S, "truncated chunk");
149/// // return cast_byte(b);
150/// // }
151/// ```
152///
153/// PORT NOTE: `cast_byte` → `as u8` per macros.tsv; `zgetc` → `z.getc()`.
154fn load_byte(s: &mut LoadState<'_>) -> Result<u8, LuaError> {
155 // macros.tsv: zgetc → z.getc() returning i32
156 let b = s.z.getc();
157 if b == crate::zio::EOZ {
158 return Err(load_error(s, "truncated chunk"));
159 }
160 // macros.tsv: cast_byte → x as u8
161 Ok(b as u8)
162}
163
164/// Read a variable-length unsigned integer (7 bits per byte, big-endian,
165/// MSB-first continuation flag).
166///
167/// # C source
168/// ```c
169///
170/// // size_t x = 0;
171/// // int b;
172/// // limit >>= 7;
173/// // do {
174/// // b = loadByte(S);
175/// // if (x >= limit)
176/// // error(S, "integer overflow");
177/// // x = (x << 7) | (b & 0x7f);
178/// // } while ((b & 0x80) == 0);
179/// // return x;
180/// // }
181/// ```
182///
183/// PORT NOTE: The encoding terminates when a byte with the high bit set is
184/// seen (the *last* byte has bit 7 = 1). That is the opposite of the more
185/// common LEB128 where the continuation bit means "more follows".
186fn load_unsigned(s: &mut LoadState<'_>, limit: usize) -> Result<usize, LuaError> {
187 let mut x: usize = 0;
188 let limit = limit >> 7;
189 loop {
190 let b = load_byte(s)? as usize;
191 if x >= limit {
192 return Err(load_error(s, "integer overflow"));
193 }
194 x = (x << 7) | (b & 0x7f);
195 if (b & 0x80) != 0 {
196 break;
197 }
198 }
199 Ok(x)
200}
201
202/// Read a `size_t`-sized unsigned value.
203///
204/// # C source
205/// ```c
206///
207/// // return loadUnsigned(S, MAX_SIZET);
208/// // }
209/// ```
210///
211/// PORT NOTE: `MAX_SIZET` → `usize::MAX` per macros.tsv.
212fn load_size(s: &mut LoadState<'_>) -> Result<usize, LuaError> {
213 // macros.tsv: MAX_SIZET → usize::MAX
214 load_unsigned(s, usize::MAX)
215}
216
217/// Read a signed `int`-sized value.
218///
219/// # C source
220/// ```c
221///
222/// // return cast_int(loadUnsigned(S, INT_MAX));
223/// // }
224/// ```
225///
226/// PORT NOTE: `cast_int` → `x as i32` per macros.tsv. `INT_MAX` → `i32::MAX
227/// as usize`.
228fn load_int(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
229 // macros.tsv: cast_int → x as i32
230 let v = load_unsigned(s, i32::MAX as usize)?;
231 Ok(v as i32)
232}
233
234/// Read a `lua_Number` (f64) as eight raw native-endian bytes.
235///
236/// # C source
237/// ```c
238///
239/// // lua_Number x;
240/// // loadVar(S, x); /* expands to loadBlock(S, &x, sizeof(x)) */
241/// // return x;
242/// // }
243/// ```
244///
245/// PORT NOTE: `loadVar` reads `sizeof(lua_Number) = 8` raw bytes directly
246/// into the value. In Rust we use `f64::from_ne_bytes` (native endian) to
247/// reconstruct the value from the eight bytes. The binary format is host-
248/// endian for these fields; the header check verifies endianness compatibility
249/// via `LUAC_INT` and `LUAC_NUM` sentinels.
250fn load_number(s: &mut LoadState<'_>) -> Result<f64, LuaError> {
251 let mut buf = [0u8; 8];
252 load_block(s, &mut buf)?;
253 // PERF(port): f64::from_ne_bytes is zero-cost — same as C's union cast
254 Ok(f64::from_ne_bytes(buf))
255}
256
257/// Read a `lua_Integer` (i64) as eight raw native-endian bytes.
258///
259/// # C source
260/// ```c
261///
262/// // lua_Integer x;
263/// // loadVar(S, x); /* expands to loadBlock(S, &x, sizeof(x)) */
264/// // return x;
265/// // }
266/// ```
267///
268/// PORT NOTE: Same reasoning as [`load_number`] — uses `i64::from_ne_bytes`.
269fn load_integer(s: &mut LoadState<'_>) -> Result<i64, LuaError> {
270 let mut buf = [0u8; 8];
271 load_block(s, &mut buf)?;
272 Ok(i64::from_ne_bytes(buf))
273}
274
275// ── String loading ─────────────────────────────────────────────────────────
276
277/// Load a nullable string. Returns `None` if the stored size is zero.
278///
279/// # C source
280/// ```c
281///
282/// // lua_State *L = S->L;
283/// // TString *ts;
284/// // size_t size = loadSize(S);
285/// // if (size == 0) return NULL;
286/// // else if (--size <= LUAI_MAXSHORTLEN) { /* short string? */
287/// // char buff[LUAI_MAXSHORTLEN];
288/// // loadVector(S, buff, size);
289/// // ts = luaS_newlstr(L, buff, size);
290/// // } else { /* long string */
291/// // ts = luaS_createlngstrobj(L, size);
292/// // setsvalue2s(L, L->top.p, ts); /* anchor it (loadVector can GC) */
293/// // luaD_inctop(L);
294/// // loadVector(S, getlngstr(ts), size);
295/// // L->top.p--;
296/// // }
297/// // luaC_objbarrier(L, p, ts);
298/// // return ts;
299/// // }
300/// ```
301///
302/// PORT NOTE: The Lua binary format stores `actual_length + 1` so that size=0
303/// is the null-string sentinel. After reading `raw_size`, the actual byte
304/// count is `raw_size - 1`.
305///
306/// PORT NOTE: In C, long strings are created first (to anchor them from GC)
307/// and then filled in-place via `getlngstr`. In Rust, GC anchoring is not
308/// needed in Phase A–C (Rc keeps objects alive); we read into a buffer and
309/// then create the string.
310///
311/// TODO(port): `luaS_newlstr` interns the string (short strings only);
312/// `luaS_createlngstrobj` does NOT intern. Phase A uses `state.intern_str()`
313/// for both. Phase B should add a `state.create_long_str()` path that skips
314/// the intern table, matching C semantics.
315///
316/// PORT NOTE: The `_proto` parameter corresponds to C's `Proto *p` used only
317/// for `luaC_objbarrier(L, p, ts)`. The barrier is a no-op in Phase A–C
318/// (macros.tsv: `luaC_objbarrier → state.gc().obj_barrier(p, o)` no-op).
319fn load_string_n(
320 s: &mut LoadState<'_>,
321 _proto: &LuaProto,
322) -> Result<Option<GcRef<LuaString>>, LuaError> {
323 let raw_size = load_size(s)?;
324 if raw_size == 0 {
325 return Ok(None);
326 }
327 let size = raw_size - 1;
328
329 // Read the raw bytes regardless of short/long distinction.
330 let mut buf = vec![0u8; size];
331
332 if size <= MAX_SHORT_LEN {
333 load_block(s, &mut buf)?;
334 } else {
335 load_block(s, &mut buf)?;
336 }
337
338 // macros.tsv: luaS_newlstr → state.intern_str(&s[..n])
339 // TODO(port): long strings should not be interned; see doc-comment above.
340 let ts = s.state.intern_str(&buf)?;
341
342 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
343 // (dropped — Phase A GC is Rc, no barrier needed)
344
345 Ok(Some(ts))
346}
347
348/// Load a non-nullable string; error if the stream encodes a null string.
349///
350/// # C source
351/// ```c
352///
353/// // TString *st = loadStringN(S, p);
354/// // if (st == NULL)
355/// // error(S, "bad format for constant string");
356/// // return st;
357/// // }
358/// ```
359fn load_string(
360 s: &mut LoadState<'_>,
361 proto: &LuaProto,
362) -> Result<GcRef<LuaString>, LuaError> {
363 match load_string_n(s, proto)? {
364 Some(ts) => Ok(ts),
365 None => Err(load_error(s, "bad format for constant string")),
366 }
367}
368
369// ── Proto-field loaders ────────────────────────────────────────────────────
370
371/// Load the bytecode instruction array into a prototype.
372///
373/// # C source
374/// ```c
375///
376/// // int n = loadInt(S);
377/// // f->code = luaM_newvectorchecked(S->L, n, Instruction);
378/// // f->sizecode = n;
379/// // loadVector(S, f->code, n);
380/// // }
381/// ```
382///
383/// PORT NOTE: `loadVector(S, f->code, n)` expands to
384/// `loadBlock(S, f->code, n * sizeof(Instruction))` — `n` raw 4-byte words.
385/// We read each `u32` in native-endian order, consistent with how
386/// [`load_number`] and [`load_integer`] work.
387///
388/// PORT NOTE: `f->sizecode` is removed in Rust — `Vec::len()` covers it
389/// (types.tsv: `Proto.sizecode → removed`).
390fn load_code(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
391 let n = load_int(s)? as usize;
392 // macros.tsv: luaM_newvectorchecked → vec_checked::<T>(n)?
393 // PORT NOTE: Phase A uses Vec directly; overflow check omitted for brevity.
394 // TODO(port): add overflow / OOM check matching luaM_newvectorchecked.
395 let mut code = Vec::with_capacity(n);
396 for _ in 0..n {
397 let mut buf = [0u8; 4];
398 load_block(s, &mut buf)?;
399 // Instruction is a u32 newtype per types.tsv
400 code.push(Instruction(u32::from_ne_bytes(buf)));
401 }
402 f.code = code;
403 Ok(())
404}
405
406/// Load the constant pool into a prototype.
407///
408/// # C source
409/// ```c
410///
411/// // int i; int n = loadInt(S);
412/// // f->k = luaM_newvectorchecked(S->L, n, TValue);
413/// // f->sizek = n;
414/// // for (i = 0; i < n; i++) setnilvalue(&f->k[i]);
415/// // for (i = 0; i < n; i++) {
416/// // TValue *o = &f->k[i];
417/// // int t = loadByte(S);
418/// // switch (t) {
419/// // case LUA_VNIL: setnilvalue(o); break;
420/// // case LUA_VFALSE: setbfvalue(o); break;
421/// // case LUA_VTRUE: setbtvalue(o); break;
422/// // case LUA_VNUMFLT: setfltvalue(o, loadNumber(S)); break;
423/// // case LUA_VNUMINT: setivalue(o, loadInteger(S)); break;
424/// // case LUA_VSHRSTR:
425/// // case LUA_VLNGSTR: setsvalue2n(S->L, o, loadString(S, f)); break;
426/// // default: lua_assert(0);
427/// // }
428/// // }
429/// // }
430/// ```
431///
432/// PORT NOTE: The initial `setnilvalue` loop initialises the vector for GC
433/// safety in C. In Rust, `Vec` is always in a valid state; we skip it.
434fn load_constants(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
435 let n = load_int(s)? as usize;
436 // TODO(port): add overflow / OOM check.
437 let mut k = Vec::with_capacity(n);
438
439 // Dropped — Rust Vec elements are never uninitialized.
440
441 for _ in 0..n {
442 let t = load_byte(s)?;
443 let val = match t {
444 // macros.tsv: setnilvalue → *o = LuaValue::Nil
445 TAG_NIL => LuaValue::Nil,
446
447 // macros.tsv: setbfvalue → *o = LuaValue::Bool(false)
448 TAG_FALSE => LuaValue::Bool(false),
449
450 // macros.tsv: setbtvalue → *o = LuaValue::Bool(true)
451 TAG_TRUE => LuaValue::Bool(true),
452
453 // macros.tsv: setfltvalue → *o = LuaValue::Float(x)
454 TAG_FLOAT => LuaValue::Float(load_number(s)?),
455
456 // macros.tsv: setivalue → *o = LuaValue::Int(x)
457 TAG_INT => LuaValue::Int(load_integer(s)?),
458
459 // macros.tsv: setsvalue2n → *dst = LuaValue::Str(s.clone())
460 TAG_SHORT_STR | TAG_LONG_STR => {
461 let ts = load_string(s, f)?;
462 LuaValue::Str(ts)
463 }
464
465 // macros.tsv: lua_assert → debug_assert!
466 _ => {
467 debug_assert!(false, "unknown constant type tag {:#04x}", t);
468 LuaValue::Nil
469 }
470 };
471 k.push(val);
472 }
473
474 f.k = k;
475 Ok(())
476}
477
478/// Load nested function prototypes into a prototype.
479///
480/// # C source
481/// ```c
482///
483/// // int i; int n = loadInt(S);
484/// // f->p = luaM_newvectorchecked(S->L, n, Proto *);
485/// // f->sizep = n;
486/// // for (i = 0; i < n; i++) f->p[i] = NULL;
487/// // for (i = 0; i < n; i++) {
488/// // f->p[i] = luaF_newproto(S->L);
489/// // luaC_objbarrier(S->L, f, f->p[i]);
490/// // loadFunction(S, f->p[i], f->source);
491/// // }
492/// // }
493/// ```
494///
495/// PORT NOTE: C creates the proto first (for GC anchor) then fills it. In
496/// Rust we create a default `LuaProto`, fill it, then wrap in `GcRef`.
497/// `f->sizep` is removed per types.tsv (`Proto.sizep → removed`).
498fn load_protos(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
499 let n = load_int(s)? as usize;
500 // TODO(port): add overflow / OOM check.
501 let mut protos = Vec::with_capacity(n);
502
503
504 for _ in 0..n {
505 let mut sub = LuaProto::placeholder();
506
507 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
508
509 // Pass parent source as fallback.
510 let parent_source = f.source.clone();
511 load_function(s, &mut sub, parent_source)?;
512
513 // Wrap in GcRef after loading.
514 // PORT NOTE: In C f->p[i] is a Proto * held by the proto's GC roots.
515 // In Rust Phase A it becomes Rc<LuaProto>.
516 // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
517 protos.push(GcRef::new(sub));
518 }
519
520 f.p = protos;
521 Ok(())
522}
523
524/// Load upvalue descriptors into a prototype.
525///
526/// # C source
527/// ```c
528///
529/// // int i, n;
530/// // n = loadInt(S);
531/// // f->upvalues = luaM_newvectorchecked(S->L, n, Upvaldesc);
532/// // f->sizeupvalues = n;
533/// // for (i = 0; i < n; i++)
534/// // f->upvalues[i].name = NULL; /* make array valid for GC */
535/// // for (i = 0; i < n; i++) {
536/// // f->upvalues[i].instack = loadByte(S);
537/// // f->upvalues[i].idx = loadByte(S);
538/// // f->upvalues[i].kind = loadByte(S);
539/// // }
540/// // }
541/// ```
542///
543/// PORT NOTE: The C comment says names must be filled first for GC safety.
544/// In Rust we build `UpvalDesc` values with `name: None` and fill names later
545/// in [`load_debug`]. This requires `UpvalDesc.name` to be
546/// `Option<GcRef<LuaString>>` rather than `GcRef<LuaString>` as listed in
547/// types.tsv. Phase B should reconcile the types.tsv entry.
548///
549/// PORT NOTE: `f->sizeupvalues` is removed per types.tsv.
550fn load_upvalues(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
551 let n = load_int(s)? as usize;
552 // TODO(port): add overflow / OOM check.
553
554 // In Rust: construct with name = None.
555
556 let mut upvalues = Vec::with_capacity(n);
557 for _ in 0..n {
558 let instack_raw = load_byte(s)?;
559 let idx = load_byte(s)?;
560 let kind = load_byte(s)?;
561
562 // types.tsv: Upvaldesc.instack → bool (stored as lu_byte in C)
563 upvalues.push(UpvalDesc {
564 name: None, // filled by load_debug
565 instack: instack_raw != 0,
566 idx,
567 kind,
568 });
569 }
570
571 f.upvalues = upvalues;
572 Ok(())
573}
574
575/// Load debug information into a prototype.
576///
577/// # C source
578/// ```c
579///
580/// // int i, n;
581/// // n = loadInt(S);
582/// // f->lineinfo = luaM_newvectorchecked(S->L, n, ls_byte);
583/// // f->sizelineinfo = n;
584/// // loadVector(S, f->lineinfo, n);
585/// // n = loadInt(S);
586/// // f->abslineinfo = luaM_newvectorchecked(S->L, n, AbsLineInfo);
587/// // f->sizeabslineinfo = n;
588/// // for (i = 0; i < n; i++) {
589/// // f->abslineinfo[i].pc = loadInt(S);
590/// // f->abslineinfo[i].line = loadInt(S);
591/// // }
592/// // n = loadInt(S);
593/// // f->locvars = luaM_newvectorchecked(S->L, n, LocVar);
594/// // f->sizelocvars = n;
595/// // for (i = 0; i < n; i++) f->locvars[i].varname = NULL;
596/// // for (i = 0; i < n; i++) {
597/// // f->locvars[i].varname = loadStringN(S, f);
598/// // f->locvars[i].startpc = loadInt(S);
599/// // f->locvars[i].endpc = loadInt(S);
600/// // }
601/// // n = loadInt(S);
602/// // if (n != 0) /* does it have debug information? */
603/// // n = f->sizeupvalues; /* must be this many */
604/// // for (i = 0; i < n; i++)
605/// // f->upvalues[i].name = loadStringN(S, f);
606/// // }
607/// ```
608///
609/// PORT NOTE: `ls_byte` (signed byte) maps to `i8` per types.tsv.
610/// `loadVector(S, f->lineinfo, n)` reads `n * sizeof(ls_byte) = n` bytes.
611/// We read them as `u8` then reinterpret as `i8` via cast.
612///
613/// PORT NOTE: Size companion fields (`sizelineinfo`, `sizeabslineinfo`,
614/// `sizelocvars`) are all removed per types.tsv — `Vec::len()` covers them.
615///
616/// PORT NOTE: `LocalVar.varname` and `UpvalDesc.name` are both
617/// `Option<GcRef<LuaString>>` here because `loadStringN` can return `None`.
618/// See also the note on [`load_upvalues`].
619fn load_debug(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
620 let n = load_int(s)? as usize;
621 let mut lineinfo = vec![0i8; n];
622 // Read as u8 slice then cast — safe because i8 and u8 have the same
623 // in-memory representation and we're casting a byte from the binary stream.
624 // SAFETY(port): this would need `unsafe` for the slice transmute in real
625 // code; for Phase A we read byte-by-byte.
626 // TODO(port): replace the loop with a single load_block into a u8 buffer
627 // followed by an i8 transmute in Phase B (or use bytemuck).
628 for item in lineinfo.iter_mut() {
629 *item = load_byte(s)? as i8;
630 }
631 f.lineinfo = lineinfo;
632
633 let n = load_int(s)? as usize;
634 let mut abslineinfo = Vec::with_capacity(n);
635 for _ in 0..n {
636 abslineinfo.push(AbsLineInfo {
637 pc: load_int(s)?,
638 line: load_int(s)?,
639 });
640 }
641 f.abslineinfo = abslineinfo;
642
643 let n = load_int(s)? as usize;
644
645 let mut locvars = Vec::with_capacity(n);
646 for _ in 0..n {
647 let varname = load_string_n(s, f)?;
648 let startpc = load_int(s)?;
649 let endpc = load_int(s)?;
650 let varname = match varname {
651 Some(v) => v,
652 None => s.state.new_string(b"")?,
653 };
654 locvars.push(LocalVar { varname, startpc, endpc });
655 }
656 f.locvars = locvars;
657
658 // PORT NOTE: if n == 0 then there is no upvalue name info (stripped).
659 let has_names = load_int(s)?;
660 if has_names != 0 {
661 let n_upvals = f.upvalues.len();
662 for i in 0..n_upvals {
663 let name = load_string_n(s, f)?;
664 f.upvalues[i].name = name;
665 }
666 }
667
668 Ok(())
669}
670
671// ── Function loader ────────────────────────────────────────────────────────
672
673/// Load a complete function prototype from the stream.
674///
675/// # C source
676/// ```c
677///
678/// // f->source = loadStringN(S, f);
679/// // if (f->source == NULL) f->source = psource;
680/// // f->linedefined = loadInt(S);
681/// // f->lastlinedefined = loadInt(S);
682/// // f->numparams = loadByte(S);
683/// // f->is_vararg = loadByte(S);
684/// // f->maxstacksize = loadByte(S);
685/// // loadCode(S, f);
686/// // loadConstants(S, f);
687/// // loadUpvalues(S, f);
688/// // loadProtos(S, f);
689/// // loadDebug(S, f);
690/// // }
691/// ```
692///
693/// PORT NOTE: `TString *psource` becomes `Option<GcRef<LuaString>>` because
694/// the top-level call passes `NULL` (mapped to `None`). `f->source` in `LuaProto`
695/// is typed `GcRef<LuaString>` in types.tsv, but the undump path needs
696/// `Option<GcRef<LuaString>>` to express "inherited from parent". Phase B
697/// should align types.tsv or add a dedicated `Option` wrapper there.
698///
699/// PORT NOTE: `f->is_vararg` is stored as `lu_byte` in C but `bool` in
700/// types.tsv. We read the raw byte and convert to `bool` via `!= 0`.
701fn load_function(
702 s: &mut LoadState<'_>,
703 f: &mut LuaProto,
704 psource: Option<GcRef<LuaString>>,
705) -> Result<(), LuaError> {
706 let source = load_string_n(s, f)?;
707 f.source = source.or(psource);
708
709 f.linedefined = load_int(s)?;
710 f.lastlinedefined = load_int(s)?;
711 f.numparams = load_byte(s)?;
712 // types.tsv: Proto.is_vararg → bool (stored as lu_byte in C)
713 f.is_vararg = load_byte(s)? != 0;
714 f.maxstacksize = load_byte(s)?;
715 load_code(s, f)?;
716 load_constants(s, f)?;
717 load_upvalues(s, f)?;
718 load_protos(s, f)?;
719 load_debug(s, f)?;
720
721 Ok(())
722}
723
724// ── Header validation ──────────────────────────────────────────────────────
725
726/// Verify that the next `expected.len()` bytes in the stream match `expected`.
727///
728/// # C source
729/// ```c
730///
731/// // char buff[sizeof(LUA_SIGNATURE) + sizeof(LUAC_DATA)];
732/// // size_t len = strlen(s);
733/// // loadVector(S, buff, len);
734/// // if (memcmp(s, buff, len) != 0)
735/// // error(S, msg);
736/// // }
737/// ```
738///
739/// PORT NOTE: `strlen` on a `const char *` becomes `.len()` on a `&[u8]`.
740/// `memcmp` becomes slice equality.
741fn check_literal(
742 s: &mut LoadState<'_>,
743 expected: &[u8],
744 msg: &'static str,
745) -> Result<(), LuaError> {
746 let mut buf = vec![0u8; expected.len()];
747 load_block(s, &mut buf)?;
748 if buf != expected {
749 return Err(load_error(s, msg));
750 }
751 Ok(())
752}
753
754/// Verify that the next byte in the stream equals `expected_size`.
755///
756/// # C source
757/// ```c
758///
759/// // if (loadByte(S) != size)
760/// // error(S, luaO_pushfstring(S->L, "%s size mismatch", tname));
761/// // }
762/// ```
763///
764/// PORT NOTE: `luaO_pushfstring` is used here as a message formatter, not as
765/// a throw site. We inline the message directly. `tname` is always a Rust
766/// type-name string literal (ASCII) from the call sites; using `&'static str`
767/// is appropriate here (not Lua data).
768fn fcheck_size(
769 s: &mut LoadState<'_>,
770 expected_size: usize,
771 tname: &'static str,
772) -> Result<(), LuaError> {
773 let b = load_byte(s)? as usize;
774 if b != expected_size {
775 // PORT NOTE: We build the error message inline rather than using
776 // luaO_pushfstring to avoid a stack push just for error formatting.
777 // TODO(port): include `tname` in the error message once LuaError::syntax
778 // supports composing byte-string and &str fragments.
779 return Err(LuaError::syntax(format_args!(
780 "{} size mismatch",
781 tname
782 )));
783 }
784 Ok(())
785}
786
787/// Validate the binary chunk header.
788///
789/// # C source
790/// ```c
791///
792/// // checkliteral(S, &LUA_SIGNATURE[1], "not a binary chunk");
793/// // if (loadByte(S) != LUAC_VERSION) error(S, "version mismatch");
794/// // if (loadByte(S) != LUAC_FORMAT) error(S, "format mismatch");
795/// // checkliteral(S, LUAC_DATA, "corrupted chunk");
796/// // checksize(S, Instruction);
797/// // checksize(S, lua_Integer);
798/// // checksize(S, lua_Number);
799/// // if (loadInteger(S) != LUAC_INT) error(S, "integer format mismatch");
800/// // if (loadNumber(S) != LUAC_NUM) error(S, "float format mismatch");
801/// // }
802/// ```
803///
804/// PORT NOTE: `checksize(S, T)` expands to `fchecksize(S, sizeof(T), #T)`.
805/// We emit the three concrete sizes inline.
806/// - `sizeof(Instruction)` = 4 (u32)
807/// - `sizeof(lua_Integer)` = 8 (i64)
808/// - `sizeof(lua_Number)` = 8 (f64)
809///
810/// PORT NOTE: The first byte of `LUA_SIGNATURE` (`\x1b`) is already consumed
811/// by the caller before `checkHeader` is invoked, so we check only bytes 1..
812/// of the signature (`"Lua"`).
813fn check_header(s: &mut LoadState<'_>) -> Result<(), LuaError> {
814 // Skip LUA_SIGNATURE[0] (\x1b) — already consumed by the caller.
815 check_literal(s, &LUA_SIGNATURE[1..], "not a binary chunk")?;
816
817 let ver = load_byte(s)?;
818 if ver != LUAC_VERSION {
819 return Err(load_error(s, "version mismatch"));
820 }
821
822 let fmt = load_byte(s)?;
823 if fmt != LUAC_FORMAT {
824 return Err(load_error(s, "format mismatch"));
825 }
826
827 check_literal(s, LUAC_DATA, "corrupted chunk")?;
828
829 fcheck_size(s, 4, "Instruction")?;
830
831 fcheck_size(s, 8, "lua_Integer")?;
832
833 fcheck_size(s, 8, "lua_Number")?;
834
835 let int_check = load_integer(s)?;
836 if int_check != LUAC_INT {
837 return Err(load_error(s, "integer format mismatch"));
838 }
839
840 let num_check = load_number(s)?;
841 if num_check != LUAC_NUM {
842 return Err(load_error(s, "float format mismatch"));
843 }
844
845 Ok(())
846}
847
848// ── Public entry point ─────────────────────────────────────────────────────
849
850/// Load a precompiled Lua chunk and return the top-level Lua closure.
851///
852/// This is the Rust equivalent of `luaU_undump` — the single public function
853/// exported by `lundump.c`.
854///
855/// # C source
856/// ```c
857///
858/// // LoadState S;
859/// // LClosure *cl;
860/// // if (*name == '@' || *name == '=')
861/// // S.name = name + 1;
862/// // else if (*name == LUA_SIGNATURE[0])
863/// // S.name = "binary string";
864/// // else
865/// // S.name = name;
866/// // S.L = L; S.Z = Z;
867/// // checkHeader(&S);
868/// // cl = luaF_newLclosure(L, loadByte(&S));
869/// // setclLvalue2s(L, L->top.p, cl);
870/// // luaD_inctop(L);
871/// // cl->p = luaF_newproto(L);
872/// // luaC_objbarrier(L, cl, cl->p);
873/// // loadFunction(&S, cl->p, NULL);
874/// // lua_assert(cl->nupvalues == cl->p->sizeupvalues);
875/// // luai_verifycode(L, cl->p);
876/// // return cl;
877/// // }
878/// ```
879///
880/// # Parameters
881/// - `state` — the Lua thread state.
882/// - `z` — input stream positioned at the start of the binary chunk
883/// (the first byte `\x1b` of `LUA_SIGNATURE` must still be present).
884/// - `name` — chunk name for error messages. Stripped per Lua convention:
885/// - `@…` → filename (strip `@`)
886/// - `=…` → literal name (strip `=`)
887/// - starts with `\x1b` → `"binary string"`
888/// - otherwise used as-is.
889///
890/// PORT NOTE: The C function returns `LClosure *`. In Rust we return
891/// `GcRef<LuaLClosure>` (the Lua-closure variant of `LuaClosure`). The
892/// closure is also pushed onto the stack for GC anchoring, matching the C
893/// behaviour (`setclLvalue2s + luaD_inctop`). The caller is responsible for
894/// popping it when done (consistent with C).
895///
896/// PORT NOTE: `luai_verifycode` is a no-op in the default build
897/// (`#define luai_verifycode(L,f) /* empty */`); dropped here.
898///
899/// PORT NOTE: `cl->nupvalues == cl->p->sizeupvalues` — in Rust the nupvalues
900/// count is implicit in `cl.upvals.len()` and `f.upvalues.len()`; the
901/// assertion becomes `debug_assert_eq!`.
902pub(crate) fn undump(
903 state: &mut LuaState,
904 z: &mut ZIO,
905 name: &[u8],
906) -> Result<GcRef<LuaLClosure>, LuaError> {
907 let display_name: Vec<u8> = if name.first() == Some(&b'@') || name.first() == Some(&b'=') {
908 // Strip the leading sigil character.
909 name[1..].to_vec()
910 } else if name.first() == Some(&LUA_SIGNATURE[0]) {
911 b"binary string".to_vec()
912 } else {
913 name.to_vec()
914 };
915
916 let mut s = LoadState {
917 state,
918 z,
919 name: display_name,
920 };
921
922 check_header(&mut s)?;
923
924 // loadByte(&S) reads the number of upvalues for the top-level closure.
925 let nupvalues = load_byte(&mut s)?;
926 // PORT NOTE: `luaF_newLclosure` allocates a closure with `nupvalues`
927 // upvalue slots. In Rust Phase A we construct the struct directly; the
928 // GcRef wrapping happens after the proto is loaded.
929 // TODO(port): use the proper lfunc::new_lua_closure(state, nupvalues) API
930 // once lfunc.rs is translated and the API is settled.
931 let mut cl = LuaLClosure::placeholder();
932 let mut upvals_vec = Vec::with_capacity(nupvalues as usize);
933 for _ in 0..nupvalues as usize {
934 upvals_vec.push(std::cell::Cell::new(s.state.new_upval_closed(LuaValue::Nil)));
935 }
936 cl.upvals = upvals_vec;
937
938 // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(cl)))
939 // macros.tsv: luaD_inctop → (state.push already increments; use state.push)
940 // PORT NOTE: We push a placeholder Nil first; the real closure value is
941 // set after the proto is loaded. This mirrors the C "anchor for GC"
942 // pattern. In Phase A-C GC anchoring via the stack is not strictly
943 // necessary (Rc keeps things alive) but we preserve the stack discipline
944 // for behavioural parity.
945 // TODO(port): once GcRef<LuaLClosure> is cloneable into LuaValue, push
946 // the real value here instead of a placeholder.
947 s.state.push(LuaValue::Nil); // placeholder; replaced below
948
949 let mut proto = LuaProto::placeholder();
950
951 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
952
953 load_function(&mut s, &mut proto, None)?;
954
955 // Wrap the proto in a GcRef and attach it to the closure.
956 // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
957 let proto_ref = GcRef::new(proto);
958
959 // macros.tsv: lua_assert → debug_assert!
960 // nupvalues is the byte we read; sizeupvalues = proto_ref.upvalues.len()
961 debug_assert_eq!(
962 nupvalues as usize,
963 proto_ref.upvalues.len(),
964 "upvalue count mismatch between closure header and prototype"
965 );
966
967 // The macro is defined as `/* empty */` in the default build; dropped.
968
969 // Attach the loaded proto to the closure.
970 cl.proto = proto_ref;
971
972 // Wrap the closure in GcRef.
973 // TODO(D-1c-bridge): wraps fully-populated LuaLClosure value; state.new_lclosure makes Nil-filled upvals
974 let cl_ref = GcRef::new(cl);
975
976 // Replace the stack placeholder with the real closure value.
977 // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(...)))
978 // TODO(port): replace the placeholder at the correct stack slot.
979 // For now the top slot holds Nil; Phase B must fix this once
980 // GcRef<LuaLClosure> → LuaValue conversion is defined.
981 // TODO(port): update the stack slot pushed above with the real cl_ref value.
982
983 Ok(cl_ref)
984}
985
986// ──────────────────────────────────────────────────────────────────────────
987// PORT STATUS
988// source: src/lundump.c (335 lines, 20 functions/items)
989// src/lundump.h (35 lines, merged)
990// target_crate: lua-vm
991// confidence: medium
992// todos: 15
993// port_notes: 39
994// unsafe_blocks: 0 (must be 0 outside explicit unsafe-budget crates)
995// notes: Logic is faithful to the C. The main open items for Phase B
996// are: (1) import paths for GcRef/LuaProto/LuaClosure/etc.;
997// (2) LuaError::syntax byte-string formatting for the chunk
998// name in load_error; (3) long-string vs short-string intern
999// distinction in load_string_n; (4) the stack placeholder in
1000// undump must be replaced with the real GcRef<LuaLClosure>
1001// value once LuaValue conversion is defined; (5) UpvalDesc.name
1002// and LocalVar.varname need Option<GcRef<LuaString>> in the
1003// proto type to match the two-pass load order here.
1004// ──────────────────────────────────────────────────────────────────────────