lua_vm/undump.rs
1//! Load precompiled Lua chunks.
2//!
3//! Direct port of `reference/lua-5.4.7/src/lundump.c` (335 lines, 20 items).
4//! Declarations from `lundump.h` are merged here per PORTING.md §1.
5//!
6//! The public entry point is [`undump`], which reads a binary Lua chunk from
7//! a [`ZIO`] stream and returns a Lua closure ready to call.
8
9// TODO(port): resolve import paths once the crate module graph is settled
10// in Phase B. These are best-guess paths based on other translated files.
11#[allow(unused_imports)]
12use crate::prelude::*;
13use crate::state::LuaState;
14use crate::zio::ZIO;
15use lua_types::error::LuaError;
16use lua_types::value::LuaValue;
17
// PORT NOTE: GcRef<T>, LuaProto, LuaLClosure, LuaString, UpvalDesc, LocalVar,
// AbsLineInfo, and Instruction are expected to live in the lua_types or
// lua_vm crates. All paths below are provisional for Phase A.
21use lua_types::closure::LuaLClosure;
22use lua_types::gc::GcRef;
23use lua_types::opcode::Instruction;
24use lua_types::proto::{AbsLineInfo, LocalVar, LuaProto, UpvalDesc};
25use lua_types::string::LuaString;
26use lua_types::LuaVersion;
27
28// ── Constants (from lundump.h) ─────────────────────────────────────────────
29
/// Six-byte data marker in the chunk header, used to catch conversion errors.
const LUAC_DATA: &[u8] = b"\x19\x93\r\n\x1a\n";

/// Reference integer expected in a 5.4 header; detects integer
/// endianness/size mismatches.
const LUAC_INT: i64 = 0x5678;

// macros.tsv: cast_num → x as f64
/// Reference float expected in a 5.4 header; detects float format mismatches.
const LUAC_NUM: f64 = 370.5;

/// Reference integer expected in a 5.5 header (the 5.4 value, negated).
/// `check_header` compares it both as a 4-byte `int` and as an 8-byte
/// Lua integer.
const LUAC_INT_55: i64 = -LUAC_INT;

/// Reference instruction word expected in a 5.5 header.
const LUAC_INST_55: u32 = 0x1234_5678;

/// Reference float expected in a 5.5 header (the 5.4 value, negated).
const LUAC_NUM_55: f64 = -LUAC_NUM;

// LUA_VERSION_NUM = 504 → ((5 * 16) + 4) = 0x54 = 84
/// One-byte version tag: upper nibble = major, lower nibble = minor.
const LUAC_VERSION_54: u8 = (5 << 4) | 4;
/// Version tag accepted when the state runs in 5.5 mode.
const LUAC_VERSION_55: u8 = (5 << 4) | 5;

/// Format byte that `check_header` requires right after the version tag.
const LUAC_FORMAT: u8 = 0;

/// Chunk signature; `check_header` verifies everything after the first byte.
const LUA_SIGNATURE: &[u8] = b"\x1bLua";

// macros.tsv: LUAI_MAXSHORTLEN → const MAX_SHORT_LEN: usize = 40
/// Longest string length that is treated as a short string.
const MAX_SHORT_LEN: usize = 40;
58
59// ── Constant-pool type tags (from lobject.h makevariant) ───────────────────
60//
61// These are the byte values written by ldump.c into the constants array.
62// makevariant(t, v) = t | (v << 4).
63//
64// PORT NOTE: types.tsv maps LUA_VNIL → LuaValue::Nil etc. but the *byte
65// values* used in the binary format are the raw tag integers from lobject.h.
66// We define them here as u8 constants so the match in load_constants is
67// self-documenting.
68
/// `LUA_VNIL` — base type 0, variant 0.
const TAG_NIL: u8 = 0;
/// `LUA_VFALSE` — base type 1 (boolean), variant 0.
const TAG_FALSE: u8 = 1;
/// `LUA_VTRUE` — base type 1 (boolean), variant 1.
const TAG_TRUE: u8 = 1 | (1 << 4);
/// `LUA_VNUMINT` — base type 3 (number), variant 0.
const TAG_INT: u8 = 3;
/// `LUA_VNUMFLT` — base type 3 (number), variant 1.
const TAG_FLOAT: u8 = 3 | (1 << 4);
/// `LUA_VSHRSTR` — base type 4 (string), variant 0.
const TAG_SHORT_STR: u8 = 4;
/// `LUA_VLNGSTR` — base type 4 (string), variant 1.
const TAG_LONG_STR: u8 = 4 | (1 << 4);
76
77// ── LoadState ──────────────────────────────────────────────────────────────
78
79/// Loader state bundled for convenience: Lua state, input stream, and the
80/// chunk name used in error messages.
81///
82/// # C mapping
83/// ```c
84///
85/// ```
86///
87/// PORT NOTE: In C, `LoadState` holds raw pointers to `lua_State` and `ZIO`.
88/// In Rust these become references with a shared lifetime `'a`. The struct is
89/// always stack-allocated inside [`undump`] and never escapes the call.
90struct LoadState<'a> {
91 state: &'a mut LuaState,
92 z: &'a mut ZIO,
93}
94
95// ── Error helper ───────────────────────────────────────────────────────────
96
97/// Build a syntax error for a malformed binary chunk.
98///
99/// # C source
100/// ```c
101///
102/// // luaO_pushfstring(S->L, "%s: bad binary format (%s)", S->name, why);
103/// // luaD_throw(S->L, LUA_ERRSYNTAX);
104/// // }
105/// ```
106///
107/// PORT NOTE: `l_noret` in C (diverges via `longjmp`). In Rust we return
108/// `LuaError` and the caller does `return Err(load_error(...))`. The C
109/// pattern `luaO_pushfstring + luaD_throw(LUA_ERRSYNTAX)` collapses to a
110/// single `LuaError::syntax` per error_sites.tsv.
111///
112/// TODO(port): `s.name` is `Vec<u8>`; `LuaError::syntax` takes `format_args!`
113/// which requires an `std::fmt::Display` implementor. `Vec<u8>` does not
114/// implement `Display`. Phase B should add a byte-string formatting path to
115/// `LuaError::syntax_bytes` or similar, so the chunk name is included verbatim
116/// in the message.
117fn load_error(_s: &LoadState<'_>, why: &'static str) -> LuaError {
118 LuaError::syntax(format_args!("bad binary format ({})", why))
119}
120
121// ── Low-level I/O ──────────────────────────────────────────────────────────
122
123/// Read exactly `buf.len()` bytes from the stream into `buf`.
124///
125/// # C source
126/// ```c
127///
128/// // if (luaZ_read(S->Z, b, size) != 0)
129/// // error(S, "truncated chunk");
130/// // }
131/// ```
132///
133/// PORT NOTE: C takes `void *b` + explicit `size`. In Rust we use `&mut [u8]`
134/// whose length encodes the byte count. `luaZ_read` returns the number of
135/// bytes NOT read (0 = success), matching `ZIO::read`'s contract.
136fn load_block(s: &mut LoadState<'_>, buf: &mut [u8]) -> Result<(), LuaError> {
137 // macros.tsv: luaZ_read → z.read(buf) (returns usize unread)
138 if s.z.read(buf) != 0 {
139 return Err(load_error(s, "truncated chunk"));
140 }
141 Ok(())
142}
143
144/// Read a single byte from the stream.
145///
146/// # C source
147/// ```c
148///
149/// // int b = zgetc(S->Z);
150/// // if (b == EOZ)
151/// // error(S, "truncated chunk");
152/// // return cast_byte(b);
153/// // }
154/// ```
155///
156/// PORT NOTE: `cast_byte` → `as u8` per macros.tsv; `zgetc` → `z.getc()`.
157fn load_byte(s: &mut LoadState<'_>) -> Result<u8, LuaError> {
158 // macros.tsv: zgetc → z.getc() returning i32
159 let b = s.z.getc();
160 if b == crate::zio::EOZ {
161 return Err(load_error(s, "truncated chunk"));
162 }
163 // macros.tsv: cast_byte → x as u8
164 Ok(b as u8)
165}
166
167/// Read a variable-length unsigned integer (7 bits per byte, big-endian,
168/// MSB-first continuation flag).
169///
170/// # C source
171/// ```c
172///
173/// // size_t x = 0;
174/// // int b;
175/// // limit >>= 7;
176/// // do {
177/// // b = loadByte(S);
178/// // if (x >= limit)
179/// // error(S, "integer overflow");
180/// // x = (x << 7) | (b & 0x7f);
181/// // } while ((b & 0x80) == 0);
182/// // return x;
183/// // }
184/// ```
185///
186/// PORT NOTE: The encoding terminates when a byte with the high bit set is
187/// seen (the *last* byte has bit 7 = 1). That is the opposite of the more
188/// common LEB128 where the continuation bit means "more follows".
189fn load_unsigned(s: &mut LoadState<'_>, limit: usize) -> Result<usize, LuaError> {
190 let mut x: usize = 0;
191 let limit = limit >> 7;
192 loop {
193 let b = load_byte(s)? as usize;
194 if x >= limit {
195 return Err(load_error(s, "integer overflow"));
196 }
197 x = (x << 7) | (b & 0x7f);
198 if (b & 0x80) != 0 {
199 break;
200 }
201 }
202 Ok(x)
203}
204
205/// Read a `size_t`-sized unsigned value.
206///
207/// # C source
208/// ```c
209///
210/// // return loadUnsigned(S, MAX_SIZET);
211/// // }
212/// ```
213///
214/// PORT NOTE: `MAX_SIZET` → `usize::MAX` per macros.tsv.
215fn load_size(s: &mut LoadState<'_>) -> Result<usize, LuaError> {
216 // macros.tsv: MAX_SIZET → usize::MAX
217 load_unsigned(s, usize::MAX)
218}
219
220/// Read a signed `int`-sized value.
221///
222/// # C source
223/// ```c
224///
225/// // return cast_int(loadUnsigned(S, INT_MAX));
226/// // }
227/// ```
228///
229/// PORT NOTE: `cast_int` → `x as i32` per macros.tsv. `INT_MAX` → `i32::MAX
230/// as usize`.
231fn load_int(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
232 // macros.tsv: cast_int → x as i32
233 let v = load_unsigned(s, i32::MAX as usize)?;
234 Ok(v as i32)
235}
236
237/// Read a `lua_Number` (f64) as eight raw native-endian bytes.
238///
239/// # C source
240/// ```c
241///
242/// // lua_Number x;
243/// // loadVar(S, x); /* expands to loadBlock(S, &x, sizeof(x)) */
244/// // return x;
245/// // }
246/// ```
247///
248/// PORT NOTE: `loadVar` reads `sizeof(lua_Number) = 8` raw bytes directly
249/// into the value. In Rust we use `f64::from_ne_bytes` (native endian) to
250/// reconstruct the value from the eight bytes. The binary format is host-
251/// endian for these fields; the header check verifies endianness compatibility
252/// via `LUAC_INT` and `LUAC_NUM` sentinels.
253fn load_number(s: &mut LoadState<'_>) -> Result<f64, LuaError> {
254 let mut buf = [0u8; 8];
255 load_block(s, &mut buf)?;
256 // PERF(port): f64::from_ne_bytes is zero-cost — same as C's union cast
257 Ok(f64::from_ne_bytes(buf))
258}
259
260/// Read a `lua_Integer` (i64) as eight raw native-endian bytes.
261///
262/// # C source
263/// ```c
264///
265/// // lua_Integer x;
266/// // loadVar(S, x); /* expands to loadBlock(S, &x, sizeof(x)) */
267/// // return x;
268/// // }
269/// ```
270///
271/// PORT NOTE: Same reasoning as [`load_number`] — uses `i64::from_ne_bytes`.
272fn load_integer(s: &mut LoadState<'_>) -> Result<i64, LuaError> {
273 let mut buf = [0u8; 8];
274 load_block(s, &mut buf)?;
275 Ok(i64::from_ne_bytes(buf))
276}
277
278fn load_raw_i32(s: &mut LoadState<'_>) -> Result<i32, LuaError> {
279 let mut buf = [0u8; 4];
280 load_block(s, &mut buf)?;
281 Ok(i32::from_ne_bytes(buf))
282}
283
284fn load_raw_u32(s: &mut LoadState<'_>) -> Result<u32, LuaError> {
285 let mut buf = [0u8; 4];
286 load_block(s, &mut buf)?;
287 Ok(u32::from_ne_bytes(buf))
288}
289
290// ── String loading ─────────────────────────────────────────────────────────
291
292/// Load a nullable string. Returns `None` if the stored size is zero.
293///
294/// # C source
295/// ```c
296///
297/// // lua_State *L = S->L;
298/// // TString *ts;
299/// // size_t size = loadSize(S);
300/// // if (size == 0) return NULL;
301/// // else if (--size <= LUAI_MAXSHORTLEN) { /* short string? */
302/// // char buff[LUAI_MAXSHORTLEN];
303/// // loadVector(S, buff, size);
304/// // ts = luaS_newlstr(L, buff, size);
305/// // } else { /* long string */
306/// // ts = luaS_createlngstrobj(L, size);
307/// // setsvalue2s(L, L->top.p, ts); /* anchor it (loadVector can GC) */
308/// // luaD_inctop(L);
309/// // loadVector(S, getlngstr(ts), size);
310/// // L->top.p--;
311/// // }
312/// // luaC_objbarrier(L, p, ts);
313/// // return ts;
314/// // }
315/// ```
316///
317/// PORT NOTE: The Lua binary format stores `actual_length + 1` so that size=0
318/// is the null-string sentinel. After reading `raw_size`, the actual byte
319/// count is `raw_size - 1`.
320///
321/// PORT NOTE: In C, long strings are created first (to anchor them from GC)
322/// and then filled in-place via `getlngstr`. In Rust, GC anchoring is not
323/// needed in Phase A–C (Rc keeps objects alive); we read into a buffer and
324/// then create the string.
325///
326/// TODO(port): `luaS_newlstr` interns the string (short strings only);
327/// `luaS_createlngstrobj` does NOT intern. Phase A uses `state.intern_str()`
328/// for both. Phase B should add a `state.create_long_str()` path that skips
329/// the intern table, matching C semantics.
330///
331/// PORT NOTE: The `_proto` parameter corresponds to C's `Proto *p` used only
332/// for `luaC_objbarrier(L, p, ts)`. The barrier is a no-op in Phase A–C
333/// (macros.tsv: `luaC_objbarrier → state.gc().obj_barrier(p, o)` no-op).
334fn load_string_n(
335 s: &mut LoadState<'_>,
336 _proto: &LuaProto,
337) -> Result<Option<GcRef<LuaString>>, LuaError> {
338 let raw_size = load_size(s)?;
339 if raw_size == 0 {
340 return Ok(None);
341 }
342 let size = raw_size - 1;
343
344 // Read the raw bytes regardless of short/long distinction.
345 let mut buf = vec![0u8; size];
346
347 if size <= MAX_SHORT_LEN {
348 load_block(s, &mut buf)?;
349 } else {
350 load_block(s, &mut buf)?;
351 }
352
353 // macros.tsv: luaS_newlstr → state.intern_str(&s[..n])
354 // TODO(port): long strings should not be interned; see doc-comment above.
355 let ts = s.state.intern_str(&buf)?;
356
357 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
358 // (dropped — Phase A GC is Rc, no barrier needed)
359
360 Ok(Some(ts))
361}
362
363/// Load a non-nullable string; error if the stream encodes a null string.
364///
365/// # C source
366/// ```c
367///
368/// // TString *st = loadStringN(S, p);
369/// // if (st == NULL)
370/// // error(S, "bad format for constant string");
371/// // return st;
372/// // }
373/// ```
374fn load_string(s: &mut LoadState<'_>, proto: &LuaProto) -> Result<GcRef<LuaString>, LuaError> {
375 match load_string_n(s, proto)? {
376 Some(ts) => Ok(ts),
377 None => Err(load_error(s, "bad format for constant string")),
378 }
379}
380
381// ── Proto-field loaders ────────────────────────────────────────────────────
382
383/// Load the bytecode instruction array into a prototype.
384///
385/// # C source
386/// ```c
387///
388/// // int n = loadInt(S);
389/// // f->code = luaM_newvectorchecked(S->L, n, Instruction);
390/// // f->sizecode = n;
391/// // loadVector(S, f->code, n);
392/// // }
393/// ```
394///
395/// PORT NOTE: `loadVector(S, f->code, n)` expands to
396/// `loadBlock(S, f->code, n * sizeof(Instruction))` — `n` raw 4-byte words.
397/// We read each `u32` in native-endian order, consistent with how
398/// [`load_number`] and [`load_integer`] work.
399///
400/// PORT NOTE: `f->sizecode` is removed in Rust — `Vec::len()` covers it
401/// (types.tsv: `Proto.sizecode → removed`).
402fn load_code(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
403 let n = load_int(s)? as usize;
404 // macros.tsv: luaM_newvectorchecked → vec_checked::<T>(n)?
405 // PORT NOTE: Phase A uses Vec directly; overflow check omitted for brevity.
406 // TODO(port): add overflow / OOM check matching luaM_newvectorchecked.
407 let mut code = Vec::with_capacity(n);
408 for _ in 0..n {
409 let mut buf = [0u8; 4];
410 load_block(s, &mut buf)?;
411 // Instruction is a u32 newtype per types.tsv
412 code.push(Instruction(u32::from_ne_bytes(buf)));
413 }
414 f.code = code;
415 Ok(())
416}
417
418/// Load the constant pool into a prototype.
419///
420/// # C source
421/// ```c
422///
423/// // int i; int n = loadInt(S);
424/// // f->k = luaM_newvectorchecked(S->L, n, TValue);
425/// // f->sizek = n;
426/// // for (i = 0; i < n; i++) setnilvalue(&f->k[i]);
427/// // for (i = 0; i < n; i++) {
428/// // TValue *o = &f->k[i];
429/// // int t = loadByte(S);
430/// // switch (t) {
431/// // case LUA_VNIL: setnilvalue(o); break;
432/// // case LUA_VFALSE: setbfvalue(o); break;
433/// // case LUA_VTRUE: setbtvalue(o); break;
434/// // case LUA_VNUMFLT: setfltvalue(o, loadNumber(S)); break;
435/// // case LUA_VNUMINT: setivalue(o, loadInteger(S)); break;
436/// // case LUA_VSHRSTR:
437/// // case LUA_VLNGSTR: setsvalue2n(S->L, o, loadString(S, f)); break;
438/// // default: lua_assert(0);
439/// // }
440/// // }
441/// // }
442/// ```
443///
444/// PORT NOTE: The initial `setnilvalue` loop initialises the vector for GC
445/// safety in C. In Rust, `Vec` is always in a valid state; we skip it.
446fn load_constants(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
447 let n = load_int(s)? as usize;
448 // TODO(port): add overflow / OOM check.
449 let mut k = Vec::with_capacity(n);
450
451 // Dropped — Rust Vec elements are never uninitialized.
452
453 for _ in 0..n {
454 let t = load_byte(s)?;
455 let val = match t {
456 // macros.tsv: setnilvalue → *o = LuaValue::Nil
457 TAG_NIL => LuaValue::Nil,
458
459 // macros.tsv: setbfvalue → *o = LuaValue::Bool(false)
460 TAG_FALSE => LuaValue::Bool(false),
461
462 // macros.tsv: setbtvalue → *o = LuaValue::Bool(true)
463 TAG_TRUE => LuaValue::Bool(true),
464
465 // macros.tsv: setfltvalue → *o = LuaValue::Float(x)
466 TAG_FLOAT => LuaValue::Float(load_number(s)?),
467
468 // macros.tsv: setivalue → *o = LuaValue::Int(x)
469 TAG_INT => LuaValue::Int(load_integer(s)?),
470
471 // macros.tsv: setsvalue2n → *dst = LuaValue::Str(s.clone())
472 TAG_SHORT_STR | TAG_LONG_STR => {
473 let ts = load_string(s, f)?;
474 LuaValue::Str(ts)
475 }
476
477 // macros.tsv: lua_assert → debug_assert!
478 _ => {
479 debug_assert!(false, "unknown constant type tag {:#04x}", t);
480 LuaValue::Nil
481 }
482 };
483 k.push(val);
484 }
485
486 f.k = k;
487 Ok(())
488}
489
490/// Load nested function prototypes into a prototype.
491///
492/// # C source
493/// ```c
494///
495/// // int i; int n = loadInt(S);
496/// // f->p = luaM_newvectorchecked(S->L, n, Proto *);
497/// // f->sizep = n;
498/// // for (i = 0; i < n; i++) f->p[i] = NULL;
499/// // for (i = 0; i < n; i++) {
500/// // f->p[i] = luaF_newproto(S->L);
501/// // luaC_objbarrier(S->L, f, f->p[i]);
502/// // loadFunction(S, f->p[i], f->source);
503/// // }
504/// // }
505/// ```
506///
507/// PORT NOTE: C creates the proto first (for GC anchor) then fills it. In
508/// Rust we create a default `LuaProto`, fill it, then wrap in `GcRef`.
509/// `f->sizep` is removed per types.tsv (`Proto.sizep → removed`).
510fn load_protos(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
511 let n = load_int(s)? as usize;
512 // TODO(port): add overflow / OOM check.
513 let mut protos = Vec::with_capacity(n);
514
515 for _ in 0..n {
516 let mut sub = LuaProto::placeholder();
517
518 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
519
520 // Pass parent source as fallback.
521 let parent_source = f.source.clone();
522 load_function(s, &mut sub, parent_source)?;
523
524 // Wrap in GcRef after loading.
525 // PORT NOTE: In C f->p[i] is a Proto * held by the proto's GC roots.
526 // In Rust Phase A it becomes Rc<LuaProto>.
527 // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
528 let sub_ref = GcRef::new(sub);
529 sub_ref.account_buffer(sub_ref.buffer_bytes() as isize);
530 protos.push(sub_ref);
531 }
532
533 f.p = protos;
534 Ok(())
535}
536
537/// Load upvalue descriptors into a prototype.
538///
539/// # C source
540/// ```c
541///
542/// // int i, n;
543/// // n = loadInt(S);
544/// // f->upvalues = luaM_newvectorchecked(S->L, n, Upvaldesc);
545/// // f->sizeupvalues = n;
546/// // for (i = 0; i < n; i++)
547/// // f->upvalues[i].name = NULL; /* make array valid for GC */
548/// // for (i = 0; i < n; i++) {
549/// // f->upvalues[i].instack = loadByte(S);
550/// // f->upvalues[i].idx = loadByte(S);
551/// // f->upvalues[i].kind = loadByte(S);
552/// // }
553/// // }
554/// ```
555///
556/// PORT NOTE: The C comment says names must be filled first for GC safety.
557/// In Rust we build `UpvalDesc` values with `name: None` and fill names later
558/// in [`load_debug`]. This requires `UpvalDesc.name` to be
559/// `Option<GcRef<LuaString>>` rather than `GcRef<LuaString>` as listed in
560/// types.tsv. Phase B should reconcile the types.tsv entry.
561///
562/// PORT NOTE: `f->sizeupvalues` is removed per types.tsv.
563fn load_upvalues(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
564 let n = load_int(s)? as usize;
565 // TODO(port): add overflow / OOM check.
566
567 // In Rust: construct with name = None.
568
569 let mut upvalues = Vec::with_capacity(n);
570 for _ in 0..n {
571 let instack_raw = load_byte(s)?;
572 let idx = load_byte(s)?;
573 let kind = load_byte(s)?;
574
575 // types.tsv: Upvaldesc.instack → bool (stored as lu_byte in C)
576 upvalues.push(UpvalDesc {
577 name: None, // filled by load_debug
578 instack: instack_raw != 0,
579 idx,
580 kind,
581 });
582 }
583
584 f.upvalues = upvalues;
585 Ok(())
586}
587
588/// Load debug information into a prototype.
589///
590/// # C source
591/// ```c
592///
593/// // int i, n;
594/// // n = loadInt(S);
595/// // f->lineinfo = luaM_newvectorchecked(S->L, n, ls_byte);
596/// // f->sizelineinfo = n;
597/// // loadVector(S, f->lineinfo, n);
598/// // n = loadInt(S);
599/// // f->abslineinfo = luaM_newvectorchecked(S->L, n, AbsLineInfo);
600/// // f->sizeabslineinfo = n;
601/// // for (i = 0; i < n; i++) {
602/// // f->abslineinfo[i].pc = loadInt(S);
603/// // f->abslineinfo[i].line = loadInt(S);
604/// // }
605/// // n = loadInt(S);
606/// // f->locvars = luaM_newvectorchecked(S->L, n, LocVar);
607/// // f->sizelocvars = n;
608/// // for (i = 0; i < n; i++) f->locvars[i].varname = NULL;
609/// // for (i = 0; i < n; i++) {
610/// // f->locvars[i].varname = loadStringN(S, f);
611/// // f->locvars[i].startpc = loadInt(S);
612/// // f->locvars[i].endpc = loadInt(S);
613/// // }
614/// // n = loadInt(S);
615/// // if (n != 0) /* does it have debug information? */
616/// // n = f->sizeupvalues; /* must be this many */
617/// // for (i = 0; i < n; i++)
618/// // f->upvalues[i].name = loadStringN(S, f);
619/// // }
620/// ```
621///
622/// PORT NOTE: `ls_byte` (signed byte) maps to `i8` per types.tsv.
623/// `loadVector(S, f->lineinfo, n)` reads `n * sizeof(ls_byte) = n` bytes.
624/// We read them as `u8` then reinterpret as `i8` via cast.
625///
626/// PORT NOTE: Size companion fields (`sizelineinfo`, `sizeabslineinfo`,
627/// `sizelocvars`) are all removed per types.tsv — `Vec::len()` covers them.
628///
629/// PORT NOTE: `LocalVar.varname` and `UpvalDesc.name` are both
630/// `Option<GcRef<LuaString>>` here because `loadStringN` can return `None`.
631/// See also the note on [`load_upvalues`].
632fn load_debug(s: &mut LoadState<'_>, f: &mut LuaProto) -> Result<(), LuaError> {
633 let n = load_int(s)? as usize;
634 let mut lineinfo = vec![0i8; n];
635 // Read as u8 slice then cast — safe because i8 and u8 have the same
636 // in-memory representation and we're casting a byte from the binary stream.
637 // SAFETY(port): this would need `unsafe` for the slice transmute in real
638 // code; for Phase A we read byte-by-byte.
639 // TODO(port): replace the loop with a single load_block into a u8 buffer
640 // followed by an i8 transmute in Phase B (or use bytemuck).
641 for item in lineinfo.iter_mut() {
642 *item = load_byte(s)? as i8;
643 }
644 f.lineinfo = lineinfo;
645
646 let n = load_int(s)? as usize;
647 let mut abslineinfo = Vec::with_capacity(n);
648 for _ in 0..n {
649 abslineinfo.push(AbsLineInfo {
650 pc: load_int(s)?,
651 line: load_int(s)?,
652 });
653 }
654 f.abslineinfo = abslineinfo;
655
656 let n = load_int(s)? as usize;
657
658 let mut locvars = Vec::with_capacity(n);
659 for _ in 0..n {
660 let varname = load_string_n(s, f)?;
661 let startpc = load_int(s)?;
662 let endpc = load_int(s)?;
663 let varname = match varname {
664 Some(v) => v,
665 None => s.state.new_string(b"")?,
666 };
667 locvars.push(LocalVar {
668 varname,
669 startpc,
670 endpc,
671 });
672 }
673 f.locvars = locvars;
674
675 // PORT NOTE: if n == 0 then there is no upvalue name info (stripped).
676 let has_names = load_int(s)?;
677 if has_names != 0 {
678 let n_upvals = f.upvalues.len();
679 for i in 0..n_upvals {
680 let name = load_string_n(s, f)?;
681 f.upvalues[i].name = name;
682 }
683 }
684
685 Ok(())
686}
687
688// ── Function loader ────────────────────────────────────────────────────────
689
690/// Load a complete function prototype from the stream.
691///
692/// # C source
693/// ```c
694///
695/// // f->source = loadStringN(S, f);
696/// // if (f->source == NULL) f->source = psource;
697/// // f->linedefined = loadInt(S);
698/// // f->lastlinedefined = loadInt(S);
699/// // f->numparams = loadByte(S);
700/// // f->is_vararg = loadByte(S);
701/// // f->maxstacksize = loadByte(S);
702/// // loadCode(S, f);
703/// // loadConstants(S, f);
704/// // loadUpvalues(S, f);
705/// // loadProtos(S, f);
706/// // loadDebug(S, f);
707/// // }
708/// ```
709///
710/// PORT NOTE: `TString *psource` becomes `Option<GcRef<LuaString>>` because
711/// the top-level call passes `NULL` (mapped to `None`). `f->source` in `LuaProto`
712/// is typed `GcRef<LuaString>` in types.tsv, but the undump path needs
713/// `Option<GcRef<LuaString>>` to express "inherited from parent". Phase B
714/// should align types.tsv or add a dedicated `Option` wrapper there.
715///
716/// PORT NOTE: `f->is_vararg` is stored as `lu_byte` in C but `bool` in
717/// types.tsv. We read the raw byte and convert to `bool` via `!= 0`.
718fn load_function(
719 s: &mut LoadState<'_>,
720 f: &mut LuaProto,
721 psource: Option<GcRef<LuaString>>,
722) -> Result<(), LuaError> {
723 let source = load_string_n(s, f)?;
724 f.source = source.or(psource);
725
726 f.linedefined = load_int(s)?;
727 f.lastlinedefined = load_int(s)?;
728 f.numparams = load_byte(s)?;
729 // types.tsv: Proto.is_vararg → bool (stored as lu_byte in C)
730 f.is_vararg = load_byte(s)? != 0;
731 f.maxstacksize = load_byte(s)?;
732 load_code(s, f)?;
733 reconstruct_vararg_table_reg(f);
734 load_constants(s, f)?;
735 load_upvalues(s, f)?;
736 load_protos(s, f)?;
737 load_debug(s, f)?;
738
739 Ok(())
740}
741
742/// Recover `LuaProto.vararg_table_reg` from the loaded bytecode instead of from
743/// the wire format, so a precompiled chunk keeps Lua 5.5 named-vararg aliasing
744/// (`function f(...t)`) without lua-rs's `string.dump` output diverging from
745/// C's bytecode layout (which the structural oracle compares).
746///
747/// A named-vararg function emits exactly one `OP_VARARGPACK` (opcode 84) at
748/// entry; its A operand is the register holding the shared vararg table. Its
749/// k bit records whether the table must be materialized.
750fn reconstruct_vararg_table_reg(f: &mut LuaProto) {
751 const OP_VARARGPACK: u32 = 84;
752 const OPCODE_MASK: u32 = 0x7F;
753 const POS_K: u32 = 15;
754 if let Some((reg, needed)) = f.code.iter().find_map(|inst| {
755 let raw = inst.raw();
756 (raw & OPCODE_MASK == OP_VARARGPACK).then(|| {
757 let reg = ((raw >> 7) & 0xFF) as u8;
758 let needed = ((raw >> POS_K) & 1) != 0;
759 (reg, needed)
760 })
761 }) {
762 f.vararg_table_reg = Some(reg);
763 f.vararg_table_needed = needed;
764 }
765}
766
767// ── Header validation ──────────────────────────────────────────────────────
768
769/// Verify that the next `expected.len()` bytes in the stream match `expected`.
770///
771/// # C source
772/// ```c
773///
774/// // char buff[sizeof(LUA_SIGNATURE) + sizeof(LUAC_DATA)];
775/// // size_t len = strlen(s);
776/// // loadVector(S, buff, len);
777/// // if (memcmp(s, buff, len) != 0)
778/// // error(S, msg);
779/// // }
780/// ```
781///
782/// PORT NOTE: `strlen` on a `const char *` becomes `.len()` on a `&[u8]`.
783/// `memcmp` becomes slice equality.
784fn check_literal(
785 s: &mut LoadState<'_>,
786 expected: &[u8],
787 msg: &'static str,
788) -> Result<(), LuaError> {
789 let mut buf = vec![0u8; expected.len()];
790 load_block(s, &mut buf)?;
791 if buf != expected {
792 return Err(load_error(s, msg));
793 }
794 Ok(())
795}
796
797/// Verify that the next byte in the stream equals `expected_size`.
798///
799/// # C source
800/// ```c
801///
802/// // if (loadByte(S) != size)
803/// // error(S, luaO_pushfstring(S->L, "%s size mismatch", tname));
804/// // }
805/// ```
806///
807/// PORT NOTE: `luaO_pushfstring` is used here as a message formatter, not as
808/// a throw site. We inline the message directly. `tname` is always a Rust
809/// type-name string literal (ASCII) from the call sites; using `&'static str`
810/// is appropriate here (not Lua data).
811fn fcheck_size(
812 s: &mut LoadState<'_>,
813 expected_size: usize,
814 tname: &'static str,
815) -> Result<(), LuaError> {
816 let b = load_byte(s)? as usize;
817 if b != expected_size {
818 // PORT NOTE: We build the error message inline rather than using
819 // luaO_pushfstring to avoid a stack push just for error formatting.
820 // TODO(port): include `tname` in the error message once LuaError::syntax
821 // supports composing byte-string and &str fragments.
822 return Err(LuaError::syntax(format_args!("{} size mismatch", tname)));
823 }
824 Ok(())
825}
826
827/// Validate the binary chunk header.
828///
829/// # C source
830/// ```c
831///
832/// // checkliteral(S, &LUA_SIGNATURE[1], "not a binary chunk");
833/// // if (loadByte(S) != LUAC_VERSION) error(S, "version mismatch");
834/// // if (loadByte(S) != LUAC_FORMAT) error(S, "format mismatch");
835/// // checkliteral(S, LUAC_DATA, "corrupted chunk");
836/// // checksize(S, Instruction);
837/// // checksize(S, lua_Integer);
838/// // checksize(S, lua_Number);
839/// // if (loadInteger(S) != LUAC_INT) error(S, "integer format mismatch");
840/// // if (loadNumber(S) != LUAC_NUM) error(S, "float format mismatch");
841/// // }
842/// ```
843///
844/// PORT NOTE: `checksize(S, T)` expands to `fchecksize(S, sizeof(T), #T)`.
845/// We emit the three concrete sizes inline.
846/// - `sizeof(Instruction)` = 4 (u32)
847/// - `sizeof(lua_Integer)` = 8 (i64)
848/// - `sizeof(lua_Number)` = 8 (f64)
849///
850/// PORT NOTE: The first byte of `LUA_SIGNATURE` (`\x1b`) is already consumed
851/// by the caller before `checkHeader` is invoked, so we check only bytes 1..
852/// of the signature (`"Lua"`).
853fn check_header(s: &mut LoadState<'_>) -> Result<(), LuaError> {
854 // Skip LUA_SIGNATURE[0] (\x1b) — already consumed by the caller.
855 check_literal(s, &LUA_SIGNATURE[1..], "not a binary chunk")?;
856
857 let version = s.state.global().lua_version;
858 let expected_version = if matches!(version, LuaVersion::V55) {
859 LUAC_VERSION_55
860 } else {
861 LUAC_VERSION_54
862 };
863 let ver = load_byte(s)?;
864 if ver != expected_version {
865 return Err(load_error(s, "version mismatch"));
866 }
867
868 let fmt = load_byte(s)?;
869 if fmt != LUAC_FORMAT {
870 return Err(load_error(s, "format mismatch"));
871 }
872
873 check_literal(s, LUAC_DATA, "corrupted chunk")?;
874
875 if matches!(version, LuaVersion::V55) {
876 fcheck_size(s, 4, "int")?;
877 if load_raw_i32(s)? != LUAC_INT_55 as i32 {
878 return Err(load_error(s, "int format mismatch"));
879 }
880
881 fcheck_size(s, 4, "instruction")?;
882 if load_raw_u32(s)? != LUAC_INST_55 {
883 return Err(load_error(s, "instruction format mismatch"));
884 }
885
886 fcheck_size(s, 8, "Lua integer")?;
887 if load_integer(s)? != LUAC_INT_55 {
888 return Err(load_error(s, "Lua integer format mismatch"));
889 }
890
891 fcheck_size(s, 8, "Lua number")?;
892 if load_number(s)? != LUAC_NUM_55 {
893 return Err(load_error(s, "Lua number format mismatch"));
894 }
895 } else {
896 fcheck_size(s, 4, "Instruction")?;
897
898 fcheck_size(s, 8, "lua_Integer")?;
899
900 fcheck_size(s, 8, "lua_Number")?;
901
902 let int_check = load_integer(s)?;
903 if int_check != LUAC_INT {
904 return Err(load_error(s, "integer format mismatch"));
905 }
906
907 let num_check = load_number(s)?;
908 if num_check != LUAC_NUM {
909 return Err(load_error(s, "float format mismatch"));
910 }
911 }
912
913 Ok(())
914}
915
916// ── Public entry point ─────────────────────────────────────────────────────
917
918/// Load a precompiled Lua chunk and return the top-level Lua closure.
919///
920/// This is the Rust equivalent of `luaU_undump` — the single public function
921/// exported by `lundump.c`.
922///
923/// # C source
924/// ```c
925///
926/// // LoadState S;
927/// // LClosure *cl;
928/// // if (*name == '@' || *name == '=')
929/// // S.name = name + 1;
930/// // else if (*name == LUA_SIGNATURE[0])
931/// // S.name = "binary string";
932/// // else
933/// // S.name = name;
934/// // S.L = L; S.Z = Z;
935/// // checkHeader(&S);
936/// // cl = luaF_newLclosure(L, loadByte(&S));
937/// // setclLvalue2s(L, L->top.p, cl);
938/// // luaD_inctop(L);
939/// // cl->p = luaF_newproto(L);
940/// // luaC_objbarrier(L, cl, cl->p);
941/// // loadFunction(&S, cl->p, NULL);
942/// // lua_assert(cl->nupvalues == cl->p->sizeupvalues);
943/// // luai_verifycode(L, cl->p);
944/// // return cl;
945/// // }
946/// ```
947///
948/// # Parameters
949/// - `state` — the Lua thread state.
950/// - `z` — input stream positioned at the start of the binary chunk
951/// (the first byte `\x1b` of `LUA_SIGNATURE` must still be present).
952/// - `name` — chunk name for error messages. Stripped per Lua convention:
953/// - `@…` → filename (strip `@`)
954/// - `=…` → literal name (strip `=`)
955/// - starts with `\x1b` → `"binary string"`
956/// - otherwise used as-is.
957///
958/// PORT NOTE: The C function returns `LClosure *`. In Rust we return
959/// `GcRef<LuaLClosure>` (the Lua-closure variant of `LuaClosure`). The
960/// closure is also pushed onto the stack for GC anchoring, matching the C
961/// behaviour (`setclLvalue2s + luaD_inctop`). The caller is responsible for
962/// popping it when done (consistent with C).
963///
964/// PORT NOTE: `luai_verifycode` is a no-op in the default build
965/// (`#define luai_verifycode(L,f) /* empty */`); dropped here.
966///
967/// PORT NOTE: `cl->nupvalues == cl->p->sizeupvalues` — in Rust the nupvalues
968/// count is implicit in `cl.upvals.len()` and `f.upvalues.len()`; the
969/// assertion becomes `debug_assert_eq!`.
970pub(crate) fn undump(
971 state: &mut LuaState,
972 z: &mut ZIO,
973 _name: &[u8],
974) -> Result<GcRef<LuaLClosure>, LuaError> {
975 let mut s = LoadState { state, z };
976
977 check_header(&mut s)?;
978
979 // loadByte(&S) reads the number of upvalues for the top-level closure.
980 let nupvalues = load_byte(&mut s)?;
981 // PORT NOTE: `luaF_newLclosure` allocates a closure with `nupvalues`
982 // upvalue slots. In Rust Phase A we construct the struct directly; the
983 // GcRef wrapping happens after the proto is loaded.
984 // TODO(port): use the proper lfunc::new_lua_closure(state, nupvalues) API
985 // once lfunc.rs is translated and the API is settled.
986 let mut cl = LuaLClosure::placeholder();
987 let mut upvals_vec = Vec::with_capacity(nupvalues as usize);
988 for _ in 0..nupvalues as usize {
989 upvals_vec.push(std::cell::Cell::new(
990 s.state.new_upval_closed(LuaValue::Nil),
991 ));
992 }
993 cl.upvals = upvals_vec;
994
995 // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(cl)))
996 // macros.tsv: luaD_inctop → (state.push already increments; use state.push)
997 // PORT NOTE: We push a placeholder Nil first; the real closure value is
998 // set after the proto is loaded. This mirrors the C "anchor for GC"
999 // pattern. In Phase A-C GC anchoring via the stack is not strictly
1000 // necessary (Rc keeps things alive) but we preserve the stack discipline
1001 // for behavioural parity.
1002 // TODO(port): once GcRef<LuaLClosure> is cloneable into LuaValue, push
1003 // the real value here instead of a placeholder.
1004 s.state.push(LuaValue::Nil); // placeholder; replaced below
1005
1006 let mut proto = LuaProto::placeholder();
1007
1008 // macros.tsv: luaC_objbarrier → state.gc().obj_barrier(p, o) no-op Phase A
1009
1010 load_function(&mut s, &mut proto, None)?;
1011
1012 // Wrap the proto in a GcRef and attach it to the closure.
1013 // TODO(D-1c-bridge): wraps fully-populated LuaProto value; state.new_proto produces a placeholder
1014 let proto_ref = GcRef::new(proto);
1015 proto_ref.account_buffer(proto_ref.buffer_bytes() as isize);
1016
1017 // macros.tsv: lua_assert → debug_assert!
1018 // nupvalues is the byte we read; sizeupvalues = proto_ref.upvalues.len()
1019 debug_assert_eq!(
1020 nupvalues as usize,
1021 proto_ref.upvalues.len(),
1022 "upvalue count mismatch between closure header and prototype"
1023 );
1024
1025 // The macro is defined as `/* empty */` in the default build; dropped.
1026
1027 // Attach the loaded proto to the closure.
1028 cl.proto = proto_ref;
1029
1030 // Wrap the closure in GcRef.
1031 // TODO(D-1c-bridge): wraps fully-populated LuaLClosure value; state.new_lclosure makes Nil-filled upvals
1032 let cl_ref = GcRef::new(cl);
1033 cl_ref.account_buffer(cl_ref.buffer_bytes() as isize);
1034
1035 // Replace the stack placeholder with the real closure value.
1036 // macros.tsv: setclLvalue2s → state.set_at(o, LuaValue::Function(LuaClosure::Lua(...)))
1037 // TODO(port): replace the placeholder at the correct stack slot.
1038 // For now the top slot holds Nil; Phase B must fix this once
1039 // GcRef<LuaLClosure> → LuaValue conversion is defined.
1040 // TODO(port): update the stack slot pushed above with the real cl_ref value.
1041
1042 Ok(cl_ref)
1043}
1044
1045// ──────────────────────────────────────────────────────────────────────────
1046// PORT STATUS
1047// source: src/lundump.c (335 lines, 20 functions/items)
1048// src/lundump.h (35 lines, merged)
1049// target_crate: lua-vm
1050// confidence: medium
1051// todos: 15
1052// port_notes: 39
1053// unsafe_blocks: 0 (must be 0 outside explicit unsafe-budget crates)
1054// notes: Logic is faithful to the C. The main open items for Phase B
1055// are: (1) import paths for GcRef/LuaProto/LuaClosure/etc.;
1056// (2) LuaError::syntax byte-string formatting for the chunk
1057// name in load_error; (3) long-string vs short-string intern
1058// distinction in load_string_n; (4) the stack placeholder in
1059// undump must be replaced with the real GcRef<LuaLClosure>
1060// value once LuaValue conversion is defined; (5) UpvalDesc.name
1061// and LocalVar.varname need Option<GcRef<LuaString>> in the
1062// proto type to match the two-pass load order here.
1063// ──────────────────────────────────────────────────────────────────────────