lua_vm/dump.rs
1//! Pre-compiled Lua chunk serializer.
2//!
3//! Translates `reference/lua-5.4.7/src/ldump.c` (230 lines, 9 functions + 1 public entry point).
4//! Writes a `LuaProto` to a byte sink in the standard Lua 5.4 bytecode format.
5
6
7// TODO(port): Adjust import paths once crate boundaries stabilise in Phase B.
8// The types below are expected to resolve as follows:
9// GcRef — lua_types (or lua-gc Phase D)
10// LuaError — lua_types
11// LuaProto — lua-vm (this crate) or lua-types
12// LuaString — lua-vm / lua-types
13// LuaValue — lua_types
14// LuaState — lua-vm (this crate)
15use std::mem::size_of;
16#[allow(unused_imports)] use crate::prelude::*;
17
18use crate::state::LuaState;
19use lua_types::{GcRef, LuaError, LuaString, LuaValue};
20use lua_types::proto::LuaProto;
21
// ── Constants from lundump.h ─────────────────────────────────────────────────

// Chunk signature: ESC followed by "Lua". C's `dumpLiteral` writes
// `sizeof(s) - sizeof(char)` bytes, i.e. the literal without its NUL terminator,
// so exactly these four bytes go on the wire.
const LUA_SIGNATURE: &[u8] = &[0x1b, b'L', b'u', b'a'];

// LUA_VERSION_NUM (macros.tsv). The version byte packs the major version into the
// high nibble and the minor version into the low nibble: 5 * 16 + 4 = 0x54.
const LUA_VERSION_NUM_DUMP: i32 = 504;
const LUAC_VERSION: u8 = {
    let major = LUA_VERSION_NUM_DUMP / 100;
    let minor = LUA_VERSION_NUM_DUMP % 100;
    (major * 16 + minor) as u8
};

// Format byte written right after the version byte.
const LUAC_FORMAT: u8 = 0;

// Six check bytes ("\x19\x93\r\n\x1a\n"); as with the signature, the C NUL
// terminator is not part of the output.
const LUAC_DATA: &[u8] = &[0x19, 0x93, b'\r', b'\n', 0x1a, b'\n'];

// Sentinel integer and float written into the header in native byte order.
const LUAC_INT: i64 = 0x5678;
const LUAC_NUM: f64 = 370.5;

// Byte widths recorded in the header: Instruction (u32), lua_Integer (i64) and
// lua_Number (f64).
const INSTRUCTION_SIZE: u8 = size_of::<u32>() as u8;
const LUA_INTEGER_SIZE: u8 = size_of::<i64>() as u8;
const LUA_NUMBER_SIZE: u8 = size_of::<f64>() as u8;
50
51// ── DumpState ────────────────────────────────────────────────────────────────
52
53/// Internal state threaded through every dump operation.
54///
55///
56/// PORT NOTE: `lua_State *L` removed — it was used only for `lua_lock`/`lua_unlock`, which are
57/// no-ops in the default Lua build and dropped here (macros.tsv). `void *data` is folded into
58/// the writer closure. `int status` is replaced by `Result<(), LuaError>` propagated with `?`.
59struct DumpState<'a> {
60 /// Byte-sink callback. C original: `lua_Writer writer` + `void *data` (combined).
61 /// lua_Writer type is TBD in types.tsv; for dump we use a bare byte-slice callback.
62 writer: &'a mut dyn FnMut(&[u8]) -> Result<(), LuaError>,
63 /// When true, strip all debug information from the output.
64 strip: bool,
65}
66
67impl<'a> DumpState<'a> {
68 // ── Low-level write primitives ────────────────────────────────────────────
69
70 /// Write raw bytes to the output stream.
71 ///
72 ///
73 /// PORT NOTE: C accumulates errors in `D->status` and skips subsequent writes once
74 /// non-zero; Rust returns `Result<(), LuaError>` and short-circuits via `?`.
75 /// `lua_lock`/`lua_unlock` are no-ops in the default build and are dropped (macros.tsv).
76 fn dump_block(&mut self, data: &[u8]) -> Result<(), LuaError> {
77 if !data.is_empty() {
78 (self.writer)(data)?;
79 }
80 Ok(())
81 }
82
83 /// Write one byte.
84 ///
85 /// C body: `lu_byte x = (lu_byte)y; dumpVar(D, x);`
86 /// (`dumpVar(D,x)` expands to `dumpVector(D,&x,1)` expands to `dumpBlock(D,&x,sizeof(x))`)
87 fn dump_byte(&mut self, y: u8) -> Result<(), LuaError> {
88 self.dump_block(&[y])
89 }
90
91 /// Write a `size_t` using Lua's variable-length encoding.
92 ///
93 ///
94 /// Encoding (big-endian 7-bit groups, **last** byte marked with MSB = 1):
95 /// - Each byte holds 7 payload bits.
96 /// - Bytes are written most-significant group first.
97 /// - The final byte (least-significant group) has its MSB set as an end marker.
98 ///
99 /// This differs from standard LEB128, which marks the *continuation* bytes rather than
100 /// the terminating byte.
101 ///
102 fn dump_size(&mut self, mut x: usize) -> Result<(), LuaError> {
103 // DIBS = (usize::BITS + 6) / 7; on 64-bit = (64+6)/7 = 10.
104 const DIBS: usize = (usize::BITS as usize + 6) / 7;
105 let mut buff = [0u8; DIBS];
106 let mut n: usize = 0;
107
108 loop {
109 n += 1;
110 buff[DIBS - n] = (x & 0x7f) as u8; // fill buffer in reverse order
111 x >>= 7;
112 if x == 0 {
113 break;
114 }
115 }
116
117 // The byte at buff[DIBS-1] is the first byte placed (least-significant group).
118 // Setting its MSB marks it as the terminal byte of the encoding.
119 buff[DIBS - 1] |= 0x80;
120
121 self.dump_block(&buff[DIBS - n..])
122 }
123
124 /// Write an `int` as a variable-length size.
125 ///
126 ///
127 /// PORT NOTE: C implicitly casts `int` → `size_t`. All call sites pass non-negative values
128 /// (line numbers, instruction counts, vector lengths); a debug assertion guards this.
129 fn dump_int(&mut self, x: i32) -> Result<(), LuaError> {
130 debug_assert!(
131 x >= 0,
132 "dump_int: negative value {} cast to usize would wrap",
133 x
134 );
135 self.dump_size(x as usize)
136 }
137
138 /// Write a `lua_Number` (f64) in the platform's native byte order.
139 ///
140 ///
141 /// `dumpVar(D,x)` expands to `dumpBlock(D, &x, sizeof(lua_Number))` — 8 bytes, native order.
142 /// `to_ne_bytes()` replicates native-endian serialisation. The bytecode header's `LUAC_NUM`
143 /// sentinel (370.5) lets `lundump` detect byte-order mismatches at load time.
144 fn dump_number(&mut self, x: f64) -> Result<(), LuaError> {
145 self.dump_block(&x.to_ne_bytes())
146 }
147
148 /// Write a `lua_Integer` (i64) in the platform's native byte order.
149 ///
150 fn dump_integer(&mut self, x: i64) -> Result<(), LuaError> {
151 self.dump_block(&x.to_ne_bytes())
152 }
153
154 // ── Mid-level serialisers ─────────────────────────────────────────────────
155
156 /// Write an interned or long string, or a null sentinel (encoded size = 0).
157 ///
158 ///
159 /// Encoding: `dumpSize(len + 1)` followed by `len` raw bytes; size 0 means null/absent.
160 /// `tsslen(s)` → `s.len()` and `getstr(s)` → `s.as_bytes()` (macros.tsv).
161 fn dump_string(&mut self, s: Option<&GcRef<LuaString>>) -> Result<(), LuaError> {
162 match s {
163 None => self.dump_size(0),
164
165 Some(s) => {
166 let bytes = s.as_bytes(); // tsslen → .len(); getstr → .as_bytes()
167 self.dump_size(bytes.len() + 1)?;
168 self.dump_block(bytes)
169 }
170 }
171 }
172
173 /// Write the bytecode instruction array.
174 ///
175 ///
176 /// PORT NOTE: `f->sizecode` is covered by `Vec::len()` (types.tsv).
177 fn dump_code(&mut self, proto: &LuaProto) -> Result<(), LuaError> {
178 self.dump_int(proto.code.len() as i32)?;
179
180 // dumpVector writes n * sizeof(Instruction) = n * 4 bytes in native byte order.
181 for instr in &proto.code {
182 // TODO(port): `Instruction` is a u32 newtype (types.tsv). Accessing the inner u32
183 // via `.0` assumes a tuple-struct layout. If the Instruction API differs (e.g.,
184 // exposes `.raw()` or `u32::from(*instr)`), adjust accordingly in Phase B.
185 self.dump_block(&instr.0.to_ne_bytes())?;
186 }
187 Ok(())
188 }
189
190 /// Write the constant pool.
191 ///
192 ///
193 /// Each constant is written as: one tag byte (`ttypetag`), followed by the payload
194 /// (float: 8 bytes; integer: 8 bytes; string: variable-length; nil/bool: nothing).
195 ///
196 /// PORT NOTE: `f->sizek` is covered by `Vec::len()` (types.tsv).
197 fn dump_constants(&mut self, proto: &LuaProto) -> Result<(), LuaError> {
198 let n = proto.k.len();
199 self.dump_int(n as i32)?;
200
201 for constant in &proto.k {
202 // ttypetag(o) → o.full_type_tag() (macros.tsv)
203 // Returns the C-side tag byte: bits 0-3 base type, bits 4-5 variant, bit 6 collectable.
204 let tag = constant.full_type_tag();
205 self.dump_byte(tag)?;
206
207 match constant {
208 LuaValue::Float(f) => {
209 // fltvalue(o) → o.as_float().expect("not float") or `if let` (macros.tsv)
210 self.dump_number(*f)?;
211 }
212 LuaValue::Int(i) => {
213 self.dump_integer(*i)?;
214 }
215 LuaValue::Str(s) => {
216 // tsvalue(o) → o.as_string().expect("not string") (macros.tsv)
217 self.dump_string(Some(s))?;
218 }
219 LuaValue::Nil | LuaValue::Bool(_) => {
220 // Only the tag byte is written; nil and booleans carry no additional payload.
221 // lua_assert → debug_assert! (macros.tsv)
222 debug_assert!(
223 matches!(constant, LuaValue::Nil | LuaValue::Bool(_)),
224 "dump_constants: default branch reached for unexpected variant"
225 );
226 }
227 _ => {
228 // TODO(port): LuaValue variant not valid as a constant-pool entry.
229 // In C the default branch asserts nil/false/true only. Any other variant
230 // here indicates a malformed proto; flag for Phase B investigation.
231 debug_assert!(false, "dump_constants: unexpected LuaValue variant in constant pool");
232 }
233 }
234 }
235 Ok(())
236 }
237
238 /// Write nested function prototypes (sub-functions defined inside `proto`).
239 ///
240 ///
241 /// PORT NOTE: `f->sizep` is covered by `Vec::len()` (types.tsv).
242 /// The parent's source string is passed down so that children with identical source
243 /// origins can omit the redundant source name (see `dump_function`).
244 fn dump_protos(&mut self, proto: &LuaProto) -> Result<(), LuaError> {
245 let n = proto.p.len();
246 self.dump_int(n as i32)?;
247
248 for sub in &proto.p {
249 // sub: &GcRef<LuaProto>; deref coercion (&GcRef<LuaProto> → &LuaProto) expected
250 // when GcRef<T>: Deref<Target=T> (true for Rc<T> in Phase A).
251 self.dump_function(sub, proto.source.as_ref())?;
252 }
253 Ok(())
254 }
255
256 /// Write upvalue descriptors (instack / idx / kind for each upvalue slot).
257 ///
258 ///
259 /// PORT NOTE: `f->sizeupvalues` is covered by `Vec::len()` (types.tsv).
260 /// `Upvaldesc.instack` is `bool` in Rust (types.tsv); cast to `u8` for the wire format.
261 fn dump_upvalues(&mut self, proto: &LuaProto) -> Result<(), LuaError> {
262 let n = proto.upvalues.len();
263 self.dump_int(n as i32)?;
264
265 for upval in &proto.upvalues {
266 // PORT NOTE: instack is bool in Rust (types.tsv); cast to u8: true→1, false→0.
267 self.dump_byte(upval.instack as u8)?;
268 self.dump_byte(upval.idx)?;
269 self.dump_byte(upval.kind)?;
270 }
271 Ok(())
272 }
273
274 /// Write debug information: per-instruction line deltas, absolute line records,
275 /// local-variable lifetimes, and upvalue names.
276 ///
277 /// All counts are written as zero when `self.strip` is true.
278 ///
279 ///
280 /// PORT NOTE: all `f->size*` fields are covered by `Vec::len()` (types.tsv).
281 fn dump_debug(&mut self, proto: &LuaProto) -> Result<(), LuaError> {
282 let n_lineinfo = if self.strip { 0 } else { proto.lineinfo.len() };
283 self.dump_int(n_lineinfo as i32)?;
284
285 // lineinfo is Vec<i8> (ls_byte per types.tsv). C writes them as raw bytes (sizeof(i8)=1).
286 // Cast each i8 to u8 (same bit pattern) before writing.
287 // PERF(port): iterating one byte at a time vs. bulk write — profile in Phase B.
288 // (A bulk write would require bytemuck::cast_slice or similar to avoid unsafe.)
289 let lineinfo_bytes: Vec<u8> = proto.lineinfo[..n_lineinfo]
290 .iter()
291 .map(|&b| b as u8)
292 .collect();
293 self.dump_block(&lineinfo_bytes)?;
294
295 let n_absline = if self.strip { 0 } else { proto.abslineinfo.len() };
296 self.dump_int(n_absline as i32)?;
297
298 for abs in proto.abslineinfo.iter().take(n_absline) {
299 // AbsLineInfo.pc and .line are i32 (types.tsv); non-negative in valid bytecode.
300 self.dump_int(abs.pc)?;
301 self.dump_int(abs.line)?;
302 }
303
304 let n_locvars = if self.strip { 0 } else { proto.locvars.len() };
305 self.dump_int(n_locvars as i32)?;
306
307 for locvar in proto.locvars.iter().take(n_locvars) {
308 // LocVar.varname is GcRef<LuaString> (types.tsv).
309 self.dump_string(Some(&locvar.varname))?;
310 self.dump_int(locvar.startpc)?;
311 self.dump_int(locvar.endpc)?;
312 }
313
314 // (Re-uses upvalues.len() for the name-writing pass — separate from dumpUpvalues
315 // which wrote structural descriptors; here we write debug names.)
316 let n_upval_names = if self.strip { 0 } else { proto.upvalues.len() };
317 self.dump_int(n_upval_names as i32)?;
318
319 for upval in proto.upvalues.iter().take(n_upval_names) {
320 // PORT NOTE: UpvalDesc.name is GcRef<LuaString> per types.tsv (non-optional).
321 // TODO(port): In C, `TString *name` can be NULL when an upvalue is unnamed (e.g.,
322 // in bytecode compiled without debug info). Verify whether UpvalDesc.name should be
323 // `Option<GcRef<LuaString>>` in the Rust model; if so, change call to pass the Option
324 // directly instead of wrapping in Some.
325 self.dump_string(upval.name.as_ref())?;
326 }
327 Ok(())
328 }
329
330 /// Write a complete function prototype: source name, header bytes, code, constants,
331 /// upvalue descriptors, nested prototypes, and debug information.
332 ///
333 /// `psource` is the parent function's source string. When `f->source == psource` (pointer
334 /// equality — Lua interns short strings so identical source names share an object), the
335 /// source is written as null (size 0) to avoid duplication. The top-level call passes
336 /// `None` to force writing the source.
337 ///
338 ///
339 /// PORT NOTE: `f->source == psource` is a C pointer comparison exploiting string interning.
340 /// In Rust we use `GcRef::ptr_eq` (equivalent to `Rc::ptr_eq` in Phase A) for identity.
341 /// `is_vararg` is `bool` in Rust (types.tsv); cast to `u8` for the wire format.
342 fn dump_function(
343 &mut self,
344 proto: &LuaProto,
345 psource: Option<&GcRef<LuaString>>,
346 ) -> Result<(), LuaError> {
347 // Pointer-equality check: same interned string object means same source file.
348 let same_source = match (psource, proto.source.as_ref()) {
349 (Some(ps), Some(src)) => GcRef::ptr_eq(src, ps),
350 _ => false,
351 };
352
353 if self.strip || same_source {
354 self.dump_string(None)?;
355 } else {
356 self.dump_string(proto.source.as_ref())?;
357 }
358
359 self.dump_int(proto.linedefined)?;
360 self.dump_int(proto.lastlinedefined)?;
361 self.dump_byte(proto.numparams)?;
362 // PORT NOTE: is_vararg is bool in Rust (types.tsv); true → 1u8, false → 0u8.
363 self.dump_byte(proto.is_vararg as u8)?;
364 self.dump_byte(proto.maxstacksize)?;
365
366 self.dump_code(proto)?;
367 self.dump_constants(proto)?;
368 self.dump_upvalues(proto)?;
369 self.dump_protos(proto)?;
370 self.dump_debug(proto)?;
371 Ok(())
372 }
373
374 /// Write the binary chunk header.
375 ///
376 /// The header allows `lundump` (and external tools) to verify the bytecode format,
377 /// platform word sizes, and byte order before attempting to load the chunk.
378 ///
379 fn dump_header(&mut self) -> Result<(), LuaError> {
380 // dumpLiteral(D,s) = dumpBlock(D, s, sizeof(s) - sizeof(char))
381 // b"\x1bLua" is &[u8; 4] (no NUL terminator in Rust byte literals), matching the
382 // C expansion of sizeof("\x1bLua")-1 = 4 bytes.
383 self.dump_block(LUA_SIGNATURE)?;
384
385 self.dump_byte(LUAC_VERSION)?;
386
387 self.dump_byte(LUAC_FORMAT)?;
388
389 // b"\x19\x93\r\n\x1a\n" is &[u8; 6], matching sizeof(LUAC_DATA)-1 = 6 bytes.
390 self.dump_block(LUAC_DATA)?;
391
392 self.dump_byte(INSTRUCTION_SIZE)?;
393
394 self.dump_byte(LUA_INTEGER_SIZE)?;
395
396 self.dump_byte(LUA_NUMBER_SIZE)?;
397
398 self.dump_integer(LUAC_INT)?;
399
400 self.dump_number(LUAC_NUM)?;
401
402 Ok(())
403 }
404}
405
406// ── Public entry point ───────────────────────────────────────────────────────
407
408/// Serialize a compiled Lua function prototype as a precompiled bytecode chunk.
409///
410/// The `writer` callback receives successive slices of the serialised bytes and returns
411/// `Err(LuaError)` to abort. `strip` omits debug info (line numbers, local names, etc.)
412/// from the output.
413///
414///
415/// PORT NOTE: `lua_Writer w` (fn pointer) + `void *data` (userdata) are collapsed into a
416/// single `impl FnMut(&[u8]) -> Result<(), LuaError>` closure — the Rust idiom for the
417/// callback + context pair. `_state` is retained in the signature for API parity but unused
418/// in the body: the C code needed it only for `lua_lock`/`lua_unlock`, which are no-ops per
419/// macros.tsv. Return type changes from `int` (0 = ok, non-zero = writer error) to
420/// `Result<(), LuaError>`.
421pub(crate) fn dump(
422 _state: &LuaState,
423 proto: &GcRef<LuaProto>,
424 writer: &mut dyn FnMut(&[u8]) -> Result<(), LuaError>,
425 strip: bool,
426) -> Result<(), LuaError> {
427 let mut d = DumpState {
428 writer,
429 strip,
430 };
431
432 d.dump_header()?;
433
434 // PORT NOTE: f->sizeupvalues is covered by Vec::len(). Bounded by MAXUPVAL = 255
435 // (macros.tsv), so truncation via `as u8` is safe for well-formed prototypes.
436 d.dump_byte(proto.upvalues.len() as u8)?;
437
438 // psource = None forces the top-level function to always write its source name.
439 // Deref coercion: &GcRef<LuaProto> → &LuaProto (via Deref<Target=LuaProto> on GcRef/Rc).
440 d.dump_function(proto, None)?;
441
442 Ok(())
443}
444
445// ────────────────────────────────────────────────────────────────────────────
446// PORT STATUS
447// source: src/ldump.c (230 lines, 10 functions)
448// target_crate: lua-vm
449// confidence: medium
450// todos: 4
451// port_notes: 12
452// unsafe_blocks: 0
453// notes: Types/imports need Phase B wiring; logic should be faithful.
454// Key uncertainties: (1) Instruction newtype inner-field access (.0 vs
455// method); (2) UpvalDesc.name optionality; (3) GcRef::ptr_eq method
456// existence. Lineinfo bulk-write is done via collect()+dump_block to
457// avoid unsafe transmute of &[i8] → &[u8]; revisit with bytemuck in
458// Phase B for performance. Native-endian serialisation via to_ne_bytes()
459// matches C's raw-memory dumpVector behaviour.
460// ────────────────────────────────────────────────────────────────────────────