// lua_vm/zio.rs
//! Buffered streams — Rust port of `lzio.c` + `lzio.h`.
//!
//! Provides two public types:
//! - [`ZIO`]: a read cursor wrapping an external chunk-supplier callback.
//! - [`LexBuffer`]: a growable `Vec<u8>` byte buffer with the named interface
//!   that C code accessed through the `luaZ_*buffer*` macro family.
//!
//! The lzio header is merged here per PORTING.md §1 ("Headers merge into the
//! consuming `.rs`"). All macros defined in `lzio.h` are translated at their
//! call sites and collected as methods or constants in this module.
//!
//! # C source files
//! - `reference/lua-5.4.7/src/lzio.c` (68 lines, 3 functions)
//! - `reference/lua-5.4.7/src/lzio.h` (66 lines, struct + macros; merged)

// TODO(port): import path for LuaState will need adjustment once the
// crate-internal module graph is settled in Phase B. Using a local path
// for now; may become `use lua_types::state::LuaState` or similar.
use crate::state::LuaState;
use lua_types::error::LuaError;

// ── Constants ──────────────────────────────────────────────────────────────────

// C: #define EOZ (-1) /* end of stream */
// macros.tsv: EOZ → const EOZ: i32 = -1
/// End-of-stream sentinel returned by [`ZIO::getc`] and [`ZIO::fill`].
///
/// Those methods widen each byte to a non-negative `i32` (`0..=255`), so a
/// negative value can never be confused with real stream data.
pub(crate) const EOZ: i32 = -1;

// ── LexBuffer (was Mbuffer in C) ───────────────────────────────────────────────

31/// Growable byte buffer used by the lexer for token text accumulation.
32///
33/// Corresponds to `Mbuffer` in `lzio.h`. The C struct tracked `buffer`,
34/// `n` (used length), and `buffsize` (allocated capacity) as three separate
35/// fields with manual realloc. In Rust all three are implicit in `Vec<u8>`.
36///
37/// # C mapping (types.tsv)
38/// ```text
39/// Mbuffer → LexBuffer
40/// .buffer → Vec<u8> (heap storage)
41/// .n → Vec::len()
42/// .buffsize → Vec::capacity()
43/// ```
44pub struct LexBuffer {
45 // C: char *buffer; size_t n; size_t buffsize;
46 buffer: Vec<u8>,
47}
48
49impl LexBuffer {
50 // C: #define luaZ_initbuffer(L, buff) ((buff)->buffer = NULL, (buff)->buffsize = 0)
51 // macros.tsv: luaZ_initbuffer → buf.init() (most call sites just construct)
52 /// Construct an empty `LexBuffer`. Corresponds to the `luaZ_initbuffer` macro.
53 pub fn new() -> Self {
54 LexBuffer { buffer: Vec::new() }
55 }
56
57 // C: #define luaZ_buffer(buff) ((buff)->buffer)
58 // macros.tsv: luaZ_buffer → buf.as_mut_slice()
59 /// Return the buffer contents as a mutable byte slice.
60 pub fn as_mut_slice(&mut self) -> &mut [u8] {
61 &mut self.buffer
62 }
63
64 // C: #define luaZ_sizebuffer(buff) ((buff)->buffsize)
65 // macros.tsv: luaZ_sizebuffer → buf.capacity()
66 /// Return the buffer's current allocation capacity in bytes.
67 pub fn capacity(&self) -> usize {
68 self.buffer.capacity()
69 }
70
71 // C: #define luaZ_bufflen(buff) ((buff)->n)
72 // macros.tsv: luaZ_bufflen → buf.len()
73 /// Return the number of valid bytes currently stored in the buffer.
74 pub fn len(&self) -> usize {
75 self.buffer.len()
76 }
77
78 // C: #define luaZ_buffremove(buff, i) ((buff)->n -= (i))
79 // macros.tsv: luaZ_buffremove → buf.truncate_by(i)
80 /// Shorten the live contents by `i` bytes without releasing capacity.
81 pub fn truncate_by(&mut self, i: usize) {
82 let new_len = self.buffer.len().saturating_sub(i);
83 self.buffer.truncate(new_len);
84 }
85
86 // C: #define luaZ_resetbuffer(buff) ((buff)->n = 0)
87 // macros.tsv: luaZ_resetbuffer → buf.clear()
88 /// Reset the live length to zero without releasing capacity.
89 pub fn clear(&mut self) {
90 self.buffer.clear();
91 }
92
93 // C: #define luaZ_resizebuffer(L, buff, size) \
94 // ((buff)->buffer = luaM_reallocvchar(L, (buff)->buffer, \
95 // (buff)->buffsize, size), \
96 // (buff)->buffsize = size)
97 // macros.tsv: luaZ_resizebuffer → buf.resize(state, size)?
98 /// Resize the buffer to exactly `size` bytes, filling new bytes with `0`.
99 ///
100 /// Returns `Err(LuaError::Memory)` on allocation failure.
101 ///
102 /// PORT NOTE: the C macro routes through `luaM_reallocvchar` and Lua's
103 /// custom allocator. Phase A uses `Vec::resize` with Rust's global
104 /// allocator; OOM propagation via the custom allocator is a Phase D concern.
105 // PERF(port): luaM_reallocvchar — Vec::resize may over-allocate relative
106 // to the exact-fit C behaviour; profile in Phase B.
107 pub fn resize(&mut self, _state: &mut LuaState, size: usize) -> Result<(), LuaError> {
108 self.buffer.resize(size, 0u8);
109 Ok(())
110 }
111
112 // C: #define luaZ_freebuffer(L, buff) luaZ_resizebuffer(L, buff, 0)
113 // macros.tsv: luaZ_freebuffer → (Rust Drop handles deallocation; drop the call)
114 // PORT NOTE: `Drop for Vec` releases the heap allocation automatically.
115 // Call sites that use `luaZ_freebuffer` can simply let the `LexBuffer` drop.
116}
117
118impl Default for LexBuffer {
119 fn default() -> Self {
120 Self::new()
121 }
122}

// ── ZIO (buffered input stream) ────────────────────────────────────────────────

126/// Buffered input stream wrapping an external chunk-reader callback.
127///
128/// Corresponds to `struct Zio` / `ZIO` in `lzio.h`. The C struct stored a
129/// `lua_State *L` back-pointer and a `void *data` opaque pointer alongside a
130/// raw `lua_Reader` function pointer. In Rust:
131///
132/// - `lua_State *L` is removed from the struct; callers hold `&mut LuaState`
133/// directly and pass it to fallible methods (per types.tsv).
134/// - `void *data` is folded into the reader closure (per types.tsv).
135/// - `const char *p` (raw pointer into the reader's internal buffer) becomes a
136/// `usize` index into the owned `current_chunk` field.
137///
138/// # C mapping (types.tsv)
139/// ```text
140/// Zio → ZIO
141/// .n → usize (bytes still unread in current_chunk)
142/// .p → usize (cursor index; was const char *)
143/// .reader+.data → Box<dyn FnMut() -> Option<Vec<u8>>> (combined)
144/// .L → removed; callers pass &mut LuaState to methods
145/// ```
146///
147/// PORT NOTE: The types.tsv entry for `Zio.reader` lists
148/// `Box<dyn FnMut() -> Option<&[u8]>>`, but `&[u8]` cannot name a lifetime
149/// in a `dyn Fn` trait object without HRTB and a pinned source. Phase A uses
150/// `Option<Vec<u8>>` instead; the reader returns an owned chunk. Phase B
151/// should evaluate whether a zero-copy `&[u8]` path is achievable (e.g. by
152/// making the reader hold a pinned internal buffer and returning a slice into
153/// it via HRTB).
154pub struct ZIO {
155 // C: size_t n; /* bytes still unread */
156 n: usize,
157 // C: const char *p; /* current position in buffer */
158 // PORT NOTE: raw pointer replaced by index into `current_chunk`.
159 p: usize,
160 // C: lua_Reader reader; void *data;
161 // PORT NOTE: C reader function pointer + void *data collapsed into one
162 // closure; lua_State *L removed from the struct per types.tsv.
163 // TODO(port): decide in Phase B whether concrete readers need
164 // `&mut LuaState` as a parameter (e.g. for lapi's load callbacks that
165 // may call into Lua). If so, the signature becomes
166 // `Box<dyn FnMut(&mut LuaState) -> Option<Vec<u8>>>` and fill/read must
167 // thread state through.
168 reader: Box<dyn FnMut() -> Option<Vec<u8>>>,
169 // Owned current chunk returned by the reader. Not present as a separate
170 // field in C (C held a raw pointer into the reader's own internal buffer).
171 current_chunk: Vec<u8>,
172}
173
174impl ZIO {
175 // C: LUAI_FUNC void luaZ_init(lua_State *L, ZIO *z, lua_Reader reader, void *data)
176 // macros.tsv: LUAI_FUNC → pub(crate)
177 /// Initialise a `ZIO` with the given reader callback.
178 ///
179 /// Corresponds to `luaZ_init` in `lzio.c`. The C parameters `reader` and
180 /// `data` are combined into a single closure; `L` is no longer stored.
181 ///
182 /// # C source
183 /// ```c
184 /// // C: void luaZ_init(lua_State *L, ZIO *z, lua_Reader reader, void *data) {
185 /// // z->L = L;
186 /// // z->reader = reader;
187 /// // z->data = data;
188 /// // z->n = 0;
189 /// // z->p = NULL;
190 /// // }
191 /// ```
192 pub(crate) fn new(reader: Box<dyn FnMut() -> Option<Vec<u8>>>) -> Self {
193 ZIO {
194 n: 0,
195 p: 0,
196 current_chunk: Vec::new(),
197 reader,
198 }
199 }
200
201 // C: LUAI_FUNC int luaZ_fill(ZIO *z)
202 // macros.tsv: LUAI_FUNC → pub(crate)
203 /// Refill the internal buffer by invoking the reader callback; return the
204 /// first byte of the new chunk as an `i32`, or [`EOZ`] if no more data is
205 /// available.
206 ///
207 /// # C source
208 /// ```c
209 /// // C: int luaZ_fill(ZIO *z) {
210 /// // size_t size;
211 /// // lua_State *L = z->L;
212 /// // const char *buff;
213 /// // lua_unlock(L);
214 /// // buff = z->reader(L, z->data, &size);
215 /// // lua_lock(L);
216 /// // if (buff == NULL || size == 0)
217 /// // return EOZ;
218 /// // z->n = size - 1; /* discount char being returned */
219 /// // z->p = buff;
220 /// // return cast_uchar(*(z->p++));
221 /// // }
222 /// ```
223 ///
224 /// PORT NOTE: `lua_unlock`/`lua_lock` are no-ops in the default build and
225 /// are dropped per macros.tsv. `cast_uchar` → `as u8` per macros.tsv.
226 pub(crate) fn fill(&mut self) -> i32 {
227 // C: lua_unlock(L); -- no-op per macros.tsv; dropped
228 // C: buff = z->reader(L, z->data, &size);
229 let chunk_opt = (self.reader)();
230 // C: lua_lock(L); -- no-op per macros.tsv; dropped
231
232 match chunk_opt {
233 // C: if (buff == NULL || size == 0) return EOZ;
234 None => EOZ,
235 Some(chunk) if chunk.is_empty() => EOZ,
236 Some(chunk) => {
237 // C: z->n = size - 1; /* discount char being returned */
238 self.n = chunk.len() - 1;
239 // C: z->p = buff; (reset cursor to start of new chunk)
240 self.current_chunk = chunk;
241 self.p = 0;
242 // C: return cast_uchar(*(z->p++));
243 // cast_uchar → as u8 per macros.tsv
244 let byte = self.current_chunk[self.p] as u8;
245 self.p += 1;
246 byte as i32
247 }
248 }
249 }
250
251 // C: #define zgetc(z) (((z)->n--)>0 ? cast_uchar(*(z)->p++) : luaZ_fill(z))
252 // macros.tsv: zgetc → z.getc() returning i32 (next byte or EOZ)
253 /// Return the next byte from the stream as an `i32`, or [`EOZ`] at
254 /// end-of-stream.
255 ///
256 /// This is the hot-path inline method corresponding to the `zgetc` macro.
257 /// When bytes remain in the current chunk no allocation occurs.
258 ///
259 /// # C source (macro)
260 /// ```c
261 /// // C: #define zgetc(z) (((z)->n--)>0 ? cast_uchar(*(z)->p++) : luaZ_fill(z))
262 /// ```
263 ///
264 /// PORT NOTE: The C macro uses `(z)->n-- > 0` which reads n, tests it, then
265 /// decrements. When n == 0 the test is false (0 > 0) so fill is called
266 /// without decrementing. The Rust translation preserves this: `if self.n > 0`
267 /// followed by an explicit `self.n -= 1`.
268 #[inline]
269 pub(crate) fn getc(&mut self) -> i32 {
270 // C: ((z)->n--)>0 — if n is non-zero consume one byte from the chunk
271 if self.n > 0 {
272 self.n -= 1;
273 // C: cast_uchar(*(z)->p++) — read the byte then advance the cursor
274 let byte = self.current_chunk[self.p] as u8;
275 self.p += 1;
276 byte as i32
277 } else {
278 // C: luaZ_fill(z) — buffer exhausted; fetch the next chunk
279 self.fill()
280 }
281 }
282
283 // C: LUAI_FUNC size_t luaZ_read(ZIO *z, void *b, size_t n)
284 // macros.tsv: LUAI_FUNC → pub(crate)
285 /// Read exactly `buf.len()` bytes into `buf`.
286 ///
287 /// Returns the number of bytes that could **not** be read: `0` means
288 /// complete success; a non-zero value means end-of-stream was reached with
289 /// that many bytes still outstanding.
290 ///
291 /// # C source
292 /// ```c
293 /// // C: size_t luaZ_read(ZIO *z, void *b, size_t n) {
294 /// // while (n) {
295 /// // size_t m;
296 /// // if (z->n == 0) { /* no bytes in buffer? */
297 /// // if (luaZ_fill(z) == EOZ) /* try to read more */
298 /// // return n; /* no more input; return number of missing bytes */
299 /// // else {
300 /// // z->n++; /* luaZ_fill consumed first byte; put it back */
301 /// // z->p--;
302 /// // }
303 /// // }
304 /// // m = (n <= z->n) ? n : z->n; /* min. between n and z->n */
305 /// // memcpy(b, z->p, m);
306 /// // z->n -= m;
307 /// // z->p += m;
308 /// // b = (char *)b + m;
309 /// // n -= m;
310 /// // }
311 /// // return 0;
312 /// // }
313 /// ```
314 ///
315 /// PORT NOTE: C's `void *b` + explicit `n` become Rust's `&mut [u8]`, whose
316 /// length encodes the requested byte count. `memcpy` becomes
317 /// `copy_from_slice`. The advancing pointer `b = (char *)b + m` is
318 /// replaced by a `dst` index into `buf`.
319 pub(crate) fn read(&mut self, buf: &mut [u8]) -> usize {
320 // C: n (number of bytes still needed)
321 let mut remaining = buf.len();
322 // C: b (advancing output pointer) — represented as an index
323 let mut dst: usize = 0;
324
325 while remaining > 0 {
326 // C: if (z->n == 0) { /* no bytes in buffer? */
327 if self.n == 0 {
328 // C: if (luaZ_fill(z) == EOZ)
329 if self.fill() == EOZ {
330 // C: return n; /* no more input; return number of missing bytes */
331 return remaining;
332 } else {
333 // C: z->n++; /* luaZ_fill consumed first byte; put it back */
334 // C: z->p--;
335 // fill() advanced p by 1 and set n = chunk.len() - 1.
336 // Undoing that makes the whole chunk available to the
337 // copy loop below.
338 self.n += 1;
339 self.p -= 1;
340 }
341 }
342
343 // C: m = (n <= z->n) ? n : z->n; /* min. between n and z->n */
344 let m = remaining.min(self.n);
345
346 // C: memcpy(b, z->p, m);
347 buf[dst..dst + m]
348 .copy_from_slice(&self.current_chunk[self.p..self.p + m]);
349
350 // C: z->n -= m; z->p += m;
351 self.n -= m;
352 self.p += m;
353
354 // C: b = (char *)b + m; n -= m;
355 dst += m;
356 remaining -= m;
357 }
358
359 // C: return 0;
360 0
361 }
362}

// ──────────────────────────────────────────────────────────────────────────────
// PORT STATUS
//   source:        src/lzio.c (68 lines, 3 functions)
//                  src/lzio.h (66 lines, merged)
//   target_crate:  lua-vm
//   confidence:    medium
//   todos:         1
//   port_notes:    4
//   unsafe_blocks: 0 (must be 0 outside explicit unsafe-budget crates)
//   notes:         Logic is faithful. The one open question (TODO) is whether
//                  concrete reader callbacks will need `&mut LuaState` as a
//                  parameter when load/dofile lands in Phase B. If so,
//                  `ZIO::reader`, `fill`, `getc`, and `read` all need a
//                  threading change. `LexBuffer::resize` stubs OOM handling
//                  (real allocator wiring is Phase D). Import paths for
//                  `LuaState` and `LuaError` will require crate-graph fixes
//                  in Phase B.
// ──────────────────────────────────────────────────────────────────────────────