// Source: lua_vm/zio.rs

//! Buffered streams — Rust port of `lzio.c` + `lzio.h`.
//!
//! Provides two public types:
//! - [`ZIO`]: a read cursor wrapping an external chunk-supplier callback.
//! - [`LexBuffer`]: a growable `Vec<u8>` byte buffer with the named interface
//!   that C code accessed through the `luaZ_*buffer*` macro family.
//!
//! The lzio header is merged here per PORTING.md §1 ("Headers merge into the
//! consuming `.rs`").  All macros defined in `lzio.h` are translated at their
//! call sites and collected as methods or constants in this module.
//!
//! # C source files
//! - `reference/lua-5.4.7/src/lzio.c`  (68 lines, 3 functions)
//! - `reference/lua-5.4.7/src/lzio.h`  (66 lines, struct + macros; merged)

// TODO(port): import path for LuaState will need adjustment once the
// crate-internal module graph is settled in Phase B.  Using a local path
// for now; may become `use lua_types::state::LuaState` or similar.
use crate::state::LuaState;
use lua_types::error::LuaError;

// ── Constants ──────────────────────────────────────────────────────────────────

// C: #define EOZ  (-1)  /* end of stream */
// macros.tsv: EOZ → const EOZ: i32 = -1
/// End-of-stream sentinel returned by [`ZIO::getc`] and [`ZIO::fill`].
///
/// Data bytes are reported as `0..=255` (a `u8` widened to `i32`), so the
/// negative value `-1` can never be confused with stream content.
pub(crate) const EOZ: i32 = -1;

// ── LexBuffer (was Mbuffer in C) ───────────────────────────────────────────────

31/// Growable byte buffer used by the lexer for token text accumulation.
32///
33/// Corresponds to `Mbuffer` in `lzio.h`.  The C struct tracked `buffer`,
34/// `n` (used length), and `buffsize` (allocated capacity) as three separate
35/// fields with manual realloc.  In Rust all three are implicit in `Vec<u8>`.
36///
37/// # C mapping (types.tsv)
38/// ```text
39/// Mbuffer     → LexBuffer
40/// .buffer     → Vec<u8>   (heap storage)
41/// .n          → Vec::len()
42/// .buffsize   → Vec::capacity()
43/// ```
44pub struct LexBuffer {
45    // C: char *buffer;  size_t n;  size_t buffsize;
46    buffer: Vec<u8>,
47}
48
49impl LexBuffer {
50    // C: #define luaZ_initbuffer(L, buff) ((buff)->buffer = NULL, (buff)->buffsize = 0)
51    // macros.tsv: luaZ_initbuffer → buf.init()  (most call sites just construct)
52    /// Construct an empty `LexBuffer`.  Corresponds to the `luaZ_initbuffer` macro.
53    pub fn new() -> Self {
54        LexBuffer { buffer: Vec::new() }
55    }
56
57    // C: #define luaZ_buffer(buff)  ((buff)->buffer)
58    // macros.tsv: luaZ_buffer → buf.as_mut_slice()
59    /// Return the buffer contents as a mutable byte slice.
60    pub fn as_mut_slice(&mut self) -> &mut [u8] {
61        &mut self.buffer
62    }
63
64    // C: #define luaZ_sizebuffer(buff)  ((buff)->buffsize)
65    // macros.tsv: luaZ_sizebuffer → buf.capacity()
66    /// Return the buffer's current allocation capacity in bytes.
67    pub fn capacity(&self) -> usize {
68        self.buffer.capacity()
69    }
70
71    // C: #define luaZ_bufflen(buff)  ((buff)->n)
72    // macros.tsv: luaZ_bufflen → buf.len()
73    /// Return the number of valid bytes currently stored in the buffer.
74    pub fn len(&self) -> usize {
75        self.buffer.len()
76    }
77
78    // C: #define luaZ_buffremove(buff, i)  ((buff)->n -= (i))
79    // macros.tsv: luaZ_buffremove → buf.truncate_by(i)
80    /// Shorten the live contents by `i` bytes without releasing capacity.
81    pub fn truncate_by(&mut self, i: usize) {
82        let new_len = self.buffer.len().saturating_sub(i);
83        self.buffer.truncate(new_len);
84    }
85
86    // C: #define luaZ_resetbuffer(buff)  ((buff)->n = 0)
87    // macros.tsv: luaZ_resetbuffer → buf.clear()
88    /// Reset the live length to zero without releasing capacity.
89    pub fn clear(&mut self) {
90        self.buffer.clear();
91    }
92
93    // C: #define luaZ_resizebuffer(L, buff, size) \
94    //      ((buff)->buffer = luaM_reallocvchar(L, (buff)->buffer, \
95    //                          (buff)->buffsize, size), \
96    //       (buff)->buffsize = size)
97    // macros.tsv: luaZ_resizebuffer → buf.resize(state, size)?
98    /// Resize the buffer to exactly `size` bytes, filling new bytes with `0`.
99    ///
100    /// Returns `Err(LuaError::Memory)` on allocation failure.
101    ///
102    /// PORT NOTE: the C macro routes through `luaM_reallocvchar` and Lua's
103    /// custom allocator.  Phase A uses `Vec::resize` with Rust's global
104    /// allocator; OOM propagation via the custom allocator is a Phase D concern.
105    // PERF(port): luaM_reallocvchar — Vec::resize may over-allocate relative
106    // to the exact-fit C behaviour; profile in Phase B.
107    pub fn resize(&mut self, _state: &mut LuaState, size: usize) -> Result<(), LuaError> {
108        self.buffer.resize(size, 0u8);
109        Ok(())
110    }
111
112    // C: #define luaZ_freebuffer(L, buff)  luaZ_resizebuffer(L, buff, 0)
113    // macros.tsv: luaZ_freebuffer → (Rust Drop handles deallocation; drop the call)
114    // PORT NOTE: `Drop for Vec` releases the heap allocation automatically.
115    // Call sites that use `luaZ_freebuffer` can simply let the `LexBuffer` drop.
116}
117
118impl Default for LexBuffer {
119    fn default() -> Self {
120        Self::new()
121    }
122}

// ── ZIO (buffered input stream) ────────────────────────────────────────────────

126/// Buffered input stream wrapping an external chunk-reader callback.
127///
128/// Corresponds to `struct Zio` / `ZIO` in `lzio.h`.  The C struct stored a
129/// `lua_State *L` back-pointer and a `void *data` opaque pointer alongside a
130/// raw `lua_Reader` function pointer.  In Rust:
131///
132/// - `lua_State *L` is removed from the struct; callers hold `&mut LuaState`
133///   directly and pass it to fallible methods (per types.tsv).
134/// - `void *data` is folded into the reader closure (per types.tsv).
135/// - `const char *p` (raw pointer into the reader's internal buffer) becomes a
136///   `usize` index into the owned `current_chunk` field.
137///
138/// # C mapping (types.tsv)
139/// ```text
140/// Zio           → ZIO
141/// .n            → usize         (bytes still unread in current_chunk)
142/// .p            → usize         (cursor index; was const char *)
143/// .reader+.data → Box<dyn FnMut() -> Option<Vec<u8>>>  (combined)
144/// .L            → removed; callers pass &mut LuaState to methods
145/// ```
146///
147/// PORT NOTE: The types.tsv entry for `Zio.reader` lists
148/// `Box<dyn FnMut() -> Option<&[u8]>>`, but `&[u8]` cannot name a lifetime
149/// in a `dyn Fn` trait object without HRTB and a pinned source.  Phase A uses
150/// `Option<Vec<u8>>` instead; the reader returns an owned chunk.  Phase B
151/// should evaluate whether a zero-copy `&[u8]` path is achievable (e.g. by
152/// making the reader hold a pinned internal buffer and returning a slice into
153/// it via HRTB).
154pub struct ZIO {
155    // C: size_t n;  /* bytes still unread */
156    n: usize,
157    // C: const char *p;  /* current position in buffer */
158    // PORT NOTE: raw pointer replaced by index into `current_chunk`.
159    p: usize,
160    // C: lua_Reader reader;  void *data;
161    // PORT NOTE: C reader function pointer + void *data collapsed into one
162    // closure; lua_State *L removed from the struct per types.tsv.
163    // TODO(port): decide in Phase B whether concrete readers need
164    // `&mut LuaState` as a parameter (e.g. for lapi's load callbacks that
165    // may call into Lua).  If so, the signature becomes
166    // `Box<dyn FnMut(&mut LuaState) -> Option<Vec<u8>>>` and fill/read must
167    // thread state through.
168    reader: Box<dyn FnMut() -> Option<Vec<u8>>>,
169    // Owned current chunk returned by the reader.  Not present as a separate
170    // field in C (C held a raw pointer into the reader's own internal buffer).
171    current_chunk: Vec<u8>,
172}
173
174impl ZIO {
175    // C: LUAI_FUNC void luaZ_init(lua_State *L, ZIO *z, lua_Reader reader, void *data)
176    // macros.tsv: LUAI_FUNC → pub(crate)
177    /// Initialise a `ZIO` with the given reader callback.
178    ///
179    /// Corresponds to `luaZ_init` in `lzio.c`.  The C parameters `reader` and
180    /// `data` are combined into a single closure; `L` is no longer stored.
181    ///
182    /// # C source
183    /// ```c
184    /// // C: void luaZ_init(lua_State *L, ZIO *z, lua_Reader reader, void *data) {
185    /// //   z->L = L;
186    /// //   z->reader = reader;
187    /// //   z->data = data;
188    /// //   z->n = 0;
189    /// //   z->p = NULL;
190    /// // }
191    /// ```
192    pub(crate) fn new(reader: Box<dyn FnMut() -> Option<Vec<u8>>>) -> Self {
193        ZIO {
194            n: 0,
195            p: 0,
196            current_chunk: Vec::new(),
197            reader,
198        }
199    }
200
201    // C: LUAI_FUNC int luaZ_fill(ZIO *z)
202    // macros.tsv: LUAI_FUNC → pub(crate)
203    /// Refill the internal buffer by invoking the reader callback; return the
204    /// first byte of the new chunk as an `i32`, or [`EOZ`] if no more data is
205    /// available.
206    ///
207    /// # C source
208    /// ```c
209    /// // C: int luaZ_fill(ZIO *z) {
210    /// //   size_t size;
211    /// //   lua_State *L = z->L;
212    /// //   const char *buff;
213    /// //   lua_unlock(L);
214    /// //   buff = z->reader(L, z->data, &size);
215    /// //   lua_lock(L);
216    /// //   if (buff == NULL || size == 0)
217    /// //     return EOZ;
218    /// //   z->n = size - 1;  /* discount char being returned */
219    /// //   z->p = buff;
220    /// //   return cast_uchar(*(z->p++));
221    /// // }
222    /// ```
223    ///
224    /// PORT NOTE: `lua_unlock`/`lua_lock` are no-ops in the default build and
225    /// are dropped per macros.tsv.  `cast_uchar` → `as u8` per macros.tsv.
226    pub(crate) fn fill(&mut self) -> i32 {
227        // C: lua_unlock(L);  -- no-op per macros.tsv; dropped
228        // C: buff = z->reader(L, z->data, &size);
229        let chunk_opt = (self.reader)();
230        // C: lua_lock(L);  -- no-op per macros.tsv; dropped
231
232        match chunk_opt {
233            // C: if (buff == NULL || size == 0) return EOZ;
234            None => EOZ,
235            Some(chunk) if chunk.is_empty() => EOZ,
236            Some(chunk) => {
237                // C: z->n = size - 1;  /* discount char being returned */
238                self.n = chunk.len() - 1;
239                // C: z->p = buff;   (reset cursor to start of new chunk)
240                self.current_chunk = chunk;
241                self.p = 0;
242                // C: return cast_uchar(*(z->p++));
243                // cast_uchar → as u8  per macros.tsv
244                let byte = self.current_chunk[self.p] as u8;
245                self.p += 1;
246                byte as i32
247            }
248        }
249    }
250
251    // C: #define zgetc(z)  (((z)->n--)>0 ?  cast_uchar(*(z)->p++) : luaZ_fill(z))
252    // macros.tsv: zgetc → z.getc()  returning i32 (next byte or EOZ)
253    /// Return the next byte from the stream as an `i32`, or [`EOZ`] at
254    /// end-of-stream.
255    ///
256    /// This is the hot-path inline method corresponding to the `zgetc` macro.
257    /// When bytes remain in the current chunk no allocation occurs.
258    ///
259    /// # C source (macro)
260    /// ```c
261    /// // C: #define zgetc(z)  (((z)->n--)>0 ?  cast_uchar(*(z)->p++) : luaZ_fill(z))
262    /// ```
263    ///
264    /// PORT NOTE: The C macro uses `(z)->n-- > 0` which reads n, tests it, then
265    /// decrements.  When n == 0 the test is false (0 > 0) so fill is called
266    /// without decrementing.  The Rust translation preserves this: `if self.n > 0`
267    /// followed by an explicit `self.n -= 1`.
268    #[inline]
269    pub(crate) fn getc(&mut self) -> i32 {
270        // C: ((z)->n--)>0  — if n is non-zero consume one byte from the chunk
271        if self.n > 0 {
272            self.n -= 1;
273            // C: cast_uchar(*(z)->p++)  — read the byte then advance the cursor
274            let byte = self.current_chunk[self.p] as u8;
275            self.p += 1;
276            byte as i32
277        } else {
278            // C: luaZ_fill(z)  — buffer exhausted; fetch the next chunk
279            self.fill()
280        }
281    }
282
283    // C: LUAI_FUNC size_t luaZ_read(ZIO *z, void *b, size_t n)
284    // macros.tsv: LUAI_FUNC → pub(crate)
285    /// Read exactly `buf.len()` bytes into `buf`.
286    ///
287    /// Returns the number of bytes that could **not** be read: `0` means
288    /// complete success; a non-zero value means end-of-stream was reached with
289    /// that many bytes still outstanding.
290    ///
291    /// # C source
292    /// ```c
293    /// // C: size_t luaZ_read(ZIO *z, void *b, size_t n) {
294    /// //   while (n) {
295    /// //     size_t m;
296    /// //     if (z->n == 0) {  /* no bytes in buffer? */
297    /// //       if (luaZ_fill(z) == EOZ)  /* try to read more */
298    /// //         return n;  /* no more input; return number of missing bytes */
299    /// //       else {
300    /// //         z->n++;  /* luaZ_fill consumed first byte; put it back */
301    /// //         z->p--;
302    /// //       }
303    /// //     }
304    /// //     m = (n <= z->n) ? n : z->n;  /* min. between n and z->n */
305    /// //     memcpy(b, z->p, m);
306    /// //     z->n -= m;
307    /// //     z->p += m;
308    /// //     b = (char *)b + m;
309    /// //     n -= m;
310    /// //   }
311    /// //   return 0;
312    /// // }
313    /// ```
314    ///
315    /// PORT NOTE: C's `void *b` + explicit `n` become Rust's `&mut [u8]`, whose
316    /// length encodes the requested byte count.  `memcpy` becomes
317    /// `copy_from_slice`.  The advancing pointer `b = (char *)b + m` is
318    /// replaced by a `dst` index into `buf`.
319    pub(crate) fn read(&mut self, buf: &mut [u8]) -> usize {
320        // C: n (number of bytes still needed)
321        let mut remaining = buf.len();
322        // C: b (advancing output pointer) — represented as an index
323        let mut dst: usize = 0;
324
325        while remaining > 0 {
326            // C: if (z->n == 0) {  /* no bytes in buffer? */
327            if self.n == 0 {
328                // C: if (luaZ_fill(z) == EOZ)
329                if self.fill() == EOZ {
330                    // C: return n;  /* no more input; return number of missing bytes */
331                    return remaining;
332                } else {
333                    // C: z->n++;  /* luaZ_fill consumed first byte; put it back */
334                    // C: z->p--;
335                    // fill() advanced p by 1 and set n = chunk.len() - 1.
336                    // Undoing that makes the whole chunk available to the
337                    // copy loop below.
338                    self.n += 1;
339                    self.p -= 1;
340                }
341            }
342
343            // C: m = (n <= z->n) ? n : z->n;  /* min. between n and z->n */
344            let m = remaining.min(self.n);
345
346            // C: memcpy(b, z->p, m);
347            buf[dst..dst + m]
348                .copy_from_slice(&self.current_chunk[self.p..self.p + m]);
349
350            // C: z->n -= m;  z->p += m;
351            self.n -= m;
352            self.p += m;
353
354            // C: b = (char *)b + m;  n -= m;
355            dst += m;
356            remaining -= m;
357        }
358
359        // C: return 0;
360        0
361    }
362}

// ──────────────────────────────────────────────────────────────────────────────
// PORT STATUS
//   source:        src/lzio.c  (68 lines, 3 functions)
//                  src/lzio.h  (66 lines, merged)
//   target_crate:  lua-vm
//   confidence:    medium
//   todos:         1
//   port_notes:    4
//   unsafe_blocks: 0   (must be 0 outside explicit unsafe-budget crates)
//   notes:         Logic is faithful.  The one open question (TODO) is whether
//                  concrete reader callbacks will need `&mut LuaState` as a
//                  parameter when load/dofile lands in Phase B.  If so,
//                  `ZIO::reader`, `fill`, `getc`, and `read` all need a
//                  threading change.  `LexBuffer::resize` stubs OOM handling
//                  (real allocator wiring is Phase D).  Import paths for
//                  `LuaState` and `LuaError` will require crate-graph fixes
//                  in Phase B.
// ──────────────────────────────────────────────────────────────────────────────