// lua_vm/zio.rs

//! Buffered streams — Rust port of `lzio.c` + `lzio.h`.
//!
//! Provides two public types:
//! - [`ZIO`]: a read cursor wrapping an external chunk-supplier callback.
//! - [`LexBuffer`]: a growable `Vec<u8>` byte buffer with the named interface
//!   that C code accessed through the `luaZ_*buffer*` macro family.
//!
//! The lzio header is merged here per PORTING.md §1 ("Headers merge into the
//! consuming `.rs`").  All macros defined in `lzio.h` are translated at their
//! call sites and collected as methods or constants in this module.
//!
//! # C source files
//! - `reference/lua-5.4.7/src/lzio.c`  (68 lines, 3 functions)
//! - `reference/lua-5.4.7/src/lzio.h`  (66 lines, struct + macros; merged)

use std::fmt;

use lua_types::error::LuaError;

// TODO(port): import path for LuaState will need adjustment once the
// crate-internal module graph is settled in Phase B.  Using a local path
// for now; may become `use lua_types::state::LuaState` or similar.
use crate::state::LuaState;

// ── Constants ──────────────────────────────────────────────────────────────────

// macros.tsv: EOZ → const EOZ: i32 = -1
/// End-of-stream sentinel returned by [`ZIO::getc`] and [`ZIO::fill`].
///
/// Both methods report real bytes widened from `u8`, i.e. in `0..=255`, so the
/// negative value can never collide with stream data.
pub(crate) const EOZ: i32 = -1;

28// ── LexBuffer (was Mbuffer in C) ───────────────────────────────────────────────
29
30/// Growable byte buffer used by the lexer for token text accumulation.
31///
32/// Corresponds to `Mbuffer` in `lzio.h`.  The C struct tracked `buffer`,
33/// `n` (used length), and `buffsize` (allocated capacity) as three separate
34/// fields with manual realloc.  In Rust all three are implicit in `Vec<u8>`.
35///
36/// # C mapping (types.tsv)
37/// ```text
38/// Mbuffer     → LexBuffer
39/// .buffer     → Vec<u8>   (heap storage)
40/// .n          → Vec::len()
41/// .buffsize   → Vec::capacity()
42/// ```
43pub struct LexBuffer {
44    buffer: Vec<u8>,
45}
46
47impl LexBuffer {
48    // macros.tsv: luaZ_initbuffer → buf.init()  (most call sites just construct)
49    /// Construct an empty `LexBuffer`.  Corresponds to the `luaZ_initbuffer` macro.
50    pub fn new() -> Self {
51        LexBuffer { buffer: Vec::new() }
52    }
53
54    // macros.tsv: luaZ_buffer → buf.as_mut_slice()
55    /// Return the buffer contents as a mutable byte slice.
56    pub fn as_mut_slice(&mut self) -> &mut [u8] {
57        &mut self.buffer
58    }
59
60    // macros.tsv: luaZ_sizebuffer → buf.capacity()
61    /// Return the buffer's current allocation capacity in bytes.
62    pub fn capacity(&self) -> usize {
63        self.buffer.capacity()
64    }
65
66    // macros.tsv: luaZ_bufflen → buf.len()
67    /// Return the number of valid bytes currently stored in the buffer.
68    pub fn len(&self) -> usize {
69        self.buffer.len()
70    }
71
72    // macros.tsv: luaZ_buffremove → buf.truncate_by(i)
73    /// Shorten the live contents by `i` bytes without releasing capacity.
74    pub fn truncate_by(&mut self, i: usize) {
75        let new_len = self.buffer.len().saturating_sub(i);
76        self.buffer.truncate(new_len);
77    }
78
79    // macros.tsv: luaZ_resetbuffer → buf.clear()
80    /// Reset the live length to zero without releasing capacity.
81    pub fn clear(&mut self) {
82        self.buffer.clear();
83    }
84
85    //      ((buff)->buffer = luaM_reallocvchar(L, (buff)->buffer, \
86    //                          (buff)->buffsize, size), \
87    //       (buff)->buffsize = size)
88    // macros.tsv: luaZ_resizebuffer → buf.resize(state, size)?
89    /// Resize the buffer to exactly `size` bytes, filling new bytes with `0`.
90    ///
91    /// Returns `Err(LuaError::Memory)` on allocation failure.
92    ///
93    /// PORT NOTE: the C macro routes through `luaM_reallocvchar` and Lua's
94    /// custom allocator.  Phase A uses `Vec::resize` with Rust's global
95    /// allocator; OOM propagation via the custom allocator is a Phase D concern.
96    // PERF(port): luaM_reallocvchar — Vec::resize may over-allocate relative
97    // to the exact-fit C behaviour; profile in Phase B.
98    pub fn resize(&mut self, _state: &mut LuaState, size: usize) -> Result<(), LuaError> {
99        self.buffer.resize(size, 0u8);
100        Ok(())
101    }
102
103    // macros.tsv: luaZ_freebuffer → (Rust Drop handles deallocation; drop the call)
104    // PORT NOTE: `Drop for Vec` releases the heap allocation automatically.
105    // Call sites that use `luaZ_freebuffer` can simply let the `LexBuffer` drop.
106}
107
108impl Default for LexBuffer {
109    fn default() -> Self {
110        Self::new()
111    }
112}
113
114// ── ZIO (buffered input stream) ────────────────────────────────────────────────
115
116/// Buffered input stream wrapping an external chunk-reader callback.
117///
118/// Corresponds to `struct Zio` / `ZIO` in `lzio.h`.  The C struct stored a
119/// `lua_State *L` back-pointer and a `void *data` opaque pointer alongside a
120/// raw `lua_Reader` function pointer.  In Rust:
121///
122/// - `lua_State *L` is removed from the struct; callers hold `&mut LuaState`
123///   directly and pass it to fallible methods (per types.tsv).
124/// - `void *data` is folded into the reader closure (per types.tsv).
125/// - `const char *p` (raw pointer into the reader's internal buffer) becomes a
126///   `usize` index into the owned `current_chunk` field.
127///
128/// # C mapping (types.tsv)
129/// ```text
130/// Zio           → ZIO
131/// .n            → usize         (bytes still unread in current_chunk)
132/// .p            → usize         (cursor index; was const char *)
133/// .reader+.data → Box<dyn FnMut() -> Option<Vec<u8>>>  (combined)
134/// .L            → removed; callers pass &mut LuaState to methods
135/// ```
136///
137/// PORT NOTE: The types.tsv entry for `Zio.reader` lists
138/// `Box<dyn FnMut() -> Option<&[u8]>>`, but `&[u8]` cannot name a lifetime
139/// in a `dyn Fn` trait object without HRTB and a pinned source.  Phase A uses
140/// `Option<Vec<u8>>` instead; the reader returns an owned chunk.  Phase B
141/// should evaluate whether a zero-copy `&[u8]` path is achievable (e.g. by
142/// making the reader hold a pinned internal buffer and returning a slice into
143/// it via HRTB).
144pub struct ZIO {
145    n: usize,
146    // PORT NOTE: raw pointer replaced by index into `current_chunk`.
147    p: usize,
148    // PORT NOTE: C reader function pointer + void *data collapsed into one
149    // closure; lua_State *L removed from the struct per types.tsv.
150    // TODO(port): decide in Phase B whether concrete readers need
151    // `&mut LuaState` as a parameter (e.g. for lapi's load callbacks that
152    // may call into Lua).  If so, the signature becomes
153    // `Box<dyn FnMut(&mut LuaState) -> Option<Vec<u8>>>` and fill/read must
154    // thread state through.
155    reader: Box<dyn FnMut() -> Option<Vec<u8>>>,
156    // Owned current chunk returned by the reader.  Not present as a separate
157    // field in C (C held a raw pointer into the reader's own internal buffer).
158    current_chunk: Vec<u8>,
159}
160
161impl ZIO {
162    // macros.tsv: LUAI_FUNC → pub(crate)
163    /// Initialise a `ZIO` with the given reader callback.
164    ///
165    /// Corresponds to `luaZ_init` in `lzio.c`.  The C parameters `reader` and
166    /// `data` are combined into a single closure; `L` is no longer stored.
167    ///
168    /// # C source
169    /// ```c
170    ///
171    /// //   z->L = L;
172    /// //   z->reader = reader;
173    /// //   z->data = data;
174    /// //   z->n = 0;
175    /// //   z->p = NULL;
176    /// // }
177    /// ```
178    pub(crate) fn new(reader: Box<dyn FnMut() -> Option<Vec<u8>>>) -> Self {
179        ZIO {
180            n: 0,
181            p: 0,
182            current_chunk: Vec::new(),
183            reader,
184        }
185    }
186
187    // macros.tsv: LUAI_FUNC → pub(crate)
188    /// Refill the internal buffer by invoking the reader callback; return the
189    /// first byte of the new chunk as an `i32`, or [`EOZ`] if no more data is
190    /// available.
191    ///
192    /// # C source
193    /// ```c
194    ///
195    /// //   size_t size;
196    /// //   lua_State *L = z->L;
197    /// //   const char *buff;
198    /// //   lua_unlock(L);
199    /// //   buff = z->reader(L, z->data, &size);
200    /// //   lua_lock(L);
201    /// //   if (buff == NULL || size == 0)
202    /// //     return EOZ;
203    /// //   z->n = size - 1;  /* discount char being returned */
204    /// //   z->p = buff;
205    /// //   return cast_uchar(*(z->p++));
206    /// // }
207    /// ```
208    ///
209    /// PORT NOTE: `lua_unlock`/`lua_lock` are no-ops in the default build and
210    /// are dropped per macros.tsv.  `cast_uchar` → `as u8` per macros.tsv.
211    pub(crate) fn fill(&mut self) -> i32 {
212        let chunk_opt = (self.reader)();
213
214        match chunk_opt {
215            None => EOZ,
216            Some(chunk) if chunk.is_empty() => EOZ,
217            Some(chunk) => {
218                self.n = chunk.len() - 1;
219                self.current_chunk = chunk;
220                self.p = 0;
221                // cast_uchar → as u8  per macros.tsv
222                let byte = self.current_chunk[self.p] as u8;
223                self.p += 1;
224                byte as i32
225            }
226        }
227    }
228
229    // macros.tsv: zgetc → z.getc()  returning i32 (next byte or EOZ)
230    /// Return the next byte from the stream as an `i32`, or [`EOZ`] at
231    /// end-of-stream.
232    ///
233    /// This is the hot-path inline method corresponding to the `zgetc` macro.
234    /// When bytes remain in the current chunk no allocation occurs.
235    ///
236    /// # C source (macro)
237    /// ```c
238    ///
239    /// ```
240    ///
241    /// PORT NOTE: The C macro uses `(z)->n-- > 0` which reads n, tests it, then
242    /// decrements.  When n == 0 the test is false (0 > 0) so fill is called
243    /// without decrementing.  The Rust translation preserves this: `if self.n > 0`
244    /// followed by an explicit `self.n -= 1`.
245    #[inline]
246    pub(crate) fn getc(&mut self) -> i32 {
247        if self.n > 0 {
248            self.n -= 1;
249            let byte = self.current_chunk[self.p] as u8;
250            self.p += 1;
251            byte as i32
252        } else {
253            self.fill()
254        }
255    }
256
257    // macros.tsv: LUAI_FUNC → pub(crate)
258    /// Read exactly `buf.len()` bytes into `buf`.
259    ///
260    /// Returns the number of bytes that could **not** be read: `0` means
261    /// complete success; a non-zero value means end-of-stream was reached with
262    /// that many bytes still outstanding.
263    ///
264    /// # C source
265    /// ```c
266    ///
267    /// //   while (n) {
268    /// //     size_t m;
269    /// //     if (z->n == 0) {  /* no bytes in buffer? */
270    /// //       if (luaZ_fill(z) == EOZ)  /* try to read more */
271    /// //         return n;  /* no more input; return number of missing bytes */
272    /// //       else {
273    /// //         z->n++;  /* luaZ_fill consumed first byte; put it back */
274    /// //         z->p--;
275    /// //       }
276    /// //     }
277    /// //     m = (n <= z->n) ? n : z->n;  /* min. between n and z->n */
278    /// //     memcpy(b, z->p, m);
279    /// //     z->n -= m;
280    /// //     z->p += m;
281    /// //     b = (char *)b + m;
282    /// //     n -= m;
283    /// //   }
284    /// //   return 0;
285    /// // }
286    /// ```
287    ///
288    /// PORT NOTE: C's `void *b` + explicit `n` become Rust's `&mut [u8]`, whose
289    /// length encodes the requested byte count.  `memcpy` becomes
290    /// `copy_from_slice`.  The advancing pointer `b = (char *)b + m` is
291    /// replaced by a `dst` index into `buf`.
292    pub(crate) fn read(&mut self, buf: &mut [u8]) -> usize {
293        let mut remaining = buf.len();
294        let mut dst: usize = 0;
295
296        while remaining > 0 {
297            if self.n == 0 {
298                if self.fill() == EOZ {
299                    return remaining;
300                } else {
301                    // fill() advanced p by 1 and set n = chunk.len() - 1.
302                    // Undoing that makes the whole chunk available to the
303                    // copy loop below.
304                    self.n += 1;
305                    self.p -= 1;
306                }
307            }
308
309            let m = remaining.min(self.n);
310
311            buf[dst..dst + m]
312                .copy_from_slice(&self.current_chunk[self.p..self.p + m]);
313
314            self.n -= m;
315            self.p += m;
316
317            dst += m;
318            remaining -= m;
319        }
320
321        0
322    }
323}
324
// ──────────────────────────────────────────────────────────────────────────────
// PORT STATUS
//   source:        src/lzio.c  (68 lines, 3 functions)
//                  src/lzio.h  (66 lines, merged)
//   target_crate:  lua-vm
//   confidence:    medium
//   todos:         1
//   port_notes:    4
//   unsafe_blocks: 0   (must be 0 outside explicit unsafe-budget crates)
//   notes:         Logic is faithful.  The one open question (TODO) is whether
//                  concrete reader callbacks will need `&mut LuaState` as a
//                  parameter when load/dofile lands in Phase B.  If so,
//                  `ZIO::reader`, `fill`, `getc`, and `read` all need a
//                  threading change.  `LexBuffer::resize` stubs OOM handling
//                  (real allocator wiring is Phase D).  Import paths for
//                  `LuaState` and `LuaError` will require crate-graph fixes
//                  in Phase B.
// ──────────────────────────────────────────────────────────────────────────────