lua_lex/lib.rs
1//! Lexical analyzer — port of `llex.c` + `llex.h`.
2//!
3//! Provides the Lua 5.4 lexer: character-by-character scanning of a [`ZIO`]
4//! input stream into [`Token`] values, with one-token lookahead. The
5//! `llex.h` header is merged here per PORTING.md §1.
6//!
7//! # C source files
8//! - `reference/lua-5.4.7/src/llex.c` (581 lines, 24 functions)
9//! - `reference/lua-5.4.7/src/llex.h` (91 lines; merged here)
10//!
11//! # Design notes
12//! - `LexState.L` (back-pointer to `lua_State`) is removed. All functions
13//! that need `LuaState` receive it as `state: &mut LuaState`.
14//! - `Token.token` is `i32` in Phase A (matching the C `int token` field).
15//! Single-byte tokens are their ASCII values; reserved-word tokens start at
16//! `FIRST_RESERVED` (257). A proper `TokenKind` enum is deferred to Phase B.
17//! - `save` / `save_and_next` are now fallible (`Result<(), LuaError>`); the
18//! `?` operator replaces the C noreturn `lexerror` call on buffer overflow.
19//! - The `goto read_save / only_save / no_save` pattern in `read_string` is
20//! translated via the local `EscapeResult` enum.
21
22// TODO(port): resolve remaining cross-crate calls (intern_str, table anchor,
23// number parsing, utf8 encoding) in Phase B. Canonical cross-crate type
24// imports are now in place per harness/type-vocabulary.tsv (see below).
25
26use std::io::Write as IoWrite;
27
28// PORT NOTE: GcRef<T> = Rc<T> in Phases A–C; replaced by real GC pointer in Phase D.
29use lua_types::gc::GcRef;
30
31// Canonical cross-crate types: imported from owner crates per
32// harness/type-vocabulary.tsv. See PORTING.md §7.
33pub use lua_types::LuaError;
34pub use lua_types::LuaString;
35pub use lua_vm::state::LuaState;
36pub use lua_vm::table::LuaTable;
37
38/// Placeholder for `LexBuffer` from `lua_vm::zio`.
39/// TODO(port): replace with `use lua_vm::zio::LexBuffer` in Phase B.
40/// types.tsv: Mbuffer → LexBuffer
41pub struct LexBuffer {
42 buffer: Vec<u8>,
43}
44
45impl LexBuffer {
46 pub fn new() -> Self {
47 LexBuffer { buffer: Vec::new() }
48 }
49
50 /// macros.tsv: luaZ_bufflen → buf.len()
51 pub fn len(&self) -> usize {
52 self.buffer.len()
53 }
54
55 /// macros.tsv: luaZ_sizebuffer → buf.capacity()
56 pub fn capacity(&self) -> usize {
57 self.buffer.capacity()
58 }
59
60 /// macros.tsv: luaZ_buffer → buf.as_mut_slice()
61 pub fn as_slice(&self) -> &[u8] {
62 &self.buffer
63 }
64
65 /// macros.tsv: luaZ_resetbuffer → buf.clear()
66 pub fn clear(&mut self) {
67 self.buffer.clear();
68 }
69
70 /// macros.tsv: luaZ_buffremove → buf.truncate_by(i)
71 pub fn truncate_by(&mut self, i: usize) {
72 let new_len = self.buffer.len().saturating_sub(i);
73 self.buffer.truncate(new_len);
74 }
75
76 /// allocated capacity. In C this changes `buffsize`, not the live byte
77 /// count `n`. The Rust analogue therefore manipulates `Vec::capacity`,
78 /// never `Vec::len` (otherwise `push_byte` would write past the live
79 /// content and leave embedded zero padding inside the token text).
80 pub fn resize(&mut self, _state: &mut LuaState, size: usize) -> Result<(), LuaError> {
81 if size < self.buffer.len() {
82 self.buffer.truncate(size);
83 }
84 if size > self.buffer.capacity() {
85 let extra = size - self.buffer.capacity();
86 self.buffer.reserve_exact(extra);
87 }
88 Ok(())
89 }
90
91 /// Append one byte to the live contents. Panics if capacity exceeded
92 /// (callers must pre-check via `save`).
93 fn push_byte(&mut self, c: u8) {
94 self.buffer.push(c);
95 }
96}
97
98impl Default for LexBuffer {
99 fn default() -> Self {
100 Self::new()
101 }
102}
103
104/// Placeholder for `ZIO` from `lua_vm::zio`.
105/// TODO(port): replace with `use lua_vm::zio::ZIO` in Phase B.
106/// types.tsv: Zio → ZIO
107pub struct ZIO {
108 // TODO(port): full ZIO implementation lives in lua_vm::zio; this is a stub.
109 reader: Box<dyn FnMut() -> Option<Vec<u8>>>,
110 n: usize,
111 p: usize,
112 current_chunk: Vec<u8>,
113}
114
115impl ZIO {
116 /// Construct a ZIO from a reader callback that yields successive chunks.
117 pub fn new(reader: Box<dyn FnMut() -> Option<Vec<u8>>>) -> Self {
118 ZIO { reader, n: 0, p: 0, current_chunk: Vec::new() }
119 }
120
121 /// Construct a ZIO that yields the supplied bytes once and then EOZ.
122 pub fn from_bytes(bytes: Vec<u8>) -> Self {
123 let mut once = Some(bytes);
124 ZIO::new(Box::new(move || once.take()))
125 }
126
127 /// macros.tsv: zgetc → z.getc()
128 pub fn getc(&mut self) -> i32 {
129 if self.n > 0 {
130 self.n -= 1;
131 let b = self.current_chunk[self.p] as u8;
132 self.p += 1;
133 b as i32
134 } else {
135 self.fill()
136 }
137 }
138
139 fn fill(&mut self) -> i32 {
140 match (self.reader)() {
141 None => EOZ,
142 Some(chunk) if chunk.is_empty() => EOZ,
143 Some(chunk) => {
144 self.n = chunk.len() - 1;
145 self.current_chunk = chunk;
146 self.p = 0;
147 let b = self.current_chunk[self.p] as u8;
148 self.p += 1;
149 b as i32
150 }
151 }
152 }
153}
154
155// ── Constants ─────────────────────────────────────────────────────────────────
156
157// macros.tsv: FIRST_RESERVED → const FIRST_RESERVED: i32 = 257
158/// First token kind value that is not a single-byte character.
159/// Single-byte tokens are represented by their ASCII value (0-255).
160pub const FIRST_RESERVED: i32 = 257;
161
162// macros.tsv: LUA_ENV → const LUA_ENV: &[u8] = b"_ENV"
163/// Name of the global environment upvalue.
164pub const LUA_ENV: &[u8] = b"_ENV";
165
166// macros.tsv: NUM_RESERVED → const NUM_RESERVED: usize = (TK_WHILE - FIRST_RESERVED + 1) as usize
167/// Number of reserved words (keywords).
168pub const NUM_RESERVED: usize = (TK_WHILE - FIRST_RESERVED + 1) as usize;
169
170// macros.tsv: EOZ → const EOZ: i32 = -1
171/// End-of-stream sentinel returned by ZIO::getc.
172pub const EOZ: i32 = -1;
173
174// macros.tsv: MAX_SIZE → const MAX_SIZE: usize = ...
175const MAX_SIZE: usize = if std::mem::size_of::<usize>() < std::mem::size_of::<i64>() {
176 usize::MAX
177} else {
178 i64::MAX as usize
179};
180
181// macros.tsv: LUA_MIN_BUFFER → const LUA_MIN_BUFFER: usize = 32
182const LUA_MIN_BUFFER: usize = 32;
183
184// ── Token kind constants (ORDER RESERVED — matches C enum RESERVED) ───────────
185//
186// In C these are enum values. In Rust we use i32 constants for Phase A
187// (faithful to `Token.token: int` in C) with a TODO for a proper enum in Phase B.
188//
189
190/// `and`
191pub const TK_AND: i32 = 257;
192/// `break`
193pub const TK_BREAK: i32 = 258;
194/// `do`
195pub const TK_DO: i32 = 259;
196/// `else`
197pub const TK_ELSE: i32 = 260;
198/// `elseif`
199pub const TK_ELSEIF: i32 = 261;
200/// `end`
201pub const TK_END: i32 = 262;
202/// `false`
203pub const TK_FALSE: i32 = 263;
204/// `for`
205pub const TK_FOR: i32 = 264;
206/// `function`
207pub const TK_FUNCTION: i32 = 265;
208/// `goto`
209pub const TK_GOTO: i32 = 266;
210/// `if`
211pub const TK_IF: i32 = 267;
212/// `in`
213pub const TK_IN: i32 = 268;
214/// `local`
215pub const TK_LOCAL: i32 = 269;
216/// `nil`
217pub const TK_NIL: i32 = 270;
218/// `not`
219pub const TK_NOT: i32 = 271;
220/// `or`
221pub const TK_OR: i32 = 272;
222/// `repeat`
223pub const TK_REPEAT: i32 = 273;
224/// `return`
225pub const TK_RETURN: i32 = 274;
226/// `then`
227pub const TK_THEN: i32 = 275;
228/// `true`
229pub const TK_TRUE: i32 = 276;
230/// `until`
231pub const TK_UNTIL: i32 = 277;
232/// `while` (last keyword; NUM_RESERVED = TK_WHILE - FIRST_RESERVED + 1 = 22)
233pub const TK_WHILE: i32 = 278;
234/// `//` (floor division)
235pub const TK_IDIV: i32 = 279;
236/// `..` (concatenation)
237pub const TK_CONCAT: i32 = 280;
238/// `...` (vararg)
239pub const TK_DOTS: i32 = 281;
240/// `==`
241pub const TK_EQ: i32 = 282;
242/// `>=`
243pub const TK_GE: i32 = 283;
244/// `<=`
245pub const TK_LE: i32 = 284;
246/// `~=`
247pub const TK_NE: i32 = 285;
248/// `<<`
249pub const TK_SHL: i32 = 286;
250/// `>>`
251pub const TK_SHR: i32 = 287;
252/// `::`
253pub const TK_DBCOLON: i32 = 288;
254/// `<eof>`
255pub const TK_EOS: i32 = 289;
256/// `<number>` (float literal)
257pub const TK_FLT: i32 = 290;
258/// `<integer>` (integer literal)
259pub const TK_INT: i32 = 291;
260/// `<name>` (identifier)
261pub const TK_NAME: i32 = 292;
262/// `<string>` (string literal)
263pub const TK_STRING: i32 = 293;
264
265// Lua 5.5 `global`: with the upstream-default LUA_COMPAT_GLOBAL it is NOT a
266// reserved word — it always lexes as TK_NAME (so it stays a valid identifier on
267// every version), and the parser recognizes the `global` declaration statement
268// contextually (see `globalstat`/`statement` in lua-parse). There is therefore
269// no dedicated token id.
270
// ORDER RESERVED — entry `i` is the text of token `FIRST_RESERVED + i`
// (index 0 = TK_AND - FIRST_RESERVED, and so on).
/// Display strings for tokens, indexed by `token - FIRST_RESERVED`.
pub static LUAX_TOKENS: &[&[u8]] = &[
    // keywords (indices 0-21, TK_AND..=TK_WHILE)
    b"and", b"break", b"do", b"else",
    b"elseif", b"end", b"false", b"for",
    b"function", b"goto", b"if", b"in",
    b"local", b"nil", b"not", b"or",
    b"repeat", b"return", b"then", b"true",
    b"until", b"while",
    // multi-character operators (indices 22-31, TK_IDIV..=TK_DBCOLON)
    b"//", b"..", b"...", b"==", b">=",
    b"<=", b"~=", b"<<", b">>", b"::",
    // end-of-stream marker and literal classes (indices 32-36, TK_EOS..=TK_STRING)
    b"<eof>", b"<number>", b"<integer>", b"<name>", b"<string>",
];
284
285// ── SemInfo / TokenValue ───────────────────────────────────────────────────────
286
287// types.tsv: SemInfo → TokenValue
288/// Semantic payload carried by a token.
289///
290/// Corresponds to `SemInfo` (a C union) in `llex.h`. In Rust this is a
291/// discriminated union (enum).
292///
293/// # C mapping
294/// ```text
295/// SemInfo.r → TokenValue::Float(f64) (lua_Number)
296/// SemInfo.i → TokenValue::Int(i64) (lua_Integer)
297/// SemInfo.ts → TokenValue::Str(GcRef<LuaString>)
298/// (no C field) → TokenValue::None (default / unset)
299/// ```
300#[derive(Clone)]
301pub enum TokenValue {
302 /// No semantic value (default; used for single-byte and most multi-char tokens).
303 None,
304 /// Float literal payload. C: `seminfo.r` (`lua_Number`).
305 Float(f64),
306 /// Integer literal payload. C: `seminfo.i` (`lua_Integer`).
307 Int(i64),
308 /// String/name payload. C: `seminfo.ts` (`TString *`).
309 Str(GcRef<LuaString>),
310}
311
312// ── Token ─────────────────────────────────────────────────────────────────────
313
314// types.tsv: Token → Token; Token.token → i32 (Phase A; TODO: TokenKind enum Phase B)
315/// A single lexed token with its semantic payload.
316///
317/// `kind` is an `i32` whose value is either an ASCII byte code (for single-byte
318/// tokens like `+`, `-`, `[`) or one of the `TK_*` constants (for reserved
319/// words, multi-char symbols, and literals).
320///
321/// TODO(port): Phase B — replace `kind: i32` with a proper `TokenKind` enum
322/// covering both single-byte and named tokens (e.g. `TokenKind::Char(u8)` +
323/// named variants).
324#[derive(Clone)]
325pub struct Token {
326 pub kind: i32,
327 pub value: TokenValue,
328}
329
330impl Token {
331 /// Construct a token with no semantic value.
332 pub fn new(kind: i32) -> Self {
333 Token { kind, value: TokenValue::None }
334 }
335
336 /// The end-of-stream sentinel token.
337 pub fn eos() -> Self {
338 Token::new(TK_EOS)
339 }
340}
341
342// ── LexState ──────────────────────────────────────────────────────────────────
343
344// types.tsv: LexState → LexState; LexState.L removed (thread via &mut LuaState)
345/// Per-chunk lexer (and shared parser) state.
346///
347/// Corresponds to `LexState` in `llex.h`. Owns the input stream, token
348/// buffer, and current/lookahead tokens.
349///
350/// # C mapping (types.tsv)
351/// ```text
352/// LexState.current → current: i32 (charint; -1 = EOZ)
353/// LexState.linenumber → linenumber: i32
354/// LexState.lastline → lastline: i32
355/// LexState.t → t: Token (current token)
356/// LexState.lookahead → lookahead: Token (one-token lookahead)
357/// LexState.fs → fs: Option<Box<FuncState>> (parser state)
358/// LexState.L → (removed; callers pass &mut LuaState)
359/// LexState.z → z: ZIO (owned input stream)
360/// LexState.buff → buff: LexBuffer (owned token-text buffer)
361/// LexState.h → h: GcRef<LuaTable> (string-anchor table)
362/// LexState.dyd → dyd: DynData (parser dynamic data)
363/// LexState.source → source: GcRef<LuaString>
364/// LexState.envn → envn: GcRef<LuaString>
365/// ```
366pub struct LexState {
367 pub current: i32,
368 pub linenumber: i32,
369 pub lastline: i32,
370 pub t: Token,
371 pub lookahead: Token,
372 // TODO(port): Box<FuncState> once FuncState lands in lua-parse (Phase B)
373 pub fs: Option<()>,
374 // PORT NOTE: C held a pointer; Rust owns the ZIO directly per types.tsv.
375 pub z: ZIO,
376 // PORT NOTE: C held a pointer; Rust owns the LexBuffer directly per types.tsv.
377 pub buff: LexBuffer,
378 // TODO(port): GcRef<LuaTable> once LuaTable is defined in Phase B
379 pub h: Option<GcRef<LuaTable>>,
380 /// Per-parse-session anchor for long strings. C-Lua's `ls->h` is a Lua
381 /// table that deduplicates all literal strings within a chunk (both short
382 /// and long), so e.g. `local s1 <const>="..."` and `local s2 <const>="..."`
383 /// with identical 50-byte payloads share one `TString` object — which is
384 /// what makes `string.format("%p", s1) == string.format("%p", s2)` hold.
385 /// Short strings already share identity via the global `interned_lt` pool,
386 /// but long strings (>LUAI_MAXSHORTLEN = 40) are not globally interned and
387 /// need this session-level map. Keyed by the string bytes; populated lazily
388 /// by `new_string`.
389 pub long_str_anchor: std::collections::HashMap<Vec<u8>, GcRef<LuaString>>,
390 // TODO(port): DynData once parser types land in Phase B
391 pub dyd: Option<()>,
392 pub source: GcRef<LuaString>,
393 pub envn: GcRef<LuaString>,
394}
395
396// ── Character-classification helpers ─────────────────────────────────────────
397//
398// These are simplified ASCII implementations for Phase A.
399// TODO(port): import from lua_vm::ctype in Phase B; the full table handles
400// the LUA_UCID (Unicode identifiers) flag and matches the C bit-table exactly.
401//
402// PORT NOTE: the C macros take `int` (not `char`) so they handle EOZ (-1) safely.
403// These Rust fns match that contract: EOZ returns false for all predicates.
404
/// `true` for the ASCII decimal digits `0`-`9`; `false` for everything else,
/// including `EOZ` (-1).
#[inline]
fn is_digit(c: i32) -> bool {
    (b'0' as i32..=b'9' as i32).contains(&c)
}
409
/// `true` for ASCII hexadecimal digits (`0`-`9`, `a`-`f`, `A`-`F`); `false`
/// for everything else, including `EOZ` (-1).
#[inline]
fn is_xdigit(c: i32) -> bool {
    let decimal = b'0' as i32..=b'9' as i32;
    let lower = b'a' as i32..=b'f' as i32;
    let upper = b'A' as i32..=b'F' as i32;
    decimal.contains(&c) || lower.contains(&c) || upper.contains(&c)
}
416
// ALPHABIT: ASCII letters + '_'
/// `true` for characters that may start an identifier: ASCII letters and
/// the underscore. `false` for everything else, including `EOZ` (-1).
#[inline]
fn is_lalpha(c: i32) -> bool {
    if c == b'_' as i32 {
        return true;
    }
    let lower = b'a' as i32..=b'z' as i32;
    let upper = b'A' as i32..=b'Z' as i32;
    lower.contains(&c) || upper.contains(&c)
}
424
425#[inline]
426fn is_lalnum(c: i32) -> bool {
427 is_lalpha(c) || is_digit(c)
428}
429
/// `true` for ASCII whitespace: `\t` (9), `\n` (10), `\v` (11), `\f` (12),
/// `\r` (13) and space (32). `false` for everything else, including `EOZ`.
#[inline]
fn is_space(c: i32) -> bool {
    // 9..=13 covers \t \n \v \f \r; 32 is the space character.
    (9..=13).contains(&c) || c == 32
}
434
// PRINTBIT: printable ASCII (graph + space), i.e. 0x20-0x7E
/// `true` for printable ASCII, from space (0x20) through `~` (0x7E).
/// `false` for everything else, including `EOZ` (-1).
#[inline]
fn is_print(c: i32) -> bool {
    (0x20..=0x7E).contains(&c)
}
440
441#[inline]
442fn curr_is_newline(ls: &LexState) -> bool {
443 ls.current == b'\n' as i32 || ls.current == b'\r' as i32
444}
445
446// ── Low-level stream helpers ───────────────────────────────────────────────────
447
448/// Advance the lexer by one character.
449///
450/// Corresponds to the `next(ls)` macro. Named `advance` to avoid collision
451/// with Rust's iterator method.
452#[inline]
453fn advance(ls: &mut LexState) {
454 // macros.tsv: zgetc → z.getc()
455 ls.current = ls.z.getc();
456}
457
458/// Append character `c` to the token buffer, growing it if necessary.
459///
460/// On overflow calls [`lex_error`] which becomes `Err(LuaError::Syntax(...))`.
461///
462/// # C source
463/// ```c
464///
465/// // Mbuffer *b = ls->buff;
466/// // if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
467/// // size_t newsize;
468/// // if (luaZ_sizebuffer(b) >= MAX_SIZE/2)
469/// // lexerror(ls, "lexical element too long", 0);
470/// // newsize = luaZ_sizebuffer(b) * 2;
471/// // luaZ_resizebuffer(ls->L, b, newsize);
472/// // }
473/// // b->buffer[luaZ_bufflen(b)++] = cast_char(c);
474/// // }
475/// ```
476fn save(ls: &mut LexState, state: &mut LuaState, c: i32) -> Result<(), LuaError> {
477 // macros.tsv: luaZ_bufflen → buf.len(); luaZ_sizebuffer → buf.capacity()
478 if ls.buff.len() + 1 > ls.buff.capacity() {
479 if ls.buff.capacity() >= MAX_SIZE / 2 {
480 return Err(lex_error(ls, b"lexical element too long", 0));
481 }
482 // luaZ_resizebuffer(ls->L, b, newsize);
483 // macros.tsv: luaZ_resizebuffer → buf.resize(state, size)?
484 let newsize = ls.buff.capacity() * 2;
485 ls.buff.resize(state, newsize)?;
486 }
487 // macros.tsv: cast_char → x as i8 (C char is signed; Lua bytes stored as-is)
488 // PORT NOTE: we store the byte value directly; the i8 cast in C is for the
489 // C char type but the data is read back as unsigned via cast_uchar everywhere.
490 ls.buff.push_byte(c as u8);
491 Ok(())
492}
493
494/// Save the current character into the token buffer, then advance the stream.
495///
496/// Corresponds to the `save_and_next(ls)` macro. Fallible because `save`
497/// may need to grow the buffer.
498#[inline]
499fn save_and_next(ls: &mut LexState, state: &mut LuaState) -> Result<(), LuaError> {
500 let c = ls.current;
501 save(ls, state, c)?;
502 advance(ls);
503 Ok(())
504}
505
506// ── Error helpers ─────────────────────────────────────────────────────────────
507
508// l_noret → -> ! but in Rust we return LuaError (callers wrap in Err(...))
509// error_sites.tsv: luaX_lexerror → return Err(LuaError::syntax_at(ls, "msg", token))
510/// Build a syntax error, optionally annotated with the offending token text.
511///
512/// Corresponds to the static `lexerror` function in `llex.c`. In C this is
513/// `l_noret` (diverges via `luaD_throw`); in Rust it returns a `LuaError`
514/// value that callers wrap in `Err(...)`.
515///
516/// # C source
517/// ```c
518///
519/// // msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
520/// // if (token)
521/// // luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
522/// // luaD_throw(ls->L, LUA_ERRSYNTAX);
523/// // }
524/// ```
525pub fn lex_error(ls: &mut LexState, msg: &[u8], token: i32) -> LuaError {
526 const LUA_IDSIZE: usize = 60;
527 let mut buff = [0u8; LUA_IDSIZE];
528 let n = lua_vm::object::chunk_id(&mut buff[..], ls.source.as_bytes());
529 let src_part = &buff[..n];
530
531 let mut full_msg: Vec<u8> = Vec::new();
532 full_msg.extend_from_slice(src_part);
533 let _ = write!(full_msg, ":{}: ", ls.linenumber);
534 full_msg.extend_from_slice(msg);
535
536 if token != 0 {
537 let tok_text = txt_token(ls, token);
538 full_msg.extend_from_slice(b" near ");
539 full_msg.extend_from_slice(&tok_text);
540 }
541
542 LuaError::syntax_raw(&full_msg)
543}
544
545// LUAI_FUNC → pub(crate)
546// error_sites.tsv: luaX_syntaxerror → return Err(LuaError::syntax(format_args!("msg")))
547/// Report a syntax error at the current token.
548///
549/// # C source
550/// ```c
551///
552/// // lexerror(ls, msg, ls->t.token);
553/// // }
554/// ```
555pub fn syntax_error(ls: &mut LexState, msg: &[u8]) -> LuaError {
556 let token = ls.t.kind;
557 lex_error(ls, msg, token)
558}
559
560/// Report a semantic error at the current line WITHOUT the `near <token>`
561/// suffix.
562///
563/// Mirrors upstream `luaK_semerror` (`lcode.c`), which sets
564/// `ls->t.token = 0` before calling `luaX_syntaxerror` so the `near` clause is
565/// suppressed. Used for attribute errors (`unknown attribute '<name>'`,
566/// `global variables cannot be to-be-closed`) where the offending construct is
567/// the attribute itself, not the current lookahead token.
568pub fn sem_error(ls: &mut LexState, msg: &[u8]) -> LuaError {
569 lex_error(ls, msg, 0)
570}
571
572/// Produce a human-readable representation of `token` for error messages.
573///
574/// For `TK_NAME`, `TK_STRING`, `TK_FLT`, `TK_INT`: formats the current
575/// token buffer contents as `'<text>'`. For everything else, delegates to
576/// [`token2str`].
577///
578/// # C source
579/// ```c
580///
581/// // switch (token) {
582/// // case TK_NAME: case TK_STRING:
583/// // case TK_FLT: case TK_INT:
584/// // save(ls, '\0');
585/// // return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
586/// // default:
587/// // return luaX_token2str(ls, token);
588/// // }
589/// // }
590/// ```
591///
592/// PORT NOTE: C calls `luaO_pushfstring` which pushes the string onto the
593/// Lua stack (stack-anchored temporary). Rust returns `Vec<u8>` directly
594/// since there is no stack-based string lifecycle for error formatting.
595fn txt_token(ls: &mut LexState, token: i32) -> Vec<u8> {
596 match token {
597 t if t == TK_NAME || t == TK_STRING || t == TK_FLT || t == TK_INT => {
598 let mut v: Vec<u8> = Vec::new();
599 v.push(b'\'');
600 let buff = ls.buff.as_slice();
601 let trimmed = if buff.last() == Some(&0) { &buff[..buff.len() - 1] } else { buff };
602 v.extend_from_slice(trimmed);
603 v.push(b'\'');
604 v
605 }
606 _ => token2str_raw(token),
607 }
608}
609
610// LUAI_FUNC → pub(crate)
611/// Produce a human-readable token description (for error messages and the parser).
612///
613/// Single-byte printable tokens are formatted as `'X'`; non-printable as
614/// `'<\N>'`. Reserved words and multi-char symbols are formatted as `'kw'`.
615/// Literal tokens (`<name>`, `<string>`, etc.) return the bare label.
616///
617/// # C source
618/// ```c
619///
620/// // if (token < FIRST_RESERVED) {
621/// // if (lisprint(token))
622/// // return luaO_pushfstring(ls->L, "'%c'", token);
623/// // else
624/// // return luaO_pushfstring(ls->L, "'<\\%d>'", token);
625/// // }
626/// // else {
627/// // const char *s = luaX_tokens[token - FIRST_RESERVED];
628/// // if (token < TK_EOS)
629/// // return luaO_pushfstring(ls->L, "'%s'", s);
630/// // else
631/// // return s;
632/// // }
633/// // }
634/// ```
635///
636/// PORT NOTE: The `LexState` parameter is retained in the signature for API
637/// parity with the C export, but is unused in Rust because we don't push onto
638/// the Lua stack. The real formatting is in [`token2str_raw`].
639pub fn token2str(_ls: &LexState, token: i32) -> Vec<u8> {
640 token2str_raw(token)
641}
642
643/// Inner implementation of [`token2str`] that does not need `LexState`.
644fn token2str_raw(token: i32) -> Vec<u8> {
645 if token < FIRST_RESERVED {
646 if is_print(token) {
647 vec![b'\'', token as u8, b'\'']
648 } else {
649 // PORT NOTE: uses write! to Vec<u8> to avoid String allocation for Lua data.
650 let mut v: Vec<u8> = Vec::new();
651 v.extend_from_slice(b"'<\\");
652 let _ = write!(&mut v, "{}", token);
653 v.extend_from_slice(b">'");
654 v
655 }
656 } else {
657 let idx = (token - FIRST_RESERVED) as usize;
658 let s = LUAX_TOKENS[idx];
659 if token < TK_EOS {
660 let mut v: Vec<u8> = Vec::with_capacity(s.len() + 2);
661 v.push(b'\'');
662 v.extend_from_slice(s);
663 v.push(b'\'');
664 v
665 } else {
666 s.to_vec()
667 }
668 }
669}
670
671// ── Public init / setup ───────────────────────────────────────────────────────
672
673// LUAI_FUNC → pub(crate)
674/// Initialise the lexer subsystem: intern all reserved words and fix them
675/// in the GC so they are never collected.
676///
677/// Must be called exactly once during VM startup via `luaX_init`.
678///
679/// # C source
680/// ```c
681///
682/// // int i;
683/// // TString *e = luaS_newliteral(L, LUA_ENV); /* create env name */
684/// // luaC_fix(L, obj2gco(e)); /* never collect this name */
685/// // for (i=0; i<NUM_RESERVED; i++) {
686/// // TString *ts = luaS_new(L, luaX_tokens[i]);
687/// // luaC_fix(L, obj2gco(ts)); /* reserved words are never collected */
688/// // ts->extra = cast_byte(i+1); /* reserved word */
689/// // }
690/// // }
691/// ```
692pub fn init(state: &mut LuaState) -> Result<(), LuaError> {
693 // macros.tsv: luaS_newliteral → state.intern_str(b"...")
694 // TODO(port): call state.intern_str(LUA_ENV) once LuaState has that method (Phase B)
695 let _e = intern_str_stub(state, LUA_ENV)?;
696
697 // macros.tsv: luaC_objbarrier / luaC_fix — GC fix; no-op in Phases A-C
698 // TODO(port): state.gc().fix(e) in Phase D
699
700 for i in 0..NUM_RESERVED {
701 // macros.tsv: luaS_new → state.intern_str(...)
702 // TODO(port): call state.intern_str(LUAX_TOKENS[i]) in Phase B
703 let ts = intern_str_stub(state, LUAX_TOKENS[i])?;
704
705 // TODO(port): state.gc().fix(ts.clone()) in Phase D
706
707 // macros.tsv: cast_byte → x as u8
708 // PORT NOTE: LuaString.extra uses Cell<u8> interior mutability.
709 // TODO(port): ts.set_extra((i + 1) as u8) — needs pub accessor on LuaString
710 let _ = ts; // suppress unused warning until Phase B
711 }
712
713 Ok(())
714}
715
716// LUAI_FUNC → pub(crate)
717/// Initialise `ls` for lexing a new chunk from stream `z`.
718///
719/// # C source
720/// ```c
721///
722/// // TString *source, int firstchar) {
723/// // ls->t.token = 0;
724/// // ls->L = L;
725/// // ls->current = firstchar;
726/// // ls->lookahead.token = TK_EOS; /* no look-ahead token */
727/// // ls->z = z;
728/// // ls->fs = NULL;
729/// // ls->linenumber = 1;
730/// // ls->lastline = 1;
731/// // ls->source = source;
732/// // ls->envn = luaS_newliteral(L, LUA_ENV); /* get env name */
733/// // luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);
734/// // }
735/// ```
736pub fn set_input(
737 state: &mut LuaState,
738 ls: &mut LexState,
739 z: ZIO,
740 source: GcRef<LuaString>,
741 firstchar: i32,
742) -> Result<(), LuaError> {
743 ls.t = Token::new(0);
744 ls.current = firstchar;
745 ls.lookahead = Token::eos();
746 ls.z = z;
747 ls.fs = None;
748 ls.linenumber = 1;
749 ls.lastline = 1;
750 ls.source = source;
751 // macros.tsv: luaS_newliteral → state.intern_str(b"...")
752 // TODO(port): state.intern_str(LUA_ENV) in Phase B
753 ls.envn = intern_str_stub(state, LUA_ENV)?;
754 // macros.tsv: luaZ_resizebuffer → buf.resize(state, size)?
755 ls.buff.resize(state, LUA_MIN_BUFFER)?;
756 Ok(())
757}
758
759// LUAI_FUNC → pub(crate)
760/// Create (or retrieve) a Lua string and anchor it in the parser's GC-protection
761/// table `ls.h` so it cannot be collected before the end of compilation.
762///
763/// Also internalises long strings so that each unique content has exactly one
764/// copy in memory. The table `ls.h` is used as a set: the string is both the
765/// key and the value.
766///
767/// # C source
768/// ```c
769///
770/// // lua_State *L = ls->L;
771/// // TString *ts = luaS_newlstr(L, str, l);
772/// // const TValue *o = luaH_getstr(ls->h, ts);
773/// // if (!ttisnil(o)) /* string already present? */
774/// // ts = keystrval(nodefromval(o)); /* get saved copy */
775/// // else {
776/// // TValue *stv = s2v(L->top.p++); /* reserve stack space */
777/// // setsvalue(L, stv, ts); /* anchor the string */
778/// // luaH_finishset(L, ls->h, stv, o, stv); /* t[string] = string */
779/// // luaC_checkGC(L);
780/// // L->top.p--; /* remove string from stack */
781/// // }
782/// // return ts;
783/// // }
784/// ```
785pub(crate) fn new_string(
786 state: &mut LuaState,
787 ls: &mut LexState,
788 bytes: &[u8],
789) -> Result<GcRef<LuaString>, LuaError> {
790 // PORT NOTE: in C, the anchor table ls->h is a Lua table mapping the string
791 // to itself so a second occurrence of the same literal in the chunk returns
792 // the originally-created TString. We use a plain HashMap on LexState
793 // (`long_str_anchor`) for the equivalent dedup — sufficient because Phase
794 // A-C `GcRef<T>` is `Rc<T>` and identity is determined by the `Rc`
795 // allocation. Short strings already share identity via the global pool;
796 // long strings (>LUAI_MAXSHORTLEN) need this session-level map.
797 if let Some(existing) = ls.long_str_anchor.get(bytes) {
798 return Ok(existing.clone());
799 }
800 let ts = intern_str_stub(state, bytes)?;
801 ls.long_str_anchor.insert(bytes.to_vec(), ts.clone());
802 Ok(ts)
803}
804
805// ── Public advance / lookahead ─────────────────────────────────────────────────
806
807// LUAI_FUNC → pub(crate)
808/// Consume the current token; load the next one from the stream.
809///
810/// If a lookahead token was set, it becomes the current token without re-reading
811/// from the stream.
812///
813/// # C source
814/// ```c
815///
816/// // ls->lastline = ls->linenumber;
817/// // if (ls->lookahead.token != TK_EOS) {
818/// // ls->t = ls->lookahead;
819/// // ls->lookahead.token = TK_EOS;
820/// // }
821/// // else
822/// // ls->t.token = llex(ls, &ls->t.seminfo);
823/// // }
824/// ```
825pub fn next(
826 state: &mut LuaState,
827 ls: &mut LexState,
828) -> Result<(), LuaError> {
829 ls.lastline = ls.linenumber;
830
831 if ls.lookahead.kind != TK_EOS {
832 // Clone to avoid borrow conflict; LuaString inside TokenValue is GcRef (Rc).
833 ls.t = ls.lookahead.clone();
834 ls.lookahead = Token::eos();
835 } else {
836 let mut val = TokenValue::None;
837 let kind = llex(state, ls, &mut val)?;
838 ls.t = Token { kind, value: val };
839 }
840 Ok(())
841}
842
843// LUAI_FUNC → pub(crate)
844/// Peek at the next token without consuming the current one.
845///
846/// The lookahead token is cached in `ls.lookahead` and returned. Only one
847/// token of lookahead is supported; calling this twice without an intervening
848/// [`next`] is a logic error (asserted in debug builds).
849///
850/// # C source
851/// ```c
852///
853/// // lua_assert(ls->lookahead.token == TK_EOS);
854/// // ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
855/// // return ls->lookahead.token;
856/// // }
857/// ```
858pub fn lookahead(
859 state: &mut LuaState,
860 ls: &mut LexState,
861) -> Result<i32, LuaError> {
862 // macros.tsv: lua_assert → debug_assert!
863 debug_assert!(
864 ls.lookahead.kind == TK_EOS,
865 "luaX_lookahead: lookahead already set"
866 );
867
868 let mut val = TokenValue::None;
869 let kind = llex(state, ls, &mut val)?;
870 ls.lookahead = Token { kind, value: val };
871
872 Ok(ls.lookahead.kind)
873}
874
875// ── Private lexer helpers ──────────────────────────────────────────────────────
876
877/// If the current character equals `c`, advance and return `true`.
878///
879/// # C source
880/// ```c
881///
882/// // if (ls->current == c) { next(ls); return 1; }
883/// // else return 0;
884/// // }
885/// ```
886fn check_next1(ls: &mut LexState, c: i32) -> bool {
887 if ls.current == c {
888 advance(ls);
889 true
890 } else {
891 false
892 }
893}
894
895/// If the current character is either of the two bytes in `set`, save-and-advance
896/// and return `true`.
897///
898/// # C source
899/// ```c
900///
901/// // lua_assert(set[2] == '\0');
902/// // if (ls->current == set[0] || ls->current == set[1]) {
903/// // save_and_next(ls);
904/// // return 1;
905/// // }
906/// // else return 0;
907/// // }
908/// ```
909fn check_next2(
910 ls: &mut LexState,
911 state: &mut LuaState,
912 set: &[u8; 2],
913) -> Result<bool, LuaError> {
914 if ls.current == set[0] as i32 || ls.current == set[1] as i32 {
915 save_and_next(ls, state)?;
916 Ok(true)
917 } else {
918 Ok(false)
919 }
920}
921
922/// Increment the line counter and consume the newline sequence.
923///
924/// Handles `\n`, `\r`, `\n\r`, and `\r\n`.
925///
926/// # C source
927/// ```c
928///
929/// // int old = ls->current;
930/// // lua_assert(currIsNewline(ls));
931/// // next(ls); /* skip '\n' or '\r' */
932/// // if (currIsNewline(ls) && ls->current != old)
933/// // next(ls); /* skip '\n\r' or '\r\n' */
934/// // if (++ls->linenumber >= MAX_INT)
935/// // lexerror(ls, "chunk has too many lines", 0);
936/// // }
937/// ```
938fn inc_line_number(ls: &mut LexState, _state: &mut LuaState) -> Result<(), LuaError> {
939 // macros.tsv: lua_assert → debug_assert!
940 debug_assert!(curr_is_newline(ls), "inc_line_number: not at a newline");
941
942 let old = ls.current;
943 advance(ls);
944
945 if curr_is_newline(ls) && ls.current != old {
946 advance(ls);
947 }
948
949 // macros.tsv: MAX_INT → i32::MAX
950 ls.linenumber += 1;
951 if ls.linenumber >= i32::MAX {
952 return Err(lex_error(ls, b"chunk has too many lines", 0));
953 }
954 Ok(())
955}
956
957/// Scan a numeric literal (integer or float, decimal or hex).
958///
959/// The caller may have already read an initial dot. Accepts the pattern:
960/// `%d(%x|%.|(Ee[+-]?))*` or `0[Xx](%x|%.|(Pp[+-]?))*`.
961///
962/// Returns `TK_INT` for integers, `TK_FLT` for floats.
963///
964/// # C source
965/// ```c
966///
967/// // TValue obj;
968/// // const char *expo = "Ee";
969/// // int first = ls->current;
970/// // lua_assert(lisdigit(ls->current));
971/// // save_and_next(ls);
972/// // if (first == '0' && check_next2(ls, "xX")) /* hexadecimal? */
973/// // expo = "Pp";
974/// // for (;;) {
975/// // if (check_next2(ls, expo))
976/// // check_next2(ls, "-+");
977/// // else if (lisxdigit(ls->current) || ls->current == '.')
978/// // save_and_next(ls);
979/// // else break;
980/// // }
981/// // if (lislalpha(ls->current)) /* numeral touching a letter? */
982/// // save_and_next(ls); /* force an error */
983/// // save(ls, '\0');
984/// // if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)
985/// // lexerror(ls, "malformed number", TK_FLT);
986/// // if (ttisinteger(&obj)) { seminfo->i = ivalue(&obj); return TK_INT; }
987/// // else { seminfo->r = fltvalue(&obj); return TK_FLT; }
988/// // }
989/// ```
990fn read_numeral(
991 state: &mut LuaState,
992 ls: &mut LexState,
993 seminfo: &mut TokenValue,
994) -> Result<i32, LuaError> {
995 let mut expo: &[u8; 2] = b"Ee";
996
997 let first = ls.current;
998
999 debug_assert!(is_digit(ls.current), "read_numeral: not at a digit");
1000
1001 save_and_next(ls, state)?;
1002
1003 if first == b'0' as i32 && check_next2(ls, state, b"xX")? {
1004 expo = b"Pp";
1005 }
1006
1007 loop {
1008 if check_next2(ls, state, expo)? {
1009 check_next2(ls, state, b"-+")?;
1010 } else if is_xdigit(ls.current) || ls.current == b'.' as i32 {
1011 // save_and_next(ls);
1012 save_and_next(ls, state)?;
1013 } else {
1014 break;
1015 }
1016 }
1017
1018 if is_lalpha(ls.current) {
1019 save_and_next(ls, state)?;
1020 }
1021
1022 // In Rust, luaO_str2num will receive a byte slice; NUL is not needed.
1023 // We save 0 for parity with C, but our str2num stub ignores it.
1024 save(ls, state, 0)?;
1025
1026 // lexerror(ls, "malformed number", TK_FLT);
1027 // macros.tsv: luaZ_buffer → buf.as_mut_slice()
1028 let buf = ls.buff.as_slice();
1029 let num_bytes = if buf.last() == Some(&0) { &buf[..buf.len() - 1] } else { buf };
1030 let mut obj = lua_types::LuaValue::Nil;
1031 if lua_vm::object::str2num(num_bytes, &mut obj) == 0 {
1032 return Err(lex_error(ls, b"malformed number", TK_FLT));
1033 }
1034 match obj {
1035 lua_types::LuaValue::Int(i) => {
1036 // Lua 5.1/5.2 are float-only: `lua_Number` is the only numeric type,
1037 // so every numeric literal is parsed as a float (`lua_str2number`),
1038 // including ones written without a decimal point. A literal like
1039 // 9007199254740993 therefore loses precision exactly as in lua5.2.4
1040 // (prints `9.007199254741e+15`), rather than surviving as an i64.
1041 if is_float_only(state) {
1042 *seminfo = TokenValue::Float(i as f64);
1043 Ok(TK_FLT)
1044 } else {
1045 *seminfo = TokenValue::Int(i);
1046 Ok(TK_INT)
1047 }
1048 }
1049 lua_types::LuaValue::Float(f) => {
1050 *seminfo = TokenValue::Float(f);
1051 Ok(TK_FLT)
1052 }
1053 _ => unreachable!("str2num returned non-numeric LuaValue"),
1054 }
1055}
1056
1057/// Scan a `[=*[` or `]=*]` sequence; leave the last bracket as current char.
1058///
1059/// Returns:
1060/// - `count + 2` if well-formed (where `count` is the number of `=` signs),
1061/// - `1` if a single bracket with no `=`s and no second bracket,
1062/// - `0` if malformed (e.g. `[==` with no closing bracket).
1063///
1064/// # C source
1065/// ```c
1066///
1067/// // size_t count = 0;
1068/// // int s = ls->current;
1069/// // lua_assert(s == '[' || s == ']');
1070/// // save_and_next(ls);
1071/// // while (ls->current == '=') {
1072/// // save_and_next(ls);
1073/// // count++;
1074/// // }
1075/// // return (ls->current == s) ? count + 2
1076/// // : (count == 0) ? 1
1077/// // : 0;
1078/// // }
1079/// ```
1080fn skip_sep(
1081 state: &mut LuaState,
1082 ls: &mut LexState,
1083) -> Result<usize, LuaError> {
1084 let mut count: usize = 0;
1085 let s = ls.current;
1086 debug_assert!(s == b'[' as i32 || s == b']' as i32, "skip_sep: not at bracket");
1087
1088 save_and_next(ls, state)?;
1089
1090 while ls.current == b'=' as i32 {
1091 save_and_next(ls, state)?;
1092 count += 1;
1093 }
1094
1095 if ls.current == s {
1096 Ok(count + 2)
1097 } else if count == 0 {
1098 Ok(1)
1099 } else {
1100 Ok(0)
1101 }
1102}
1103
1104/// Scan a long string or long comment delimited by `[=*[` … `]=*]`.
1105///
1106/// `seminfo` is `Some` when reading a string literal; `None` when skipping a
1107/// long comment. When `None`, buffer contents are discarded on each newline
1108/// to avoid wasting memory.
1109///
1110/// # C source
1111/// ```c
1112///
1113/// // int line = ls->linenumber;
1114/// // save_and_next(ls); /* skip 2nd '[' */
1115/// // if (currIsNewline(ls)) inclinenumber(ls);
1116/// // for (;;) {
1117/// // switch (ls->current) {
1118/// // case EOZ: { /* error */
1119/// // const char *what = (seminfo ? "string" : "comment");
1120/// // const char *msg = luaO_pushfstring(..., what, line);
1121/// // lexerror(ls, msg, TK_EOS);
1122/// // break;
1123/// // }
1124/// // case ']': {
1125/// // if (skip_sep(ls) == sep) {
1126/// // save_and_next(ls); /* skip 2nd ']' */
1127/// // goto endloop;
1128/// // }
1129/// // break;
1130/// // }
1131/// // case '\n': case '\r': {
1132/// // save(ls, '\n');
1133/// // inclinenumber(ls);
1134/// // if (!seminfo) luaZ_resetbuffer(ls->buff);
1135/// // break;
1136/// // }
1137/// // default: {
1138/// // if (seminfo) save_and_next(ls);
1139/// // else next(ls);
1140/// // }
1141/// // }
1142/// // } endloop:
1143/// // if (seminfo)
1144/// // seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
1145/// // luaZ_bufflen(ls->buff) - 2 * sep);
1146/// // }
1147/// ```
1148fn read_long_string(
1149 state: &mut LuaState,
1150 ls: &mut LexState,
1151 seminfo: Option<&mut TokenValue>,
1152 sep: usize,
1153) -> Result<(), LuaError> {
1154 let line = ls.linenumber;
1155
1156 save_and_next(ls, state)?;
1157
1158 if curr_is_newline(ls) {
1159 inc_line_number(ls, state)?;
1160 }
1161
1162 // is_string: whether we are reading a string (true) or a comment (false)
1163 let is_string = seminfo.is_some();
1164
1165 loop {
1166 match ls.current {
1167 c if c == EOZ => {
1168 let what: &[u8] = if is_string { b"string" } else { b"comment" };
1169 // PORT NOTE: build message as Vec<u8> to avoid String allocation.
1170 let mut msg: Vec<u8> = Vec::new();
1171 msg.extend_from_slice(b"unfinished long ");
1172 msg.extend_from_slice(what);
1173 msg.extend_from_slice(b" (starting at line ");
1174 let _ = write!(&mut msg, "{}", line);
1175 msg.push(b')');
1176 return Err(lex_error(ls, &msg, TK_EOS));
1177 }
1178 c if c == b']' as i32 => {
1179 let s = skip_sep(state, ls)?;
1180 if s == sep {
1181 save_and_next(ls, state)?;
1182 break;
1183 }
1184 // else: the ']' sequence wasn't the closing delimiter; continue
1185 }
1186 c if c == b'\n' as i32 || c == b'\r' as i32 => {
1187 save(ls, state, b'\n' as i32)?;
1188 inc_line_number(ls, state)?;
1189 // macros.tsv: luaZ_resetbuffer → buf.clear()
1190 if !is_string {
1191 ls.buff.clear();
1192 }
1193 }
1194 _ => {
1195 if is_string {
1196 save_and_next(ls, state)?;
1197 } else {
1198 advance(ls);
1199 }
1200 }
1201 }
1202 }
1203
1204 // seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
1205 // luaZ_bufflen(ls->buff) - 2 * sep);
1206 if let Some(out) = seminfo {
1207 // The buffer contains: sep bytes of '[=' + content + sep bytes of '=]'
1208 // We want the content in between.
1209 // PORT NOTE: per PORTING.md §4.3, capture the slice into an owned
1210 // Vec so the immutable borrow of ls.buff is dropped before the
1211 // mutable borrow needed by new_string.
1212 let buf = ls.buff.as_slice();
1213 let content: Vec<u8> = buf[sep..buf.len() - sep].to_vec();
1214 let ts = new_string(state, ls, &content)?;
1215 *out = TokenValue::Str(ts);
1216 }
1217 Ok(())
1218}
1219
1220/// Check `c` is non-zero (truthy); if not, save the current char and raise a
1221/// string-escape error.
1222///
1223/// # C source
1224/// ```c
1225///
1226/// // if (!c) {
1227/// // if (ls->current != EOZ)
1228/// // save_and_next(ls); /* add current to buffer for error message */
1229/// // lexerror(ls, msg, TK_STRING);
1230/// // }
1231/// // }
1232/// ```
1233fn esc_check(
1234 state: &mut LuaState,
1235 ls: &mut LexState,
1236 ok: bool,
1237 msg: &[u8],
1238) -> Result<(), LuaError> {
1239 if !ok {
1240 if ls.current != EOZ {
1241 save_and_next(ls, state)?;
1242 }
1243 return Err(lex_error(ls, msg, TK_STRING));
1244 }
1245 Ok(())
1246}
1247
1248/// Save-and-advance, then verify the new current char is a hex digit; return
1249/// its numeric value (0-15).
1250///
1251/// # C source
1252/// ```c
1253///
1254/// // save_and_next(ls);
1255/// // esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
1256/// // return luaO_hexavalue(ls->current);
1257/// // }
1258/// ```
1259fn get_hexa(
1260 state: &mut LuaState,
1261 ls: &mut LexState,
1262) -> Result<u32, LuaError> {
1263 save_and_next(ls, state)?;
1264 esc_check(state, ls, is_xdigit(ls.current), b"hexadecimal digit expected")?;
1265 // TODO(port): call lua_vm::object::hex_value in Phase B
1266 Ok(hex_value_stub(ls.current))
1267}
1268
1269/// Scan a `\xNN` hex escape; return the decoded byte value.
1270///
1271/// # C source
1272/// ```c
1273///
1274/// // int r = gethexa(ls);
1275/// // r = (r << 4) + gethexa(ls);
1276/// // luaZ_buffremove(ls->buff, 2); /* remove saved chars from buffer */
1277/// // return r;
1278/// // }
1279/// ```
1280fn read_hex_esc(
1281 state: &mut LuaState,
1282 ls: &mut LexState,
1283) -> Result<u32, LuaError> {
1284 let r = get_hexa(state, ls)?;
1285 let r = (r << 4) + get_hexa(state, ls)?;
1286 // macros.tsv: luaZ_buffremove → buf.truncate_by(i)
1287 ls.buff.truncate_by(2);
1288 Ok(r)
1289}
1290
1291/// Scan a `\u{XXXXXX}` UTF-8 escape; return the Unicode codepoint.
1292///
1293/// # C source
1294/// ```c
1295///
1296/// // unsigned long r;
1297/// // int i = 4; /* chars to remove: '\', 'u', '{', first digit */
1298/// // save_and_next(ls); /* skip 'u' */
1299/// // esccheck(ls, ls->current == '{', "missing '{'");
1300/// // r = gethexa(ls); /* must have at least one digit */
1301/// // while (cast_void(save_and_next(ls)), lisxdigit(ls->current)) {
1302/// // i++;
1303/// // esccheck(ls, r <= (0x7FFFFFFFu >> 4), "UTF-8 value too large");
1304/// // r = (r << 4) + luaO_hexavalue(ls->current);
1305/// // }
1306/// // esccheck(ls, ls->current == '}', "missing '}'");
1307/// // next(ls); /* skip '}' */
1308/// // luaZ_buffremove(ls->buff, i);
1309/// // return r;
1310/// // }
1311/// ```
1312fn read_utf8_esc(
1313 state: &mut LuaState,
1314 ls: &mut LexState,
1315) -> Result<u32, LuaError> {
1316 let mut i: usize = 4;
1317
1318 save_and_next(ls, state)?;
1319
1320 esc_check(state, ls, ls.current == b'{' as i32, b"missing '{'")?;
1321
1322 let mut r = get_hexa(state, ls)?;
1323
1324 // The codepoint upper bound is version-gated and the C control flow differs
1325 // between families (`llex.c readutf8esc`):
1326 // * 5.3 (L336-340): `r = (r<<4)+digit; esccheck(r <= 0x10FFFF, ...)` —
1327 // accumulate the digit FIRST, then bound the running value at 0x10FFFF.
1328 // * 5.4 (L351) / 5.5 (L373): `esccheck(r <= (0x7FFFFFFFu >> 4), ...);
1329 // r = (r<<4)+digit` — bound BEFORE the shift, allowing up to 0x7FFFFFFF.
1330 // The order (check-before-shift vs shift-before-check) is reproduced exactly
1331 // because it also determines how many digits land in the `near '...'` buffer
1332 // snippet of the error message.
1333 let is_v53 = matches!(state.global().lua_version, lua_types::LuaVersion::V53);
1334
1335 // cast_void: discard return value
1336 loop {
1337 save_and_next(ls, state)?;
1338 if !is_xdigit(ls.current) {
1339 break;
1340 }
1341 i += 1;
1342 if is_v53 {
1343 // TODO(port): lua_vm::object::hex_value in Phase B
1344 r = (r << 4) + hex_value_stub(ls.current);
1345 esc_check(state, ls, r <= 0x10_FFFF, b"UTF-8 value too large")?;
1346 } else {
1347 esc_check(state, ls, r <= (0x7FFF_FFFFu32 >> 4), b"UTF-8 value too large")?;
1348 // TODO(port): lua_vm::object::hex_value in Phase B
1349 r = (r << 4) + hex_value_stub(ls.current);
1350 }
1351 }
1352
1353 esc_check(state, ls, ls.current == b'}' as i32, b"missing '}'")?;
1354
1355 advance(ls);
1356
1357 ls.buff.truncate_by(i);
1358
1359 Ok(r)
1360}
1361
1362/// Scan `\u{...}` and append the UTF-8 encoding of the codepoint to the buffer.
1363///
1364/// # C source
1365/// ```c
1366///
1367/// // char buff[UTF8BUFFSZ];
1368/// // int n = luaO_utf8esc(buff, readutf8esc(ls));
1369/// // for (; n > 0; n--)
1370/// // save(ls, buff[UTF8BUFFSZ - n]);
1371/// // }
1372/// ```
1373fn utf8_esc(
1374 state: &mut LuaState,
1375 ls: &mut LexState,
1376) -> Result<(), LuaError> {
1377 let codepoint = read_utf8_esc(state, ls)?;
1378
1379 // macros.tsv: UTF8BUFFSZ → const UTF8_BUF_SZ: usize = 8
1380 // TODO(port): call lua_vm::object::utf8_esc_encode(codepoint) in Phase B.
1381 // For Phase A, encode directly here.
1382 let encoded = utf8_encode_stub(codepoint);
1383
1384 for &b in &encoded {
1385 save(ls, state, b as i32)?;
1386 }
1387 Ok(())
1388}
1389
1390/// Scan a decimal escape `\ddd` (up to 3 digits); return the byte value.
1391///
1392/// # C source
1393/// ```c
1394///
1395/// // int i;
1396/// // int r = 0;
1397/// // for (i = 0; i < 3 && lisdigit(ls->current); i++) {
1398/// // r = 10*r + ls->current - '0';
1399/// // save_and_next(ls);
1400/// // }
1401/// // esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
1402/// // luaZ_buffremove(ls->buff, i); /* remove read digits from buffer */
1403/// // return r;
1404/// // }
1405/// ```
1406fn read_dec_esc(
1407 state: &mut LuaState,
1408 ls: &mut LexState,
1409) -> Result<u32, LuaError> {
1410 let mut i: usize = 0;
1411 let mut r: u32 = 0;
1412
1413 while i < 3 && is_digit(ls.current) {
1414 r = 10 * r + (ls.current as u32 - b'0' as u32);
1415 save_and_next(ls, state)?;
1416 i += 1;
1417 }
1418
1419 // UCHAR_MAX = 255 = u8::MAX. Lua 5.1 spells this `escape sequence too
1420 // large` (the `decimal escape too large` wording is 5.2+). Verified against
1421 // lua5.1.5; see specs/followup/5.1-roster-syntax.md §2.
1422 let too_large_msg: &[u8] = if matches!(
1423 state.global().lua_version,
1424 lua_types::LuaVersion::V51
1425 ) {
1426 b"escape sequence too large"
1427 } else {
1428 b"decimal escape too large"
1429 };
1430 esc_check(state, ls, r <= u8::MAX as u32, too_large_msg)?;
1431
1432 ls.buff.truncate_by(i);
1433 Ok(r)
1434}
1435
1436/// Scan a short (single/double-quoted) string literal.
1437///
1438/// The C function uses `goto read_save / only_save / no_save` for escape
1439/// handling. In Rust this is replaced by the `EscapeResult` enum.
1440///
1441/// # C source (see llex.c lines 382-442 for full listing)
1442fn read_string(
1443 state: &mut LuaState,
1444 ls: &mut LexState,
1445 del: i32,
1446 seminfo: &mut TokenValue,
1447) -> Result<(), LuaError> {
1448 // Encoding for what the escape sequence handler needs to do after decoding.
1449 //
1450 // read_save: advance(ls), remove '\' from buffer, save decoded byte
1451 // only_save: remove '\' from buffer, save decoded byte (no advance)
1452 // no_save: nothing (just break from the escape case)
1453 enum EscapeResult {
1454 ReadSave(i32),
1455 OnlySave(i32),
1456 NoSave,
1457 }
1458
1459 save_and_next(ls, state)?;
1460
1461 while ls.current != del {
1462 match ls.current {
1463 c if c == EOZ => {
1464 return Err(lex_error(ls, b"unfinished string", TK_EOS));
1465 }
1466 c if c == b'\n' as i32 || c == b'\r' as i32 => {
1467 return Err(lex_error(ls, b"unfinished string", TK_STRING));
1468 }
1469 c if c == b'\\' as i32 => {
1470 save_and_next(ls, state)?;
1471
1472 // Lua 5.1's lexer does NOT recognize `\x`, `\z`, or `\u`, and it
1473 // does NOT raise on an unknown escape. For any escape char outside
1474 // the known set, the 5.1 lexer silently drops the backslash and
1475 // keeps the next character verbatim (`"\x41"` → bytes `x41`,
1476 // `"\z"` → `z`, `"\q"` → `q`). Decimal escapes (`\ddd`) and the
1477 // standard letter/quote/newline escapes still work. Verified
1478 // against lua5.1.5; see specs/followup/5.1-roster-syntax.md §2.
1479 let is_v51 = matches!(
1480 state.global().lua_version,
1481 lua_types::LuaVersion::V51
1482 );
1483
1484 // Inner switch on the escape character
1485 let esc = match ls.current {
1486 c if c == b'a' as i32 => EscapeResult::ReadSave(b'\x07' as i32),
1487 c if c == b'b' as i32 => EscapeResult::ReadSave(b'\x08' as i32),
1488 c if c == b'f' as i32 => EscapeResult::ReadSave(b'\x0C' as i32),
1489 c if c == b'n' as i32 => EscapeResult::ReadSave(b'\n' as i32),
1490 c if c == b'r' as i32 => EscapeResult::ReadSave(b'\r' as i32),
1491 c if c == b't' as i32 => EscapeResult::ReadSave(b'\t' as i32),
1492 c if c == b'v' as i32 => EscapeResult::ReadSave(b'\x0B' as i32),
1493 c if c == b'x' as i32 && !is_v51 => {
1494 let decoded = read_hex_esc(state, ls)?;
1495 EscapeResult::ReadSave(decoded as i32)
1496 }
1497 c if c == b'u' as i32 && !is_v51 => {
1498 utf8_esc(state, ls)?;
1499 EscapeResult::NoSave
1500 }
1501 c if c == b'\n' as i32 || c == b'\r' as i32 => {
1502 inc_line_number(ls, state)?;
1503 EscapeResult::OnlySave(b'\n' as i32)
1504 }
1505 c if c == b'\\' as i32 || c == b'"' as i32 || c == b'\'' as i32 => {
1506 EscapeResult::ReadSave(c)
1507 }
1508 c if c == EOZ => EscapeResult::NoSave,
1509 c if c == b'z' as i32 && !is_v51 => {
1510 ls.buff.truncate_by(1);
1511 advance(ls);
1512 while is_space(ls.current) {
1513 if curr_is_newline(ls) {
1514 inc_line_number(ls, state)?;
1515 } else {
1516 advance(ls);
1517 }
1518 }
1519 EscapeResult::NoSave
1520 }
1521 c if is_v51 && !is_digit(c) => {
1522 // 5.1 unknown escape: drop the backslash, emit the char.
1523 EscapeResult::ReadSave(c)
1524 }
1525 _ => {
1526 esc_check(
1527 state, ls,
1528 is_digit(ls.current),
1529 b"invalid escape sequence",
1530 )?;
1531 let decoded = read_dec_esc(state, ls)?;
1532 EscapeResult::OnlySave(decoded as i32)
1533 }
1534 };
1535
1536 // Dispatch the C goto targets as match arms.
1537 match esc {
1538 EscapeResult::ReadSave(c) => {
1539 advance(ls);
1540 ls.buff.truncate_by(1);
1541 save(ls, state, c)?;
1542 }
1543 EscapeResult::OnlySave(c) => {
1544 ls.buff.truncate_by(1);
1545 save(ls, state, c)?;
1546 }
1547 EscapeResult::NoSave => {}
1548 }
1549 }
1550 _ => {
1551 save_and_next(ls, state)?;
1552 }
1553 }
1554 }
1555
1556 save_and_next(ls, state)?;
1557
1558 // luaZ_bufflen(ls->buff) - 2);
1559 // Buffer contains: delimiter + content + delimiter; strip both delimiters.
1560 // PORT NOTE: capture into owned Vec to drop the borrow before new_string.
1561 let buf = ls.buff.as_slice();
1562 let content: Vec<u8> = if buf.len() >= 2 {
1563 buf[1..buf.len() - 1].to_vec()
1564 } else {
1565 Vec::new()
1566 };
1567 let ts = new_string(state, ls, &content)?;
1568 *seminfo = TokenValue::Str(ts);
1569 Ok(())
1570}
1571
1572/// Core lexer dispatch: consume and return the next raw token kind.
1573///
1574/// This is the heart of the lexer: a large `for`-`switch` loop that classifies
1575/// the current character and dispatches to the appropriate scanner.
1576///
1577/// # C source (see llex.c lines 445-562 for full listing)
1578/// Whether the active version is the float-only legacy family (5.1/5.2), which
1579/// lacks the 5.3 integer operators (`//`, `<<`, `>>`, and the bitwise binops).
1580fn is_float_only(state: &LuaState) -> bool {
1581 matches!(
1582 state.global().lua_version,
1583 lua_types::LuaVersion::V51 | lua_types::LuaVersion::V52
1584 )
1585}
1586
1587fn llex(
1588 state: &mut LuaState,
1589 ls: &mut LexState,
1590 seminfo: &mut TokenValue,
1591) -> Result<i32, LuaError> {
1592 // macros.tsv: luaZ_resetbuffer → buf.clear()
1593 ls.buff.clear();
1594
1595 loop {
1596 match ls.current {
1597 c if c == b'\n' as i32 || c == b'\r' as i32 => {
1598 inc_line_number(ls, state)?;
1599 // PORT NOTE: skipcomment-equivalent. luaL_loadfile in C-Lua
1600 // strips a leading '#' line (Unix shebang). Our test harness
1601 // prepends a global-setup preamble to every official test, so
1602 // the script's '#' line is not at byte zero. Apply the same
1603 // rule at any token-scan line start: treat a line whose first
1604 // character is '#' as a single-line comment. This sits in
1605 // llex's dispatch loop (not inc_line_number) so it does not
1606 // affect newlines inside long-bracket strings.
1607 if ls.current == b'#' as i32 {
1608 while !curr_is_newline(ls) && ls.current != EOZ {
1609 advance(ls);
1610 }
1611 }
1612 }
1613
1614 c if c == b' ' as i32
1615 || c == b'\x0C' as i32
1616 || c == b'\t' as i32
1617 || c == b'\x0B' as i32 =>
1618 {
1619 advance(ls);
1620 }
1621
1622 c if c == b'-' as i32 => {
1623 advance(ls);
1624 if ls.current != b'-' as i32 {
1625 return Ok(b'-' as i32);
1626 }
1627 advance(ls);
1628
1629 if ls.current == b'[' as i32 {
1630 let sep = skip_sep(state, ls)?;
1631 ls.buff.clear();
1632 if sep >= 2 {
1633 read_long_string(state, ls, None, sep)?;
1634 ls.buff.clear();
1635 continue;
1636 }
1637 }
1638 while !curr_is_newline(ls) && ls.current != EOZ {
1639 advance(ls);
1640 }
1641 // loop continues (no token emitted for comments)
1642 }
1643
1644 c if c == b'[' as i32 => {
1645 let sep = skip_sep(state, ls)?;
1646 if sep >= 2 {
1647 read_long_string(state, ls, Some(seminfo), sep)?;
1648 return Ok(TK_STRING);
1649 } else if sep == 0 {
1650 return Err(lex_error(ls, b"invalid long string delimiter", TK_STRING));
1651 }
1652 // sep == 1: plain '[', no long string
1653 return Ok(b'[' as i32);
1654 }
1655
1656 c if c == b'=' as i32 => {
1657 advance(ls);
1658 if check_next1(ls, b'=' as i32) {
1659 return Ok(TK_EQ);
1660 }
1661 return Ok(b'=' as i32);
1662 }
1663
1664 c if c == b'<' as i32 => {
1665 advance(ls);
1666 if check_next1(ls, b'=' as i32) {
1667 return Ok(TK_LE);
1668 } else if !is_float_only(state) && check_next1(ls, b'<' as i32) {
1669 // The `<<` shift operator is a Lua 5.3 addition. Under the
1670 // float-only legacy family (5.1/5.2) it does not exist: a
1671 // bare `<` is returned, so a second `<` then surfaces
1672 // upstream's "unexpected symbol near '<'".
1673 return Ok(TK_SHL);
1674 }
1675 return Ok(b'<' as i32);
1676 }
1677
1678 c if c == b'>' as i32 => {
1679 advance(ls);
1680 if check_next1(ls, b'=' as i32) {
1681 return Ok(TK_GE);
1682 } else if !is_float_only(state) && check_next1(ls, b'>' as i32) {
1683 // `>>` is a 5.3 addition; absent in 5.1/5.2.
1684 return Ok(TK_SHR);
1685 }
1686 return Ok(b'>' as i32);
1687 }
1688
1689 c if c == b'/' as i32 => {
1690 advance(ls);
1691 if !is_float_only(state) && check_next1(ls, b'/' as i32) {
1692 // Floor division `//` is a 5.3 addition; absent in 5.1/5.2,
1693 // where the second `/` becomes "unexpected symbol near '/'".
1694 return Ok(TK_IDIV);
1695 }
1696 return Ok(b'/' as i32);
1697 }
1698
1699 c if c == b'~' as i32 => {
1700 advance(ls);
1701 if check_next1(ls, b'=' as i32) {
1702 return Ok(TK_NE);
1703 }
1704 return Ok(b'~' as i32);
1705 }
1706
1707 c if c == b':' as i32 => {
1708 advance(ls);
1709 // Lua 5.1 has no `::label::` token; `::` was added with `goto` in
1710 // 5.2. Under V51 the second `:` is left for the parser, which
1711 // reports `unexpected symbol near ':'`. See
1712 // specs/followup/5.1-roster-syntax.md §2.
1713 let is_v51 = matches!(
1714 state.global().lua_version,
1715 lua_types::LuaVersion::V51
1716 );
1717 if !is_v51 && check_next1(ls, b':' as i32) {
1718 return Ok(TK_DBCOLON);
1719 }
1720 return Ok(b':' as i32);
1721 }
1722
1723 c if c == b'"' as i32 || c == b'\'' as i32 => {
1724 let del = ls.current;
1725 read_string(state, ls, del, seminfo)?;
1726 return Ok(TK_STRING);
1727 }
1728
1729 c if c == b'.' as i32 => {
1730 save_and_next(ls, state)?;
1731 if check_next1(ls, b'.' as i32) {
1732 if check_next1(ls, b'.' as i32) {
1733 return Ok(TK_DOTS);
1734 }
1735 return Ok(TK_CONCAT);
1736 } else if !is_digit(ls.current) {
1737 return Ok(b'.' as i32);
1738 } else {
1739 return read_numeral(state, ls, seminfo);
1740 }
1741 }
1742
1743 c if is_digit(c) => {
1744 return read_numeral(state, ls, seminfo);
1745 }
1746
1747 c if c == EOZ => {
1748 return Ok(TK_EOS);
1749 }
1750
1751 c => {
1752 if is_lalpha(c) {
1753 loop {
1754 save_and_next(ls, state)?;
1755 if !is_lalnum(ls.current) {
1756 break;
1757 }
1758 }
1759
1760 // PORT NOTE: copy buffer bytes to drop borrow before new_string.
1761 let content: Vec<u8> = ls.buff.as_slice().to_vec();
1762 let ts = new_string(state, ls, &content)?;
1763
1764 // PORT NOTE: canonical `lua_types::LuaString` lacks the `extra`
1765 // byte that C-Lua uses to mark reserved words. Recover the
1766 // keyword index directly from the interned bytes via the
1767 // `LUAX_TOKENS` table; the first `NUM_RESERVED` entries are
1768 // the keywords in declaration order, so token id =
1769 // `FIRST_RESERVED + index`.
1770 let reserved_token: Option<i32> = LUAX_TOKENS[..NUM_RESERVED]
1771 .iter()
1772 .position(|kw| *kw == content.as_slice())
1773 .map(|i| FIRST_RESERVED + i as i32);
1774 *seminfo = TokenValue::Str(ts);
1775
1776 if let Some(tk) = reserved_token {
1777 // Lua 5.1 has no `goto` keyword — `goto` is an ordinary
1778 // identifier (`local goto = 5` is valid). The keyword and
1779 // the `::label::` grammar were added in 5.2. So under V51
1780 // `goto` lexes as a plain name; the parser then treats
1781 // `goto done` as a name beginning an assignment, yielding
1782 // the incidental `'=' expected near 'done'` the oracle
1783 // reports. See specs/followup/5.1-roster-syntax.md §2.
1784 if tk == TK_GOTO
1785 && matches!(
1786 state.global().lua_version,
1787 lua_types::LuaVersion::V51
1788 )
1789 {
1790 return Ok(TK_NAME);
1791 }
1792 return Ok(tk);
1793 }
1794
1795 // Lua 5.5: with the upstream-default `LUA_COMPAT_GLOBAL`, the
1796 // `global` declaration word is NOT reserved — `global` stays a
1797 // valid identifier, and the parser recognizes the declaration
1798 // statement contextually (see `globalstat` in lua-parse). So
1799 // `global` always lexes as a plain name, on every version.
1800 return Ok(TK_NAME);
1801 } else {
1802 let tok = ls.current;
1803 advance(ls);
1804 return Ok(tok);
1805 }
1806 }
1807 }
1808 }
1809}
1810
1811// ── Phase A stubs for cross-crate helpers ──────────────────────────────────────
1812//
1813// The functions below stand in for cross-crate calls that cannot resolve in
1814// Phase A. They will be replaced by proper imports in Phase B.
1815
// Thin forwarding wrapper: interning already goes through
// `LuaState::intern_str`, which is the routing the earlier Phase A notes on
// this function asked for.
// TODO(port): inline this wrapper at its call sites and delete it in Phase B.
// TODO_ARCH(phase-b-reconcile): NOTE(review) — presumably nothing on this path
// still builds `LuaString` via `from_bytes`; confirm before closing this item.
fn intern_str_stub(
    state: &mut LuaState,
    bytes: &[u8],
) -> Result<GcRef<LuaString>, LuaError> {
    state.intern_str(bytes)
}
1826
// TODO(port): replace with lua_vm::object::hex_value(c) in Phase B.
/// Numeric value (0–15) of the ASCII hex digit `c`.
///
/// Accepts `0-9`, `a-f` and `A-F`. Any other input — including negative
/// values and values above the byte range — yields `0`, so callers must
/// validate the character first if they need to tell `'0'` from "not a digit".
fn hex_value_stub(c: i32) -> u32 {
    // Reject everything outside ASCII up front so the narrowing cast below
    // can never alias a large or negative value onto a digit.
    if !(0..=0x7F).contains(&c) {
        return 0;
    }
    char::from(c as u8).to_digit(16).unwrap_or(0)
}
1836
// TODO(port): replace with lua_vm::object::utf8_esc_encode(codepoint) in Phase B.
/// Encode a codepoint as a Lua-extended UTF-8 byte sequence (1 to 6 bytes).
///
/// Port of `luaO_utf8esc` from lobject.c. Lua permits codepoints up to
/// `0x7FFFFFFF`; the resulting 5- and 6-byte sequences are not strict UTF-8
/// but are accepted by `\u{...}` escapes (see the literals.lua test cases).
/// Larger inputs trip the `debug_assert!` in debug builds.
///
/// The bytes are returned in output order, lead byte first.
fn utf8_encode_stub(codepoint: u32) -> Vec<u8> {
    debug_assert!(codepoint <= 0x7FFF_FFFF);

    // Scratch space filled from the back, like the C original.
    const SCRATCH: usize = 8;
    let mut scratch = [0u8; SCRATCH];

    if codepoint < 0x80 {
        // Plain ASCII: a single byte.
        return vec![codepoint as u8];
    }

    let mut remaining = codepoint;
    // Largest value that still fits in the payload bits of the lead byte.
    let mut lead_capacity: u32 = 0x3f;
    let mut used: usize = 0;

    loop {
        // Emit one continuation byte carrying the low six bits.
        used += 1;
        scratch[SCRATCH - used] = 0x80 | (remaining & 0x3f) as u8;
        remaining >>= 6;
        // Each continuation byte costs the lead byte one payload bit.
        lead_capacity >>= 1;
        if remaining <= lead_capacity {
            break;
        }
    }

    // Lead byte: length marker bits followed by the remaining payload.
    used += 1;
    scratch[SCRATCH - used] = ((!lead_capacity << 1) | remaining) as u8;

    scratch[SCRATCH - used..].to_vec()
}
1863
1864// ──────────────────────────────────────────────────────────────────────────────
1865// PORT STATUS
1866// source: src/llex.c (581 lines, 24 functions)
1867// src/llex.h (91 lines; merged)
1868// target_crate: lua-lex
1869// confidence: medium
1870// todos: 18
1871// port_notes: 12
1872// unsafe_blocks: 0 (must be 0 outside explicit unsafe-budget crates)
1873// notes: Logic is faithful to the C. The main structural differences:
1874// (1) LexState.L removed — state threaded via fn params;
1875// (2) save/save_and_next/inclinenumber/helpers are all fallible
1876// (Result<_, LuaError>) because lexerror is no longer noreturn;
1877// (3) goto read_save/only_save/no_save in read_string replaced
//              by EscapeResult enum; (4) Cross-crate calls
//              (luaH_getstr/finishset, luaG_addinfo, luaO_hexavalue,
//              luaO_utf8esc, luaC_fix, luaC_checkGC) are stubbed with
//              TODO, while intern_str and luaO_str2num already forward
//              to LuaState::intern_str and lua_vm::object::str2num;
//              (5) LuaError, LuaString, LuaState and LuaTable are
//              imported from their owner crates; LexBuffer (and ZIO, if
//              it is still declared locally) remain local stubs that
//              Phase B replaces with real imports. Key Phase B tasks:
1884// wire import paths; move LuaString.extra accessor to pub;
1885// implement luaX_newstring anchor-table logic. Numeric
1886// literal parsing now delegates to lua_vm::object::str2num
1887// (handles hex integers with wrap-around and hex floats).
1888// ──────────────────────────────────────────────────────────────────────────────