Skip to main content

lua_stdlib/
string_lib.rs

1//! Standard library for string operations and pattern-matching.
2//!
3//! Port of `lstrlib.c` (Lua 5.4.7, 1875 lines, 46 functions).
4//!
5//! Sections:
6//!   1. Basic string operations (byte, char, find, format, gmatch, gsub, len,
7//!      lower, match, rep, reverse, sub, upper)
8//!   2. Pattern-matching engine (MatchState + recursive matcher)
9//!   3. String format (`string.format`)
10//!   4. Pack / unpack (`string.pack`, `string.packsize`, `string.unpack`)
11//!   5. Module registration (`luaopen_string`)
12
13use lua_types::error::LuaError;
14use lua_types::value::LuaValue;
15use lua_types::arith::ArithOp;
16use lua_types::{LuaType};
17use lua_vm::state::LuaTableRefExt as _;
18use crate::state_stub::{LuaState, LuaStateStubExt as _, lua_CFunction, upvalue_index};
19
20// ────────────────────────────────────────────────────────────────────────────
21// Constants
22// ────────────────────────────────────────────────────────────────────────────
23
/// Capacity of `MatchState::captures`; `start_capture` raises
/// "too many captures" once this many slots are in use.
const LUA_MAX_CAPTURES: usize = 32;

/// Initial value of `MatchState::matchdepth`: how deeply `match_pat` may nest
/// before it raises "pattern too complex".
const MAX_CC_CALLS: i32 = 200;

/// Escape character inside patterns (`%`).
const L_ESC: u8 = b'%';

/// Pattern metacharacters.
/// NOTE(review): not referenced in this part of the file — presumably used to
/// decide whether a pattern can be searched as plain text; confirm at the use site.
const SPECIALS: &[u8] = b"^$*+?.([%-";

/// Sentinel stored in `Capture::len` while a capture is open but not yet closed.
const CAP_UNFINISHED: isize = -1;

/// Sentinel stored in `Capture::len` for a position capture (`()`).
const CAP_POSITION: isize = -2;

// NOTE(review): the next three limits are unused so far (see the `expect`
// reasons); presumably they bound `string.format` buffers as in `lstrlib.c` —
// confirm once `string.format` is wired in.
#[expect(dead_code, reason = "ported stdlib helper; not yet wired into the runtime")]
const MAX_ITEM: usize = 120;

#[expect(dead_code, reason = "ported stdlib helper; not yet wired into the runtime")]
const MAX_ITEM_F: usize = 418;

#[expect(dead_code, reason = "ported stdlib helper; not yet wired into the runtime")]
const MAX_FORMAT: usize = 32;

// NOTE(review): presumably the widest integer, in bytes, accepted by
// pack/unpack — confirm at the use site.
const MAX_INT_SIZE: usize = 16;

// On platforms where size_t is at least as wide as int (all our targets), this
// collapses to INT_MAX so that packed sizes round-trip through a Lua integer
// without ambiguity.
const PACK_MAXSIZE: usize = i32::MAX as usize;

// NOTE(review): `NB` / `MC` look like "bits per byte" and "byte mask" for the
// pack/unpack integer codecs — confirm at the use site.
const NB: u32 = 8;

const MC: u8 = 0xFF;

const SZINT: usize = 8; // sizeof(i64) == 8

// NOTE(review): presumably the filler byte written for alignment padding by
// `string.pack` — confirm at the use site.
const PACK_PAD_BYTE: u8 = 0x00;
59
60// ────────────────────────────────────────────────────────────────────────────
61// Pattern-matching types
62// ────────────────────────────────────────────────────────────────────────────
63
64/// One capture record inside MatchState.
65///
66/// In Rust, `init` is an index into `MatchState::src`; `len` is either a
67/// non-negative actual length, `CAP_UNFINISHED`, or `CAP_POSITION`.
68#[derive(Copy, Clone)]
69struct Capture {
70    /// Index into the source slice where this capture started.
71    init: usize,
72    /// CAP_UNFINISHED, CAP_POSITION, or non-negative byte count.
73    len: isize,
74}
75
76impl Default for Capture {
77    fn default() -> Self {
78        Capture { init: 0, len: CAP_UNFINISHED }
79    }
80}
81
82/// State threaded through the recursive pattern-matcher.
83///
84/// Raw C pointers replaced by indices into `src` / `pat` slices.
85struct MatchState<'a> {
86    /// Source string being searched.
87    src: &'a [u8],
88    /// Pattern string.
89    pat: &'a [u8],
90    /// Recursion depth counter; decremented on entry, incremented on return.
91    matchdepth: i32,
92    /// Number of capture records currently in use.
93    level: u8,
94    /// Capture records indexed `0..level`.
95    captures: [Capture; LUA_MAX_CAPTURES],
96    /// Total `match_pat` invocations across the whole operation. Used to bound
97    /// catastrophic backtracking under a sandbox; charged against the
98    /// instruction budget by the caller.
99    steps: u64,
100    /// Maximum `steps` before the matcher stops. `0` means unlimited (no active
101    /// instruction budget), preserving non-sandboxed behavior exactly.
102    step_limit: u64,
103    /// Set when `step_limit` is reached; the matcher then unwinds to the caller,
104    /// which charges the budget and raises the uncatchable sandbox abort.
105    aborted: bool,
106}
107
108impl<'a> MatchState<'a> {
109    fn new(src: &'a [u8], pat: &'a [u8], step_limit: u64) -> Self {
110        MatchState {
111            src,
112            pat,
113            matchdepth: MAX_CC_CALLS,
114            level: 0,
115            captures: [Capture::default(); LUA_MAX_CAPTURES],
116            steps: 0,
117            step_limit,
118            aborted: false,
119        }
120    }
121
122    fn reset_level(&mut self) {
123        self.level = 0;
124        debug_assert!(self.matchdepth == MAX_CC_CALLS);
125    }
126}
127
/// Iterator state for `string.gmatch`.
///
/// The C implementation keeps this as userdata on the Lua stack; in Phase A
/// it is a plain Rust struct that owns copies of both the source and the
/// pattern.
///
/// TODO(port): In the real port, this needs to live in a Lua userdata object
/// so that Lua GC can see it. For now it's a plain struct passed by
/// `state.to_userdata()`.
#[expect(dead_code, reason = "ported stdlib helper; not yet wired into the runtime")]
struct GMatchState {
    /// Current position in `src` (index into the source bytes).
    src_pos: usize,
    /// The pattern string (owned copy so it survives the closure).
    pat: Vec<u8>,
    /// End of the last match (to avoid zero-length infinite loops);
    /// NOTE(review): presumably `None` until a first match exists — confirm at the use site.
    last_match: Option<usize>,
    /// Source string (owned copy).
    src: Vec<u8>,
}
147
148// ────────────────────────────────────────────────────────────────────────────
149// Pack/unpack types
150// ────────────────────────────────────────────────────────────────────────────
151
/// Kind of item described by one option of a pack/unpack format string.
///
/// The per-variant notes name the item each variant stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum KOption {
    Int,        // signed integers
    Uint,       // unsigned integers
    Float,      // single-precision float (C float)
    Number,     // Lua native float (lua_Number = f64)
    Double,     // double-precision float (C double)
    Char,       // fixed-length string
    Kstring,    // string with length prefix
    Zstr,       // zero-terminated string
    Padding,    // padding byte (x)
    Paddalign,  // padding to alignment (X)
    Nop,        // no-op (space, <, >, =, !)
}
168
/// Parsing state carried while walking a pack/unpack format string.
struct Header {
    /// Whether multi-byte items are currently little-endian.
    is_little: bool,
    /// Current maximum alignment.
    max_align: usize,
    /// 5.5 widened `c`/`s`-size parsing from `int` (5.3/5.4) to `size_t`, so
    /// `c<huge>` numerals that overflowed `int` (and tripped "invalid format
    /// option '<digit>'") are now accepted up to `LUA_MAXINTEGER`.
    wide_size: bool,
}

impl Header {
    /// Start with the compile target's endianness, an alignment of 1, and the
    /// caller-selected size-parsing mode.
    fn new(wide_size: bool) -> Self {
        let native_is_little = cfg!(target_endian = "little");
        Self {
            wide_size,
            max_align: 1,
            is_little: native_is_little,
        }
    }
}
189
190// ────────────────────────────────────────────────────────────────────────────
191// §1  Basic string helpers
192// ────────────────────────────────────────────────────────────────────────────
193
/// Resolve a Lua-style start position against a string of `len` bytes.
///
/// Positive positions are returned unchanged (even beyond `len`). Zero and
/// negatives that reach before the start become `1`. Other negatives count
/// back from the end (`-1` is the last byte).
fn pos_relat_i(pos: i64, len: usize) -> usize {
    match pos {
        forward if forward > 0 => forward as usize,
        0 => 1,
        before_start if before_start < -(len as i64) => 1,
        // Two's-complement add: equivalent to `len + pos + 1` for in-range negatives.
        back => len.wrapping_add(back as usize).wrapping_add(1),
    }
}
208
/// Resolve a relative position the way Lua 5.3's `posrelat` (`lstrlib.c` 5.3)
/// does.
///
/// Non-negative positions pass through untouched, so `0` stays `0` (unlike
/// `pos_relat_i`, which maps it to `1`). A negative that reaches before the
/// start yields `0`; any other negative counts back from the end.
/// `string.unpack` then subtracts one, underflowing into the "initial position
/// out of string" guard exactly as the 5.3 reference does.
fn posrelat_53(pos: i64, len: usize) -> usize {
    if pos >= 0 {
        return pos as usize;
    }
    // Widen before taking the magnitude so `i64::MIN` cannot overflow.
    let distance_back = (pos as i128).unsigned_abs();
    if distance_back > len as u128 {
        0
    } else {
        (len as i64 + pos + 1) as usize
    }
}
224
/// Resolve a Lua-style end position against a string of `len` bytes.
///
/// The result is always within `[0, len]`: positions past the end clamp to
/// `len`, negatives count back from the end (`-1` is the last byte), and
/// negatives that reach before the start become `0`.
fn get_end_pos(pos: i64, len: usize) -> usize {
    let signed_len = len as i64;
    if pos > signed_len {
        return len;
    }
    if pos >= 0 {
        return pos as usize;
    }
    if pos < -signed_len {
        return 0;
    }
    // Two's-complement add: equivalent to `len + pos + 1` for in-range negatives.
    len.wrapping_add(pos as usize).wrapping_add(1)
}
239
240// ────────────────────────────────────────────────────────────────────────────
241// §2  Exported string functions (registered in strlib[])
242// ────────────────────────────────────────────────────────────────────────────
243
244/// `string.len(s)` — return byte-length of `s`.
245///
246///
247/// Reads only the byte-length, never the bytes themselves, so go through
248/// `to_lua_string_len` (which never copies) rather than `check_arg_string`
249/// (which `to_vec`s the entire payload only for `.len()` to throw it away).
250pub fn str_len(state: &mut LuaState) -> Result<usize, LuaError> {
251    let l = match state.to_lua_string_len(1) {
252        Some(n) => n,
253        None => {
254            state.check_arg_string(1)?;
255            unreachable!("check_arg_string raises when arg #1 is not a string");
256        }
257    };
258    state.push(LuaValue::Int(l as i64));
259    Ok(1)
260}
261
262/// `string.sub(s, i [, j])` — return substring.
263///
264///
265/// Borrow through `to_lua_string` so the full source string is not copied just
266/// to slice a (typically small) substring out of it. The `GcRef` keeps the
267/// bytes rooted across the `check_arg_integer` / `opt_arg_integer` calls (none
268/// of which can collect the string at arg #1).
269pub fn str_sub(state: &mut LuaState) -> Result<usize, LuaError> {
270    let s_ref = match state.to_lua_string(1) {
271        Some(r) => r,
272        None => {
273            state.check_arg_string(1)?;
274            unreachable!("check_arg_string raises when arg #1 is not a string");
275        }
276    };
277    let s: &[u8] = s_ref.as_bytes();
278    let l = s.len();
279    let start = pos_relat_i(state.check_arg_integer(2)?, l);
280    let end_pos_raw = state.opt_arg_integer(3, -1)?;
281    let end = get_end_pos(end_pos_raw, l);
282    if start <= end {
283        let slice = &s[(start - 1)..end];
284        state.push_string(slice)?;
285    } else {
286        state.push_string(b"")?;
287    }
288    Ok(1)
289}
290
291/// `string.reverse(s)` — return string with bytes reversed.
292///
293///
294/// Borrow the source bytes; the previous `check_arg_string` made a full owned
295/// copy that was discarded after the single iteration.
296pub fn str_reverse(state: &mut LuaState) -> Result<usize, LuaError> {
297    let s_ref = match state.to_lua_string(1) {
298        Some(r) => r,
299        None => {
300            state.check_arg_string(1)?;
301            unreachable!("check_arg_string raises when arg #1 is not a string");
302        }
303    };
304    let s: &[u8] = s_ref.as_bytes();
305    let buf: Vec<u8> = s.iter().copied().rev().collect();
306    state.push_bytes(&buf)?;
307    Ok(1)
308}
309
310/// `string.lower(s)` — return lowercase copy.
311///
312///
313/// Borrow the source bytes; one allocation (the output `Vec`) is unavoidable,
314/// but the intermediate copy from `check_arg_string` was not.
315pub fn str_lower(state: &mut LuaState) -> Result<usize, LuaError> {
316    let s_ref = match state.to_lua_string(1) {
317        Some(r) => r,
318        None => {
319            state.check_arg_string(1)?;
320            unreachable!("check_arg_string raises when arg #1 is not a string");
321        }
322    };
323    let s: &[u8] = s_ref.as_bytes();
324    let buf: Vec<u8> = s.iter().map(|&c| c.to_ascii_lowercase()).collect();
325    state.push_bytes(&buf)?;
326    Ok(1)
327}
328
329/// `string.upper(s)` — return uppercase copy.
330///
331///
332/// Borrow the source bytes; called as the `string.gsub` replacement function
333/// in `string_ops_long` ~700k times against `%w+` matches, so the intermediate
334/// copy from `check_arg_string` added up.
335pub fn str_upper(state: &mut LuaState) -> Result<usize, LuaError> {
336    let s_ref = match state.to_lua_string(1) {
337        Some(r) => r,
338        None => {
339            state.check_arg_string(1)?;
340            unreachable!("check_arg_string raises when arg #1 is not a string");
341        }
342    };
343    let s: &[u8] = s_ref.as_bytes();
344    let buf: Vec<u8> = s.iter().map(|&c| c.to_ascii_uppercase()).collect();
345    state.push_bytes(&buf)?;
346    Ok(1)
347}
348
349/// `string.rep(s, n [, sep])` — return `n` copies of `s` separated by `sep`.
350///
351///
352/// Borrow `s` through `to_lua_string`. The previous version did the
353/// `check_arg_string` copy and then a second redundant `s.to_vec()` inside the
354/// build loop — that double-copy is gone too.
355pub fn str_rep(state: &mut LuaState) -> Result<usize, LuaError> {
356    let s_ref = match state.to_lua_string(1) {
357        Some(r) => r,
358        None => {
359            state.check_arg_string(1)?;
360            unreachable!("check_arg_string raises when arg #1 is not a string");
361        }
362    };
363    let s: &[u8] = s_ref.as_bytes();
364    let l = s.len();
365    let n = state.check_arg_integer(2)?;
366    let sep_owned = state.opt_arg_string(3, b"")?;
367    let sep: &[u8] = &sep_owned;
368    let lsep = sep.len();
369
370    if n <= 0 {
371        state.push_string(b"")?;
372    } else {
373        const MAXSIZE: usize = i32::MAX as usize;
374        let per = l.checked_add(lsep)
375            .ok_or_else(|| LuaError::runtime(format_args!("resulting string too large")))?;
376        if per > MAXSIZE / (n as usize) {
377            return Err(LuaError::runtime(format_args!("resulting string too large")));
378        }
379        let total = per * (n as usize) - lsep;
380
381        if let Some(err) = state.sandbox_reserve(total) {
382            return Err(err);
383        }
384
385        let mut buf: Vec<u8> = Vec::with_capacity(total);
386        for i in 0..(n as usize) {
387            buf.extend_from_slice(s);
388            if i < (n as usize - 1) && lsep > 0 {
389                buf.extend_from_slice(sep);
390            }
391        }
392        state.push_bytes(&buf)?;
393    }
394    Ok(1)
395}
396
397/// `string.byte(s [, i [, j]])` — return numeric codes of characters.
398///
399///
400/// Borrow the source bytes through `to_lua_string` (returns a `GcRef<LuaString>`)
401/// instead of `check_arg_string` (which copies the entire string into a fresh
402/// `Vec<u8>`). On the `string_ops_long` workload `string.byte` is called 700k
403/// times against the same ~14 KB string, so the previous copy was on the order
404/// of 10 GB of memcpy. The `GcRef` keeps the bytes rooted while the borrow lives.
405pub fn str_byte(state: &mut LuaState) -> Result<usize, LuaError> {
406    let s_ref = match state.to_lua_string(1) {
407        Some(r) => r,
408        None => {
409            state.check_arg_string(1)?;
410            unreachable!("check_arg_string raises when arg #1 is not a string");
411        }
412    };
413    let s: &[u8] = s_ref.as_bytes();
414    let l = s.len();
415    let pi = state.opt_arg_integer(2, 1)?;
416    let posi = pos_relat_i(pi, l);
417    let pose_raw = state.opt_arg_integer(3, pi)?;
418    let pose = get_end_pos(pose_raw, l);
419
420    if posi > pose {
421        return Ok(0);
422    }
423    let count = pose.saturating_sub(posi - 1) + 1;
424    if count > i32::MAX as usize {
425        return Err(LuaError::runtime(format_args!("string slice too long")));
426    }
427    let n = (pose - posi + 1) as usize;
428    state.ensure_stack(n as i32, "string slice too long")?;
429
430    for i in 0..n {
431        state.push(LuaValue::Int(s[posi - 1 + i] as i64));
432    }
433    Ok(n)
434}
435
436/// `string.char(...)` — return string built from character codes.
437///
438pub fn str_char(state: &mut LuaState) -> Result<usize, LuaError> {
439    let n = state.get_top();
440    let mut buf = Vec::with_capacity(n as usize);
441    for i in 1..=n {
442        let c = state.check_arg_integer(i)? as u64;
443        if c > u8::MAX as u64 {
444            return Err(lua_vm::debug::arg_error_impl(state, i, b"value out of range"));
445        }
446        buf.push(c as u8);
447    }
448    state.push_bytes(&buf)?;
449    Ok(1)
450}
451
452/// `string.dump(function [, strip])` — serialize a function as binary chunk.
453///
454/// Uses `lua_dump` internally; the writer callback builds a buffer.
455pub fn str_dump(state: &mut LuaState) -> Result<usize, LuaError> {
456    state.check_arg_type(1, LuaType::Function)?;
457    let strip = state.arg_to_bool(2);
458    // PORT NOTE: `state.set_top` (inherent) takes an absolute StackIdx and
459    // would wipe the call frame. `lua_settop` is frame-relative.
460    lua_vm::api::set_top(state, 1)?;
461    // TODO(port): state.dump_function(strip) needs to produce &[u8].
462    // In the C code, lua_dump writes to a writer callback that fills a luaL_Buffer.
463    // In Rust, state.dump() should return Vec<u8> or write to a &mut Vec<u8>.
464    let bytes = state.dump_function(strip)
465        .map_err(|_| LuaError::runtime(format_args!("unable to dump given function")))?;
466    state.push_bytes(&bytes)?;
467    Ok(1)
468}
469
470// ────────────────────────────────────────────────────────────────────────────
471// §3  String metamethods (arithmetic coercion)
472// ────────────────────────────────────────────────────────────────────────────
473
474/// Try to coerce the argument at `arg` to a number, pushing it on the stack.
475/// Returns true on success.
476///
477fn tonum(state: &mut LuaState, arg: i32) -> Result<bool, LuaError> {
478    if state.type_at(arg) == LuaType::Number {
479        state.push_value_at(arg)?;
480        Ok(true)
481    } else {
482        // check whether it is a numerical string
483        //    return (s != NULL && lua_stringtonumber(L, s) == len + 1);
484        if let Some(s) = state.to_lua_string_bytes(arg) {
485            let len = s.len();
486            // PORT NOTE: string_to_number pushes the number if successful
487            let pushed = state.string_to_number_push(&s)?;
488            let ok = pushed == len + 1;
489            // Lua 5.1–5.3: a string coerced in an arithmetic operation always
490            // yields a float (`('16') + 0` is a float in 5.3, an integer in
491            // 5.4). This metamethod path is arithmetic-only, so the promotion
492            // never touches bitwise ops. Verified vs the 5.3.6/5.4.7 oracle.
493            if ok
494                && matches!(
495                    state.global().lua_version,
496                    lua_types::LuaVersion::V51
497                        | lua_types::LuaVersion::V52
498                        | lua_types::LuaVersion::V53
499                )
500            {
501                if let Some(f) = lua_vm::api::to_number_x(state, -1) {
502                    state.pop();
503                    state.push(LuaValue::Float(f));
504                }
505            }
506            Ok(ok)
507        } else {
508            Ok(false)
509        }
510    }
511}
512
513/// Try to invoke the metamethod `mtname` on the two operands.
514///
515fn trymt(state: &mut LuaState, mtname: &[u8]) -> Result<(), LuaError> {
516    // PORT NOTE: `state.set_top` (inherent) takes an absolute StackIdx and
517    // would wipe the call frame's arguments. `lua_settop` is frame-relative
518    // — keep the first two args of the current C function.
519    lua_vm::api::set_top(state, 2)?;
520    let t2_is_string = state.type_at(2) == LuaType::String;
521    // C: `if (lua_type(L,2)==LUA_TSTRING || !luaL_getmetafield(L,2,mtname))`.
522    // The `||` short-circuits: when arg2 is a string, `get_meta_field` is never
523    // called, so the stack stays `[arg1, arg2]` for the error formatter. Calling
524    // it unconditionally would push the string metatable's own metamethod and
525    // shift the operands read by `type_name_at(-2)/(-1)`.
526    if t2_is_string || !state.get_meta_field(2, mtname)? {
527        let op = &mtname[2..]; // skip "__"
528        let msg = format!(
529            "attempt to {} a '{}' with a '{}'",
530            op.escape_ascii(),
531            state.type_name_at(-2).escape_ascii(),
532            state.type_name_at(-1).escape_ascii(),
533        );
534        return crate::auxlib::lua_error(state, msg.as_bytes()).map(|_| ());
535    }
536    state.insert(-3)?;
537    state.call(2, 1)?;
538    Ok(())
539}
540
541/// Generic arithmetic helper: coerce both args and call `op`, else try metamethod.
542///
543fn arith(state: &mut LuaState, op: ArithOp, mtname: &[u8]) -> Result<usize, LuaError> {
544    if tonum(state, 1)? && tonum(state, 2)? {
545        state.arith(op)?;
546    } else {
547        trymt(state, mtname)?;
548    }
549    Ok(1)
550}
551
552pub fn arith_add(state: &mut LuaState) -> Result<usize, LuaError> {
553    arith(state, ArithOp::Add, b"__add")
554}
555pub fn arith_sub(state: &mut LuaState) -> Result<usize, LuaError> {
556    arith(state, ArithOp::Sub, b"__sub")
557}
558pub fn arith_mul(state: &mut LuaState) -> Result<usize, LuaError> {
559    arith(state, ArithOp::Mul, b"__mul")
560}
561pub fn arith_mod(state: &mut LuaState) -> Result<usize, LuaError> {
562    arith(state, ArithOp::Mod, b"__mod")
563}
564pub fn arith_pow(state: &mut LuaState) -> Result<usize, LuaError> {
565    arith(state, ArithOp::Pow, b"__pow")
566}
567pub fn arith_div(state: &mut LuaState) -> Result<usize, LuaError> {
568    arith(state, ArithOp::Div, b"__div")
569}
570pub fn arith_idiv(state: &mut LuaState) -> Result<usize, LuaError> {
571    arith(state, ArithOp::Idiv, b"__idiv")
572}
573pub fn arith_unm(state: &mut LuaState) -> Result<usize, LuaError> {
574    arith(state, ArithOp::Unm, b"__unm")
575}
576
577// ────────────────────────────────────────────────────────────────────────────
578// §4  Pattern-matching engine
579// ────────────────────────────────────────────────────────────────────────────
580
/// Return `true` if byte `c` belongs to the character class named by `cl`
/// (the letter following `%` in a pattern).
///
/// A lowercase class letter tests membership and its uppercase form tests the
/// complement (`%d` digits, `%D` non-digits). If `cl` is not a class letter,
/// it stands for itself and the result is `cl == c`.
///
/// Classification is ASCII-only and follows the C-locale `<ctype.h>`
/// predicates used by `lstrlib.c`.
#[inline]
fn match_class(c: u8, cl: u8) -> bool {
    let res = match cl.to_ascii_lowercase() {
        b'a' => c.is_ascii_alphabetic(),
        b'c' => c.is_ascii_control(),
        b'd' => c.is_ascii_digit(),
        b'g' => c.is_ascii_graphic(),
        b'l' => c.is_ascii_lowercase(),
        b'p' => c.is_ascii_punctuation(),
        // C `isspace` also accepts vertical tab (0x0B), which
        // `u8::is_ascii_whitespace` deliberately leaves out.
        b's' => c.is_ascii_whitespace() || c == 0x0B,
        b'u' => c.is_ascii_uppercase(),
        b'w' => c.is_ascii_alphanumeric(),
        b'x' => c.is_ascii_hexdigit(),
        b'z' => c == 0,
        _    => return cl == c,
    };
    if cl.is_ascii_lowercase() { res } else { !res }
}
601
602/// Match character `c` against a bracket class `[p .. ec-1]`.
603///
604/// `p` and `ec` are indices into `pat`.
605#[inline]
606fn matchbracketclass(pat: &[u8], c: u8, mut p: usize, ec: usize) -> bool {
607    let sig = if p + 1 < pat.len() && pat[p + 1] == b'^' {
608        p += 1; // skip '^'
609        false
610    } else {
611        true
612    };
613    p += 1; // advance past '[' or '^'
614    while p < ec {
615        if pat[p] == L_ESC {
616            p += 1;
617            if p < ec && match_class(c, pat[p]) {
618                return sig;
619            }
620        } else if p + 1 < ec && pat[p + 1] == b'-' && p + 2 < ec {
621            let lo = pat[p];
622            p += 2;
623            let hi = pat[p];
624            if lo <= c && c <= hi {
625                return sig;
626            }
627        } else if pat[p] == c {
628            return sig;
629        }
630        p += 1;
631    }
632    !sig
633}
634
635/// Return `true` if the single character at `src[s]` matches the pattern
636/// element starting at `pat[p]` with class end at `ep`.
637///
638#[inline]
639fn singlematch(ms: &MatchState, s: usize, p: usize, ep: usize) -> bool {
640    if s >= ms.src.len() {
641        return false;
642    }
643    let c = ms.src[s];
644    match ms.pat[p] {
645        b'.' => true,
646        L_ESC => match_class(c, ms.pat[p + 1]),
647        b'[' => matchbracketclass(ms.pat, c, p, ep - 1),
648        pc   => pc == c,
649    }
650}
651
652/// Find the end of the pattern element starting at `pat[p]`.
653/// Returns the index one past the element, or an error for malformed patterns.
654///
655fn classend(ms: &MatchState, p: usize) -> Result<usize, LuaError> {
656    let pat = ms.pat;
657    match pat.get(p).copied() {
658        Some(L_ESC) => {
659            if p + 1 >= pat.len() {
660                return Err(LuaError::runtime(format_args!(
661                    "malformed pattern (ends with '%')"
662                )));
663            }
664            Ok(p + 2)
665        }
666        Some(b'[') => {
667            let mut q = p + 1;
668            if q < pat.len() && pat[q] == b'^' {
669                q += 1;
670            }
671            loop {
672                if q >= pat.len() {
673                    return Err(LuaError::runtime(format_args!(
674                        "malformed pattern (missing ']')"
675                    )));
676                }
677                let ch = pat[q];
678                q += 1;
679                if ch == L_ESC && q < pat.len() {
680                    q += 1;
681                }
682                if q < pat.len() && pat[q] == b']' {
683                    return Ok(q + 1);
684                }
685            }
686        }
687        Some(_) => Ok(p + 1),
688        None => Ok(p),
689    }
690}
691
692/// Check that capture `l` (1-based char digit from pattern) is valid.
693/// Returns the 0-based capture index.
694///
695fn check_capture(ms: &MatchState, l: u8) -> Result<usize, LuaError> {
696    let signed = (l as i32) - (b'1' as i32);
697    if signed < 0
698        || signed >= ms.level as i32
699        || ms.captures[signed as usize].len == CAP_UNFINISHED
700    {
701        return Err(LuaError::runtime(format_args!(
702            "invalid capture index %{}",
703            signed + 1
704        )));
705    }
706    Ok(signed as usize)
707}
708
709/// Find the most recent unfinished capture to close.
710///
711fn capture_to_close(ms: &MatchState) -> Result<usize, LuaError> {
712    let mut level = ms.level as usize;
713    while level > 0 {
714        level -= 1;
715        if ms.captures[level].len == CAP_UNFINISHED {
716            return Ok(level);
717        }
718    }
719    Err(LuaError::runtime(format_args!("invalid pattern capture")))
720}
721
722/// Match a balanced string `%bxy` starting at `src[s]`.
723///
724/// Returns the new `s` position after the match, or `None`.
725fn matchbalance(ms: &MatchState, s: usize, p: usize) -> Result<Option<usize>, LuaError> {
726    if p + 1 >= ms.pat.len() {
727        return Err(LuaError::runtime(format_args!(
728            "malformed pattern (missing arguments to '%b')"
729        )));
730    }
731    let b = ms.pat[p];
732    let e = ms.pat[p + 1];
733    if s >= ms.src.len() || ms.src[s] != b {
734        return Ok(None);
735    }
736    let mut cont = 1i32;
737    let mut s = s + 1;
738    while s < ms.src.len() {
739        if ms.src[s] == e {
740            cont -= 1;
741            if cont == 0 {
742                return Ok(Some(s + 1));
743            }
744        } else if ms.src[s] == b {
745            cont += 1;
746        }
747        s += 1;
748    }
749    Ok(None)
750}
751
752/// Greedy match: match as many as possible, then try the rest of the pattern.
753///
754fn max_expand(
755    ms: &mut MatchState,
756    s: usize,
757    p: usize,
758    ep: usize,
759) -> Result<Option<usize>, LuaError> {
760    let mut count: isize = 0;
761    while singlematch(ms, s + count as usize, p, ep) {
762        count += 1;
763    }
764    while count >= 0 {
765        let res = match_pat(ms, s + count as usize, ep + 1)?;
766        if res.is_some() {
767            return Ok(res);
768        }
769        count -= 1;
770    }
771    Ok(None)
772}
773
774/// Lazy match: try the rest of the pattern first, then expand by one.
775///
776fn min_expand(
777    ms: &mut MatchState,
778    mut s: usize,
779    p: usize,
780    ep: usize,
781) -> Result<Option<usize>, LuaError> {
782    loop {
783        let res = match_pat(ms, s, ep + 1)?;
784        if res.is_some() {
785            return Ok(res);
786        } else if singlematch(ms, s, p, ep) {
787            s += 1;
788        } else {
789            return Ok(None);
790        }
791    }
792}
793
794/// Open a new capture at `src[s]`.
795///
796fn start_capture(
797    ms: &mut MatchState,
798    s: usize,
799    p: usize,
800    what: isize,
801) -> Result<Option<usize>, LuaError> {
802    let level = ms.level as usize;
803    if level >= LUA_MAX_CAPTURES {
804        return Err(LuaError::runtime(format_args!("too many captures")));
805    }
806    ms.captures[level].init = s;
807    ms.captures[level].len = what;
808    ms.level += 1;
809    let res = match_pat(ms, s, p)?;
810    if res.is_none() {
811        ms.level -= 1; // undo capture
812    }
813    Ok(res)
814}
815
816/// Close the most recent open capture at `src[s]`.
817///
818fn end_capture(ms: &mut MatchState, s: usize, p: usize) -> Result<Option<usize>, LuaError> {
819    let l = capture_to_close(ms)?;
820    ms.captures[l].len = (s - ms.captures[l].init) as isize;
821    let res = match_pat(ms, s, p)?;
822    if res.is_none() {
823        ms.captures[l].len = CAP_UNFINISHED; // undo
824    }
825    Ok(res)
826}
827
828/// Match a back-reference `%n` against `src[s]`.
829///
830fn match_capture(ms: &MatchState, s: usize, l: u8) -> Result<Option<usize>, LuaError> {
831    let idx = check_capture(ms, l)?;
832    let cap_len = ms.captures[idx].len as usize;
833    let cap_init = ms.captures[idx].init;
834    if ms.src.len() - s >= cap_len
835        && &ms.src[s..s + cap_len] == &ms.src[cap_init..cap_init + cap_len]
836    {
837        Ok(Some(s + cap_len))
838    } else {
839        Ok(None)
840    }
841}
842
843/// Core recursive pattern matcher.
844/// Returns `Ok(Some(new_s))` on match, `Ok(None)` on failure, `Err` on error.
845///
846/// The C code uses `goto init` for tail calls; here we use a loop.
847fn match_pat(ms: &mut MatchState, mut s: usize, mut p: usize) -> Result<Option<usize>, LuaError> {
848    if ms.aborted {
849        return Ok(None);
850    }
851    ms.steps += 1;
852    if ms.step_limit != 0 && ms.steps > ms.step_limit {
853        ms.aborted = true;
854        return Ok(None);
855    }
856    ms.matchdepth -= 1;
857    if ms.matchdepth < 0 {
858        ms.matchdepth = 0;
859        return Err(LuaError::runtime(format_args!("pattern too complex")));
860    }
861
862    // Use a loop to simulate `goto init` (tail-call optimization).
863    let result = 'outer: loop {
864        if p >= ms.pat.len() {
865            // end of pattern — full match up to current s
866            break 'outer Ok(Some(s));
867        }
868
869        match ms.pat[p] {
870            b'(' => {
871                let s2 = if p + 1 < ms.pat.len() && ms.pat[p + 1] == b')' {
872                    // position capture
873                    start_capture(ms, s, p + 2, CAP_POSITION)?
874                } else {
875                    start_capture(ms, s, p + 1, CAP_UNFINISHED)?
876                };
877                break 'outer Ok(s2);
878            }
879            b')' => {
880                let s2 = end_capture(ms, s, p + 1)?;
881                break 'outer Ok(s2);
882            }
883            b'$' => {
884                if p + 1 != ms.pat.len() {
885                    // fall through to default
886                    let ep = classend(ms, p)?;
887                    let s2 = handle_class_with_suffix(ms, s, p, ep)?;
888                    break 'outer Ok(s2);
889                }
890                break 'outer Ok(if s == ms.src.len() { Some(s) } else { None });
891            }
892            L_ESC => {
893                match ms.pat.get(p + 1).copied().unwrap_or(0) {
894                    b'b' => {
895                        let s2 = matchbalance(ms, s, p + 2)?;
896                        if let Some(ns) = s2 {
897                            s = ns;
898                            p += 4;
899                            continue 'outer; // tail call: match(ms, s, p+4)
900                        }
901                        break 'outer Ok(None);
902                    }
903                    b'f' => {
904                        p += 2;
905                        if ms.pat.get(p).copied() != Some(b'[') {
906                            return Err(LuaError::runtime(format_args!(
907                                "missing '[' after '%f' in pattern"
908                            )));
909                        }
910                        let ep = classend(ms, p)?;
911                        let previous = if s == 0 { 0u8 } else { ms.src[s - 1] };
912                        let current = ms.src.get(s).copied().unwrap_or(0);
913                        if !matchbracketclass(ms.pat, previous, p, ep - 1)
914                            && matchbracketclass(ms.pat, current, p, ep - 1)
915                        {
916                            p = ep;
917                            continue 'outer; // tail call: match(ms, s, ep)
918                        }
919                        break 'outer Ok(None);
920                    }
921                    c @ b'0'..=b'9' => {
922                        let s2 = match_capture(ms, s, c)?;
923                        if let Some(ns) = s2 {
924                            s = ns;
925                            p += 2;
926                            continue 'outer; // tail call: match(ms, s, p+2)
927                        }
928                        break 'outer Ok(None);
929                    }
930                    _ => {
931                        // fall through to default class handling
932                        let ep = classend(ms, p)?;
933                        let s2 = handle_class_with_suffix(ms, s, p, ep)?;
934                        break 'outer Ok(s2);
935                    }
936                }
937            }
938            _ => {
939                // default: pattern class plus optional suffix
940                let ep = classend(ms, p)?;
941                let s2 = handle_class_with_suffix(ms, s, p, ep)?;
942                break 'outer Ok(s2);
943            }
944        }
945    };
946
947    ms.matchdepth += 1;
948    result
949}
950
951/// Handle a pattern class element with an optional repetition suffix (`*`, `+`, `?`, `-`).
952///
953/// PORT NOTE: Factored out from `match_pat`'s `default/dflt` label to share
954/// code between the ESC-default and plain-default paths.
955fn handle_class_with_suffix(
956    ms: &mut MatchState,
957    s: usize,
958    p: usize,
959    ep: usize,
960) -> Result<Option<usize>, LuaError> {
961    let matched_once = singlematch(ms, s, p, ep);
962    if !matched_once {
963        //    else s = NULL;
964        match ms.pat.get(ep).copied() {
965            Some(b'*') | Some(b'?') | Some(b'-') => {
966                // Accept zero occurrences: tail-call match(ms, s, ep+1)
967                // We can't do a tail call into match_pat because we're returning
968                // from handle_class_with_suffix, but we can call it directly.
969                return match_pat(ms, s, ep + 1);
970            }
971            _ => return Ok(None),
972        }
973    }
974
975    // Matched at least once
976    match ms.pat.get(ep).copied() {
977        Some(b'?') => {
978            // Optional: try matching with s+1, fall back to ep+1
979            let res = match_pat(ms, s + 1, ep + 1)?;
980            if res.is_some() {
981                Ok(res)
982            } else {
983                match_pat(ms, s, ep + 1)
984            }
985        }
986        Some(b'+') => {
987            // 1 or more: greedy from s+1
988            max_expand(ms, s + 1, p, ep)
989        }
990        Some(b'*') => {
991            // 0 or more: greedy from s
992            max_expand(ms, s, p, ep)
993        }
994        Some(b'-') => {
995            // 0 or more: lazy from s
996            min_expand(ms, s, p, ep)
997        }
998        _ => {
999            // No suffix: match one, advance both s and p
1000            match_pat(ms, s + 1, ep)
1001        }
1002    }
1003}
1004
1005// ────────────────────────────────────────────────────────────────────────────
1006// §5  Pattern-matching public API helpers
1007// ────────────────────────────────────────────────────────────────────────────
1008
/// Plain (non-pattern) substring search, in the spirit of `memmem`.
///
/// Returns the byte offset of the first occurrence of `needle` in `haystack`,
/// or `None` when it does not occur. An empty needle matches at offset 0.
fn lmemfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let Some((&lead, tail)) = needle.split_first() else {
        return Some(0);
    };
    if needle.len() > haystack.len() {
        return None;
    }

    // Last offset at which the whole needle still fits.
    let last_start = haystack.len() - needle.len();
    let mut from = 0;
    while from <= last_start {
        // Jump to the next candidate by scanning for the first needle byte.
        let hit = from + haystack[from..=last_start].iter().position(|&b| b == lead)?;
        if haystack[hit + 1..].starts_with(tail) {
            return Some(hit);
        }
        from = hit + 1;
    }
    None
}
1038
1039/// Check whether the pattern `pat` has no special characters (for plain search).
1040///
1041fn nospecials(pat: &[u8]) -> bool {
1042    !pat.iter().any(|b| SPECIALS.contains(b))
1043}
1044
/// The value of one capture produced by a successful match.
///
/// Both payloads are `Copy`, so the common traits are derived to make the
/// type printable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CaptureInfo<'a> {
    /// A position capture (`()`); the value is a 1-based index into the subject.
    Position(i64),
    /// A string capture: a slice borrowed from the subject bytes.
    Bytes(&'a [u8]),
}
1052
1053/// Get information about the `i`-th capture.
1054/// If there are no captures and `i == 0`, returns the whole match `s..e`.
1055///
1056fn get_one_capture<'a>(
1057    ms: &'a MatchState,
1058    i: usize,
1059    s: usize,
1060    e: usize,
1061) -> Result<CaptureInfo<'a>, LuaError> {
1062    if i >= ms.level as usize {
1063        if i != 0 {
1064            return Err(LuaError::runtime(format_args!(
1065                "invalid capture index %{}",
1066                i + 1
1067            )));
1068        }
1069        // Return whole match
1070        return Ok(CaptureInfo::Bytes(&ms.src[s..e]));
1071    }
1072    let cap = &ms.captures[i];
1073    if cap.len == CAP_UNFINISHED {
1074        return Err(LuaError::runtime(format_args!("unfinished capture")));
1075    }
1076    if cap.len == CAP_POSITION {
1077        return Ok(CaptureInfo::Position((cap.init + 1) as i64));
1078    }
1079    let len = cap.len as usize;
1080    Ok(CaptureInfo::Bytes(&ms.src[cap.init..cap.init + len]))
1081}
1082
1083/// Push all captures onto the stack, returning the number of values pushed.
1084///
1085/// `span` mirrors upstream's `const char *s` argument: `Some((s, e))` means a
1086/// whole-match span is available (so a zero-capture pattern pushes the whole
1087/// match), while `None` mirrors a `NULL s` and pushes nothing when there are no
1088/// explicit captures. Upstream guard: `nlevels = (ms->level == 0 && s) ? 1 : ms->level`.
1089///
1090fn push_captures(
1091    state: &mut LuaState,
1092    ms: &MatchState,
1093    span: Option<(usize, usize)>,
1094) -> Result<usize, LuaError> {
1095    let nlevels = if ms.level == 0 && span.is_some() {
1096        1
1097    } else {
1098        ms.level as usize
1099    };
1100    state.ensure_stack(nlevels as i32, "too many captures")?;
1101    let (s, e) = span.unwrap_or((0, 0));
1102    for i in 0..nlevels {
1103        match get_one_capture(ms, i, s, e)? {
1104            CaptureInfo::Position(n) => state.push(LuaValue::Int(n)),
1105            CaptureInfo::Bytes(b) => state.push_bytes(b)?,
1106        }
1107    }
1108    Ok(nlevels)
1109}
1110
1111// ────────────────────────────────────────────────────────────────────────────
1112// §6  str_find / str_match / gmatch / gsub
1113// ────────────────────────────────────────────────────────────────────────────
1114
1115/// Shared implementation of `string.find` and `string.match`.
1116///
1117fn str_find_aux(state: &mut LuaState, find: bool) -> Result<usize, LuaError> {
1118    let s_ref = match state.to_lua_string(1) {
1119        Some(r) => r,
1120        None => {
1121            state.check_arg_string(1)?;
1122            unreachable!("check_arg_string raises when arg #1 is not a string");
1123        }
1124    };
1125    let p_ref = match state.to_lua_string(2) {
1126        Some(r) => r,
1127        None => {
1128            state.check_arg_string(2)?;
1129            unreachable!("check_arg_string raises when arg #2 is not a string");
1130        }
1131    };
1132    let s: &[u8] = s_ref.as_bytes();
1133    let p: &[u8] = p_ref.as_bytes();
1134    let ls = s.len();
1135    let lp = p.len();
1136    let init_raw = state.opt_arg_integer(3, 1)?;
1137    let init = pos_relat_i(init_raw, ls).saturating_sub(1);
1138
1139    if init > ls {
1140        state.push(LuaValue::Nil);
1141        return Ok(1);
1142    }
1143
1144    if find && (state.arg_to_bool(4) || nospecials(p)) {
1145        // plain search
1146        if let Some(pos) = lmemfind(&s[init..], p) {
1147            let abs = init + pos;
1148            state.push(LuaValue::Int((abs + 1) as i64));
1149            state.push(LuaValue::Int((abs + lp) as i64));
1150            return Ok(2);
1151        }
1152    } else {
1153        let step_limit = state.sandbox_match_step_limit();
1154        let mut ms = MatchState::new(s, p, step_limit);
1155        let anchor = p.first() == Some(&b'^');
1156        let p_slice = if anchor { &p[1..] } else { p };
1157        ms.pat = p_slice;
1158
1159        let mut s1 = init;
1160        let mut matched: Option<usize> = None;
1161        loop {
1162            ms.reset_level();
1163            if let Some(res) = match_pat(&mut ms, s1, 0)? {
1164                matched = Some(res);
1165                break;
1166            }
1167            if ms.aborted || s1 >= ms.src.len() || anchor {
1168                break;
1169            }
1170            s1 += 1;
1171        }
1172
1173        if let Some(err) = state.sandbox_charge(ms.steps) {
1174            return Err(err);
1175        }
1176
1177        if let Some(res) = matched {
1178            if find {
1179                state.push(LuaValue::Int((s1 + 1) as i64));
1180                state.push(LuaValue::Int(res as i64));
1181                let nc = push_captures(state, &ms, None)?;
1182                return Ok(nc + 2);
1183            } else {
1184                return push_captures(state, &ms, Some((s1, res)));
1185            }
1186        }
1187    }
1188
1189    state.push(LuaValue::Nil);
1190    Ok(1)
1191}
1192
1193/// `string.find(s, pattern [, init [, plain]])` — find pattern in `s`.
1194///
1195pub fn str_find(state: &mut LuaState) -> Result<usize, LuaError> {
1196    str_find_aux(state, true)
1197}
1198
1199/// `string.match(s, pattern [, init])` — match pattern against `s`.
1200///
1201pub fn str_match(state: &mut LuaState) -> Result<usize, LuaError> {
1202    str_find_aux(state, false)
1203}
1204
1205/// Continuation function for `string.gmatch` iterator closure.
1206///
1207///
1208/// PORT NOTE: The C version stores `GMatchState` inside a heap-allocated
1209/// userdata referenced by upvalue 3, then mutates fields via the raw pointer
1210/// each iteration. Our Phase-A `LuaCClosure.upvalues` is immutable, so the
1211/// iterator state lives in a Lua table referenced by upvalue 1 with
1212/// integer-keyed slots:
1213///   t[1] = source bytes (string), t[2] = pattern bytes (string),
1214///   t[3] = current source position (1-based; equals `lastmatch` after a
1215///   successful match), t[4] = end of last match (`0` ≡ NULL in C, meaning
1216///   "no match yet").
1217///
1218/// PERF NOTE: The previous version pushed the upvalue table onto the stack
1219/// and then issued six `raw_geti` / `raw_seti` calls plus four `to_lua_string`
1220/// / `to_integer_x` reads — each of which re-resolves the stack index via
1221/// `index_to_value`. That made `index_to_value` the #1 non-algorithm frame in
1222/// `string_ops_long` at 9.4% of wall. The current version resolves the
1223/// upvalue once via `value_at`, extracts the `GcRef<LuaTable>`, and reads /
1224/// writes its integer-keyed slots directly through `LuaTableRefExt`. This is
1225/// the same shape as C-Lua's `luaH_getint` / `luaH_setint` direct table ops
1226/// against the embedded `GMatchState` struct fields — no stack roundtrip
1227/// per probe.
1228pub fn gmatch_aux(state: &mut LuaState) -> Result<usize, LuaError> {
1229    let upval = state.value_at(upvalue_index(1));
1230    let tbl = match upval {
1231        LuaValue::Table(t) => t,
1232        _ => return Ok(0),
1233    };
1234
1235    let s_val = tbl.get_int(1);
1236    let p_val = tbl.get_int(2);
1237    let (LuaValue::Str(s_str), LuaValue::Str(p_str)) = (&s_val, &p_val) else {
1238        return Ok(0);
1239    };
1240    let s: &[u8] = s_str.as_bytes();
1241    let p: &[u8] = p_str.as_bytes();
1242
1243    let pos = match tbl.get_int(3) {
1244        LuaValue::Int(n) => n,
1245        _ => 1,
1246    };
1247    let lastmatch_raw = match tbl.get_int(4) {
1248        LuaValue::Int(n) => n,
1249        _ => 0,
1250    };
1251    let last_match: Option<usize> = if lastmatch_raw <= 0 {
1252        None
1253    } else {
1254        Some((lastmatch_raw - 1) as usize)
1255    };
1256
1257    let ls = s.len();
1258    let start_pos = if pos < 1 { 0usize } else { (pos - 1) as usize };
1259
1260    let step_limit = state.sandbox_match_step_limit();
1261    let mut ms = MatchState::new(s, p, step_limit);
1262
1263    let mut src = start_pos;
1264    let mut hit: Option<(usize, usize)> = None;
1265    while src <= ls {
1266        ms.reset_level();
1267        if let Some(e) = match_pat(&mut ms, src, 0)? {
1268            if Some(e) != last_match {
1269                hit = Some((src, e));
1270                break;
1271            }
1272        }
1273        if ms.aborted {
1274            break;
1275        }
1276        src += 1;
1277    }
1278
1279    if let Some(err) = state.sandbox_charge(ms.steps) {
1280        return Err(err);
1281    }
1282
1283    if let Some((src, e)) = hit {
1284        let e_val = LuaValue::Int((e + 1) as i64);
1285        tbl.raw_set_int(state, 3, e_val.clone())?;
1286        tbl.raw_set_int(state, 4, e_val)?;
1287        return push_captures(state, &ms, Some((src, e)));
1288    }
1289
1290    Ok(0)
1291}
1292
1293/// `string.gmatch(s, pattern [, init])` — return an iterator for all matches.
1294///
1295///
1296/// PORT NOTE: C uses `lua_newuserdatauv` for the GMatchState plus a 3-upvalue
1297/// C closure. Phase-A LuaCClosure upvalues are immutable, so we collapse the
1298/// state into a 4-element Lua table held in a single upvalue (see
1299/// `gmatch_aux`).
1300pub fn gmatch(state: &mut LuaState) -> Result<usize, LuaError> {
1301    let s_ref = match state.to_lua_string(1) {
1302        Some(r) => r,
1303        None => {
1304            state.check_arg_string(1)?;
1305            unreachable!("check_arg_string raises when arg #1 is not a string");
1306        }
1307    };
1308    let ls = s_ref.len();
1309    match state.to_lua_string(2) {
1310        Some(_) => {}
1311        None => {
1312            state.check_arg_string(2)?;
1313            unreachable!("check_arg_string raises when arg #2 is not a string");
1314        }
1315    };
1316    let init_raw = state.opt_arg_integer(3, 1)?;
1317    let mut init = pos_relat_i(init_raw, ls).saturating_sub(1);
1318    if init > ls {
1319        init = ls + 1;
1320    }
1321
1322    lua_vm::api::set_top(state, 2)?;
1323
1324    state.create_table(4, 0)?;
1325    let tbl_idx = state.top();
1326    state.push_value_at(1)?;
1327    state.raw_seti(tbl_idx, 1)?;
1328    state.push_value_at(2)?;
1329    state.raw_seti(tbl_idx, 2)?;
1330    state.push(LuaValue::Int((init + 1) as i64));
1331    state.raw_seti(tbl_idx, 3)?;
1332    state.push(LuaValue::Int(0));
1333    state.raw_seti(tbl_idx, 4)?;
1334
1335    state.push_c_closure(gmatch_aux, 1)?;
1336    Ok(1)
1337}
1338
1339/// Add a replacement string with `%n` capture references to `buf`.
1340///
1341fn add_s(
1342    state: &mut LuaState,
1343    ms: &MatchState,
1344    buf: &mut Vec<u8>,
1345    s: usize,
1346    e: usize,
1347) -> Result<(), LuaError> {
1348    let news_bytes = state.to_lua_string_bytes(3).unwrap_or_default();
1349    let mut i = 0usize;
1350    while i < news_bytes.len() {
1351        if news_bytes[i] != L_ESC {
1352            buf.push(news_bytes[i]);
1353            i += 1;
1354        } else {
1355            i += 1; // skip ESC
1356            if i >= news_bytes.len() {
1357                break;
1358            }
1359            let c = news_bytes[i];
1360            if c == L_ESC {
1361                buf.push(L_ESC);
1362            } else if c == b'0' {
1363                buf.extend_from_slice(&ms.src[s..e]);
1364            } else if c.is_ascii_digit() {
1365                match get_one_capture(ms, (c - b'1') as usize, s, e)? {
1366                    CaptureInfo::Position(n) => {
1367                        // push position then pop into buf
1368                        let formatted = format!("{}", n).into_bytes();
1369                        buf.extend_from_slice(&formatted);
1370                    }
1371                    CaptureInfo::Bytes(b) => {
1372                        buf.extend_from_slice(b);
1373                    }
1374                }
1375            } else {
1376                return Err(LuaError::runtime(format_args!(
1377                    "invalid use of '{}' in replacement string",
1378                    L_ESC as char
1379                )));
1380            }
1381            i += 1;
1382        }
1383    }
1384    Ok(())
1385}
1386
1387/// Add the replacement value (string, table lookup, or function call) to `buf`.
1388/// Returns `true` if the original text was changed.
1389///
1390fn add_value(
1391    state: &mut LuaState,
1392    ms: &MatchState,
1393    buf: &mut Vec<u8>,
1394    s: usize,
1395    e: usize,
1396    tr: LuaType,
1397) -> Result<bool, LuaError> {
1398    match tr {
1399        LuaType::Function => {
1400            state.push_value_at(3)?;
1401            let n = push_captures(state, ms, Some((s, e)))?;
1402            state.call(n as i32, 1)?;
1403        }
1404        LuaType::Table => {
1405            match get_one_capture(ms, 0, s, e)? {
1406                CaptureInfo::Position(n) => state.push(LuaValue::Int(n)),
1407                CaptureInfo::Bytes(b) => state.push_bytes(b)?,
1408            }
1409            state.get_table(3)?;
1410        }
1411        _ => {
1412            // LUA_TNUMBER or LUA_TSTRING: add replacement string directly
1413            add_s(state, ms, buf, s, e)?;
1414            return Ok(true);
1415        }
1416    }
1417
1418    let top_bool = state.arg_to_bool(-1);
1419    if !top_bool {
1420        state.pop_n(1);
1421        buf.extend_from_slice(&ms.src[s..e]);
1422        return Ok(false);
1423    }
1424    if state.type_at(-1) != LuaType::String {
1425        let tname = state.type_name_at(-1).to_owned();
1426        return Err(LuaError::runtime(format_args!(
1427            "invalid replacement value (a {})", tname.escape_ascii()
1428        )));
1429    }
1430    let v = state.to_bytes(-1).unwrap_or_default();
1431    state.pop();
1432    buf.extend_from_slice(&v);
1433    Ok(true)
1434}
1435
1436/// `string.gsub(s, pattern, repl [, n])` — global substitution.
1437///
1438pub fn str_gsub(state: &mut LuaState) -> Result<usize, LuaError> {
1439    let src_bytes = state.check_arg_string(1)?;
1440    let pat_bytes = state.check_arg_string(2)?;
1441    let src_len = src_bytes.len();
1442    let max_s = state.opt_arg_integer(4, (src_len + 1) as i64)?;
1443    let tr = state.type_at(3);
1444
1445    if !matches!(tr, LuaType::Number | LuaType::String | LuaType::Function | LuaType::Table) {
1446        let v = state.arg(3);
1447        return Err(LuaError::type_arg_error(3, "string/function/table", &v));
1448    }
1449
1450    let src_owned = src_bytes;
1451    let pat_owned = pat_bytes;
1452
1453    let anchor = pat_owned.first() == Some(&b'^');
1454    let pat_slice = if anchor { &pat_owned[1..] } else { &pat_owned[..] };
1455
1456    let step_limit = state.sandbox_match_step_limit();
1457    let mut ms = MatchState::new(&src_owned, pat_slice, step_limit);
1458    let mut buf: Vec<u8> = Vec::new();
1459    let mut src_pos = 0usize;
1460    let mut last_match: Option<usize> = None;
1461    let mut n: i64 = 0;
1462    let mut changed = false;
1463
1464    while n < max_s {
1465        ms.reset_level();
1466        let maybe_e = match_pat(&mut ms, src_pos, 0)?;
1467        if let Some(e) = maybe_e {
1468            if last_match != Some(e) {
1469                n += 1;
1470                let delta = add_value(state, &ms, &mut buf, src_pos, e, tr)?;
1471                changed |= delta;
1472                src_pos = e;
1473                last_match = Some(e);
1474            } else if src_pos < ms.src.len() {
1475                buf.push(ms.src[src_pos]);
1476                src_pos += 1;
1477            } else {
1478                break;
1479            }
1480        } else if src_pos < ms.src.len() {
1481            buf.push(ms.src[src_pos]);
1482            src_pos += 1;
1483        } else {
1484            break;
1485        }
1486        if ms.aborted || anchor {
1487            break;
1488        }
1489    }
1490
1491    if let Some(err) = state.sandbox_charge(ms.steps) {
1492        return Err(err);
1493    }
1494
1495    if !changed {
1496        state.push_value_at(1)?;
1497    } else {
1498        buf.extend_from_slice(&ms.src[src_pos..]);
1499        state.push_bytes(&buf)?;
1500    }
1501    state.push(LuaValue::Int(n));
1502    Ok(2)
1503}
1504
1505// ────────────────────────────────────────────────────────────────────────────
1506// §7  String format (`string.format`)
1507// ────────────────────────────────────────────────────────────────────────────
1508
/// Append the integer part of `x` to `buf` as one lowercase hex digit and
/// return the fractional part that is left.
///
/// Callers pass values whose integer part is a single hex digit (0..=15).
fn adddigit(buf: &mut Vec<u8>, x: f64) -> f64 {
    let whole = x.floor();
    let value = whole as i32;
    let glyph = match value {
        10.. => b'a' + (value - 10) as u8,
        _ => b'0' + value as u8,
    };
    buf.push(glyph);
    x - whole
}
1518
1519/// Convert a float to a hex-float string body (digits only, no sign, no `0x` prefix).
1520///
1521/// Returns `(frac_digits, exponent_string)` for use by `format_hex_float`.
1522///
1523fn num2straux(x: f64) -> Vec<u8> {
1524    format_hex_float(x, None)
1525}
1526
1527/// Produce a hex-float string for `x` with optional precision (digits after the point).
1528///
1529/// When `precision` is `None` the minimum number of digits needed for a round-trip
1530/// is emitted (C's default `%a` behaviour). When `precision` is `Some(p)` exactly `p`
1531/// digits follow the radix point; trailing zeros are added as needed, and excess
1532/// digits are discarded (C truncates rather than rounds, matching the C `printf`
1533/// behaviour on the tested platforms).
1534fn format_hex_float(x: f64, precision: Option<usize>) -> Vec<u8> {
1535    if x.is_nan() {
1536        return b"nan".to_vec();
1537    }
1538    if x.is_infinite() {
1539        return if x < 0.0 { b"-inf".to_vec() } else { b"inf".to_vec() };
1540    }
1541    if x == 0.0 {
1542        let sign: &[u8] = if x.is_sign_negative() { b"-" } else { b"" };
1543        return match precision {
1544            None => [sign, b"0x0p+0"].concat(),
1545            Some(0) => [sign, b"0x0p+0"].concat(),
1546            Some(p) => {
1547                let zeros = "0".repeat(p);
1548                [sign, b"0x0.", zeros.as_bytes(), b"p+0"].concat()
1549            }
1550        };
1551    }
1552
1553    let (m_raw, exp) = frexp(x);
1554    let mut buf: Vec<u8> = Vec::new();
1555    let mut m = m_raw;
1556    if m < 0.0 {
1557        buf.push(b'-');
1558        m = -m;
1559    }
1560    buf.extend_from_slice(b"0x");
1561
1562    let nbfd = 1;
1563    m = adddigit(&mut buf, m * (1 << nbfd) as f64);
1564    let e = exp - nbfd;
1565
1566    match precision {
1567        None => {
1568            if m > 0.0 {
1569                buf.push(b'.');
1570                while m > 0.0 {
1571                    m = adddigit(&mut buf, m * 16.0);
1572                }
1573            }
1574        }
1575        Some(0) => {}
1576        Some(p) => {
1577            buf.push(b'.');
1578            for _ in 0..p {
1579                if m > 0.0 {
1580                    m = adddigit(&mut buf, m * 16.0);
1581                } else {
1582                    buf.push(b'0');
1583                }
1584            }
1585        }
1586    }
1587
1588    let exp_str = format!("p{:+}", e);
1589    buf.extend_from_slice(exp_str.as_bytes());
1590    buf
1591}
1592
/// Split `x` into a mantissa in `(-1.0, -0.5] ∪ [0.5, 1.0)` and a power-of-two
/// exponent such that `x == mantissa * 2^exponent` (C's `frexp`).
///
/// Zero, NaN and the infinities are returned unchanged with exponent 0. The
/// sign of `x` is carried by the mantissa.
fn frexp(x: f64) -> (f64, i32) {
    const SIGN_MASK: u64 = 1 << 63;
    const FRACTION_MASK: u64 = (1 << 52) - 1;
    // Exponent field of 0.5, i.e. biased exponent 0x3FE.
    const HALF_EXPONENT: u64 = 0x3FE << 52;

    if x == 0.0 || !x.is_finite() {
        return (x, 0);
    }

    let bits = x.to_bits();
    let biased = ((bits >> 52) & 0x7FF) as i32;
    if biased == 0 {
        // Subnormal: scale into the normal range, then undo the scaling in
        // the exponent.
        let (mantissa, exponent) = frexp(x * (1u64 << 52) as f64);
        return (mantissa, exponent - 52);
    }

    // Keep sign and fraction, force the exponent field to that of 0.5.
    let rebuilt = (bits & (SIGN_MASK | FRACTION_MASK)) | HALF_EXPONENT;
    (f64::from_bits(rebuilt), biased - 1022)
}
1612
1613/// Convert float `n` to a Lua-readable literal (hex or special representation).
1614///
1615fn quotefloat(n: f64) -> Vec<u8> {
1616    if n == f64::INFINITY {
1617        return b"1e9999".to_vec();
1618    } else if n == f64::NEG_INFINITY {
1619        return b"-1e9999".to_vec();
1620    } else if n.is_nan() {
1621        return b"(0/0)".to_vec();
1622    }
1623    // hex float, ensuring dot separator
1624    let buf = num2straux(n);
1625    if !buf.contains(&b'.') && !buf.contains(&b'p') {
1626        // try to find locale decimal point and replace with '.'
1627        // PORT NOTE: We always produce '.' so this branch is not taken.
1628    }
1629    buf
1630}
1631
/// Append `s` to `buf` as a double-quoted Lua string literal.
///
/// `"`, `\` and newline are backslash-escaped as themselves; other ASCII
/// control bytes become decimal escapes (`\N`), padded to three digits when
/// the next byte is a digit so the escape cannot absorb it. All remaining
/// bytes are copied verbatim.
fn addquoted(buf: &mut Vec<u8>, s: &[u8]) {
    buf.push(b'"');
    let mut bytes = s.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'"' | b'\\' | b'\n' => buf.extend_from_slice(&[b'\\', byte]),
            _ if byte.is_ascii_control() => {
                let digit_follows = bytes.peek().is_some_and(|next| next.is_ascii_digit());
                let escape = if digit_follows {
                    format!("\\{:03}", byte)
                } else {
                    format!("\\{}", byte)
                };
                buf.extend_from_slice(escape.as_bytes());
            }
            _ => buf.push(byte),
        }
    }
    buf.push(b'"');
}
1654
1655/// Add a Lua literal representation of arg `n` to `buf`.
1656///
1657fn addliteral(state: &mut LuaState, buf: &mut Vec<u8>, arg: i32) -> Result<(), LuaError> {
1658    match state.type_at(arg) {
1659        LuaType::String => {
1660            let s = state.check_arg_string(arg)?.to_vec();
1661            addquoted(buf, &s);
1662        }
1663        LuaType::Number => {
1664            if state.is_integer(arg) {
1665                let n = state.to_integer(arg).unwrap_or(0);
1666                let formatted = if n == i64::MIN {
1667                    format!("0x{:016x}", n as u64)
1668                } else {
1669                    format!("{}", n)
1670                };
1671                buf.extend_from_slice(formatted.as_bytes());
1672            } else {
1673                let n = state.to_number(arg).unwrap_or(0.0);
1674                let hex = quotefloat(n);
1675                buf.extend_from_slice(&hex);
1676            }
1677        }
1678        LuaType::Nil => {
1679            buf.extend_from_slice(b"nil");
1680        }
1681        LuaType::Boolean => {
1682            buf.extend_from_slice(if state.to_boolean(arg) { b"true" } else { b"false" });
1683        }
1684        _ => {
1685            return Err(LuaError::arg_error(arg, "value has no literal form"));
1686        }
1687    }
1688    Ok(())
1689}
1690
1691
1692/// Flags allowed per conversion type (matches lstrlib.c constants).
1693const FMT_FLAGS_F: &[u8] = b"-+#0 ";
1694const FMT_FLAGS_X: &[u8] = b"-#0";
1695const FMT_FLAGS_I: &[u8] = b"-+0 ";
1696const FMT_FLAGS_U: &[u8] = b"-0";
1697const FMT_FLAGS_C: &[u8] = b"-";
1698
1699/// Validate a format specifier against allowed flags and width/precision digit counts.
1700///
1701/// `form` is the full specifier slice including the leading `%` and the trailing
1702/// conversion character (e.g. `b"%100.3d"`). `flags` is the allowed-flags byte set for
1703/// this conversion type. `allow_precision` is false for conversions that forbid `.`.
1704///
1705/// Mirrors C `checkformat` in lstrlib.c: consumes flags, then up to 2 width digits,
1706/// then (if allowed) `.` + up to 2 precision digits, then asserts we are at the
1707/// conversion character. Returns `Err("invalid conversion specification")` on failure.
1708fn check_conv_spec(
1709    state: &mut LuaState,
1710    form: &[u8],
1711    flags: &[u8],
1712    allow_precision: bool,
1713) -> Result<(), LuaError> {
1714    let mut i = 1usize; // skip '%'
1715    while i < form.len() && flags.contains(&form[i]) {
1716        i += 1;
1717    }
1718    if i < form.len() && form[i] == b'0' {
1719        return Err(invalid_conv_spec(state, form));
1720    }
1721    if i < form.len() && form[i].is_ascii_digit() {
1722        i += 1;
1723        if i < form.len() && form[i].is_ascii_digit() {
1724            i += 1;
1725        }
1726    }
1727    if allow_precision && i < form.len() && form[i] == b'.' {
1728        i += 1;
1729        if i < form.len() && form[i].is_ascii_digit() {
1730            i += 1;
1731            if i < form.len() && form[i].is_ascii_digit() {
1732                i += 1;
1733            }
1734        }
1735    }
1736    if i != form.len() - 1 {
1737        return Err(invalid_conv_spec(state, form));
1738    }
1739    Ok(())
1740}
1741
1742/// Build the version-appropriate "invalid conversion specification" error,
1743/// prefixed with the calling location like reference `luaL_error`.
1744///
1745/// Lua 5.3 `scanformat` raises `invalid format (width or precision too long)`
1746/// with no offending spec; Lua 5.4/5.5 `checkformat` raises
1747/// `invalid conversion specification: '<form>'`.
1748fn invalid_conv_spec(state: &mut LuaState, form: &[u8]) -> LuaError {
1749    let msg: Vec<u8> = if state.global().lua_version == lua_types::LuaVersion::V53 {
1750        b"invalid format (width or precision too long)".to_vec()
1751    } else {
1752        let mut m = b"invalid conversion specification: '".to_vec();
1753        m.extend_from_slice(form);
1754        m.push(b'\'');
1755        m
1756    };
1757    lua_vm::debug::c_api_runtime(state, msg)
1758}
1759
/// Parsed printf-style format specifier (flags, width, precision).
///
/// `Default` is the empty specifier: no flags, width 0, no precision. The
/// remaining derives make the type printable and comparable.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct FmtSpec {
    /// `-` flag: pad on the right instead of the left.
    left_align: bool,
    /// `+` flag.
    plus_sign: bool,
    /// ` ` (space) flag.
    space_sign: bool,
    /// `#` flag.
    alt_form: bool,
    /// `0` flag.
    zero_pad: bool,
    /// Minimum field width; 0 when none was given.
    width: usize,
    /// Digits after `.`, or `None` when the specifier has no `.` part.
    precision: Option<usize>,
}
1771
1772fn parse_fmt_spec(spec: &[u8]) -> FmtSpec {
1773    let mut s = FmtSpec::default();
1774    let mut i = 0;
1775    while i < spec.len() {
1776        match spec[i] {
1777            b'-' => s.left_align = true,
1778            b'+' => s.plus_sign = true,
1779            b' ' => s.space_sign = true,
1780            b'#' => s.alt_form = true,
1781            b'0' => s.zero_pad = true,
1782            _ => break,
1783        }
1784        i += 1;
1785    }
1786    while i < spec.len() && spec[i].is_ascii_digit() {
1787        s.width = s.width * 10 + (spec[i] - b'0') as usize;
1788        i += 1;
1789    }
1790    if i < spec.len() && spec[i] == b'.' {
1791        i += 1;
1792        let mut p = 0usize;
1793        while i < spec.len() && spec[i].is_ascii_digit() {
1794            p = p * 10 + (spec[i] - b'0') as usize;
1795            i += 1;
1796        }
1797        s.precision = Some(p);
1798    }
1799    s
1800}
1801
1802fn pad_str(buf: &mut Vec<u8>, body: &[u8], spec: &FmtSpec) {
1803    let body = match spec.precision {
1804        Some(p) if body.len() > p => &body[..p],
1805        _ => body,
1806    };
1807    if body.len() >= spec.width {
1808        buf.extend_from_slice(body);
1809        return;
1810    }
1811    let pad = spec.width - body.len();
1812    if spec.left_align {
1813        buf.extend_from_slice(body);
1814        for _ in 0..pad { buf.push(b' '); }
1815    } else {
1816        for _ in 0..pad { buf.push(b' '); }
1817        buf.extend_from_slice(body);
1818    }
1819}
1820
1821fn pad_int(buf: &mut Vec<u8>, sign_prefix: &[u8], digits: &[u8], spec: &FmtSpec) {
1822    let min_digits = spec.precision.unwrap_or(0);
1823    let zeroes_for_prec = if digits.len() < min_digits { min_digits - digits.len() } else { 0 };
1824    let core_len = sign_prefix.len() + zeroes_for_prec + digits.len();
1825    if core_len >= spec.width {
1826        buf.extend_from_slice(sign_prefix);
1827        for _ in 0..zeroes_for_prec { buf.push(b'0'); }
1828        buf.extend_from_slice(digits);
1829        return;
1830    }
1831    let pad = spec.width - core_len;
1832    let use_zero_pad = spec.zero_pad && !spec.left_align && spec.precision.is_none();
1833    if spec.left_align {
1834        buf.extend_from_slice(sign_prefix);
1835        for _ in 0..zeroes_for_prec { buf.push(b'0'); }
1836        buf.extend_from_slice(digits);
1837        for _ in 0..pad { buf.push(b' '); }
1838    } else if use_zero_pad {
1839        buf.extend_from_slice(sign_prefix);
1840        for _ in 0..pad { buf.push(b'0'); }
1841        for _ in 0..zeroes_for_prec { buf.push(b'0'); }
1842        buf.extend_from_slice(digits);
1843    } else {
1844        for _ in 0..pad { buf.push(b' '); }
1845        buf.extend_from_slice(sign_prefix);
1846        for _ in 0..zeroes_for_prec { buf.push(b'0'); }
1847        buf.extend_from_slice(digits);
1848    }
1849}
1850
1851fn signed_int_parts(n: i64, spec: &FmtSpec) -> (Vec<u8>, Vec<u8>) {
1852    if n == 0 && spec.precision == Some(0) {
1853        return (Vec::new(), Vec::new());
1854    }
1855    let (sign, abs_digits) = if n < 0 {
1856        (b"-".to_vec(), {
1857            let u = (n as i128).unsigned_abs();
1858            format!("{}", u).into_bytes()
1859        })
1860    } else {
1861        let s: Vec<u8> = if spec.plus_sign {
1862            b"+".to_vec()
1863        } else if spec.space_sign {
1864            b" ".to_vec()
1865        } else {
1866            Vec::new()
1867        };
1868        (s, format!("{}", n).into_bytes())
1869    };
1870    (sign, abs_digits)
1871}
1872
1873fn unsigned_int_parts(n: u64, base: u32, upper: bool, spec: &FmtSpec) -> (Vec<u8>, Vec<u8>) {
1874    let digits = if n == 0 && spec.precision == Some(0) {
1875        Vec::new()
1876    } else {
1877        match base {
1878            8 => format!("{:o}", n).into_bytes(),
1879            16 if upper => format!("{:X}", n).into_bytes(),
1880            16 => format!("{:x}", n).into_bytes(),
1881            _ => format!("{}", n).into_bytes(),
1882        }
1883    };
1884    let prefix: Vec<u8> = if spec.alt_form && n != 0 {
1885        match base {
1886            8 => b"0".to_vec(),
1887            16 if upper => b"0X".to_vec(),
1888            16 => b"0x".to_vec(),
1889            _ => Vec::new(),
1890        }
1891    } else {
1892        Vec::new()
1893    };
1894    (prefix, digits)
1895}
1896
1897fn format_float(n: f64, conv: u8, spec: &FmtSpec) -> Vec<u8> {
1898    let prec = spec.precision.unwrap_or(6);
1899    if n.is_nan() {
1900        return if conv.is_ascii_uppercase() { b"NAN".to_vec() } else { b"nan".to_vec() };
1901    }
1902    if n.is_infinite() {
1903        let s: &[u8] = if conv.is_ascii_uppercase() {
1904            if n < 0.0 { b"-INF" } else { b"INF" }
1905        } else if n < 0.0 { b"-inf" } else { b"inf" };
1906        return s.to_vec();
1907    }
1908    match conv {
1909        b'f' | b'F' => {
1910            let mut result = format!("{:.*}", prec, n).into_bytes();
1911            if spec.alt_form && !result.contains(&b'.') {
1912                result.push(b'.');
1913            }
1914            result
1915        }
1916        b'e' => format_exp(n, prec, false, spec.alt_form),
1917        b'E' => {
1918            let mut v = format_exp(n, prec, false, spec.alt_form);
1919            for b in v.iter_mut() { if *b == b'e' { *b = b'E'; } }
1920            v
1921        }
1922        b'g' | b'G' => {
1923            let p = if prec == 0 { 1 } else { prec };
1924            let v = format_g(n, p, spec.alt_form);
1925            if conv == b'G' {
1926                v.into_iter().map(|b| if b == b'e' { b'E' } else { b }).collect()
1927            } else { v }
1928        }
1929        _ => format!("{}", n).into_bytes(),
1930    }
1931}
1932
/// Render `n` in C `%e` style: one digit, `prec` fractional digits, then
/// `e`, an explicit exponent sign and at least two exponent digits
/// (`1.23e+02`).
///
/// The digits come from Rust's own exponential formatter rather than from
/// `log10` / `powi` arithmetic, so the result stays correct for subnormals
/// and for values that round up to the next power of ten. Negative zero
/// keeps its sign.
///
/// `alt` (the `#` flag) forces a decimal point even when `prec` is 0.
/// `_upper` is accepted for signature compatibility and ignored; callers
/// upper-case the exponent marker themselves. Non-finite input, which callers
/// are expected to filter out, is returned in Rust's default spelling.
fn format_exp(n: f64, prec: usize, _upper: bool, alt: bool) -> Vec<u8> {
    let rendered = format!("{:.*e}", prec, n);
    // Rust writes "d.ddde-7" / "d.ddde12"; only the exponent needs reshaping.
    let Some(e_at) = rendered.find('e') else {
        return rendered.into_bytes();
    };
    let (mantissa, exponent) = (&rendered[..e_at], &rendered[e_at + 1..]);
    let (exp_sign, exp_digits) = match exponent.strip_prefix('-') {
        Some(magnitude) => (b'-', magnitude),
        None => (b'+', exponent),
    };

    let mut out = Vec::with_capacity(rendered.len() + 3);
    out.extend_from_slice(mantissa.as_bytes());
    if alt && !mantissa.contains('.') {
        out.push(b'.');
    }
    out.push(b'e');
    out.push(exp_sign);
    if exp_digits.len() < 2 {
        out.push(b'0');
    }
    out.extend_from_slice(exp_digits.as_bytes());
    out
}

/// Render `n` in C `%g` style with `prec` significant digits (0 is treated
/// as 1).
///
/// Following the C definition, the choice between fixed and exponential
/// notation uses the exponent `X` of the value *after* rounding to `prec`
/// significant digits: exponential when `X < -4` or `X >= prec`, fixed with
/// `prec - 1 - X` fractional digits otherwise. Trailing zeros (and a then
/// dangling decimal point) are removed unless `alt` is set; with `alt` a
/// decimal point is always present.
fn format_g(n: f64, prec: usize, alt: bool) -> Vec<u8> {
    let sig = prec.max(1);

    // Exponent of the correctly rounded value, read back from its
    // exponential rendering. Non-finite input has no exponent; use 0.
    let sci = format!("{:.*e}", sig - 1, n);
    let exp: i32 = sci
        .rfind('e')
        .and_then(|at| sci[at + 1..].parse().ok())
        .unwrap_or(0);

    // A `sig` too large for i32 can never be reached by a float exponent.
    let exp_too_large = i32::try_from(sig).map_or(false, |limit| exp >= limit);
    if exp < -4 || exp_too_large {
        let rendered = format_exp(n, sig - 1, false, alt);
        return if alt { rendered } else { strip_trailing_zeros_exp(&rendered) };
    }

    // Here -4 <= exp < sig, so neither branch below can underflow.
    let shift = exp.unsigned_abs() as usize;
    let dec_places = if exp < 0 { sig - 1 + shift } else { sig - 1 - shift };
    let mut rendered = format!("{:.*}", dec_places, n).into_bytes();
    if alt {
        if !rendered.contains(&b'.') {
            rendered.push(b'.');
        }
        rendered
    } else {
        strip_trailing_zeros_fixed(&rendered)
    }
}

/// Remove trailing fractional zeros from a fixed-notation number, and the
/// decimal point too if nothing is left after it. Input without a decimal
/// point is returned unchanged.
fn strip_trailing_zeros_fixed(s: &[u8]) -> Vec<u8> {
    if !s.contains(&b'.') {
        return s.to_vec();
    }
    let mut kept = s;
    while let [head @ .., b'0'] = kept {
        kept = head;
    }
    if let [head @ .., b'.'] = kept {
        kept = head;
    }
    kept.to_vec()
}

/// Remove trailing fractional zeros from the mantissa of an
/// exponential-notation number, leaving the exponent part (from `e` / `E`
/// onward) untouched. Input without an exponent marker is returned unchanged.
fn strip_trailing_zeros_exp(s: &[u8]) -> Vec<u8> {
    let Some(e_at) = s.iter().position(|&b| b == b'e' || b == b'E') else {
        return s.to_vec();
    };
    let (mantissa, exp_part) = s.split_at(e_at);
    let mut out = strip_trailing_zeros_fixed(mantissa);
    out.extend_from_slice(exp_part);
    out
}
2026
2027/// `string.format(fmt, ...)` — C-style string formatting.
2028///
2029/// Fetch the integer argument for a `%d`/`%i`/`%u`/`%o`/`%x`/`%X` conversion.
2030///
2031/// On the dual-number versions (5.3+) an integer is required and a non-integral
2032/// number raises "number has no integer representation". On the float-only
2033/// versions (5.1/5.2) there is no integer subtype, so `string.format` truncates
2034/// the number toward zero — `("%d"):format(3.5)` is `3`, `(-3.5)` is `-3` —
2035/// matching lua5.2.4. A value outside the `lua_Integer` range (including inf/nan)
2036/// raises "number has no integer representation", which lua5.2.4 phrases as
2037/// "not a number in proper range"; the harness battery checks the truncation
2038/// cases (the out-of-range message text is a separate 5.2 error-format gap).
2039fn format_int_arg(state: &mut LuaState, arg: i32) -> Result<i64, LuaError> {
2040    if state.global().lua_version.number_model() != lua_types::NumberModel::FloatOnly {
2041        return state.check_arg_integer(arg);
2042    }
2043    let n = state.check_arg_number(arg)?;
2044    let t = n.trunc();
2045    if t.is_finite() && (-9223372036854775808.0..=9223372036854775808.0).contains(&t) {
2046        Ok(t as i64)
2047    } else {
2048        Err(LuaError::arg_error(arg, "number has no integer representation"))
2049    }
2050}
2051
2052pub fn str_format(state: &mut LuaState) -> Result<usize, LuaError> {
2053    let top = state.get_top();
2054    let mut arg = 1i32;
2055    let fmt_bytes = state.check_arg_string(1)?.to_vec();
2056    let mut buf: Vec<u8> = Vec::new();
2057    let mut i = 0usize;
2058
2059    while i < fmt_bytes.len() {
2060        let c = fmt_bytes[i];
2061        if c != L_ESC {
2062            buf.push(c);
2063            i += 1;
2064            continue;
2065        }
2066        i += 1;
2067        if i >= fmt_bytes.len() {
2068            break;
2069        }
2070        if fmt_bytes[i] == L_ESC {
2071            buf.push(L_ESC);
2072            i += 1;
2073            continue;
2074        }
2075
2076        // Parse a format specifier
2077        arg += 1;
2078        if arg > top {
2079            return Err(lua_vm::debug::arg_error_impl(state, arg, b"no value"));
2080        }
2081
2082        // Collect flags, width, precision
2083        let spec_start = i - 1; // includes the initial '%'
2084        // Skip flags: -, +, #, 0, space
2085        while i < fmt_bytes.len() && b"-+#0 ".contains(&fmt_bytes[i]) {
2086            i += 1;
2087        }
2088        // Lua 5.3 `scanformat`: the flags buffer is `FLAGS = "-+ #0"`, so a flags
2089        // run of `sizeof(FLAGS) == 6` or more characters is "repeated flags".
2090        // 5.4/5.5 fold this into the single "(too long)" check below.
2091        if state.global().lua_version == lua_types::LuaVersion::V53
2092            && i - (spec_start + 1) >= 6
2093        {
2094            return Err(lua_vm::debug::c_api_runtime(
2095                state,
2096                b"invalid format (repeated flags)".to_vec(),
2097            ));
2098        }
2099        // Skip width digits
2100        if i < fmt_bytes.len() && fmt_bytes[i] != b'0' {
2101            while i < fmt_bytes.len() && fmt_bytes[i].is_ascii_digit() {
2102                i += 1;
2103            }
2104        }
2105        // Skip precision
2106        if i < fmt_bytes.len() && fmt_bytes[i] == b'.' {
2107            i += 1;
2108            while i < fmt_bytes.len() && fmt_bytes[i].is_ascii_digit() {
2109                i += 1;
2110            }
2111        }
2112
2113        if i >= fmt_bytes.len() {
2114            let form: Vec<u8> = fmt_bytes[spec_start..].to_vec();
2115            return Err(invalid_conv_spec(state, &form));
2116        }
2117
2118        let conv = fmt_bytes[i];
2119        i += 1;
2120
2121        let spec_slice = &fmt_bytes[spec_start + 1..i - 1];
2122        let form = &fmt_bytes[spec_start..i];
2123
2124        // Must check before parse_fmt_spec to avoid overflow on huge widths.
2125        if spec_slice.len() + 1 >= 22 {
2126            return Err(lua_vm::debug::c_api_runtime(state, b"invalid format (too long)".to_vec()));
2127        }
2128
2129        let spec = parse_fmt_spec(spec_slice);
2130
2131        match conv {
2132            b'c' => {
2133                check_conv_spec(state, form, FMT_FLAGS_C, false)?;
2134                let n = state.check_arg_integer(arg)?;
2135                let body = vec![n as u8];
2136                pad_str(&mut buf, &body, &spec);
2137            }
2138            b'd' | b'i' => {
2139                check_conv_spec(state, form, FMT_FLAGS_I, true)?;
2140                let n = format_int_arg(state, arg)?;
2141                let (sign, digits) = signed_int_parts(n, &spec);
2142                pad_int(&mut buf, &sign, &digits, &spec);
2143            }
2144            b'u' => {
2145                check_conv_spec(state, form, FMT_FLAGS_U, true)?;
2146                let n = format_int_arg(state, arg)? as u64;
2147                let (prefix, digits) = unsigned_int_parts(n, 10, false, &spec);
2148                pad_int(&mut buf, &prefix, &digits, &spec);
2149            }
2150            b'o' => {
2151                check_conv_spec(state, form, FMT_FLAGS_X, true)?;
2152                let n = format_int_arg(state, arg)? as u64;
2153                let (prefix, digits) = unsigned_int_parts(n, 8, false, &spec);
2154                pad_int(&mut buf, &prefix, &digits, &spec);
2155            }
2156            b'x' => {
2157                check_conv_spec(state, form, FMT_FLAGS_X, true)?;
2158                let n = format_int_arg(state, arg)? as u64;
2159                let (prefix, digits) = unsigned_int_parts(n, 16, false, &spec);
2160                pad_int(&mut buf, &prefix, &digits, &spec);
2161            }
2162            b'X' => {
2163                check_conv_spec(state, form, FMT_FLAGS_X, true)?;
2164                let n = format_int_arg(state, arg)? as u64;
2165                let (prefix, digits) = unsigned_int_parts(n, 16, true, &spec);
2166                pad_int(&mut buf, &prefix, &digits, &spec);
2167            }
2168            b'a' | b'A' => {
2169                check_conv_spec(state, form, FMT_FLAGS_F, true)?;
2170                let n = state.check_arg_number(arg)?;
2171                let body = format_hex_float(n, spec.precision);
2172                let body: Vec<u8> = if conv == b'A' {
2173                    body.into_iter().map(|b| b.to_ascii_uppercase()).collect()
2174                } else {
2175                    body
2176                };
2177                let (sign, digits): (Vec<u8>, Vec<u8>) =
2178                    if !body.is_empty() && (body[0] == b'-' || body[0] == b'+') {
2179                        (vec![body[0]], body[1..].to_vec())
2180                    } else if spec.plus_sign {
2181                        (b"+".to_vec(), body)
2182                    } else if spec.space_sign {
2183                        (b" ".to_vec(), body)
2184                    } else {
2185                        (Vec::new(), body)
2186                    };
2187                let no_prec_spec = FmtSpec {
2188                    left_align: spec.left_align,
2189                    plus_sign: spec.plus_sign,
2190                    space_sign: spec.space_sign,
2191                    alt_form: spec.alt_form,
2192                    zero_pad: spec.zero_pad,
2193                    width: spec.width,
2194                    precision: None,
2195                };
2196                pad_int(&mut buf, &sign, &digits, &no_prec_spec);
2197            }
2198            b'f' | b'e' | b'E' | b'g' | b'G' => {
2199                check_conv_spec(state, form, FMT_FLAGS_F, true)?;
2200                let n = state.check_arg_number(arg)?;
2201                let body = format_float(n, conv, &spec);
2202                let (sign, digits): (Vec<u8>, Vec<u8>) = if !body.is_empty() && (body[0] == b'-' || body[0] == b'+') {
2203                    (vec![body[0]], body[1..].to_vec())
2204                } else if n >= 0.0 && spec.plus_sign {
2205                    (b"+".to_vec(), body)
2206                } else if n >= 0.0 && spec.space_sign {
2207                    (b" ".to_vec(), body)
2208                } else {
2209                    (Vec::new(), body)
2210                };
2211                let no_prec_spec = FmtSpec {
2212                    left_align: spec.left_align,
2213                    plus_sign: spec.plus_sign,
2214                    space_sign: spec.space_sign,
2215                    alt_form: spec.alt_form,
2216                    zero_pad: spec.zero_pad,
2217                    width: spec.width,
2218                    precision: None,
2219                };
2220                pad_int(&mut buf, &sign, &digits, &no_prec_spec);
2221            }
2222            b'p' => {
2223                check_conv_spec(state, form, FMT_FLAGS_C, false)?;
2224                let s: Vec<u8> = match lua_vm::api::to_pointer(state, arg) {
2225                    Some(p) => format!("0x{:x}", p).into_bytes(),
2226                    None => b"(null)".to_vec(),
2227                };
2228                pad_str(&mut buf, &s, &FmtSpec { precision: None, ..spec });
2229            }
2230            b'q' => {
2231                if form.len() > 2 {
2232                    return Err(LuaError::runtime(format_args!(
2233                        "specifier '%q' cannot have modifiers"
2234                    )));
2235                }
2236                addliteral(state, &mut buf, arg)?;
2237            }
2238            b's' => {
2239                check_conv_spec(state, form, FMT_FLAGS_C, true)?;
2240                let s = state.to_display_string(arg)?;
2241                let has_modifiers = spec.width != 0 || spec.precision.is_some();
2242                if has_modifiers && s.contains(&0u8) {
2243                    return Err(lua_vm::debug::arg_error_impl(state, arg, b"string contains zeros"));
2244                }
2245                pad_str(&mut buf, &s, &spec);
2246                state.pop_n(1);
2247            }
2248            _ => {
2249                let verb: &[u8] = if state.global().lua_version == lua_types::LuaVersion::V53 {
2250                    b"option"
2251                } else {
2252                    b"conversion"
2253                };
2254                let mut msg = b"invalid ".to_vec();
2255                msg.extend_from_slice(verb);
2256                msg.extend_from_slice(b" '");
2257                msg.extend_from_slice(form);
2258                msg.extend_from_slice(b"' to 'format'");
2259                return Err(lua_vm::debug::c_api_runtime(state, msg));
2260            }
2261        }
2262    }
2263
2264    state.push_bytes(&buf)?;
2265    Ok(1)
2266}
2267
2268// ────────────────────────────────────────────────────────────────────────────
2269// §8  Pack / unpack
2270// ────────────────────────────────────────────────────────────────────────────
2271
/// Whether `c` is one of the ASCII digits `0`–`9`.
fn is_digit(c: u8) -> bool {
    matches!(c, b'0'..=b'9')
}

/// Parse an optional decimal number at `fmt[*pos..]`, advancing `*pos` past
/// the digits consumed. Returns `df` (and leaves `*pos` alone) when there is
/// no digit at the current position.
///
/// `wide` picks the accumulator limit: 5.3/5.4 accumulate in an `int`
/// (limit `i32::MAX`), 5.5 in a `size_t` (modelled here as `i64::MAX`). Like
/// the reference, reading stops as soon as one more `*10 + 9` step could
/// exceed the limit; the unread digits are left for the caller to see as the
/// next option. That is why `c<int-overflow>` yields "invalid format option
/// '<digit>'" on 5.3/5.4 but parses cleanly on 5.5.
fn getnum(fmt: &[u8], pos: &mut usize, df: i64, wide: bool) -> i64 {
    let limit = if wide { i64::MAX } else { i64::from(i32::MAX) };
    // Largest value that can still take another digit without overflowing.
    let stop_above = (limit - 9) / 10;

    let mut value: Option<i64> = None;
    while let Some(&byte) = fmt.get(*pos).filter(|b| is_digit(**b)) {
        let next = value.unwrap_or(0) * 10 + i64::from(byte - b'0');
        *pos += 1;
        value = Some(next);
        if next > stop_above {
            break;
        }
    }
    value.unwrap_or(df)
}
2299
2300/// Read an integer from the format string, error if out of `[1, MAXINTSIZE]`.
2301///
2302fn getnumlimit(fmt: &[u8], pos: &mut usize, df: i64) -> Result<usize, LuaError> {
2303    let sz = getnum(fmt, pos, df, false);
2304    if sz > MAX_INT_SIZE as i64 || sz <= 0 {
2305        return Err(LuaError::runtime(format_args!(
2306            "integral size ({}) out of limits [1,{}]",
2307            sz, MAX_INT_SIZE
2308        )));
2309    }
2310    Ok(sz as usize)
2311}
2312
2313/// Read and classify the next pack format option, filling `size`.
2314///
2315fn getoption(h: &mut Header, fmt: &[u8], pos: &mut usize, size: &mut usize) -> Result<KOption, LuaError> {
2316    // In Rust, the native max-align of a union of f64/void*/size_t is 8 on 64-bit.
2317    const NATIVE_MAX_ALIGN: usize = std::mem::align_of::<f64>();
2318
2319    if *pos >= fmt.len() {
2320        return Ok(KOption::Nop);
2321    }
2322    let opt = fmt[*pos];
2323    *pos += 1;
2324    *size = 0;
2325
2326    match opt {
2327        b'b' => { *size = 1; Ok(KOption::Int) }
2328        b'B' => { *size = 1; Ok(KOption::Uint) }
2329        b'h' => { *size = 2; Ok(KOption::Int) }
2330        b'H' => { *size = 2; Ok(KOption::Uint) }
2331        b'l' => { *size = 8; Ok(KOption::Int) }  // sizeof(long) on 64-bit
2332        b'L' => { *size = 8; Ok(KOption::Uint) }
2333        b'j' => { *size = SZINT; Ok(KOption::Int) }
2334        b'J' => { *size = SZINT; Ok(KOption::Uint) }
2335        b'T' => { *size = std::mem::size_of::<usize>(); Ok(KOption::Uint) }
2336        b'f' => { *size = 4; Ok(KOption::Float) }
2337        b'n' => { *size = 8; Ok(KOption::Number) }  // sizeof(lua_Number) = sizeof(f64) = 8
2338        b'd' => { *size = 8; Ok(KOption::Double) }  // sizeof(double) = 8
2339        b'i' => { *size = getnumlimit(fmt, pos, 4)?; Ok(KOption::Int) }
2340        b'I' => { *size = getnumlimit(fmt, pos, 4)?; Ok(KOption::Uint) }
2341        b's' => { *size = getnumlimit(fmt, pos, std::mem::size_of::<usize>()  as i64)?; Ok(KOption::Kstring) }
2342        b'c' => {
2343            let n = getnum(fmt, pos, -1, h.wide_size);
2344            if n == -1 {
2345                return Err(LuaError::runtime(format_args!("missing size for format option 'c'")));
2346            }
2347            *size = n as usize;
2348            Ok(KOption::Char)
2349        }
2350        b'z' => Ok(KOption::Zstr),
2351        b'x' => { *size = 1; Ok(KOption::Padding) }
2352        b'X' => Ok(KOption::Paddalign),
2353        b' ' => Ok(KOption::Nop),
2354        b'<' => { h.is_little = true; Ok(KOption::Nop) }
2355        b'>' => { h.is_little = false; Ok(KOption::Nop) }
2356        b'=' => { h.is_little = cfg!(target_endian = "little"); Ok(KOption::Nop) }
2357        b'!' => {
2358            let n = getnum(fmt, pos, NATIVE_MAX_ALIGN as i64, false);
2359            h.max_align = getnumlimit(fmt, pos, n)?;
2360            Ok(KOption::Nop)
2361        }
2362        _ => Err(LuaError::runtime(format_args!("invalid format option '{}'", opt as char)))
2363    }
2364}
2365
2366/// Get full details about the next format option, including alignment padding.
2367///
2368fn getdetails(
2369    state: &mut LuaState,
2370    h: &mut Header,
2371    total_size: usize,
2372    fmt: &[u8],
2373    pos: &mut usize,
2374    psize: &mut usize,
2375    ntoalign: &mut usize,
2376) -> Result<KOption, LuaError> {
2377    let opt = getoption(h, fmt, pos, psize)?;
2378    let mut align = *psize;
2379
2380    if opt == KOption::Paddalign {
2381        if *pos >= fmt.len() {
2382            return Err(lua_vm::debug::arg_error_impl(state, 1, b"invalid next option for option 'X'"));
2383        }
2384        let mut dummy_size = 0usize;
2385        let next_opt = getoption(h, fmt, pos, &mut dummy_size)?;
2386        align = dummy_size;
2387        if next_opt == KOption::Char || align == 0 {
2388            return Err(lua_vm::debug::arg_error_impl(state, 1, b"invalid next option for option 'X'"));
2389        }
2390    }
2391
2392    if align <= 1 || opt == KOption::Char {
2393        *ntoalign = 0;
2394    } else {
2395        if align > h.max_align {
2396            align = h.max_align;
2397        }
2398        if (align & (align - 1)) != 0 {
2399            return Err(lua_vm::debug::arg_error_impl(state, 1, b"format asks for alignment not power of 2"));
2400        }
2401        *ntoalign = (align - (total_size & (align - 1))) & (align - 1);
2402    }
2403    Ok(opt)
2404}
2405
2406/// Pack integer `n` with `size` bytes into `buf` with given endianness.
2407///
2408fn packint(buf: &mut Vec<u8>, mut n: u64, is_little: bool, size: usize, neg: bool) {
2409    let start = buf.len();
2410    buf.resize(start + size, 0);
2411    let slice = &mut buf[start..start + size];
2412    // Write LSB first (little-endian), then swap if big-endian
2413    for i in 0..size {
2414        slice[if is_little { i } else { size - 1 - i }] = (n & MC as u64) as u8;
2415        n >>= NB;
2416    }
2417    // Sign extension for negative numbers larger than lua_Integer
2418    if neg && size > SZINT {
2419        for i in SZINT..size {
2420            slice[if is_little { i } else { size - 1 - i }] = MC;
2421        }
2422    }
2423}
2424
/// Copy `src` (in host byte order) into `dest` in the requested byte order.
///
/// When `is_little` matches the host's endianness the bytes are copied as
/// they are; otherwise they are written in reverse. Both slices are expected
/// to have the same length (checked in debug builds).
fn copywithendian(dest: &mut [u8], src: &[u8], is_little: bool) {
    debug_assert_eq!(dest.len(), src.len());
    let host_is_little = cfg!(target_endian = "little");
    if is_little != host_is_little {
        dest.iter_mut()
            .zip(src.iter().rev())
            .for_each(|(out, byte)| *out = *byte);
        return;
    }
    dest.copy_from_slice(src);
}
2437
2438/// Unpack a (possibly signed) integer from `data[0..size]`.
2439///
2440fn unpackint(_state: &LuaState, data: &[u8], is_little: bool, size: usize, is_signed: bool) -> Result<i64, LuaError> {
2441    let limit = size.min(SZINT);
2442    let mut res: u64 = 0;
2443    for i in (0..limit).rev() {
2444        res <<= NB;
2445        let byte_idx = if is_little { i } else { size - 1 - i };
2446        res |= data[byte_idx] as u64;
2447    }
2448
2449    if size < SZINT {
2450        if is_signed {
2451            let mask: u64 = 1u64 << (size * NB as usize - 1);
2452            res = (res ^ mask).wrapping_sub(mask);
2453        }
2454    } else if size > SZINT {
2455        let mask = if !is_signed || (res as i64) >= 0 { 0u8 } else { MC };
2456        for i in limit..size {
2457            let byte_idx = if is_little { i } else { size - 1 - i };
2458            if data[byte_idx] != mask {
2459                return Err(LuaError::runtime(format_args!(
2460                    "{}-byte integer does not fit into Lua Integer", size
2461                )));
2462            }
2463        }
2464    }
2465    Ok(res as i64)
2466}
2467
2468/// `string.pack(fmt, ...)` — pack values into a binary string.
2469///
2470pub fn str_pack(state: &mut LuaState) -> Result<usize, LuaError> {
2471    let fmt_bytes = state.check_arg_string(1)?.to_vec();
2472    let fmt = &fmt_bytes[..];
2473    let mut h = Header::new(state.global().lua_version == lua_types::LuaVersion::V55);
2474    let mut arg = 1i32;
2475    let mut total_size = 0usize;
2476    let mut buf: Vec<u8> = Vec::new();
2477    let mut pos = 0usize;
2478
2479    while pos < fmt.len() {
2480        let mut size = 0usize;
2481        let mut ntoalign = 0usize;
2482        let opt = getdetails(state, &mut h, total_size, fmt, &mut pos, &mut size, &mut ntoalign)?;
2483        // 5.5 `str_pack` rejects an oversized running total ("result too long")
2484        // BEFORE consuming the value argument; 5.3/5.4 have no such check (their
2485        // `int` sizes cannot reach the limit). MAX_SIZE is the host pointer width.
2486        if h.wide_size {
2487            let space = ntoalign + size;
2488            if space > (i64::MAX as usize) || total_size > (i64::MAX as usize) - space {
2489                return Err(lua_vm::debug::arg_error_impl(state, arg, b"result too long"));
2490            }
2491        }
2492        total_size += ntoalign + size;
2493        for _ in 0..ntoalign {
2494            buf.push(PACK_PAD_BYTE);
2495        }
2496        arg += 1;
2497
2498        match opt {
2499            KOption::Int => {
2500                let n = state.check_arg_integer(arg)?;
2501                if size < SZINT {
2502                    let lim: i64 = 1i64 << (size * NB as usize - 1);
2503                    if !(-lim <= n && n < lim) {
2504                        return Err(lua_vm::debug::arg_error_impl(state, arg, b"integer overflow"));
2505                    }
2506                }
2507                packint(&mut buf, n as u64, h.is_little, size, n < 0);
2508            }
2509            KOption::Uint => {
2510                let n = state.check_arg_integer(arg)?;
2511                if size < SZINT {
2512                    let lim: u64 = 1u64 << (size * NB as usize);
2513                    if (n as u64) >= lim {
2514                        return Err(lua_vm::debug::arg_error_impl(state, arg, b"unsigned overflow"));
2515                    }
2516                }
2517                packint(&mut buf, n as u64, h.is_little, size, false);
2518            }
2519            KOption::Float => {
2520                let f = state.check_arg_number(arg)? as f32;
2521                let start = buf.len();
2522                buf.resize(start + 4, 0);
2523                copywithendian(&mut buf[start..start + 4], &f.to_bits().to_ne_bytes(), h.is_little);
2524            }
2525            KOption::Number => {
2526                let f = state.check_arg_number(arg)?;
2527                let start = buf.len();
2528                buf.resize(start + 8, 0);
2529                copywithendian(&mut buf[start..start + 8], &f.to_bits().to_ne_bytes(), h.is_little);
2530            }
2531            KOption::Double => {
2532                let f = state.check_arg_number(arg)? as f64;
2533                let start = buf.len();
2534                buf.resize(start + 8, 0);
2535                copywithendian(&mut buf[start..start + 8], &f.to_bits().to_ne_bytes(), h.is_little);
2536            }
2537            KOption::Char => {
2538                let s = state.check_arg_string(arg)?.to_vec();
2539                if s.len() > size {
2540                    return Err(lua_vm::debug::arg_error_impl(state, arg, b"string longer than given size"));
2541                }
2542                buf.extend_from_slice(&s);
2543                let pad = size - s.len();
2544                for _ in 0..pad {
2545                    buf.push(PACK_PAD_BYTE);
2546                }
2547            }
2548            KOption::Kstring => {
2549                let s = state.check_arg_string(arg)?.to_vec();
2550                let len = s.len();
2551                if size < SZINT && len >= (1usize << (size * 8)) {
2552                    return Err(lua_vm::debug::arg_error_impl(state, arg, b"string length does not fit in given size"));
2553                }
2554                packint(&mut buf, len as u64, h.is_little, size, false);
2555                buf.extend_from_slice(&s);
2556                total_size += len;
2557            }
2558            KOption::Zstr => {
2559                let s = state.check_arg_string(arg)?.to_vec();
2560                if s.contains(&0) {
2561                    return Err(lua_vm::debug::arg_error_impl(state, arg, b"string contains zeros"));
2562                }
2563                buf.extend_from_slice(&s);
2564                buf.push(0);
2565                total_size += s.len() + 1;
2566            }
2567            KOption::Padding => {
2568                buf.push(PACK_PAD_BYTE);
2569                arg -= 1; // undo increment
2570            }
2571            KOption::Paddalign | KOption::Nop => {
2572                arg -= 1; // undo increment
2573            }
2574        }
2575    }
2576
2577    state.push_bytes(&buf)?;
2578    Ok(1)
2579}
2580
2581/// `string.packsize(fmt)` — return the byte-size the format would produce.
2582///
2583pub fn str_packsize(state: &mut LuaState) -> Result<usize, LuaError> {
2584    let fmt_bytes = state.check_arg_string(1)?.to_vec();
2585    let fmt = &fmt_bytes[..];
2586    let mut h = Header::new(state.global().lua_version == lua_types::LuaVersion::V55);
2587    let mut total_size = 0usize;
2588    let mut pos = 0usize;
2589
2590    while pos < fmt.len() {
2591        let mut size = 0usize;
2592        let mut ntoalign = 0usize;
2593        let opt = getdetails(state, &mut h, total_size, fmt, &mut pos, &mut size, &mut ntoalign)?;
2594        if opt == KOption::Kstring || opt == KOption::Zstr {
2595            return Err(lua_vm::debug::arg_error_impl(state, 1, b"variable-length format"));
2596        }
2597        let space = ntoalign + size;
2598        let max_total: usize = if h.wide_size {
2599            i64::MAX as usize
2600        } else {
2601            PACK_MAXSIZE
2602        };
2603        if space > max_total || total_size > max_total - space {
2604            return Err(lua_vm::debug::arg_error_impl(state, 1, b"format result too large"));
2605        }
2606        total_size += space;
2607    }
2608    state.push(LuaValue::Int(total_size as i64));
2609    Ok(1)
2610}
2611
2612/// `string.unpack(fmt, s [, pos])` — unpack binary data from string.
2613///
2614pub fn str_unpack(state: &mut LuaState) -> Result<usize, LuaError> {
2615    let fmt_bytes = state.check_arg_string(1)?.to_vec();
2616    let data_bytes = state.check_arg_string(2)?.to_vec();
2617    let ld = data_bytes.len();
2618    let pos_raw = state.opt_arg_integer(3, 1)?;
2619    let mut pos = if matches!(state.global().lua_version, lua_types::LuaVersion::V53) {
2620        posrelat_53(pos_raw, ld).wrapping_sub(1)
2621    } else {
2622        pos_relat_i(pos_raw, ld).saturating_sub(1)
2623    };
2624
2625    if pos > ld {
2626        return Err(lua_vm::debug::arg_error_impl(state, 3, b"initial position out of string"));
2627    }
2628
2629    let fmt = &fmt_bytes[..];
2630    let data = &data_bytes[..];
2631    let mut h = Header::new(state.global().lua_version == lua_types::LuaVersion::V55);
2632    let mut fmt_pos = 0usize;
2633    let mut n = 0usize;
2634
2635    while fmt_pos < fmt.len() {
2636        let mut size = 0usize;
2637        let mut ntoalign = 0usize;
2638        let opt = getdetails(state, &mut h, pos, fmt, &mut fmt_pos, &mut size, &mut ntoalign)?;
2639
2640        if ntoalign + size > ld - pos {
2641            return Err(lua_vm::debug::arg_error_impl(state, 2, b"data string too short"));
2642        }
2643        pos += ntoalign;
2644        state.ensure_stack(2, "too many results")?;
2645        n += 1;
2646
2647        match opt {
2648            KOption::Int => {
2649                let v = unpackint(state, &data[pos..pos + size], h.is_little, size, true)?;
2650                state.push(LuaValue::Int(v));
2651            }
2652            KOption::Uint => {
2653                let v = unpackint(state, &data[pos..pos + size], h.is_little, size, false)?;
2654                state.push(LuaValue::Int(v));
2655            }
2656            KOption::Float => {
2657                let mut bytes = [0u8; 4];
2658                copywithendian(&mut bytes, &data[pos..pos + 4], h.is_little);
2659                let f = f32::from_bits(u32::from_ne_bytes(bytes));
2660                state.push(LuaValue::Float(f as f64));
2661            }
2662            KOption::Number => {
2663                let mut bytes = [0u8; 8];
2664                copywithendian(&mut bytes, &data[pos..pos + 8], h.is_little);
2665                let f = f64::from_bits(u64::from_ne_bytes(bytes));
2666                state.push(LuaValue::Float(f));
2667            }
2668            KOption::Double => {
2669                let mut bytes = [0u8; 8];
2670                copywithendian(&mut bytes, &data[pos..pos + 8], h.is_little);
2671                let f = f64::from_bits(u64::from_ne_bytes(bytes));
2672                state.push(LuaValue::Float(f));
2673            }
2674            KOption::Char => {
2675                state.push_bytes(&data[pos..pos + size])?;
2676            }
2677            KOption::Kstring => {
2678                let len = unpackint(state, &data[pos..pos + size], h.is_little, size, false)? as usize;
2679                if len > ld - pos - size {
2680                    return Err(lua_vm::debug::arg_error_impl(state, 2, b"data string too short"));
2681                }
2682                state.push_bytes(&data[pos + size..pos + size + len])?;
2683                pos += len;
2684            }
2685            KOption::Zstr => {
2686                let found = data[pos..].iter().position(|&b| b == 0);
2687                let end = match found {
2688                    Some(e) => e,
2689                    None => return Err(lua_vm::debug::arg_error_impl(state, 2, b"unfinished string for format 'z'")),
2690                };
2691                if pos + end >= ld {
2692                    return Err(lua_vm::debug::arg_error_impl(state, 2, b"unfinished string for format 'z'"));
2693                }
2694                state.push_bytes(&data[pos..pos + end])?;
2695                pos += end + 1;
2696            }
2697            KOption::Paddalign | KOption::Padding | KOption::Nop => {
2698                n -= 1; // undo increment
2699            }
2700        }
2701        pos += size;
2702    }
2703
2704    state.push(LuaValue::Int((pos + 1) as i64));
2705    Ok(n + 1)
2706}
2707
2708// ────────────────────────────────────────────────────────────────────────────
2709// §9  Module registration
2710// ────────────────────────────────────────────────────────────────────────────
2711
/// Function table for `string` library.
///
/// Each entry pairs a field name (as raw bytes) with the function that
/// implements it. `luaopen_string` hands this table to `state.new_lib` to
/// build the library table.
pub const STRING_LIB: &[(&[u8], lua_CFunction)] = &[
    (b"byte",     str_byte),
    (b"char",     str_char),
    (b"dump",     str_dump),
    (b"find",     str_find),
    (b"format",   str_format),
    (b"gmatch",   gmatch),
    (b"gsub",     str_gsub),
    (b"len",      str_len),
    (b"lower",    str_lower),
    (b"match",    str_match),
    (b"rep",      str_rep),
    (b"reverse",  str_reverse),
    (b"sub",      str_sub),
    (b"upper",    str_upper),
    // Binary packing / unpacking.
    (b"pack",     str_pack),
    (b"packsize", str_packsize),
    (b"unpack",   str_unpack),
];
2733
/// Metamethods to install on the string metatable.
///
/// Only arithmetic metamethods are listed; each maps to the matching
/// `arith_*` handler. `createmetatable` passes this table to
/// `new_lib_table` / `set_funcs`, and sets the `__index` field separately.
pub const STRING_META_METHODS: &[(&[u8], lua_CFunction)] = &[
    (b"__add",  arith_add),
    (b"__sub",  arith_sub),
    (b"__mul",  arith_mul),
    (b"__mod",  arith_mod),
    (b"__pow",  arith_pow),
    (b"__div",  arith_div),
    (b"__idiv", arith_idiv),
    (b"__unm",  arith_unm),
];
2746
2747/// Create the string metatable and set it as the metatable for all strings.
2748///
2749pub fn createmetatable(state: &mut LuaState) -> Result<(), LuaError> {
2750    state.new_lib_table(STRING_META_METHODS)?;
2751    state.set_funcs(STRING_META_METHODS, 0)?;
2752    state.push_string(b"")?;
2753    let mt_idx = state.top_idx() - 2;
2754    let mt = state.get_at(mt_idx);
2755    state.push(mt);
2756    state.set_metatable(-2)?;
2757    state.pop_n(1);
2758    let strlib_idx = state.top_idx() - 2;
2759    let strlib = state.get_at(strlib_idx);
2760    state.push(strlib);
2761    state.set_field(-2, b"__index")?;
2762    state.pop_n(1);
2763    Ok(())
2764}
2765
2766/// `luaopen_string` — open the string library.
2767///
2768pub fn luaopen_string(state: &mut LuaState) -> Result<usize, LuaError> {
2769    state.new_lib(STRING_LIB)?;
2770    // Lua 5.1 carries `string.gfind`, the pre-5.0 name for `gmatch` (an exact
2771    // alias). It was removed in 5.2. Verified against lua5.1.5:
2772    // `type(string.gfind)` == "function" and it iterates identically to
2773    // `gmatch`. See specs/followup/5.1-roster-syntax.md §1.
2774    if matches!(state.global().lua_version, lua_types::LuaVersion::V51) {
2775        state.push_c_function(gmatch)?;
2776        state.set_field(-2, b"gfind")?;
2777    }
2778    createmetatable(state)?;
2779    Ok(1)
2780}
2781
2782// ────────────────────────────────────────────────────────────────────────────
2783// PORT STATUS
2784//   source:        src/lstrlib.c  (1875 lines, 46 functions)
2785//   target_crate:  lua-stdlib
2786//   confidence:    medium
2787//   todos:         13
2788//   port_notes:    6
2789//   unsafe_blocks: 0
2790//   notes:         Pattern engine uses index-based MatchState (not raw ptrs).
2791//                  string.format delegates numeric widths/precision/flags to
2792//                  Phase B (a sprintf-compatible crate or manual impl).
2793//                  gmatch iterator state holds a 4-element Lua table in the
2794//                  closure's single upvalue (src, pat, pos, lastmatch) instead
2795//                  of the C-Lua GMatchState userdata, because Phase-A
2796//                  LuaCClosure upvalues are immutable. See gmatch_aux.
2797//                  copywithendian uses safe byte-level swapping (no transmute).
2798//                  unpackint sign-extension uses two's-complement bit tricks;
2799//                  logic review needed in Phase B.
2800//                  str_dump requires state.dump_function() which is not yet
2801//                  defined; Phase B wires up the ldump.c port.
2802//                  addquoted uses 3-digit escape for all control chars (slight
2803//                  deviation from C which uses 1-digit when safe); benign.
2804//                  str_len/str_sub/str_byte/str_reverse/str_lower/str_upper/
2805//                  str_rep/gmatch/str_find_aux borrow source bytes through
2806//                  to_lua_string (GcRef) instead of copying via
2807//                  check_arg_string, mirroring the gmatch_aux fix (685482d).
2808//                  string_ops 3.00x→2.00x, string_ops_long 2.25x→1.48x on
2809//                  best-of-5 (Apple M3 Max).
2810//                  gmatch_aux reads / writes its 4-slot state table directly
2811//                  through LuaTableRefExt::{get_int, raw_set_int} after a
2812//                  single value_at(upvalue_index(1)) resolution, replacing
2813//                  six raw_geti / raw_seti + four to_lua_string / to_integer_x
2814//                  calls that each re-resolved the stack index via
2815//                  index_to_value. Drops string_ops_long 1.58x→1.38x
2816//                  (below the 1.5x parity threshold) and index_to_value share
2817//                  9.4%→2.0% on Apple M3 Max best-of-5.
2818// ────────────────────────────────────────────────────────────────────────────