Skip to main content

lua_stdlib/
string_lib.rs

1//! Standard library for string operations and pattern-matching.
2//!
3//! Port of `lstrlib.c` (Lua 5.4.7, 1875 lines, 46 functions).
4//!
5//! Sections:
6//!   1. Basic string operations (byte, char, find, format, gmatch, gsub, len,
7//!      lower, match, rep, reverse, sub, upper)
8//!   2. Pattern-matching engine (MatchState + recursive matcher)
9//!   3. String format (`string.format`)
10//!   4. Pack / unpack (`string.pack`, `string.packsize`, `string.unpack`)
11//!   5. Module registration (`luaopen_string`)
12
13use std::any::Any;
14use std::cell::RefCell;
15use std::rc::Rc;
16
17use lua_types::error::LuaError;
18use lua_types::value::LuaValue;
19use lua_types::arith::ArithOp;
20use lua_types::{LuaType};
21use crate::state_stub::{LuaState, LuaStateStubExt as _, lua_CFunction, upvalue_index};
22
23// ────────────────────────────────────────────────────────────────────────────
24// Constants
25// ────────────────────────────────────────────────────────────────────────────
26
/// Number of slots in the capture array carried by `MatchState`.
const LUA_MAX_CAPTURES: usize = 32;

/// Initial value of `MatchState::matchdepth`, i.e. the matcher's recursion
/// budget.
const MAX_CC_CALLS: i32 = 200;

/// Escape byte that introduces `%`-items (classes, `%b`, `%f`, back-references)
/// in a pattern.
const L_ESC: u8 = b'%';

// NOTE(review): not referenced in this part of the file; presumably the set of
// bytes that make a pattern "non-plain" — confirm at the use site.
const SPECIALS: &[u8] = b"^$*+?.([%-";

/// `Capture::len` marker for a capture that has been opened but not closed.
const CAP_UNFINISHED: isize = -1;

/// `Capture::len` marker for a position capture (`()`).
const CAP_POSITION: isize = -2;

// NOTE(review): currently unused (see the `expect`); presumably a
// `string.format` buffer bound — confirm.
#[expect(dead_code, reason = "ported stdlib helper; not yet wired into the runtime")]
const MAX_ITEM: usize = 120;

// NOTE(review): currently unused; presumably the float counterpart of
// `MAX_ITEM` — confirm.
#[expect(dead_code, reason = "ported stdlib helper; not yet wired into the runtime")]
const MAX_ITEM_F: usize = 418;

// NOTE(review): currently unused; presumably the longest accepted format
// specification — confirm.
#[expect(dead_code, reason = "ported stdlib helper; not yet wired into the runtime")]
const MAX_FORMAT: usize = 32;

// NOTE(review): presumably the widest integer (in bytes) accepted by
// pack/unpack — confirm at the use site.
const MAX_INT_SIZE: usize = 16;

// On platforms where size_t is at least as wide as int (all our targets), this
// collapses to INT_MAX so that packed sizes round-trip through a Lua integer
// without ambiguity.
const PACK_MAXSIZE: usize = i32::MAX as usize;

// NOTE(review): presumably the number of bits per byte — confirm.
const NB: u32 = 8;

// NOTE(review): presumably the mask selecting one byte — confirm.
const MC: u8 = 0xFF;

const SZINT: usize = 8; // sizeof(i64) == 8

// NOTE(review): presumably the fill byte written for pack padding — confirm.
const PACK_PAD_BYTE: u8 = 0x00;
62
63// ────────────────────────────────────────────────────────────────────────────
64// Pattern-matching types
65// ────────────────────────────────────────────────────────────────────────────
66
67/// One capture record inside MatchState.
68///
69/// In Rust, `init` is an index into `MatchState::src`; `len` is either a
70/// non-negative actual length, `CAP_UNFINISHED`, or `CAP_POSITION`.
71#[derive(Copy, Clone)]
72struct Capture {
73    /// Index into the source slice where this capture started.
74    init: usize,
75    /// CAP_UNFINISHED, CAP_POSITION, or non-negative byte count.
76    len: isize,
77}
78
79impl Default for Capture {
80    fn default() -> Self {
81        Capture { init: 0, len: CAP_UNFINISHED }
82    }
83}
84
85/// State threaded through the recursive pattern-matcher.
86///
87/// Raw C pointers replaced by indices into `src` / `pat` slices.
88struct MatchState<'a> {
89    /// Source string being searched.
90    src: &'a [u8],
91    /// Pattern string.
92    pat: &'a [u8],
93    /// Recursion depth counter; decremented on entry, incremented on return.
94    matchdepth: i32,
95    /// Number of capture records currently in use.
96    level: u8,
97    /// Capture records indexed `0..level`.
98    captures: [Capture; LUA_MAX_CAPTURES],
99    /// Total `match_pat` invocations across the whole operation. Used to bound
100    /// catastrophic backtracking under a sandbox; charged against the
101    /// instruction budget by the caller.
102    steps: u64,
103    /// Maximum `steps` before the matcher stops. `0` means unlimited (no active
104    /// instruction budget), preserving non-sandboxed behavior exactly.
105    step_limit: u64,
106    /// Set when `step_limit` is reached; the matcher then unwinds to the caller,
107    /// which charges the budget and raises the uncatchable sandbox abort.
108    aborted: bool,
109}
110
111impl<'a> MatchState<'a> {
112    fn new(src: &'a [u8], pat: &'a [u8], step_limit: u64) -> Self {
113        MatchState {
114            src,
115            pat,
116            matchdepth: MAX_CC_CALLS,
117            level: 0,
118            captures: [Capture::default(); LUA_MAX_CAPTURES],
119            steps: 0,
120            step_limit,
121            aborted: false,
122        }
123    }
124
125    fn reset_level(&mut self) {
126        self.level = 0;
127        debug_assert!(self.matchdepth == MAX_CC_CALLS);
128    }
129}
130
/// Position bookkeeping carried between successive match attempts.
///
// NOTE(review): judging by the name this backs the `string.gmatch` iterator;
// the code that builds and advances it is outside this part of the file.
struct GMatchIterState {
    /// Current source position as a zero-based byte index.
    pos: usize,
    /// End of the last match, used to avoid zero-length infinite loops.
    last_match: Option<usize>,
}
137
138// ────────────────────────────────────────────────────────────────────────────
139// Pack/unpack types
140// ────────────────────────────────────────────────────────────────────────────
141
/// Pack/unpack format option.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum KOption {
    /// Signed integers.
    Int,
    /// Unsigned integers.
    Uint,
    /// Single-precision float (C `float`).
    Float,
    /// Lua native float (`lua_Number` = `f64`).
    Number,
    /// Double-precision float (C `double`).
    Double,
    /// Fixed-length string.
    Char,
    /// String with length prefix.
    Kstring,
    /// Zero-terminated string.
    Zstr,
    /// Padding byte (`x`).
    Padding,
    /// Padding to alignment (`X`).
    Paddalign,
    /// No-op (space, `<`, `>`, `=`, `!`).
    Nop,
}
158
/// Settings carried along while a pack/unpack format string is parsed.
struct Header {
    /// Whether the little-endian byte order is selected.
    is_little: bool,
    /// Current maximum alignment.
    max_align: usize,
    /// 5.5 widened `c`/`s`-size parsing from `int` (5.3/5.4) to `size_t`, so
    /// `c<huge>` numerals that overflowed `int` (and tripped "invalid format
    /// option '<digit>'") are now accepted up to `LUA_MAXINTEGER`.
    wide_size: bool,
}

impl Header {
    /// Start with the compilation target's byte order, an alignment of 1 and
    /// the caller-chosen `wide_size` mode.
    fn new(wide_size: bool) -> Self {
        Self {
            wide_size,
            max_align: 1,
            is_little: cfg!(target_endian = "little"),
        }
    }
}
179
180// ────────────────────────────────────────────────────────────────────────────
181// §1  Basic string helpers
182// ────────────────────────────────────────────────────────────────────────────
183
/// Resolve a 1-based start position that may be given relative to the end.
///
/// * positive `pos` is returned unchanged (it may exceed `len`);
/// * `0` and negatives reaching before the start of the string yield `1`;
/// * any other negative counts back from the end (`-1` is the last byte).
fn pos_relat_i(pos: i64, len: usize) -> usize {
    match pos {
        p if p > 0 => p as usize,
        0 => 1,
        p if p < -(len as i64) => 1,
        // Two's-complement addition: `len + pos + 1` with `pos` negative.
        p => len.wrapping_add(p as usize).wrapping_add(1),
    }
}
198
/// Resolve a relative position the way Lua 5.3's `posrelat` (`lstrlib.c` 5.3)
/// does.
///
/// Non-negative values pass through, a negative that reaches before the start
/// of the string becomes `0`, and any other negative counts back from the end.
/// Unlike `pos_relat_i`, `0` stays `0`; `string.unpack` then subtracts one,
/// underflowing into the "initial position out of string" guard exactly as the
/// 5.3 reference does.
fn posrelat_53(pos: i64, len: usize) -> usize {
    if pos >= 0 {
        return pos as usize;
    }
    // Widen before taking the magnitude so `i64::MIN` is handled too.
    let distance_from_end = (pos as i128).unsigned_abs();
    if distance_from_end > len as u128 {
        0
    } else {
        (len as i64 + pos + 1) as usize
    }
}
214
/// Resolve a 1-based, inclusive end position that may be given relative to the
/// end of the string. The result is always within `[0, len]`.
///
/// * values past the end are clipped to `len`;
/// * other non-negative values are returned unchanged;
/// * negatives reaching before the start yield `0`;
/// * any other negative counts back from the end (`-1` is the last byte).
fn get_end_pos(pos: i64, len: usize) -> usize {
    let limit = len as i64;
    match pos {
        p if p > limit => len,
        p if p >= 0 => p as usize,
        p if p < -limit => 0,
        // Two's-complement addition: `len + pos + 1` with `pos` negative.
        p => len.wrapping_add(p as usize).wrapping_add(1),
    }
}
229
230// ────────────────────────────────────────────────────────────────────────────
231// §2  Exported string functions (registered in strlib[])
232// ────────────────────────────────────────────────────────────────────────────
233
234/// `string.len(s)` — return byte-length of `s`.
235///
236///
237/// Reads only the byte-length, never the bytes themselves, so go through
238/// `to_lua_string_len` (which never copies) rather than `check_arg_string`
239/// (which `to_vec`s the entire payload only for `.len()` to throw it away).
240pub fn str_len(state: &mut LuaState) -> Result<usize, LuaError> {
241    let l = match state.to_lua_string_len(1) {
242        Some(n) => n,
243        None => {
244            state.check_arg_string(1)?;
245            unreachable!("check_arg_string raises when arg #1 is not a string");
246        }
247    };
248    state.push(LuaValue::Int(l as i64));
249    Ok(1)
250}
251
252/// `string.sub(s, i [, j])` — return substring.
253///
254///
255/// Borrow through `to_lua_string` so the full source string is not copied just
256/// to slice a (typically small) substring out of it. The `GcRef` keeps the
257/// bytes rooted across the `check_arg_integer` / `opt_arg_integer` calls (none
258/// of which can collect the string at arg #1).
259pub fn str_sub(state: &mut LuaState) -> Result<usize, LuaError> {
260    let s_ref = match state.to_lua_string(1) {
261        Some(r) => r,
262        None => {
263            state.check_arg_string(1)?;
264            unreachable!("check_arg_string raises when arg #1 is not a string");
265        }
266    };
267    let s: &[u8] = s_ref.as_bytes();
268    let l = s.len();
269    let start = pos_relat_i(state.check_arg_integer(2)?, l);
270    let end_pos_raw = state.opt_arg_integer(3, -1)?;
271    let end = get_end_pos(end_pos_raw, l);
272    if start <= end {
273        let slice = &s[(start - 1)..end];
274        state.push_string(slice)?;
275    } else {
276        state.push_string(b"")?;
277    }
278    Ok(1)
279}
280
281/// `string.reverse(s)` — return string with bytes reversed.
282///
283///
284/// Borrow the source bytes; the previous `check_arg_string` made a full owned
285/// copy that was discarded after the single iteration.
286pub fn str_reverse(state: &mut LuaState) -> Result<usize, LuaError> {
287    let s_ref = match state.to_lua_string(1) {
288        Some(r) => r,
289        None => {
290            state.check_arg_string(1)?;
291            unreachable!("check_arg_string raises when arg #1 is not a string");
292        }
293    };
294    let s: &[u8] = s_ref.as_bytes();
295    let buf: Vec<u8> = s.iter().copied().rev().collect();
296    state.push_bytes(&buf)?;
297    Ok(1)
298}
299
300/// `string.lower(s)` — return lowercase copy.
301///
302///
303/// Borrow the source bytes; one allocation (the output `Vec`) is unavoidable,
304/// but the intermediate copy from `check_arg_string` was not.
305pub fn str_lower(state: &mut LuaState) -> Result<usize, LuaError> {
306    let s_ref = match state.to_lua_string(1) {
307        Some(r) => r,
308        None => {
309            state.check_arg_string(1)?;
310            unreachable!("check_arg_string raises when arg #1 is not a string");
311        }
312    };
313    let s: &[u8] = s_ref.as_bytes();
314    let buf: Vec<u8> = s.iter().map(|&c| c.to_ascii_lowercase()).collect();
315    state.push_bytes(&buf)?;
316    Ok(1)
317}
318
319/// `string.upper(s)` — return uppercase copy.
320///
321///
322/// Borrow the source bytes; called as the `string.gsub` replacement function
323/// in `string_ops_long` ~700k times against `%w+` matches, so the intermediate
324/// copy from `check_arg_string` added up.
325pub fn str_upper(state: &mut LuaState) -> Result<usize, LuaError> {
326    let s_ref = match state.to_lua_string(1) {
327        Some(r) => r,
328        None => {
329            state.check_arg_string(1)?;
330            unreachable!("check_arg_string raises when arg #1 is not a string");
331        }
332    };
333    let s: &[u8] = s_ref.as_bytes();
334    let buf: Vec<u8> = s.iter().map(|&c| c.to_ascii_uppercase()).collect();
335    state.push_bytes(&buf)?;
336    Ok(1)
337}
338
339/// `string.rep(s, n [, sep])` — return `n` copies of `s` separated by `sep`.
340///
341///
342/// Borrow `s` through `to_lua_string`. The previous version did the
343/// `check_arg_string` copy and then a second redundant `s.to_vec()` inside the
344/// build loop — that double-copy is gone too.
345pub fn str_rep(state: &mut LuaState) -> Result<usize, LuaError> {
346    let s_ref = match state.to_lua_string(1) {
347        Some(r) => r,
348        None => {
349            state.check_arg_string(1)?;
350            unreachable!("check_arg_string raises when arg #1 is not a string");
351        }
352    };
353    let s: &[u8] = s_ref.as_bytes();
354    let l = s.len();
355    let n = state.check_arg_integer(2)?;
356    let sep_owned = state.opt_arg_string(3, b"")?;
357    let sep: &[u8] = &sep_owned;
358    let lsep = sep.len();
359
360    if n <= 0 {
361        state.push_string(b"")?;
362    } else {
363        const MAXSIZE: usize = i32::MAX as usize;
364        let per = l.checked_add(lsep)
365            .ok_or_else(|| LuaError::runtime(format_args!("resulting string too large")))?;
366        if per > MAXSIZE / (n as usize) {
367            return Err(LuaError::runtime(format_args!("resulting string too large")));
368        }
369        let total = per * (n as usize) - lsep;
370
371        if let Some(err) = state.sandbox_reserve(total) {
372            return Err(err);
373        }
374
375        let mut buf: Vec<u8> = Vec::with_capacity(total);
376        for i in 0..(n as usize) {
377            buf.extend_from_slice(s);
378            if i < (n as usize - 1) && lsep > 0 {
379                buf.extend_from_slice(sep);
380            }
381        }
382        state.push_bytes(&buf)?;
383    }
384    Ok(1)
385}
386
387/// `string.byte(s [, i [, j]])` — return numeric codes of characters.
388///
389///
390/// Borrow the source bytes through `to_lua_string` (returns a `GcRef<LuaString>`)
391/// instead of `check_arg_string` (which copies the entire string into a fresh
392/// `Vec<u8>`). On the `string_ops_long` workload `string.byte` is called 700k
393/// times against the same ~14 KB string, so the previous copy was on the order
394/// of 10 GB of memcpy. The `GcRef` keeps the bytes rooted while the borrow lives.
395pub fn str_byte(state: &mut LuaState) -> Result<usize, LuaError> {
396    let s_ref = match state.to_lua_string(1) {
397        Some(r) => r,
398        None => {
399            state.check_arg_string(1)?;
400            unreachable!("check_arg_string raises when arg #1 is not a string");
401        }
402    };
403    let s: &[u8] = s_ref.as_bytes();
404    let l = s.len();
405    let pi = state.opt_arg_integer(2, 1)?;
406    let posi = pos_relat_i(pi, l);
407    let pose_raw = state.opt_arg_integer(3, pi)?;
408    let pose = get_end_pos(pose_raw, l);
409
410    if posi > pose {
411        return Ok(0);
412    }
413    let count = pose.saturating_sub(posi - 1) + 1;
414    if count > i32::MAX as usize {
415        return Err(LuaError::runtime(format_args!("string slice too long")));
416    }
417    let n = (pose - posi + 1) as usize;
418    state.ensure_stack(n as i32, "string slice too long")?;
419
420    for i in 0..n {
421        state.push(LuaValue::Int(s[posi - 1 + i] as i64));
422    }
423    Ok(n)
424}
425
426/// `string.char(...)` — return string built from character codes.
427///
428pub fn str_char(state: &mut LuaState) -> Result<usize, LuaError> {
429    let n = state.get_top();
430    let mut buf = Vec::with_capacity(n as usize);
431    for i in 1..=n {
432        let c = state.check_arg_integer(i)? as u64;
433        if c > u8::MAX as u64 {
434            return Err(lua_vm::debug::arg_error_impl(state, i, b"value out of range"));
435        }
436        buf.push(c as u8);
437    }
438    state.push_bytes(&buf)?;
439    Ok(1)
440}
441
442/// `string.dump(function [, strip])` — serialize a function as binary chunk.
443///
444/// Uses `lua_dump` internally; the writer callback builds a buffer.
445pub fn str_dump(state: &mut LuaState) -> Result<usize, LuaError> {
446    state.check_arg_type(1, LuaType::Function)?;
447    let strip = state.arg_to_bool(2);
448    // PORT NOTE: `state.set_top` (inherent) takes an absolute StackIdx and
449    // would wipe the call frame. `lua_settop` is frame-relative.
450    lua_vm::api::set_top(state, 1)?;
451    // TODO(port): state.dump_function(strip) needs to produce &[u8].
452    // In the C code, lua_dump writes to a writer callback that fills a luaL_Buffer.
453    // In Rust, state.dump() should return Vec<u8> or write to a &mut Vec<u8>.
454    let bytes = state.dump_function(strip)
455        .map_err(|_| LuaError::runtime(format_args!("unable to dump given function")))?;
456    state.push_bytes(&bytes)?;
457    Ok(1)
458}
459
460// ────────────────────────────────────────────────────────────────────────────
461// §3  String metamethods (arithmetic coercion)
462// ────────────────────────────────────────────────────────────────────────────
463
464/// Try to coerce the argument at `arg` to a number, pushing it on the stack.
465/// Returns true on success.
466///
467fn tonum(state: &mut LuaState, arg: i32) -> Result<bool, LuaError> {
468    if state.type_at(arg) == LuaType::Number {
469        state.push_value_at(arg)?;
470        Ok(true)
471    } else {
472        // check whether it is a numerical string
473        //    return (s != NULL && lua_stringtonumber(L, s) == len + 1);
474        if let Some(s) = state.to_lua_string_bytes(arg) {
475            let len = s.len();
476            // PORT NOTE: string_to_number pushes the number if successful
477            let pushed = state.string_to_number_push(&s)?;
478            let ok = pushed == len + 1;
479            // Lua 5.1–5.3: a string coerced in an arithmetic operation always
480            // yields a float (`('16') + 0` is a float in 5.3, an integer in
481            // 5.4). This metamethod path is arithmetic-only, so the promotion
482            // never touches bitwise ops. Verified vs the 5.3.6/5.4.7 oracle.
483            if ok
484                && matches!(
485                    state.global().lua_version,
486                    lua_types::LuaVersion::V51
487                        | lua_types::LuaVersion::V52
488                        | lua_types::LuaVersion::V53
489                )
490            {
491                if let Some(f) = lua_vm::api::to_number_x(state, -1) {
492                    state.pop();
493                    state.push(LuaValue::Float(f));
494                }
495            }
496            Ok(ok)
497        } else {
498            Ok(false)
499        }
500    }
501}
502
503/// Try to invoke the metamethod `mtname` on the two operands.
504///
505fn trymt(state: &mut LuaState, mtname: &[u8]) -> Result<(), LuaError> {
506    // PORT NOTE: `state.set_top` (inherent) takes an absolute StackIdx and
507    // would wipe the call frame's arguments. `lua_settop` is frame-relative
508    // — keep the first two args of the current C function.
509    lua_vm::api::set_top(state, 2)?;
510    let t2_is_string = state.type_at(2) == LuaType::String;
511    // C: `if (lua_type(L,2)==LUA_TSTRING || !luaL_getmetafield(L,2,mtname))`.
512    // The `||` short-circuits: when arg2 is a string, `get_meta_field` is never
513    // called, so the stack stays `[arg1, arg2]` for the error formatter. Calling
514    // it unconditionally would push the string metatable's own metamethod and
515    // shift the operands read by `type_name_at(-2)/(-1)`.
516    if t2_is_string || !state.get_meta_field(2, mtname)? {
517        let op = &mtname[2..]; // skip "__"
518        let msg = format!(
519            "attempt to {} a '{}' with a '{}'",
520            op.escape_ascii(),
521            state.type_name_at(-2).escape_ascii(),
522            state.type_name_at(-1).escape_ascii(),
523        );
524        return crate::auxlib::lua_error(state, msg.as_bytes()).map(|_| ());
525    }
526    state.insert(-3)?;
527    state.call(2, 1)?;
528    Ok(())
529}
530
531/// Generic arithmetic helper: coerce both args and call `op`, else try metamethod.
532///
533fn arith(state: &mut LuaState, op: ArithOp, mtname: &[u8]) -> Result<usize, LuaError> {
534    if tonum(state, 1)? && tonum(state, 2)? {
535        state.arith(op)?;
536    } else {
537        trymt(state, mtname)?;
538    }
539    Ok(1)
540}
541
/// String `__add` handler: forwards to `arith` with `ArithOp::Add`.
pub fn arith_add(state: &mut LuaState) -> Result<usize, LuaError> {
    arith(state, ArithOp::Add, b"__add")
}
/// String `__sub` handler: forwards to `arith` with `ArithOp::Sub`.
pub fn arith_sub(state: &mut LuaState) -> Result<usize, LuaError> {
    arith(state, ArithOp::Sub, b"__sub")
}
/// String `__mul` handler: forwards to `arith` with `ArithOp::Mul`.
pub fn arith_mul(state: &mut LuaState) -> Result<usize, LuaError> {
    arith(state, ArithOp::Mul, b"__mul")
}
/// String `__mod` handler: forwards to `arith` with `ArithOp::Mod`.
pub fn arith_mod(state: &mut LuaState) -> Result<usize, LuaError> {
    arith(state, ArithOp::Mod, b"__mod")
}
/// String `__pow` handler: forwards to `arith` with `ArithOp::Pow`.
pub fn arith_pow(state: &mut LuaState) -> Result<usize, LuaError> {
    arith(state, ArithOp::Pow, b"__pow")
}
/// String `__div` handler: forwards to `arith` with `ArithOp::Div`.
pub fn arith_div(state: &mut LuaState) -> Result<usize, LuaError> {
    arith(state, ArithOp::Div, b"__div")
}
/// String `__idiv` handler: forwards to `arith` with `ArithOp::Idiv`.
pub fn arith_idiv(state: &mut LuaState) -> Result<usize, LuaError> {
    arith(state, ArithOp::Idiv, b"__idiv")
}
/// String `__unm` handler: forwards to `arith` with `ArithOp::Unm`.
pub fn arith_unm(state: &mut LuaState) -> Result<usize, LuaError> {
    arith(state, ArithOp::Unm, b"__unm")
}
566
567// ────────────────────────────────────────────────────────────────────────────
568// §4  Pattern-matching engine
569// ────────────────────────────────────────────────────────────────────────────
570
/// Return `true` if byte `c` belongs to the character class `cl` (the letter
/// following `%` in a pattern).
///
/// A lowercase class letter selects the class, the uppercase letter selects
/// its complement. A `cl` that is not a class letter stands for itself, so
/// `%%` matches a literal `%`.
///
/// Classes follow the C-locale `<ctype.h>` predicates. `%s` is spelled out by
/// hand because `u8::is_ascii_whitespace` leaves out vertical tab (0x0B),
/// which C `isspace` accepts.
#[inline(always)]
fn match_class(c: u8, cl: u8) -> bool {
    let res = match cl.to_ascii_lowercase() {
        b'a' => c.is_ascii_alphabetic(),
        b'c' => c.is_ascii_control(),
        b'd' => c.is_ascii_digit(),
        b'g' => c.is_ascii_graphic(),
        b'l' => c.is_ascii_lowercase(),
        b'p' => c.is_ascii_punctuation(),
        // Space plus the contiguous run \t \n \v \f \r (0x09..=0x0D).
        b's' => c == b' ' || (b'\t'..=b'\r').contains(&c),
        b'u' => c.is_ascii_uppercase(),
        b'w' => c.is_ascii_alphanumeric(),
        b'x' => c.is_ascii_hexdigit(),
        b'z' => c == 0,
        // Not a class letter: the escaped byte matches only itself.
        _ => return cl == c,
    };
    if cl.is_ascii_lowercase() { res } else { !res }
}
591
592/// Match character `c` against a bracket class `[p .. ec-1]`.
593///
594/// `p` and `ec` are indices into `pat`.
595#[inline]
596fn matchbracketclass(pat: &[u8], c: u8, mut p: usize, ec: usize) -> bool {
597    let sig = if p + 1 < pat.len() && pat[p + 1] == b'^' {
598        p += 1; // skip '^'
599        false
600    } else {
601        true
602    };
603    p += 1; // advance past '[' or '^'
604    while p < ec {
605        if pat[p] == L_ESC {
606            p += 1;
607            if p < ec && match_class(c, pat[p]) {
608                return sig;
609            }
610        } else if p + 1 < ec && pat[p + 1] == b'-' && p + 2 < ec {
611            let lo = pat[p];
612            p += 2;
613            let hi = pat[p];
614            if lo <= c && c <= hi {
615                return sig;
616            }
617        } else if pat[p] == c {
618            return sig;
619        }
620        p += 1;
621    }
622    !sig
623}
624
625/// Return `true` if the single character at `src[s]` matches the pattern
626/// element starting at `pat[p]` with class end at `ep`.
627///
628#[inline(always)]
629fn singlematch(ms: &MatchState, s: usize, p: usize, ep: usize) -> bool {
630    if s >= ms.src.len() {
631        return false;
632    }
633    let c = ms.src[s];
634    match ms.pat[p] {
635        b'.' => true,
636        L_ESC => match_class(c, ms.pat[p + 1]),
637        b'[' => matchbracketclass(ms.pat, c, p, ep - 1),
638        pc   => pc == c,
639    }
640}
641
642/// Find the end of the pattern element starting at `pat[p]`.
643/// Returns the index one past the element, or an error for malformed patterns.
644///
645#[inline(always)]
646fn classend(ms: &MatchState, p: usize) -> Result<usize, LuaError> {
647    let pat = ms.pat;
648    match pat.get(p).copied() {
649        Some(L_ESC) => {
650            if p + 1 >= pat.len() {
651                return Err(LuaError::runtime(format_args!(
652                    "malformed pattern (ends with '%')"
653                )));
654            }
655            Ok(p + 2)
656        }
657        Some(b'[') => {
658            let mut q = p + 1;
659            if q < pat.len() && pat[q] == b'^' {
660                q += 1;
661            }
662            loop {
663                if q >= pat.len() {
664                    return Err(LuaError::runtime(format_args!(
665                        "malformed pattern (missing ']')"
666                    )));
667                }
668                let ch = pat[q];
669                q += 1;
670                if ch == L_ESC && q < pat.len() {
671                    q += 1;
672                }
673                if q < pat.len() && pat[q] == b']' {
674                    return Ok(q + 1);
675                }
676            }
677        }
678        Some(_) => Ok(p + 1),
679        None => Ok(p),
680    }
681}
682
683/// Check that capture `l` (1-based char digit from pattern) is valid.
684/// Returns the 0-based capture index.
685///
686fn check_capture(ms: &MatchState, l: u8) -> Result<usize, LuaError> {
687    let signed = (l as i32) - (b'1' as i32);
688    if signed < 0
689        || signed >= ms.level as i32
690        || ms.captures[signed as usize].len == CAP_UNFINISHED
691    {
692        return Err(LuaError::runtime(format_args!(
693            "invalid capture index %{}",
694            signed + 1
695        )));
696    }
697    Ok(signed as usize)
698}
699
700/// Find the most recent unfinished capture to close.
701///
702fn capture_to_close(ms: &MatchState) -> Result<usize, LuaError> {
703    let mut level = ms.level as usize;
704    while level > 0 {
705        level -= 1;
706        if ms.captures[level].len == CAP_UNFINISHED {
707            return Ok(level);
708        }
709    }
710    Err(LuaError::runtime(format_args!("invalid pattern capture")))
711}
712
713/// Match a balanced string `%bxy` starting at `src[s]`.
714///
715/// Returns the new `s` position after the match, or `None`.
716fn matchbalance(ms: &MatchState, s: usize, p: usize) -> Result<Option<usize>, LuaError> {
717    if p + 1 >= ms.pat.len() {
718        return Err(LuaError::runtime(format_args!(
719            "malformed pattern (missing arguments to '%b')"
720        )));
721    }
722    let b = ms.pat[p];
723    let e = ms.pat[p + 1];
724    if s >= ms.src.len() || ms.src[s] != b {
725        return Ok(None);
726    }
727    let mut cont = 1i32;
728    let mut s = s + 1;
729    while s < ms.src.len() {
730        if ms.src[s] == e {
731            cont -= 1;
732            if cont == 0 {
733                return Ok(Some(s + 1));
734            }
735        } else if ms.src[s] == b {
736            cont += 1;
737        }
738        s += 1;
739    }
740    Ok(None)
741}
742
743/// Greedy match: match as many as possible, then try the rest of the pattern.
744///
745fn max_expand(
746    ms: &mut MatchState,
747    s: usize,
748    p: usize,
749    ep: usize,
750) -> Result<Option<usize>, LuaError> {
751    let mut count: isize = 0;
752    while singlematch(ms, s + count as usize, p, ep) {
753        count += 1;
754    }
755    while count >= 0 {
756        let res = match_pat(ms, s + count as usize, ep + 1)?;
757        if res.is_some() {
758            return Ok(res);
759        }
760        count -= 1;
761    }
762    Ok(None)
763}
764
765/// Lazy match: try the rest of the pattern first, then expand by one.
766///
767fn min_expand(
768    ms: &mut MatchState,
769    mut s: usize,
770    p: usize,
771    ep: usize,
772) -> Result<Option<usize>, LuaError> {
773    loop {
774        let res = match_pat(ms, s, ep + 1)?;
775        if res.is_some() {
776            return Ok(res);
777        } else if singlematch(ms, s, p, ep) {
778            s += 1;
779        } else {
780            return Ok(None);
781        }
782    }
783}
784
785/// Open a new capture at `src[s]`.
786///
787fn start_capture(
788    ms: &mut MatchState,
789    s: usize,
790    p: usize,
791    what: isize,
792) -> Result<Option<usize>, LuaError> {
793    let level = ms.level as usize;
794    if level >= LUA_MAX_CAPTURES {
795        return Err(LuaError::runtime(format_args!("too many captures")));
796    }
797    ms.captures[level].init = s;
798    ms.captures[level].len = what;
799    ms.level += 1;
800    let res = match_pat(ms, s, p)?;
801    if res.is_none() {
802        ms.level -= 1; // undo capture
803    }
804    Ok(res)
805}
806
807/// Close the most recent open capture at `src[s]`.
808///
809fn end_capture(ms: &mut MatchState, s: usize, p: usize) -> Result<Option<usize>, LuaError> {
810    let l = capture_to_close(ms)?;
811    ms.captures[l].len = (s - ms.captures[l].init) as isize;
812    let res = match_pat(ms, s, p)?;
813    if res.is_none() {
814        ms.captures[l].len = CAP_UNFINISHED; // undo
815    }
816    Ok(res)
817}
818
819/// Match a back-reference `%n` against `src[s]`.
820///
821fn match_capture(ms: &MatchState, s: usize, l: u8) -> Result<Option<usize>, LuaError> {
822    let idx = check_capture(ms, l)?;
823    let cap_len = ms.captures[idx].len as usize;
824    let cap_init = ms.captures[idx].init;
825    if ms.src.len() - s >= cap_len
826        && &ms.src[s..s + cap_len] == &ms.src[cap_init..cap_init + cap_len]
827    {
828        Ok(Some(s + cap_len))
829    } else {
830        Ok(None)
831    }
832}
833
834/// Core recursive pattern matcher.
835/// Returns `Ok(Some(new_s))` on match, `Ok(None)` on failure, `Err` on error.
836///
837/// The C code uses `goto init` for tail calls; here we use a loop.
838fn match_pat(ms: &mut MatchState, mut s: usize, mut p: usize) -> Result<Option<usize>, LuaError> {
839    if ms.aborted {
840        return Ok(None);
841    }
842    ms.steps += 1;
843    if ms.step_limit != 0 && ms.steps > ms.step_limit {
844        ms.aborted = true;
845        return Ok(None);
846    }
847    ms.matchdepth -= 1;
848    if ms.matchdepth < 0 {
849        ms.matchdepth = 0;
850        return Err(LuaError::runtime(format_args!("pattern too complex")));
851    }
852
853    // Use a loop to simulate `goto init` (tail-call optimization).
854    let result = 'outer: loop {
855        if p >= ms.pat.len() {
856            // end of pattern — full match up to current s
857            break 'outer Ok(Some(s));
858        }
859
860        match ms.pat[p] {
861            b'(' => {
862                let s2 = if p + 1 < ms.pat.len() && ms.pat[p + 1] == b')' {
863                    // position capture
864                    start_capture(ms, s, p + 2, CAP_POSITION)?
865                } else {
866                    start_capture(ms, s, p + 1, CAP_UNFINISHED)?
867                };
868                break 'outer Ok(s2);
869            }
870            b')' => {
871                let s2 = end_capture(ms, s, p + 1)?;
872                break 'outer Ok(s2);
873            }
874            b'$' => {
875                if p + 1 != ms.pat.len() {
876                    // fall through to default
877                    let ep = classend(ms, p)?;
878                    let s2 = handle_class_with_suffix(ms, s, p, ep)?;
879                    break 'outer Ok(s2);
880                }
881                break 'outer Ok(if s == ms.src.len() { Some(s) } else { None });
882            }
883            L_ESC => {
884                match ms.pat.get(p + 1).copied().unwrap_or(0) {
885                    b'b' => {
886                        let s2 = matchbalance(ms, s, p + 2)?;
887                        if let Some(ns) = s2 {
888                            s = ns;
889                            p += 4;
890                            continue 'outer; // tail call: match(ms, s, p+4)
891                        }
892                        break 'outer Ok(None);
893                    }
894                    b'f' => {
895                        p += 2;
896                        if ms.pat.get(p).copied() != Some(b'[') {
897                            return Err(LuaError::runtime(format_args!(
898                                "missing '[' after '%f' in pattern"
899                            )));
900                        }
901                        let ep = classend(ms, p)?;
902                        let previous = if s == 0 { 0u8 } else { ms.src[s - 1] };
903                        let current = ms.src.get(s).copied().unwrap_or(0);
904                        if !matchbracketclass(ms.pat, previous, p, ep - 1)
905                            && matchbracketclass(ms.pat, current, p, ep - 1)
906                        {
907                            p = ep;
908                            continue 'outer; // tail call: match(ms, s, ep)
909                        }
910                        break 'outer Ok(None);
911                    }
912                    c @ b'0'..=b'9' => {
913                        let s2 = match_capture(ms, s, c)?;
914                        if let Some(ns) = s2 {
915                            s = ns;
916                            p += 2;
917                            continue 'outer; // tail call: match(ms, s, p+2)
918                        }
919                        break 'outer Ok(None);
920                    }
921                    _ => {
922                        // fall through to default class handling
923                        let ep = classend(ms, p)?;
924                        let s2 = handle_class_with_suffix(ms, s, p, ep)?;
925                        break 'outer Ok(s2);
926                    }
927                }
928            }
929            _ => {
930                // default: pattern class plus optional suffix
931                let ep = classend(ms, p)?;
932                let s2 = handle_class_with_suffix(ms, s, p, ep)?;
933                break 'outer Ok(s2);
934            }
935        }
936    };
937
938    ms.matchdepth += 1;
939    result
940}
941
942/// Handle a pattern class element with an optional repetition suffix (`*`, `+`, `?`, `-`).
943///
944/// PORT NOTE: Factored out from `match_pat`'s `default/dflt` label to share
945/// code between the ESC-default and plain-default paths.
946#[inline(always)]
947fn handle_class_with_suffix(
948    ms: &mut MatchState,
949    s: usize,
950    p: usize,
951    ep: usize,
952) -> Result<Option<usize>, LuaError> {
953    let matched_once = singlematch(ms, s, p, ep);
954    if !matched_once {
955        //    else s = NULL;
956        match ms.pat.get(ep).copied() {
957            Some(b'*') | Some(b'?') | Some(b'-') => {
958                // Accept zero occurrences: tail-call match(ms, s, ep+1)
959                // We can't do a tail call into match_pat because we're returning
960                // from handle_class_with_suffix, but we can call it directly.
961                return match_pat(ms, s, ep + 1);
962            }
963            _ => return Ok(None),
964        }
965    }
966
967    // Matched at least once
968    match ms.pat.get(ep).copied() {
969        Some(b'?') => {
970            // Optional: try matching with s+1, fall back to ep+1
971            let res = match_pat(ms, s + 1, ep + 1)?;
972            if res.is_some() {
973                Ok(res)
974            } else {
975                match_pat(ms, s, ep + 1)
976            }
977        }
978        Some(b'+') => {
979            // 1 or more: greedy from s+1
980            max_expand(ms, s + 1, p, ep)
981        }
982        Some(b'*') => {
983            // 0 or more: greedy from s
984            max_expand(ms, s, p, ep)
985        }
986        Some(b'-') => {
987            // 0 or more: lazy from s
988            min_expand(ms, s, p, ep)
989        }
990        _ => {
991            // No suffix: match one, advance both s and p
992            match_pat(ms, s + 1, ep)
993        }
994    }
995}
996
997// ────────────────────────────────────────────────────────────────────────────
998// §5  Pattern-matching public API helpers
999// ────────────────────────────────────────────────────────────────────────────
1000
/// Plain (pattern-free) substring search.
///
/// Returns the byte offset of the first occurrence of `needle` inside
/// `haystack`, or `None` when it does not occur. An empty `needle` is found
/// at offset 0.
fn lmemfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let Some((&head, tail)) = needle.split_first() else {
        return Some(0);
    };
    if needle.len() > haystack.len() {
        return None;
    }
    // Last offset at which the whole needle still fits.
    let last_start = haystack.len() - needle.len();
    let mut from = 0;
    while from <= last_start {
        // Jump to the next candidate: a position holding the needle's first byte.
        let hit = from
            + haystack[from..=last_start]
                .iter()
                .position(|&b| b == head)?;
        if haystack[hit + 1..].starts_with(tail) {
            return Some(hit);
        }
        from = hit + 1;
    }
    None
}
1030
1031fn required_start_byte(pat: &[u8]) -> Option<u8> {
1032    let (byte, ep) = match pat.first().copied()? {
1033        L_ESC => {
1034            let escaped = *pat.get(1)?;
1035            if escaped.is_ascii_alphanumeric() {
1036                return None;
1037            }
1038            (escaped, 2)
1039        }
1040        c if !SPECIALS.contains(&c) => (c, 1),
1041        _ => return None,
1042    };
1043    match pat.get(ep).copied() {
1044        Some(b'*') | Some(b'?') | Some(b'-') => None,
1045        _ => Some(byte),
1046    }
1047}
1048
/// Find the first index `>= pos` at which `src` holds `byte`.
///
/// Returns `None` when `byte` does not occur from `pos` onwards, including
/// when `pos` is at or past the end of `src`.
fn next_start_with_byte(src: &[u8], pos: usize, byte: u8) -> Option<usize> {
    // An empty range (pos >= len) simply yields no candidate.
    (pos..src.len()).find(|&idx| src[idx] == byte)
}
1055
1056/// Check whether the pattern `pat` has no special characters (for plain search).
1057///
1058fn nospecials(pat: &[u8]) -> bool {
1059    !pat.iter().any(|b| SPECIALS.contains(b))
1060}
1061
/// Result of resolving one capture of a finished match.
///
/// Produced by `get_one_capture`; consumers either push the value onto the Lua
/// stack or append it to an output buffer.
enum CaptureInfo<'a> {
    /// A position capture; the value is a 1-based index into the subject.
    Position(i64),
    /// A string capture: a slice borrowed from the subject string.
    Bytes(&'a [u8]),
}
1069
1070/// Get information about the `i`-th capture.
1071/// If there are no captures and `i == 0`, returns the whole match `s..e`.
1072///
1073fn get_one_capture<'a>(
1074    ms: &'a MatchState,
1075    i: usize,
1076    s: usize,
1077    e: usize,
1078) -> Result<CaptureInfo<'a>, LuaError> {
1079    if i >= ms.level as usize {
1080        if i != 0 {
1081            return Err(LuaError::runtime(format_args!(
1082                "invalid capture index %{}",
1083                i + 1
1084            )));
1085        }
1086        // Return whole match
1087        return Ok(CaptureInfo::Bytes(&ms.src[s..e]));
1088    }
1089    let cap = &ms.captures[i];
1090    if cap.len == CAP_UNFINISHED {
1091        return Err(LuaError::runtime(format_args!("unfinished capture")));
1092    }
1093    if cap.len == CAP_POSITION {
1094        return Ok(CaptureInfo::Position((cap.init + 1) as i64));
1095    }
1096    let len = cap.len as usize;
1097    Ok(CaptureInfo::Bytes(&ms.src[cap.init..cap.init + len]))
1098}
1099
1100/// Push all captures onto the stack, returning the number of values pushed.
1101///
1102/// `span` mirrors upstream's `const char *s` argument: `Some((s, e))` means a
1103/// whole-match span is available (so a zero-capture pattern pushes the whole
1104/// match), while `None` mirrors a `NULL s` and pushes nothing when there are no
1105/// explicit captures. Upstream guard: `nlevels = (ms->level == 0 && s) ? 1 : ms->level`.
1106///
1107fn push_captures(
1108    state: &mut LuaState,
1109    ms: &MatchState,
1110    span: Option<(usize, usize)>,
1111) -> Result<usize, LuaError> {
1112    let nlevels = if ms.level == 0 && span.is_some() {
1113        1
1114    } else {
1115        ms.level as usize
1116    };
1117    state.ensure_stack(nlevels as i32, "too many captures")?;
1118    let (s, e) = span.unwrap_or((0, 0));
1119    for i in 0..nlevels {
1120        match get_one_capture(ms, i, s, e)? {
1121            CaptureInfo::Position(n) => state.push(LuaValue::Int(n)),
1122            CaptureInfo::Bytes(b) => state.push_bytes(b)?,
1123        }
1124    }
1125    Ok(nlevels)
1126}
1127
1128// ────────────────────────────────────────────────────────────────────────────
1129// §6  str_find / str_match / gmatch / gsub
1130// ────────────────────────────────────────────────────────────────────────────
1131
/// Shared implementation of `string.find` and `string.match`.
///
/// Stack arguments: #1 subject string, #2 pattern, optional #3 start position
/// (defaults to 1) and, consulted only when `find` is true, #4 "plain" flag.
///
/// * `find == true`: on success pushes the 1-based start and end indices of
///   the match, followed by the pattern's explicit captures (if any).
/// * `find == false`: on success pushes the captures, or the whole match when
///   the pattern has no explicit captures.
///
/// When nothing matches a single `nil` is pushed. Returns the number of
/// values pushed.
fn str_find_aux(state: &mut LuaState, find: bool) -> Result<usize, LuaError> {
    // Fetch the subject; when it is not available as a string, let
    // `check_arg_string` produce the argument error.
    let s_ref = match state.to_lua_string(1) {
        Some(r) => r,
        None => {
            state.check_arg_string(1)?;
            unreachable!("check_arg_string raises when arg #1 is not a string");
        }
    };
    // Same for the pattern.
    let p_ref = match state.to_lua_string(2) {
        Some(r) => r,
        None => {
            state.check_arg_string(2)?;
            unreachable!("check_arg_string raises when arg #2 is not a string");
        }
    };
    let s: &[u8] = s_ref.as_bytes();
    let p: &[u8] = p_ref.as_bytes();
    let ls = s.len();
    let lp = p.len();
    let init_raw = state.opt_arg_integer(3, 1)?;
    // Convert the (possibly relative) 1-based start into a 0-based offset.
    let init = pos_relat_i(init_raw, ls).saturating_sub(1);

    // Start lies beyond the end of the subject: nothing can be found.
    if init > ls {
        state.push(LuaValue::Nil);
        return Ok(1);
    }

    if find && (state.arg_to_bool(4) || nospecials(p)) {
        // Plain search: explicitly requested, or the pattern has no specials.
        if let Some(pos) = lmemfind(&s[init..], p) {
            let abs = init + pos;
            state.push(LuaValue::Int((abs + 1) as i64));
            state.push(LuaValue::Int((abs + lp) as i64));
            return Ok(2);
        }
    } else {
        let step_limit = state.sandbox_match_step_limit();
        let mut ms = MatchState::new(s, p, step_limit);
        // A leading '^' anchors the match at `init`; it is not part of the
        // pattern handed to the matcher.
        let anchor = p.first() == Some(&b'^');
        let p_slice = if anchor { &p[1..] } else { p };
        ms.pat = p_slice;
        // Optional shortcut: the literal byte every match must start with.
        let start_byte = if anchor { None } else { required_start_byte(ms.pat) };

        let mut s1 = init;
        let mut matched: Option<usize> = None;
        loop {
            if let Some(byte) = start_byte {
                // Skip directly to the next candidate start; none left means
                // no match is possible.
                let Some(next) = next_start_with_byte(ms.src, s1, byte) else {
                    break;
                };
                s1 = next;
            }
            ms.reset_level();
            if let Some(res) = match_pat(&mut ms, s1, 0)? {
                matched = Some(res);
                break;
            }
            // Stop when the matcher aborted, the end of the subject has been
            // tried (an empty match at `len` is a valid attempt), or the
            // pattern is anchored.
            if ms.aborted || s1 >= ms.src.len() || anchor {
                break;
            }
            s1 += 1;
        }

        // Report the matcher steps consumed; this may itself raise an error.
        if let Some(err) = state.sandbox_charge(ms.steps) {
            return Err(err);
        }

        if let Some(res) = matched {
            if find {
                // `res` is the exclusive 0-based end, i.e. the 1-based
                // inclusive end index.
                state.push(LuaValue::Int((s1 + 1) as i64));
                state.push(LuaValue::Int(res as i64));
                let nc = push_captures(state, &ms, None)?;
                return Ok(nc + 2);
            } else {
                return push_captures(state, &ms, Some((s1, res)));
            }
        }
    }

    // Not found.
    state.push(LuaValue::Nil);
    Ok(1)
}
1216
/// `string.find(s, pattern [, init [, plain]])` — find pattern in `s`.
///
/// Delegates to `str_find_aux` with `find = true` and returns its count of
/// pushed values.
pub fn str_find(state: &mut LuaState) -> Result<usize, LuaError> {
    str_find_aux(state, true)
}
1222
/// `string.match(s, pattern [, init])` — match pattern against `s`.
///
/// Delegates to `str_find_aux` with `find = false` and returns its count of
/// pushed values.
pub fn str_match(state: &mut LuaState) -> Result<usize, LuaError> {
    str_find_aux(state, false)
}
1228
/// Iterator step for the closure returned by `string.gmatch`.
///
/// Each call resumes scanning at the position stored in the iterator state,
/// and on success pushes the captures of the next match (or the whole match
/// when the pattern has no captures) and returns how many values were pushed.
/// Returns `Ok(0)` when no further match exists, or when any upvalue does not
/// have the expected shape.
///
/// PORT NOTE: C stores source, pattern, and `GMatchState` as three C-closure
/// upvalues. The Rust port mirrors that shape: upvalues 1 and 2 are traced Lua
/// strings, and upvalue 3 is a full userdata whose host payload stores only the
/// mutable byte positions.
pub fn gmatch_aux(state: &mut LuaState) -> Result<usize, LuaError> {
    // Upvalues 1 and 2: subject and pattern strings.
    let s_val = state.value_at(upvalue_index(1));
    let p_val = state.value_at(upvalue_index(2));
    let (LuaValue::Str(s_str), LuaValue::Str(p_str)) = (&s_val, &p_val) else {
        return Ok(0);
    };
    // Upvalue 3: userdata carrying the mutable iteration positions.
    let iter_val = state.value_at(upvalue_index(3));
    let LuaValue::UserData(iter_ud) = iter_val else {
        return Ok(0);
    };
    let Some(host) = iter_ud.host_value() else {
        return Ok(0);
    };
    let Ok(iter_state) = host.downcast::<RefCell<GMatchIterState>>() else {
        return Ok(0);
    };

    let s: &[u8] = s_str.as_bytes();
    let p: &[u8] = p_str.as_bytes();
    // Copy the positions out so the `RefCell` borrow is released before
    // matching starts.
    let (start_pos, last_match) = {
        let iter = iter_state.borrow();
        (iter.pos, iter.last_match)
    };

    let ls = s.len();

    let step_limit = state.sandbox_match_step_limit();
    let mut ms = MatchState::new(s, p, step_limit);
    // Optional shortcut: the literal byte every match must start with.
    let start_byte = required_start_byte(p);

    let mut src = start_pos;
    let mut hit: Option<(usize, usize)> = None;
    // `src == ls` is still tried so that an empty match at the very end of
    // the subject can be reported.
    while src <= ls {
        if let Some(byte) = start_byte {
            let Some(next) = next_start_with_byte(s, src, byte) else {
                break;
            };
            src = next;
        }
        ms.reset_level();
        if let Some(e) = match_pat(&mut ms, src, 0)? {
            // A match ending exactly where the previous one ended is skipped,
            // so the same end position is never reported twice in a row.
            if Some(e) != last_match {
                hit = Some((src, e));
                break;
            }
        }
        if ms.aborted {
            break;
        }
        src += 1;
    }

    // Report the matcher steps consumed; this may itself raise an error.
    if let Some(err) = state.sandbox_charge(ms.steps) {
        return Err(err);
    }

    if let Some((src, e)) = hit {
        {
            // Remember where to resume and which end position was reported.
            let mut iter = iter_state.borrow_mut();
            iter.pos = e;
            iter.last_match = Some(e);
        }
        return push_captures(state, &ms, Some((src, e)));
    }

    Ok(0)
}
1303
/// `string.gmatch(s, pattern [, init])` — return an iterator for all matches.
///
/// Validates that arguments #1 and #2 are strings, resolves the optional
/// start position #3 (default 1), and pushes a closure over `gmatch_aux` with
/// three upvalues: the subject, the pattern, and a userdata holding the
/// iteration state. Returns 1 (the closure).
///
/// PORT NOTE: C uses `lua_newuserdatauv` for the GMatchState plus a 3-upvalue
/// C closure. The port stores the two strings as traced closure upvalues and
/// the mutable byte positions in the userdata host payload.
pub fn gmatch(state: &mut LuaState) -> Result<usize, LuaError> {
    let s_ref = match state.to_lua_string(1) {
        Some(r) => r,
        None => {
            state.check_arg_string(1)?;
            unreachable!("check_arg_string raises when arg #1 is not a string");
        }
    };
    let ls = s_ref.len();
    // Only the presence of a string pattern is checked here; the closure
    // reads the pattern from its upvalue later.
    match state.to_lua_string(2) {
        Some(_) => {}
        None => {
            state.check_arg_string(2)?;
            unreachable!("check_arg_string raises when arg #2 is not a string");
        }
    };
    let init_raw = state.opt_arg_integer(3, 1)?;
    // Convert the (possibly relative) 1-based start into a 0-based offset.
    let mut init = pos_relat_i(init_raw, ls).saturating_sub(1);
    // A start beyond the end is clamped to `ls + 1`, a position the iterator
    // loop (`src <= ls`) never tries, so the iterator yields nothing.
    if init > ls {
        init = ls + 1;
    }

    // Keep only the subject and the pattern on the stack.
    lua_vm::api::set_top(state, 2)?;

    // Push the three values that become the closure's upvalues, in order.
    state.push_value_at(1)?;
    state.push_value_at(2)?;
    let iter_ud = state.new_userdata_typed(b"string.gmatch.state", 0, 0)?;
    let iter_state: Rc<dyn Any> = Rc::new(RefCell::new(GMatchIterState {
        pos: init,
        last_match: None,
    }));
    iter_ud.set_host_value(Some(iter_state));

    state.push_c_closure(gmatch_aux, 3)?;
    Ok(1)
}
1346
1347/// Add a replacement string with `%n` capture references to `buf`.
1348///
1349fn add_s(
1350    state: &mut LuaState,
1351    ms: &MatchState,
1352    buf: &mut Vec<u8>,
1353    s: usize,
1354    e: usize,
1355) -> Result<(), LuaError> {
1356    let news_bytes = state.to_lua_string_bytes(3).unwrap_or_default();
1357    let mut i = 0usize;
1358    while i < news_bytes.len() {
1359        if news_bytes[i] != L_ESC {
1360            buf.push(news_bytes[i]);
1361            i += 1;
1362        } else {
1363            i += 1; // skip ESC
1364            if i >= news_bytes.len() {
1365                break;
1366            }
1367            let c = news_bytes[i];
1368            if c == L_ESC {
1369                buf.push(L_ESC);
1370            } else if c == b'0' {
1371                buf.extend_from_slice(&ms.src[s..e]);
1372            } else if c.is_ascii_digit() {
1373                match get_one_capture(ms, (c - b'1') as usize, s, e)? {
1374                    CaptureInfo::Position(n) => {
1375                        // push position then pop into buf
1376                        let formatted = format!("{}", n).into_bytes();
1377                        buf.extend_from_slice(&formatted);
1378                    }
1379                    CaptureInfo::Bytes(b) => {
1380                        buf.extend_from_slice(b);
1381                    }
1382                }
1383            } else {
1384                return Err(LuaError::runtime(format_args!(
1385                    "invalid use of '{}' in replacement string",
1386                    L_ESC as char
1387                )));
1388            }
1389            i += 1;
1390        }
1391    }
1392    Ok(())
1393}
1394
1395/// Add the replacement value (string, table lookup, or function call) to `buf`.
1396/// Returns `true` if the original text was changed.
1397///
1398fn add_value(
1399    state: &mut LuaState,
1400    ms: &MatchState,
1401    buf: &mut Vec<u8>,
1402    s: usize,
1403    e: usize,
1404    tr: LuaType,
1405) -> Result<bool, LuaError> {
1406    match tr {
1407        LuaType::Function => {
1408            state.push_value_at(3)?;
1409            let n = push_captures(state, ms, Some((s, e)))?;
1410            state.call(n as i32, 1)?;
1411        }
1412        LuaType::Table => {
1413            match get_one_capture(ms, 0, s, e)? {
1414                CaptureInfo::Position(n) => state.push(LuaValue::Int(n)),
1415                CaptureInfo::Bytes(b) => state.push_bytes(b)?,
1416            }
1417            state.get_table(3)?;
1418        }
1419        _ => {
1420            // LUA_TNUMBER or LUA_TSTRING: add replacement string directly
1421            add_s(state, ms, buf, s, e)?;
1422            return Ok(true);
1423        }
1424    }
1425
1426    let top_bool = state.arg_to_bool(-1);
1427    if !top_bool {
1428        state.pop_n(1);
1429        buf.extend_from_slice(&ms.src[s..e]);
1430        return Ok(false);
1431    }
1432    if state.type_at(-1) != LuaType::String {
1433        let tname = state.type_name_at(-1).to_owned();
1434        return Err(LuaError::runtime(format_args!(
1435            "invalid replacement value (a {})", tname.escape_ascii()
1436        )));
1437    }
1438    let v = state.to_bytes(-1).unwrap_or_default();
1439    state.pop();
1440    buf.extend_from_slice(&v);
1441    Ok(true)
1442}
1443
/// `string.gsub(s, pattern, repl [, n])` — global substitution.
///
/// Stack arguments: #1 subject, #2 pattern, #3 replacement (string, number,
/// function or table), optional #4 maximum number of substitutions (defaults
/// to the subject length plus one).
///
/// Pushes two values: the resulting string (the original subject value when
/// nothing changed) and the number of matches processed. Returns 2.
///
/// # Errors
/// A type error for argument #3 when it is not a string, number, function or
/// table, plus any error raised by the matcher, by `add_value`, or by the
/// sandbox step accounting.
pub fn str_gsub(state: &mut LuaState) -> Result<usize, LuaError> {
    let src_ref = match state.to_lua_string(1) {
        Some(r) => r,
        None => {
            state.check_arg_string(1)?;
            unreachable!("check_arg_string raises when arg #1 is not a string");
        }
    };
    let pat_ref = match state.to_lua_string(2) {
        Some(r) => r,
        None => {
            state.check_arg_string(2)?;
            unreachable!("check_arg_string raises when arg #2 is not a string");
        }
    };
    let src: &[u8] = src_ref.as_bytes();
    let pat: &[u8] = pat_ref.as_bytes();
    let src_len = src.len();
    // Default limit: one more than the subject length.
    let max_s = state.opt_arg_integer(4, (src_len + 1) as i64)?;
    let tr = state.type_at(3);

    if !matches!(tr, LuaType::Number | LuaType::String | LuaType::Function | LuaType::Table) {
        let v = state.arg(3);
        return Err(LuaError::type_arg_error(3, "string/function/table", &v));
    }

    // A leading '^' anchors the pattern at the start of the subject and is
    // not part of the pattern handed to the matcher.
    let anchor = pat.first() == Some(&b'^');
    let pat_slice = if anchor { &pat[1..] } else { pat };

    let step_limit = state.sandbox_match_step_limit();
    let mut ms = MatchState::new(src, pat_slice, step_limit);
    // Optional shortcut: the literal byte every match must start with.
    let start_byte = if anchor { None } else { required_start_byte(ms.pat) };
    let mut buf: Vec<u8> = Vec::with_capacity(src_len);
    let mut src_pos = 0usize; // next unprocessed byte of the subject
    let mut last_match: Option<usize> = None; // end of the previous match
    let mut n: i64 = 0; // number of matches processed
    let mut changed = false; // whether any replacement altered the text

    while n < max_s {
        if let Some(byte) = start_byte {
            // No further candidate start: copy the remainder verbatim.
            let Some(next) = next_start_with_byte(ms.src, src_pos, byte) else {
                buf.extend_from_slice(&ms.src[src_pos..]);
                src_pos = ms.src.len();
                break;
            };
            // Copy the bytes skipped on the way to the candidate unchanged.
            if next > src_pos {
                buf.extend_from_slice(&ms.src[src_pos..next]);
                src_pos = next;
            }
        }
        ms.reset_level();
        let maybe_e = match_pat(&mut ms, src_pos, 0)?;
        if let Some(e) = maybe_e {
            if last_match != Some(e) {
                // A new match: count it and append its replacement.
                n += 1;
                let delta = add_value(state, &ms, &mut buf, src_pos, e, tr)?;
                changed |= delta;
                src_pos = e;
                last_match = Some(e);
            } else if src_pos < ms.src.len() {
                // Match ends where the previous one ended: copy one byte and
                // move on instead of replacing at the same spot again.
                buf.push(ms.src[src_pos]);
                src_pos += 1;
            } else {
                break;
            }
        } else if src_pos < ms.src.len() {
            // No match here: copy one byte and try the next position.
            buf.push(ms.src[src_pos]);
            src_pos += 1;
        } else {
            // End of subject.
            break;
        }
        // An anchored pattern gets a single attempt; an aborted matcher stops
        // the scan.
        if ms.aborted || anchor {
            break;
        }
    }

    // Report the matcher steps consumed; this may itself raise an error.
    if let Some(err) = state.sandbox_charge(ms.steps) {
        return Err(err);
    }

    if !changed {
        // Nothing changed: return the original subject value.
        state.push_value_at(1)?;
    } else {
        // Append the unprocessed tail and return the new string.
        buf.extend_from_slice(&ms.src[src_pos..]);
        state.push_bytes(&buf)?;
    }
    state.push(LuaValue::Int(n));
    Ok(2)
}
1535
1536// ────────────────────────────────────────────────────────────────────────────
1537// §7  String format (`string.format`)
1538// ────────────────────────────────────────────────────────────────────────────
1539
/// Append the integer part of `x` to `buf` as one lowercase hex digit and
/// return the fractional part that is left.
///
/// Callers pass a value whose integer part is a single hex digit (0..=15).
fn adddigit(buf: &mut Vec<u8>, x: f64) -> f64 {
    let whole = x.floor();
    let glyph = match whole as i32 {
        d @ ..=9 => b'0' + d as u8,
        d => b'a' + (d - 10) as u8,
    };
    buf.push(glyph);
    x - whole
}
1549
/// Format `x` as a hexadecimal floating-point string without an explicit
/// precision.
///
/// Thin wrapper: returns the bytes produced by `format_hex_float(x, None)`
/// unchanged.
fn num2straux(x: f64) -> Vec<u8> {
    format_hex_float(x, None)
}
1557
1558/// Produce a hex-float string for `x` with optional precision (digits after the point).
1559///
1560/// When `precision` is `None` the minimum number of digits needed for a round-trip
1561/// is emitted (C's default `%a` behaviour). When `precision` is `Some(p)` exactly `p`
1562/// digits follow the radix point; trailing zeros are added as needed, and excess
1563/// digits are discarded (C truncates rather than rounds, matching the C `printf`
1564/// behaviour on the tested platforms).
1565fn format_hex_float(x: f64, precision: Option<usize>) -> Vec<u8> {
1566    if x.is_nan() {
1567        return b"nan".to_vec();
1568    }
1569    if x.is_infinite() {
1570        return if x < 0.0 { b"-inf".to_vec() } else { b"inf".to_vec() };
1571    }
1572    if x == 0.0 {
1573        let sign: &[u8] = if x.is_sign_negative() { b"-" } else { b"" };
1574        return match precision {
1575            None => [sign, b"0x0p+0"].concat(),
1576            Some(0) => [sign, b"0x0p+0"].concat(),
1577            Some(p) => {
1578                let zeros = "0".repeat(p);
1579                [sign, b"0x0.", zeros.as_bytes(), b"p+0"].concat()
1580            }
1581        };
1582    }
1583
1584    let (m_raw, exp) = frexp(x);
1585    let mut buf: Vec<u8> = Vec::new();
1586    let mut m = m_raw;
1587    if m < 0.0 {
1588        buf.push(b'-');
1589        m = -m;
1590    }
1591    buf.extend_from_slice(b"0x");
1592
1593    let nbfd = 1;
1594    m = adddigit(&mut buf, m * (1 << nbfd) as f64);
1595    let e = exp - nbfd;
1596
1597    match precision {
1598        None => {
1599            if m > 0.0 {
1600                buf.push(b'.');
1601                while m > 0.0 {
1602                    m = adddigit(&mut buf, m * 16.0);
1603                }
1604            }
1605        }
1606        Some(0) => {}
1607        Some(p) => {
1608            buf.push(b'.');
1609            for _ in 0..p {
1610                if m > 0.0 {
1611                    m = adddigit(&mut buf, m * 16.0);
1612                } else {
1613                    buf.push(b'0');
1614                }
1615            }
1616        }
1617    }
1618
1619    let exp_str = format!("p{:+}", e);
1620    buf.extend_from_slice(exp_str.as_bytes());
1621    buf
1622}
1623
/// Split `x` into a mantissa and a power-of-two exponent such that
/// `x == mantissa * 2^exponent`, with the mantissa's magnitude in `[0.5, 1.0)`.
///
/// Counterpart of C's `frexp`. The mantissa keeps the sign of `x`. Zero, NaN
/// and the infinities are returned unchanged with an exponent of 0.
fn frexp(x: f64) -> (f64, i32) {
    const SIGN_MASK: u64 = 0x8000_0000_0000_0000;
    const FRACTION_MASK: u64 = 0x000F_FFFF_FFFF_FFFF;
    // Exponent field of a value in [0.5, 1.0).
    const HALF_EXPONENT: u64 = 0x3FE0_0000_0000_0000;

    if x == 0.0 || !x.is_finite() {
        return (x, 0);
    }

    let raw = x.to_bits();
    let biased = ((raw >> 52) & 0x7FF) as i32;
    if biased == 0 {
        // Subnormal: scale into the normal range first, then undo the scaling
        // in the exponent.
        let (mantissa, exponent) = frexp(x * (1u64 << 52) as f64);
        (mantissa, exponent - 52)
    } else {
        let mantissa = f64::from_bits((raw & SIGN_MASK) | (raw & FRACTION_MASK) | HALF_EXPONENT);
        (mantissa, biased - 1022)
    }
}
1643
1644/// Convert float `n` to a Lua-readable literal (hex or special representation).
1645///
1646fn quotefloat(n: f64) -> Vec<u8> {
1647    if n == f64::INFINITY {
1648        return b"1e9999".to_vec();
1649    } else if n == f64::NEG_INFINITY {
1650        return b"-1e9999".to_vec();
1651    } else if n.is_nan() {
1652        return b"(0/0)".to_vec();
1653    }
1654    // hex float, ensuring dot separator
1655    let buf = num2straux(n);
1656    if !buf.contains(&b'.') && !buf.contains(&b'p') {
1657        // try to find locale decimal point and replace with '.'
1658        // PORT NOTE: We always produce '.' so this branch is not taken.
1659    }
1660    buf
1661}
1662
/// Append `s` to `buf` as a double-quoted Lua string literal.
///
/// `"`, `\` and newline are emitted with a preceding backslash. Any other
/// ASCII control byte becomes a decimal escape: `\ddd` (zero-padded to three
/// digits) when the next byte is a digit, so the escape cannot absorb it, and
/// the shortest `\d` form otherwise. All remaining bytes are copied verbatim.
fn addquoted(buf: &mut Vec<u8>, s: &[u8]) {
    buf.push(b'"');
    let mut bytes = s.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'"' | b'\\' | b'\n' => buf.extend_from_slice(&[b'\\', byte]),
            ctl if ctl.is_ascii_control() => {
                let digit_follows = bytes.peek().is_some_and(|next| next.is_ascii_digit());
                let escape = if digit_follows {
                    format!("\\{:03}", ctl)
                } else {
                    format!("\\{}", ctl)
                };
                buf.extend_from_slice(escape.as_bytes());
            }
            plain => buf.push(plain),
        }
    }
    buf.push(b'"');
}
1685
1686/// Add a Lua literal representation of arg `n` to `buf`.
1687///
1688fn addliteral(state: &mut LuaState, buf: &mut Vec<u8>, arg: i32) -> Result<(), LuaError> {
1689    match state.type_at(arg) {
1690        LuaType::String => {
1691            let s = state.check_arg_string(arg)?.to_vec();
1692            addquoted(buf, &s);
1693        }
1694        LuaType::Number => {
1695            if state.is_integer(arg) {
1696                let n = state.to_integer(arg).unwrap_or(0);
1697                let formatted = if n == i64::MIN {
1698                    format!("0x{:016x}", n as u64)
1699                } else {
1700                    format!("{}", n)
1701                };
1702                buf.extend_from_slice(formatted.as_bytes());
1703            } else {
1704                let n = state.to_number(arg).unwrap_or(0.0);
1705                let hex = quotefloat(n);
1706                buf.extend_from_slice(&hex);
1707            }
1708        }
1709        LuaType::Nil => {
1710            buf.extend_from_slice(b"nil");
1711        }
1712        LuaType::Boolean => {
1713            buf.extend_from_slice(if state.to_boolean(arg) { b"true" } else { b"false" });
1714        }
1715        _ => {
1716            return Err(LuaError::arg_error(arg, "value has no literal form"));
1717        }
1718    }
1719    Ok(())
1720}
1721
1722
/// Flags allowed per conversion type (matches lstrlib.c constants).
///
/// Each constant is the set of flag bytes that `check_conv_spec` accepts
/// between the leading `%` and the width for one family of conversions.
// Accepts '-', '+', '#', '0' and space.
const FMT_FLAGS_F: &[u8] = b"-+#0 ";
// Accepts '-', '#' and '0'.
const FMT_FLAGS_X: &[u8] = b"-#0";
// Accepts '-', '+', '0' and space.
const FMT_FLAGS_I: &[u8] = b"-+0 ";
// Accepts '-' and '0'.
const FMT_FLAGS_U: &[u8] = b"-0";
// Accepts only '-'.
const FMT_FLAGS_C: &[u8] = b"-";
1729
1730/// Validate a format specifier against allowed flags and width/precision digit counts.
1731///
1732/// `form` is the full specifier slice including the leading `%` and the trailing
1733/// conversion character (e.g. `b"%100.3d"`). `flags` is the allowed-flags byte set for
1734/// this conversion type. `allow_precision` is false for conversions that forbid `.`.
1735///
1736/// Mirrors C `checkformat` in lstrlib.c: consumes flags, then up to 2 width digits,
1737/// then (if allowed) `.` + up to 2 precision digits, then asserts we are at the
1738/// conversion character. Returns `Err("invalid conversion specification")` on failure.
1739fn check_conv_spec(
1740    state: &mut LuaState,
1741    form: &[u8],
1742    flags: &[u8],
1743    allow_precision: bool,
1744) -> Result<(), LuaError> {
1745    let mut i = 1usize; // skip '%'
1746    while i < form.len() && flags.contains(&form[i]) {
1747        i += 1;
1748    }
1749    if i < form.len() && form[i] == b'0' {
1750        return Err(invalid_conv_spec(state, form));
1751    }
1752    if i < form.len() && form[i].is_ascii_digit() {
1753        i += 1;
1754        if i < form.len() && form[i].is_ascii_digit() {
1755            i += 1;
1756        }
1757    }
1758    if allow_precision && i < form.len() && form[i] == b'.' {
1759        i += 1;
1760        if i < form.len() && form[i].is_ascii_digit() {
1761            i += 1;
1762            if i < form.len() && form[i].is_ascii_digit() {
1763                i += 1;
1764            }
1765        }
1766    }
1767    if i != form.len() - 1 {
1768        return Err(invalid_conv_spec(state, form));
1769    }
1770    Ok(())
1771}
1772
1773/// Build the version-appropriate "invalid conversion specification" error,
1774/// prefixed with the calling location like reference `luaL_error`.
1775///
1776/// Lua 5.3 `scanformat` raises `invalid format (width or precision too long)`
1777/// with no offending spec; Lua 5.4/5.5 `checkformat` raises
1778/// `invalid conversion specification: '<form>'`.
1779fn invalid_conv_spec(state: &mut LuaState, form: &[u8]) -> LuaError {
1780    let msg: Vec<u8> = if state.global().lua_version == lua_types::LuaVersion::V53 {
1781        b"invalid format (width or precision too long)".to_vec()
1782    } else {
1783        let mut m = b"invalid conversion specification: '".to_vec();
1784        m.extend_from_slice(form);
1785        m.push(b'\'');
1786        m
1787    };
1788    lua_vm::debug::c_api_runtime(state, msg)
1789}
1790
/// Parsed printf-style format specifier (flags, width, precision).
///
/// Produced by `parse_fmt_spec` from the bytes between `%` and the conversion
/// character, and consumed by the padding / digit-generation helpers.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct FmtSpec {
    /// `-` flag: pad on the right instead of the left.
    left_align: bool,
    /// `+` flag: always emit a sign for signed conversions.
    plus_sign: bool,
    /// space flag: emit a blank where a `+` would go.
    space_sign: bool,
    /// `#` flag: alternate form (`0`/`0x` prefixes, forced decimal point).
    alt_form: bool,
    /// `0` flag: pad with zeros instead of spaces.
    zero_pad: bool,
    /// Minimum field width; `0` when none was given.
    width: usize,
    /// Precision after `.`; `None` when the spec has no `.` at all.
    precision: Option<usize>,
}
1802
1803fn parse_fmt_spec(spec: &[u8]) -> FmtSpec {
1804    let mut s = FmtSpec::default();
1805    let mut i = 0;
1806    while i < spec.len() {
1807        match spec[i] {
1808            b'-' => s.left_align = true,
1809            b'+' => s.plus_sign = true,
1810            b' ' => s.space_sign = true,
1811            b'#' => s.alt_form = true,
1812            b'0' => s.zero_pad = true,
1813            _ => break,
1814        }
1815        i += 1;
1816    }
1817    while i < spec.len() && spec[i].is_ascii_digit() {
1818        s.width = s.width * 10 + (spec[i] - b'0') as usize;
1819        i += 1;
1820    }
1821    if i < spec.len() && spec[i] == b'.' {
1822        i += 1;
1823        let mut p = 0usize;
1824        while i < spec.len() && spec[i].is_ascii_digit() {
1825            p = p * 10 + (spec[i] - b'0') as usize;
1826            i += 1;
1827        }
1828        s.precision = Some(p);
1829    }
1830    s
1831}
1832
1833fn pad_str(buf: &mut Vec<u8>, body: &[u8], spec: &FmtSpec) {
1834    let body = match spec.precision {
1835        Some(p) if body.len() > p => &body[..p],
1836        _ => body,
1837    };
1838    if body.len() >= spec.width {
1839        buf.extend_from_slice(body);
1840        return;
1841    }
1842    let pad = spec.width - body.len();
1843    if spec.left_align {
1844        buf.extend_from_slice(body);
1845        for _ in 0..pad { buf.push(b' '); }
1846    } else {
1847        for _ in 0..pad { buf.push(b' '); }
1848        buf.extend_from_slice(body);
1849    }
1850}
1851
1852fn pad_int(buf: &mut Vec<u8>, sign_prefix: &[u8], digits: &[u8], spec: &FmtSpec) {
1853    let min_digits = spec.precision.unwrap_or(0);
1854    let zeroes_for_prec = if digits.len() < min_digits { min_digits - digits.len() } else { 0 };
1855    let core_len = sign_prefix.len() + zeroes_for_prec + digits.len();
1856    if core_len >= spec.width {
1857        buf.extend_from_slice(sign_prefix);
1858        for _ in 0..zeroes_for_prec { buf.push(b'0'); }
1859        buf.extend_from_slice(digits);
1860        return;
1861    }
1862    let pad = spec.width - core_len;
1863    let use_zero_pad = spec.zero_pad && !spec.left_align && spec.precision.is_none();
1864    if spec.left_align {
1865        buf.extend_from_slice(sign_prefix);
1866        for _ in 0..zeroes_for_prec { buf.push(b'0'); }
1867        buf.extend_from_slice(digits);
1868        for _ in 0..pad { buf.push(b' '); }
1869    } else if use_zero_pad {
1870        buf.extend_from_slice(sign_prefix);
1871        for _ in 0..pad { buf.push(b'0'); }
1872        for _ in 0..zeroes_for_prec { buf.push(b'0'); }
1873        buf.extend_from_slice(digits);
1874    } else {
1875        for _ in 0..pad { buf.push(b' '); }
1876        buf.extend_from_slice(sign_prefix);
1877        for _ in 0..zeroes_for_prec { buf.push(b'0'); }
1878        buf.extend_from_slice(digits);
1879    }
1880}
1881
1882fn signed_int_parts(n: i64, spec: &FmtSpec) -> (Vec<u8>, Vec<u8>) {
1883    if n == 0 && spec.precision == Some(0) {
1884        return (Vec::new(), Vec::new());
1885    }
1886    let (sign, abs_digits) = if n < 0 {
1887        (b"-".to_vec(), {
1888            let u = (n as i128).unsigned_abs();
1889            format!("{}", u).into_bytes()
1890        })
1891    } else {
1892        let s: Vec<u8> = if spec.plus_sign {
1893            b"+".to_vec()
1894        } else if spec.space_sign {
1895            b" ".to_vec()
1896        } else {
1897            Vec::new()
1898        };
1899        (s, format!("{}", n).into_bytes())
1900    };
1901    (sign, abs_digits)
1902}
1903
1904fn unsigned_int_parts(n: u64, base: u32, upper: bool, spec: &FmtSpec) -> (Vec<u8>, Vec<u8>) {
1905    let digits = if n == 0 && spec.precision == Some(0) {
1906        Vec::new()
1907    } else {
1908        match base {
1909            8 => format!("{:o}", n).into_bytes(),
1910            16 if upper => format!("{:X}", n).into_bytes(),
1911            16 => format!("{:x}", n).into_bytes(),
1912            _ => format!("{}", n).into_bytes(),
1913        }
1914    };
1915    let prefix: Vec<u8> = if spec.alt_form && n != 0 {
1916        match base {
1917            8 => b"0".to_vec(),
1918            16 if upper => b"0X".to_vec(),
1919            16 => b"0x".to_vec(),
1920            _ => Vec::new(),
1921        }
1922    } else {
1923        Vec::new()
1924    };
1925    (prefix, digits)
1926}
1927
1928fn format_float(n: f64, conv: u8, spec: &FmtSpec) -> Vec<u8> {
1929    let prec = spec.precision.unwrap_or(6);
1930    if n.is_nan() {
1931        return if conv.is_ascii_uppercase() { b"NAN".to_vec() } else { b"nan".to_vec() };
1932    }
1933    if n.is_infinite() {
1934        let s: &[u8] = if conv.is_ascii_uppercase() {
1935            if n < 0.0 { b"-INF" } else { b"INF" }
1936        } else if n < 0.0 { b"-inf" } else { b"inf" };
1937        return s.to_vec();
1938    }
1939    match conv {
1940        b'f' | b'F' => {
1941            let mut result = format!("{:.*}", prec, n).into_bytes();
1942            if spec.alt_form && !result.contains(&b'.') {
1943                result.push(b'.');
1944            }
1945            result
1946        }
1947        b'e' => format_exp(n, prec, false, spec.alt_form),
1948        b'E' => {
1949            let mut v = format_exp(n, prec, false, spec.alt_form);
1950            for b in v.iter_mut() { if *b == b'e' { *b = b'E'; } }
1951            v
1952        }
1953        b'g' | b'G' => {
1954            let p = if prec == 0 { 1 } else { prec };
1955            let v = format_g(n, p, spec.alt_form);
1956            if conv == b'G' {
1957                v.into_iter().map(|b| if b == b'e' { b'E' } else { b }).collect()
1958            } else { v }
1959        }
1960        _ => format!("{}", n).into_bytes(),
1961    }
1962}
1963
/// Render `n` in C `%e` style: `[-]d.ddde±XX`, with `prec` fraction digits and
/// an exponent of at least two digits.
///
/// * `alt` (`#` flag) forces a decimal point even when `prec` is 0.
/// * `_upper` is accepted for signature compatibility and ignored: the result
///   always uses a lower-case `e`, and callers upper-case it themselves.
///
/// The digits come from Rust's exact `{:e}` formatting rather than from
/// `log10` / `powi` arithmetic, so the result is correctly rounded for every
/// finite input (including subnormals and values that round up to the next
/// power of ten) and `-0.0` keeps its sign. A non-finite input, which callers
/// are expected to filter out, is returned as Rust renders it (`inf`, `NaN`).
fn format_exp(n: f64, prec: usize, _upper: bool, alt: bool) -> Vec<u8> {
    // Rust renders e.g. "1.23e3" / "1.23e-3": same digits as C, different exponent syntax.
    let rendered = format!("{:.*e}", prec, n);
    let Some((mantissa, exponent)) = rendered.split_once('e') else {
        return rendered.into_bytes();
    };

    let (exp_sign, exp_digits) = match exponent.strip_prefix('-') {
        Some(digits) => ('-', digits),
        None => ('+', exponent),
    };
    let forced_point = if alt && !mantissa.contains('.') { "." } else { "" };

    // C always prints the exponent sign and pads the exponent to two digits.
    format!("{}{}e{}{:0>2}", mantissa, forced_point, exp_sign, exp_digits).into_bytes()
}
2006
2007fn format_g(n: f64, prec: usize, alt: bool) -> Vec<u8> {
2008    if n == 0.0 {
2009        return if alt { format!("0.{}", "0".repeat(prec.saturating_sub(1))).into_bytes() } else { b"0".to_vec() };
2010    }
2011    let abs = n.abs();
2012    let exp = abs.log10().floor() as i32;
2013    if exp < -4 || exp >= prec as i32 {
2014        let ep = if prec == 0 { 0 } else { prec - 1 };
2015        let mut v = format_exp(n, ep, false, alt);
2016        if !alt {
2017            v = strip_trailing_zeros_exp(&v);
2018        }
2019        v
2020    } else {
2021        let dec_places = (prec as i32 - 1 - exp).max(0) as usize;
2022        let mut v = format!("{:.*}", dec_places, n).into_bytes();
2023        if !alt {
2024            v = strip_trailing_zeros_fixed(&v);
2025        }
2026        v
2027    }
2028}
2029
/// Drop trailing fraction zeros from a fixed-notation number, and the decimal
/// point too if nothing is left after it. Input without a `.` is returned
/// unchanged (its trailing zeros are significant).
fn strip_trailing_zeros_fixed(s: &[u8]) -> Vec<u8> {
    if !s.contains(&b'.') {
        return s.to_vec();
    }
    // One past the last byte that is not a '0'; the '.' guarantees one exists.
    let after_zeros = s.iter().rposition(|&b| b != b'0').map_or(0, |last| last + 1);
    let kept = match s[..after_zeros].last() {
        Some(&b'.') => after_zeros - 1,
        _ => after_zeros,
    };
    s[..kept].to_vec()
}
2037
/// Drop trailing fraction zeros from the mantissa of an exponential-notation
/// number (`1.500e+03` becomes `1.5e+03`), removing the decimal point as well
/// when no fraction digits remain. The exponent part, starting at the first
/// `e` / `E`, is copied verbatim; input without an exponent marker or without a
/// `.` in the mantissa is returned unchanged.
fn strip_trailing_zeros_exp(s: &[u8]) -> Vec<u8> {
    let Some(marker) = s.iter().position(|&b| matches!(b, b'e' | b'E')) else {
        return s.to_vec();
    };
    let (mantissa, exponent) = s.split_at(marker);

    let kept = if mantissa.contains(&b'.') {
        let after_zeros = mantissa
            .iter()
            .rposition(|&b| b != b'0')
            .map_or(0, |last| last + 1);
        if mantissa[..after_zeros].last() == Some(&b'.') {
            after_zeros - 1
        } else {
            after_zeros
        }
    } else {
        mantissa.len()
    };

    [&mantissa[..kept], exponent].concat()
}
2057
2058/// `string.format(fmt, ...)` — C-style string formatting.
2059///
2060/// Fetch the integer argument for a `%d`/`%i`/`%u`/`%o`/`%x`/`%X` conversion.
2061///
2062/// On the dual-number versions (5.3+) an integer is required and a non-integral
2063/// number raises "number has no integer representation". On the float-only
2064/// versions (5.1/5.2) there is no integer subtype, so `string.format` truncates
2065/// the number toward zero — `("%d"):format(3.5)` is `3`, `(-3.5)` is `-3` —
2066/// matching lua5.2.4. A value outside the `lua_Integer` range (including inf/nan)
2067/// raises "number has no integer representation", which lua5.2.4 phrases as
2068/// "not a number in proper range"; the harness battery checks the truncation
2069/// cases (the out-of-range message text is a separate 5.2 error-format gap).
2070fn format_int_arg(state: &mut LuaState, arg: i32) -> Result<i64, LuaError> {
2071    if state.global().lua_version.number_model() != lua_types::NumberModel::FloatOnly {
2072        return state.check_arg_integer(arg);
2073    }
2074    let n = state.check_arg_number(arg)?;
2075    let t = n.trunc();
2076    if t.is_finite() && (-9223372036854775808.0..=9223372036854775808.0).contains(&t) {
2077        Ok(t as i64)
2078    } else {
2079        Err(LuaError::arg_error(arg, "number has no integer representation"))
2080    }
2081}
2082
2083pub fn str_format(state: &mut LuaState) -> Result<usize, LuaError> {
2084    let top = state.get_top();
2085    let mut arg = 1i32;
2086    let fmt_bytes = state.check_arg_string(1)?.to_vec();
2087    let mut buf: Vec<u8> = Vec::new();
2088    let mut i = 0usize;
2089
2090    while i < fmt_bytes.len() {
2091        let c = fmt_bytes[i];
2092        if c != L_ESC {
2093            buf.push(c);
2094            i += 1;
2095            continue;
2096        }
2097        i += 1;
2098        if i >= fmt_bytes.len() {
2099            break;
2100        }
2101        if fmt_bytes[i] == L_ESC {
2102            buf.push(L_ESC);
2103            i += 1;
2104            continue;
2105        }
2106
2107        // Parse a format specifier
2108        arg += 1;
2109        if arg > top {
2110            return Err(lua_vm::debug::arg_error_impl(state, arg, b"no value"));
2111        }
2112
2113        // Collect flags, width, precision
2114        let spec_start = i - 1; // includes the initial '%'
2115        // Skip flags: -, +, #, 0, space
2116        while i < fmt_bytes.len() && b"-+#0 ".contains(&fmt_bytes[i]) {
2117            i += 1;
2118        }
2119        // Lua 5.3 `scanformat`: the flags buffer is `FLAGS = "-+ #0"`, so a flags
2120        // run of `sizeof(FLAGS) == 6` or more characters is "repeated flags".
2121        // 5.4/5.5 fold this into the single "(too long)" check below.
2122        if state.global().lua_version == lua_types::LuaVersion::V53
2123            && i - (spec_start + 1) >= 6
2124        {
2125            return Err(lua_vm::debug::c_api_runtime(
2126                state,
2127                b"invalid format (repeated flags)".to_vec(),
2128            ));
2129        }
2130        // Skip width digits
2131        if i < fmt_bytes.len() && fmt_bytes[i] != b'0' {
2132            while i < fmt_bytes.len() && fmt_bytes[i].is_ascii_digit() {
2133                i += 1;
2134            }
2135        }
2136        // Skip precision
2137        if i < fmt_bytes.len() && fmt_bytes[i] == b'.' {
2138            i += 1;
2139            while i < fmt_bytes.len() && fmt_bytes[i].is_ascii_digit() {
2140                i += 1;
2141            }
2142        }
2143
2144        if i >= fmt_bytes.len() {
2145            let form: Vec<u8> = fmt_bytes[spec_start..].to_vec();
2146            return Err(invalid_conv_spec(state, &form));
2147        }
2148
2149        let conv = fmt_bytes[i];
2150        i += 1;
2151
2152        let spec_slice = &fmt_bytes[spec_start + 1..i - 1];
2153        let form = &fmt_bytes[spec_start..i];
2154
2155        // Must check before parse_fmt_spec to avoid overflow on huge widths.
2156        if spec_slice.len() + 1 >= 22 {
2157            return Err(lua_vm::debug::c_api_runtime(state, b"invalid format (too long)".to_vec()));
2158        }
2159
2160        let spec = parse_fmt_spec(spec_slice);
2161
2162        match conv {
2163            b'c' => {
2164                check_conv_spec(state, form, FMT_FLAGS_C, false)?;
2165                let n = state.check_arg_integer(arg)?;
2166                let body = vec![n as u8];
2167                pad_str(&mut buf, &body, &spec);
2168            }
2169            b'd' | b'i' => {
2170                check_conv_spec(state, form, FMT_FLAGS_I, true)?;
2171                let n = format_int_arg(state, arg)?;
2172                let (sign, digits) = signed_int_parts(n, &spec);
2173                pad_int(&mut buf, &sign, &digits, &spec);
2174            }
2175            b'u' => {
2176                check_conv_spec(state, form, FMT_FLAGS_U, true)?;
2177                let n = format_int_arg(state, arg)? as u64;
2178                let (prefix, digits) = unsigned_int_parts(n, 10, false, &spec);
2179                pad_int(&mut buf, &prefix, &digits, &spec);
2180            }
2181            b'o' => {
2182                check_conv_spec(state, form, FMT_FLAGS_X, true)?;
2183                let n = format_int_arg(state, arg)? as u64;
2184                let (prefix, digits) = unsigned_int_parts(n, 8, false, &spec);
2185                pad_int(&mut buf, &prefix, &digits, &spec);
2186            }
2187            b'x' => {
2188                check_conv_spec(state, form, FMT_FLAGS_X, true)?;
2189                let n = format_int_arg(state, arg)? as u64;
2190                let (prefix, digits) = unsigned_int_parts(n, 16, false, &spec);
2191                pad_int(&mut buf, &prefix, &digits, &spec);
2192            }
2193            b'X' => {
2194                check_conv_spec(state, form, FMT_FLAGS_X, true)?;
2195                let n = format_int_arg(state, arg)? as u64;
2196                let (prefix, digits) = unsigned_int_parts(n, 16, true, &spec);
2197                pad_int(&mut buf, &prefix, &digits, &spec);
2198            }
2199            b'a' | b'A' => {
2200                check_conv_spec(state, form, FMT_FLAGS_F, true)?;
2201                let n = state.check_arg_number(arg)?;
2202                let body = format_hex_float(n, spec.precision);
2203                let body: Vec<u8> = if conv == b'A' {
2204                    body.into_iter().map(|b| b.to_ascii_uppercase()).collect()
2205                } else {
2206                    body
2207                };
2208                let (sign, digits): (Vec<u8>, Vec<u8>) =
2209                    if !body.is_empty() && (body[0] == b'-' || body[0] == b'+') {
2210                        (vec![body[0]], body[1..].to_vec())
2211                    } else if spec.plus_sign {
2212                        (b"+".to_vec(), body)
2213                    } else if spec.space_sign {
2214                        (b" ".to_vec(), body)
2215                    } else {
2216                        (Vec::new(), body)
2217                    };
2218                let no_prec_spec = FmtSpec {
2219                    left_align: spec.left_align,
2220                    plus_sign: spec.plus_sign,
2221                    space_sign: spec.space_sign,
2222                    alt_form: spec.alt_form,
2223                    zero_pad: spec.zero_pad,
2224                    width: spec.width,
2225                    precision: None,
2226                };
2227                pad_int(&mut buf, &sign, &digits, &no_prec_spec);
2228            }
2229            b'f' | b'e' | b'E' | b'g' | b'G' => {
2230                check_conv_spec(state, form, FMT_FLAGS_F, true)?;
2231                let n = state.check_arg_number(arg)?;
2232                let body = format_float(n, conv, &spec);
2233                let (sign, digits): (Vec<u8>, Vec<u8>) = if !body.is_empty() && (body[0] == b'-' || body[0] == b'+') {
2234                    (vec![body[0]], body[1..].to_vec())
2235                } else if n >= 0.0 && spec.plus_sign {
2236                    (b"+".to_vec(), body)
2237                } else if n >= 0.0 && spec.space_sign {
2238                    (b" ".to_vec(), body)
2239                } else {
2240                    (Vec::new(), body)
2241                };
2242                let no_prec_spec = FmtSpec {
2243                    left_align: spec.left_align,
2244                    plus_sign: spec.plus_sign,
2245                    space_sign: spec.space_sign,
2246                    alt_form: spec.alt_form,
2247                    zero_pad: spec.zero_pad,
2248                    width: spec.width,
2249                    precision: None,
2250                };
2251                pad_int(&mut buf, &sign, &digits, &no_prec_spec);
2252            }
2253            b'p' => {
2254                check_conv_spec(state, form, FMT_FLAGS_C, false)?;
2255                let s: Vec<u8> = match lua_vm::api::to_pointer(state, arg) {
2256                    Some(p) => format!("0x{:x}", p).into_bytes(),
2257                    None => b"(null)".to_vec(),
2258                };
2259                pad_str(&mut buf, &s, &FmtSpec { precision: None, ..spec });
2260            }
2261            b'q' => {
2262                if form.len() > 2 {
2263                    return Err(LuaError::runtime(format_args!(
2264                        "specifier '%q' cannot have modifiers"
2265                    )));
2266                }
2267                addliteral(state, &mut buf, arg)?;
2268            }
2269            b's' => {
2270                check_conv_spec(state, form, FMT_FLAGS_C, true)?;
2271                let s = state.to_display_string(arg)?;
2272                let has_modifiers = spec.width != 0 || spec.precision.is_some();
2273                if has_modifiers && s.contains(&0u8) {
2274                    return Err(lua_vm::debug::arg_error_impl(state, arg, b"string contains zeros"));
2275                }
2276                pad_str(&mut buf, &s, &spec);
2277                state.pop_n(1);
2278            }
2279            _ => {
2280                let verb: &[u8] = if state.global().lua_version == lua_types::LuaVersion::V53 {
2281                    b"option"
2282                } else {
2283                    b"conversion"
2284                };
2285                let mut msg = b"invalid ".to_vec();
2286                msg.extend_from_slice(verb);
2287                msg.extend_from_slice(b" '");
2288                msg.extend_from_slice(form);
2289                msg.extend_from_slice(b"' to 'format'");
2290                return Err(lua_vm::debug::c_api_runtime(state, msg));
2291            }
2292        }
2293    }
2294
2295    state.push_bytes(&buf)?;
2296    Ok(1)
2297}
2298
2299// ────────────────────────────────────────────────────────────────────────────
2300// §8  Pack / unpack
2301// ────────────────────────────────────────────────────────────────────────────
2302
/// Tell whether `c` is one of the ASCII decimal digits `0`..=`9`.
fn is_digit(c: u8) -> bool {
    matches!(c, b'0'..=b'9')
}
2307
/// Parse an optional decimal number at `fmt[*pos..]`, advancing `*pos` past the
/// digits consumed; yields `df` (and leaves `*pos` alone) when no digit is there.
///
/// `wide` picks the accumulator limit: `i32::MAX` when false (the `int` used by
/// 5.3/5.4), `i64::MAX` when true (standing in for 5.5's `size_t`). As in the
/// reference, reading stops as soon as one more `*10 + 9` step could exceed the
/// limit; the unread digits are then seen as the next option, which is why
/// `c<int-overflow>` reports "invalid format option '<digit>'" on 5.3/5.4 but
/// parses cleanly on 5.5.
fn getnum(fmt: &[u8], pos: &mut usize, df: i64, wide: bool) -> i64 {
    let pending = fmt.get(*pos..).unwrap_or(&[]);
    if !pending.first().is_some_and(u8::is_ascii_digit) {
        return df;
    }

    let cap = if wide { i64::MAX } else { i64::from(i32::MAX) };
    // Largest accumulator value that can still take another digit safely.
    let ceiling = (cap - 9) / 10;

    let mut acc = 0i64;
    for &digit in pending.iter().take_while(|b| b.is_ascii_digit()) {
        acc = acc * 10 + i64::from(digit - b'0');
        *pos += 1;
        if acc > ceiling {
            break;
        }
    }
    acc
}
2330
2331/// Read an integer from the format string, error if out of `[1, MAXINTSIZE]`.
2332///
2333fn getnumlimit(fmt: &[u8], pos: &mut usize, df: i64) -> Result<usize, LuaError> {
2334    let sz = getnum(fmt, pos, df, false);
2335    if sz > MAX_INT_SIZE as i64 || sz <= 0 {
2336        return Err(LuaError::runtime(format_args!(
2337            "integral size ({}) out of limits [1,{}]",
2338            sz, MAX_INT_SIZE
2339        )));
2340    }
2341    Ok(sz as usize)
2342}
2343
2344/// Read and classify the next pack format option, filling `size`.
2345///
2346fn getoption(h: &mut Header, fmt: &[u8], pos: &mut usize, size: &mut usize) -> Result<KOption, LuaError> {
2347    // In Rust, the native max-align of a union of f64/void*/size_t is 8 on 64-bit.
2348    const NATIVE_MAX_ALIGN: usize = std::mem::align_of::<f64>();
2349
2350    if *pos >= fmt.len() {
2351        return Ok(KOption::Nop);
2352    }
2353    let opt = fmt[*pos];
2354    *pos += 1;
2355    *size = 0;
2356
2357    match opt {
2358        b'b' => { *size = 1; Ok(KOption::Int) }
2359        b'B' => { *size = 1; Ok(KOption::Uint) }
2360        b'h' => { *size = 2; Ok(KOption::Int) }
2361        b'H' => { *size = 2; Ok(KOption::Uint) }
2362        b'l' => { *size = 8; Ok(KOption::Int) }  // sizeof(long) on 64-bit
2363        b'L' => { *size = 8; Ok(KOption::Uint) }
2364        b'j' => { *size = SZINT; Ok(KOption::Int) }
2365        b'J' => { *size = SZINT; Ok(KOption::Uint) }
2366        b'T' => { *size = std::mem::size_of::<usize>(); Ok(KOption::Uint) }
2367        b'f' => { *size = 4; Ok(KOption::Float) }
2368        b'n' => { *size = 8; Ok(KOption::Number) }  // sizeof(lua_Number) = sizeof(f64) = 8
2369        b'd' => { *size = 8; Ok(KOption::Double) }  // sizeof(double) = 8
2370        b'i' => { *size = getnumlimit(fmt, pos, 4)?; Ok(KOption::Int) }
2371        b'I' => { *size = getnumlimit(fmt, pos, 4)?; Ok(KOption::Uint) }
2372        b's' => { *size = getnumlimit(fmt, pos, std::mem::size_of::<usize>()  as i64)?; Ok(KOption::Kstring) }
2373        b'c' => {
2374            let n = getnum(fmt, pos, -1, h.wide_size);
2375            if n == -1 {
2376                return Err(LuaError::runtime(format_args!("missing size for format option 'c'")));
2377            }
2378            *size = n as usize;
2379            Ok(KOption::Char)
2380        }
2381        b'z' => Ok(KOption::Zstr),
2382        b'x' => { *size = 1; Ok(KOption::Padding) }
2383        b'X' => Ok(KOption::Paddalign),
2384        b' ' => Ok(KOption::Nop),
2385        b'<' => { h.is_little = true; Ok(KOption::Nop) }
2386        b'>' => { h.is_little = false; Ok(KOption::Nop) }
2387        b'=' => { h.is_little = cfg!(target_endian = "little"); Ok(KOption::Nop) }
2388        b'!' => {
2389            let n = getnum(fmt, pos, NATIVE_MAX_ALIGN as i64, false);
2390            h.max_align = getnumlimit(fmt, pos, n)?;
2391            Ok(KOption::Nop)
2392        }
2393        _ => Err(LuaError::runtime(format_args!("invalid format option '{}'", opt as char)))
2394    }
2395}
2396
2397/// Get full details about the next format option, including alignment padding.
2398///
2399fn getdetails(
2400    state: &mut LuaState,
2401    h: &mut Header,
2402    total_size: usize,
2403    fmt: &[u8],
2404    pos: &mut usize,
2405    psize: &mut usize,
2406    ntoalign: &mut usize,
2407) -> Result<KOption, LuaError> {
2408    let opt = getoption(h, fmt, pos, psize)?;
2409    let mut align = *psize;
2410
2411    if opt == KOption::Paddalign {
2412        if *pos >= fmt.len() {
2413            return Err(lua_vm::debug::arg_error_impl(state, 1, b"invalid next option for option 'X'"));
2414        }
2415        let mut dummy_size = 0usize;
2416        let next_opt = getoption(h, fmt, pos, &mut dummy_size)?;
2417        align = dummy_size;
2418        if next_opt == KOption::Char || align == 0 {
2419            return Err(lua_vm::debug::arg_error_impl(state, 1, b"invalid next option for option 'X'"));
2420        }
2421    }
2422
2423    if align <= 1 || opt == KOption::Char {
2424        *ntoalign = 0;
2425    } else {
2426        if align > h.max_align {
2427            align = h.max_align;
2428        }
2429        if (align & (align - 1)) != 0 {
2430            return Err(lua_vm::debug::arg_error_impl(state, 1, b"format asks for alignment not power of 2"));
2431        }
2432        *ntoalign = (align - (total_size & (align - 1))) & (align - 1);
2433    }
2434    Ok(opt)
2435}
2436
2437/// Pack integer `n` with `size` bytes into `buf` with given endianness.
2438///
2439fn packint(buf: &mut Vec<u8>, mut n: u64, is_little: bool, size: usize, neg: bool) {
2440    let start = buf.len();
2441    buf.resize(start + size, 0);
2442    let slice = &mut buf[start..start + size];
2443    // Write LSB first (little-endian), then swap if big-endian
2444    for i in 0..size {
2445        slice[if is_little { i } else { size - 1 - i }] = (n & MC as u64) as u8;
2446        n >>= NB;
2447    }
2448    // Sign extension for negative numbers larger than lua_Integer
2449    if neg && size > SZINT {
2450        for i in SZINT..size {
2451            slice[if is_little { i } else { size - 1 - i }] = MC;
2452        }
2453    }
2454}
2455
/// Copy `src` (bytes in the host's native order) into `dest`, reversing the
/// byte order when the requested endianness `is_little` differs from the host's.
///
/// Both slices are expected to have the same length (checked in debug builds).
fn copywithendian(dest: &mut [u8], src: &[u8], is_little: bool) {
    debug_assert_eq!(dest.len(), src.len());
    let host_is_little = cfg!(target_endian = "little");
    if is_little != host_is_little {
        // Requested order is the opposite of the host's: mirror the bytes.
        dest.iter_mut()
            .zip(src.iter().rev())
            .for_each(|(out, byte)| *out = *byte);
        return;
    }
    dest.copy_from_slice(src);
}
2468
2469/// Unpack a (possibly signed) integer from `data[0..size]`.
2470///
2471fn unpackint(_state: &LuaState, data: &[u8], is_little: bool, size: usize, is_signed: bool) -> Result<i64, LuaError> {
2472    let limit = size.min(SZINT);
2473    let mut res: u64 = 0;
2474    for i in (0..limit).rev() {
2475        res <<= NB;
2476        let byte_idx = if is_little { i } else { size - 1 - i };
2477        res |= data[byte_idx] as u64;
2478    }
2479
2480    if size < SZINT {
2481        if is_signed {
2482            let mask: u64 = 1u64 << (size * NB as usize - 1);
2483            res = (res ^ mask).wrapping_sub(mask);
2484        }
2485    } else if size > SZINT {
2486        let mask = if !is_signed || (res as i64) >= 0 { 0u8 } else { MC };
2487        for i in limit..size {
2488            let byte_idx = if is_little { i } else { size - 1 - i };
2489            if data[byte_idx] != mask {
2490                return Err(LuaError::runtime(format_args!(
2491                    "{}-byte integer does not fit into Lua Integer", size
2492                )));
2493            }
2494        }
2495    }
2496    Ok(res as i64)
2497}
2498
2499/// `string.pack(fmt, ...)` — pack values into a binary string.
2500///
2501pub fn str_pack(state: &mut LuaState) -> Result<usize, LuaError> {
2502    let fmt_bytes = state.check_arg_string(1)?.to_vec();
2503    let fmt = &fmt_bytes[..];
2504    let mut h = Header::new(state.global().lua_version == lua_types::LuaVersion::V55);
2505    let mut arg = 1i32;
2506    let mut total_size = 0usize;
2507    let mut buf: Vec<u8> = Vec::new();
2508    let mut pos = 0usize;
2509
2510    while pos < fmt.len() {
2511        let mut size = 0usize;
2512        let mut ntoalign = 0usize;
2513        let opt = getdetails(state, &mut h, total_size, fmt, &mut pos, &mut size, &mut ntoalign)?;
2514        // 5.5 `str_pack` rejects an oversized running total ("result too long")
2515        // BEFORE consuming the value argument; 5.3/5.4 have no such check (their
2516        // `int` sizes cannot reach the limit). MAX_SIZE is the host pointer width.
2517        if h.wide_size {
2518            let space = ntoalign + size;
2519            if space > (i64::MAX as usize) || total_size > (i64::MAX as usize) - space {
2520                return Err(lua_vm::debug::arg_error_impl(state, arg, b"result too long"));
2521            }
2522        }
2523        total_size += ntoalign + size;
2524        for _ in 0..ntoalign {
2525            buf.push(PACK_PAD_BYTE);
2526        }
2527        arg += 1;
2528
2529        match opt {
2530            KOption::Int => {
2531                let n = state.check_arg_integer(arg)?;
2532                if size < SZINT {
2533                    let lim: i64 = 1i64 << (size * NB as usize - 1);
2534                    if !(-lim <= n && n < lim) {
2535                        return Err(lua_vm::debug::arg_error_impl(state, arg, b"integer overflow"));
2536                    }
2537                }
2538                packint(&mut buf, n as u64, h.is_little, size, n < 0);
2539            }
2540            KOption::Uint => {
2541                let n = state.check_arg_integer(arg)?;
2542                if size < SZINT {
2543                    let lim: u64 = 1u64 << (size * NB as usize);
2544                    if (n as u64) >= lim {
2545                        return Err(lua_vm::debug::arg_error_impl(state, arg, b"unsigned overflow"));
2546                    }
2547                }
2548                packint(&mut buf, n as u64, h.is_little, size, false);
2549            }
2550            KOption::Float => {
2551                let f = state.check_arg_number(arg)? as f32;
2552                let start = buf.len();
2553                buf.resize(start + 4, 0);
2554                copywithendian(&mut buf[start..start + 4], &f.to_bits().to_ne_bytes(), h.is_little);
2555            }
2556            KOption::Number => {
2557                let f = state.check_arg_number(arg)?;
2558                let start = buf.len();
2559                buf.resize(start + 8, 0);
2560                copywithendian(&mut buf[start..start + 8], &f.to_bits().to_ne_bytes(), h.is_little);
2561            }
2562            KOption::Double => {
2563                let f = state.check_arg_number(arg)? as f64;
2564                let start = buf.len();
2565                buf.resize(start + 8, 0);
2566                copywithendian(&mut buf[start..start + 8], &f.to_bits().to_ne_bytes(), h.is_little);
2567            }
2568            KOption::Char => {
2569                let s = state.check_arg_string(arg)?.to_vec();
2570                if s.len() > size {
2571                    return Err(lua_vm::debug::arg_error_impl(state, arg, b"string longer than given size"));
2572                }
2573                buf.extend_from_slice(&s);
2574                let pad = size - s.len();
2575                for _ in 0..pad {
2576                    buf.push(PACK_PAD_BYTE);
2577                }
2578            }
2579            KOption::Kstring => {
2580                let s = state.check_arg_string(arg)?.to_vec();
2581                let len = s.len();
2582                if size < SZINT && len >= (1usize << (size * 8)) {
2583                    return Err(lua_vm::debug::arg_error_impl(state, arg, b"string length does not fit in given size"));
2584                }
2585                packint(&mut buf, len as u64, h.is_little, size, false);
2586                buf.extend_from_slice(&s);
2587                total_size += len;
2588            }
2589            KOption::Zstr => {
2590                let s = state.check_arg_string(arg)?.to_vec();
2591                if s.contains(&0) {
2592                    return Err(lua_vm::debug::arg_error_impl(state, arg, b"string contains zeros"));
2593                }
2594                buf.extend_from_slice(&s);
2595                buf.push(0);
2596                total_size += s.len() + 1;
2597            }
2598            KOption::Padding => {
2599                buf.push(PACK_PAD_BYTE);
2600                arg -= 1; // undo increment
2601            }
2602            KOption::Paddalign | KOption::Nop => {
2603                arg -= 1; // undo increment
2604            }
2605        }
2606    }
2607
2608    state.push_bytes(&buf)?;
2609    Ok(1)
2610}
2611
2612/// `string.packsize(fmt)` — return the byte-size the format would produce.
2613///
2614pub fn str_packsize(state: &mut LuaState) -> Result<usize, LuaError> {
2615    let fmt_bytes = state.check_arg_string(1)?.to_vec();
2616    let fmt = &fmt_bytes[..];
2617    let mut h = Header::new(state.global().lua_version == lua_types::LuaVersion::V55);
2618    let mut total_size = 0usize;
2619    let mut pos = 0usize;
2620
2621    while pos < fmt.len() {
2622        let mut size = 0usize;
2623        let mut ntoalign = 0usize;
2624        let opt = getdetails(state, &mut h, total_size, fmt, &mut pos, &mut size, &mut ntoalign)?;
2625        if opt == KOption::Kstring || opt == KOption::Zstr {
2626            return Err(lua_vm::debug::arg_error_impl(state, 1, b"variable-length format"));
2627        }
2628        let space = ntoalign + size;
2629        let max_total: usize = if h.wide_size {
2630            i64::MAX as usize
2631        } else {
2632            PACK_MAXSIZE
2633        };
2634        if space > max_total || total_size > max_total - space {
2635            return Err(lua_vm::debug::arg_error_impl(state, 1, b"format result too large"));
2636        }
2637        total_size += space;
2638    }
2639    state.push(LuaValue::Int(total_size as i64));
2640    Ok(1)
2641}
2642
2643/// `string.unpack(fmt, s [, pos])` — unpack binary data from string.
2644///
2645pub fn str_unpack(state: &mut LuaState) -> Result<usize, LuaError> {
2646    let fmt_bytes = state.check_arg_string(1)?.to_vec();
2647    let data_bytes = state.check_arg_string(2)?.to_vec();
2648    let ld = data_bytes.len();
2649    let pos_raw = state.opt_arg_integer(3, 1)?;
2650    let mut pos = if matches!(state.global().lua_version, lua_types::LuaVersion::V53) {
2651        posrelat_53(pos_raw, ld).wrapping_sub(1)
2652    } else {
2653        pos_relat_i(pos_raw, ld).saturating_sub(1)
2654    };
2655
2656    if pos > ld {
2657        return Err(lua_vm::debug::arg_error_impl(state, 3, b"initial position out of string"));
2658    }
2659
2660    let fmt = &fmt_bytes[..];
2661    let data = &data_bytes[..];
2662    let mut h = Header::new(state.global().lua_version == lua_types::LuaVersion::V55);
2663    let mut fmt_pos = 0usize;
2664    let mut n = 0usize;
2665
2666    while fmt_pos < fmt.len() {
2667        let mut size = 0usize;
2668        let mut ntoalign = 0usize;
2669        let opt = getdetails(state, &mut h, pos, fmt, &mut fmt_pos, &mut size, &mut ntoalign)?;
2670
2671        if ntoalign + size > ld - pos {
2672            return Err(lua_vm::debug::arg_error_impl(state, 2, b"data string too short"));
2673        }
2674        pos += ntoalign;
2675        state.ensure_stack(2, "too many results")?;
2676        n += 1;
2677
2678        match opt {
2679            KOption::Int => {
2680                let v = unpackint(state, &data[pos..pos + size], h.is_little, size, true)?;
2681                state.push(LuaValue::Int(v));
2682            }
2683            KOption::Uint => {
2684                let v = unpackint(state, &data[pos..pos + size], h.is_little, size, false)?;
2685                state.push(LuaValue::Int(v));
2686            }
2687            KOption::Float => {
2688                let mut bytes = [0u8; 4];
2689                copywithendian(&mut bytes, &data[pos..pos + 4], h.is_little);
2690                let f = f32::from_bits(u32::from_ne_bytes(bytes));
2691                state.push(LuaValue::Float(f as f64));
2692            }
2693            KOption::Number => {
2694                let mut bytes = [0u8; 8];
2695                copywithendian(&mut bytes, &data[pos..pos + 8], h.is_little);
2696                let f = f64::from_bits(u64::from_ne_bytes(bytes));
2697                state.push(LuaValue::Float(f));
2698            }
2699            KOption::Double => {
2700                let mut bytes = [0u8; 8];
2701                copywithendian(&mut bytes, &data[pos..pos + 8], h.is_little);
2702                let f = f64::from_bits(u64::from_ne_bytes(bytes));
2703                state.push(LuaValue::Float(f));
2704            }
2705            KOption::Char => {
2706                state.push_bytes(&data[pos..pos + size])?;
2707            }
2708            KOption::Kstring => {
2709                let len = unpackint(state, &data[pos..pos + size], h.is_little, size, false)? as usize;
2710                if len > ld - pos - size {
2711                    return Err(lua_vm::debug::arg_error_impl(state, 2, b"data string too short"));
2712                }
2713                state.push_bytes(&data[pos + size..pos + size + len])?;
2714                pos += len;
2715            }
2716            KOption::Zstr => {
2717                let found = data[pos..].iter().position(|&b| b == 0);
2718                let end = match found {
2719                    Some(e) => e,
2720                    None => return Err(lua_vm::debug::arg_error_impl(state, 2, b"unfinished string for format 'z'")),
2721                };
2722                if pos + end >= ld {
2723                    return Err(lua_vm::debug::arg_error_impl(state, 2, b"unfinished string for format 'z'"));
2724                }
2725                state.push_bytes(&data[pos..pos + end])?;
2726                pos += end + 1;
2727            }
2728            KOption::Paddalign | KOption::Padding | KOption::Nop => {
2729                n -= 1; // undo increment
2730            }
2731        }
2732        pos += size;
2733    }
2734
2735    state.push(LuaValue::Int((pos + 1) as i64));
2736    Ok(n + 1)
2737}
2738
2739// ────────────────────────────────────────────────────────────────────────────
2740// §9  Module registration
2741// ────────────────────────────────────────────────────────────────────────────
2742
2743/// Function table for `string` library.
2744///
2745pub const STRING_LIB: &[(&[u8], lua_CFunction)] = &[
2746    (b"byte",     str_byte),
2747    (b"char",     str_char),
2748    (b"dump",     str_dump),
2749    (b"find",     str_find),
2750    (b"format",   str_format),
2751    (b"gmatch",   gmatch),
2752    (b"gsub",     str_gsub),
2753    (b"len",      str_len),
2754    (b"lower",    str_lower),
2755    (b"match",    str_match),
2756    (b"rep",      str_rep),
2757    (b"reverse",  str_reverse),
2758    (b"sub",      str_sub),
2759    (b"upper",    str_upper),
2760    (b"pack",     str_pack),
2761    (b"packsize", str_packsize),
2762    (b"unpack",   str_unpack),
2763];
2764
2765/// Metamethods to install on the string metatable.
2766///
2767pub const STRING_META_METHODS: &[(&[u8], lua_CFunction)] = &[
2768    (b"__add",  arith_add),
2769    (b"__sub",  arith_sub),
2770    (b"__mul",  arith_mul),
2771    (b"__mod",  arith_mod),
2772    (b"__pow",  arith_pow),
2773    (b"__div",  arith_div),
2774    (b"__idiv", arith_idiv),
2775    (b"__unm",  arith_unm),
2776];
2777
2778/// Create the string metatable and set it as the metatable for all strings.
2779///
2780pub fn createmetatable(state: &mut LuaState) -> Result<(), LuaError> {
2781    state.new_lib_table(STRING_META_METHODS)?;
2782    state.set_funcs(STRING_META_METHODS, 0)?;
2783    state.push_string(b"")?;
2784    let mt_idx = state.top_idx() - 2;
2785    let mt = state.get_at(mt_idx);
2786    state.push(mt);
2787    state.set_metatable(-2)?;
2788    state.pop_n(1);
2789    let strlib_idx = state.top_idx() - 2;
2790    let strlib = state.get_at(strlib_idx);
2791    state.push(strlib);
2792    state.set_field(-2, b"__index")?;
2793    state.pop_n(1);
2794    Ok(())
2795}
2796
2797/// `luaopen_string` — open the string library.
2798///
2799pub fn luaopen_string(state: &mut LuaState) -> Result<usize, LuaError> {
2800    state.new_lib(STRING_LIB)?;
2801    // Lua 5.1 carries `string.gfind`, the pre-5.0 name for `gmatch` (an exact
2802    // alias). It was removed in 5.2. Verified against lua5.1.5:
2803    // `type(string.gfind)` == "function" and it iterates identically to
2804    // `gmatch`. See specs/followup/5.1-roster-syntax.md §1.
2805    if matches!(state.global().lua_version, lua_types::LuaVersion::V51) {
2806        state.push_c_function(gmatch)?;
2807        state.set_field(-2, b"gfind")?;
2808    }
2809    createmetatable(state)?;
2810    Ok(1)
2811}
2812
2813// ────────────────────────────────────────────────────────────────────────────
2814// PORT STATUS
2815//   source:        src/lstrlib.c  (1875 lines, 46 functions)
2816//   target_crate:  lua-stdlib
2817//   confidence:    medium
2818//   todos:         13
2819//   port_notes:    6
2820//   unsafe_blocks: 0
2821//   notes:         Pattern engine uses index-based MatchState (not raw ptrs).
2822//                  string.format delegates numeric widths/precision/flags to
2823//                  Phase B (a sprintf-compatible crate or manual impl).
2824//                  gmatch iterator state mirrors C-Lua's closure shape:
2825//                  source string, pattern string, and userdata state. The
2826//                  userdata host payload stores only byte positions; strings
2827//                  stay as traced closure upvalues. See gmatch_aux.
2828//                  copywithendian uses safe byte-level swapping (no transmute).
2829//                  unpackint sign-extension uses two's-complement bit tricks;
2830//                  logic review needed in Phase B.
2831//                  str_dump requires state.dump_function() which is not yet
2832//                  defined; Phase B wires up the ldump.c port.
2833//                  addquoted uses 3-digit escape for all control chars (slight
2834//                  deviation from C which uses 1-digit when safe); benign.
2835//                  str_len/str_sub/str_byte/str_reverse/str_lower/str_upper/
2836//                  str_rep/gmatch/str_find_aux borrow source bytes through
2837//                  to_lua_string (GcRef) instead of copying via
2838//                  check_arg_string, mirroring the gmatch_aux fix (685482d).
2839//                  string_ops 3.00x→2.00x, string_ops_long 2.25x→1.48x on
2840//                  best-of-5 (Apple M3 Max).
2841//                  gmatch_aux originally moved from stack raw_geti/raw_seti to
2842//                  direct table slots, then later from table state to C-shaped
2843//                  userdata/upvalues. The latter pass dropped gmatch_aux's
2844//                  string_ops_long profile share from ~6.9% to ~2.9%.
2845// ────────────────────────────────────────────────────────────────────────────