lua_vm/string.rs
1//! String table and interned-string operations — port of `lstring.c` + `lstring.h`.
2//!
3//! Provides two key abstractions:
4//!
//! - [`LuaStringImpl`]: the Lua string value, stored as a reference-counted byte slice.
//!   Short strings (`<= MAX_SHORT_LEN` bytes) are interned in the [`StringPool`]
//!   owned by `GlobalState`; long strings are heap-allocated on each creation and
//!   never interned.
9//!
10//! - [`StringPool`]: the intern table for short strings, stored on `GlobalState`.
//!   Replaces the C `stringtable` struct, which used a separately-chained hash
//!   table: a bucket array whose collision chains are threaded intrusively through
//!   `TString.u.hnext`. In Rust the intrusive chain is dropped; a `HashMap`
//!   provides average-case O(1) lookup and automatic rehashing.
14//! See PORT NOTE on [`StringPool`] for the full rationale.
15//!
16//! The `lstring.h` header is merged into this module per PORTING.md §1.
17//!
18//! # C source files
19//! - `reference/lua-5.4.7/src/lstring.c` (275 lines, 15 functions)
20//! - `reference/lua-5.4.7/src/lstring.h` (57 lines; merged here)
21
22use std::cell::Cell;
23#[allow(unused_imports)] use crate::prelude::*;
24use std::collections::HashMap;
25use std::rc::Rc;
26
27// TODO(port): these import paths will resolve once Phase B wires the crate graph.
28// `LuaState` and `GlobalState` live in crate::state (src/state.rs, from lstate.c).
29// `LuaValue` and `LuaError` live in lua_types (crates/lua-types/src/).
30use crate::state::{GlobalState, LuaState};
31
32// PORT NOTE: `GcRef<T>` is the lua-types newtype around `Rc<T>` per PORT_STRATEGY §3.4.
33// Re-imported here so all string-pool entries share identity with state.rs / api.rs.
34use lua_types::GcRef;
35/// Local alias retained while string.rs's own `LuaStringImpl` is still in use; will
36/// merge with `lua_types::LuaString` in Phase B's string-pool consolidation.
37type LocalGcRef<T> = Rc<T>;
38
39/// Phase-B bridge: converts a lua-vm rich `LuaStringImpl` into a `lua_types::LuaString`.
40/// The two types track different metadata (short/long flag, extra byte) and a real
41/// merge belongs in Phase B once `lua-types::LuaString` grows the needed fields.
42fn impl_to_lt(s: &GcRef<LuaStringImpl>) -> GcRef<lua_types::LuaString> {
43 // TODO(D-1c-bridge): allocation outside state context (free fn)
44 GcRef::new(lua_types::LuaString::from_bytes(s.as_bytes().to_vec()))
45}
46
// ── Constants (lstring.h / llimits.h macros) ──────────────────────────────────

/// Text of the out-of-memory error message (C: `MEMERRMSG`).
///
/// `init` interns this string up front so that an error value is already
/// available when an allocation fails.
pub(crate) const MEMERR_MSG: &[u8] = b"not enough memory";

/// Initial size of the string intern table (C: `MINSTRTABSIZE`, llimits.h).
const MIN_STR_TAB_SIZE: usize = 128;

/// Number of buckets in the API string cache (C: `STRCACHE_N`, llimits.h).
const STRCACHE_N: usize = 53;

/// Number of entries per API string-cache bucket (C: `STRCACHE_M`, llimits.h).
const STRCACHE_M: usize = 2;

/// Longest string, in bytes, that is still treated as "short" and therefore
/// interned (C: `LUAI_MAXSHORTLEN`, llimits.h).
pub(crate) const MAX_SHORT_LEN: usize = 40;

/// Largest object size Lua will attempt to allocate (C: `MAX_SIZE`, llimits.h).
///
/// When `usize` is at least as wide as `i64` the limit is `i64::MAX`;
/// on narrower targets it is `usize::MAX`.
const MAX_SIZE: usize = if std::mem::size_of::<usize>() >= std::mem::size_of::<i64>() {
    i64::MAX as usize
} else {
    usize::MAX
};
78
// C: #define MAXSTRTB  cast_int(luaM_limitN(MAX_INT, TString*))
// C: #define luaM_limitN(n,t) \
//      ((cast_sizet(n) <= MAX_SIZET/sizeof(t)) ? (n) : cast_uint((MAX_SIZET/sizeof(t))))
/// Upper bound on the number of buckets in the string table.
///
/// `luaM_limitN(n, T)` clamps an element count `n` so that `n * sizeof(T)`
/// still fits in a `size_t`; it evaluates to `min(n, MAX_SIZET / sizeof(T))`.
/// With `n = MAX_INT` and `T = TString*` the limit is therefore `i32::MAX`
/// whenever that many pointers are addressable (e.g. on 64-bit targets), and
/// `usize::MAX / size_of::<usize>()` otherwise.
///
/// Written as an `if` because `std::cmp::min` is not usable in a `const`
/// initializer.
const MAX_STR_TAB: usize = {
    let max_int = i32::MAX as usize;
    let addressable = usize::MAX / std::mem::size_of::<usize>();
    if max_int <= addressable {
        max_int
    } else {
        addressable
    }
};
84
85// C: #define sizelstring(l) (offsetof(TString, contents) + ((l) + 1) * sizeof(char))
86// macros.tsv: sizelstring → drop — Rust allocates via Box<[u8]> / Rc<[u8]>
87// PORT NOTE: dropped entirely; Rust uses Rc<[u8]> which carries its own length.
88
89// C: #define luaS_newliteral(L, s) (luaS_newlstr(L, "" s, (sizeof(s)/sizeof(char))-1))
90// macros.tsv: luaS_newliteral → state.intern_str(b"...")
91// PORT NOTE: translated at call sites as `new_lstr(state, b"literal")`.
92
93// C: #define isreserved(s) ((s)->tt == LUA_VSHRSTR && (s)->extra > 0)
94// macros.tsv: isreserved → ts.is_reserved_word()
95// PORT NOTE: translated at call sites as the `LuaStringImpl::is_reserved_word()` method.
96
97// C: #define eqshrstr(a,b) check_exp((a)->tt == LUA_VSHRSTR, (a) == (b))
98// macros.tsv: eqshrstr → Rc::ptr_eq(a, b)
99// PORT NOTE: short strings are interned so pointer equality suffices.
100// Translated at call sites as `Rc::ptr_eq(a, b)`.
101
102// ── LuaStringImpl (was TString in lobject.h) ─────────────────────────────────────
103
104// PORT NOTE: `LuaStringImpl` corresponds to `TString` from `lobject.h`, which maps to
105// `src/object.rs` per file_deps.txt. It is defined here (in `string.rs`) because
106// `lstring.c` owns the string-table internals and most of the type's behaviour.
107// Phase B should reconcile: either keep it here and re-export from `object.rs`,
108// or move it there and import it from `string.rs`.
109
/// Discriminates the two flavours of Lua string.
///
/// This replaces the C type tags `LUA_VSHRSTR` / `LUA_VLNGSTR` together with
/// the `shrlen == 0xFF` sentinel that C uses to mark long strings: the variant
/// itself says which flavour a string is, and the length always comes from the
/// byte buffer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StringKind {
    /// C: `LUA_VSHRSTR` — a short string, the kind that goes through the intern table.
    Short,
    /// C: `LUA_VLNGSTR` — a long string, which is never interned.
    Long,
}
126
127/// A Lua string: an immutable, reference-counted byte sequence.
128///
129/// Short strings (`<= MAX_SHORT_LEN = 40` bytes) are interned in the
130/// [`StringPool`] on `GlobalState`; two short strings with the same bytes
131/// are guaranteed to be the same `GcRef` (pointer equality via `Rc::ptr_eq`).
132///
133/// Long strings are heap-allocated independently and never interned. Their
134/// hash is computed lazily on first call to [`hash_long_str`] and cached via
135/// interior mutability (`Cell<u32>`).
136///
137/// # C mapping (types.tsv)
138/// ```text
139/// TString → LuaStringImpl
140/// TString.extra → extra: Cell<u8> (reserved-word idx for Short; hash-ready flag for Long)
141/// TString.shrlen → kind: StringKind (0xFF sentinel replaced by enum variant)
142/// TString.hash → hash: Cell<u32>
143/// TString.u.lnglen → bytes.len() (length implicit in Rc<[u8]>)
144/// TString.u.hnext → (removed) (intrusive chain gone; StringPool uses HashMap)
145/// TString.contents → bytes: Rc<[u8]>
146/// ```
147pub struct LuaStringImpl {
148 // C: char contents[]; (flexible array member)
149 bytes: Rc<[u8]>,
150
151 // C: lu_byte shrlen; (0xFF for long strings, actual length for short)
152 // Replaced by the StringKind enum; length is implicit in bytes.len().
153 kind: StringKind,
154
155 // C: unsigned int hash;
156 // Using Cell<u32> so that `hash_long_str` can cache the hash through a
157 // shared `&LuaStringImpl` reference (interior mutability, single-threaded).
158 hash: Cell<u32>,
159
160 // C: lu_byte extra;
161 // Short strings: reserved-word token index (0 = not a keyword).
162 // Long strings: 0 = hash not yet computed; 1 = hash is valid.
163 extra: Cell<u8>,
164}
165
166impl LuaStringImpl {
167 /// Returns the string's bytes.
168 ///
169 /// C: `getstr(ts)` / `getlngstr(ts)` / `getshrstr(ts)` — all map to this.
170 /// macros.tsv: `getstr` / `getlngstr` / `getshrstr` → `ts.as_bytes()`
171 pub fn as_bytes(&self) -> &[u8] {
172 &self.bytes
173 }
174
175 /// Returns the byte length of the string.
176 ///
177 /// C: `tsslen(ts)` — macro returning `ts->shrlen` for Short or `ts->u.lnglen`
178 /// for Long. In Rust both cases are `bytes.len()`.
179 /// macros.tsv: `tsslen` → `ts.len()`
180 pub fn len(&self) -> usize {
181 self.bytes.len()
182 }
183
184 /// Returns `true` if this is a long (non-interned) string.
185 pub fn is_long(&self) -> bool {
186 self.kind == StringKind::Long
187 }
188
189 /// Returns `true` if this is a short (interned) string.
190 pub fn is_short(&self) -> bool {
191 self.kind == StringKind::Short
192 }
193
194 /// Returns `true` if this short string is a Lua reserved word.
195 ///
196 /// C: `isreserved(s)` macro — `(s)->tt == LUA_VSHRSTR && (s)->extra > 0`.
197 /// macros.tsv: `isreserved` → `ts.is_reserved_word()`
198 pub fn is_reserved_word(&self) -> bool {
199 self.kind == StringKind::Short && self.extra.get() > 0
200 }
201
202 /// GC color predicate. Returns `true` if this object is "white" (unreachable)
203 /// in the GC's current wave.
204 ///
205 /// C: `iswhite(obj)` macro.
206 /// macros.tsv: `iswhite` → `obj.is_white()`
207 ///
208 /// PORT NOTE: GC color management is deferred to Phase D. In Phases A–C all
209 /// objects are reachable via `Rc` reference counts and this always returns
210 /// `false` (nothing is white / unreachable).
211 pub fn is_white(&self) -> bool {
212 // TODO(port): Phase D — check the GC marked byte; stub returns false (all live)
213 false
214 }
215
216 /// Flip GC color from white to the current non-white (resurrect a dead object).
217 ///
218 /// C: `changewhite(obj)` macro.
219 /// macros.tsv: `changewhite` → `obj.flip_white()`
220 ///
221 /// PORT NOTE: GC color management deferred to Phase D; no-op in Phases A–C.
222 pub fn flip_white(&self) {
223 // TODO(port): Phase D — update the GC marked byte
224 }
225}
226
227impl PartialEq for LuaStringImpl {
228 /// Equality for Lua strings.
229 ///
230 /// For short strings (interned), pointer equality via `Rc::ptr_eq` is sufficient
231 /// and matches `eqshrstr` in C. For long strings, we fall back to byte
232 /// comparison, matching `luaS_eqlngstr` in C.
233 fn eq(&self, other: &Self) -> bool {
234 if self.kind == StringKind::Short && other.kind == StringKind::Short {
235 // C: eqshrstr(a, b) — pointer equality; macros.tsv: Rc::ptr_eq(a, b)
236 Rc::ptr_eq(&self.bytes, &other.bytes)
237 } else {
238 // C: luaS_eqlngstr — byte comparison for long strings
239 self.bytes == other.bytes
240 }
241 }
242}
243
244impl Eq for LuaStringImpl {}
245
246// ── StringPool (was stringtable in lstate.h) ──────────────────────────────────
247
248// PORT NOTE: `StringPool` corresponds to `stringtable` from `lstate.h`, which maps
249// to `src/state.rs` per file_deps.txt. It is defined here because `lstring.c`
250// owns all of the pool's mutation logic. Phase B should reconcile placement.
251//
// The C `stringtable` used a separate-chaining hash table where each bucket was
// the head of an intrusive singly-linked list threaded through `TString.u.hnext`.
254// In Rust, `TString.u.hnext` is removed per types.tsv. The `HashMap` replaces
255// both the bucket array and the chain: it provides O(1) average-case lookup,
256// automatic rehashing, and eliminates the need for `tablerehash`.
257//
258// `nuse` and `size` are retained for parity with the C invariants that other
259// code may check (e.g. `growstrtab` tests `nuse >= size`).
260
261/// Intern table for short Lua strings. Lives on `GlobalState`.
262///
263/// # C mapping (types.tsv)
264/// ```text
265/// stringtable → StringPool
266/// stringtable.hash → map: HashMap<Box<[u8]>, GcRef<LuaStringImpl>>
267/// stringtable.nuse → nuse: usize
268/// stringtable.size → size: usize
269/// ```
270pub struct StringPool {
271 // C: TString **hash; (array of chain heads — replaced by HashMap)
272 // PORT NOTE: keyed by owned byte slice; lookup by `&[u8]` via Borrow<[u8]>.
273 map: HashMap<Box<[u8]>, GcRef<LuaStringImpl>>,
274
275 // C: int nuse; (live entry count)
276 // PERF(port): redundant with map.len() in Rust — keep for C-parity; remove in Phase B
277 nuse: usize,
278
279 // C: int size; (bucket count)
280 // In Rust, HashMap manages its own capacity; this tracks the last requested size.
281 size: usize,
282}
283
284impl StringPool {
285 /// Create an empty pool with `MIN_STR_TAB_SIZE` preallocated capacity.
286 ///
287 /// C: corresponds to the `luaM_newvector(L, MINSTRTABSIZE, TString*)` +
288 /// `tablerehash(tb->hash, 0, MINSTRTABSIZE)` sequence in `luaS_init`.
289 pub fn new() -> Self {
290 StringPool {
291 map: HashMap::with_capacity(MIN_STR_TAB_SIZE),
292 nuse: 0,
293 size: MIN_STR_TAB_SIZE,
294 }
295 }
296}
297
298impl Default for StringPool {
299 fn default() -> Self {
300 Self::new()
301 }
302}
303
304// ── LuaUserData (was Udata in lobject.h) ──────────────────────────────────────
305
306// PORT NOTE: `LuaUserData` corresponds to `Udata` from `lobject.h`, which maps to
307// `src/object.rs` per file_deps.txt. Defined here because `luaS_newudata` lives
308// in `lstring.c`. Phase B should reconcile placement.
309
/// Full userdata (port of C `Udata`): a raw byte payload together with its
/// user values and metatable slot.
///
/// Several fields are still placeholders: `metatable` and `uv` use `()` where
/// the table and value types will go once those exist (Phase B).
///
/// # C mapping
/// ```text
/// Udata.len       → len
/// Udata.nuvalue   → nuvalue
/// Udata.metatable → metatable  (placeholder type for now)
/// Udata.uv        → uv         (placeholder element type for now)
/// (payload)       → data       (C lays the bytes out after the header,
///                               located via `udatamemoffset`)
/// ```
pub struct LuaUserDataImpl {
    /// C: `size_t len` — payload size in bytes.
    pub len: usize,
    /// C: `unsigned short nuvalue` — number of user values.
    pub nuvalue: u16,
    /// C: `struct Table *metatable`.
    // TODO(port): GcRef<LuaTable> — LuaTable not yet defined; Phase B
    pub metatable: Option<()>,
    /// C: `UValue uv[1]` — the user values (flexible array in C).
    // TODO(port): Vec<LuaValue> — LuaValue not yet defined; Phase B
    pub uv: Vec<()>,
    /// The raw byte payload that C reaches through `udatamemoffset` arithmetic.
    pub data: Box<[u8]>,
}
339
340// ── Public functions ───────────────────────────────────────────────────────────
341
342// C: int luaS_eqlngstr(TString *a, TString *b)
343// lstring.h: LUAI_FUNC → pub(crate)
344/// Test equality of two long strings.
345///
346/// Two long strings are equal if they have identical byte content. A pointer
347/// equality short-circuit is also applied: if `a` and `b` share the same
348/// underlying `Rc<[u8]>` allocation, they are trivially equal.
349///
350/// # C source
351/// ```c
352/// // C: int luaS_eqlngstr(TString *a, TString *b) {
353/// // size_t len = a->u.lnglen;
354/// // lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR);
355/// // return (a == b) ||
356/// // ((len == b->u.lnglen) &&
357/// // (memcmp(getlngstr(a), getlngstr(b), len) == 0));
358/// // }
359/// ```
360pub(crate) fn eq_long_str(a: &LuaStringImpl, b: &LuaStringImpl) -> bool {
361 // C: lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR);
362 // macros.tsv: lua_assert → debug_assert!
363 debug_assert!(a.is_long() && b.is_long(), "eq_long_str: both arguments must be long strings");
364
365 // C: (a == b) — pointer equality (same TString allocation)
366 // In Rust: check if the Rc<[u8]> byte buffers are the same allocation
367 if Rc::ptr_eq(&a.bytes, &b.bytes) {
368 return true;
369 }
370
371 // C: (len == b->u.lnglen) && (memcmp(getlngstr(a), getlngstr(b), len) == 0)
372 // macros.tsv: getlngstr → ts.as_bytes()
373 a.as_bytes() == b.as_bytes()
374}
375
// C: unsigned int luaS_hash(const char *str, size_t l, unsigned int seed)
// lstring.h: LUAI_FUNC → pub(crate)
/// Hashes `bytes` with Lua's seeded shift-add-xor string hash.
///
/// The accumulator starts at `seed ^ len` (the length is truncated to `u32`,
/// mirroring C's `cast_uint`) and the bytes are folded in from the LAST byte
/// to the first: `h ^= (h << 5) + (h >> 2) + byte`. The function is pure and
/// does not allocate.
///
/// # C source
/// ```c
/// // C: unsigned int luaS_hash(const char *str, size_t l, unsigned int seed) {
/// //   unsigned int h = seed ^ cast_uint(l);
/// //   for (; l > 0; l--)
/// //     h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1]));
/// //   return h;
/// // }
/// ```
///
/// PORT NOTE: in Rust, as in C, the shift operators bind LESS tightly than
/// `+`, so the parentheses around `h << 5` are required. The additions use
/// `wrapping_add` to reproduce C's unsigned wrap-around.
pub(crate) fn hash_bytes(bytes: &[u8], seed: u32) -> u32 {
    // C: unsigned int h = seed ^ cast_uint(l);
    let initial = seed ^ (bytes.len() as u32);

    // C: for (; l > 0; l--) h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1]));
    bytes.iter().rev().fold(initial, |h, &byte| {
        let mix = (h << 5)
            .wrapping_add(h >> 2)
            .wrapping_add(u32::from(byte));
        h ^ mix
    })
}
416
417// C: unsigned int luaS_hashlongstr(TString *ts)
418// lstring.h: LUAI_FUNC → pub(crate)
419/// Compute (and cache) the hash of a long string.
420///
421/// The hash for long strings is computed lazily: on first call the hash is
422/// derived from `hash_bytes` using the seed stored in the `hash` field, then
423/// `extra` is set to `1` to record that the hash is now valid. Subsequent calls
424/// return the cached value directly.
425///
426/// Interior mutability (`Cell<u32>` / `Cell<u8>`) allows mutation through a
427/// shared `&LuaStringImpl` reference, which is necessary because `GcRef<LuaStringImpl>`
428/// is `Rc<LuaStringImpl>` and there is no safe way to get `&mut` through an `Rc`.
429///
430/// # C source
431/// ```c
432/// // C: unsigned int luaS_hashlongstr(TString *ts) {
433/// // lua_assert(ts->tt == LUA_VLNGSTR);
434/// // if (ts->extra == 0) { /* no hash? */
435/// // size_t len = ts->u.lnglen;
436/// // ts->hash = luaS_hash(getlngstr(ts), len, ts->hash);
437/// // ts->extra = 1; /* now it has its hash */
438/// // }
439/// // return ts->hash;
440/// // }
441/// ```
442pub(crate) fn hash_long_str(ts: &LuaStringImpl) -> u32 {
443 // C: lua_assert(ts->tt == LUA_VLNGSTR);
444 debug_assert!(ts.is_long(), "hash_long_str: argument must be a long string");
445
446 // C: if (ts->extra == 0) { /* no hash? */
447 if ts.extra.get() == 0 {
448 // C: ts->hash = luaS_hash(getlngstr(ts), len, ts->hash);
449 // The initial ts->hash holds the per-state seed (set at construction).
450 let computed = hash_bytes(ts.as_bytes(), ts.hash.get());
451 ts.hash.set(computed);
452 // C: ts->extra = 1; /* now it has its hash */
453 ts.extra.set(1);
454 }
455
456 // C: return ts->hash;
457 ts.hash.get()
458}
459
460// C: static void tablerehash(TString **vect, int osize, int nsize) [DROPPED]
461//
462// PORT NOTE: `tablerehash` walked the intrusive `hnext` chain in each bucket and
463// redistributed `TString *` pointers into new bucket slots. In Rust the
464// `HashMap` in `StringPool` handles its own rehashing automatically whenever its
465// load factor is exceeded or `reserve` / `shrink_to` is called. The entire
466// function is therefore dropped; its effects are subsumed by the HashMap.
467
468// C: void luaS_resize(lua_State *L, int nsize)
469// lstring.h: LUAI_FUNC → pub(crate)
470/// Resize the string intern table to approximately `nsize` buckets.
471///
472/// When growing, `HashMap::reserve` hints the desired capacity. When shrinking,
473/// `HashMap::shrink_to` is used as an approximation of the C logic, which
474/// would rehash entries out of the shrinking tail. The C function's graceful
475/// degradation on allocation failure (keep the current size) is preserved:
476/// `HashMap` will simply retain its existing capacity if memory is tight.
477///
478/// # C source
479/// ```c
480/// // C: void luaS_resize(lua_State *L, int nsize) {
481/// // stringtable *tb = &G(L)->strt;
482/// // int osize = tb->size;
483/// // TString **newvect;
484/// // if (nsize < osize)
485/// // tablerehash(tb->hash, osize, nsize); /* depopulate shrinking part */
486/// // newvect = luaM_reallocvector(L, tb->hash, osize, nsize, TString*);
487/// // if (l_unlikely(newvect == NULL)) {
488/// // if (nsize < osize)
489/// // tablerehash(tb->hash, nsize, osize); /* restore to original size */
490/// // } else {
491/// // tb->hash = newvect;
492/// // tb->size = nsize;
493/// // if (nsize > osize)
494/// // tablerehash(newvect, osize, nsize);
495/// // }
496/// // }
497/// ```
498///
499/// PORT NOTE: The three calls to `tablerehash` are dropped because `HashMap`
500/// automatically rehashes. The allocation-failure fallback (restore to `osize`)
501/// has no direct analogue; `HashMap` will retain existing capacity on OOM, which
502/// matches the intent.
503// PERF(port): luaS_resize shrink — HashMap::shrink_to() is a hint, not a
504// guarantee; the C code freed exact memory. Profile in Phase B.
505pub(crate) fn resize(state: &mut LuaState, nsize: usize) {
506 let strt = &mut state.global_mut().strt;
507 let osize = strt.size;
508
509 if nsize > osize {
510 // C: newvect = luaM_reallocvector(...); if (nsize > osize) tablerehash(...)
511 let additional = nsize.saturating_sub(strt.map.len());
512 strt.map.reserve(additional);
513 } else if nsize < osize {
514 // C: if (nsize < osize) tablerehash(tb->hash, osize, nsize) — depopulate
515 // PERF(port): shrink_to is a hint; exact shrink not guaranteed in Rust
516 strt.map.shrink_to(nsize);
517 }
518
519 // C: tb->size = nsize;
520 strt.size = nsize;
521}
522
523// C: void luaS_clearcache(global_State *g)
524// lstring.h: LUAI_FUNC → pub(crate)
525/// Clear the API string cache, replacing any GC-white entries with the
526/// preallocated OOM message (which is never collected).
527///
528/// Called by the GC sweep phase to ensure the cache never holds a pointer to a
529/// collected string.
530///
531/// # C source
532/// ```c
533/// // C: void luaS_clearcache(global_State *g) {
534/// // int i, j;
535/// // for (i = 0; i < STRCACHE_N; i++)
536/// // for (j = 0; j < STRCACHE_M; j++) {
537/// // if (iswhite(g->strcache[i][j])) /* will entry be collected? */
538/// // g->strcache[i][j] = g->memerrmsg;
539/// // }
540/// // }
541/// ```
542///
543/// PORT NOTE: Takes `&mut GlobalState` directly (same as the C signature which
544/// takes `global_State *g`, not `lua_State *L`). The caller accesses this via
545/// `state.global_mut()`.
546pub(crate) fn clear_cache(g: &mut GlobalState) {
547 for i in 0..STRCACHE_N {
548 for j in 0..STRCACHE_M {
549 // C: if (iswhite(g->strcache[i][j]))
550 // macros.tsv: iswhite → obj.is_white()
551 if g.strcache[i][j].is_white() {
552 // C: g->strcache[i][j] = g->memerrmsg;
553 g.strcache[i][j] = g.memerrmsg.clone();
554 }
555 }
556 }
557}
558
559// C: void luaS_init(lua_State *L)
560// lstring.h: LUAI_FUNC → pub(crate)
561/// Initialise the string intern table and the API string cache.
562///
563/// Must be called exactly once during VM startup, before any strings are created.
564/// Pre-creates the memory-error message and fixes it in the GC (so it is never
565/// collected), then fills every cache slot with that same string.
566///
567/// # C source
568/// ```c
569/// // C: void luaS_init(lua_State *L) {
570/// // global_State *g = G(L);
571/// // int i, j;
572/// // stringtable *tb = &G(L)->strt;
573/// // tb->hash = luaM_newvector(L, MINSTRTABSIZE, TString*);
574/// // tablerehash(tb->hash, 0, MINSTRTABSIZE);
575/// // tb->size = MINSTRTABSIZE;
576/// // g->memerrmsg = luaS_newliteral(L, MEMERRMSG);
577/// // luaC_fix(L, obj2gco(g->memerrmsg));
578/// // for (i = 0; i < STRCACHE_N; i++)
579/// // for (j = 0; j < STRCACHE_M; j++)
580/// // g->strcache[i][j] = g->memerrmsg;
581/// // }
582/// ```
583pub(crate) fn init(state: &mut LuaState) -> Result<(), LuaError> {
584 // C: tb->hash = luaM_newvector(L, MINSTRTABSIZE, TString*);
585 // tablerehash(tb->hash, 0, MINSTRTABSIZE);
586 // tb->size = MINSTRTABSIZE;
587 // macros.tsv: luaM_newvector → vec![T::default(); n]
588 // PORT NOTE: StringPool::new() sets the initial capacity to MIN_STR_TAB_SIZE,
589 // replacing both the allocation and the tablerehash clear pass.
590 state.global_mut().strt = StringPool::new();
591
592 // C: g->memerrmsg = luaS_newliteral(L, MEMERRMSG);
593 // macros.tsv: luaS_newliteral → state.intern_str(b"...")
594 let memerrmsg = new_lstr(state, MEMERR_MSG)?;
595
596 // C: luaC_fix(L, obj2gco(g->memerrmsg)); /* it should never be collected */
597 // macros.tsv: luaC_fix — not listed; it marks the object as fixed (non-collectable)
598 // TODO(port): call state.gc().fix(memerrmsg.clone()) when GC is wired in Phase D;
599 // in Phases A–C the Rc keeps it alive as long as GlobalState holds the clone
600 let memerrmsg_lt = impl_to_lt(&memerrmsg);
601 state.global_mut().memerrmsg = memerrmsg_lt.clone();
602
603 // C: for (i = 0; i < STRCACHE_N; i++)
604 // for (j = 0; j < STRCACHE_M; j++)
605 // g->strcache[i][j] = g->memerrmsg;
606 for i in 0..STRCACHE_N {
607 for j in 0..STRCACHE_M {
608 state.global_mut().strcache[i][j] = memerrmsg_lt.clone();
609 }
610 }
611
612 Ok(())
613}
614
615// C: TString *luaS_createlngstrobj(lua_State *L, size_t l)
616// lstring.h: LUAI_FUNC → pub(crate)
617/// Create a new, uninitialized long string of `l` bytes.
618///
619/// The returned string's bytes are all zero. The caller is responsible for
620/// filling the content, if needed; in practice `new_lstr` calls this and then
621/// copies the source bytes in.
622///
623/// # C source
624/// ```c
625/// // C: TString *luaS_createlngstrobj(lua_State *L, size_t l) {
626/// // TString *ts = createstrobj(L, l, LUA_VLNGSTR, G(L)->seed);
627/// // ts->u.lnglen = l;
628/// // ts->shrlen = 0xFF; /* signals that it is a long string */
629/// // return ts;
630/// // }
631/// ```
632///
633/// PORT NOTE: `ts->u.lnglen = l` and `ts->shrlen = 0xFF` are replaced by the
634/// `StringKind::Long` variant which carries the length implicitly through
635/// `Rc<[u8]>::len()`. The `0xFF` sentinel is no longer needed.
636pub(crate) fn create_long_str(state: &mut LuaState, l: usize) -> GcRef<LuaStringImpl> {
637 // C: TString *ts = createstrobj(L, l, LUA_VLNGSTR, G(L)->seed);
638 let seed = state.global().seed;
639 // PORT NOTE: C's createstrobj allocates uninitialised storage then the caller
640 // fills bytes via memcpy. Rust's create_str_obj constructs with zeroed bytes;
641 // callers (e.g. new_lstr) pass the real bytes directly, eliminating the two-step.
642 create_str_obj(state, &vec![0u8; l], StringKind::Long, seed)
643}
644
645// C: void luaS_remove(lua_State *L, TString *ts)
646// lstring.h: LUAI_FUNC → pub(crate)
647/// Remove a short string from the intern table.
648///
649/// Called by the GC sweep when a short string is about to be collected.
650///
651/// # C source
652/// ```c
653/// // C: void luaS_remove(lua_State *L, TString *ts) {
654/// // stringtable *tb = &G(L)->strt;
655/// // TString **p = &tb->hash[lmod(ts->hash, tb->size)];
656/// // while (*p != ts) /* find previous element */
657/// // p = &(*p)->u.hnext;
658/// // *p = (*p)->u.hnext; /* remove element from its list */
659/// // tb->nuse--;
660/// // }
661/// ```
662///
663/// PORT NOTE: The C implementation walks the intrusive `hnext` chain to unlink
664/// `ts`. In Rust the chain does not exist; `HashMap::remove` is O(1) average.
665/// `lmod(ts->hash, tb->size)` (the bucket index) is not needed; the map keys by
666/// byte content.
667pub(crate) fn remove_str(state: &mut LuaState, ts: &LuaStringImpl) {
668 let strt = &mut state.global_mut().strt;
669
670 // C: TString **p = &tb->hash[lmod(ts->hash, tb->size)];
671 // while (*p != ts) p = &(*p)->u.hnext;
672 // *p = (*p)->u.hnext;
673 // PORT NOTE: all of the above replaced by HashMap::remove keyed on bytes
674 strt.map.remove(ts.as_bytes());
675
676 // C: tb->nuse--;
677 strt.nuse = strt.nuse.saturating_sub(1);
678}
679
680// C: TString *luaS_newlstr(lua_State *L, const char *str, size_t l)
681// lstring.h: LUAI_FUNC → pub(crate)
682/// Create or retrieve a Lua string from `bytes`.
683///
684/// If `bytes.len() <= MAX_SHORT_LEN` (40), the string is interned: an existing
685/// identical short string is returned if found, otherwise a new one is created
686/// and inserted into the intern table.
687///
688/// If `bytes.len() > MAX_SHORT_LEN`, a new long string is allocated each time
689/// (long strings are never interned).
690///
691/// # C source
692/// ```c
693/// // C: TString *luaS_newlstr(lua_State *L, const char *str, size_t l) {
694/// // if (l <= LUAI_MAXSHORTLEN) /* short string? */
695/// // return internshrstr(L, str, l);
696/// // else {
697/// // TString *ts;
698/// // if (l_unlikely(l * sizeof(char) >= (MAX_SIZE - sizeof(TString))))
699/// // luaM_toobig(L);
700/// // ts = luaS_createlngstrobj(L, l);
701/// // memcpy(getlngstr(ts), str, l * sizeof(char));
702/// // return ts;
703/// // }
704/// // }
705/// ```
706pub(crate) fn new_lstr(state: &mut LuaState, bytes: &[u8]) -> Result<GcRef<LuaStringImpl>, LuaError> {
707 // C: if (l <= LUAI_MAXSHORTLEN)
708 if bytes.len() <= MAX_SHORT_LEN {
709 intern_short_str(state, bytes)
710 } else {
711 // C: if (l_unlikely(l * sizeof(char) >= (MAX_SIZE - sizeof(TString))))
712 // luaM_toobig(L);
713 // macros.tsv: luaM_toobig → return Err(LuaError::Memory)
714 // PORT NOTE: sizeof(TString) is a C-specific overhead; in Rust we just
715 // check that the byte count fits within MAX_SIZE.
716 if bytes.len() >= MAX_SIZE {
717 return Err(LuaError::Memory);
718 }
719
720 // C: ts = luaS_createlngstrobj(L, l);
721 // memcpy(getlngstr(ts), str, l * sizeof(char));
722 // PORT NOTE: Rather than creating a zeroed buffer and then copying,
723 // we construct the LuaStringImpl directly from `bytes`.
724 let seed = state.global().seed;
725 let h = hash_bytes(bytes, seed);
726 let ts = create_str_obj(state, bytes, StringKind::Long, h);
727 Ok(ts)
728 }
729}
730
731// C: TString *luaS_new(lua_State *L, const char *str)
732// lstring.h: LUAI_FUNC → pub(crate)
733/// Create or retrieve a Lua string, using a small two-slot LRU cache per hash
734/// bucket to accelerate repeated calls with the same byte sequence.
735///
736/// In C, the cache bucket is selected by casting the C string pointer to a `u32`
737/// (`point2uint`). In Rust, `point2uint` is restricted to `lua-gc`/`lua-coro`
738/// (raw-pointer cast requiring `unsafe`). We substitute a content-hash based
739/// bucket index instead. Functional semantics are identical; cache hit rates for
740/// repeated calls with the same `bytes` may differ.
741///
742/// # C source
743/// ```c
744/// // C: TString *luaS_new(lua_State *L, const char *str) {
745/// // unsigned int i = point2uint(str) % STRCACHE_N; /* hash */
746/// // int j;
747/// // TString **p = G(L)->strcache[i];
748/// // for (j = 0; j < STRCACHE_M; j++) {
749/// // if (strcmp(str, getstr(p[j])) == 0) /* hit? */
750/// // return p[j]; /* that is it */
751/// // }
752/// // /* normal route */
753/// // for (j = STRCACHE_M - 1; j > 0; j--)
754/// // p[j] = p[j - 1]; /* move out last element */
755/// // p[0] = luaS_newlstr(L, str, strlen(str));
756/// // return p[0];
757/// // }
758/// ```
759///
760/// PORT NOTE: `point2uint(str) % STRCACHE_N` used the raw pointer address as a
761/// fast key, exploiting the fact that C string literals have stable addresses.
762/// In Rust we use `hash_bytes(bytes, seed) % STRCACHE_N` instead. The replacement
763/// is fully safe and has identical semantics (but different cache behaviour for
764/// calls from different `&[u8]` slices with identical content).
765pub(crate) fn new(state: &mut LuaState, bytes: &[u8]) -> Result<GcRef<LuaStringImpl>, LuaError> {
766 // C: unsigned int i = point2uint(str) % STRCACHE_N;
767 // PORT NOTE: pointer hash replaced by content hash (see doc above)
768 let seed = state.global().seed;
769 let i = (hash_bytes(bytes, seed) as usize) % STRCACHE_N;
770
771 // C: for (j = 0; j < STRCACHE_M; j++) { if (strcmp(str, getstr(p[j])) == 0) ... }
772 // macros.tsv: getstr → ts.as_bytes()
773 for j in 0..STRCACHE_M {
774 if state.global().strcache[i][j].as_bytes() == bytes {
775 // C: return p[j];
776 // TODO(phase-b): strcache currently holds lua_types::LuaString; rebuild
777 // a rich LuaStringImpl from the bytes. Phase B should unify the types.
778 let cached_bytes = state.global().strcache[i][j].as_bytes().to_vec();
779 // TODO(D-1c-bridge): LuaStringImpl is the rich local type; state helper produces lua_types::LuaString
780 return Ok(GcRef::new(LuaStringImpl {
781 bytes: cached_bytes.into(),
782 kind: if bytes.len() <= MAX_SHORT_LEN { StringKind::Short } else { StringKind::Long },
783 hash: Cell::new(hash_bytes(bytes, seed)),
784 extra: Cell::new(0),
785 }));
786 }
787 }
788
789 // C: /* normal route */
790 // Create the string before mutating the cache
791 let new_str = new_lstr(state, bytes)?;
792
793 // C: for (j = STRCACHE_M - 1; j > 0; j--) p[j] = p[j - 1];
794 // Shift entries toward the back to make room at slot 0
795 for j in (1..STRCACHE_M).rev() {
796 // Clone first to avoid borrow conflict between getter and setter
797 let prev = state.global().strcache[i][j - 1].clone();
798 state.global_mut().strcache[i][j] = prev;
799 }
800
801 // C: p[0] = luaS_newlstr(L, str, strlen(str));
802 state.global_mut().strcache[i][0] = impl_to_lt(&new_str);
803
804 Ok(new_str)
805}
806
807// C: Udata *luaS_newudata(lua_State *L, size_t s, int nuvalue)
808// lstring.h: LUAI_FUNC → pub(crate)
809/// Allocate a new full userdata of `s` raw bytes with `nuvalue` Lua user values.
810///
811/// The raw byte payload is zeroed. All user values are initialised to `nil`.
812/// The metatable is `None`.
813///
814/// # C source
815/// ```c
816/// // C: Udata *luaS_newudata(lua_State *L, size_t s, int nuvalue) {
817/// // Udata *u;
818/// // int i;
819/// // GCObject *o;
820/// // if (l_unlikely(s > MAX_SIZE - udatamemoffset(nuvalue)))
821/// // luaM_toobig(L);
822/// // o = luaC_newobj(L, LUA_VUSERDATA, sizeudata(nuvalue, s));
823/// // u = gco2u(o);
824/// // u->len = s;
825/// // u->nuvalue = nuvalue;
826/// // u->metatable = NULL;
827/// // for (i = 0; i < nuvalue; i++)
828/// // setnilvalue(&u->uv[i].uv);
829/// // return u;
830/// // }
831/// ```
832pub(crate) fn new_userdata(
833 state: &mut LuaState,
834 s: usize,
835 nuvalue: usize,
836) -> Result<GcRef<LuaUserDataImpl>, LuaError> {
837 // C: if (l_unlikely(s > MAX_SIZE - udatamemoffset(nuvalue)))
838 // luaM_toobig(L);
839 // macros.tsv: luaM_toobig → return Err(LuaError::Memory)
840 // TODO(port): udatamemoffset(nuvalue) computes C-specific alignment padding
841 // for the flexible-array Udata layout. In Rust, LuaUserData allocates `data`
842 // and `uv` separately (Box<[u8]> + Vec<LuaValue>); the combined size bound
843 // differs. Conservative check: reject if s alone exceeds MAX_SIZE.
844 if s > MAX_SIZE {
845 return Err(LuaError::Memory);
846 }
847
848 // C: o = luaC_newobj(L, LUA_VUSERDATA, sizeudata(nuvalue, s));
849 // u = gco2u(o);
850 // TODO(port): register with GC tracking (state.gc().new_obj(...));
851 // Phase A–C stub: allocate via Rc without GC registration.
852 // TODO(D-1c-bridge): LuaUserDataImpl is the rich local type; state.new_userdata is still todo!()
853 let u = GcRef::new(LuaUserDataImpl {
854 // C: u->len = s;
855 len: s,
856 // C: u->nuvalue = nuvalue;
857 nuvalue: nuvalue as u16,
858 // C: u->metatable = NULL;
859 metatable: None,
860 // C: for (i = 0; i < nuvalue; i++) setnilvalue(&u->uv[i].uv);
861 // macros.tsv: setnilvalue → *o = LuaValue::Nil
862 // TODO(port): Vec<LuaValue> once LuaValue is defined in lua-types
863 uv: vec![(); nuvalue],
864 // Raw byte payload; zero-initialised.
865 data: vec![0u8; s].into_boxed_slice(),
866 });
867
868 // TODO(port): push into state.global_mut().allgc for GC tracking (Phase D)
869 Ok(u)
870}
871
872// ── Private helpers ───────────────────────────────────────────────────────────
873
874// C: static TString *createstrobj(lua_State *L, size_t l, int tag, unsigned int h)
875/// Allocate and initialise a new `LuaStringImpl` with the given bytes, kind, and hash.
876///
877/// In C, `createstrobj` allocated uninitialised memory via `luaC_newobj` and set
878/// the header fields; the caller then filled the content via `memcpy`. In Rust
879/// we construct the string directly from the provided `bytes`, eliminating the
880/// two-step pattern.
881///
882/// # C source
883/// ```c
884/// // C: static TString *createstrobj(lua_State *L, size_t l, int tag, unsigned int h) {
885/// // TString *ts;
886/// // GCObject *o;
887/// // size_t totalsize = sizelstring(l);
888/// // o = luaC_newobj(L, tag, totalsize);
889/// // ts = gco2ts(o);
890/// // ts->hash = h;
891/// // ts->extra = 0;
892/// // getstr(ts)[l] = '\0'; /* ending 0 */
893/// // return ts;
894/// // }
895/// ```
896///
897/// PORT NOTE: `sizelstring(l)` computed the total allocation size including the
898/// nul terminator. In Rust, `Rc<[u8]>` stores the bytes without a nul; the
899/// nul terminator is dropped. Callers that need a nul-terminated `*const u8`
900/// for FFI must use a temporary `CString` or equivalent.
901fn create_str_obj(
902 state: &mut LuaState,
903 bytes: &[u8],
904 kind: StringKind,
905 hash: u32,
906) -> GcRef<LuaStringImpl> {
907 // C: o = luaC_newobj(L, tag, totalsize);
908 // macros.tsv: luaM_newobject → state.gc().new_obj(tag, sz)
909 // TODO(port): register with GC tracking list (state.global_mut().allgc)
910 // in Phase D; Phase A–C creates a bare Rc
911 let _ = state; // state needed for GC registration in Phase D
912 // TODO(D-1c-bridge): LuaStringImpl is the rich local type; state helper produces lua_types::LuaString
913 GcRef::new(LuaStringImpl {
914 // C: ts->hash = h;
915 hash: Cell::new(hash),
916 // C: ts->extra = 0;
917 extra: Cell::new(0),
918 // C: getstr(ts)[l] = '\0'; /* content written by caller via memcpy */
919 // PORT NOTE: we receive bytes directly; no separate memcpy step needed
920 bytes: Rc::from(bytes),
921 kind,
922 })
923}
924
925// C: static void growstrtab(lua_State *L, stringtable *tb)
926/// Grow the string intern table, first attempting a GC collection if the table is
927/// at its absolute maximum size.
928///
929/// # C source
930/// ```c
931/// // C: static void growstrtab(lua_State *L, stringtable *tb) {
932/// // if (l_unlikely(tb->nuse == MAX_INT)) { /* too many strings? */
933/// // luaC_fullgc(L, 1); /* try to free some... */
934/// // if (tb->nuse == MAX_INT) /* still too many? */
935/// // luaM_error(L); /* cannot even create a message... */
936/// // }
937/// // if (tb->size <= MAXSTRTB / 2) /* can grow string table? */
938/// // luaS_resize(L, tb->size * 2);
939/// // }
940/// ```
941fn grow_str_tab(state: &mut LuaState) -> Result<(), LuaError> {
942 // C: if (l_unlikely(tb->nuse == MAX_INT)) {
943 // macros.tsv: MAX_INT → i32::MAX
944 let nuse = state.global().strt.nuse;
945 if nuse == i32::MAX as usize {
946 // C: luaC_fullgc(L, 1);
947 // macros.tsv: luaC_fullgc → state.gc().full_collect()
948 // TODO(port): state.gc().full_collect() — GC not yet wired in Phase A–C; no-op
949 // (When GC is live this call may reduce nuse by sweeping dead short strings.)
950
951 // C: if (tb->nuse == MAX_INT) luaM_error(L);
952 // macros.tsv: luaM_error → return Err(LuaError::Memory)
953 if state.global().strt.nuse == i32::MAX as usize {
954 return Err(LuaError::Memory);
955 }
956 }
957
958 // C: if (tb->size <= MAXSTRTB / 2) luaS_resize(L, tb->size * 2);
959 let size = state.global().strt.size;
960 if size <= MAX_STR_TAB / 2 {
961 resize(state, size * 2);
962 }
963
964 Ok(())
965}
966
967// C: static TString *internshrstr(lua_State *L, const char *str, size_t l)
968/// Look up `bytes` in the intern table; create and insert a new short string if
969/// not found.
970///
971/// The `isdead` / `changewhite` resurrection path is elided in Phases A–C because
972/// `Rc`-based reference counting keeps objects alive until all references drop
973/// (there are no dead-but-not-collected strings in Phase A–C).
974///
975/// # C source
976/// ```c
977/// // C: static TString *internshrstr(lua_State *L, const char *str, size_t l) {
978/// // TString *ts;
979/// // global_State *g = G(L);
980/// // stringtable *tb = &g->strt;
981/// // unsigned int h = luaS_hash(str, l, g->seed);
982/// // TString **list = &tb->hash[lmod(h, tb->size)];
983/// // lua_assert(str != NULL);
984/// // for (ts = *list; ts != NULL; ts = ts->u.hnext) {
985/// // if (l == ts->shrlen && (memcmp(str, getshrstr(ts), l) == 0)) {
986/// // if (isdead(g, ts)) changewhite(ts); /* resurrect it */
987/// // return ts;
988/// // }
989/// // }
990/// // if (tb->nuse >= tb->size) {
991/// // growstrtab(L, tb);
992/// // list = &tb->hash[lmod(h, tb->size)];
993/// // }
994/// // ts = createstrobj(L, l, LUA_VSHRSTR, h);
995/// // ts->shrlen = cast_byte(l);
996/// // memcpy(getshrstr(ts), str, l);
997/// // ts->u.hnext = *list;
998/// // *list = ts;
999/// // tb->nuse++;
1000/// // return ts;
1001/// // }
1002/// ```
1003///
1004/// PORT NOTE: `lmod(h, tb->size)` (power-of-two bucket modulo via
1005/// `macros.tsv: lmod → (s & (size - 1)) as usize`) and the `hnext` chain walk
1006/// are both gone. `HashMap::get` replaces the linear bucket scan.
1007fn intern_short_str(
1008 state: &mut LuaState,
1009 bytes: &[u8],
1010) -> Result<GcRef<LuaStringImpl>, LuaError> {
1011 // C: lua_assert(str != NULL);
1012 // In Rust, &[u8] slices are never null; the assertion is trivially satisfied.
1013
1014 // C: unsigned int h = luaS_hash(str, l, g->seed);
1015 let seed = state.global().seed;
1016 let h = hash_bytes(bytes, seed);
1017
1018 // C: for (ts = *list; ...) { if (memcmp matches) { if (isdead) changewhite; return ts; } }
1019 // PORT NOTE: intrusive hnext chain replaced by HashMap lookup
1020 // Clone the existing GcRef<LuaStringImpl> so the immutable borrow on `state` ends
1021 // before any mutable access below.
1022 let existing = state.global().strt.map.get(bytes).cloned();
1023 if let Some(ts) = existing {
1024 // C: if (isdead(g, ts)) changewhite(ts); /* resurrect it */
1025 // macros.tsv: isdead → g.is_dead(obj); changewhite → obj.flip_white()
1026 // PORT NOTE: GC color management deferred to Phase D; in Phases A–C all
1027 // Rc-held objects are live by definition (Rc keeps them alive).
1028 return Ok(ts);
1029 }
1030
1031 // C: if (tb->nuse >= tb->size) { growstrtab(L, tb); ... }
1032 let needs_grow = {
1033 let strt = &state.global().strt;
1034 strt.nuse >= strt.size
1035 };
1036 if needs_grow {
1037 grow_str_tab(state)?;
1038 }
1039
1040 // C: ts = createstrobj(L, l, LUA_VSHRSTR, h);
1041 // ts->shrlen = cast_byte(l); — encoded in StringKind::Short
1042 // memcpy(getshrstr(ts), str, l); — bytes passed directly to create_str_obj
1043 let ts = create_str_obj(state, bytes, StringKind::Short, h);
1044
1045 // C: ts->u.hnext = *list; *list = ts; — intrusive chain; gone in Rust
1046 // C: tb->nuse++;
1047 state
1048 .global_mut()
1049 .strt
1050 .map
1051 .insert(bytes.to_vec().into_boxed_slice(), ts.clone());
1052 state.global_mut().strt.nuse += 1;
1053
1054 Ok(ts)
1055}
1056
1057// ── Re-export marker for type defined here ────────────────────────────────────
1058
1059// TODO(port): LuaError is used in function signatures above but is not yet defined
1060// in lua-types. Phase B must add LuaError to lua-types/src/error.rs per
1061// PORTING.md §6 before this file can compile. The expected variants are:
1062// LuaError::Runtime(LuaValue)
1063// LuaError::Memory
1064// LuaError::Syntax(LuaValue)
1065// ... (full list in PORTING.md §6)
1066// For now, reference LuaError as an opaque import from the future lua-types crate.
1067use lua_types::LuaError;
1068
1069// ──────────────────────────────────────────────────────────────────────────────
1070// PORT STATUS
1071// source: src/lstring.c (275 lines, 15 functions)
1072// src/lstring.h (57 lines; merged)
1073// target_crate: lua-vm
1074// confidence: medium
1075// todos: 14
1076// port_notes: 30
1077// unsafe_blocks: 0 (must be 0 outside explicit unsafe-budget crates)
1078// notes: Logic is faithful to the C. The two largest structural changes
1079// are: (1) `tablerehash` + intrusive `hnext` chain replaced by
1080// `HashMap` in `StringPool`; (2) `luaS_new`'s `point2uint`
1081// pointer-hash replaced by a content hash (safe, same semantics).
1082// Key TODOs: GC registration in create_str_obj (Phase D),
1083// GC registration in new_userdata (Phase D), luaC_fix in init
1084// (Phase D), full_collect stub in grow_str_tab (Phase D),
1085// udatamemoffset size check in new_userdata (Phase B),
1086// LuaValue in LuaUserData.uv (Phase B), LuaError import path
1087// (Phase B), GcRef typedef (Phase B). Phase B priority: wire
1088// import paths for LuaState, GlobalState, LuaError, LuaValue,
1089// and move LuaStringImpl/StringPool/LuaUserData to their canonical
1090// modules (object.rs / state.rs).
1091// ──────────────────────────────────────────────────────────────────────────────